agabka/lung_cancer_data_project.ipynb

4824 lines
654 KiB
Plaintext
Raw Normal View History

2024-04-04 23:16:04 +02:00
{
"cells": [
{
"cell_type": "markdown",
"id": "033f13af",
"metadata": {},
"source": [
"This dataset contains information on patients with lung cancer, including their age, gender, air pollution exposure, alcohol use, dust allergy, occupational hazards, genetic risk, chronic lung disease, balanced diet, obesity, smoking, passive smoker, chest pain, coughing of blood, fatigue, weight loss ,shortness of breath ,wheezing ,swallowing difficulty ,clubbing of finger nails and snoring\n",
"\n",
"https://www.kaggle.com/datasets/thedevastator/cancer-patients-and-air-pollution-a-new-link/data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "7ce53ad1",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import plotly.figure_factory as ff\n",
"import seaborn as sns\n",
"sns.set()\n",
"import plotly.express as px\n",
"import numpy as np\n",
"import sklearn \n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3b9fd854",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: plotnine in c:\\users\\hp\\anaconda3\\lib\\site-packages (0.12.4)\n",
"Requirement already satisfied: matplotlib>=3.6.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (3.8.0)\n",
"Requirement already satisfied: mizani<0.10.0,>0.9.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.9.3)\n",
"Requirement already satisfied: numpy>=1.23.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (1.26.0)\n",
"Requirement already satisfied: pandas>=1.5.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (2.1.1)\n",
"Requirement already satisfied: patsy>=0.5.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.5.5)\n",
"Requirement already satisfied: scipy>=1.5.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (1.11.4)\n",
"Requirement already satisfied: statsmodels>=0.14.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.14.0)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (1.2.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (4.25.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (1.4.4)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (23.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (10.0.1)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (3.0.9)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (2.8.2)\n",
"Requirement already satisfied: tzdata in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mizani<0.10.0,>0.9.0->plotnine) (2023.3)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas>=1.5.0->plotnine) (2023.3.post1)\n",
"Requirement already satisfied: six in c:\\users\\hp\\anaconda3\\lib\\site-packages (from patsy>=0.5.1->plotnine) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.3.2 -> 24.0\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
}
],
"source": [
"pip install plotnine"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6d369f6b",
"metadata": {},
"outputs": [],
"source": [
"import plotnine"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "73edef6d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Patient Id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>Air Pollution</th>\n",
" <th>Alcohol use</th>\n",
" <th>Dust Allergy</th>\n",
" <th>OccuPational Hazards</th>\n",
" <th>Genetic Risk</th>\n",
" <th>chronic Lung Disease</th>\n",
" <th>Balanced Diet</th>\n",
" <th>...</th>\n",
" <th>Fatigue</th>\n",
" <th>Weight Loss</th>\n",
" <th>Shortness of Breath</th>\n",
" <th>Wheezing</th>\n",
" <th>Swallowing Difficulty</th>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <th>Frequent Cold</th>\n",
" <th>Dry Cough</th>\n",
" <th>Snoring</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>P1</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P10</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P100</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P1000</td>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P101</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>High</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n",
"index \n",
"0 P1 33 1 2 4 5 \n",
"1 P10 17 1 3 1 5 \n",
"2 P100 35 1 4 5 6 \n",
"3 P1000 37 1 7 7 7 \n",
"4 P101 46 1 6 8 7 \n",
"\n",
" OccuPational Hazards Genetic Risk chronic Lung Disease \\\n",
"index \n",
"0 4 3 2 \n",
"1 3 4 2 \n",
"2 5 5 4 \n",
"3 7 6 7 \n",
"4 7 7 6 \n",
"\n",
" Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n",
"index ... \n",
"0 2 ... 3 4 2 \n",
"1 2 ... 1 3 7 \n",
"2 6 ... 8 7 9 \n",
"3 7 ... 4 2 3 \n",
"4 7 ... 3 2 4 \n",
"\n",
" Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n",
"index \n",
"0 2 3 1 \n",
"1 8 6 2 \n",
"2 2 1 4 \n",
"3 1 4 5 \n",
"4 1 4 2 \n",
"\n",
" Frequent Cold Dry Cough Snoring Level \n",
"index \n",
"0 2 3 4 Low \n",
"1 1 7 2 Medium \n",
"2 6 7 2 High \n",
"3 6 7 5 High \n",
"4 4 2 3 High \n",
"\n",
"[5 rows x 25 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane = pd.read_csv(r'C:\\Users\\HP\\Desktop\\podyplomówka\\cancer_patient_data_sets.csv', index_col = 0)\n",
"dane.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1831fdd7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 1000 entries, 0 to 999\n",
"Data columns (total 25 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Patient Id 1000 non-null object\n",
" 1 Age 1000 non-null int64 \n",
" 2 Gender 1000 non-null int64 \n",
" 3 Air Pollution 1000 non-null int64 \n",
" 4 Alcohol use 1000 non-null int64 \n",
" 5 Dust Allergy 1000 non-null int64 \n",
" 6 OccuPational Hazards 1000 non-null int64 \n",
" 7 Genetic Risk 1000 non-null int64 \n",
" 8 chronic Lung Disease 1000 non-null int64 \n",
" 9 Balanced Diet 1000 non-null int64 \n",
" 10 Obesity 1000 non-null int64 \n",
" 11 Smoking 1000 non-null int64 \n",
" 12 Passive Smoker 1000 non-null int64 \n",
" 13 Chest Pain 1000 non-null int64 \n",
" 14 Coughing of Blood 1000 non-null int64 \n",
" 15 Fatigue 1000 non-null int64 \n",
" 16 Weight Loss 1000 non-null int64 \n",
" 17 Shortness of Breath 1000 non-null int64 \n",
" 18 Wheezing 1000 non-null int64 \n",
" 19 Swallowing Difficulty 1000 non-null int64 \n",
" 20 Clubbing of Finger Nails 1000 non-null int64 \n",
" 21 Frequent Cold 1000 non-null int64 \n",
" 22 Dry Cough 1000 non-null int64 \n",
" 23 Snoring 1000 non-null int64 \n",
" 24 Level 1000 non-null object\n",
"dtypes: int64(23), object(2)\n",
"memory usage: 203.1+ KB\n"
]
}
],
"source": [
"dane.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "af7da17c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Age</th>\n",
" <td>1000.0</td>\n",
" <td>37.174</td>\n",
" <td>12.005493</td>\n",
" <td>14.0</td>\n",
" <td>27.75</td>\n",
" <td>36.0</td>\n",
" <td>45.0</td>\n",
" <td>73.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Gender</th>\n",
" <td>1000.0</td>\n",
" <td>1.402</td>\n",
" <td>0.490547</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Air Pollution</th>\n",
" <td>1000.0</td>\n",
" <td>3.840</td>\n",
" <td>2.030400</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Alcohol use</th>\n",
" <td>1000.0</td>\n",
" <td>4.563</td>\n",
" <td>2.620477</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dust Allergy</th>\n",
" <td>1000.0</td>\n",
" <td>5.165</td>\n",
" <td>1.980833</td>\n",
" <td>1.0</td>\n",
" <td>4.00</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OccuPational Hazards</th>\n",
" <td>1000.0</td>\n",
" <td>4.840</td>\n",
" <td>2.107805</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Genetic Risk</th>\n",
" <td>1000.0</td>\n",
" <td>4.580</td>\n",
" <td>2.126999</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>chronic Lung Disease</th>\n",
" <td>1000.0</td>\n",
" <td>4.380</td>\n",
" <td>1.848518</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Balanced Diet</th>\n",
" <td>1000.0</td>\n",
" <td>4.491</td>\n",
" <td>2.135528</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Obesity</th>\n",
" <td>1000.0</td>\n",
" <td>4.465</td>\n",
" <td>2.124921</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Smoking</th>\n",
" <td>1000.0</td>\n",
" <td>3.948</td>\n",
" <td>2.495902</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Passive Smoker</th>\n",
" <td>1000.0</td>\n",
" <td>4.195</td>\n",
" <td>2.311778</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Chest Pain</th>\n",
" <td>1000.0</td>\n",
" <td>4.438</td>\n",
" <td>2.280209</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Coughing of Blood</th>\n",
" <td>1000.0</td>\n",
" <td>4.859</td>\n",
" <td>2.427965</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Fatigue</th>\n",
" <td>1000.0</td>\n",
" <td>3.856</td>\n",
" <td>2.244616</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Weight Loss</th>\n",
" <td>1000.0</td>\n",
" <td>3.855</td>\n",
" <td>2.206546</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Shortness of Breath</th>\n",
" <td>1000.0</td>\n",
" <td>4.240</td>\n",
" <td>2.285087</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wheezing</th>\n",
" <td>1000.0</td>\n",
" <td>3.777</td>\n",
" <td>2.041921</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Swallowing Difficulty</th>\n",
" <td>1000.0</td>\n",
" <td>3.746</td>\n",
" <td>2.270383</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <td>1000.0</td>\n",
" <td>3.923</td>\n",
" <td>2.388048</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Frequent Cold</th>\n",
" <td>1000.0</td>\n",
" <td>3.536</td>\n",
" <td>1.832502</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dry Cough</th>\n",
" <td>1000.0</td>\n",
" <td>3.853</td>\n",
" <td>2.039007</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Snoring</th>\n",
" <td>1000.0</td>\n",
" <td>2.926</td>\n",
" <td>1.474686</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count mean std min 25% 50% 75% \\\n",
"Age 1000.0 37.174 12.005493 14.0 27.75 36.0 45.0 \n",
"Gender 1000.0 1.402 0.490547 1.0 1.00 1.0 2.0 \n",
"Air Pollution 1000.0 3.840 2.030400 1.0 2.00 3.0 6.0 \n",
"Alcohol use 1000.0 4.563 2.620477 1.0 2.00 5.0 7.0 \n",
"Dust Allergy 1000.0 5.165 1.980833 1.0 4.00 6.0 7.0 \n",
"OccuPational Hazards 1000.0 4.840 2.107805 1.0 3.00 5.0 7.0 \n",
"Genetic Risk 1000.0 4.580 2.126999 1.0 2.00 5.0 7.0 \n",
"chronic Lung Disease 1000.0 4.380 1.848518 1.0 3.00 4.0 6.0 \n",
"Balanced Diet 1000.0 4.491 2.135528 1.0 2.00 4.0 7.0 \n",
"Obesity 1000.0 4.465 2.124921 1.0 3.00 4.0 7.0 \n",
"Smoking 1000.0 3.948 2.495902 1.0 2.00 3.0 7.0 \n",
"Passive Smoker 1000.0 4.195 2.311778 1.0 2.00 4.0 7.0 \n",
"Chest Pain 1000.0 4.438 2.280209 1.0 2.00 4.0 7.0 \n",
"Coughing of Blood 1000.0 4.859 2.427965 1.0 3.00 4.0 7.0 \n",
"Fatigue 1000.0 3.856 2.244616 1.0 2.00 3.0 5.0 \n",
"Weight Loss 1000.0 3.855 2.206546 1.0 2.00 3.0 6.0 \n",
"Shortness of Breath 1000.0 4.240 2.285087 1.0 2.00 4.0 6.0 \n",
"Wheezing 1000.0 3.777 2.041921 1.0 2.00 4.0 5.0 \n",
"Swallowing Difficulty 1000.0 3.746 2.270383 1.0 2.00 4.0 5.0 \n",
"Clubbing of Finger Nails 1000.0 3.923 2.388048 1.0 2.00 4.0 5.0 \n",
"Frequent Cold 1000.0 3.536 1.832502 1.0 2.00 3.0 5.0 \n",
"Dry Cough 1000.0 3.853 2.039007 1.0 2.00 4.0 6.0 \n",
"Snoring 1000.0 2.926 1.474686 1.0 2.00 3.0 4.0 \n",
"\n",
" max \n",
"Age 73.0 \n",
"Gender 2.0 \n",
"Air Pollution 8.0 \n",
"Alcohol use 8.0 \n",
"Dust Allergy 8.0 \n",
"OccuPational Hazards 8.0 \n",
"Genetic Risk 7.0 \n",
"chronic Lung Disease 7.0 \n",
"Balanced Diet 7.0 \n",
"Obesity 7.0 \n",
"Smoking 8.0 \n",
"Passive Smoker 8.0 \n",
"Chest Pain 9.0 \n",
"Coughing of Blood 9.0 \n",
"Fatigue 9.0 \n",
"Weight Loss 8.0 \n",
"Shortness of Breath 9.0 \n",
"Wheezing 8.0 \n",
"Swallowing Difficulty 8.0 \n",
"Clubbing of Finger Nails 9.0 \n",
"Frequent Cold 7.0 \n",
"Dry Cough 7.0 \n",
"Snoring 7.0 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane.describe().T"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a043ec73",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Patient Id', 'Age', 'Gender', 'Air Pollution', 'Alcohol use',\n",
" 'Dust Allergy', 'OccuPational Hazards', 'Genetic Risk',\n",
" 'chronic Lung Disease', 'Balanced Diet', 'Obesity', 'Smoking',\n",
" 'Passive Smoker', 'Chest Pain', 'Coughing of Blood', 'Fatigue',\n",
" 'Weight Loss', 'Shortness of Breath', 'Wheezing',\n",
" 'Swallowing Difficulty', 'Clubbing of Finger Nails', 'Frequent Cold',\n",
" 'Dry Cough', 'Snoring', 'Level'],\n",
" dtype='object')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane.columns"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9dac40a9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAGZCAYAAACwkvfNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABX5klEQVR4nO3dd3wUdf7H8dfMbnrvlRog9N6lN0UFFevJ6Xl6Vk5P7PoTTz27IiJYTk9Rz96RE7HSEnrvHdJ775udmd8fkWgkoYRsZnfzeT4ePB6wOzvz2QX2ne93vkUxDMNACCGEcBDV7AKEEEK4NwkaIYQQDiVBI4QQwqEkaIQQQjiUBI0QQgiHkqARQgjhUBI0QgghHEqCRgghhENJ0AjhQM42H9rZ6hFtgwSNaFXXXHMNiYmJDX51796dQYMGcfnll/Ptt982OH7ChAk88MADp33+Mz2+KV9++SWJiYmkp6cDsGDBAhITE0/79dnZ2dx8881kZGSc9Lj169eTmJjI+vXrT/s6f3zN6Xrttdd466236v98pu9JiOayml2AaHt69uzJP//5z/o/a5pGdnY277zzDnfddRcBAQGMGTMGgIULF+Lv729WqfUuv/xyRo8efdrHr1mzhhUrVjBnzpyTHterVy8++eQTunTpctrnbs5rAF566SX+/ve/1//5TN+TEM0lQSNanb+/P/379z/h8bFjxzJixAi++OKL+qDp2bNnK1fXuOjoaKKjo1v8vE19Fi39msY46j0J8UfSdSachqenJx4eHg0e+2NX2NKlS5k+fTp9+/Zl+PDh3HPPPeTm5jZ5zs8//5zu3buzYMGCJo/RdZ1XX32VcePG0a9fP2677TZKSkoaHPPHbqa0tDRuvfVWhg0bRr9+/bjyyitZuXIlUNft9uCDDwIwceLE+vonTJjAU089xV/+8hcGDhzII4880mQ32E8//cS5555Lnz59uPzyy1m7dm39c6fb3ZaYmFj/vo8/v3DhwvrfN/a6pUuXMmPGDAYMGMA555zDI4880uCzWLBgAZMnT2bFihVMmzaN3r17c+655/LVV181+fkKIUEjWp1hGNjt9vpfNTU1pKSk8PDDD1NRUcFFF13U6Os2b97MPffcw5QpU3jzzTd58MEHWbduHXfffXejxy9dupQ5c+Zwyy23cPvttzdZz/PPP88rr7zCpZdeysKFCwkJCWHu3LlNHq/rOjfffDOVlZU899xzvPrqqwQHB3PbbbeRkpLCuHHjuPXWW4G6L/bbbrut/rUffPBBfQA09T4BHnroIa699loWLFiAn58fN954I4cOHWry+FP55JNPALjsssvqf/9Hr776KrNnz6Zfv368/PLLzJo1i++//55rrrmG6urq+uPy8vJ4/PHHufbaa3njjTeIj4/ngQce4PDhw82uT7g36ToTrW7jxo306tWrwWOKotCtWzfmz5/PhAkTGn3d5s2b8fLy4sYbb8TLywuA4OBgdu7ciWEYKIpSf+zy5cu57777uOmmm7jzzjubrKW0tJT//ve/XHvttfVhNHr0aHJycli9enWjrykoKODw4cPccsstjB07FoC+ffuycOFCampq6NChA+3btwegR48exMfH1782MjKSBx54AFWt+xmvqRv6//znP7ngggsAGDFiBBMnTuS11147aQCezPGutujo6Ea73UpKSnjttde4/PLLG9w/69atGzNnzuTLL7/k6quvBqCqqoonn3ySESNGANCxY0fGjx/PypUrSUhIaFZ9wr1J0IhW16tXLx577DEAcnJymD9/PrW1tcybN++kX1RDhgxh3rx5TJs2jalTpzJmzBhGjRpV/2V/3O7du1m6dCmRkZH84x//OGkt27Zto7a2lokTJzZ4fOrUqU0GTXh4OF26dGHOnDmsWbOmvo7j3WUnk5CQUB8yTbFYLEyZMqX+z15eXowZM4bly5ef8vzNtW3bNmw2G9OmTWvw+ODBg4mLi2P9+vX1QQ
M0CKvj93kqKysdVp9wbdJ1Jlqdn58fffr0oU+fPkyaNIl33nmH8vJyrr/+egoLC5t83YABA3jjjTdo164db731FldffTVjx47l3XffbXDcgQMHGDlyJBkZGbz//vsnreX4/YfQ0NAGj0dERDT5GkVRePvtt7nkkktYvXo1s2fPZuTIkdx5550UFxef9Hrh4eEnfR7qWml/vFcVFhZGaWnpKV/bXMc/h8bqCw8Pp6ysrMFjPj4+9b8/HpwyR0c0RYJGmC4sLIxHHnmE7OxsnnzyyZMeO3r0aN566y02btzI66+/TteuXXnqqafYvn17/TGjRo3i9ddf58ILL2TevHlkZmY2eb6QkBCgrjvs904VGFFRUTz66KMkJSXx9ddfc8MNN/DDDz8wb968U7zbUysrKzvhSzs/P/+EMDzueJehpmn1j1VUVJzRNYOCguqv80d5eXn1n5MQzSFBI5zClClTGD16NP/73/+avG/x7LPPctlll2EYBj4+PowfP577778fgKysrPrjjrdGHnzwQaxWK4888kiT1x0wYADe3t4sW7asweMn66baunUrI0eOZMeOHSiKQo8ePZg9ezbdunUjOzsb4JTdYydjs9lYt25d/Z8rKipYsWIFw4YNa/T44/OMfv8ZbNmy5YTjTlZTv3798PT0ZMmSJQ0e37RpE5mZmQwcOPCM3oMQvydBI5zGQw89hIeHB0888QR2u/2E50eMGMGuXbt44IEHSE5OZsWKFTzxxBMEBwczfPjwE44PDw9n9uzZrF69msWLFzd6TT8/P2677TY+/PBDXnjhBZKSknjyySdPGjQ9e/bE29ub++67j2+//Zb169czb9489u7dy7nnngtAYGAgAD/++OMZj8by8PDgoYceYsmSJSxfvpy//e1vVFdXNxi99nvH71Edv2f05Zdf8s9//hM/P78GxwUGBrJ161Y2btx4QospODiYm266ic8++4zHHnuMpKQkPv74Y26//Xa6dOnCjBkzzug9CPF7EjTCaXTu3JlrrrmGAwcONHpvZcyYMbzwwgscPHiQv//979x11134+Pjw3nvvERwc3Og5r7rqKvr27ctTTz3V5P2fm2++mYceeohly5Zx6623sn///vqWUmO8vLx4++236dq1K08++SQ33HADP//8M48//nj9F/KwYcMYOXIkc+fO5dlnnz2jzyEoKIh7772XefPmcccdd2CxWHj//ffp3Llzo8d36tSJZ599lszMTG666Sbeffdd/vWvfxEZGdnguFtuuYWdO3dy4403Nmj9HHf77bfz6KOPsmHDBm655RYWLlzIeeedx4cfftjgnowQZ0ox5A6eEEIIB5IWjRBCCIeSoBFCCOFQEjRCCCEcSoJGCCGEQ0nQCCGEcCgJGiGEEA4lQSOEEMKhJGiEEEI4lASNEEIIh5KgEUII4VASNEIIIRxKgkYIIYRDSdAIIYRwKAkaIYQQDiVBI4QQwqEkaIQQQjiUBI0QQgiHkqARQgjhUBI0QgghHEqCRgghhENJ0AghhHAoCRohhBAOJUEjhBDCoSRohBBCOJQEjRBCCIeSoBFCCOFQEjRCCCEcSoJGCCGEQ0nQCCGEcCgJGiGEEA4lQSOEEMKhJGiEEEI4lASNEEIIh5KgEUII4VASNEIIIRxKgkYIIYRDSdAIIYRwKAkaIYQQDiVBI4QQwqEkaIQQQjiUBI0QQgiHkqARQgjhUBI0QgghHMpqdgFCuAJdN9ANA8MAVQWL2nI/o2m6joFed25FwaJaWuzcQjgDCRohALumoyi/BYhhGJSU2ygur6G0vIaSChtllTbKKmyUVdbW/f7XX+WVtVTb7Oj6b4H05/O6M3VkJ25d8hA1dhuqoqIqCl5WL/w8fPD99Zef52+/9/Xwwd/Tj3C/UCL9wgnxCcL6u9DRdA0DA4tiQVEUsz4qIc6YBI1oMwzDQNMNrJbfWiPFZTWk55WRnlNOZn4FWfnlZOZVkF1Qgc2uN/tax19bYauk2l
7TrHMoKAR5BxDhF0a4bwjhvmFE+IUSExBJx+B2BHkHAKAbOoZhSEtIOC0JGuG27JqORVVQFIVqm539KUXsPVbI0cw
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"level_counts = dane['Level'].value_counts()\n",
"ax = level_counts.plot(kind = 'pie', autopct='%1.1f%%', startangle=90)\n",
"ax = plt.title('Risk distribiution')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b45bd771",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAHJCAYAAABqj1iuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABZc0lEQVR4nO3dfVzN9/8/8Mc5tVTqKJTIdVa5zEWpjUQNm6uJbYZshOV6rnLN2lx/l1IjDUVzMY3MzC4xZiwNG0NyLUlJ6VoXTuf9+8Ov89lR6NS57Dzut1u3dV7vi/N8P5fTo/f7dd5HJAiCACIiIiIDIdZ2AURERESaxPBDREREBoXhh4iIiAwKww8REREZFIYfIiIiMigMP0RERGRQGH6IiIjIoDD8EBERkUFh+CEivaSv92fVZN362iMidWP4ITIw8+bNg5OTEzZv3qztUioVExODnj17olOnToiIiKh0nU2bNiEqKkr++IsvvoCTk5OmSqy2o0ePYv78+fLHCQkJcHJyQkJCgsqfa+/evVi7dq3K90tUGzD8EBmQgoIC/Prrr3B0dMQ333yjc2cGCgoKsGbNGnTo0AFRUVHw9fWtdL3169ejqKhIw9XV3Pbt25GWliZ/3L59e8TGxqJ9+/Yqf65NmzYhJydH5fslqg0YfogMyA8//ICysjIsWbIEKSkpOHnypLZLUpCbmwuZTIa+ffvCzc0NjRs31nZJamVhYYHOnTvDwsJC26UQGRSGHyIDEhcXB3d3d7i7u6NVq1bYs2dPhXWioqLg4+ODTp064f3338dvv/1W4dLMtWvXEBAQgK5du6Jr166YOnUqUlJSXvr8p06dwqhRo9CtWze4u7tjzpw58jMh+/fvh7e3NwBg0aJFz72MVT6+YcOGCuscP34cQ4YMQceOHdG/f38cOHBAYXlOTg6WLVuG119/HR07dsR7772H+Pj4F9a8f/9+ODk54cKFC/D19UWnTp0wePBg/Pjjjwrr3bt3D/PmzUPPnj3Rvn17vPbaa5g3bx6ys7MBAGPGjMFff/2Fv/76S97Pyi57vay35dvEx8fD398fLi4ueP3117F27VpIpVIAgLe3N1JTU/Htt9/CyckJ9+7dg0wmQ1hYGLy9vdGhQwd4e3sjJCQET548eeHxE9VGDD9EBuLmzZvyX+AAMGzYMBw7dgwPHjyQr7NhwwYEBwfjrbfeQkREBFxcXDBr1iyF/dy+fRvvv/8+srKysGbNGqxcuRIpKSkYOXIksrKynvv83333Hfz9/dGoUSOEhIRg4cKF+OeffzBixAhkZWWhd+/e2LBhAwBg8uTJiI2NrXQ/5ePvvPNOhXWWLVuGsWPHYtOmTbC1tcWCBQuQlJQEACgpKcGHH36Io0ePYtasWdiwYQPs7OwwYcKElwYgAAgICICPjw82bNiAVq1aYfbs2Th69CgAoKioCB988AFu3ryJTz75BFFRUfDz88OhQ4cQEhICAPjkk0/Qrl07tGvX7rmXupTp7dy5c9GtWzdERkZi8ODBiI6Oxr59+wA8/f9oY2MDLy8vxMbGwtbWFlu2bMGuXbswdepUREdHY+TIkdi6dSsiIyNfeuxEtY5ARAZhzZo1gqurq1BcXCwIgiA8ePBAaNu2rfDFF18IgiAIhYWFQqdOnYTly5crbLd06VLB0dFROH36tCAIgjB79mzhtddeE/Lz8+XrZGdnC926dRPWrFlT6XOXlZUJPXr0EMaOHaswnpycLLRv3174v//7P0EQBCElJUVwdHQU4uLiXngsjo6OQnh4uPxxeHi44OjoKPz+++/ysTt37giOjo5CTEyMIAiCEBsbKzg6Ogrnz5+XryOTyYTRo0cLw4YNe+5zxcXFCY6OjvI+lW/39ttvy7dLTEwURo4cKSQnJytsGxAQIPTr10/+2M/PT/Dz85M/Pn36tNK9Ld8mNDRU4bm8vb2FgIAA+eM+ffoI8+fPlz/29/ev0P8dO3YI33777XOPna
i24pkfIgMglUpx8OBBvPHGGygpKUFeXh5MTU3h7u6OvXv3oqysDOfPn0dxcTHefPNNhW0HDRqk8Pj06dNwd3eHqakppFIppFIpLCws4Orqij///LPS5799+zYePnyIwYMHK4w3b94cXbp0Udm7nVxdXeXfN2vWDACQl5cHAIiPj4eNjQ3at28vr7usrAx9+vTBpUuXkJub+8J9v/322/LvRSIR+vbti8uXL6OoqAht27bF7t270bRpU6SkpOCPP/5AdHQ0bt26pdRlJWV626VLF4XHdnZ2ePz48XP37e7ujj///BOjRo3Ctm3bcPPmTfj5+WHo0KFVro+otjDWdgFEpH7Hjx9HZmYm9u/fj/3791dYfuzYMRQXFwMA6tevr7CsYcOGCo9zcnLw448/VpjzUtm2/92msn2VjyUmJlbpOF7G3Nxc/r1Y/PRvO+H/v6MtJycHDx8+fO47qx4+fIh69eo9d9+NGjVSeNygQQMIgoD8/HyYmZlh27Zt+PLLL5GdnY2GDRuiffv2MDMzQ35+fpXrV6a3pqamCo/FYvEL3703YcIE1K1bF3FxcVi7di3WrFkDR0dHLFq0CK+99lqVaySqDRh+iAzAvn37YG9vj9WrV1dYNmPGDOzZsweTJk0CADx69AitW7eWL3/06JHC+paWlnj99dcxbty4CvsyNq78JcXKygoAkJmZWWHZw4cPYW1tXeVjqS5LS0u0bNkSwcHBlS5v2rTpC7fPzs5WCECZmZkwMjKClZUVvv/+e6xZswZz5szBO++8Iw8qH3/8MS5evKhUjcr2tqrEYjFGjx6N0aNHIysrC7///jsiIyMxffp0/PnnnzAxManR/on0CS97EdVymZmZ+OOPPzBw4ED5O73++zVgwACcOnUKlpaWsLS0xK+//qqw/S+//KLwuHv37rhx4wbatm2Ljh07omPHjujQoQO2b9+Ow4cPV1pDq1atYGNjg++//15hPCUlBefPn0fXrl2VOqbyszrK6N69O9LS0tCgQQN53R07dkR8fDy2bt0KIyOjF27/22+/yb8XBAG//vorunXrBhMTE5w7dw6Wlpb46KOP5MGnsLAQ586dg0wmq3Ld1ent8zz7XO+//z5WrFgB4OlZq2HDhmH06NHIz89HQUGBUvsm0ncMP0S13LfffgupVIqBAwdWutzX1xcymQyHDh3ChAkTsHPnToSGhuLUqVMIDQ3F119/DeB/v0ynTJmCu3fvIiAgAEeOHMEff/yB6dOn44cffoCzs3OlzyEWizF79mz8+eefmDVrFn7//XccOHAA48aNQ7169So90/EiEokE//zzD86cOVPlGzUOGzYMTZo0wbhx4/Dtt9/i9OnTCAkJQWhoKGxtbfHKK6+8cPvPP/8cMTExOHHiBGbMmIGbN2/i448/BgB06tQJ+fn5WLNmDRISEvD9999j9OjRyMzMVLgZo0Qiwe3btxEfH1/pHKPq9PZ5JBIJEhMT8ddff6G4uBhubm74+uuvERkZiYSEBBw8eBDbtm1D9+7dn3u5kqi24mUvolru22+/xauvvvrcX56dOnVC69atERcXh+PHj0MmkyE2NhZRUVFwcXHB3LlzsXr1avl8GmdnZ+zatQuhoaGYN28eBEGAo6MjNm7cCB8fn+fWMWzYMNStWxdffvklpk6dCgsLC3h6emL27NmwsbFR6pgmTZqEiIgITJw4sdL5MZUxNzfHrl27sG7dOnz++efIz8+Hvb095syZA39//5duHxQUhC+//BIpKSlo164doqOj5ROsfX19ce/ePcTFxWH37t1o1KgRvLy8MGrUKCxduhQ3btxAmzZtMHr0aFy6dAkTJ07E6tWrYWtrq/Ac1e1tZfz9/bFq1SqMHz8e27Ztw8cffwwTExPExcVh48aNsLS0hLe3N+bMmaPUfolqA5FQ1T+biKhWk0qlOHToENzd3RXurLxr1y6sWLECCQkJkEgkWqxQO/bv34+FCxfi6NGjL50XRET6gWd+iA
jA0wm1W7ZsQUxMDCZPngxra2skJSUhLCwMQ4cONcjgQ0S1E8MPEclFRkYiJCQEQUFByMvLQ5MmTTB27FgEBARouzQ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"#stworzenie 'binow' dla pokazania wieku pacjentów\n",
"bins = []\n",
"for i in range (0, 101, 10):\n",
" bins.append(i)\n",
"\n",
"plt.hist(dane['Age'], bins, histtype='bar', rwidth=0.8)\n",
"for i in range(len(bins) - 1):\n",
" count = ((dane['Age'] >= bins[i]) & (dane['Age'] < bins[i+1])).sum()\n",
" plt.text(bins[i] + 5, count, str(count), ha='center', va='bottom')\n",
"plt.xlabel('Age')\n",
"plt.ylabel('Number of patients')\n",
"plt.title('Age of the patients')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "78391055",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"37.174"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mean_age = dane['Age'].mean()\n",
"mean_age"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "966e57b9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHwCAYAAACIfURnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABKeklEQVR4nO3deVxUZf//8TcDIiAiSAJqKYgpmYqaKKaEabaIluZdWuLtrneS/MRSK/fKzDuScCsXzK3CUutusbrNNk1zwe0uNJfbqFQkFUGRRWbm94df5m6CShCZA7yej4cPmXOuOedz5sxh3lznOmecrFarVQAAAAZkcnQBAAAAf4SgAgAADIugAgAADIugAgAADIugAgAADIugAgAADIugAgAADIugAgAADIugApQDI9w30Qg1GElFvh689sD1Q1BBlTdo0CA1b97c9i8kJERt27bVgw8+qNWrV8tsNtu179atm5566qmrXv7mzZs1adKkv2z31FNPqVu3bmVezx8pKCjQ7Nmz9cEHH/zhuowgPj5eHTt2VJs2bfTee+9d13WlpKRo9OjRtse//PKLmjdvrg0bNpT7uq52/xfZsGGDmjdvrh07dpR7LeWpefPmmj9/vqPLAOTi6AKAitCiRQtNnz5dkmQ2m5WVlaWvvvpKL7zwglJSUpSQkCAnJydJ0oIFC+Tp6XnVy16xYsVVtRszZoz+/ve/l7r2v5KRkaEVK1Zo9uzZ131dZXX48GEtXbpUDz/8sB544AE1adLkuq7vnXfe0dGjR22P/fz8tHbtWjVq1Kjc13W1+79I0fus6H8Af46ggmrB09NTbdq0sZvWrVs3BQUFafbs2erWrZvuv/9+SVdCzfVwPT4kjbCuq3H+/HlJUlRUlNq3b1/h63d1dS22/x2lXr16kiR/f38HVwJUDpz6QbU2aNAg+fn5KTk52Tbt96dkNm7cqPvvv1+tW7dWeHi4nnzySWVkZNiev3PnTu3cudPWnb9jxw41b95cycnJuvPOO3X77bdr69atJZ6OuXz5sp5//nmFhYUpLCxMkyZN0rlz52zzS3rOb09j/PLLL+revbsk6emnn7a1/f3zzGaz3njjDfXu3VutW7dW165dFR8fr/z8fLt1DRkyROvXr9c999yjli1b6v7779dXX331l6/jxo0b9eCDD6pt27bq3Lmzpk2bpqysLEnS/PnzNWjQIEnS4MGD//CUVNHrtnXrVg0cOFCtW7dWjx49tGbNGrt2586d08yZM3XnnXeqZcuW6tChg2JiYvTLL7/YtuPdd9/ViRMn7F6n35/6OXnypMaPH68OHTooNDRUgwcPVmpqarHX+eOPP1ZsbKzatm2rsLAwTZ48WTk5OZJK3v+StHr1at17771q1aqVIiIiNGPGDF28eFGSdPPNN8vDw+NPw+SxY8c0cuRItWvXTrfffrsSEhL09NNP215HSbJYLFqyZIl69Oihli1b6p577tHq1avtljNo0CBNnjxZS5YsUdeuXdWqVSsNGDBA+/fvt2u3c+dO9e/fX6Ghobrnnnu0bdu2YjXl5+frn//8pyIjI9WyZUv17t1bGzdutGvTrVs3vfDCCxo8eLDatWunadOm/eE2AleLHhVUa87OzurUqZM2btyowsJCubjYHxIpKSl68sknNWbMGIWFhSk9PV0vvfSSnnjiCa1evVrTp0/XhAkTJEnTp09X06ZN9f3330uSEhISNHPmTOXn56tNmzb68MMPi63/448/VuvWrfXiiy/q3Llzio+PV1paml1w+jN+fn5asGCBHn/8cT322GO6++67S2w3bdo0vffeexoxYoQ6dOig1NRULVy4UAcPHtSyZctspyG+++47ZWRkKDY2Vp6enkpMTFRsbKy+/vpr1alTp8RlL1q0SImJiXr00UcVFxenn3/+WYmJidq3b5/efvttPfTQQ6pbt66effZZTZs2TW3btv3TbYqLi1OfPn30j3/8Q5s3b9Zzzz
0nq9WqQYMGyWq1avTo0crKytITTzyhevXq6eDBg0pMTNS0adO0fPlyjRkzRufOnVNqaqoWLFigRo0a6dKlS3brOHfunAYMGCB3d3dNnTpV7u7uWrlypQYOHKh169YpODjY1nb69Onq16+fFi1apAMHDighIUF169bVE088UeL+/+ijjzRnzhxNmjRJzZs313//+1/NmTNHeXl5evHFF+Xv76+9e/f+4fafO3dO0dHR8vX11ezZs2U2m5WYmKiTJ0/a9QrNmDFDGzZs0OjRo9W2bVvt2rVLL7zwgrKzsxUTE2Nr9+mnnyo4OFhTpkyR1WrVnDlzFBsbq88//1zOzs76/vvvNWzYMHXs2NG2nvHjx9vVZLVaFRMToz179ig2NlbBwcHatGmT4uLiVFBQoD59+tjavvHGGxo4cKBGjRolNze3P93XwNUgqKDau+GGG3T58mWdP39eN9xwg928lJQU1axZUyNHjlTNmjUlSd7e3vrPf/4jq9Wqpk2b2saz/P7UwoABA3Tvvff+6bq9vLy0bNky2zJ8fHwUExOjrVu3qkuXLn9Zu6urq2655RZJV073lHTa6ujRo1q3bp3GjRunxx57TJLUuXNn+fn5aeLEifr6668VGRkpSbpw4YI2bNhg+2vfw8ND0dHR+vbbb3XPPfcUW3ZWVpZeffVVPfTQQ7YxQJLUrFkzDRw4UBs2bNCjjz6qpk2bSpKaNm36l6fW7rrrLk2ePFmSFBERoYyMDL366qsaOHCgfv31V7m7u2vSpEm2U0gdO3bUL7/8Ygt3jRo1Ut26de1O9/w+qKxcuVLnz5/XW2+9pYYNG0qS7rjjDvXs2VOJiYmaN2+erW1kZKRtsGynTp30zTff6Msvv9QTTzxR4v7fsWOHGjZsqIEDB8pkMqlDhw7y8PBQZmbmn253kdWrVysnJ0fvvfee7fRQUU9HkePHj+vtt9/W+PHjNWrUKElSly5d5OTkpMWLF+vRRx+Vj4+PJKmwsFBJSUm2OnNycjRp0iQdPHhQLVu21OLFi1W3bl29+uqrcnV1lXTlPR4XF2db37Zt27RlyxYlJCSoZ8+etn2Tm5ur+Ph49erVyxby/fz89NRTT8lkosMe5YN3EvB/ShrcGBYWpry8PPXu3VsJCQlKSUlRly5d9Pjjj//lYMjmzZv/5TojIyPtBu5269ZNNWrUKLHrvax27twpSerdu7fd9KioKDk7O9tdfVK3bl27UxIBAQGSpNzc3BKXvW/fPhUUFBRbdvv27dWwYcMyXdnywAMP2D2+++67dfbsWR0/flz+/v5atWqV2rdvr5MnT2r79u1as2aN9uzZo8uXL1/1OrZv365bbrlF/v7+KiwsVGFhoUwmk+64445ir/3vA2hAQECx4PNb4eHh+vHHH/Xggw9q0aJFSk1NVe/evTV48OCrqu3bb79V27Zt7cawNGzY0K4n6ttvv5XValW3bt1s9RcWFqpbt27Kz89XSkqKre1vw5T0v7ExRfs0JSVFERERtpAiXXnNnZ2dbY+3b98uJycnRUZGFlvfr7/+qiNHjtjaBgcHE1JQruhRQbV3+vRpubm5ydvbu9i8tm3basmSJVqxYoWSkpL02muvqV69eho5cuRffvD4+vr+5bp/34NjMpnk7e2t7OzsUm3DnykaK1I0iLOIi4uLfHx8dOHCBds0d3d3uzZFYcxisfzpsn+/HUXTfrvsq+Xn52f3uOh1LHpN3n//fc2dO1enTp2St7e3QkJCSn2K4fz580pLS9Ott95a4vzfBrPfvyYmk+lP75vSs2dPWSwWvfnmm1qwYIESExPVsGFDPfHEE4qKivrL2s6dO1diXfXq1dOvv/5qq1/SHy7v9OnTf1q/9L99mpWVpbp169q1KXpvFDl//rysVqvatWtX4voyMjJsPXslvReAa0FQQbVmNpu1c+dOtWvXzu4vyN+KiIiwdXN/++23WrVqlV544QW1adNGoaGh17T+3wcSs9mszMxM24ezk5
NTsfu8/Nlf8yUpGlvy66+/6sYbb7RNv3z5sjIzM+0+kEqraNlnzpyxG9dRtL6bbrqp1Mss+hAucvbsWUlXAsvu3bs
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count patients per gender code. Sorting by code (instead of relying on the\n",
"# frequency order of value_counts) keeps every bar aligned with its label.\n",
"gender_labels = {1: 'Man', 2: 'Woman'}\n",
"gender_counts = dane['Gender'].value_counts().sort_index()\n",
"ax = gender_counts.plot(kind='bar', color=['blue', 'pink'])\n",
"\n",
"# Annotate every bar with its patient count\n",
"for i, value in enumerate(gender_counts):\n",
"    ax.text(i, value + 0.1, str(value), ha='center', va='bottom')\n",
"\n",
"# Derive the tick labels from the index rather than hard-coding their order;\n",
"# values that are not raw codes (e.g. already 'Man'/'Woman') are shown as-is\n",
"ax.set_xticks(range(len(gender_counts)))\n",
"ax.set_xticklabels([gender_labels.get(code, code) for code in gender_counts.index])\n",
"ax.set_xlabel('Gender')\n",
"ax.set_ylabel('Number of patients')\n",
"ax.set_title(\"Distribution of patients' gender\")\n",
"\n",
"# No legend: the tick labels already identify each bar\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "98973a0f",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAHJCAYAAABqj1iuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABfvUlEQVR4nO3deXxMV+PH8c9MdpIQse9KJdQuIUEagmptpbWVUEtR60Nb+74vtW+NfW1LqSqtPoraqmippbYqSmsJRYg1kZn5/eGXeTpNaESSScz3/Xp5yZx777nnTm4m35xz7r0Gi8ViQURERMRBGO3dABEREZG0pPAjIiIiDkXhR0RERByKwo+IiIg4FIUfERERcSgKPyIiIuJQFH5ERETEoSj8iIiIiENR+BFJ59LDfUjTQxtERFKKwo/IM2jdujV+fn7Wf/7+/pQvX5433niD5cuXYzKZbNYPCwujf//+Sa5/69at9OvX71/X69+/P2FhYcnez+PExsYybtw4NmzY8Nh9pQeTJk2icuXKlCtXjnXr1iVYvm/fPvz8/Ni3b1+at+3f3i97ti0ttG7dmtatW9u7GSI2nO3dAJGMrmTJkgwbNgwAk8nErVu32LFjB2PHjuXAgQNMnToVg8EAwKxZs/D09Exy3UuWLEnSel27dqVNmzZP3fZ/c/XqVZYsWcK4ceNSfV/JderUKebPn0+zZs14/fXXeeGFF+zdJBFJ5xR+RJ6Rp6cn5cqVsykLCwujSJEijBs3jrCwMBo2bAg8CkqpoWDBgqlSr733lRQ3b94EoF69egQEBNi3MSKSIWjYSySVtG7dmpw5c7Jy5Upr2T+HozZu3EjDhg0pU6YMQUFBfPDBB1y9etW6/Y8//siPP/5oHRaJHyJZuXIlNWrUoEqVKnz//feJDq08fPiQ0aNHExgYSGBgIP369ePGjRvW5Yltc+HCBfz8/Fi7di0XLlygZs2aAAwYMMC67j+3M5lMfPzxxzRo0IAyZcpQvXp1Jk2aRExMjM2+2rZty+eff06dOnUoVaoUDRs2ZMeOHf/6Pm7cuJE33niD8uXLU7VqVYYOHcqtW7cAmDlzpnVI5e23336q4bhTp07RuXNnKlSoQIUKFejWrRt//vknADExMQQEBDB27FibbcxmM9WqVWPEiBHWstWrV1OvXj1KlSpF9erVmTlzJnFxcUluR7zTp0/TsmVLSpcuTe3atVm+fLl1Wc+ePQkNDcVsNttsM3ToUGrWrPnYOVl37txh6NChBAcHU758eXr37s2SJUvw8/OzWW/Lli288cYblC5dmqpVqzJ69Gju3btnXT5z5kxq167N9u3badCgAaVKlaJOnTp88cUXNvVcunSJ7t27U7FiRapWrcrixYsTbde/vWf9+/fn7bffZtiwYQQEBNC4ceNkvacij6PwI5JKnJycCA4O5siRI4l+cB84cIAPPviAV155hfnz5zNgwAD27t3L+++/D8CwYcMoWbIkJUuWZNWqVbz00kvWbadOnUq/fv3o169fgl6neN988w1Hjx5l/Pjx9O3bl+3bt9O1a9cktz9nzpzMmjULgC5duli//qehQ4cyduxYwsLC+Oijj2jVqhUrVqyga9euNr+Ujx49ysKFC+nZsyezZ8/G2dmZnj17WoNMYubMmUPv3r0pW7YsM2bMoFu3bmzatInWrVvz4MEDmjZtytChQ63teFwb/+n333+nRYsWXL9+nfHjxzNmzBj+/PNP3nrrLa5fv46bmxt16tThm2++sQkc+/bt46+//uL1118HYO7cuQwZMoTg4GAiIiJo1aoV8+fPt7bpaYwbN46yZcsyZ84cQkJCGD16NJ999hkATZo0ITIy0mZeUGxsLN988w2NGze2Dqv+U7du3fjmm2/o0aMHU6dO5e7du0yePNlmnQ0bNtCtWzdeeOEFZs+eTffu3Vm/fn2C799ff/3FyJEjadOmDfPmzSN//vz079+fM2fOAHDv3j3Cw8
M5efIkI0eOZOjQoaxevZqDBw/a7C+p79n+/fs5f/48M2fOpFu3bjg7a6BCUo7OJpFUlD17dh4+fMjNmzfJnj27zbIDBw7g5uZGx44dcXNzAyBr1qz88ssvWCwWihUrZp0f9M+A06JFC1599dUn7tvb25sFCxZY6/Dx8aFbt258//33VKtW7V/b7urqSokSJYBHQ12JDdmdPn2aNWvW0KtXL7p06QJA1apVyZkzJ3379mXnzp2EhoYCcPv2bdauXWsdNsuUKRPh4eHs3buXOnXqJKj71q1bfPTRRzRt2tQ6pwqgePHitGrVirVr19KyZUuKFSsGQLFixZI8rDhr1izc3d1ZsmSJ9f0JDg6mVq1aLFiwgH79+vH666+zZs0a9u/fT6VKlYBHQaFQoUKUK1eO27dv89FHH9G8eXMGDx4MQLVq1ciaNSuDBw+mXbt2vPjii0lqD8Abb7xhndweEhLClStXmD17Nk2aNKFatWrkzp2bdevWERwcDDzqrbl9+zaNGzdOtL49e/awd+9eZs6cySuvvALAyy+/TIMGDTh9+jTw6Cq+SZMmERISwqRJk6zbFi5cmLZt27Jjxw6qV68OwP379xkzZox1/4ULF6ZGjRrs2LGDokWL8sUXX3Dp0iW+/PJLa89SmTJlqF27trXep3nP4uLiGDFiBIUKFUryeyiSVOr5EUkDif1lHhgYyIMHD2jQoAFTp07lwIEDVKtWje7duz/2L/l4/xy2SExoaKjN5OqwsDBcXFz44Ycfnv4AHuPHH38EoEGDBjbl9erVw8nJyaanIlu2bDbzhXLnzg08+qWamEOHDhEbG5ug7oCAAPLly/dMV0ft3buXypUr4+7uTlxcHHFxcXh6ehIQEGB9fwIDA8mXLx9ff/018KinZfPmzdb5WwcPHuT+/fuEhYVZ64iLi7MOve3evfup2lS3bl2b17Vr1yYyMpKzZ89iNBpp3Lgx3377rfX9+uKLL6hcuTL58uV77DG6uLhQq1Yta5nRaOS1116zvj579iyRkZEJjiEwMBBPT88Ex/D3EB7//YsfHtu/fz8FChSwOTfz5Mljs83TvGfu7u7pbn6ZPD/U8yOSiq5cuYK7uztZs2ZNsKx8+fLMmzePJUuWsHDhQiIiIsiRIwcdO3bk7bfffmK9vr6+/7rvf/Y0GY1GsmbNSnR09FMdw5PED1nlyJHDptzZ2RkfHx9u375tLfPw8LBZJz7g/XMeyz/r/udxxJf9ve6ndfPmTTZu3MjGjRsTLMuWLZu1fQ0aNOCzzz5jyJAh7Ny5k+joaOuQV/xE606dOiW6j/i5W0n1z/cw/nsc/z68+eabRERE8O2331KlShV2795tcxXeP0VFRZE1a1aMRtu/cf/+fsYfw4gRI2zmMT3uGP7+PYyvN35o7NatW9b37p/Hde3aNZv9JeU98/X1/dc/AkSSS+FHJJWYTCZ+/PFHKlSogJOTU6LrhISEEBISwv3799m7dy/Lli1j7NixlCtXjrJlyz7T/v8ZckwmE1FRUdZfqgaDIcF9iP4+yTUpsmTJAjyaD5I/f35r+cOHD4mKisLHxyc5Tbep+9q1axQtWtRm2V9//UWBAgWSXbeXlxdVqlShXbt2CZb9fW7J66+/TkREBPv27eOrr76iQoUK1v16e3sDj+4xVLhw4QT1JBbanuSfc5/iA0P896tAgQJUqlSJb775htu3b+Ph4WEdzkpMrly5iIqKwmw22wSg69evW7+OP4a+fftah/b+Lv57kBQ+Pj6cP38+QXl84Pn7/lLqPRNJLg17iaSSlStXcvXqVd56661El0+YMIEmTZpgsVjw8PCgRo0a1jkfly9fBkjwV/vT+OGHH2wmWm/atIm4uDgqV64MQObMmYmKirK5Kuvnn3+2qeNxoS3e3+fC/N3XX3+NyWSiYsWKyW5/2bJlcXV1TVD3/v37uXTpEhUqVEh23ZUqVeL06dOUKFGC0qVLU7p0aUqVKsWSJUvYvHmzdb0XXn
iB0qVL8/XXX7N9+3Zrr098+1xcXLhy5Yq1jtKlS+Pi4sLkyZO5cOHCU7Vp165dNq+//vpr8uTJYzPnpUmTJvzwww+
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count patients per (gender, level). Gender is decoded first so the cell works\n",
"# whether the column still holds the raw 1/2 codes or was already relabelled.\n",
"gender_decoded = dane['Gender'].replace({1: 'Man', 2: 'Woman'})\n",
"grouped_data = dane.groupby([gender_decoded, 'Level']).size().unstack(fill_value=0)\n",
"\n",
"# Bar geometry: one group per level, one bar per gender inside each group\n",
"categories = grouped_data.columns\n",
"bar_width = 0.35\n",
"group_positions = np.arange(len(categories))\n",
"level_totals = grouped_data.sum(axis=0)\n",
"\n",
"fig, ax = plt.subplots()\n",
"\n",
"for j, gender in enumerate(grouped_data.index):\n",
"    positions = group_positions + j * bar_width\n",
"    counts = grouped_data.loc[gender]\n",
"    ax.bar(positions, counts, width=bar_width, label=gender)\n",
"    # Label each bar with this gender's share of the patients at that level\n",
"    for position, count, total in zip(positions, counts, level_totals):\n",
"        ax.text(position, count + 0.2, f'{count / total:.0%}', ha='center', va='bottom')\n",
"\n",
"# The x axis shows the levels, the legend shows the genders\n",
"ax.set_xlabel('Level')\n",
"ax.set_ylabel('Count')\n",
"ax.set_title('Distribution of level by gender')\n",
"ax.legend(title='Gender')\n",
"\n",
"# Centre each tick under its group of bars\n",
"ax.set_xticks(group_positions + bar_width * (len(grouped_data.index) - 1) / 2)\n",
"ax.set_xticklabels(categories)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "8d81604c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Smoking\n",
" 1 181\n",
" 2 222\n",
" 3 172\n",
" 4 59\n",
" 5 10\n",
" 6 60\n",
" 7 207\n",
" 8 89\n",
" dtype: int64]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of patients at each smoking level. `dane3` keeps its original\n",
"# one-element-list shape; the Series itself is displayed for a readable table.\n",
"smoking_level_counts = dane.groupby('Smoking').size()\n",
"dane3 = [smoking_level_counts]\n",
"smoking_level_counts"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "d85261ce",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMoAAAImCAYAAACipWb6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3yN5//H8Ve2IFPtEatmEjOIUcSoWFWkFYoSe9WeRfVr16w9QmpUKMq3NdqKr6Ko2qtFrRapUYnYSc45vz/yy6nTBAkhDu/n45FHk+u+7uv+3Hc+oj65ruu2MZlMJkRERERERERERF5ztukdgIiIiIiIiIiIyMtAhTIRERERERERERFUKBMREREREREREQFUKBMREREREREREQFUKBMREREREREREQFUKBMREREREREREQFUKBMREREREREREQFUKBMREREREREREQFUKBMREZGXlMlkSu8QXooYREREROTFUaFMREREUq1169YULVrU/FGsWDHKlClD06ZNWbp0KQaDwaJ/QEAAgwcPTvH4ERERDBo06In9Bg8eTEBAwFNf51FiY2MZN24c33zzzSOv9TKYNGkSFStWpHTp0qxbty69w0midevWtG7d+pHH165dS9GiRbl48eJzj2XGjBkULVr0uV8nvRQtWpQZM2Y812scOXKEt99+m9jYWAD2799PgwYNKF++PAMHDuTu3bsW/ZcsWUL79u2TjPP7778TEBBATEzMc41XRETkaahQJiIiIk+lRIkSrFy5kpUrV7J8+XImT56Mj48PY8eOpV+/fhazsWbOnEm3bt1SPHZYWBiRkZFP7NetWzdmzpz5VPE/ztWrVwkLCyM+Pv65X+tpnTp1igULFlC3bl0WLlzIW2+9ld4hpVqNGjVYuXIl2bJlS+9Q5AkePHjAoEGD6NevH46OjsTGxtKnTx/KlCnD5MmTOXz4MHPmzDH3v337NnPnzqVv375JxipcuDABAQGMGTPmRd6CiIhIitindwAiIiJinTJnzkzp0qUt2gICAihQoADjxo0jICCAxo0bAwlFtechX758z2Xc9L5WSkRHRwOYZ/RYI09PTzw9PdM7DEmBL7/8EhsbG+rWrQvA2bNnuXLlCv369cPDw4Nz587xzTff0K9fPwAWLFhAxYoV8fb2Tna8Tp06UaNGDdq0aUPJkiVf2H2IiIg8iWaUiYiISJpq3bo12bJlIzw83Nz27yWRGzdupHHjxvj6+lKpUiX69+/P1atXzefv3buXvXv3UrRoUX7++Wd+/vlnihYtSnh4ODVr1qRy5crs3Lkz2eWQcXFxjB49Gj8/P/z8/Bg0aBA3btwwH0/unIsXL1K0aFHWrl3LxYsXqVWrFgBDhgwx9/33eQaDgeXLl9OoUSN8fX2pUaMGkyZN4sGDBxbX+vDDD1mzZg1vv/023t7eNG7cmB9//PGJz3Hjxo00bdqUMmXKUKVKFUaMGMHNmzeBhGWEiUsa27Zt+9gloUuXLqVevXr4+PhQrVo1PvnkE27fvm0+XrRoUVasWMHgwYMpV64cFSpUYPTo0dy/f58JEyZQqVIlKlasyLBhwyzu7cGDB8yaNcs8dt26dZk/fz5Go/GRsezYsQNvb2+GDBmCyWRKsvQypc/r4MGDtGrVitKlS1OjRg2++OILPvzwwxQtu92yZQtvv/02Pj4+BAUFsXv3bgDi4+OpWrWqudDzsMDAQIYMGfLIMc+cOUPHjh0pW7YslStXZurUqQwZMsRi2anRaGT+/PnUqVMHb29v3n77bZYuXWoxTuvWrRk2bBjz58+nRo0a+Pj40KJFCw4fPmzRb+/evbz//vuUKlWKt99+m127diWJ6cGDB0ycOJHq1avj7e1No0aN2Lhxo0WfgIAAxo4dS9u2bSlbtiwjRoxI9v5iY2NZvHgxjRo1SnIsQ4YMADg4OJi/91euXGH58uX07t37kc8sW7ZsVKpUifnz5z+yj4iISHpQoUxERE
TSlJ2dHf7+/hw5csRi6WKi/fv3079/f+rWrcuCBQsYMmQIe/bsMRcoRo4cSYkSJcxLOx+ebTJ16lQGDRrEoEGDksxmS7Rp0yaOHTvG+PHjGThwINu2bUvVss9s2bKZl1h27dr1kcstR4wYwdixYwkICGDOnDm0atWKZcuW0a1bN4tlp8eOHSM0NJRevXoxa9Ys7O3t6dWrl7nolZzZs2fTp08fSpUqxeeff0737t357rvvaN26Nffv3ycoKMhc1BgxYsQjY9ywYQMTJkygVatWhIaG0r17d9avX8/o0aMt+k2aNAlHR0dmzpzJO++8w9KlS2nSpAmRkZF89tlntGjRgtWrV5sLOyaTiS5durBw4UKaN2/O3LlzqVevHtOmTWPkyJHJxvLLL7/Qo0cPGjRowJgxY7CxsUm235Oe15kzZ/jwww8BmDJlCj179mT+/Pns37//kc/zYUOHDqVNmzbMmDGDTJky0bFjR37//Xfs7e1p0qQJW7ZssSgkHj58mLNnz9K0adNkx7tx4wYffPABkZGRjBs3jo8//pjNmzfz7bffWvT75JNP+Pzzz2ncuLH5eY0dO5ZZs2ZZ9Pvuu++IiIjg448/ZsqUKVy/fp1evXqZ9/07fvw47du3J3PmzEyfPp22bdsmWd5oMpno3r074eHhtGvXjjlz5lCmTBn69OmTZC+75cuXm/c3e+edd5K9x59//pkrV65Qr149c1v+/Pnx8PBgzZo13Lhxg82bN1OuXDkgoZDbqFEjvLy8HvOdSChARkREcOfOncf2ExEReZG09FJERETS3BtvvEFcXBzR0dG88cYbFsf279+Pk5MTHTt2xMnJCQB3d3eOHj2KyWSicOHCZM6cGSBJMaxFixYW/1hPjqurKwsXLjSP4eHhQffu3dm5cydVq1Z9YuyOjo4UL14cSFhumdyy0d9//53Vq1fTu3dvunbtCkCVKlXIli0bAwcOZPv27VSvXh2AW7dusXbtWvPSzYwZM/LBBx+wZ88e3n777SRj37x5kzlz5hAUFGRRdCpSpAitWrVi7dq1tGzZksKFCwMJ+z09amnrzz//TO7cuWnVqhW2trZUqFCBjBkzEhUVZdGvUKFCfPrppwD4+fmxevVq4uLimDRpEvb29lSrVo2tW7dy4MABALZv386uXbv47LPPzMtrq1SpQoYMGczFm8T4IGET+M6dO1O3bl3GjRuHre2jf1f7pOc1b948MmfOzMKFC3F2dgagYMGCtGjR4pFjPmzkyJE0aNAAAH9/f2rVqsWcOXOYPHkyzZo1Y8GCBXz33Xc0a9YMgK+//pp8+fI9cnnr0qVLuXPnDuvWrSN79uwA5pleic6dO8eqVavo27cvnTp1AqBq1arY2Ngwb948WrZsiYeHB5Awsy00NNScv3fu3GHQoEH8+uuveHt7M2/ePDw9PZkzZw6Ojo5Awp+fPn36mK+3a9cuduzYwdSpU6lfvz4A1apV4969e0yaNImGDRtib5/wz4Bs2bIxePDgx35P9uzZg6urKwUKFDC3ZciQgfHjxzNkyBBGjx6Nv78/PXr04Pfff2fz5s1s2rSJbdu28fnnn2NnZ0efPn2oXLmyxbg+Pj7ExcWxb98+858XERGR9KYZZSIiIvLcJDdryM/Pj/v379OoUSOmTp3K/v37qVq1Kj169HjkLKNEKXlrYfXq1c1FBkhYXubg4JDs8rSntXfvXoAkS9EaNGiAnZ0dP//8s7nN09PTYn+zHDlyAHDv3r1kxz506BCxsbFJxi5fvjy5c+e2GPtJKlWqxPnz52natCmzZ8/mxIkTNGrUiLZt21r0K1OmjPlze3t7PDw88Pb2NhdTIKEYc+vWLfP929nZmYswiRKLZg/HePnyZTp27IjJZGLkyJGPLcjAk5/Xnj17qF69urlIlhh/7ty5n/g87OzszHtsATg5OfHWW2+Zc6NAgQKUK1eO9evXAwlLDjdu3EiTJk0emZt79uyhTJky5iIZQO7cuS2e6Z
49ezCZTAQEBBAfH2/+CAgI4MGDBxaz4R4uFAPmcRPvf//+/VSrVs1cJAOoW7cudnZ25q93796NjY0N1atXT3K9a9e
"text/plain": [
"<Figure size 1400x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Relabel the gender codes (a no-op once the column already holds the labels)\n",
"dane['Gender'] = dane['Gender'].replace({1: 'Man', 2: 'Woman'})\n",
"smoking_counts = dane.groupby(['Smoking', 'Gender']).size()\n",
"\n",
"# Convert the counts to percentages of each gender's patient total\n",
"gender_totals = smoking_counts.groupby(level='Gender').transform('sum')\n",
"smoking_percentages = smoking_counts / gender_totals * 100\n",
"\n",
"# Sort by smoking level\n",
"smoking_percentages_sorted = smoking_percentages.sort_index(level='Smoking', sort_remaining=False)\n",
"\n",
"# One pair of horizontal bars per smoking level and one colour per gender, so\n",
"# the legend has labelled artists to describe\n",
"fig, ax = plt.subplots(figsize=(14, 6))\n",
"smoking_percentages_sorted.unstack('Gender', fill_value=0).plot(kind='barh', ax=ax)\n",
"\n",
"# Print the percentage next to every bar\n",
"for container in ax.containers:\n",
"    ax.bar_label(container, fmt='%.2f%%', padding=2)\n",
"\n",
"# Legend, axis labels and title\n",
"ax.legend(title='Gender')\n",
"ax.set_xlabel('Percentage')\n",
"ax.set_ylabel('Smoking')\n",
"ax.set_title('Distribution of smoking by gender (%)')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f5daf17c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Passive Smoker\n",
" 1 60\n",
" 2 284\n",
" 3 140\n",
" 4 161\n",
" 5 30\n",
" 6 30\n",
" 7 187\n",
" 8 108\n",
" dtype: int64]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of patients at each passive-smoking level. `dane3` keeps its original\n",
"# one-element-list shape; the Series itself is displayed for a readable table.\n",
"passive_smoker_level_counts = dane.groupby('Passive Smoker').size()\n",
"dane3 = [passive_smoker_level_counts]\n",
"passive_smoker_level_counts"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "86122d04",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABRQAAAImCAYAAAAi8wbAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3yN5//H8Vd2jEwl9q6ZxIwIUSRoKaVGK0ZVUbu1R9Vq7RIrYqZSo2aVquhAW5S2NqFGg7Y2lWUmOef8/sgv5+s0QRJRxPv5eHg057qv+7qv+z6fo/E517AymUwmRERERERERERERNLB+kl3QERERERERERERJ4dSiiKiIiIiIiIiIhIuimhKCIiIiIiIiIiIummhKKIiIiIiIiIiIikmxKKIiIiIiIiIiIikm5KKIqIiIiIiIiIiEi6KaEoIiIiIiIiIiIi6aaEooiIiIiIiIiIiKSbEooiIiLyzDGZTE+6C09FH0REREREngQlFEVERCRLdezYkbJly5r/lCtXjipVqtCyZUuWLl2KwWCwqB8QEMCwYcPS3f7WrVsZOnToQ+sNGzaMgICATF/nfhISEpg4cSIbN26877WeBlOnTsXX15fKlSuzfv36J92dB/r1118pW7Ysv/7665PuymPVsWNHOnbs+KS7YfY0xm1WOXfuHGXLlmXdunWP9TrffvstQUFB5tfff/89gYGB+Pr6MmHChFR/302cOJGRI0emaufnn3+mRYsWJCYmPtb+ioiIZBXbJ90BERERyX4qVKjA6NGjATAYDMTGxvLTTz8xYcIE9u3bx/Tp07GysgIgJCSE3Llzp7vt8PDwdNXr1asXb731Vob7/jBXrlwhPDyciRMnPvZrZdbJkydZuHAhb7zxBs2bN6dkyZJPuksPVLFiRVatWkXp0qWfdFdE0u369euMHTuWBQsWmF8PHjyYTp064eXlxciRIylZsiRt27YFkpOc69at4+uvv07VVu3atVm6dClz587lvffe+0/vQ0REJDOUUBQREZEslzt3bipXrmxRFhAQQIkSJZg4cSIBAQG89tprQHLy8XEoWrToY2n3SV8rPWJiYgB49dVXqV69+pPtTDqkFS8iT7vQ0FAqVqyIp6cnAPv378fGxoZ+/fphZWXFL7/8wq5du8wJxenTp/Pmm2/i4eGRZnu9evWiffv2tG3blnz58v1n9yEiIpIZmvIsIiIi/5mOHTuSL18+Vq5caS7791TkiIgIXnvtNby9valZsyaDBg3iypUr5vN/++03fvvtN/MU2ZTpsitXrqR+/frUqlWLnTt3pjmdMzExkXHjxuHj44OPjw9Dhw7l+vXr5uNpnXPv1Mlz584RGBgIwPDhw811/32ewWBg+fLlNGvWDG9vb+rVq8fUqVO5e/euxbXefvttvvjiC15++WU8PT157bXX+Omnnx76HCMiImjZsiVVqlShdu3ajBo1itjYWABmz55tnlbbqVOn+05pTXluO3fupH379nh7e9OwYUOWLVtmUS9lFFb9+vXx9PSkRo0a9O7dm3Pnzpnr/P333/Ts2RNfX18qVarEm2++aXEfd+/eZezYsbz00kt4enryyiuv8Omnn6bqy6+//sr+/fspW7YsW7ZssehHVFQUZcuWZfPmzeY2p0yZQt26dfH09KRZs2ZEREQ89NktXbqUV155BS8vL+rUqcOYMWO4ceOG+XjZsmVZsWIFw4YNo1q1atSoUYNx48Zx584dJk+eTM2aNfH19WXEiBEW7+fdu3eZM2eOue1GjRqxYMECjEbjffuyY8cOPD09GT58uHlNzgsXLjBgwABq1KhBpUqV6NSpE8eOHTOfkxKPixcvpnHjxtSoUYN169Y99Bk/yKpVq6hXrx7e3t4W14uJicHLy4vg4GCL+nfv3sXHx4eQkJD7tnngwAHat29P5cqVqVevHp999hlvv/22xWc9Pe9hQEAAs2bNYvLkydSqVQtvb2+6dOnCmTNnLO
p999135r83Xn/9dY4fP56qTzExMYwaNYpatWrh5eXFG2+8we7duy3qlC1blpCQEFq1akW1atUIDQ1N8/6uX7/O2rVradasmbnMysoKe3t78+hrOzs78/t/7Ngxdu7cybvvvnvfZ+bt7U3BggXTPQpbRETkSVJCUURERP4zNjY2+Pn5cfjwYZKSklId37dvH4MGDaJRo0YsXLiQ4cOH88svvzBw4EAARo8eTYUKFahQoQKrVq2iYsWK5nOnT5/O0KFDGTp06H1Hu23evJnIyEgmTZrEkCFD+PHHH+nVq1e6+58vXz5zEqVnz573TaiMGjWKCRMmEBAQwNy5c2nfvj3Lli2jV69eFpu5REZGEhYWxnvvvcecOXOwtbXlvffeMycH0xIaGkr//v2pVKkSs2bNonfv3nz77bd07NiRO3fu0KZNG0aNGmXux4OSPgD9+/enQoUKzJkzh9q1a/Pxxx+zdOlSIHnjme7du/Pzzz8zcOBAwsLC6NWrF7t27TJfw2g00r17d27dusWUKVMIDQ3F1dWVXr168eeffwIwfvx4fvrpJ4YOHUpYWBiBgYFMnjw5zfXtqlatSrFixVIlljZu3IiTkxMBAQGYTCZ69+7NypUr6dy5M3PnzqVKlSr079//getFbtq0icmTJ9O+fXvCwsLo3bs3GzZsYNy4cRb1pk6dir29PSEhITRv3pylS5fSokULLl68yCeffELbtm1Zu3atxXPq0aMHixYtonXr1sybN49XXnmFGTNmmKf+/9uePXvo06cPr776KuPHj8fKyorr16/Ttm1bjh49ysiRI5k2bRpGo5H27dsTFRVlcf706dPp0qUL48aNo2bNmhl6xve6dOkSs2fPpl+/fgQHBxMbG8tbb73F9evXcXV1pUGDBmzcuNEibrdu3Up8fDwtWrRIs82oqCjefvttAIKDg+nbty8LFixg37595joZeQ+XLFnC6dOnmThxIuPGjSMyMtIiMblt2zbee+89XnzxRUJCQmjcuDGDBw+2aOPu3bt06tSJrVu30r9/f0JCQsifPz9du3ZNlVScO3cuL7/8MsHBweYvEP7tu+++IykpyeK4p6cn8fHxbNmyhcuXL/Pjjz9SrVo1AD755BO6deuGs7Nz2m/E/3vllVf46quvHlhHRETkaaApzyIiIvKfeuGFF0hMTCQmJoYXXnjB4ti+fftwcHCgW7duODg4AODq6sqRI0cwmUyULl3avN7iv5OGbdu25ZVXXnngtZ2dnVm0aJG5DTc3N3r37s3OnTvx9/d/aN/t7e0pX748kDzNOa3p2n/88Qdr166lX79+9OzZE0heHy1fvnwMGTKE7du3U7duXQDi4+NZt26decp0zpw56dChA7/88gsvv/xyqrZjY2OZO3cubdq0sUhUlSlThvbt27Nu3TratWtnXouwdOnSD51S3qBBA0aMGAFAnTp1uHLlijkJevXqVXLkyMHQoUPNU6d9fX05d+6ceZTpP//8Q1RUFD169DDfl7e3NyEhIeYRfL/99hu1atXi1VdfNbeRM2dO3Nzc0uzTa6+9RlhYGLdv3yZHjhxAcjLwlVdewcHBgZ9//pkdO3Ywffp0mjRpYu777du3mTp1Kk2bNsXWNvWvub/++iuFChWiffv2WFtbU6NGDXLmzEl0dLRFvVKlSvHRRx8B4OPjw9q1a0lMTGTq1KnY2tpSp04dtm3bxv79+wHYvn07u3bt4pNPPjFP5a9duzaOjo7MnDmTTp06WawPefjwYbp3706jRo2YOHEi1tbJ3/F/9tlnxMTEsGLFCgoVKgTASy+9RJMmTZg5cyazZs0yt9GoUSNat25tfp3RZ5zCYDAQEhJi/jxVqlSJBg0aEB4ezoABA2jVqhURERH8+uuv1KxZE4Avv/wSX19fChcunGab8+fPJ3fu3CxatMj8/t27liDArl270v0eOjs7Exoaio2NDQB//fUXs2fPJjo6Gjc3N+bMmUPFihWZNm2a+ZkB5tcAGz
Zs4Pjx46xevZpKlSqZ63Xs2JGpU6fyxRdfmOt6e3s/cCQhwC+//EKpUqXIlSuXuczDw4PRo0czZMgQ7ty5w8svv0z
"text/plain": [
"<Figure size 1500x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"# Relabel the gender codes (a no-op once the column already holds the labels)\n",
"dane['Gender'] = dane['Gender'].replace({1: 'Man', 2: 'Woman'})\n",
"passive_smoker_counts = dane.groupby(['Passive Smoker', 'Gender']).size()\n",
"\n",
"# Convert the counts to percentages of each gender's patient total\n",
"gender_totals = passive_smoker_counts.groupby(level='Gender').transform('sum')\n",
"passive_smoker_percentages = passive_smoker_counts / gender_totals * 100\n",
"\n",
"# Sort by passive-smoking level\n",
"passive_smoker_percentages_sorted = passive_smoker_percentages.sort_index(level='Passive Smoker', sort_remaining=False)\n",
"\n",
"# One pair of horizontal bars per level and one colour per gender, so the\n",
"# legend has labelled artists to describe\n",
"fig, ax = plt.subplots(figsize=(15, 6))\n",
"passive_smoker_percentages_sorted.unstack('Gender', fill_value=0).plot(kind='barh', ax=ax)\n",
"\n",
"# Print the percentage next to every bar\n",
"for container in ax.containers:\n",
"    ax.bar_label(container, fmt='%.2f%%', padding=2)\n",
"\n",
"# Legend, axis labels and title\n",
"ax.legend(title='Gender')\n",
"ax.set_xlabel('Percentage')\n",
"ax.set_ylabel('Passive Smoker')\n",
"ax.set_title('Distribution of passive smokers by gender (%)')\n",
"\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "28c8acde",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABRMAAAImCAYAAADALx25AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3zO1///8UcmEZlK7F0JTWJGxChCraK+VsX8WLVVVe1Rbc0SFBE7qKJUVc0WnxZt1R6hRmOrLRGJkeS6rt8f+eX69GqCLKI877dbbpVzzvu8z/udl6ReOcPKZDKZEBEREREREREREXkK66wegIiIiIiIiIiIiPw7KJkoIiIiIiIiIiIiqaJkooiIiIiIiIiIiKSKkokiIiIiIiIiIiKSKkomioiIiIiIiIiISKoomSgiIiIiIiIiIiKpomSiiIiIiIiIiIiIpIqSiSIiIiIiIiIiIpIqSiaKiIjIK89kMmX1EF6IMbys9G5FREREMo+SiSIiIvJC69ChA56enuYPLy8vypcvT/PmzVm2bBkGg8GifWBgIEOHDk11/9u3b2fIkCFPbTd06FACAwPTfZ/HiYuLY8KECXz//fePvdeLYMqUKfj7+1OuXDnWrVuX1cNJlef5bteuXYunpyeXL19+Ju3Te82/SWb9nXqSy5cvU6tWLe7cuQNAREQErVq1okKFCvTs2ZNbt25ZtN++fTsNGzZM9n3mzp071KxZk0uXLj3T8YqIiLyIlEwUERGRF16ZMmVYtWoVq1atYvny5UydOhUfHx/Gjx/Phx9+aDHzbNasWfTu3TvVfYeFhXH16tWntuvduzezZs1K1/if5MaNG4SFhZGQkPDM75Vep0+fZv78+dSrV48FCxbw5ptvZvWQUuV5vttatWqxatUq8uTJk+l9S+YwmUwMHz6cTp064e7uDsDgwYPJlSsXM2fOJDIykvHjx5vbGwwGgoODGThwIDY2NhZ9ubu785///Ifhw4dr5quIiLxybLN6ACIiIiJPkzNnTsqVK2dRFhgYSLFixZgwYQKBgYE0bdoUSEw8PguFCxd+Jv1m9b1SIyoqCoC3336bSpUqZe1gMuhZvVt3d3dzgkpeTD/++CMnT55k/vz5ANy7d4/w8HC++eYbvL29uX//PqNGjTK3/+abb8iZMydvvfVWiv21bduW0NBQtm3b9tg2IiIiLyPNTBQREZF/rQ4dOpAnTx5WrlxpLvvnUslNmzbRtGlTfH19qVKlCoMGDeLGjRvm6/fu3cvevXvx9PTk999/5/fff8fT05OVK1dSu3Ztqlatyu7du1NcHhsfH89nn32Gn58ffn5+DBkyxLx8ElJeUnv58mU8PT1Zu3Ytly9fpk6dOgAMGzbM3Paf1xkMBpYvX06TJk3w9fWlVq1aTJkyhUePHlnc6z//+Q/ffPMN9evXx9vbm6ZNm/Lzzz8/9T1u2rSJ5s2bU758eapVq8bo0aO5e/cuADNnzqRDhw4AdOrU6YlLhCMiIujevTsVKlSgatWqTJs2jWHDhpmvBzAajcybN4+33noLb29v6tevz7Jlyyz66dChAyNGjGDevHnUqlULHx8f2rRpw5EjRyzanT59mh49elChQgUqVKhAnz59zMtOU/tuTSYTy5cv5+2338bX15e33nqL+fPnP3G2maenJ7NmzaJFixZUrFiRkJCQZEuQ79y5w6BBg6hWrRo+Pj688847T1weHh0dzTvvvENgYOBTlzEfPHiQZs2a4ePjQ5MmTdi0aZO5rkWLFrRp0ybZNV27drX4OvzTjRs3+OCDD6hcuTJ+fn6MHj2aadOmJft6r169mrfffhtvb29q1arFzJkzLWZ+pjYOT548SefOnSlfvjy1a9dm/fr1ycaU2lgZNGgQ/fv3p0KFCrz33nuPfca5c+dSr149smXLBoCVlRUA2bNnB8DOzg6j0QjAgwcPmDlzJh9++OFj+8uWLRv16tVj7ty5j20jIiLyMlIyUU
RERP61bGxsCAgI4OjRoxYJjSQHDhxg0KBB1KtXj/nz5zNs2DD27NljThCMGTOGMmXKmJdRv/HGG+Zrp02bxpAhQxgyZEiyWZFJNm/eTHh4OBMnTmTw4MH89NNPaVpinSdPHvOS2169ej12+e3o0aMZP348gYGBzJkzh3bt2vHll1/Su3dvi6RXeHg4CxcupH///syePRtbW1v69+9vTgymJCQkhA8++ICyZcvyxRdf0KdPH7Zu3UqHDh14+PAhrVq1YvTo0eZxPG6Md+7coX379ly9epUJEyYwcuRItmzZwoYNGyzaffzxx3zxxRc0bdqU0NBQGjRowPjx45k9e7ZFu61bt7J9+3ZGjhxJcHAwt27don///ua9686dO0ebNm24ffs2EydOZNy4cVy6dImgoCBu376d6ncbHBzMuHHjqFmzJnPmzKFVq1ZMmzaNkJCQx74zgDlz5lC/fn2Cg4PNScu/++ijj/jzzz8ZO3Ys8+bNo0yZMgwZMoTff/89WdvY2Fi6d+9OdHQ0S5YsoWDBgk+896hRo2jQoAGzZ8+mZMmSfPDBB+zevRuAli1bcujQIS5cuGBuf/36dX777TdatGiRYn9xcXF06tSJgwcPMnz4cCZMmMDJkydZtGiRRbu5c+cyatQoAgICCA0NpV27dsyfP98cH0meFofXr1+nffv23L17l88//5z333+fKVOmcP36dYt+Uhsrmzdvxs7OjtmzZ9OxY8cUn/Hs2bOEh4fToEEDc1nOnDkpWbIka9euJTo6mvXr11OhQgUgcfuD0qVLU7ly5cd+HQAaNmzIsWPHOHfu3BPbiYiIvEy0zFlERET+1V577TXi4+OJioritddes6g7cOAA2bJlo3v37ubZSK6urhw7dgyTyUTJkiXJmTMnQLKEYZs2bSwSDylxdnZmwYIF5j7c3Nzo06cPu3fvpnr16k8du729PaVLlwYSl9+mtET7zz//ZM2aNQwYMIBevXoBUK1aNfLkycPgwYPZuXMnNWvWBBKXba5du9a8lDdHjhy0b9+ePXv2UL9+/WR9371715xAGzNmjLm8VKlStGvXjrVr19K2bVtKliwJQMmSJR+7jHzZsmXExsaybt06PDw8AChbtqzFfc+dO8fXX3/NwIEDzTPIqlevjpWVFXPnzqVt27a4ubkBkJCQwMKFC83vNjY2liFDhvDHH3/g7e3NrFmzyJ49O2FhYeY2AQEB1K1blwULFjBkyJCnvtvo6GgWL15Mhw4dGDx4sPnd3rlzhwMHDqT4nEl8fX0tZsEdP37con7v3r307t2bunXrAuDv74+rq2uyvfcePXpEr169uHbtGl9++SWFChV64n0B+vTpY773m2++yfnz55k1axbVq1encePGTJw4ke+++47+/fsDsH79erJnz069evVS7G/9+vWcPXvWvNwXoEqVKuaxQ2JszZkzh3fffZeRI0cCiV87V1dXRo4cSefOnXn99dfNbZ8Uh0n7WM6fP59cuXIBUKxYMVq3bm2+X1pixdramk8//ZQcOXI89p3t2bMHSPy6/d2ECRMYMGAACxcuxNvbmxkzZnDnzh0WLVrEsmXLOHr0KOPHj+fhw4d06dLFvJ1CEh8fHwB+++03ihUr9tj7i4iIvEw0M1FEREReCklLFv/Oz8+Phw8f0qRJE6ZNm8aBAweoXr06ffv2TbH933l6ej71njVr1jQnsiBxibWdnR2//vpr2h/gMfbu3QtAkyZNLMrffvttbGxsLGa6ubu7W+wJmDdvXiBxyWZKDh8+TFxcXLK+K1WqRIECBVKcRfc4e/bsoXz58uZEIkCBAgUoX768RRuTyURgYCAJCQnmj8DAQB49emSRwPt7ohcw95v0LHv27MHf35/s2bOb+8mZMyeVKlVK9fs/fPgw8fHxyfa7Gzp0aLJZef9UqlSpJ9b7+/szc+ZM3n//fdauXcudO3cYMmRIsj0nBw8ezO+//06/fv1SlUiExNlwf1e3bl0OHz5MbG
wsTk5O1KtXz2LZ8Lp162jQoMFjk2179uyhUKFC5kQiJM7aq127tvnzQ4cO8eDBgxS/dgC//PKLue3T4vDAgQOUK1f
"text/plain": [
"<Figure size 1500x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Relabel the gender codes (a no-op once the column already holds the labels)\n",
"dane['Gender'] = dane['Gender'].replace({1: 'Man', 2: 'Woman'})\n",
"Genetic_risk_counts = dane.groupby(['Genetic Risk', 'Gender']).size()\n",
"\n",
"# Convert the counts to percentages of each gender's patient total\n",
"gender_totals = Genetic_risk_counts.groupby(level='Gender').transform('sum')\n",
"Genetic_risk_percentages = Genetic_risk_counts / gender_totals * 100\n",
"\n",
"# Sort by genetic-risk level\n",
"Genetic_risk_percentages_sorted = Genetic_risk_percentages.sort_index(level='Genetic Risk', sort_remaining=False)\n",
"\n",
"# One pair of horizontal bars per risk level and one colour per gender, so the\n",
"# legend has labelled artists to describe\n",
"fig, ax = plt.subplots(figsize=(15, 6))\n",
"Genetic_risk_percentages_sorted.unstack('Gender', fill_value=0).plot(kind='barh', ax=ax)\n",
"\n",
"# Print the percentage next to every bar\n",
"for container in ax.containers:\n",
"    ax.bar_label(container, fmt='%.2f%%', padding=2)\n",
"\n",
"# Legend, axis labels and title\n",
"ax.legend(title='Gender')\n",
"ax.set_xlabel('Percentage')\n",
"ax.set_ylabel('Genetic Risk')\n",
"ax.set_title('Distribution of genetic risk by gender (%)')\n",
"\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "818e4ee5",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA44AAAImCAYAAAAcz9SjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC6GElEQVR4nOzdeVyUVf//8dewC4KCCy7gWmmlhAoqLimjVlia0qJkZi7dmktqt1ullGYuJSqKpimKlRVm5p1py6191VvcbjU10sxbyxTJVEABi2Vmfn/wc2oEERQciPfz8eARc65znetzMcfgM+dc5xgsFosFERERERERketwsHcAIiIiIiIiUrYpcRQREREREZFCKXEUERERERGRQilxFBERERERkUIpcRQREREREZFCKXEUERERERGRQilxFBERERERkUIpcRQREREREZFCKXEUEZHbxmKx2DuEMhGD/H2oP4lIRaHEUUREAOjfvz9NmjSxfjVt2pQWLVoQHh7Oe++9h8lksqlvNBqZNGlSkdvfsmULEydOvGG9SZMmYTQab/o615Odnc3MmTPZsGHDda9VFsyZM4c2bdoQGBjI+vXr8x3fs2cPTZo0Yc+ePbc9thv9vOwZmz18/PHHzJ492/p63bp1NGnShDNnzpTodc6cOUOTJk1Yt25dqdQXESkKJ3sHICIiZcc999zDq6++CoDJZOLSpUts27aNGTNmsH//fubNm4fBYAAgJiaGypUrF7ntuLi4ItUbPnw4zzzzTLFjv5HffvuNuLg4Zs6cWerXulk//vgjy5Yt48knn+TRRx+lUaNG9g5JCvH222/TunVr6+vOnTsTHx9PzZo1S/Q6NWvWJD4+nnr16pVouyIixaHEUURErCpXrkxgYKBNmdFopGHDhsycOROj0UjPnj2BvCSzNNzOP47L2h/iaWlpADz88MMEBQXZNxgpNh8fH3x8fEq8XRcXl3z/LkVEbjdNVRURkRvq378/NWvW5KOPPrKWXTuFdNOmTfTs2ZOAgADatm3LuHHj+O2336zn7927l71791qnMl6d1vjRRx8RGhpKu3bt2LFjR4HTIXNycpg+fTrBwcEEBwczceJEUlJSrMcLOuev0/XOnDlDly5dAHjppZesda89z2QysXr1anr06EFAQACdO3dmzpw5ZGVl2Vzr2Wef5ZNPPuHBBx+kWbNm9OzZk23btt3w57hp0ybCw8Np0aIF7du3JzIykkuXLgGwcOFC+vfvD8CAAQOKNYX2xx9/ZOjQobRs2ZKWLVsyYsQITp8+DUBWVhZBQUHMmDHD5hyz2UyHDh2YOnWqtezjjz/m4YcfplmzZnTu3JmFCxeSm5tb5Diu+t///sdTTz1F8+bN6datG++995712AsvvECnTp0wm80250RGRtKlS5frPjOYkZFBZGQkISEhtGjRgrFjxxIXF0eTJk1s6m3evJnw8HCaN29O+/btmT59OleuXLEeX7hwId26dWPr1q306NGDZs2a8eCDD/Lpp5/atJOWlkZkZCTt2rWjefPmPPnkk+zatct63Gg0kpSUxKeffmqdnlrQVNWEhAT69etHixYt6NChg817XpD+/fszbtw4XnjhBVq2bMk//vGPfFNPzWYz0dHRGI1GmjVrhtFoZO7cueTk5BTYpsViYdKkSTRv3pzt27df99oiIoVR4igiIjfk6OhISEgIhw8fLjCR2L9/P+PGjeOBBx5g2bJlvPTSS+zevZt//vOfALz66qvcc8893HPPPcTHx3Pvvfdaz503bx4TJ05k4sSJ1x1V+eKLL0hMTGTWrFlMmDCBrVu3Mnz48CLHX7NmTWJiYgB4/vnnrd9fKzIykhkzZmA0Gnn77bfp168f77//PsOHD7dJaBITE4mNjeWFF15g0aJFODk58cILLxSaECxevJixY8dy3333sWDBAkaMGMFXX31F//79+eOPP3jiiSeIjIy0xnG9GK/1008/0bdvXy5evM
isWbN44403OH36NBEREVy8eBFXV1cefPBBvvjiC5tkbc+ePZw/f55HH30UgKVLlzJlyhRCQkJYsmQJ/fr1Y9myZdaYimPmzJncd999LF68mI4dOzJ9+nTWrFkDwOOPP86vv/5q8xxkdnY2X3zxBb1797ZOhb7WiBEj+OKLLxg1ahTz5s0jMzOTqKgomzobNmxgxIgRNGrUiEWLFjFy5Eg+++yzfO/f+fPnmTZtGs888wzvvPMOfn5+TJo0iRMnTgB5yfaAAQPYsmULY8eOJSYmhlq1ajFkyBBr8hgTE0ONGjXo1KnTdaenbtu2jSFDhlC1alXmzZvH+PHj+eabb3jhhRcK/fl98cUXODs7s2jRogKnUi9btozVq1czYsQIVqxYQUREBMuXL2fJkiUFtjd9+nQ+//xzYmJiuP/++wu9tojI9WiqqoiIFEn16tXJyckhLS2N6tWr2xzbv38/rq6uPPfcc7i6ugJQtWpVvvvuOywWC3fccYf1echrk8O+ffvy0EMPFXptLy8vli9fbm3D29ubESNGsGPHDjp06HDD2F1cXLj77ruBvOmpBU2z/d///sfatWsZM2YMzz//PADt27enZs2aTJgwge3bt9OpUycA0tPTWbdunXWqq7u7O08//TS7d+/mwQcfzNf2pUuXePvtt3niiSesz5AC3HXXXfTr149169bx1FNPcccddwBwxx13FHkqcExMDG5ubsTFxVl/PiEhIXTt2pXly5czceJEHn30UdauXcu+ffusz+Rt2LCB+vXrExgYSHp6Om+//TZ9+vRh8uTJAHTo0IGqVasyefJkBg4cyJ133lmkeADCw8OtCyF17NiRc+fOsWjRIh5//HE6dOhArVq1WL9+PSEhIUDeKGF6ejq9e/cusL1du3axe/duFi5cyAMPPADA/fffT48ePfjf//4H5I2qzZkzh44dOzJnzhzruQ0aNODZZ59l27ZtdO7cGYDff/+dN954w3r9Bg0aEBoayrZt22jcuDH/+te/+OGHH1izZg333Xef9Xr9+/dnzpw5fPLJJ9xzzz24uLjg4+Nz3Q88FixYQNOmTVm0aJG1zM3Njblz53Lu3Dl8fX0LPM/BwYHXX38dd3d3gHyL7ezdu5d7772Xxx57DIDWrVtTqVKlAp85joqKIj4+noULF1r7r4jIzdCIo4iIFEtBI0LBwcH88ccf9OjRg3nz5rF//346dOjAyJEjrzuCdNW1Uw0L0qlTJ5s/io1GI87OzuzcubP4N3Ade/fuBaBHjx425Q8//DCOjo42I2Q+Pj42z0fWqlULyEtICnLw4EGys7PztR0UFETdunVvaRXS3bt306ZNG9zc3MjNzSU3N5fKlSsTFBRk/fkEBwdTt25dNm7cCOSN8P373/+2Pq/67bff8vvvv2M0Gq1t5ObmWqfLJiQkFCum7t2727zu1q0bv/76KydPnsTBwYHevXvz9ddfW39en376KW3atKFu3brXvUdnZ2e6du1qLXNwcCAsLMz6+uTJk/z666/57iE4OJjKlSvnu4e/JntX37+rU1p37dpFjRo1uPfee63tmEwmQkNDSUxMLHRk+ao//viD77//3iZmgAcffJCvvvrqukkjgJ+fnzVpLEibNm3YuXMnTz31FCtXruTEiRM8/fTT9OrVy6be6tWreeedd+jevTuhoaE3jFlEpDAacRQRkSI5d+4cbm5uVK1aNd+xFi1a8M477xAXF0dsbCxLliyhRo0aPPfccwwYMKDQdqtVq3bDa187wung4EDVqlW5fPlyse6hMFeTgRo1atiUOzk54e3tTXp6urWsUqVKNnWuJsfXPrd3bdvX3sfVsr+2XVxpaWls2rSJTZs25Tt2daEWg8FAjx49WLNmDVOmTGH79u1cvnzZOk316qI8//jHPwq8xtVnVYvq2p/h1ff46s/hscceY8mSJXz99de0a9eOhIQEm9Vur5WamkrVqlVxcLD9vPuvP8+r9zB16lSb5zavdw9/fQ
+vtnt1OmtaWhrnz5+3mVL9V+fPn6dKlSrXjRfy7tVisRSpf1+roH7yV0OGDMHDw4NPPvmE2bNnM2vWLO666y5efvl
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"Genetic_risk_counts = dane.groupby(['Genetic Risk', 'Level']).size()\n",
"\n",
"# Sort by genetic risk first, then by level in High -> Medium -> Low order.\n",
"# sort_index applies `key` to every index level separately, so the rank mapping\n",
"# must only touch the 'Level' level; mapping the numeric 'Genetic Risk' values\n",
"# through it would turn them all into NaN and discard that ordering.\n",
"level_rank = {'High': 1, 'Medium': 2, 'Low': 3}\n",
"Genetic_risk_counts_sorted = Genetic_risk_counts.sort_index(\n",
"    level=['Genetic Risk', 'Level'],\n",
"    key=lambda idx: idx.map(level_rank) if idx.name == 'Level' else idx,\n",
")\n",
"\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"# Horizontal bar chart, one bar per (genetic risk, level) pair\n",
"Genetic_risk_counts_sorted.plot(kind='barh', ax=ax)\n",
"\n",
"# Print the patient count next to every bar\n",
"for i, value in enumerate(Genetic_risk_counts_sorted):\n",
"    ax.text(value + 0.1, i, str(value), ha='left', va='center')\n",
"\n",
"# Axis labels and title\n",
"ax.set_xlabel('Count')\n",
"ax.set_ylabel('Genetic Risk, Level')\n",
"ax.set_title('Distribution of level by genetic risk')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f10041ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Smoking</th>\n",
" <th>Alcohol use</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>989</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>992</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>993</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>994</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>256 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Smoking Alcohol use Level\n",
"index \n",
"4 8 8 High\n",
"20 8 8 High\n",
"22 8 8 High\n",
"46 8 8 High\n",
"68 8 8 High\n",
"... ... ... ...\n",
"989 7 7 High\n",
"992 7 7 High\n",
"993 7 7 High\n",
"994 7 7 High\n",
"995 7 7 High\n",
"\n",
"[256 rows x 3 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Patients scoring at least 7 on both the smoking and the alcohol-use scale,\n",
"# listed from the highest scores down together with their 'Level' value\n",
"heavy_use = dane['Smoking'].ge(7) & dane['Alcohol use'].ge(7)\n",
"x = dane.loc[heavy_use]\n",
"shown_columns = ['Smoking', 'Alcohol use', 'Level']\n",
"x_sorted = x.loc[:, shown_columns].sort_values(by=shown_columns[:2], ascending=False)\n",
"x_sorted"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "15eebd5b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Air Pollution\n",
"1 141\n",
"2 201\n",
"3 173\n",
"4 90\n",
"5 20\n",
"6 326\n",
"7 30\n",
"8 19\n",
"Name: count, dtype: int64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Patients per air-pollution score, displayed in ascending score order\n",
"air_pollution_scores = dane['Air Pollution']\n",
"dane7 = air_pollution_scores.value_counts()\n",
"dane7.sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "9bd91fc9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA44AAAImCAYAAAAcz9SjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAADBm0lEQVR4nOzde1xU1fo/8M9wvyt4QQnES4p6cEQFcbykDFJiaUL1VULzgh4VMS+pWCmlqahHULwgKqOoWVKEnkwss46ZKJoYGGlqmHdE5TqgcpmZ3x/8nBoHcAZmYJTP+/XqpbP22ms/M/OcTs+stdcWKBQKBYiIiIiIiIhqYNTYARAREREREZFhY+FIREREREREtWLhSERERERERLVi4UhERERERES1YuFIREREREREtWLhSERERERERLVi4UhERERERES1YuFIREREREREtWLhSEREDUKhUDR2CAYRA9HTME+JyBCxcCQiIowbNw5ubm7Kf7p27YpevXohMDAQu3fvhkwmU+kvFouxcOFCjcf/4YcfEB4e/tR+CxcuhFgsrvN1alJeXo7IyEgcOHCgxmsZgjVr1sDb2xseHh7Yv3+/2vFTp07Bzc0Np06davDYnvZ5NWZsQFUOjxs3rlGuDai//w0bNsDNzU2rMYqLixEeHo4zZ84o2xr7fRERPWbS2AEQEZFh6N69Oz766CMAgEwmQ1FREX766SesWLEC6enpWLt2LQQCAQBg48aNsLGx0XjshIQEjfqFhobinXfe0Tr2p7l79y4SEhIQGRmp92vV1aVLl7Bt2zb83//9H15//XV07NixsUN6pjzO3WfZhQsXsH//fgQGBirbnof3RUTPBxaOREQEALCxsYGHh4dKm1gsRocOHRAZGQmxWIyRI0cCqCoy9aFdu3Z6Gbexr6WJwsJCAMCrr74KT0/Pxg3mGfTiiy82dgh68by+LyJ69nCpKhER1WrcuHFo3bo19u7dq2x7cglpSkoKRo4cCaFQiH79+mHevHm4e/eu8vzTp0/j9OnTyqV8j5f17d27Fz4+Pujfvz+OHz9e7XLIiooKLFu2DF5eXvDy8kJ4eDjy8/OVx6s75+bNm3Bzc0NycjJu3rwJX19fAMD777+v7PvkeTKZDHv27MGIESMgFAoxZMgQrFmzBmVlZSrXmjBhAr766iu88sorcHd3x8iRI/HTTz899XNMSUlBYGAgevXqhQEDBiAiIgJFRUUAqpY1Pl6OOH78eK2W0F66dAlTp05F79690bt3b8yYMQM3btwAAJSVlcHT0xMrVqxQOUcul2PgwIFYsmSJsu3LL7/Eq6++Cnd3dwwZMgQbNmxAZWWlxnE89ueff+Ltt99Gjx494Ofnh927dyuPvfvuuxg8eDDkcrnKOREREfD19a3x3r78/HwsWbIEPj4+cHd3R9++fTFjxgzcvHlT2efJJZ1ubm7YuHEj3njjDfTp0wexsbHVjj1u3DgsXLgQW7ZswYABA9C7d29Mnz5d+Rk+9ttvvyEkJATe3t7o3bs3pk2bhsuXL2v8uVS35PSfy1tPnTqlnAF/5513lH2fPK+srAybNm3CsGHD0KNHD7z88svYunWrymc6btw4fPjhh9i6dSuGDBmCHj16YMyYMcjMzNQ4XiKiJ7FwJCKiWhkbG0MkEuHcuXPVFhLp6emYN28eXn75ZWzbtg3vv/8+0tLS8N577wGoWmrXvXt3dO/eHYmJifjXv/6lPHft2rUIDw9HeHi42mznY4cOHUJWVhZWrlyJBQsW4OjRowgNDdU4/tatW2Pjxo0AgOnTpyv//qSIiAisWLECYrEYmzdvRnBwMD799FOEhoaqFDRZWVmQSCR49913sWnTJpiYmODdd99VFoHViY2NxZw5c9CzZ0+sX78eM2bMwHfffYdx48bh0aNHeOuttxAREaGMo6YYn/TXX39hzJgxyMvLw8qVK7F8+XLcuHEDQUFByMvLg7m5OV555RUcOnRIpb
A4deoU7t27h9dffx0AsGXLFixevBgikQhxcXEIDg7Gtm3blDFpIzIyEj179kRsbCwGDRqEZcuW4YsvvgAAvPnmm7hz547KfZDl5eU4dOgQAgIClEuh/0mhUGDq1KlITU3Fe++9B4lEgtDQUJw4ceKp8W3evBmvvPIKoqOjlT8eVOeHH37AV199hQ8//BBLly7FH3/8gXfeeQcPHjwAAKSlpSEoKAhyuRzLly/HsmXLkJOTgzFjxiA7O1vrz6g6//rXv1RyoLolqgqFAtOmTUN8fDzefPNNxMXFYdiwYVi3bp1a/++++w4//PADFi1ahOjoaNy/fx/vvvuu2v3KRESa4lJVIiJ6qpYtW6KiogKFhYVo2bKlyrH09HSYm5tjypQpMDc3BwA0b94cv/32GxQKBV588UXl/ZBPFodjxozBsGHDar22nZ0d4uPjlWPY29tjxowZOH78OAYOHPjU2M3MzNCtWzcAVctTq1tm++effyIpKQmzZ8/G9OnTAQADBgxA69atsWDBAhw7dgyDBw8GAEilUiQnJyuXulpZWWHs2LFIS0vDK6+8ojZ2UVERNm/ejLfeekvlP+67dOmC4OBgJCcn4+2331YuSXzxxRc1Xgq8ceNGWFhYICEhQfn5iEQiDB06FPHx8QgPD8frr7+OpKQknDlzBn379gUAHDhwAK6urvDw8IBUKsXmzZsxevRoLFq0CAAwcOBANG/eHIsWLcLEiRPRuXNnjeIBgMDAQOVGSIMGDUJubi42bdqEN998EwMHDkSbNm2wf/9+iEQiAMCRI0cglUoREBBQ7Xh3796FpaUlwsPDlUt4vb29cfPmTZVZ8OoIhUL8+9//fmrMDx48wFdffaX8Tjt27IiAgADs27cPwcHBiIqKgouLC+Lj42FsbAyg6jPy8/PDhg0bsG7dOo0+m9rY2Nio5EB1S1SPHTuGEydO4D//+Y9y2fiAAQNgYWGBmJgYjB8/XnleZWUlJBKJMi9KS0sRHh6OCxcuwN3dvd7xElHTwxlHIiLSWHUzQl5eXnj06BFGjBiBtWvXIj09HQMHDkRYWFi1/f9Jk10nBw8erLIRj1gshqmpKU6cOKH9G6jB6dOnAQAjRoxQaX/11VdhbGysMkPm4OCgcn9kmzZtAAAPHz6sduyMjAyUl5erje3p6YkXXnihXruQpqWlwdvbGxYWFqisrERlZSVsbGzg6emp/Hy8vLzwwgsv4ODBgwCqZvi+//57ZeHx66+/4uHDhxCLxcoxKisrlctlU1NTtYpp+PDhKq/9/Pxw584dXLlyBUZGRggICMDhw4eVn9e+ffvg7e2NF154odrxHB0dsWvXLnh6euL27ds4efIkPv30U5w9exYVFRW1xtKlSxeNYu7Vq5fKd9q9e3e4uLjgzJkzePDgAX777TcMHz5cWTQCVT9o+Pj4NOgusqdPn4axsbHaZ/z4u/xnLP/8wQao+hyBmvOUiOhpOONIRERPlZubCwsLCzRv3lztWK9evbB161YkJCRAIpEgLi4OrVq1wpQpUzB+/Phax23RosVTr/3kDKeRkRGaN2+O4uJird5DbR4vM23VqpVKu4mJCezt7SGVSpVtlpaWKn0eF8dP3rf35NhPvo/Hbf8cW1uFhYVISUlBSkqK2jEHBwdlfCNGjMAXX3yBxYsX49ixYyguLlYuU328KU9NM3OP71XV1JOf4ePv+PHn8MYbbyAuLg6HDx9G//79kZqaqrLbbXW+/vprREdHIycnB82bN0fXrl1hYWHx1Fiq+8yr07p1a7W2Fi1aoLi4GFKpFAqFQi/fn7aKiopgb28PExPV/3x7/JnXlqdGRlVzBTXlKRHR07BwJCKiWslkMpw+fRq9e/dWmXH5p0GDBmHQoEF4+PAh0tLSsGvXLqxYsQIeHh7o2bNnva7/ZIEok8lQUFCgLEgEAoHafVuP703TVLNmzQAA9+7dg7Ozs7K9oqICBQUFsLe3r0voKmPfv38fnTp1Ujl27949uL
i41HlsW1tb9O/fHxMnTlQ79s/i4vXXX0dcXBxOnTqFb775Br1791Ze187ODkDVMyTbt2+vNo6mxddjT97ref/+fQB
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count patients for every (air pollution score, level) combination.\n",
"air_pollution = dane.groupby(['Air Pollution', 'Level']).size()\n",
"\n",
"# Order the bars by air pollution score first, then by level (High, Medium, Low).\n",
"# sort_index applies `key` to each index level separately, so the ranking dict is\n",
"# used for 'Level' only; mapping the numeric 'Air Pollution' level through it would\n",
"# turn every score into NaN and drop that level from the ordering.\n",
"air_pollution_sorted = air_pollution.sort_index(\n",
"    level=['Air Pollution', 'Level'],\n",
"    key=lambda idx: idx.map({'High': 1, 'Medium': 2, 'Low': 3}) if idx.name == 'Level' else idx,\n",
")\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"\n",
"# Horizontal bar chart drawn on the figure created above.\n",
"ax = air_pollution_sorted.plot(kind='barh')\n",
"\n",
"# Write each count next to the end of its bar.\n",
"for i, value in enumerate(air_pollution_sorted):\n",
"    ax.text(value + 0.1, i, str(value), ha='left', va='center')\n",
"\n",
"# Axis labels and title.\n",
"ax.set(xlabel='Count', ylabel='Air Pollution, Level', title='Distribution of level by air pollution')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "402f9c5a",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"coloraxis": "coloraxis",
"hovertemplate": "x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>",
"name": "0",
"type": "heatmap",
"x": [
"Air Pollution",
"Alcohol use",
"Dust Allergy",
"OccuPational Hazards",
"Genetic Risk",
"chronic Lung Disease",
"Balanced Diet",
"Obesity",
"Smoking",
"Passive Smoker",
"Chest Pain",
"Coughing of Blood",
"Fatigue",
"Weight Loss",
"Shortness of Breath",
"Wheezing",
"Swallowing Difficulty",
"Clubbing of Finger Nails",
"Frequent Cold",
"Dry Cough",
"Snoring",
"Level"
],
"xaxis": "x",
"y": [
"Air Pollution",
"Alcohol use",
"Dust Allergy",
"OccuPational Hazards",
"Genetic Risk",
"chronic Lung Disease",
"Balanced Diet",
"Obesity",
"Smoking",
"Passive Smoker",
"Chest Pain",
"Coughing of Blood",
"Fatigue",
"Weight Loss",
"Shortness of Breath",
"Wheezing",
"Swallowing Difficulty",
"Clubbing of Finger Nails",
"Frequent Cold",
"Dry Cough",
"Snoring",
"Level"
],
"yaxis": "y",
"z": [
[
1,
0.7472926058498683,
0.6375034996625167,
0.6089244579575642,
0.7052760643034229,
0.626700905110362,
0.5248729122576195,
0.6014675032618791,
0.48190160681156735,
0.6067637039382361,
0.5857335128463244,
0.6078285986246399,
0.21172390492820753,
0.25801612324197076,
0.26955773337958067,
0.05536764340501297,
-0.08091766685805872,
0.2410647796206403,
0.17453908675195515,
0.26148863680644063,
-0.02134255082489496,
0.6360384924565836
],
[
0.7472926058498683,
1,
0.8186435195099222,
0.8787859206801721,
0.8772098877053724,
0.7635757734743269,
0.6533518946824166,
0.669311562578026,
0.5470346258708494,
0.5925763846377347,
0.7172422755316215,
0.6676118365599926,
0.23724513214830584,
0.20785108373725228,
0.4357852814107128,
0.18081697750089573,
-0.11407317175635699,
0.4149921465527775,
0.18077782299931028,
0.2112771957700296,
0.12269396851923134,
0.7187103217647229
],
[
0.6375034996625167,
0.8186435195099222,
1,
0.8358597714567234,
0.7879038844221704,
0.6195559248460843,
0.6471968314468607,
0.7006758175431829,
0.35869057928060827,
0.5600024750904067,
0.639983122247139,
0.6252914694481858,
0.33247155600320866,
0.3217561852405069,
0.5186816794097431,
0.30485002589035987,
0.03114127077150482,
0.34571422756920306,
0.21938921110661536,
0.3001951038840922,
0.052844485803307716,
0.7138387882756947
],
[
0.6089244579575642,
0.8787859206801721,
0.8358597714567234,
1,
0.8930485226784659,
0.8582838533909203,
0.6915088208673867,
0.7221907449774281,
0.4976925766273257,
0.5553106663560373,
0.7756187292467873,
0.6459465026678011,
0.2678439918223778,
0.1762255794523663,
0.36648159899418925,
0.17892547215311277,
-0.002853115111954358,
0.3664467602819259,
0.07716600785633315,
0.1598870392835012,
0.022916085202845295,
0.6732548771044937
],
[
0.7052760643034229,
0.8772098877053724,
0.7879038844221704,
0.8930485226784659,
1,
0.8362308273811675,
0.6799048528409987,
0.7298260755637197,
0.5432592725592386,
0.6090712923109151,
0.8317508263736857,
0.6322364064117612,
0.23053043957872787,
0.27174267654094947,
0.4582004725527073,
0.20497278160205787,
-0.06294834758508021,
0.3578151378190881,
0.08709160622973089,
0.19439933154901312,
-0.056830680395746874,
0.7013027231492879
],
[
0.626700905110362,
0.7635757734743269,
0.6195559248460843,
0.8582838533909203,
0.8362308273811675,
1,
0.6226320567782637,
0.6017543934856161,
0.5785852906845784,
0.5726982314709294,
0.7826460785184837,
0.6029867166712027,
0.2476970600535975,
0.10407976201161599,
0.1824257534809888,
0.05721405487136304,
0.0072794129646244245,
0.298022572585234,
0.028758662363666858,
0.11416140647724242,
0.04337453500186096,
0.6099713329190288
],
[
0.5248729122576195,
0.6533518946824166,
0.6471968314468607,
0.6915088208673867,
0.6799048528409987,
0.6226320567782637,
1,
0.7069224405618073,
0.6453899024049164,
0.7251227607171947,
0.7982067791747338,
0.7450537631029008,
0.4006776600014423,
-0.006543913485000122,
0.3436234976276185,
0.06392999274534257,
0.046806749853070026,
0.04196701757892942,
0.26393107394675547,
0.33199459885294236,
0.15267679702142942,
0.7062730211355572
],
[
0.6014675032618791,
0.669311562578026,
0.7006758175431829,
0.7221907449774281,
0.7298260755637197,
0.6017543934856161,
0.7069224405618073,
1,
0.48679469989834867,
0.6818888545456794,
0.6731502170680845,
0.8148047650547877,
0.5527881669123098,
0.31349492910365195,
0.4062032308084264,
0.09428706364654374,
0.12721284364934773,
0.14909338063120228,
0.2883681319725162,
0.2006181947696451,
0.039422347197728726,
0.8274350995887054
],
[
0.48190160681156735,
0.5470346258708494,
0.35869057928060827,
0.4976925766273257,
0.5432592725592386,
0.5785852906845784,
0.6453899024049164,
0.48679469989834867,
1,
0.7616215035339874,
0.6479261237926883,
0.5552894547697397,
0.2000293248342993,
-0.2129370461901842,
-0.023258717036380692,
-0.04705954274741461,
0.23614119265970507,
-0.0411469008340726,
0.03958530619825651,
0.0101013531007671,
0.18905460318599,
0.5195301454571828
],
[
0.6067637039382361,
0.5925763846377347,
0.5600024750904067,
0.5553106663560373,
0.6090712923109151,
0.5726982314709294,
0.7251227607171947,
0.6818888545456794,
0.7616215035339874,
1,
0.6960772645208237,
0.6362228524218005,
0.3779186915149908,
0.058335614511866356,
0.06294847907066993,
0.20028318541455717,
0.3489223068751215,
-0.03553596174503037,
0.10455326476259358,
0.12076084601185079,
0.24794332390321414,
0.7035944161827955
],
[
0.5857335128463244,
0.7172422755316215,
0.639983122247139,
0.7756187292467873,
0.8317508263736857,
0.7826460785184837,
0.7982067791747338,
0.6731502170680845,
0.6479261237926883,
0.6960772645208237,
1,
0.7121584615356457,
0.2511348745483826,
-0.0010922427808254683,
0.23704455680976363,
0.10721056867397605,
0.07178429688840016,
0.08138637724156475,
0.0429369264584655,
0.1421801455656391,
0.14003564818677447,
0.6454611826047201
],
[
0.6078285986246399,
0.6676118365599926,
0.6252914694481858,
0.6459465026678011,
0.6322364064117612,
0.6029867166712027,
0.7450537631029008,
0.8148047650547877,
0.5552894547697397,
0.6362228524218005,
0.7121584615356457,
1,
0.48153993666948375,
0.10585740527808256,
0.31877673243043725,
-0.0856983734540463,
0.08628923431875687,
-0.06644287392119042,
0.24423503631093776,
0.14765857408811633,
0.08794361697500454,
0.7820916752649858
],
[
0.21172390492820753,
0.23724513214830584,
0.33247155600320866,
0.2678439918223778,
0.23053043957872787,
0.2476970600535975,
0.4006776600014423,
0.5527881669123098,
0.2000293248342993,
0.3779186915149908,
0.2511348745483826,
0.48153993666948375,
1,
0.4695167837943399,
0.3986249929146146,
0.17447749255763642,
0.14956151301239978,
0.040694008890101364,
0.40791505333033795,
0.27116683829736205,
0.2317483309186956,
0.6251136281446781
],
[
0.25801612324197076,
0.20785108373725228,
0.3217561852405069,
0.1762255794523663,
0.27174267654094947,
0.10407976201161599,
-0.006543913485000122,
0.31349492910365195,
-0.2129370461901842,
0.058335614511866356,
-0.0010922427808254683,
0.10585740527808256,
0.4695167837943399,
1,
0.5744965197986118,
0.3311789739051432,
0.05338388384423785,
0.3764835551995504,
0.16034836534273045,
0.18859814746347422,
-0.18910649688602374,
0.3527375470708955
],
[
0.26955773337958067,
0.4357852814107128,
0.5186816794097431,
0.36648159899418925,
0.4582004725527073,
0.1824257534809888,
0.3436234976276185,
0.4062032308084264,
-0.023258717036380692,
0.06294847907066993,
0.23704455680976363,
0.31877673243043725,
0.3986249929146146,
0.5744965197986118,
1,
0.20756440402590953,
-0.20047719041795967,
0.47427466239202515,
0.3514885103896674,
0.4933305283568406,
-0.15929108551795007,
0.49702424773117393
],
[
0.05536764340501297,
0.18081697750089573,
0.30485002589035987,
0.17892547215311277,
0.20497278160205787,
0.05721405487136304,
0.06392999274534257,
0.09428706364654374,
-0.04705954274741461,
0.20028318541455717,
0.10721056867397605,
-0.0856983734540463,
0.17447749255763642,
0.3311789739051432,
0.20756440402590953,
1,
0.39348683785487854,
0.3382709432531792,
0.0988549946423244,
0.0543883693527527,
0.11618251465381582,
0.2427938018413044
],
[
-0.08091766685805872,
-0.11407317175635699,
0.03114127077150482,
-0.002853115111954358,
-0.06294834758508021,
0.0072794129646244245,
0.046806749853070026,
0.12721284364934773,
0.23614119265970507,
0.3489223068751215,
0.07178429688840016,
0.08628923431875687,
0.14956151301239978,
0.05338388384423785,
-0.20047719041795967,
0.39348683785487854,
1,
-0.11974052232162992,
0.13236324965578655,
-0.05542806610590742,
0.21053986477168404,
0.24914177369963997
],
[
0.2410647796206403,
0.4149921465527775,
0.34571422756920306,
0.3664467602819259,
0.3578151378190881,
0.298022572585234,
0.04196701757892942,
0.14909338063120228,
-0.0411469008340726,
-0.03553596174503037,
0.08138637724156475,
-0.06644287392119042,
0.040694008890101364,
0.3764835551995504,
0.47427466239202515,
0.3382709432531792,
-0.11974052232162992,
1,
0.2425293893913259,
0.3072707986442465,
-0.017537309997086217,
0.28006285128959885
],
[
0.17453908675195515,
0.18077782299931028,
0.21938921110661536,
0.07716600785633315,
0.08709160622973089,
0.028758662363666858,
0.26393107394675547,
0.2883681319725162,
0.03958530619825651,
0.10455326476259358,
0.0429369264584655,
0.24423503631093776,
0.40791505333033795,
0.16034836534273045,
0.3514885103896674,
0.0988549946423244,
0.13236324965578655,
0.2425293893913259,
1,
0.5159182208339762,
0.3358435232246879,
0.444016774902805
],
[
0.26148863680644063,
0.2112771957700296,
0.3001951038840922,
0.1598870392835012,
0.19439933154901312,
0.11416140647724242,
0.33199459885294236,
0.2006181947696451,
0.0101013531007671,
0.12076084601185079,
0.1421801455656391,
0.14765857408811633,
0.27116683829736205,
0.18859814746347422,
0.4933305283568406,
0.0543883693527527,
-0.05542806610590742,
0.3072707986442465,
0.5159182208339762,
1,
0.17614572967355413,
0.3739683597073294
],
[
-0.02134255082489496,
0.12269396851923134,
0.052844485803307716,
0.022916085202845295,
-0.056830680395746874,
0.04337453500186096,
0.15267679702142942,
0.039422347197728726,
0.18905460318599,
0.24794332390321414,
0.14003564818677447,
0.08794361697500454,
0.2317483309186956,
-0.18910649688602374,
-0.15929108551795007,
0.11618251465381582,
0.21053986477168404,
-0.017537309997086217,
0.3358435232246879,
0.17614572967355413,
1,
0.2893659547265505
],
[
0.6360384924565836,
0.7187103217647229,
0.7138387882756947,
0.6732548771044937,
0.7013027231492879,
0.6099713329190288,
0.7062730211355572,
0.8274350995887054,
0.5195301454571828,
0.7035944161827955,
0.6454611826047201,
0.7820916752649858,
0.6251136281446781,
0.3527375470708955,
0.49702424773117393,
0.2427938018413044,
0.24914177369963997,
0.28006285128959885,
0.444016774902805,
0.3739683597073294,
0.2893659547265505,
1
]
]
}
],
"layout": {
"coloraxis": {
"cmax": 1,
"cmin": -1,
"colorbar": {
"ticktext": [
"-1",
"-0.5",
"0",
"0.5",
"1"
],
"tickvals": [
-1,
-0.5,
0,
0.5,
1
],
"title": {
"text": "Correlation"
}
},
"colorscale": [
[
0,
"rgb(58, 89, 156)"
],
[
0.5,
"rgb(255, 255, 255)"
],
[
1,
"rgb(179, 35, 26)"
]
]
},
"font": {
"color": "black",
"family": "Arial",
"size": 12
},
"height": 800,
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Correlation heatmap for health-related features"
},
"width": 800,
"xaxis": {
"anchor": "y",
"constrain": "domain",
"domain": [
0,
1
],
"scaleanchor": "y"
},
"yaxis": {
"anchor": "x",
"autorange": "reversed",
"constrain": "domain",
"domain": [
0,
1
]
}
}
}
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Encode the ordinal target as numbers so it can take part in the correlation\n",
"# matrix, then drop the identifier and demographic columns.\n",
"data = (\n",
"    dane\n",
"    .replace({'Level': {'High': 3, 'Medium': 2, 'Low': 1}})\n",
"    .drop(columns=['Patient Id', 'Gender', 'Age'])\n",
")\n",
"\n",
"# Pairwise correlation matrix, plus a long-format copy with one row per (x, y) pair.\n",
"corr_matrix = data.corr()\n",
"corr_df = corr_matrix.stack().reset_index().set_axis(['x', 'y', 'value'], axis=1)\n",
"\n",
"# Heatmap on a fixed [-1, 1] colour range so that 0 sits at the middle of the scale.\n",
"fig = px.imshow(\n",
"    corr_matrix,\n",
"    x=corr_matrix.columns.tolist(),\n",
"    y=corr_matrix.columns.tolist(),\n",
"    zmin=-1,\n",
"    zmax=1,\n",
"    color_continuous_scale=['blue', 'white', 'red'],\n",
")\n",
"\n",
"fig.update_layout(\n",
"    title='Correlation heatmap for health-related features',\n",
"    width=800,\n",
"    height=800,\n",
"    coloraxis_colorbar={\n",
"        'title': 'Correlation',\n",
"        'tickvals': [-1, -0.5, 0, 0.5, 1],\n",
"        'ticktext': ['-1', '-0.5', '0', '0.5', '1'],\n",
"    },\n",
"    coloraxis={\n",
"        'colorscale': [\n",
"            [0, 'rgb(58, 89, 156)'],      # blue (cool)\n",
"            [0.5, 'rgb(255, 255, 255)'],  # white\n",
"            [1, 'rgb(179, 35, 26)'],      # red (warm)\n",
"        ],\n",
"    },\n",
"    font={'family': \"Arial\", 'size': 12, 'color': \"black\"},\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "00915ec0",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import sklearn "
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "f02e1f34",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Patient Id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>Air Pollution</th>\n",
" <th>Alcohol use</th>\n",
" <th>Dust Allergy</th>\n",
" <th>OccuPational Hazards</th>\n",
" <th>Genetic Risk</th>\n",
" <th>chronic Lung Disease</th>\n",
" <th>Balanced Diet</th>\n",
" <th>...</th>\n",
" <th>Fatigue</th>\n",
" <th>Weight Loss</th>\n",
" <th>Shortness of Breath</th>\n",
" <th>Wheezing</th>\n",
" <th>Swallowing Difficulty</th>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <th>Frequent Cold</th>\n",
" <th>Dry Cough</th>\n",
" <th>Snoring</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>P1</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P10</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P100</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P1000</td>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P101</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n",
"index \n",
"0 P1 33 1 2 4 5 \n",
"1 P10 17 1 3 1 5 \n",
"2 P100 35 1 4 5 6 \n",
"3 P1000 37 1 7 7 7 \n",
"4 P101 46 1 6 8 7 \n",
"\n",
" OccuPational Hazards Genetic Risk chronic Lung Disease \\\n",
"index \n",
"0 4 3 2 \n",
"1 3 4 2 \n",
"2 5 5 4 \n",
"3 7 6 7 \n",
"4 7 7 6 \n",
"\n",
" Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n",
"index ... \n",
"0 2 ... 3 4 2 \n",
"1 2 ... 1 3 7 \n",
"2 6 ... 8 7 9 \n",
"3 7 ... 4 2 3 \n",
"4 7 ... 3 2 4 \n",
"\n",
" Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n",
"index \n",
"0 2 3 1 \n",
"1 8 6 2 \n",
"2 2 1 4 \n",
"3 1 4 5 \n",
"4 1 4 2 \n",
"\n",
" Frequent Cold Dry Cough Snoring Level \n",
"index \n",
"0 2 3 4 1 \n",
"1 1 7 2 2 \n",
"2 6 7 2 3 \n",
"3 6 7 5 3 \n",
"4 4 2 3 3 \n",
"\n",
"[5 rows x 25 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Numeric encodings for modelling: ordinal target level and gender labels.\n",
"data = (\n",
"    dane\n",
"    .replace({'Level': {'High': 3, 'Medium': 2, 'Low': 1}})\n",
"    .assign(Gender=lambda frame: frame['Gender'].replace({'Man': 1, 'Woman': 2}))\n",
")\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "a47f580a",
"metadata": {},
"outputs": [],
"source": [
"# Seed NumPy's global random number generator.\n",
"np.random.seed(10)\n",
"\n",
"# Print arrays with six decimals and without scientific notation.\n",
"np.set_printoptions(suppress=True, precision=6)\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "7caae544",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Y shape: (1000,)\n",
"X shape: (1000, 23)\n"
]
}
],
"source": [
"# Features: every column except the target and the patient identifier.\n",
"X = data.drop(columns=['Level', 'Patient Id'])\n",
"\n",
"# Target: the numerically encoded level.\n",
"y = data['Level']\n",
"\n",
"print(\"Y shape:\", y.shape)\n",
"print(\"X shape:\", X.shape)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "9139408a",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out the default 25% of the rows for testing. The seed is passed explicitly\n",
"# (same value as the earlier np.random.seed call) so that re-running this cell on\n",
"# its own always yields the same split instead of depending on how much of the\n",
"# global NumPy random state has already been consumed.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "2f45152a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X_train shape: (750, 23)\n",
"y_train shape: (750,)\n",
"X_test shape: (250, 23)\n",
"y_test shape: (250,)\n"
]
}
],
"source": [
"# Sanity check: sizes of the training and test partitions.\n",
"print(\"X_train shape:\", X_train.shape)\n",
"print(\"y_train shape:\", y_train.shape)\n",
"print(\"X_test shape:\", X_test.shape)\n",
"print(\"y_test shape:\", y_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "8ba2674d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\HP\\anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:460: ConvergenceWarning:\n",
"\n",
"lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
"\n"
]
},
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-r
],
"text/plain": [
"LogisticRegression(max_iter=200)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# max_iter is raised above scikit-learn's default of 100 (presumably so the solver\n",
"# converges on this data -- TODO confirm). Solver and multiclass handling are left\n",
"# at their defaults.\n",
"classifier = LogisticRegression(max_iter=200).fit(X_train, y_train)\n",
"classifier"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "ba0a5bda",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.01354 , 0.98631 , 0.00015 ],\n",
" [0. , 0.000005, 0.999995],\n",
" [0.000005, 0.00068 , 0.999315],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.000003, 0.000947, 0.99905 ],\n",
" [0.001809, 0.982182, 0.016009],\n",
" [0. , 0.012048, 0.987952],\n",
" [0.059547, 0.831086, 0.109367],\n",
" [0. , 0.075388, 0.924612],\n",
" [0.039136, 0.960851, 0.000013],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.006091, 0.993801, 0.000108],\n",
" [0.017895, 0.982104, 0.000001],\n",
" [0.998827, 0.001173, 0. ],\n",
" [0.928696, 0.070643, 0.000662],\n",
" [0.033735, 0.966263, 0.000003],\n",
" [0. , 0.000113, 0.999887],\n",
" [0.934007, 0.009674, 0.056319],\n",
" [0. , 0.000031, 0.999969],\n",
" [0.999648, 0.000306, 0.000047],\n",
" [0.000004, 0.00404 , 0.995956],\n",
" [0.999251, 0.000216, 0.000533],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0. , 0.004695, 0.995305],\n",
" [0. , 0.000124, 0.999876],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.059547, 0.831086, 0.109367],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0.999909, 0.00009 , 0.000001],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.976724, 0.023247, 0.000028],\n",
" [0.008947, 0.990345, 0.000708],\n",
" [0. , 0.004695, 0.995305],\n",
" [0.999251, 0.000216, 0.000533],\n",
" [0.005118, 0.994787, 0.000095],\n",
" [0. , 0.00008 , 0.99992 ],\n",
" [0.015368, 0.98458 , 0.000052],\n",
" [0. , 0.000003, 0.999997],\n",
" [0.00583 , 0.994147, 0.000023],\n",
" [0.993283, 0.00006 , 0.006656],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.004655, 0.989938, 0.005407],\n",
" [0. , 0.000134, 0.999866],\n",
" [0.000007, 0.999991, 0.000002],\n",
" [0. , 0.012048, 0.987952],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0. , 0.000031, 0.999969],\n",
" [0. , 0.000165, 0.999835],\n",
" [0.009961, 0.988549, 0.00149 ],\n",
" [0.006484, 0.974177, 0.019339],\n",
" [0.00523 , 0.994198, 0.000572],\n",
" [0.021549, 0.975853, 0.002598],\n",
" [0. , 0.000012, 0.999988],\n",
" [0.009961, 0.988549, 0.00149 ],\n",
" [0.000001, 0.000025, 0.999974],\n",
" [0. , 0.016222, 0.983778],\n",
" [0.00003 , 0.010307, 0.989664],\n",
" [0.993286, 0.006713, 0.000001],\n",
" [0.000175, 0.040597, 0.959228],\n",
" [0.000004, 0.00404 , 0.995956],\n",
" [0.984367, 0.012436, 0.003197],\n",
" [0.000005, 0.00068 , 0.999315],\n",
" [0.967662, 0.024355, 0.007983],\n",
" [0. , 0.000134, 0.999866],\n",
" [0.976724, 0.023247, 0.000028],\n",
" [0.007089, 0.99289 , 0.000021],\n",
" [0. , 0.000165, 0.999835],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.016788, 0.983198, 0.000014],\n",
" [0.988196, 0.002318, 0.009486],\n",
" [0.999947, 0.000021, 0.000032],\n",
" [0.990009, 0.009991, 0. ],\n",
" [0.016788, 0.983198, 0.000014],\n",
" [0.015399, 0.984596, 0.000005],\n",
" [0.022511, 0.975873, 0.001616],\n",
" [0.008947, 0.990345, 0.000708],\n",
" [0.007156, 0.985278, 0.007566],\n",
" [0. , 0.00068 , 0.99932 ],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0. , 0.000106, 0.999894],\n",
" [0.993286, 0.006713, 0.000001],\n",
" [0. , 0.004695, 0.995305],\n",
" [0.006484, 0.974177, 0.019339],\n",
" [0. , 0.00001 , 0.99999 ],\n",
" [0. , 0.03253 , 0.967469],\n",
" [0. , 0.000014, 0.999986],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0. , 0.00068 , 0.99932 ],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0.003605, 0.971443, 0.024952],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0.00583 , 0.994147, 0.000023],\n",
" [0.00583 , 0.994147, 0.000023],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.022062, 0.977641, 0.000297],\n",
" [0.016788, 0.983198, 0.000014],\n",
" [0.957888, 0.04211 , 0.000001],\n",
" [0. , 0.016222, 0.983778],\n",
" [0.000003, 0.999994, 0.000003],\n",
" [0.9999 , 0.0001 , 0. ],\n",
" [0. , 0.012048, 0.987952],\n",
" [0.999909, 0.00009 , 0.000001],\n",
" [0.928696, 0.070643, 0.000662],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0.000003, 0.99999 , 0.000007],\n",
" [0. , 0.004695, 0.995305],\n",
" [0. , 0.000014, 0.999986],\n",
" [0.993283, 0.00006 , 0.006656],\n",
" [0.000003, 0.000947, 0.99905 ],\n",
" [0.008109, 0.979381, 0.01251 ],\n",
" [0.934007, 0.009674, 0.056319],\n",
" [0.999648, 0.000306, 0.000047],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.993286, 0.006713, 0.000001],\n",
" [0.9999 , 0.0001 , 0. ],\n",
" [0. , 0.000113, 0.999887],\n",
" [0. , 0.000063, 0.999937],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.011601, 0.988358, 0.000041],\n",
" [0.999909, 0.00009 , 0.000001],\n",
" [0.000005, 0.00068 , 0.999315],\n",
" [0.000175, 0.040597, 0.959228],\n",
" [0.990009, 0.009991, 0. ],\n",
" [0.978169, 0.021712, 0.000119],\n",
" [0.015368, 0.98458 , 0.000052],\n",
" [0.022062, 0.977641, 0.000297],\n",
" [0. , 0.021094, 0.978906],\n",
" [0.973999, 0.013972, 0.012029],\n",
" [0. , 0.004695, 0.995305],\n",
" [0.996352, 0.003647, 0.000001],\n",
" [0.967662, 0.024355, 0.007983],\n",
" [0.985581, 0.000103, 0.014316],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.984367, 0.012436, 0.003197],\n",
" [0. , 0.000468, 0.999532],\n",
" [0.008109, 0.979381, 0.01251 ],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.985581, 0.000103, 0.014316],\n",
" [0.007156, 0.985278, 0.007566],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.999947, 0.000021, 0.000032],\n",
" [0.973999, 0.013972, 0.012029],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.993286, 0.006713, 0.000001],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.008803, 0.991195, 0.000002],\n",
" [0.008947, 0.990345, 0.000708],\n",
" [0. , 0.002567, 0.997433],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0. , 0.004695, 0.995305],\n",
" [0. , 0.000013, 0.999987],\n",
" [0.98178 , 0.01822 , 0. ],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0.006622, 0.993257, 0.000122],\n",
" [0.972232, 0.027758, 0.00001 ],\n",
" [0.008109, 0.979381, 0.01251 ],\n",
" [0.928696, 0.070643, 0.000662],\n",
" [0. , 0.000196, 0.999804],\n",
" [0.000003, 0.999994, 0.000003],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0.021549, 0.975853, 0.002598],\n",
" [0.999947, 0.000021, 0.000032],\n",
" [0. , 0.000134, 0.999866],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.153033, 0.8185 , 0.028467],\n",
" [0.008947, 0.990345, 0.000708],\n",
" [0.009961, 0.988549, 0.00149 ],\n",
" [0.015399, 0.984596, 0.000005],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.022062, 0.977641, 0.000297],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.967662, 0.024355, 0.007983],\n",
" [0.006622, 0.993257, 0.000122],\n",
" [0.015399, 0.984596, 0.000005],\n",
" [0. , 0.000196, 0.999804],\n",
" [0. , 0.016222, 0.983778],\n",
" [0. , 0.000124, 0.999876],\n",
" [0. , 0.001328, 0.998672],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.022511, 0.975873, 0.001616],\n",
" [0.999251, 0.000216, 0.000533],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0. , 0.03253 , 0.967469],\n",
" [0. , 0.000063, 0.999937],\n",
" [0.993283, 0.00006 , 0.006656],\n",
" [0.007089, 0.99289 , 0.000021],\n",
" [0.009961, 0.988549, 0.00149 ],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0.007156, 0.985278, 0.007566],\n",
" [0.006622, 0.993257, 0.000122],\n",
" [0.021549, 0.975853, 0.002598],\n",
" [0. , 0.000003, 0.999997],\n",
" [0.009393, 0.990606, 0.000001],\n",
" [0. , 0.001767, 0.998233],\n",
" [0.000175, 0.040597, 0.959228],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0.01354 , 0.98631 , 0.00015 ],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.07487 , 0.902033, 0.023097],\n",
" [0. , 0.000113, 0.999887],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0. , 0.000031, 0.999969],\n",
" [0. , 0.0011 , 0.9989 ],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0.957888, 0.04211 , 0.000001],\n",
" [0.004898, 0.993937, 0.001166],\n",
" [0. , 0. , 1. ],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0.990009, 0.009991, 0. ],\n",
" [0.976724, 0.023247, 0.000028],\n",
" [0.000007, 0.999991, 0.000002],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0. , 0.000014, 0.999986],\n",
" [0.015399, 0.984596, 0.000005],\n",
" [0.003605, 0.971443, 0.024952],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0. , 0.00001 , 0.99999 ],\n",
" [0.004655, 0.989938, 0.005407],\n",
" [0. , 0.000063, 0.999937],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.007156, 0.985278, 0.007566],\n",
" [0.000007, 0.999991, 0.000002],\n",
" [0. , 0.001638, 0.998362],\n",
" [0.006622, 0.993257, 0.000122],\n",
" [0. , 0.000468, 0.999532],\n",
" [0.978169, 0.021712, 0.000119],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0. , 0.000196, 0.999804],\n",
" [0.934007, 0.009674, 0.056319],\n",
" [0.008109, 0.979381, 0.01251 ],\n",
" [0.000004, 0.00404 , 0.995956],\n",
" [0.022511, 0.975873, 0.001616],\n",
" [0. , 0.03253 , 0.967469],\n",
" [0.00007 , 0.024082, 0.975848],\n",
" [0. , 0.000031, 0.999969],\n",
" [0. , 0.012048, 0.987952],\n",
" [0.999947, 0.000021, 0.000032],\n",
" [0.004898, 0.993937, 0.001166]])"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class-membership probabilities for every test sample (one column per class).\n",
"# NOTE(review): the bare `y_prob` below renders the whole array; `y_prob[:5]` would keep the output short.\n",
"y_prob = classifier.predict_proba(X_test)\n",
"y_prob"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "08f121e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 3, 3, 1, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, 1, 2, 3, 1, 3, 1, 3, 1,\n",
" 3, 3, 3, 2, 2, 1, 1, 2, 1, 2, 3, 1, 2, 3, 2, 3, 2, 1, 2, 2, 3, 2,\n",
" 3, 3, 1, 3, 3, 2, 2, 2, 2, 3, 2, 3, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1,\n",
" 2, 3, 3, 2, 1, 1, 1, 2, 2, 2, 2, 2, 3, 1, 3, 1, 3, 2, 3, 3, 3, 2,\n",
" 1, 3, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 3, 2, 1, 3, 1, 1, 1, 2, 3, 3,\n",
" 1, 3, 2, 1, 1, 3, 2, 2, 1, 1, 3, 3, 3, 2, 1, 3, 3, 1, 1, 2, 2, 3,\n",
" 1, 3, 1, 1, 1, 3, 1, 3, 2, 1, 2, 2, 1, 1, 2, 3, 1, 1, 3, 1, 2, 2,\n",
" 2, 3, 1, 3, 3, 1, 1, 2, 1, 2, 1, 3, 2, 1, 2, 1, 3, 2, 3, 1, 3, 2,\n",
" 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1, 3, 3, 1, 2, 2,\n",
" 1, 2, 2, 2, 3, 2, 3, 3, 1, 2, 2, 2, 3, 1, 3, 3, 1, 1, 2, 3, 1, 1,\n",
" 1, 2, 1, 3, 2, 2, 2, 1, 3, 2, 3, 1, 2, 2, 3, 2, 3, 1, 1, 3, 1, 2,\n",
" 3, 2, 3, 3, 3, 3, 1, 2], dtype=int64)"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predicted class label for each test sample.\n",
"y_pred = classifier.predict(X_test)\n",
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "322588b1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: mlxtend in c:\\users\\hp\\anaconda3\\lib\\site-packages (0.23.0)\n",
"Requirement already satisfied: scipy>=1.2.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (1.11.4)\n",
"Requirement already satisfied: numpy>=1.16.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (1.26.0)\n",
"Requirement already satisfied: pandas>=0.24.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (2.1.1)\n",
"Requirement already satisfied: scikit-learn>=1.0.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (1.3.2)\n",
"Requirement already satisfied: matplotlib>=3.0.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (3.8.0)\n",
"Requirement already satisfied: joblib>=0.13.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (1.3.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (1.2.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (4.25.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (1.4.4)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (23.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (10.0.1)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (3.0.9)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas>=0.24.2->mlxtend) (2023.3.post1)\n",
"Requirement already satisfied: tzdata>=2022.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas>=0.24.2->mlxtend) (2023.3)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from scikit-learn>=1.0.2->mlxtend) (3.2.0)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=3.0.0->mlxtend) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.3.2 -> 24.0\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
}
],
"source": [
"# %pip installs into the environment of the running kernel; the version is pinned to the one this notebook was last run with.\n",
"%pip install mlxtend==0.23.0"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "c876fff8",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"from mlxtend.plotting import plot_confusion_matrix\n",
"import seaborn as sns\n",
"sns.set()\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "cbb6c719",
"metadata": {},
"outputs": [],
"source": [
"#cm = confusion_matrix(y_test, y_pred)\n",
"#plot_confusion_matrix(cm)\n",
"\n",
"#acc = accuracy_score(y_test, y_pred)\n",
"#print('Accuracy',':', acc)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "e3c2afe8",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"colorscale": [
[
0,
"rgb(3, 5, 18)"
],
[
0.09090909090909091,
"rgb(25, 25, 51)"
],
[
0.18181818181818182,
"rgb(44, 42, 87)"
],
[
0.2727272727272727,
"rgb(58, 60, 125)"
],
[
0.36363636363636365,
"rgb(62, 83, 160)"
],
[
0.45454545454545453,
"rgb(62, 109, 178)"
],
[
0.5454545454545454,
"rgb(72, 134, 187)"
],
[
0.6363636363636364,
"rgb(89, 159, 196)"
],
[
0.7272727272727273,
"rgb(114, 184, 205)"
],
[
0.8181818181818182,
"rgb(149, 207, 216)"
],
[
0.9090909090909091,
"rgb(192, 229, 232)"
],
[
1,
"rgb(234, 252, 253)"
]
],
"reversescale": true,
"showscale": true,
"type": "heatmap",
"x": [
"pred_1",
"pred_2",
"pred_3"
],
"y": [
"true_3",
"true_2",
"true_1"
],
"z": [
[
0,
0,
85
],
[
0,
89,
0
],
[
76,
0,
0
]
]
}
],
"layout": {
"annotations": [
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_1",
"xref": "x",
"y": "true_3",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_2",
"xref": "x",
"y": "true_3",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "85",
"x": "pred_3",
"xref": "x",
"y": "true_3",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_1",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "89",
"x": "pred_2",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_3",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "76",
"x": "pred_1",
"xref": "x",
"y": "true_1",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_2",
"xref": "x",
"y": "true_1",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_3",
"xref": "x",
"y": "true_1",
"yref": "y"
}
],
"font": {
"size": 16
},
"height": 500,
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Confusion Matrix - Accuracy: 1.0000"
},
"width": 500,
"xaxis": {
"dtick": 1,
"gridcolor": "rgb(0, 0, 0)",
"side": "top",
"ticks": ""
},
"yaxis": {
"dtick": 1,
"ticks": "",
"ticksuffix": " "
}
}
}
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"acc = accuracy_score(y_test, y_pred)\n",
"cm = confusion_matrix(y_test, y_pred)\n",
"\n",
"\n",
"def plot_confusion_matrix(cm, accuracy=None, class_labels=None):\n",
"    \"\"\"Show a confusion matrix as an annotated Plotly heatmap.\n",
"\n",
"    NOTE: this definition shadows the `plot_confusion_matrix` imported from\n",
"    `mlxtend.plotting` earlier in the notebook.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    cm : array-like of shape (n_classes, n_classes)\n",
"        Confusion matrix with true classes on the rows and predicted classes\n",
"        on the columns, as returned by `sklearn.metrics.confusion_matrix`.\n",
"    accuracy : float, optional\n",
"        Value shown in the figure title. When omitted it is derived from the\n",
"        matrix itself as trace(cm) / sum(cm), so the function does not depend\n",
"        on a global variable.\n",
"    class_labels : sequence, optional\n",
"        One label per class, in the row/column order of `cm`. Defaults to\n",
"        1..n_classes.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is displayed with `fig.show()`.\n",
"    \"\"\"\n",
"    cm = np.asarray(cm)\n",
"    if class_labels is None:\n",
"        class_labels = range(1, cm.shape[0] + 1)\n",
"    class_labels = list(class_labels)\n",
"    if accuracy is None:\n",
"        total = cm.sum()\n",
"        # An empty matrix has no defined accuracy; avoid dividing by zero.\n",
"        accuracy = np.trace(cm) / total if total else float('nan')\n",
"\n",
"    # Rows and their labels are reversed together, presumably so that the first\n",
"    # class is drawn in the top row of the heatmap.\n",
"    pred_names = [f'pred_{label}' for label in class_labels]\n",
"    true_names = [f'true_{label}' for label in reversed(class_labels)]\n",
"    cm_df = pd.DataFrame(cm[::-1], columns=pred_names, index=true_names)\n",
"\n",
"    fig = ff.create_annotated_heatmap(\n",
"        z=cm_df.values,\n",
"        x=list(cm_df.columns),\n",
"        y=list(cm_df.index),\n",
"        colorscale='ice',\n",
"        showscale=True,\n",
"        reversescale=True,\n",
"    )\n",
"    fig.update_layout(\n",
"        width=500,\n",
"        height=500,\n",
"        title=f'Confusion Matrix - Accuracy: {accuracy:.4f}',\n",
"        font_size=16,\n",
"    )\n",
"    fig.show()\n",
"\n",
"\n",
"plot_confusion_matrix(cm, accuracy=acc)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "a1ffeb65",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" pred_1 1.00 1.00 1.00 76\n",
" pred_2 1.00 1.00 1.00 89\n",
" pred_3 1.00 1.00 1.00 85\n",
"\n",
" accuracy 1.00 250\n",
" macro avg 1.00 1.00 1.00 250\n",
"weighted avg 1.00 1.00 1.00 250\n",
"\n"
]
}
],
"source": [
"from sklearn.metrics import classification_report\n",
"\n",
"# Per-class precision / recall / F1 on the test split.\n",
"# NOTE(review): each report row describes a *true* class (support counts true samples),\n",
"# so the 'pred_*' row names are potentially misleading -- consider renaming them.\n",
"print(\n",
"    classification_report(\n",
"        y_test,\n",
"        y_pred,\n",
"        target_names=['pred_1', 'pred_2', 'pred_3'],\n",
"    )\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}