agabka/projekt.ipynb

3621 lines
241 KiB
Plaintext
Raw Normal View History

2023-12-29 18:23:11 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 393,
2023-12-29 18:23:11 +01:00
"id": "7ce53ad1",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import plotly.figure_factory as ff\n",
"import seaborn as sns\n",
"sns.set()\n"
2023-12-29 18:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 394,
2023-12-29 18:23:11 +01:00
"id": "73edef6d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Patient Id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>Air Pollution</th>\n",
" <th>Alcohol use</th>\n",
" <th>Dust Allergy</th>\n",
" <th>OccuPational Hazards</th>\n",
" <th>Genetic Risk</th>\n",
" <th>chronic Lung Disease</th>\n",
" <th>Balanced Diet</th>\n",
" <th>...</th>\n",
" <th>Fatigue</th>\n",
" <th>Weight Loss</th>\n",
" <th>Shortness of Breath</th>\n",
" <th>Wheezing</th>\n",
" <th>Swallowing Difficulty</th>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <th>Frequent Cold</th>\n",
" <th>Dry Cough</th>\n",
" <th>Snoring</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>P1</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P10</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P100</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P1000</td>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P101</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>High</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n",
"index \n",
"0 P1 33 1 2 4 5 \n",
"1 P10 17 1 3 1 5 \n",
"2 P100 35 1 4 5 6 \n",
"3 P1000 37 1 7 7 7 \n",
"4 P101 46 1 6 8 7 \n",
"\n",
" OccuPational Hazards Genetic Risk chronic Lung Disease \\\n",
"index \n",
"0 4 3 2 \n",
"1 3 4 2 \n",
"2 5 5 4 \n",
"3 7 6 7 \n",
"4 7 7 6 \n",
"\n",
" Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n",
"index ... \n",
"0 2 ... 3 4 2 \n",
"1 2 ... 1 3 7 \n",
"2 6 ... 8 7 9 \n",
"3 7 ... 4 2 3 \n",
"4 7 ... 3 2 4 \n",
"\n",
" Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n",
"index \n",
"0 2 3 1 \n",
"1 8 6 2 \n",
"2 2 1 4 \n",
"3 1 4 5 \n",
"4 1 4 2 \n",
"\n",
" Frequent Cold Dry Cough Snoring Level \n",
"index \n",
"0 2 3 4 Low \n",
"1 1 7 2 Medium \n",
"2 6 7 2 High \n",
"3 6 7 5 High \n",
"4 4 2 3 High \n",
"\n",
"[5 rows x 25 columns]"
]
},
"execution_count": 394,
2023-12-29 18:23:11 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Path to the dataset, relative to the notebook so the analysis is not tied to\n",
"# one user's machine. Edit DATA_PATH if the CSV lives somewhere else.\n",
"DATA_PATH = 'cancer_patient_data_sets.csv'\n",
"\n",
"# The first CSV column is the row index, so it becomes the DataFrame index.\n",
"dane = pd.read_csv(DATA_PATH, index_col=0)\n",
"dane.head()"
]
},
{
"cell_type": "code",
"execution_count": 395,
2023-12-29 18:23:11 +01:00
"id": "1831fdd7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 1000 entries, 0 to 999\n",
"Data columns (total 25 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Patient Id 1000 non-null object\n",
" 1 Age 1000 non-null int64 \n",
" 2 Gender 1000 non-null int64 \n",
" 3 Air Pollution 1000 non-null int64 \n",
" 4 Alcohol use 1000 non-null int64 \n",
" 5 Dust Allergy 1000 non-null int64 \n",
" 6 OccuPational Hazards 1000 non-null int64 \n",
" 7 Genetic Risk 1000 non-null int64 \n",
" 8 chronic Lung Disease 1000 non-null int64 \n",
" 9 Balanced Diet 1000 non-null int64 \n",
" 10 Obesity 1000 non-null int64 \n",
" 11 Smoking 1000 non-null int64 \n",
" 12 Passive Smoker 1000 non-null int64 \n",
" 13 Chest Pain 1000 non-null int64 \n",
" 14 Coughing of Blood 1000 non-null int64 \n",
" 15 Fatigue 1000 non-null int64 \n",
" 16 Weight Loss 1000 non-null int64 \n",
" 17 Shortness of Breath 1000 non-null int64 \n",
" 18 Wheezing 1000 non-null int64 \n",
" 19 Swallowing Difficulty 1000 non-null int64 \n",
" 20 Clubbing of Finger Nails 1000 non-null int64 \n",
" 21 Frequent Cold 1000 non-null int64 \n",
" 22 Dry Cough 1000 non-null int64 \n",
" 23 Snoring 1000 non-null int64 \n",
" 24 Level 1000 non-null object\n",
"dtypes: int64(23), object(2)\n",
"memory usage: 203.1+ KB\n"
]
}
],
"source": [
"dane.info()"
]
},
{
"cell_type": "markdown",
"id": "69f1b9c9",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 396,
"id": "422c8e2c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Air Pollution</th>\n",
" <th>Smoking</th>\n",
" <th>Passive Smoker</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>365 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Air Pollution Smoking Passive Smoker\n",
"index \n",
"2 4 2 3\n",
"3 7 7 7\n",
"4 6 8 7\n",
"5 4 2 3\n",
"10 6 7 8\n",
"... ... ... ...\n",
"995 6 7 8\n",
"996 6 7 8\n",
"997 4 2 3\n",
"998 6 8 7\n",
"999 6 2 3\n",
"\n",
"[365 rows x 3 columns]"
]
},
"execution_count": 396,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows whose Level is 'High' and the three exposure columns,\n",
"# selecting rows and columns in a single .loc call.\n",
"dane0 = dane.loc[dane['Level'] == 'High', ['Air Pollution', 'Smoking', 'Passive Smoker']]\n",
"dane0"
]
},
2023-12-29 18:23:11 +01:00
{
"cell_type": "code",
"execution_count": 397,
2023-12-29 18:23:11 +01:00
"id": "af7da17c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Age</th>\n",
" <td>1000.0</td>\n",
" <td>37.174</td>\n",
" <td>12.005493</td>\n",
" <td>14.0</td>\n",
" <td>27.75</td>\n",
" <td>36.0</td>\n",
" <td>45.0</td>\n",
" <td>73.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Gender</th>\n",
" <td>1000.0</td>\n",
" <td>1.402</td>\n",
" <td>0.490547</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Air Pollution</th>\n",
" <td>1000.0</td>\n",
" <td>3.840</td>\n",
" <td>2.030400</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Alcohol use</th>\n",
" <td>1000.0</td>\n",
" <td>4.563</td>\n",
" <td>2.620477</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dust Allergy</th>\n",
" <td>1000.0</td>\n",
" <td>5.165</td>\n",
" <td>1.980833</td>\n",
" <td>1.0</td>\n",
" <td>4.00</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OccuPational Hazards</th>\n",
" <td>1000.0</td>\n",
" <td>4.840</td>\n",
" <td>2.107805</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Genetic Risk</th>\n",
" <td>1000.0</td>\n",
" <td>4.580</td>\n",
" <td>2.126999</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>chronic Lung Disease</th>\n",
" <td>1000.0</td>\n",
" <td>4.380</td>\n",
" <td>1.848518</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Balanced Diet</th>\n",
" <td>1000.0</td>\n",
" <td>4.491</td>\n",
" <td>2.135528</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Obesity</th>\n",
" <td>1000.0</td>\n",
" <td>4.465</td>\n",
" <td>2.124921</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Smoking</th>\n",
" <td>1000.0</td>\n",
" <td>3.948</td>\n",
" <td>2.495902</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Passive Smoker</th>\n",
" <td>1000.0</td>\n",
" <td>4.195</td>\n",
" <td>2.311778</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Chest Pain</th>\n",
" <td>1000.0</td>\n",
" <td>4.438</td>\n",
" <td>2.280209</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Coughing of Blood</th>\n",
" <td>1000.0</td>\n",
" <td>4.859</td>\n",
" <td>2.427965</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Fatigue</th>\n",
" <td>1000.0</td>\n",
" <td>3.856</td>\n",
" <td>2.244616</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Weight Loss</th>\n",
" <td>1000.0</td>\n",
" <td>3.855</td>\n",
" <td>2.206546</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Shortness of Breath</th>\n",
" <td>1000.0</td>\n",
" <td>4.240</td>\n",
" <td>2.285087</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wheezing</th>\n",
" <td>1000.0</td>\n",
" <td>3.777</td>\n",
" <td>2.041921</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Swallowing Difficulty</th>\n",
" <td>1000.0</td>\n",
" <td>3.746</td>\n",
" <td>2.270383</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <td>1000.0</td>\n",
" <td>3.923</td>\n",
" <td>2.388048</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Frequent Cold</th>\n",
" <td>1000.0</td>\n",
" <td>3.536</td>\n",
" <td>1.832502</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dry Cough</th>\n",
" <td>1000.0</td>\n",
" <td>3.853</td>\n",
" <td>2.039007</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Snoring</th>\n",
" <td>1000.0</td>\n",
" <td>2.926</td>\n",
" <td>1.474686</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count mean std min 25% 50% 75% \\\n",
"Age 1000.0 37.174 12.005493 14.0 27.75 36.0 45.0 \n",
"Gender 1000.0 1.402 0.490547 1.0 1.00 1.0 2.0 \n",
"Air Pollution 1000.0 3.840 2.030400 1.0 2.00 3.0 6.0 \n",
"Alcohol use 1000.0 4.563 2.620477 1.0 2.00 5.0 7.0 \n",
"Dust Allergy 1000.0 5.165 1.980833 1.0 4.00 6.0 7.0 \n",
"OccuPational Hazards 1000.0 4.840 2.107805 1.0 3.00 5.0 7.0 \n",
"Genetic Risk 1000.0 4.580 2.126999 1.0 2.00 5.0 7.0 \n",
"chronic Lung Disease 1000.0 4.380 1.848518 1.0 3.00 4.0 6.0 \n",
"Balanced Diet 1000.0 4.491 2.135528 1.0 2.00 4.0 7.0 \n",
"Obesity 1000.0 4.465 2.124921 1.0 3.00 4.0 7.0 \n",
"Smoking 1000.0 3.948 2.495902 1.0 2.00 3.0 7.0 \n",
"Passive Smoker 1000.0 4.195 2.311778 1.0 2.00 4.0 7.0 \n",
"Chest Pain 1000.0 4.438 2.280209 1.0 2.00 4.0 7.0 \n",
"Coughing of Blood 1000.0 4.859 2.427965 1.0 3.00 4.0 7.0 \n",
"Fatigue 1000.0 3.856 2.244616 1.0 2.00 3.0 5.0 \n",
"Weight Loss 1000.0 3.855 2.206546 1.0 2.00 3.0 6.0 \n",
"Shortness of Breath 1000.0 4.240 2.285087 1.0 2.00 4.0 6.0 \n",
"Wheezing 1000.0 3.777 2.041921 1.0 2.00 4.0 5.0 \n",
"Swallowing Difficulty 1000.0 3.746 2.270383 1.0 2.00 4.0 5.0 \n",
"Clubbing of Finger Nails 1000.0 3.923 2.388048 1.0 2.00 4.0 5.0 \n",
"Frequent Cold 1000.0 3.536 1.832502 1.0 2.00 3.0 5.0 \n",
"Dry Cough 1000.0 3.853 2.039007 1.0 2.00 4.0 6.0 \n",
"Snoring 1000.0 2.926 1.474686 1.0 2.00 3.0 4.0 \n",
"\n",
" max \n",
"Age 73.0 \n",
"Gender 2.0 \n",
"Air Pollution 8.0 \n",
"Alcohol use 8.0 \n",
"Dust Allergy 8.0 \n",
"OccuPational Hazards 8.0 \n",
"Genetic Risk 7.0 \n",
"chronic Lung Disease 7.0 \n",
"Balanced Diet 7.0 \n",
"Obesity 7.0 \n",
"Smoking 8.0 \n",
"Passive Smoker 8.0 \n",
"Chest Pain 9.0 \n",
"Coughing of Blood 9.0 \n",
"Fatigue 9.0 \n",
"Weight Loss 8.0 \n",
"Shortness of Breath 9.0 \n",
"Wheezing 8.0 \n",
"Swallowing Difficulty 8.0 \n",
"Clubbing of Finger Nails 9.0 \n",
"Frequent Cold 7.0 \n",
"Dry Cough 7.0 \n",
"Snoring 7.0 "
]
},
"execution_count": 397,
2023-12-29 18:23:11 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane.describe().T"
]
},
{
"cell_type": "code",
"execution_count": 398,
"id": "c6867768",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Genetic Risk 5.0\n",
"dtype: float64"
]
},
"execution_count": 398,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane[['Genetic Risk']].median()"
]
},
{
"cell_type": "code",
"execution_count": 399,
2023-12-29 18:23:11 +01:00
"id": "a043ec73",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Patient Id', 'Age', 'Gender', 'Air Pollution', 'Alcohol use',\n",
" 'Dust Allergy', 'OccuPational Hazards', 'Genetic Risk',\n",
" 'chronic Lung Disease', 'Balanced Diet', 'Obesity', 'Smoking',\n",
" 'Passive Smoker', 'Chest Pain', 'Coughing of Blood', 'Fatigue',\n",
" 'Weight Loss', 'Shortness of Breath', 'Wheezing',\n",
" 'Swallowing Difficulty', 'Clubbing of Finger Nails', 'Frequent Cold',\n",
" 'Dry Cough', 'Snoring', 'Level'],\n",
" dtype='object')"
]
},
"execution_count": 399,
2023-12-29 18:23:11 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane.columns"
]
},
{
"cell_type": "code",
"execution_count": 400,
2023-12-29 18:23:11 +01:00
"id": "e6cad188",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Gender\n",
"1 598\n",
"2 402\n",
"dtype: int64"
]
},
"execution_count": 400,
2023-12-29 18:23:11 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane2 = dane.groupby('Gender').size()\n",
"dane2"
]
},
{
"cell_type": "code",
"execution_count": 401,
2023-12-29 18:23:11 +01:00
"id": "966e57b9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAGvCAYAAAB1pf5FAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAArCElEQVR4nO3deXhU9aH/8c8s2RcMERKUy9IgIAIGJJQ8twGa1mAVvU2xLZV4bQSMgk0JBhRBZRchLEJBoexXKHIFaa16Qen1qi0gUCtPDYgiAgJJDIGEhCTDLL8/+GXslGiZrN9k3q/n4SFzlpnvycxJ3jlzZsbi8Xg8AgAAMJC1uQcAAADwTQgVAABgLEIFAAAYi1ABAADGIlQAAICxCBUAAGAsQgUAABiLUAEAAMayN/cAGoLH45HbzfvWBQqr1cL9DbRS7N+Bw2q1yGKx/MvlWkWouN0elZRUNPcw0ATsdqtiYiJUVnZJTqe7uYcDoAGxfweWtm0jZLP961DhqR8AAGAsQgUAABiLUAEAAMYiVAAAgLEIFQAAYKxW8aofAAD84Xa75XI5m3sYrZbNZpfV2jDHQggVAEDA8Hg8KisrUWVleXMPpdULC4tUdHTba3qvlG9DqAAAAkZNpERGxig4OKTev0RxNY/HI4ejWuXl5yVJbdrE1uv6CBUAQEBwu13eSImMjG7u4bRqwcEhkqTy8vOKioqp19NAnEwLAAgILpdL0te/RNG4ar7P9T0XqE6hsmPHDt15553q06eP7rrrLr355pveeV9++aWysrLUv39/fe9739OSJUu8D44amzZt0g9+8AP17dtX9913n/Lz8+u1EQAAXCue7mkaDfV99jtUfv/732vq1KkaNWqUXn/9dQ0fPlwTJ07Uhx9+qMuXL2v06NGSpC1btmj69On63e9+p+XLl3vXf/XVVzV//nz9+te/1vbt29WxY0dlZmaqpKSkQTYIAAB/Wa0W2e3WZvlntRJO38avc1Q8Ho+ef/55/ed//qdGjRolSXrkkUd04MABffDBBzp9+rTOnDmjrVu3qk2bNurevbvOnTun+fPn6+GHH1ZwcLBefPFFZWRk6J577pEkzZ07Vz/84Q/13//938rKymr4LQQA4FtYrRZdd124bLbmORvC5XLrwoVLfGr0N/ArVI4fP67Tp0/r7rvv9pm+Zs0aSdL06dN1yy23qE2bNt55gwYNUnl5uQ4fPqyOHTvqiy++UHJy8tcDsNs1YMAA7d+/n1ABADQ5q9Uim82qvE0H9WXhxSa97Y5xUcoddZusVkuLCpXS0gt67713NHz4jxv9tvwOFUm6dOmSRo8erfz8fHXs2FGPPPKIUlNTVVBQoPj4eJ912rdvL0k6e/as7PYrN9ehQ4erljly5EidN0K68vHgaP1q/uJprr98ADSext6/3e5vf4rly8KLOna6tFFuu7VZvvx5nTlz+ppCxWaz1Ot3tF+hUl5+5Q1yHn/8cT366KPKzc3Vzp07NW7cOK1bt05VVVWKjvZ9yVdIyJWzfqurq1VZWSlJCg4OvmqZ6urqOm+E1WpRTExEnddHyxMdHdbcQwDQSBpr/66qsqm42HrVL04T/vAxYQz+sli+PUDcbousVqvatAlXaGhonW/Hr1AJCgqSJI0ePVrp6emSpJtvvln5+flat26dQkND5XA4fNapCZDw8K8HWtsyYWF1f2C63R6VlV2q8/otkcViUVRUaIt8cKNuXC63Ll6sksfTcg4PA/6w2ayKjg5TWVmlXC53g1+/w1H9/9863yOns+Gvvz5cLrdfY7p06ZJWrvyN3nlnty5duqQePW7Wo4/mqGfPm/X3vx/SqlUr9Mknh2W32/Xv/z5Y48f/Wm3aXCdJuvfeu/WjHw3X6NFfn27xj9PeeOM1bdiwRg88MFobNqxRUVGhunZN0IQJuerbN1Fz5kzXm2/+UZI0aFB/vf/+gW/YJo/cbrdKSy+pstJ11fzo6LBr+h3mV6
jExcVJkrp37+4zvVu3bnrnnXc0cOBAHT161GdeUVGRd92ap3yKioqUkJDgs0zNddeVaQ+6xma3W5vtOVU0vZrnsT0e837AAg3N31/a1369rSfyn376CZ06dVJPPjldN97YURs3rlVOznjl5S3Vr36VpXvuSdfEiY+rpOScFi16Tjk5j+q3v90gm812TddfWFigHTu26amnZik8PFwLF87TnDnTtWXLq/r1r3NVXV2toqJCzZkz/19eV33D0K9QueWWWxQREaGPPvpIAwYM8E4/evSoOnXqpKSkJO3YsUPl5eWKjIyUJO3du1cRERHq2bOngoOD1bVrV+3bt897Qq3T6dSBAwd033331XkjAhnPqQJAYDl58gvt3fsXLVr0Gw0cOEiS9NhjTygqKkqbN29QQsJNysmZLEnq0qWrnnlmjjIz79MHH+xRcvL3ruk2nE6nJk2aoptu6iFJGjlylKZMydW5c+d0/fXXKyQkRHa7XbGx1zfORv4Dv543CA0N1ZgxY7R8+XL98Y9/1MmTJ/XCCy/oz3/+szIzM/XDH/5Q7dq104QJE3TkyBG9/fbbWrRokR588EHveSkPPvig1q1bp1dffVWfffaZnnzySVVVVenee+9tlA0EAKA1OXbsM0nSLbf09k4LCQnRr341USdOfKE+fW71Wf6mm7orMjLSu9616ty5q/friIgrBx+czst1HXad+f1ZP+PGjVNYWJgWL16swsJCJSQkaNmyZfrud78rSVq9erVmzJihn/3sZ2rTpo3uu+8+jRs3zrv+z372M128eFFLlizRhQsX1Lt3b61bt05t27ZtuK0CAKCVqnkFbW2+6Rw2j8fzrev98zvIS1e/8OXbrr8x1elDCTMzM5WZmVnrvM6dO2vt2rXfuv7o0aO972ALAACuXc2RjsOH8zVgwEBJV56qGTkyXUVFhQoN9X1xyqefHlVFRYW6dPmOJMluD9KlSxXe+RUV5SopOefXGJryYwj49GQAAHTlpPWWcJudOnXWkCHf16JFzyk3d4quv76dXnppvRwOh154Ya3GjRutxYvnKz39pyopOafFi+ere/ce3qjp3buPdu9+S0OH/kCRkVFas+ZF2Wz+5UBYWJiKi4t15sxp3XDDjX5vgz8IFQBAQHO7PXK53ModdVuz3L7L5fb7XWmnTHlGy5c/r6eeelwOx2X16tVbixb9Rt263aSFC5fpt799QQ8+OErh4RFKSRmqRx551PvUT1bWeJWVlWrChHGKjIzSyJEZunix3K/b/9GPhuvdd9/R/ff/TC+/vEPXX9/Or/X9YfG0gjdlcLncKimp+NcLtiJ2u1UxMRGasOgdXvUTABJubKMlE4fq/PkKXp6MVqvm51pjPc4vX3bo3Lmzio3toKAg3/MvrFZLs304oNvtaVFvn3+tvu37LUlt20Y0/PuoAADQGrXWWGgNeFtTAABgLEIFAAAYi1ABAADGIlQAAAGlFbyGpEVoqO8zoQIACAg1H8jncFQ380gCQ8332d/3aPlnvOoHABAQrFabwsIiVV5+XpIUHBzSpO+wGig8Ho8cjmqVl59XWFikrNb6HRMhVAAAASM6+srnytXEChpPWFik9/tdH4QKACBgWCwWtWkTq6ioGLlczuYeTqtls9nrfSSlBqECAAg4VqtVVuvV75YK83AyLQAAMBahAgAAjEWoAAAAYxEqAADAWIQKAAAwFqECAACMRagAAABjESoAAMBYhAoAADAWoQIAAIxFqAAAAGMRKgAAwFiECgAAMBahAgAAjEWoAAAAYxEqAADAWIQKAAAwFqECAACMRagAAABjESoAAMBYhAoAADAWoQIAAIxFqAAAAGMRKgAAwFiECgAAMBahAgAAjEWoAAAAYxEqAADAWIQKAAAwFqECAACMRagAAABjESoAAMBYhAoAADAWoQIAAIzld6gUFhaqR48eV/3bvn27JOnw4cPKyMhQYmKiUlNTtXHjRp/13W63li5dqpSUFCUmJmrs2L
E6depUw2wNAABoVez+rnDkyBGFhITo7bfflsVi8U6PiorS+fPnlZmZqdTUVM2YMUN/+9vfNGPGDEVERGjEiBGSpBU
2023-12-29 18:23:11 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the number of patients per Gender code.\n",
"# The trailing semicolon hides the Legend repr without rebinding `_`,\n",
"# which IPython uses for the previous cell's result.\n",
"dane['Gender'].value_counts().plot(kind='bar')\n",
"plt.legend();\n"
2023-12-29 18:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 402,
2023-12-29 18:23:11 +01:00
"id": "8d81604c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Smoking\n",
" 1 181\n",
" 2 222\n",
" 3 172\n",
" 4 59\n",
" 5 10\n",
" 6 60\n",
" 7 207\n",
" 8 89\n",
" dtype: int64]"
]
},
"execution_count": 402,
2023-12-29 18:23:11 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane3 = [dane.groupby('Smoking').size()]\n",
"dane3 "
]
},
{
"cell_type": "code",
"execution_count": 403,
2023-12-29 18:23:11 +01:00
"id": "d85261ce",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAGFCAYAAADEhjUtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbNUlEQVR4nO3dd3gU5doG8HtmdmfTeza9UkMLvXcRQUBA/SzHLgiK5YggoEcFK4i9IEUEFcUCKoqIokivCZBASCGBkN43Pdns7sx8f4RgIYSUnZ0tz++6vDwnIbM3GPbOzLzzvIwkSRIIIYQQmbBKByCEEGLfqGgIIYTIioqGEEKIrKhoCCGEyIqKhhBCiKyoaAghhMiKioYQQoisqGgIIYTIioqGEEKIrKhoCCGEyIqKhhBCiKyoaAghhMiKioYQQoisqGgIIYTIioqGEEKIrKhoCCGEyIqKhhBCiKyoaAghhMiKioYQQoisqGgIIYTIioqGEEKIrKhoCCGEyIqKhhBCiKyoaAghhMiKioYQQoisqGgIIYTIioqGEEKIrKhoCCGEyIqKhhBCiKyoaAghhMiKioYQQoisqGgIIYTIioqGEEKIrKhoCCGEyIqKhhBCiKyoaAghhMiKioYQQoisqGgIIYTIioqGEEKIrKhoCCGEyIqKhhBCiKyoaAghhMhKpXQAYn2OHTuGe++9t9nPhYaGYvfu3RZORAixZVQ05Ar9+vXDwYMH//GxhIQEPP7445g3b55CqQghtoqRJElSOgSxbnV1dZg6dSqGDBmC5cuXKx2HEGJj6IyGXNOaNWtQX1+PxYsXKx1FMZIkAqJ46f8xAMuCYRjzHFsUAUkEwIDhOLMckxBrQkVDWqTT6fDpp59iwYIF8PLyUjqObCTBBDAMGLbxjV6SJAi1lRDrqyDUVUGoq4aor4FYXwNBXwNRXwuxvhpCfdP/roFo1F86Bgsw3KV/s43H/Nf/ZhgWjFoDztUTKjdvcK5e4Ny8wLn5QOXhA87VC6yT2z/KTJIkQBQAljNbyRFiCVQ0pEWbN2+Gu7s7br/9dqWjdFjTmQPD/fVtL+hrYCwrgLE0G4ayAhh1eTCWFcBUXghJMCqYFgDDgnP1AOfqDc7NCypPf/B+oeD9w8H7h4Nz9QQAiCYjRIaDiqNFpMQ6UdGQFm3btg0zZsyAk5OT0lHaTBJMl3/6F+proM9OhqE0B8ayfBjL8mDUFUDU1ygd8+okEUJNBYSaCqDoyk+zTq7QTv8vVOG9sTs+C13CvRER6A61qvGszCSIVD7EKlDRkKtKTU1FTk4Opk2bpnSUVpEE0+WzFWN5EeqzkqDPSYE+JxWm8gKF05mfqK8F5+GH4vIGfPTdaQAAyzII1bqhU4gXuoR5oX93LUL83SBJEkRRAkfFQxRARUOuKj4+Hr6+vujevbvSUZolCQIYjoMkCjAUXUR91lnoc1PQkJsGobZS6XjyY1jwviHISiq+/CFRlJBdWI3swmrsOZEDAPDxcELfrv7o180fA7oFwN2Vhyg2LjZlWbrXQ+RHRUOuKjk5Gd26dVM6xmWSJDXeY2E5GCuKUZt8CHUXEtCQnw7J2KB0PItT+wSB4VQ4m1nW4q/TVenxZ3wO/ozPAcMAEYEe6NfNH/27adEz2hdqFQeTIIJjGVpkQGRBRUOuqqSkRPGVZo3lIoFhWRjL8lCbfAg1qUdhLMlWNJc14LURAID45MJWf40kARcLqnCxoAo/7D0PtYpFTJQP+nXVYmB3LSKDPSGIIliGSoeYDz2wSayOJIoAAzAMi4aC86hJOYza1GN2eZ+lI7zH3AmPodMxY/EvZjtmoK8Lxg4Iw/WDw6H1dqEFBcQs6IyGWA1JFACGgT43DbUph1GbdgxCdcuXhRwZHxCJGr1g1mMWltXh611p+HpXGmIifTBuQCjG9A+Fi5MagiDSYgLSLlQ0RFGSKIBhOZhqK1B14ldUJ+yGUK1TOpZN0AREIVdXJ9vxUy
7qkHJRh49/TMKgHgG4blA4BnTXXr6kxtKlNdJKVDREEU0Fo89JQVXcTtSmxzU+9U5aheGdofLwxYWkTNlfy2gScfh0AQ6fLoCHK4/R/UIwYVA4OoV60VkOaRUqGmIxjfdeGEjGBlQn7EbVyd9gLMtTOpZN4v3DAQCJ6SUWfd2qWgN+PpiJnw9mIizAHVNHRGHikAgwLMCxVDikebQYgMiu6UFKQ3E2KuN2oObsAYdcjmxO7v0nwm/SQ7jt2Z3QG0yKZvFy02DaqGhMGxUNjZoDw4BWrJF/oDMaIhtJEACWQW3qUVTG7UBD3jmlI9kN3j8cJqNJ8ZIBgIqaBmzamYKtf6Zj0rBI3DKuMzxceUig+zikERUNMbumgqlJPoiKg1th1OUrHcnuaAKjoatRvmT+rr7BhB/2ZuDngxcwbkAYbruuCwJ8XSGIIl1Wc3BUNMRsLhfM2QOoOLQVRh099yIXXhuO3AtVSsdoltEkYtexLPxxPAvDegfj9gldERXiSQsHHBgVDemwxoJhUZNyGOX7v6EHK2XGefiB5Z1xLitL6SgtEiXg0Ol8HDqdj9gu/rhzYjf0jPaFKEo0Y83BUNGQdmtaolyfmQjdni9gKLbuNz57wWsbV5ydSC2+xq+0HonpJUhML8HQXkGYM6MXfL2c6f6NA6GiIW0mSWLjeJj8DOj+3AR9TorSkRyKRhsBSRCQll2udJQ2O5pUgBOpRZg+uhPumNgNKpahy2kOgIqGtIkkChDqq1H268eoTT2qdByHxPtHoN4KVpu1l9EkYuuf6fgzPgf3TemB8QPD6P6NnaOiIa3SNIesKv5X6PZ9BclQr3Qkh8UHRaOgwvafQ9JV6fHOVyfxy6FMPHxzH3QO86L7N3aKioa0qOl5XkNJDkp2fARDwXmFEzk4TgW1dyAyE+xnwUVadjmeem8fxvYPw4M39YSHC09lY2eoaMhVSYIASTRBt+dLVMXvBCRR6UgOj/cNAcNyOHu+VOkoZiVJwJ4TOTiaVIBbx3fBzeM6AwBtUWAnqGjIFSRRBMOyqEuPR+mu9TRN2Yo0bXYWl1KkcBJ51DeYsGlnCnbHZePpuweiU6gnjbOxA1Q05B8kUYRQW47SX9aiLuOE0nHIv/DaCAhGI8oq9UpHkVV+aS0Wvr8ft03oijuu7wYJEk0XsGFUNATAX0uWq+J/gW7vZhp6aaV4bSSq6m13xVlbCKKEr3al4WRqMZ6+ZyD8vJzB0b0bm0Q/IhBIggCxoR6F3y5H2e8bqWSsGB8YhfxS+TY7s0Zp2eV47I0/8cfxxgeCRZEGztsaKhoHJ0kSGgoykLtuPurS45WOQ1rAOrtB5eqJjFzbe1Czo/QGAR9uScTLG46hVm+EINDCFFtCReOgJFGEJEmoOPQd8j9/DkJ1mdKRyDXw/o0LARLOWXazM2ty/Gwh5q388/KfAW2nZRuoaByQJAgQ9bUo/OollO/7ipYt2wheGwFJFJGQbl9Lm9uqoroBy9YfxaqtiTCaRDq7sQFUNA5GkiToc1KQu+5J1GeeVjoOaQNeGw6D0QiTid5YAeDXIxfxxFt7UVBWC0GkPxNrRkXjICRRgCSJKN/3FQo2vwihtkLpSKSNNIHRKKs2Kh3DquSV1OCpd/cjIa2ELqNZMSoaByCJAkR9HQo2LUXFoe/oUplNYqD2D0NOUbXSQaxOfYMJL31yFD/tvwCA7ttYIyoaOyeJAkwVxcjbsAj6nGSl45B2UnkHgFXxSM2kKQ3NESVg/U9J+HBLAkRJoiXQVoaKxo5JooiGvHTkbVwCU6XtbJJFrtS02Vlcqn2OnjGX345m4YW1R1BvMMFEiwSsBhWNnZIkCbWpR1Dw5TKI+hql45AO4rUREAUTsgro0tm1nM4oxVPv7ENJeT2tSLMSVDR2qvLINhT/8A4kgW4e2wNeG4k6vaB0DJuRX1qL+e/uQ3KmzuKX0SoqKvDCCy9g9OjR6N+/P+68807Exzv2w9BUNHZEkkRIkoiSne
ug2/MFALpObS80gdEoKqfN5tqitt6I59cexq5jWRZ93aeeegqnTp3C22+/je+++w4xMTGYNWsWLly4YNEc1oSKxk5
2023-12-29 18:23:11 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Pie chart of how the patients are distributed over the Smoking values.\n",
"# The trailing semicolon hides the Axes repr without rebinding `_`,\n",
"# which IPython uses for the previous cell's result.\n",
"dane['Smoking'].value_counts().plot(kind='pie');"
2023-12-29 18:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 404,
2023-12-29 18:23:11 +01:00
"id": "86122d04",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Passive Smoker\n",
" 1 60\n",
" 2 284\n",
" 3 140\n",
" 4 161\n",
" 5 30\n",
" 6 30\n",
" 7 187\n",
" 8 108\n",
" dtype: int64]"
]
},
"execution_count": 404,
2023-12-29 18:23:11 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane4 = [dane.groupby('Passive Smoker').size()]\n",
"dane4"
]
},
{
"cell_type": "code",
"execution_count": 405,
2023-12-29 18:23:11 +01:00
"id": "c78bbd4c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAGFCAYAAADEhjUtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABdP0lEQVR4nO3dd3hUVf4G8PfeOzOZ9EwmvZOQAmn03pEiAiKi7k+RtYusu7qiYkd3ZUHEsmIXBcVesWBbEBQU6Z1UCCG9ZybJ9Hvv749AlJ4yM2fK9/M8Po9O4M6bCHlz7jn3HE6WZRmEEEKIg/CsAxBCCPFsVDSEEEIcioqGEEKIQ1HREEIIcSgqGkIIIQ5FRUMIIcShqGgIIYQ4FBUNIYQQh6KiIYQQ4lBUNIQQQhyKioYQQohDUdEQQghxKCoaQgghDkVFQwghxKGoaAghhDgUFQ0hhBCHoqIhhBDiUFQ0hBBCHIqKhhBCiENR0RBCCHEoKhpCCCEORUVDCCHEoahoCCGEOBQVDSGEEIeioiGEEOJQVDSEEEIcioqGEEKIQ1HREEIIcSgqGkIIIQ5FRUMIIcShqGgIIYQ4FBUNIYQQh6KiIYQQ4lBUNIQQQhyKioYQQohDUdEQQghxKCoaQgghDkVFQwghxKGoaAghhDgUFQ0hhBCHoqIhhBDiUFQ0hBBCHIqKhhBCiENR0RBCCHEoKhpCCCEOpWAdgBB3IIoSJBngeUDg7fvzmSjJkCUZAMBxgCDQz3/Es1DREK8nyzJESYbAc+A4ruN1k8WG5hYz6pqMqG82okFvQnOLCQaTDSaLCJPZBqPFBrNFhNFsg+nkv8sywPMcOA7gufZr8nz7v/M8B7VKgQBfJQL8lPD3Vbb/u68K/n5KBKiVCA1WI0rrh9AgXygVf5SOKEmQZZyVkxBXR0VDvIYkyZBkGYo/jRgadSaUVutxoroF5bUtqKxvQ32zEU0tZhjNNoZp20c3wQE+iNT4ITzUFxEaP0Ro/BAb7o9eMcEIDvABcO7PixBXwsmyLLMOQYi9ybIMSCLA8+A4Hi0GCwpKm3CsQocTNe2lUlHbCpNFZB2124L8VUiMCkJSTBCSooOQEhuM+MhAqJRCxyiNyoe4Aioa4hFkSQIgg+MFyKIIS20JZBlQRSThlmU/oa7JyDqiU/AcEBsRiIxEDTKSQpGVokVMWAAAwCZKVDyECbp1RtyWLNrACQrIog2mE0dgPH4AprICmKuKIdss8EsbgqirFiHYX+U1RSPJQFlNC8pqWvC/HScAtI98MpO1yErWIjctHAmRgeA4DqIo0cID4hRUNMRtyLIMyBI4XoBo0KOtcAcMxbthLDkA2WI669ebKwsBAKP7xaK4XOfsuC5D32bBtoNV2HawCkB78QzMiMSQzEgM6hMJtUpBox3iUFQ0xKXJkghwPDiOg6XmONoKt8NQtBuW6hIAF77rK7Y2w9bSiKyUMOeEdRP6Ngs27S7Dpt1lUAgc+vbSYnDfSAzPjkFkqB8kSQbHgVa2EbuhORrictr/SMqALMNQvAeGwh0wFO+B2Nbc5WtFzF4IPnEgrnn0e7vn9EQxYf4YkRODiYPjERcRSLfXiF1Q0RCXIYsiOEGApbYU+r0b0Hp4CyRjS4+uGTxkBkInzsOcB9fDYpPslNQ7JEUHYfzAOEwYlICQQB+6vUa6jYqGMCVLYvuci6kNrQc2oeXAJlhqjtvt+j5x6Yj963+wfO0ubNlXYbfrehOeA7JSwjB+YBxG9YuFj1LoeCiVkM6goiFO1z6p3/7HznB0D1r2/wRD0W5Asv8DkpxChaT73sUv+6qw4r3ddr++t1EpeIzuH4vLx6SgV0wwjXJIp9BiAOI0siSB43mIbc3Qbf8arQd/7ta8S5fe02aBpfYEUhPCHfo+3sJik7BxZxk27ixDeqIGM0YlY1RuDMBxEGiEQ86DioY43KnbY9bGKjT/9hlaD29tf2rfSUzl+QjPiXPa+3mLgt
ImFJTuxqqvDmHK0ERMH9ULIYFqSJJMt9XIaejWGXGYU5P7prJ8NP32GYzFe3GxJcmOEJA1BhGX34Wbn/wRtV7y4CYLAs9hZG4M/m9yevuKNUmy+07XxD3RiIbY3amCMRzdjebfvoC5opBpHlNFEQBg7IA4fLKxiGkWTyZKMn7ZW4Et+yowPCsa107JQGJ0kMsWTnNzM5599lls3rwZra2tSE9Px8KFCzFo0CDW0TwOjWiI3cgnb4e1HNgM3e9fwtrgOqu8Ehe+g8PlBjz8ym+so3iVwX0jcd2UDKTEhbhc4dx0002oq6vD4sWLodVqsXbtWnz22Wf44osvkJyczDqeR6GiIT12agTTcugXNG16HzZ9HetIZ4n6yyOwRmRg7uM/so7ilQakR+C6KRlIS9S4xBxOaWkpJk+ejPfffx8DBw4E0L4acvLkyZg+fTruuusupvk8Dd06I912ahWZqbIQDf9bDUvVUdaRzstUXoCQpGzWMbzWnoJa7CmoxbCsKNwyMwsRoX4A2G1zo9Fo8PrrryM7+48/E9zJQ+r0ej2TTJ6MioZ02alBsE1Xi4YNa2Ao3Mk40cWZK4vACwrkpoZhf1E96zhe6/dD1diVV4NpI3rhuqkZ8FEpmCyLDgoKwtixY0977YcffkBpaSkeeughp+fxdFQ0pEtkSYJkMaLp5w+g3/OjU5cp94S5sn0RwMicGCoaxmyijK+2HMNPu8rwl0npmD66F2QZTB/83LNnDx588EFMnjwZ48aNY5bDU1HRkE6RJRGQZei2f42m3z6HbDawjtQlkqkN1qZq9EkKZR2FnNRqtGLVV4ew/rcS3DQjE8OyopnM32zYsAH33nsvBgwYgBUrVjj1vb2F6ywBIS5Jlts3ojQeP4iyV+5E46Z33a5kTjGdyEN0qC/rGOQMVfVtWLJ6Bx5/YxuaWkyQJOetT3r33Xfx97//HePHj8err74KHx8fp723N6GiIeclSyJkiwm1X61E9Qf/hk3neqvJusJUWQSVjwIBahrIu6Ld+bWYv2wjvt56DLIsQxQdu9v2+++/j3//+9+47rrr8Oyzz0KlUjn0/bwZLW8mZ5FlCRzHo61wJ+q/exViazPrSHahiuyFuFtW4KVP9+P7bcdZxyEXkBofgrv+0r/j2Gl7KykpwYwZMzBu3DgsXrz4tI+p1WoEBgba/T29GRUNOY0siZDMRtR/9xra8jzs4UaOR9L972NHfj2WrN7BOg25CIHncMW43rh2SgY4zr6LBV599VU899xz5/zYFVdcgWXLltntvQgVDTnp1DMxrYe3ov7HNyEZPPNZgph5S9ASkICbnvwf6yikk6LD/HH/3EFIjgsGT8dLuyWaoyHtoxhjC6o/WYbadc95bMkA7Ts5hwYoWccgXVBV34Z7X/gFn2wobJ+7ceJiAWIfVDQEhqN7UPbqP9ziwcueMlcUQVAqkRwTzDoK6QJRkvHu9/l46OVfoWs1O3yhALEvWn7jRNu3b8e8efPO+bG4uDhs3LjRaVnaN8Dk0LjpXeh+/9Jp78uaqbJ9J+nR/WJxrFLHOA3pqkPHGvC35T/h71f3w4icGMiyzGwbG9J5VDRO1L9/f2zduvW01/bt24e///3vWLBggdNytN8qa0XN5ytgOnHEae/rCsSWRtham5HVW8s6CummVqMVS9/eiclDE3D7FTkQeA4CHSft0qhonEilUiE8/I8jhQ0GA5YuXYorrrgCV155pVMyyLIMU3kBaj9/xuHHKLsqU3k+4hP6s45BeujH7SeQd7wJj940FBGhvi51BAE5Hf2fYejVV1+F0WjEokWLHP5ep57wb/7tC1S9u9hrSwYAzBWF8FMroFDQH393V1bTgruf24xdR2pZRyEXQH/TGGlsbMSaNWswf/58hISEOPS9Tj3hX/3Rf9C0+T1A9u6JVHNFIThewNC+kayjEDswmGxYsmY71n6XB1mWIdETGy6HioaR999/H4GBgbjmmmsc+j6yJMJSV4byNx
bCULzboe/lLsxVRyFLEoZmRrOOQuxEloGPNxTiydU7YLWKtCrNxVDRMLJu3TrMmjULarXaYe8hyxKMJQdQ+fbDsOn
2023-12-29 18:23:11 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_ = dane['Passive Smoker'].value_counts().plot(kind = 'pie')\n"
2023-12-29 18:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 406,
2023-12-29 18:23:11 +01:00
"id": "6385071c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Smoking Gender\n",
"1 1 102\n",
" 2 79\n",
"2 1 102\n",
" 2 120\n",
"3 1 79\n",
" 2 93\n",
"4 1 49\n",
" 2 10\n",
"5 1 10\n",
"6 1 28\n",
" 2 32\n",
"7 1 167\n",
" 2 40\n",
"8 1 61\n",
" 2 28\n",
"dtype: int64"
]
},
"execution_count": 406,
2023-12-29 18:23:11 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane.groupby(['Smoking','Gender']).size()"
]
},
{
"cell_type": "code",
"execution_count": 407,
2023-12-29 18:23:11 +01:00
"id": "af3dd196",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHKCAYAAAAtnGCsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABF7UlEQVR4nO3deXwTdeL/8XeSnlAKBaRVUEG0LWe5BQUWKqKrqIsoshwrCFpAbhCQQwRFUcp9yCmHlgVcEFdFRVl/XivnCrgCAlqFAm2h0Jbaiybz+4Nvs8a2QNuknbav5+PBAzqfmfm8M0ngzWSSWAzDMAQAAGBC1tIOAAAAUBCKCgAAMC2KCgAAMC2KCgAAMC2KCgAAMC2KCgAAMC2KCgAAMC2KCgAAMC2v0g7gDoZhyOEo+ufWWa2WYm3vDmQwVw4ykIEMZCCDZzNYrRZZLJZrrlcuiorDYejChd+KtK2Xl1VBQZWVmpqunByHm5ORoSzmIAMZyEAGMng+Q/XqlWWzXbuo8NIPAAAwLYoKAAAwLYoKAAAwLYoKAAAwLYoKAAAwrXLxrh8AAArD4XDIbs8pwnYWZWbalJ2dJbu9dN4eXBYy2Gxeslrdcy6EogIAqDAMw1Bq6gVlZKQVeR/nz1vlcJTexziUlQz+/gEKDKx+XZ+VcjUUFQBAhZFbUgICguTj41ukf0RtNkupnckoCxkMw1B2dpbS0i5KkqpWrVGseSgqAIAKweGwO0tKQEBgkffj5WUt1Q/GLAsZfHx8JUlpaRdVpUpQsV4G4mJaAECFYLfbJf3vH1F4Vu5xLsq1QL9HUQEAVCjFvWYC18ddx5mXfgAAFZ7VapHVen3/sNps7v0/vsNRvC/WLe8oKgCACs1qtahatUpuLyDXy253KDk5nbJSAIoKAKBCs1otstmsio7Zr7iESyU6d53gKhrXp6WsVkuZKiopKcn65psv9cADD3t8LooKAACS4hIu6afTKaUdo0xYsmSBzp49Q1EBAFQsV7tWJPelmau9RMP1HiXDMEruGFNUAACmcL3XigQG+hc4VlGu90hPT9eSJQv1//7fTqWnpyssrIGGDRut8PAG+u9/D2nFiqX68ccj8vLy0t13d9Szz45U1arVJEmPPfaQ/vznbho4MMq5v98v2779fa1bt1pPPjlQ69atVmJigurVq69Ro8apadNmmjnzRX300QeSpPbtW+nrr/d59LZSVAAAplDca0XK6vUeRTF58gSdPPmrJk16UbVr19H69W9q9OhnFR29UMOHR+nhh7trzJgJunAhSXPnvqbRo4dp5cp1stls17X/hIR4bdu2RVOnvqRKlSppzpxZmjnzRW3c+K5GjhynrKwsnTuXoJdfft3Dt5SiAgAwGa4VubqTJ3/Rt99+o7lzF6tNm7aSpLFjJ6pKlSrasGGd6te/Q6NHj5ck1a1bT9OmzdSAAb21Z8+3ateu/XXNkZOTo+eee1533BEmSerVq4+ef36ckpKSVLNmTfn6+srLy1s1atT0zI38HT7wDQCAMuSnn05Ikho1auxc5uvrq+HDx+jXX39RkyYRLuvfcUeoAgICnNtdr1tvref8c+XKAZKknJzLRY1dZBQVAADKEC+vgl8MKegiV8Mwrrpd7tcL/J6Pj89179+TKCoAAJQhuWc6jhw57FyWk5Ojxx57SKdOndShQwdc1j9+/Jh+++031a17myTJy8tb6em/Ocd/+y1NFy4kFSpDSX4NAdeoAACgKxfjloU5b7nlVnXqFKm5c1/TuHHPq2bNG/T222uVnZ2tN954U0OHDtS8ea+re/fHdeFCkubNe12hoWFq1aqNJKlx4ybaufNTdep0jwICqmj16mWy2QpXB/z9/XX+/DmdOXNaN91Uu9C3oTAoKgCACs3hMGS3OzSuT8tSmd9udxT6XUpTpkzTggXzNXXqBGVnX1bDho01d+5i3X77HZozZ5FWrnxDTz3VR5UqVVaHDp00ZMgw50s/UV
HPKjU1RaNGDVVAQBX16tVXly6lFWr+P/+5m7788v+pX7+e2rRpm2rWvKFQ2xeGxSiNF5zczG536MKF3669Yj68vKwKCqqsixd/U06Ow83JyFAWc5CBDGQonQy5c4ya+/+K9K6f+rWrav6YTgVmvHw5W0lJZ1Wjxo3y9na9/qKwX0pot7vvGBTlQ+q8vKyl+nf19WS42vGWpOrVK1/X9ytxRgUAUOEVtiyUdkmoSIp1Me3y5cvVr18/l2WJiYkaM2aMWrVqpTvvvFNjx47VhQsXXNaJiYnRPffco6ZNm6p37946fPiwAAAA/qjIRSUmJkbz5893WZadna2nnnpKZ86c0fr167VixQodPXpUEyZMcK7z7rvv6vXXX9fIkSO1detW1alTRwMGDMhTZgAAAApdVBISEjR48GBFR0erbt26LmMffPCBTp8+rcWLF6thw4aKiIjQxIkTFRsbq7S0KxfqLFu2TH379tXDDz+s22+/Xa+88or8/f31zjvvuOUGAQCA8qPQ16j88MMP8vb21j//+U8tWbJEp0+fdo59/fXXatu2rWrW/N9H6nbo0EGfffaZJCkpKUm//PKL2rVr978AXl5q1aqV9u7dq6io/31BUqFviFfRTg5dz7dxehoZzJWDDGQgQ+lkcNe+C9qPw3FleXHeQ5L78SEWi1Rab0UpKxlyj7PNZi3yv9FSEYpKZGSkIiMj8x2LjY1Vq1attGTJEm3btk05OTlq3769nnvuOQUGBio+Pl6SdOONN7psV6tWLR09erQI8a+wWi0KCqpc5O2lq38bZ0khw/+YIQcZyEAGc2a4loIy2u1+Sko6K7s9W15exbsdpf2furKQISMjWzabVTVrBl73lyHmx63v+klLS9O2bdvUrl07zZkzRykpKXr11Vc1dOhQvfXWW8rIyJCU92N5fX19lZWVVeR5HQ5DqanpRdrWZrMqMNBfqakZbn27GRnKbg4ykIEMpZMhd47iulpGP7/KSkm5KLvdkI+Pb6E/YdVikfPbmUvzbIaZMxiGoezsLKWlJaty5QClpmbmu4/AQP+Sf3uyl5fX/30d9Bx5e3tLkqpWrarHH39c33//vfz8/CRduej297KysuTvX7wHZ3HfKma3O0r97WZkMFcOMpCBDObMcC1XyxgQECSHw1Ba2sUi799qtcrhKN1jUBYy+PsHKCAgqNiPF7cWlZCQEBmG4SwpknTHHXdIkuLi4nTnnXdKuvIW5vr16zvXSUxMVHBwsDujAACQh8ViUdWqNVSlSpDs9pxCb2+zWVS1aiWlpKTLbi+d0xllIYPN5iWr1T0vTbm1qLRu3Vrr169XZmam8+zJsWPHJEm33nqratSooXr16mn37t3OC2pzcnK0b98+9e7d251RAAAokNVqldWa99NSr8XLyyo/Pz9lZNhL9VOCK1IGt16J06tXL9lsNo0dO1bHjx/X/v37NWXKFN15551q1KiRJOmpp57SmjVr9O677+rEiROaNGmSMjMz9dhjj7kzCgAAKAfcekalevXqiomJ0auvvqrHH39cPj4+6tKliyZOnOhcp2fPnrp06ZLmz5+v5ORkNW7cWGvWrFH16tXdGQUAAJQDxSoqs2bNyrOsbt26Wr58+VW3GzhwoAYOHFicqQEAQAVQ+m/CBgAAKABFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmBZFBQAAmFaxisry5cvVr1+/AsenTJmiyMhIl2UOh0MLFy5Uhw4d1KxZMz399NM6depUcWIAAIByqshFJSYmRvPnzy9w/LPPPtM777yTZ/nSpUu1YcMGvfTSS9
q4caMcDocGDRqk7OzsokYBAADlVKGLSkJCggYPHqzo6GjVrVs333USExM1depUtWnTxmV5dna23nzzTY0YMUKdOnV
2023-12-29 18:23:11 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"dane6 = dane.groupby(['Smoking','Gender'])\n",
"_ = dane6[['Smoking', 'Gender']].value_counts().plot(kind = 'bar')\n",
"_ = plt.legend()\n"
2023-12-29 18:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 408,
"id": "18002f3f",
2023-12-29 18:23:11 +01:00
"metadata": {},
2023-12-30 09:44:14 +01:00
"outputs": [
{
"data": {
"text/plain": [
"Smoking 3.0\n",
"dtype: float64"
2023-12-30 09:44:14 +01:00
]
},
"execution_count": 408,
2023-12-30 09:44:14 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane[['Smoking']].median()"
2023-12-30 09:44:14 +01:00
]
},
{
"cell_type": "code",
"execution_count": 409,
"id": "f21f91ec",
2023-12-30 09:44:14 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Genetic Risk</th>\n",
" <th>Smoking</th>\n",
" <th>Alcohol use</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>725</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>727</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>940</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>819</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>537</th>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>538</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>755</th>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>533</th>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>812</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Genetic Risk Smoking Alcohol use\n",
"index \n",
"725 1 4 3\n",
"59 1 4 3\n",
"727 1 3 3\n",
"940 1 4 2\n",
"819 1 1 2\n",
"... ... ... ...\n",
"537 7 8 8\n",
"538 7 7 7\n",
"755 7 4 7\n",
"533 7 4 7\n",
"812 7 7 7\n",
"\n",
"[1000 rows x 3 columns]"
]
},
"execution_count": 409,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = dane[['Genetic Risk', 'Smoking','Alcohol use']]\n",
"x.sort_values('Genetic Risk')\n"
]
},
{
"cell_type": "code",
"execution_count": 410,
"id": "15eebd5b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Air Pollution\n",
"8 19\n",
"5 20\n",
"7 30\n",
"4 90\n",
"1 141\n",
"3 173\n",
"2 201\n",
"6 326\n",
"Name: count, dtype: int64"
]
},
"execution_count": 410,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane7 = dane['Air Pollution'].value_counts()\n",
"dane7.sort_values()"
]
},
{
"cell_type": "code",
"execution_count": 411,
"id": "c0b501b8",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAGvCAYAAAB1pf5FAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzDklEQVR4nO3deXQUZdr+8aurOytJIGwJIyIYIIiyhigZ2QYV8QVHkXd0FDjKsAqIEjZBGGQVNaCDioCAoJIDjCDjjKOi/AZFX3ZRGdkxAjImAQIJSxbTnd8fnvTYIkovpJ+kv59zcqSr6um6704wF1VPVdnKysrKBAAAYCAr2AUAAABcCkEFAAAYi6ACAACMRVABAADGIqgAAABjEVQAAICxCCoAAMBYBBUAAGAsR7ALCISysjK5XMG5b51l2YK272Ci79BC36GFvkNLsPq2LJtsNtuvblclgorLVaa8vPMVvl+Hw1J8fDUVFFxQaamrwvcfLPRN36GAvuk7FASz75o1q8lu//WgwqkfAABgLIIKAAAwFkEFAAAYi6ACAACMRVABAADGqhJX/QAA4A2XyyWns/RHr20qKrKrpKRYTmfoXKJ8pfq22x2yrMAcCyGoAABCRllZmQoK8lRYeO6idSdPWnK5QufS5HJXqu+oqBjFxdW8rHul/BKCCgAgZJSHlJiYeIWHR3j8ErXbbSF1NKVcoPsuKytTSUmxzp07LUmqXr2WX+9HUAEAhASXy+kOKTExcRetdziskLrZW7kr0Xd4eIQk6dy504qNjffrNBCTaQEAIcHpdEr67y9RXFnln/OP5wL5gqACAAgp/s6ZwOUJ1OfMqR8AQMizLJvs9or/t7vLFbyH6lYWBBUAQEizLJtq1IgOSlBxOl06c+YCYeUXEFQAACGt/GhKxoqd+jbnbIXtt35CrMb0SZFl2SpdUMnPP6NNmzaqZ8+7r/i+CCoAAEj6NuesDh/PD3YZlcJLL/1F//nPcYJKRbAsmyzLtwk/5YcJfT1cyLlJAEBlVFZWcb+7QjqoBOq8ZFxclE/jODcJAPDFhQsXtHDhi9q4cYMuXLig5OTrNGLEKDVrdp3+/e8vtWjRfO3fv1cOh0M339xJw4c/qurVa0iS/vd/79Qdd/TUgAFD3O/342X//OfftXz5Ej344AAtX75Eubk5atQoSY89NkYtW7bWzJlP6t13/yFJ6tChnT75ZMcV7TXkg0owzktKlfvcJAAguP7858d17NhRTZz4pK66qr5ee22pRo0aroyMeXrkkSH6/e97KT19vPLyTmnu3Kc1atQIvfLKctnt9st6/5ycbK1bt0aTJ09XdHS05syZrZkzn9TKlW/p0UfHqLi4WLm5OZo585kr3GmIB5VynJcEAFQWR49+oy1b/k9z576oG29sL0kaPfpxxcbGKjNzuZKSmmjUqHGSpIYNG2nKlJnq3/8Bbdu2WWlpHS5rH6WlpRo7doKaNEmWJP3xj300YcIYnTp1SrVr11ZERIQcDodq1ap9ZZr8EW74BgBAJXL48CFJ0vXX3+BeFhERoUceSdeRI9+oRYtWHts3adJUMTEx7nGX65prGrn/XK1ajCSptPR7X8v2GUEFAIBKxOG49MmQS01yLSsr+8Vx5Y8X+LHw8PDLfv8riaACAEAlUn6kY+/ePe5lpaWl+t//vVPHjh3Vl19+7rH9wYMHdP78eTVseK0kyeEI04UL593rz58/p7y8U17VUJGPIWCOCgAA+uEih8qwvwYNrlHnzr/T3LlPa8yYCapdu47eeGOZSkpK9PLLSzVs2AA999wz6tXrD8rLO6XnnntGTZsmq127GyVJN9zQQhs2fKAuXW5RTEysli5dKLvduzgQFRWlkydP6j//Oa7f/OYqn/q4XAQVAEBIc7nK5HS6NKZPSoXv2+l0+XTl54QJU/TSS3/R5MnjVVLyvZo3v0Fz576oxo2baM6cF/TKKy/rT3/qo+joaurYsYsefniE+9
TPkCHDVVCQr8ceG6aYmFg98EA/FRR4d+XrHXf01Mcfb1S/fvdq1ap1ql27jtc9XC5bWTBOOAWY0+lSXt75X9/wJxwOS/Hx1fTY3I0VftVP0lXV9Xx6F50+fV6lpa4K3be/yj+3yli7P+ibvkNBVe77++9LdOrUd6pVq57CwjznX1iWTWFhdjmdFduzCTf+dDisK/K9/qXPW5Jq1qx2Wfcx44gKACDklR9VqWrhrCpgMi0AADAWQQUAABiLoAIAAIxFUAEAhJQqcA1JpRCoz5mgAgAICeUP5CspKQ5yJaGh/HP29h4tP8VVPwCAkGBZdkVFxejcudOSpPDwCI87rLpcNjmdoXe0JdB9l5WVqaSkWOfOnVZUVIwsy79jIgQVAEDIiIurKUnusPJjlmXJ5Qq9y5OvVN9RUTHuz9sfBBUAQMiw2WyqXr2WYmPj5XSWupfb7TZVrx6t/PwLIXVU5Ur1bbc7/D6SUo6gAgAIOZZlybL+e7dUh8NSZGSkCgudIXXTt8rQN5NpAQCAsQgqAADAWAQVAABgLIIKAAAwltdB5dSpUxo7dqzat2+vNm3aaPDgwTp8+LB7/d69e9W3b1+1bt1aXbt21WuvveYx3uVyad68eerYsaNat26tQYMG6dixY/53AgAAqhyvg8rw4cN15MgRLVq0SG+++aYiIyP10EMPqbCwUKdPn1b//v3VoEEDrVmzRsOHD1dGRobWrFnjHj9//nxlZmZq+vTpWrlypVwulwYOHKiSkpKANgYAACo/ry5Pzs/P11VXXaUhQ4aoadOmkqRhw4bprrvu0sGDB7V582aFhYVp2rRpcjgcSkpKcoea3r17q6SkREuXLtWYMWPUpUsXSdJzzz2njh07av369erZs2fAGwQAAJWXV0dUqlevrjlz5rhDSl5enpYtW6bExEQ1btxYO3bs0I033iiH47/5p3379vrmm2908uRJ7du3T+fPn1daWpp7fVxcnJo3b67t27cHqCUAAFBV+HzDt8mTJ2v16tUKDw/Xyy+/rOjoaGVnZ7tDTLm6detKkr777jtlZ2dLkurVq3fRNuXrfOVweD8v2G4P/lxiE2rwVnnNlbF2f9A3fYcC+qZv0/gcVB588EHdd999WrFihYYPH67MzEwVFRUpPDzcY7uIiAhJUnFxsQoLCyXpZ7fJz8/3tRRZlk3x8dV8Hh9McXFRwS7BZ5W5dn/Qd2ih79BC3+bxOag0btxYkjRz5kx98cUXeuONNxQZGXnRpNji4h8e8xwdHa3IyEhJUklJifvP5dtERfn+IblcZSoouOD1OLvdCvo3p6CgUE6nmbctvpTyz60y1u4P+qbvUEDf9F1R4uKiLutIjldBJS8vT5s3b9btt9/unodiWZYaN26s3NxcJSYmKjc312NM+euEhASVlpa6lzVo0MBjm+TkZG9KuYipzyj4NU6ni9orGfoOLfQdWujbPF6dlDp58qTS09O1efNm97Lvv/9ee/bsUVJSklJTU7Vz5045nU73+i1btqhRo0aqVauWmjVrppiYGG3dutW9vqCgQHv27FFqamoA2gEAAFWJV0GladOm6tSpk2bMmKHt27frwIEDevzxx1VQUKCHHnpIvXv31rlz5/TEE0/o0KFDWrt2rZYtW6YhQ4ZI+mFuSt++fZWRkaENGzZo3759GjVqlBITE9WtW7cr0iAAAKi8vJ6jMnfuXM2ZM0ejRo3S2bNn1a5dO61YsUK/+c1vJEmLFy/WzJkz1atXL9WpU0fjxo1Tr1693ONHjhyp0tJSTZo0SUVFRUpNTdWSJUsUFhYWuK4AAECVYCsrKysLdhH+cjpdyss77/U4h8NSfHw1PTZ3ow4f9/2qI18kXVVdz6d30enT5409L3gp5Z9bZazdH/RN36GAvum7otSsWe2yJtOae+E0AAAIeQQVAABgLIIKAAAwFkEFAAAYi6ACAACMRVABAADGIqgAAABjEVQAAICxCCoAAMBYBBUAAGAsggoAADAWQQ
UAABiLoAIAAIxFUAEAAMYiqAAAAGMRVAAAgLEIKgAAwFgEFQAAYCyCCgAAMBZBBQAAGIugAgAAjEVQAQAAxiKoAAA
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_ = dane7 = dane['Air Pollution'].value_counts().plot(kind = 'bar')\n",
"_ = plt.legend()"
]
},
{
"cell_type": "code",
"execution_count": 412,
"id": "00915ec0",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import sklearn "
]
},
{
"cell_type": "code",
"execution_count": 413,
"id": "5024be32",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Genetic Risk\n",
"4 40\n",
"1 40\n",
"5 100\n",
"6 108\n",
"3 173\n",
"2 212\n",
"7 327\n",
"Name: count, dtype: int64"
]
},
"execution_count": 413,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"_ = dane8 = dane['Genetic Risk'].value_counts()\n",
"dane8.sort_values()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 414,
"id": "8388706b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Patient Id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>Air Pollution</th>\n",
" <th>Alcohol use</th>\n",
" <th>Dust Allergy</th>\n",
" <th>OccuPational Hazards</th>\n",
" <th>Genetic Risk</th>\n",
" <th>chronic Lung Disease</th>\n",
" <th>Balanced Diet</th>\n",
" <th>...</th>\n",
" <th>Fatigue</th>\n",
" <th>Weight Loss</th>\n",
" <th>Shortness of Breath</th>\n",
" <th>Wheezing</th>\n",
" <th>Swallowing Difficulty</th>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <th>Frequent Cold</th>\n",
" <th>Dry Cough</th>\n",
" <th>Snoring</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>P1</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P10</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P100</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P1000</td>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P101</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>P995</td>\n",
" <td>44</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>P996</td>\n",
" <td>37</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>9</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>P997</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>P998</td>\n",
" <td>18</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>P999</td>\n",
" <td>47</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>High</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n",
"index \n",
"0 P1 33 1 2 4 5 \n",
"1 P10 17 1 3 1 5 \n",
"2 P100 35 1 4 5 6 \n",
"3 P1000 37 1 7 7 7 \n",
"4 P101 46 1 6 8 7 \n",
"... ... ... ... ... ... ... \n",
"995 P995 44 1 6 7 7 \n",
"996 P996 37 2 6 8 7 \n",
"997 P997 25 2 4 5 6 \n",
"998 P998 18 2 6 8 7 \n",
"999 P999 47 1 6 5 6 \n",
"\n",
" OccuPational Hazards Genetic Risk chronic Lung Disease \\\n",
"index \n",
"0 4 3 2 \n",
"1 3 4 2 \n",
"2 5 5 4 \n",
"3 7 6 7 \n",
"4 7 7 6 \n",
"... ... ... ... \n",
"995 7 7 6 \n",
"996 7 7 6 \n",
"997 5 5 4 \n",
"998 7 7 6 \n",
"999 5 5 4 \n",
"\n",
" Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n",
"index ... \n",
"0 2 ... 3 4 2 \n",
"1 2 ... 1 3 7 \n",
"2 6 ... 8 7 9 \n",
"3 7 ... 4 2 3 \n",
"4 7 ... 3 2 4 \n",
"... ... ... ... ... ... \n",
"995 7 ... 5 3 2 \n",
"996 7 ... 9 6 5 \n",
"997 6 ... 8 7 9 \n",
"998 7 ... 3 2 4 \n",
"999 6 ... 8 7 9 \n",
"\n",
" Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n",
"index \n",
"0 2 3 1 \n",
"1 8 6 2 \n",
"2 2 1 4 \n",
"3 1 4 5 \n",
"4 1 4 2 \n",
"... ... ... ... \n",
"995 7 8 2 \n",
"996 7 2 4 \n",
"997 2 1 4 \n",
"998 1 4 2 \n",
"999 2 1 4 \n",
"\n",
" Frequent Cold Dry Cough Snoring Level \n",
"index \n",
"0 2 3 4 Low \n",
"1 1 7 2 Medium \n",
"2 6 7 2 High \n",
"3 6 7 5 High \n",
"4 4 2 3 High \n",
"... ... ... ... ... \n",
"995 4 5 3 High \n",
"996 3 1 4 High \n",
"997 6 7 2 High \n",
"998 4 2 3 High \n",
"999 6 7 2 High \n",
"\n",
"[1000 rows x 25 columns]"
]
},
"execution_count": 414,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane"
]
},
{
"cell_type": "code",
"execution_count": 415,
"id": "f2c57644",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Level\n",
"High 365\n",
"Medium 332\n",
"Low 303\n",
"Name: count, dtype: int64"
]
},
"execution_count": 415,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane['Level'].value_counts()\n"
]
},
{
"cell_type": "code",
"execution_count": 416,
"id": "f02e1f34",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Patient Id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>Air Pollution</th>\n",
" <th>Alcohol use</th>\n",
" <th>Dust Allergy</th>\n",
" <th>OccuPational Hazards</th>\n",
" <th>Genetic Risk</th>\n",
" <th>chronic Lung Disease</th>\n",
" <th>Balanced Diet</th>\n",
" <th>...</th>\n",
" <th>Fatigue</th>\n",
" <th>Weight Loss</th>\n",
" <th>Shortness of Breath</th>\n",
" <th>Wheezing</th>\n",
" <th>Swallowing Difficulty</th>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <th>Frequent Cold</th>\n",
" <th>Dry Cough</th>\n",
" <th>Snoring</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>P1</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P10</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P100</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P1000</td>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P101</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n",
"index \n",
"0 P1 33 1 2 4 5 \n",
"1 P10 17 1 3 1 5 \n",
"2 P100 35 1 4 5 6 \n",
"3 P1000 37 1 7 7 7 \n",
"4 P101 46 1 6 8 7 \n",
"\n",
" OccuPational Hazards Genetic Risk chronic Lung Disease \\\n",
"index \n",
"0 4 3 2 \n",
"1 3 4 2 \n",
"2 5 5 4 \n",
"3 7 6 7 \n",
"4 7 7 6 \n",
"\n",
" Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n",
"index ... \n",
"0 2 ... 3 4 2 \n",
"1 2 ... 1 3 7 \n",
"2 6 ... 8 7 9 \n",
"3 7 ... 4 2 3 \n",
"4 7 ... 3 2 4 \n",
"\n",
" Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n",
"index \n",
"0 2 3 1 \n",
"1 8 6 2 \n",
"2 2 1 4 \n",
"3 1 4 5 \n",
"4 1 4 2 \n",
"\n",
" Frequent Cold Dry Cough Snoring Level \n",
"index \n",
"0 2 3 4 1 \n",
"1 1 7 2 2 \n",
"2 6 7 2 3 \n",
"3 6 7 5 3 \n",
"4 4 2 3 3 \n",
"\n",
"[5 rows x 25 columns]"
]
},
"execution_count": 416,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = dane.replace({'Level':{'High' : 3, 'Medium' : 2, 'Low' : 1}})\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 417,
"id": "52632684",
"metadata": {},
"outputs": [],
"source": [
"import sklearn"
]
},
{
"cell_type": "code",
"execution_count": 418,
"id": "a47f580a",
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(10)\n",
"np.set_printoptions(precision=6, suppress=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 419,
"id": "7caae544",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Y shape: (1000,)\n",
"X shape: (1000, 23)\n"
]
}
],
"source": [
"X = data.drop(['Level', 'Patient Id'], axis=1)\n",
"y = data['Level']\n",
"\n",
"\n",
"print(\"Y shape:\", y.shape)\n",
"print(\"X shape:\", X.shape)"
]
},
{
"cell_type": "code",
"execution_count": 420,
"id": "9139408a",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split (X, y)\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 421,
"id": "2f45152a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X_train shape: (750, 23)\n",
"y_train shape: (750,)\n",
"X_test shape: (250, 23)\n",
"y_test shape: (250,)\n"
]
}
],
"source": [
"print(\"X_train shape:\", X_train.shape)\n",
"print(\"y_train shape:\", y_train.shape)\n",
"print(\"X_test shape:\", X_test.shape)\n",
"print(\"y_test shape:\", y_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 422,
"id": "8ba2674d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\HP\\anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:460: ConvergenceWarning:\n",
"\n",
"lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
"\n"
]
},
{
"data": {
"text/html": [
"<style>#sk-container-id-3 {color: black;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-r
],
"text/plain": [
"LogisticRegression()"
]
},
"execution_count": 422,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# Train a logistic-regression classifier with scikit-learn's default settings.\n",
"# `fit` hands back the estimator itself, so ending the cell on `classifier`\n",
"# displays the fitted-model repr.\n",
"classifier = LogisticRegression().fit(X_train, y_train)\n",
"classifier"
]
},
{
"cell_type": "code",
"execution_count": 423,
"id": "ba0a5bda",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.019763, 0.980237, 0. ],\n",
" [0. , 0. , 1. ],\n",
" [0. , 0.000002, 0.999998],\n",
" [0.999979, 0.000021, 0. ],\n",
" [0. , 0.000038, 0.999962],\n",
" [0.000022, 0.983401, 0.016577],\n",
" [0. , 0.023981, 0.976019],\n",
" [0.025065, 0.943631, 0.031305],\n",
" [0. , 0.011278, 0.988722],\n",
" [0.077079, 0.922921, 0. ],\n",
" [0.000003, 0.000326, 0.999672],\n",
" [0.000473, 0.999527, 0. ],\n",
" [0.16753 , 0.83247 , 0. ],\n",
" [0.995731, 0.004269, 0. ],\n",
" [0.949387, 0.050613, 0. ],\n",
" [0.21037 , 0.78963 , 0. ],\n",
" [0. , 0. , 1. ],\n",
" [0.91181 , 0.045917, 0.042272],\n",
" [0. , 0.002178, 0.997822],\n",
" [0.984437, 0.015558, 0.000005],\n",
" [0. , 0.002066, 0.997934],\n",
" [0.99922 , 0.00078 , 0. ],\n",
" [0. , 0.000298, 0.999702],\n",
" [0. , 0.004236, 0.995764],\n",
" [0. , 0.000004, 0.999996],\n",
" [0.238042, 0.760375, 0.001583],\n",
" [0.025065, 0.943631, 0.031305],\n",
" [0.99808 , 0.001917, 0.000003],\n",
" [0.997777, 0.002223, 0. ],\n",
" [0.238042, 0.760375, 0.001583],\n",
" [0.913746, 0.086254, 0. ],\n",
" [0.002141, 0.997859, 0. ],\n",
" [0. , 0.004236, 0.995764],\n",
" [0.99922 , 0.00078 , 0. ],\n",
" [0.00192 , 0.99808 , 0. ],\n",
" [0. , 0.012453, 0.987547],\n",
" [0.00145 , 0.99855 , 0. ],\n",
" [0. , 0. , 1. ],\n",
" [0.00248 , 0.99752 , 0. ],\n",
" [0.998712, 0.000937, 0.000351],\n",
" [0.238042, 0.760375, 0.001583],\n",
" [0.001213, 0.97811 , 0.020677],\n",
" [0. , 0. , 1. ],\n",
" [0.015839, 0.984161, 0. ],\n",
" [0. , 0.023981, 0.976019],\n",
" [0.000003, 0.000326, 0.999672],\n",
" [0.930462, 0.069418, 0.000121],\n",
" [0. , 0.002178, 0.997822],\n",
" [0. , 0.000003, 0.999997],\n",
" [0.001321, 0.998655, 0.000024],\n",
" [0.001178, 0.998807, 0.000014],\n",
" [0.00035 , 0.99965 , 0. ],\n",
" [0.05124 , 0.948155, 0.000606],\n",
" [0. , 0. , 1. ],\n",
" [0.001321, 0.998655, 0.000024],\n",
" [0. , 0. , 1. ],\n",
" [0. , 0.004285, 0.995715],\n",
" [0.000004, 0.02071 , 0.979285],\n",
" [0.969283, 0.030717, 0. ],\n",
" [0.000063, 0.040843, 0.959093],\n",
" [0. , 0.002066, 0.997934],\n",
" [0.942577, 0.018654, 0.038768],\n",
" [0. , 0.000002, 0.999998],\n",
" [0.870813, 0.129142, 0.000044],\n",
" [0. , 0. , 1. ],\n",
" [0.913746, 0.086254, 0. ],\n",
" [0.000637, 0.999363, 0. ],\n",
" [0. , 0.000003, 0.999997],\n",
" [0. , 0.000298, 0.999702],\n",
" [0.029989, 0.970011, 0. ],\n",
" [0.994782, 0.000846, 0.004371],\n",
" [0.999889, 0.000075, 0.000035],\n",
" [0.947557, 0.052443, 0. ],\n",
" [0.029989, 0.970011, 0. ],\n",
" [0.129826, 0.870174, 0. ],\n",
" [0.055579, 0.94417 , 0.000251],\n",
" [0.002141, 0.997859, 0. ],\n",
" [0.001398, 0.9986 , 0.000002],\n",
" [0. , 0.000062, 0.999938],\n",
" [0.99808 , 0.001917, 0.000003],\n",
" [0. , 0.000004, 0.999996],\n",
" [0.969283, 0.030717, 0. ],\n",
" [0. , 0.004236, 0.995764],\n",
" [0.001178, 0.998807, 0.000014],\n",
" [0. , 0. , 1. ],\n",
" [0. , 0.014023, 0.985977],\n",
" [0. , 0. , 1. ],\n",
" [0.033577, 0.966423, 0. ],\n",
" [0.99808 , 0.001917, 0.000003],\n",
" [0. , 0.000062, 0.999938],\n",
" [0.099592, 0.900408, 0. ],\n",
" [0.000177, 0.999787, 0.000036],\n",
" [0.997699, 0.002301, 0. ],\n",
" [0.930462, 0.069418, 0.000121],\n",
" [0.00248 , 0.99752 , 0. ],\n",
" [0.00248 , 0.99752 , 0. ],\n",
" [0.238042, 0.760375, 0.001583],\n",
" [0.101768, 0.898232, 0. ],\n",
" [0.029989, 0.970011, 0. ],\n",
" [0.98255 , 0.01745 , 0. ],\n",
" [0. , 0.004285, 0.995715],\n",
" [0.000332, 0.999668, 0. ],\n",
" [0.999328, 0.000672, 0. ],\n",
" [0. , 0.023981, 0.976019],\n",
" [0.997777, 0.002223, 0. ],\n",
" [0.949387, 0.050613, 0. ],\n",
" [0.930462, 0.069418, 0.000121],\n",
" [0.00028 , 0.99972 , 0. ],\n",
" [0. , 0.004236, 0.995764],\n",
" [0. , 0. , 1. ],\n",
" [0.998712, 0.000937, 0.000351],\n",
" [0. , 0.000038, 0.999962],\n",
" [0.000898, 0.99788 , 0.001222],\n",
" [0.91181 , 0.045917, 0.042272],\n",
" [0.984437, 0.015558, 0.000005],\n",
" [0.000003, 0.000326, 0.999672],\n",
" [0.099592, 0.900408, 0. ],\n",
" [0.033577, 0.966423, 0. ],\n",
" [0.969283, 0.030717, 0. ],\n",
" [0.999328, 0.000672, 0. ],\n",
" [0. , 0. , 1. ],\n",
" [0. , 0.008068, 0.991932],\n",
" [0. , 0.000298, 0.999702],\n",
" [0.003572, 0.996428, 0. ],\n",
" [0.997777, 0.002223, 0. ],\n",
" [0. , 0.000002, 0.999998],\n",
" [0.000063, 0.040843, 0.959093],\n",
" [0.947557, 0.052443, 0. ],\n",
" [0.907855, 0.092061, 0.000084],\n",
" [0.00145 , 0.99855 , 0. ],\n",
" [0.101768, 0.898232, 0. ],\n",
" [0. , 0.009143, 0.990857],\n",
" [0.861344, 0.134685, 0.00397 ],\n",
" [0. , 0.004236, 0.995764],\n",
" [0.990975, 0.009025, 0. ],\n",
" [0.870813, 0.129142, 0.000044],\n",
" [0.995587, 0.000049, 0.004364],\n",
" [0. , 0.000298, 0.999702],\n",
" [0.942577, 0.018654, 0.038768],\n",
" [0. , 0.000005, 0.999995],\n",
" [0.000898, 0.99788 , 0.001222],\n",
" [0.99808 , 0.001917, 0.000003],\n",
" [0.099592, 0.900408, 0. ],\n",
" [0.099592, 0.900408, 0. ],\n",
" [0.999979, 0.000021, 0. ],\n",
" [0.995587, 0.000049, 0.004364],\n",
" [0.001398, 0.9986 , 0.000002],\n",
" [0.000003, 0.000326, 0.999672],\n",
" [0.999889, 0.000075, 0.000035],\n",
" [0.861344, 0.134685, 0.00397 ],\n",
" [0.000003, 0.000326, 0.999672],\n",
" [0.969283, 0.030717, 0. ],\n",
" [0.238042, 0.760375, 0.001583],\n",
" [0.000977, 0.999023, 0. ],\n",
" [0.002141, 0.997859, 0. ],\n",
" [0.000005, 0.000077, 0.999918],\n",
" [0.997699, 0.002301, 0. ],\n",
" [0. , 0.004236, 0.995764],\n",
" [0. , 0. , 1. ],\n",
" [0.999425, 0.000575, 0. ],\n",
" [0.974593, 0.025407, 0. ],\n",
" [0.002453, 0.997528, 0.000019],\n",
" [0.84426 , 0.15574 , 0. ],\n",
" [0.000898, 0.99788 , 0.001222],\n",
" [0.949387, 0.050613, 0. ],\n",
" [0. , 0.000032, 0.999968],\n",
" [0.000332, 0.999668, 0. ],\n",
" [0.99808 , 0.001917, 0.000003],\n",
" [0.05124 , 0.948155, 0.000606],\n",
" [0.999889, 0.000075, 0.000035],\n",
" [0. , 0. , 1. ],\n",
" [0.099592, 0.900408, 0. ],\n",
" [0. , 0.000298, 0.999702],\n",
" [0.99808 , 0.001917, 0.000003],\n",
" [0. , 0.000298, 0.999702],\n",
" [0.598003, 0.391491, 0.010506],\n",
" [0.002141, 0.997859, 0. ],\n",
" [0.001321, 0.998655, 0.000024],\n",
" [0.129826, 0.870174, 0. ],\n",
" [0.033577, 0.966423, 0. ],\n",
" [0.101768, 0.898232, 0. ],\n",
" [0.999979, 0.000021, 0. ],\n",
" [0.870813, 0.129142, 0.000044],\n",
" [0.002453, 0.997528, 0.000019],\n",
" [0.129826, 0.870174, 0. ],\n",
" [0. , 0.000032, 0.999968],\n",
" [0. , 0.004285, 0.995715],\n",
" [0. , 0.000004, 0.999996],\n",
" [0. , 0.000345, 0.999655],\n",
" [0.033577, 0.966423, 0. ],\n",
" [0.055579, 0.94417 , 0.000251],\n",
" [0.99922 , 0.00078 , 0. ],\n",
" [0.997699, 0.002301, 0. ],\n",
" [0. , 0.014023, 0.985977],\n",
" [0. , 0.008068, 0.991932],\n",
" [0.998712, 0.000937, 0.000351],\n",
" [0.000637, 0.999363, 0. ],\n",
" [0.001321, 0.998655, 0.000024],\n",
" [0.930462, 0.069418, 0.000121],\n",
" [0.001398, 0.9986 , 0.000002],\n",
" [0.002453, 0.997528, 0.000019],\n",
" [0.05124 , 0.948155, 0.000606],\n",
" [0. , 0. , 1. ],\n",
" [0.001111, 0.998889, 0. ],\n",
" [0. , 0.000203, 0.999797],\n",
" [0.000063, 0.040843, 0.959093],\n",
" [0.930462, 0.069418, 0.000121],\n",
" [0.019763, 0.980237, 0. ],\n",
" [0.033577, 0.966423, 0. ],\n",
" [0.659404, 0.319765, 0.020831],\n",
" [0. , 0. , 1. ],\n",
" [0.999979, 0.000021, 0. ],\n",
" [0. , 0.002178, 0.997822],\n",
" [0. , 0.000085, 0.999915],\n",
" [0.974593, 0.025407, 0. ],\n",
" [0.98255 , 0.01745 , 0. ],\n",
" [0.000308, 0.999692, 0. ],\n",
" [0. , 0. , 1. ],\n",
" [0.974593, 0.025407, 0. ],\n",
" [0.947557, 0.052443, 0. ],\n",
" [0.913746, 0.086254, 0. ],\n",
" [0.015839, 0.984161, 0. ],\n",
" [0.974593, 0.025407, 0. ],\n",
" [0. , 0. , 1. ],\n",
" [0.129826, 0.870174, 0. ],\n",
" [0.000177, 0.999787, 0.000036],\n",
" [0.238042, 0.760375, 0.001583],\n",
" [0.99808 , 0.001917, 0.000003],\n",
" [0. , 0. , 1. ],\n",
" [0.001213, 0.97811 , 0.020677],\n",
" [0. , 0.008068, 0.991932],\n",
" [0.999979, 0.000021, 0. ],\n",
" [0.001398, 0.9986 , 0.000002],\n",
" [0.015839, 0.984161, 0. ],\n",
" [0. , 0.021595, 0.978405],\n",
" [0.002453, 0.997528, 0.000019],\n",
" [0. , 0.000005, 0.999995],\n",
" [0.907855, 0.092061, 0.000084],\n",
" [0.974593, 0.025407, 0. ],\n",
" [0. , 0.000032, 0.999968],\n",
" [0.91181 , 0.045917, 0.042272],\n",
" [0.000898, 0.99788 , 0.001222],\n",
" [0. , 0.002066, 0.997934],\n",
" [0.055579, 0.94417 , 0.000251],\n",
" [0. , 0.014023, 0.985977],\n",
" [0.000029, 0.006112, 0.993859],\n",
" [0. , 0.002178, 0.997822],\n",
" [0. , 0.023981, 0.976019],\n",
" [0.999889, 0.000075, 0.000035],\n",
" [0.000308, 0.999692, 0. ]])"
]
},
"execution_count": 423,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class-membership probabilities for the test split: one row per sample,\n",
"# one column per class.\n",
"y_prob = classifier.predict_proba(X_test)\n",
"\n",
"# Display only a short preview; dumping every row bloats the saved notebook.\n",
"y_prob[:10]"
]
},
{
"cell_type": "code",
"execution_count": 424,
"id": "08f121e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 3, 3, 1, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, 1, 2, 3, 1, 3, 1, 3, 1,\n",
" 3, 3, 3, 2, 2, 1, 1, 2, 1, 2, 3, 1, 2, 3, 2, 3, 2, 1, 2, 2, 3, 2,\n",
" 3, 3, 1, 3, 3, 2, 2, 2, 2, 3, 2, 3, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1,\n",
" 2, 3, 3, 2, 1, 1, 1, 2, 2, 2, 2, 2, 3, 1, 3, 1, 3, 2, 3, 3, 3, 2,\n",
" 1, 3, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 3, 2, 1, 3, 1, 1, 1, 2, 3, 3,\n",
" 1, 3, 2, 1, 1, 3, 2, 2, 1, 1, 3, 3, 3, 2, 1, 3, 3, 1, 1, 2, 2, 3,\n",
" 1, 3, 1, 1, 1, 3, 1, 3, 2, 1, 2, 2, 1, 1, 2, 3, 1, 1, 3, 1, 2, 2,\n",
" 2, 3, 1, 3, 3, 1, 1, 2, 1, 2, 1, 3, 2, 1, 2, 1, 3, 2, 3, 1, 3, 1,\n",
" 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1, 3, 3, 1, 2, 2,\n",
" 1, 2, 2, 2, 3, 2, 3, 3, 1, 2, 2, 1, 3, 1, 3, 3, 1, 1, 2, 3, 1, 1,\n",
" 1, 2, 1, 3, 2, 2, 2, 1, 3, 2, 3, 1, 2, 2, 3, 2, 3, 1, 1, 3, 1, 2,\n",
" 3, 2, 3, 3, 3, 3, 1, 2], dtype=int64)"
]
},
"execution_count": 424,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hard class predictions for the test split (used by the metrics cells below).\n",
"y_pred = classifier.predict(X_test)\n",
"\n",
"# Display only a short preview instead of the whole prediction vector.\n",
"y_pred[:20]"
]
},
{
"cell_type": "code",
"execution_count": 425,
"id": "c876fff8",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import accuracy_score\n",
"from mlxtend.plotting import plot_confusion_matrix\n",
"import seaborn as sns\n",
"sns.set()\n"
]
},
{
"cell_type": "code",
"execution_count": 426,
"id": "cbb6c719",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy : 0.992\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAa8AAAG1CAYAAABOJnv8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAp8ElEQVR4nO3deViVdeL+8fsAAqKCgLlruIMrOUKaaYimRVpqtoeamWamk0vuazZkCbhkpjbZN80lNR1yasZtcl9SM2dy66ugqYl+TRHcQOX8/uDnmU6gAh58+OD7dV1cl3ye5xxuz1PefJ7VZrfb7QIAwCBuVgcAACCvKC8AgHEoLwCAcSgvAIBxKC8AgHEoLwCAcSgvAIBxKC8AgHEoLwCAcTysDuBqV69l6viZC1bHwF1wfzlfqyMAcDGbJJvt9usVufI6fuaC6r6xxOoYuAvOLX7V6ggAXMzTPavAbofdhgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeAADjUF4AAONQXgAA41BeBmhRr7wuL3v1pl8jnn1AklQxwEefvfWIjn/+kk59Ea1vxj6mRtUCLU4PV1mzepWaNw1TgK+PgmtV0+T4WNntdqtjoQCwrW/Pw+oAuL0fE3/TI8O+zjY+9oU/6U8179PijYdV0ruYVr/7hNKvXle/mZt1JeO6hj8Tqr+PfUxhA5Yp+dxlC5LDVbZv26bOT7VXl2ee09jxE7Rl8yaNHDZE165d09tDhlkdDy7Ets4dm93iOs/MzNT06dO1ZMkSpaWlKSwsTGPGjFGVKlXy9X5Jyamq+8YSF6csfJ4Iq6qlwx/Vi5PWavnWIxr2TKj+/GR9PdD/K0dRlStdXFsmPaXhn3+vxZsSLU7seucWv2p1hLumQ1Q7paSkaOOW7Y6xkcOH6pNZH+voiVMqXry4hengSvf6tvZ0l9xst1/P8t2GM2bM0IIFCzRhwgQtWrRImZmZ6tmzpzIyMqyOVmh5e7orvmczfbvzFy3fekSS1KlZkJZvPeI0wzqVclk1XltUJIvrXpKenq4N69fpyY6dnMY7de6itLQ0bdm8yaJkcDW2de5ZWl4ZGRmaM2eO+vfvr4iICAUHB2vy5MlKTk7WqlWrrIxWqL35RD1VDPDR23O2SZI83G0Kqeyv/z1xXmNeaKzET19Q6uJX9M/xjyukSmlrw+KOJSUmKiMjQ7Vq1XYar1GzpiTp54MHrYiFAsC2zj1Ly+vAgQO6ePGimjVr5hjz9fVV3bp1tWPHDguTFV7FPNz0Rvt6WrIpUYnJaZIk/5JeKubhpn4d6umR+hX0xoxNio77l8r4FdeqCU+ogr+PxalxJ86fPy8p6/+N3ytVqpQkKS0t9a5nQsFgW+eepSdsJCcnS5IqVKjgNF62bFnHsj9q3br1Td9v3rx5kltJ1wUshDo3C1IFfx9NTviPY8zT47+/gzw5YaUuXrkmSfrh8Bn956Nn9HpUiMbO33XXs8I1MjMzb7nc5mb53n+4CNs69yz9JC5fzjo+4+np6TTu5eWl9PR0KyIVep2aVdPeX87pP0fOOsbSLl+VJG34KdlRXJJ07MxFHTiewunyhvPz85MkpaWlOY2npmb9Fu7n63fXM6FgsK1zz9KZl7e3t6SsY183/ixlHbS82Rk1a9euveV7JiUX3Wm1h7tNbUIrKW75v53GUy9d1enzl+VVLP
vvIsXc3XQl4/rdiogCUL1GDbm7u+vw4UNO44cPZX0fHBJiRSwUALZ17lk687qxu/D06dNO46dPn1a5cuWsiFSo1b8/QCW8i2nrgVPZlq3cdUytGlZSYCkvx1itin6qXclPm/flvAsWZvD29tbDLVoqYfkypwtV/7b8K/n5+alJWLiF6eBKbOvcs7S8goODVbJkSW3f/t/rGVJTU7Vv3z6FhYVZmKxwqlfVX5J04HhKtmUxS36UXXatGPuYOoTfr6cfqqZlIx7V8TMX9dman+9yUrjasBGjtOP77XrphWe18p//0PixozU5bpLeHjZCPj6ckFOUsK1zx9Ldhp6ennr55ZcVGxurgIAAVapUSZMmTVL58uXVtm1bK6MVSuVKZ+1KPXch+zVwR06lqdXwv+vd6DB9+ueWup5p17/2/Kohn23ThStX73ZUuFhEq0gtXPyV3h0/Vs8+3VEVK1VSzPuT9NaAQVZHg4uxrXPH8jtsXL9+XfHx8Vq2bJmuXLniuMNG5cqV8/V+98odNnBv3WEDuFfk9g4blpeXq1Fe9w7KCyh6jLk9FAAAeUV5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxDeQEAjEN5AQCMQ3kBAIxjs9vtdqtDuFKm3a5LGUXqr4SbuK9pf6sj4C46t2O61RFwF3i6S26226/HzAsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHMoLAGAcygsAYBzKCwBgHI/crLRjx448vWlYWFi+wgAAkBu5Kq/o6GjZbLbbrme322Wz2bR///47DgYAwM3kqrzmzp1b0DkAAMi1XJVXeHj4TZelp6fL09MzVzMzAABcIVfl9UeJiYmaNm2atmzZogsXLmjJkiVaunSpqlevrujoaFdnBADASZ7PNty/f7+6dOmivXv3qkOHDrLb7ZIkd3d3xcTEaPny5S4PCQDA7+V55vX++++rfv36mjNnjiRp/vz5kqRRo0YpPT1dc+fOVadOnVybEgCA38nzzOvHH39U9+7d5eHhke04V1RUlI4cOeKqbAAA5CjP5eXl5aUrV67kuCwlJUWenp53HAoAgFvJc3k1b95c06ZNU3JysmPMZrPp4sWLmjNnjh566CGXBgQA4I9s9htnXOTSyZMn9dxzzyk1NVXBwcHas2ePwsLClJSUJLvdroULF6pKlSoFlfe2Mu12XcrI018JhrqvaX+rI+AuOrdjutURcBd4uktuubjyKs8zrwoVKighIUHdunWT3W5X1apVdenSJbVv317Lli2ztLgAAPeGfF3n5e/vrwEDBrg6CwAAuZKv8kpOTtbcuXO1c+dOnT9/XoGBgWratKmio6Pl7+/v6owAADjJ10XKHTp00IIFC+Tj46P69evLw8NDn3zyiTp27Khjx44VRE4AABzydZFy5cqV9cknn6hMmTKO8ZMnT6pnz5567733NGPGDJeGBADg9/I889q9e7fefPNNp+KSsk7k6N+/v7Zu3eqycAAA5CTP5RUQEKCLFy/muMzd3V0lSpS441AAANxKnsurT58+iouL0969e53Gjx07pqlTp6pXr1
4uCwcAQE5ydcwrMjLS6T6GZ86cUZcuXVSlShWVKVNG58+fV1JSkjw9PbVy5Up17dq1wAIDAJDrh1He7mGTDRs2dEk
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Score the test-set predictions. `plot_confusion_matrix` is the mlxtend helper\n",
"# imported in the previous cell.\n",
"acc = accuracy_score(y_test, y_pred)\n",
"cm = confusion_matrix(y_test, y_pred)\n",
"\n",
"plot_confusion_matrix(cm)\n",
"print('Accuracy',':', acc)"
]
},
{
"cell_type": "code",
"execution_count": 427,
"id": "e3c2afe8",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"colorscale": [
[
0,
"rgb(3, 5, 18)"
],
[
0.09090909090909091,
"rgb(25, 25, 51)"
],
[
0.18181818181818182,
"rgb(44, 42, 87)"
],
[
0.2727272727272727,
"rgb(58, 60, 125)"
],
[
0.36363636363636365,
"rgb(62, 83, 160)"
],
[
0.45454545454545453,
"rgb(62, 109, 178)"
],
[
0.5454545454545454,
"rgb(72, 134, 187)"
],
[
0.6363636363636364,
"rgb(89, 159, 196)"
],
[
0.7272727272727273,
"rgb(114, 184, 205)"
],
[
0.8181818181818182,
"rgb(149, 207, 216)"
],
[
0.9090909090909091,
"rgb(192, 229, 232)"
],
[
1,
"rgb(234, 252, 253)"
]
],
"reversescale": true,
"showscale": true,
"type": "heatmap",
"x": [
"pred_1",
"pred_2",
"pred_3"
],
"y": [
"true_1",
"true_2",
"true_3"
],
"z": [
[
0,
0,
85
],
[
2,
87,
0
],
[
76,
0,
0
]
]
}
],
"layout": {
"annotations": [
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_1",
"xref": "x",
"y": "true_1",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_2",
"xref": "x",
"y": "true_1",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "85",
"x": "pred_3",
"xref": "x",
"y": "true_1",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "2",
"x": "pred_1",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "87",
"x": "pred_2",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_3",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "76",
"x": "pred_1",
"xref": "x",
"y": "true_3",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_2",
"xref": "x",
"y": "true_3",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_3",
"xref": "x",
"y": "true_3",
"yref": "y"
}
],
"font": {
"size": 14
},
"height": 400,
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Confusion Matrix - Accuracy: 0.9920"
},
"width": 400,
"xaxis": {
"dtick": 1,
"gridcolor": "rgb(0, 0, 0)",
"side": "top",
"ticks": ""
},
"yaxis": {
"dtick": 1,
"ticks": "",
"ticksuffix": " "
}
}
}
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def plot_confusion_matrix(cm, accuracy=None):\n",
"    \"\"\"Draw a 3x3 confusion matrix as an annotated Plotly heatmap.\n",
"\n",
"    NOTE: defining this function rebinds the name `plot_confusion_matrix`, which an\n",
"    earlier cell imports from `mlxtend.plotting`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    cm : array-like of shape (3, 3)\n",
"        Confusion matrix with true classes on the rows and predicted classes on\n",
"        the columns, in ascending class order (the layout produced by\n",
"        `sklearn.metrics.confusion_matrix`).\n",
"    accuracy : float, optional\n",
"        Accuracy shown in the figure title. When omitted, the notebook-level\n",
"        variable `acc` is used.\n",
"    \"\"\"\n",
"    if accuracy is None:\n",
"        accuracy = acc\n",
"    labelled = pd.DataFrame(cm, columns=['pred_1', 'pred_2', 'pred_3'], index=['true_1', 'true_2', 'true_3'])\n",
"    # Plotly draws the first row of `z` at the bottom of the heatmap, so the rows are\n",
"    # flipped to put true_1 on top. Flipping the labelled frame (not the bare array)\n",
"    # keeps every row attached to its own `true_*` label.\n",
"    labelled = labelled.iloc[::-1]\n",
"    fig = ff.create_annotated_heatmap(z = labelled.values, x = list(labelled.columns), y = list(labelled.index), colorscale = 'ice', showscale = True, reversescale = True)\n",
"    fig.update_layout(width=400, height=400, title='Confusion Matrix - Accuracy: {:.4f}'.format(accuracy), font_size=14)\n",
"    fig.show()\n",
"\n",
"plot_confusion_matrix(cm, accuracy=acc)"
]
},
{
"cell_type": "code",
"execution_count": 435,
"id": "a1ffeb65",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" pred_1 0.97 1.00 0.99 76\n",
" pred_2 1.00 0.98 0.99 89\n",
" pred_3 1.00 1.00 1.00 85\n",
"\n",
" accuracy 0.99 250\n",
" macro avg 0.99 0.99 0.99 250\n",
"weighted avg 0.99 0.99 0.99 250\n",
"\n"
]
}
],
"source": [
"from sklearn.metrics import classification_report\n",
"\n",
"# Each report row summarises one class (precision, recall, F1, support), so the\n",
"# rows carry class-neutral labels rather than prediction-specific ones.\n",
"print(classification_report(y_test, y_pred, target_names=['class_1', 'class_2', 'class_3']))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}