{ "cells": [ { "cell_type": "code", "execution_count": 393, "id": "7ce53ad1", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import plotly.figure_factory as ff\n", "import seaborn as sns\n", "sns.set()\n" ] }, { "cell_type": "code", "execution_count": 394, "id": "73edef6d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Patient IdAgeGenderAir PollutionAlcohol useDust AllergyOccuPational HazardsGenetic Riskchronic Lung DiseaseBalanced Diet...FatigueWeight LossShortness of BreathWheezingSwallowing DifficultyClubbing of Finger NailsFrequent ColdDry CoughSnoringLevel
index
0P13312454322...342231234Low
1P101713153422...137862172Medium
2P1003514565546...879214672High
3P10003717777677...423145675High
4P1014616877767...324142423High
\n", "

5 rows × 25 columns

\n", "
" ], "text/plain": [ " Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n", "index \n", "0 P1 33 1 2 4 5 \n", "1 P10 17 1 3 1 5 \n", "2 P100 35 1 4 5 6 \n", "3 P1000 37 1 7 7 7 \n", "4 P101 46 1 6 8 7 \n", "\n", " OccuPational Hazards Genetic Risk chronic Lung Disease \\\n", "index \n", "0 4 3 2 \n", "1 3 4 2 \n", "2 5 5 4 \n", "3 7 6 7 \n", "4 7 7 6 \n", "\n", " Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n", "index ... \n", "0 2 ... 3 4 2 \n", "1 2 ... 1 3 7 \n", "2 6 ... 8 7 9 \n", "3 7 ... 4 2 3 \n", "4 7 ... 3 2 4 \n", "\n", " Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n", "index \n", "0 2 3 1 \n", "1 8 6 2 \n", "2 2 1 4 \n", "3 1 4 5 \n", "4 1 4 2 \n", "\n", " Frequent Cold Dry Cough Snoring Level \n", "index \n", "0 2 3 4 Low \n", "1 1 7 2 Medium \n", "2 6 7 2 High \n", "3 6 7 5 High \n", "4 4 2 3 High \n", "\n", "[5 rows x 25 columns]" ] }, "execution_count": 394, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane = pd.read_csv(r'C:\\Users\\HP\\Desktop\\podyplomówka\\cancer_patient_data_sets.csv', index_col = 0)\n", "dane.head()" ] }, { "cell_type": "code", "execution_count": 395, "id": "1831fdd7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 1000 entries, 0 to 999\n", "Data columns (total 25 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Patient Id 1000 non-null object\n", " 1 Age 1000 non-null int64 \n", " 2 Gender 1000 non-null int64 \n", " 3 Air Pollution 1000 non-null int64 \n", " 4 Alcohol use 1000 non-null int64 \n", " 5 Dust Allergy 1000 non-null int64 \n", " 6 OccuPational Hazards 1000 non-null int64 \n", " 7 Genetic Risk 1000 non-null int64 \n", " 8 chronic Lung Disease 1000 non-null int64 \n", " 9 Balanced Diet 1000 non-null int64 \n", " 10 Obesity 1000 non-null int64 \n", " 11 Smoking 1000 non-null int64 \n", " 12 Passive Smoker 1000 non-null int64 \n", " 13 Chest Pain 1000 non-null int64 \n", " 14 Coughing of Blood 1000 non-null int64 \n", " 15 Fatigue 1000 non-null int64 \n", " 16 Weight Loss 1000 non-null int64 \n", " 17 Shortness of Breath 1000 non-null int64 \n", " 18 Wheezing 1000 non-null int64 \n", " 19 Swallowing Difficulty 1000 non-null int64 \n", " 20 Clubbing of Finger Nails 1000 non-null int64 \n", " 21 Frequent Cold 1000 non-null int64 \n", " 22 Dry Cough 1000 non-null int64 \n", " 23 Snoring 1000 non-null int64 \n", " 24 Level 1000 non-null object\n", "dtypes: int64(23), object(2)\n", "memory usage: 203.1+ KB\n" ] } ], "source": [ "dane.info()" ] }, { "cell_type": "markdown", "id": "69f1b9c9", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 396, "id": "422c8e2c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Air PollutionSmokingPassive Smoker
index
2423
3777
4687
5423
10678
............
995678
996678
997423
998687
999623
\n", "

365 rows × 3 columns

\n", "
" ], "text/plain": [ " Air Pollution Smoking Passive Smoker\n", "index \n", "2 4 2 3\n", "3 7 7 7\n", "4 6 8 7\n", "5 4 2 3\n", "10 6 7 8\n", "... ... ... ...\n", "995 6 7 8\n", "996 6 7 8\n", "997 4 2 3\n", "998 6 8 7\n", "999 6 2 3\n", "\n", "[365 rows x 3 columns]" ] }, "execution_count": 396, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane0 = dane[dane['Level'] == 'High'][['Air Pollution', 'Smoking', 'Passive Smoker']]\n", "dane0" ] }, { "cell_type": "code", "execution_count": 397, "id": "af7da17c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
Age1000.037.17412.00549314.027.7536.045.073.0
Gender1000.01.4020.4905471.01.001.02.02.0
Air Pollution1000.03.8402.0304001.02.003.06.08.0
Alcohol use1000.04.5632.6204771.02.005.07.08.0
Dust Allergy1000.05.1651.9808331.04.006.07.08.0
OccuPational Hazards1000.04.8402.1078051.03.005.07.08.0
Genetic Risk1000.04.5802.1269991.02.005.07.07.0
chronic Lung Disease1000.04.3801.8485181.03.004.06.07.0
Balanced Diet1000.04.4912.1355281.02.004.07.07.0
Obesity1000.04.4652.1249211.03.004.07.07.0
Smoking1000.03.9482.4959021.02.003.07.08.0
Passive Smoker1000.04.1952.3117781.02.004.07.08.0
Chest Pain1000.04.4382.2802091.02.004.07.09.0
Coughing of Blood1000.04.8592.4279651.03.004.07.09.0
Fatigue1000.03.8562.2446161.02.003.05.09.0
Weight Loss1000.03.8552.2065461.02.003.06.08.0
Shortness of Breath1000.04.2402.2850871.02.004.06.09.0
Wheezing1000.03.7772.0419211.02.004.05.08.0
Swallowing Difficulty1000.03.7462.2703831.02.004.05.08.0
Clubbing of Finger Nails1000.03.9232.3880481.02.004.05.09.0
Frequent Cold1000.03.5361.8325021.02.003.05.07.0
Dry Cough1000.03.8532.0390071.02.004.06.07.0
Snoring1000.02.9261.4746861.02.003.04.07.0
\n", "
" ], "text/plain": [ " count mean std min 25% 50% 75% \\\n", "Age 1000.0 37.174 12.005493 14.0 27.75 36.0 45.0 \n", "Gender 1000.0 1.402 0.490547 1.0 1.00 1.0 2.0 \n", "Air Pollution 1000.0 3.840 2.030400 1.0 2.00 3.0 6.0 \n", "Alcohol use 1000.0 4.563 2.620477 1.0 2.00 5.0 7.0 \n", "Dust Allergy 1000.0 5.165 1.980833 1.0 4.00 6.0 7.0 \n", "OccuPational Hazards 1000.0 4.840 2.107805 1.0 3.00 5.0 7.0 \n", "Genetic Risk 1000.0 4.580 2.126999 1.0 2.00 5.0 7.0 \n", "chronic Lung Disease 1000.0 4.380 1.848518 1.0 3.00 4.0 6.0 \n", "Balanced Diet 1000.0 4.491 2.135528 1.0 2.00 4.0 7.0 \n", "Obesity 1000.0 4.465 2.124921 1.0 3.00 4.0 7.0 \n", "Smoking 1000.0 3.948 2.495902 1.0 2.00 3.0 7.0 \n", "Passive Smoker 1000.0 4.195 2.311778 1.0 2.00 4.0 7.0 \n", "Chest Pain 1000.0 4.438 2.280209 1.0 2.00 4.0 7.0 \n", "Coughing of Blood 1000.0 4.859 2.427965 1.0 3.00 4.0 7.0 \n", "Fatigue 1000.0 3.856 2.244616 1.0 2.00 3.0 5.0 \n", "Weight Loss 1000.0 3.855 2.206546 1.0 2.00 3.0 6.0 \n", "Shortness of Breath 1000.0 4.240 2.285087 1.0 2.00 4.0 6.0 \n", "Wheezing 1000.0 3.777 2.041921 1.0 2.00 4.0 5.0 \n", "Swallowing Difficulty 1000.0 3.746 2.270383 1.0 2.00 4.0 5.0 \n", "Clubbing of Finger Nails 1000.0 3.923 2.388048 1.0 2.00 4.0 5.0 \n", "Frequent Cold 1000.0 3.536 1.832502 1.0 2.00 3.0 5.0 \n", "Dry Cough 1000.0 3.853 2.039007 1.0 2.00 4.0 6.0 \n", "Snoring 1000.0 2.926 1.474686 1.0 2.00 3.0 4.0 \n", "\n", " max \n", "Age 73.0 \n", "Gender 2.0 \n", "Air Pollution 8.0 \n", "Alcohol use 8.0 \n", "Dust Allergy 8.0 \n", "OccuPational Hazards 8.0 \n", "Genetic Risk 7.0 \n", "chronic Lung Disease 7.0 \n", "Balanced Diet 7.0 \n", "Obesity 7.0 \n", "Smoking 8.0 \n", "Passive Smoker 8.0 \n", "Chest Pain 9.0 \n", "Coughing of Blood 9.0 \n", "Fatigue 9.0 \n", "Weight Loss 8.0 \n", "Shortness of Breath 9.0 \n", "Wheezing 8.0 \n", "Swallowing Difficulty 8.0 \n", "Clubbing of Finger Nails 9.0 \n", "Frequent Cold 7.0 \n", "Dry Cough 7.0 \n", "Snoring 7.0 " ] }, "execution_count": 397, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane.describe().T" ] }, { "cell_type": "code", "execution_count": 398, "id": "c6867768", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Genetic Risk 5.0\n", "dtype: float64" ] }, "execution_count": 398, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane[['Genetic Risk']].median()" ] }, { "cell_type": "code", "execution_count": 399, "id": "a043ec73", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Patient Id', 'Age', 'Gender', 'Air Pollution', 'Alcohol use',\n", " 'Dust Allergy', 'OccuPational Hazards', 'Genetic Risk',\n", " 'chronic Lung Disease', 'Balanced Diet', 'Obesity', 'Smoking',\n", " 'Passive Smoker', 'Chest Pain', 'Coughing of Blood', 'Fatigue',\n", " 'Weight Loss', 'Shortness of Breath', 'Wheezing',\n", " 'Swallowing Difficulty', 'Clubbing of Finger Nails', 'Frequent Cold',\n", " 'Dry Cough', 'Snoring', 'Level'],\n", " dtype='object')" ] }, "execution_count": 399, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane.columns" ] }, { "cell_type": "code", "execution_count": 400, "id": "e6cad188", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Gender\n", "1 598\n", "2 402\n", "dtype: int64" ] }, "execution_count": 400, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane2 = dane.groupby('Gender').size()\n", "dane2" ] }, { "cell_type": "code", "execution_count": 401, "id": "966e57b9", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\n", "_ = dane['Gender'].value_counts().plot(kind = 'bar')\n", "_ = plt.legend()\n" ] }, { "cell_type": "code", "execution_count": 402, "id": "8d81604c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Smoking\n", " 1 181\n", " 2 222\n", " 3 172\n", " 4 59\n", " 5 10\n", " 6 60\n", " 7 207\n", " 8 89\n", " dtype: int64]" ] }, "execution_count": 402, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane3 = [dane.groupby('Smoking').size()]\n", "dane3 " ] }, { "cell_type": "code", "execution_count": 403, "id": "d85261ce", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "_ = dane['Smoking'].value_counts().plot(kind = 'pie')" ] }, { "cell_type": "code", "execution_count": 404, "id": "86122d04", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Passive Smoker\n", " 1 60\n", " 2 284\n", " 3 140\n", " 4 161\n", " 5 30\n", " 6 30\n", " 7 187\n", " 8 108\n", " dtype: int64]" ] }, "execution_count": 404, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane4 = [dane.groupby('Passive Smoker').size()]\n", "dane4" ] }, { "cell_type": "code", "execution_count": 405, "id": "c78bbd4c", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "_ = dane['Passive Smoker'].value_counts().plot(kind = 'pie')\n" ] }, { "cell_type": "code", "execution_count": 406, "id": "6385071c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Smoking Gender\n", "1 1 102\n", " 2 79\n", "2 1 102\n", " 2 120\n", "3 1 79\n", " 2 93\n", "4 1 49\n", " 2 10\n", "5 1 10\n", "6 1 28\n", " 2 32\n", "7 1 167\n", " 2 40\n", "8 1 61\n", " 2 28\n", "dtype: int64" ] }, "execution_count": 406, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane.groupby(['Smoking','Gender']).size()" ] }, { "cell_type": "code", "execution_count": 407, "id": "af3dd196", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dane6 = dane.groupby(['Smoking','Gender'])\n", "_ = dane6[['Smoking', 'Gender']].value_counts().plot(kind = 'bar')\n", "_ = plt.legend()\n" ] }, { "cell_type": "code", "execution_count": 408, "id": "18002f3f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Smoking 3.0\n", "dtype: float64" ] }, "execution_count": 408, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane[['Smoking']].median()" ] }, { "cell_type": "code", "execution_count": 409, "id": "f21f91ec", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Genetic RiskSmokingAlcohol use
index
725143
59143
727133
940142
819112
............
537788
538777
755747
533747
812777
\n", "

1000 rows × 3 columns

\n", "
" ], "text/plain": [ " Genetic Risk Smoking Alcohol use\n", "index \n", "725 1 4 3\n", "59 1 4 3\n", "727 1 3 3\n", "940 1 4 2\n", "819 1 1 2\n", "... ... ... ...\n", "537 7 8 8\n", "538 7 7 7\n", "755 7 4 7\n", "533 7 4 7\n", "812 7 7 7\n", "\n", "[1000 rows x 3 columns]" ] }, "execution_count": 409, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = dane[['Genetic Risk', 'Smoking','Alcohol use']]\n", "x.sort_values('Genetic Risk')\n" ] }, { "cell_type": "code", "execution_count": 410, "id": "15eebd5b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Air Pollution\n", "8 19\n", "5 20\n", "7 30\n", "4 90\n", "1 141\n", "3 173\n", "2 201\n", "6 326\n", "Name: count, dtype: int64" ] }, "execution_count": 410, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane7 = dane['Air Pollution'].value_counts()\n", "dane7.sort_values()" ] }, { "cell_type": "code", "execution_count": 411, "id": "c0b501b8", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "_ = dane7 = dane['Air Pollution'].value_counts().plot(kind = 'bar')\n", "_ = plt.legend()" ] }, { "cell_type": "code", "execution_count": 412, "id": "00915ec0", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import sklearn " ] }, { "cell_type": "code", "execution_count": 413, "id": "5024be32", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Genetic Risk\n", "4 40\n", "1 40\n", "5 100\n", "6 108\n", "3 173\n", "2 212\n", "7 327\n", "Name: count, dtype: int64" ] }, "execution_count": 413, "metadata": {}, "output_type": "execute_result" } ], "source": [ "_ = dane8 = dane['Genetic Risk'].value_counts()\n", "dane8.sort_values()\n", "\n" ] }, { "cell_type": "code", "execution_count": 414, "id": "8388706b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Patient IdAgeGenderAir PollutionAlcohol useDust AllergyOccuPational HazardsGenetic Riskchronic Lung DiseaseBalanced Diet...FatigueWeight LossShortness of BreathWheezingSwallowing DifficultyClubbing of Finger NailsFrequent ColdDry CoughSnoringLevel
index
0P13312454322...342231234Low
1P101713153422...137862172Medium
2P1003514565546...879214672High
3P10003717777677...423145675High
4P1014616877767...324142423High
..................................................................
995P9954416777767...532782453High
996P9963726877767...965724314High
997P9972524565546...879214672High
998P9981826877767...324142423High
999P9994716565546...879214672High
\n", "

1000 rows × 25 columns

\n", "
" ], "text/plain": [ " Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n", "index \n", "0 P1 33 1 2 4 5 \n", "1 P10 17 1 3 1 5 \n", "2 P100 35 1 4 5 6 \n", "3 P1000 37 1 7 7 7 \n", "4 P101 46 1 6 8 7 \n", "... ... ... ... ... ... ... \n", "995 P995 44 1 6 7 7 \n", "996 P996 37 2 6 8 7 \n", "997 P997 25 2 4 5 6 \n", "998 P998 18 2 6 8 7 \n", "999 P999 47 1 6 5 6 \n", "\n", " OccuPational Hazards Genetic Risk chronic Lung Disease \\\n", "index \n", "0 4 3 2 \n", "1 3 4 2 \n", "2 5 5 4 \n", "3 7 6 7 \n", "4 7 7 6 \n", "... ... ... ... \n", "995 7 7 6 \n", "996 7 7 6 \n", "997 5 5 4 \n", "998 7 7 6 \n", "999 5 5 4 \n", "\n", " Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n", "index ... \n", "0 2 ... 3 4 2 \n", "1 2 ... 1 3 7 \n", "2 6 ... 8 7 9 \n", "3 7 ... 4 2 3 \n", "4 7 ... 3 2 4 \n", "... ... ... ... ... ... \n", "995 7 ... 5 3 2 \n", "996 7 ... 9 6 5 \n", "997 6 ... 8 7 9 \n", "998 7 ... 3 2 4 \n", "999 6 ... 8 7 9 \n", "\n", " Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n", "index \n", "0 2 3 1 \n", "1 8 6 2 \n", "2 2 1 4 \n", "3 1 4 5 \n", "4 1 4 2 \n", "... ... ... ... \n", "995 7 8 2 \n", "996 7 2 4 \n", "997 2 1 4 \n", "998 1 4 2 \n", "999 2 1 4 \n", "\n", " Frequent Cold Dry Cough Snoring Level \n", "index \n", "0 2 3 4 Low \n", "1 1 7 2 Medium \n", "2 6 7 2 High \n", "3 6 7 5 High \n", "4 4 2 3 High \n", "... ... ... ... ... \n", "995 4 5 3 High \n", "996 3 1 4 High \n", "997 6 7 2 High \n", "998 4 2 3 High \n", "999 6 7 2 High \n", "\n", "[1000 rows x 25 columns]" ] }, "execution_count": 414, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane" ] }, { "cell_type": "code", "execution_count": 415, "id": "f2c57644", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Level\n", "High 365\n", "Medium 332\n", "Low 303\n", "Name: count, dtype: int64" ] }, "execution_count": 415, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dane['Level'].value_counts()\n" ] }, { "cell_type": "code", "execution_count": 416, "id": "f02e1f34", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Patient IdAgeGenderAir PollutionAlcohol useDust AllergyOccuPational HazardsGenetic Riskchronic Lung DiseaseBalanced Diet...FatigueWeight LossShortness of BreathWheezingSwallowing DifficultyClubbing of Finger NailsFrequent ColdDry CoughSnoringLevel
index
0P13312454322...3422312341
1P101713153422...1378621722
2P1003514565546...8792146723
3P10003717777677...4231456753
4P1014616877767...3241424233
\n", "

5 rows × 25 columns

\n", "
" ], "text/plain": [ " Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n", "index \n", "0 P1 33 1 2 4 5 \n", "1 P10 17 1 3 1 5 \n", "2 P100 35 1 4 5 6 \n", "3 P1000 37 1 7 7 7 \n", "4 P101 46 1 6 8 7 \n", "\n", " OccuPational Hazards Genetic Risk chronic Lung Disease \\\n", "index \n", "0 4 3 2 \n", "1 3 4 2 \n", "2 5 5 4 \n", "3 7 6 7 \n", "4 7 7 6 \n", "\n", " Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n", "index ... \n", "0 2 ... 3 4 2 \n", "1 2 ... 1 3 7 \n", "2 6 ... 8 7 9 \n", "3 7 ... 4 2 3 \n", "4 7 ... 3 2 4 \n", "\n", " Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n", "index \n", "0 2 3 1 \n", "1 8 6 2 \n", "2 2 1 4 \n", "3 1 4 5 \n", "4 1 4 2 \n", "\n", " Frequent Cold Dry Cough Snoring Level \n", "index \n", "0 2 3 4 1 \n", "1 1 7 2 2 \n", "2 6 7 2 3 \n", "3 6 7 5 3 \n", "4 4 2 3 3 \n", "\n", "[5 rows x 25 columns]" ] }, "execution_count": 416, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = dane.replace({'Level':{'High' : 3, 'Medium' : 2, 'Low' : 1}})\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 417, "id": "52632684", "metadata": {}, "outputs": [], "source": [ "import sklearn" ] }, { "cell_type": "code", "execution_count": 418, "id": "a47f580a", "metadata": {}, "outputs": [], "source": [ "np.random.seed(10)\n", "np.set_printoptions(precision=6, suppress=True)\n" ] }, { "cell_type": "code", "execution_count": 419, "id": "7caae544", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Y shape: (1000,)\n", "X shape: (1000, 23)\n" ] } ], "source": [ "X = data.drop(['Level', 'Patient Id'], axis=1)\n", "y = data['Level']\n", "\n", "\n", "print(\"Y shape:\", y.shape)\n", "print(\"X shape:\", X.shape)" ] }, { "cell_type": "code", "execution_count": 420, "id": "9139408a", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split (X, y)\n", "\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 421, "id": "2f45152a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "X_train shape: (750, 23)\n", "y_train shape: (750,)\n", "X_test shape: (250, 23)\n", "y_test shape: (250,)\n" ] } ], "source": [ "print(\"X_train shape:\", X_train.shape)\n", "print(\"y_train shape:\", y_train.shape)\n", "print(\"X_test shape:\", X_test.shape)\n", "print(\"y_test shape:\", y_test.shape)" ] }, { "cell_type": "code", "execution_count": 422, "id": "8ba2674d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\HP\\anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:460: ConvergenceWarning:\n", "\n", "lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", "\n" ] }, { "data": { "text/html": [ "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "LogisticRegression()" ] }, "execution_count": 422, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import LogisticRegression\n", "classifier = LogisticRegression()\n", "classifier.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 423, "id": "ba0a5bda", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.019763, 0.980237, 0. ],\n", " [0. , 0. , 1. ],\n", " [0. , 0.000002, 0.999998],\n", " [0.999979, 0.000021, 0. ],\n", " [0. , 0.000038, 0.999962],\n", " [0.000022, 0.983401, 0.016577],\n", " [0. , 0.023981, 0.976019],\n", " [0.025065, 0.943631, 0.031305],\n", " [0. , 0.011278, 0.988722],\n", " [0.077079, 0.922921, 0. ],\n", " [0.000003, 0.000326, 0.999672],\n", " [0.000473, 0.999527, 0. ],\n", " [0.16753 , 0.83247 , 0. ],\n", " [0.995731, 0.004269, 0. ],\n", " [0.949387, 0.050613, 0. ],\n", " [0.21037 , 0.78963 , 0. ],\n", " [0. , 0. , 1. ],\n", " [0.91181 , 0.045917, 0.042272],\n", " [0. , 0.002178, 0.997822],\n", " [0.984437, 0.015558, 0.000005],\n", " [0. , 0.002066, 0.997934],\n", " [0.99922 , 0.00078 , 0. ],\n", " [0. , 0.000298, 0.999702],\n", " [0. , 0.004236, 0.995764],\n", " [0. , 0.000004, 0.999996],\n", " [0.238042, 0.760375, 0.001583],\n", " [0.025065, 0.943631, 0.031305],\n", " [0.99808 , 0.001917, 0.000003],\n", " [0.997777, 0.002223, 0. ],\n", " [0.238042, 0.760375, 0.001583],\n", " [0.913746, 0.086254, 0. ],\n", " [0.002141, 0.997859, 0. ],\n", " [0. , 0.004236, 0.995764],\n", " [0.99922 , 0.00078 , 0. ],\n", " [0.00192 , 0.99808 , 0. ],\n", " [0. , 0.012453, 0.987547],\n", " [0.00145 , 0.99855 , 0. ],\n", " [0. , 0. , 1. ],\n", " [0.00248 , 0.99752 , 0. ],\n", " [0.998712, 0.000937, 0.000351],\n", " [0.238042, 0.760375, 0.001583],\n", " [0.001213, 0.97811 , 0.020677],\n", " [0. , 0. , 1. ],\n", " [0.015839, 0.984161, 0. ],\n", " [0. , 0.023981, 0.976019],\n", " [0.000003, 0.000326, 0.999672],\n", " [0.930462, 0.069418, 0.000121],\n", " [0. , 0.002178, 0.997822],\n", " [0. , 0.000003, 0.999997],\n", " [0.001321, 0.998655, 0.000024],\n", " [0.001178, 0.998807, 0.000014],\n", " [0.00035 , 0.99965 , 0. ],\n", " [0.05124 , 0.948155, 0.000606],\n", " [0. , 0. , 1. ],\n", " [0.001321, 0.998655, 0.000024],\n", " [0. , 0. , 1. ],\n", " [0. , 0.004285, 0.995715],\n", " [0.000004, 0.02071 , 0.979285],\n", " [0.969283, 0.030717, 0. ],\n", " [0.000063, 0.040843, 0.959093],\n", " [0. , 0.002066, 0.997934],\n", " [0.942577, 0.018654, 0.038768],\n", " [0. , 0.000002, 0.999998],\n", " [0.870813, 0.129142, 0.000044],\n", " [0. , 0. , 1. ],\n", " [0.913746, 0.086254, 0. ],\n", " [0.000637, 0.999363, 0. ],\n", " [0. , 0.000003, 0.999997],\n", " [0. , 0.000298, 0.999702],\n", " [0.029989, 0.970011, 0. ],\n", " [0.994782, 0.000846, 0.004371],\n", " [0.999889, 0.000075, 0.000035],\n", " [0.947557, 0.052443, 0. ],\n", " [0.029989, 0.970011, 0. ],\n", " [0.129826, 0.870174, 0. ],\n", " [0.055579, 0.94417 , 0.000251],\n", " [0.002141, 0.997859, 0. ],\n", " [0.001398, 0.9986 , 0.000002],\n", " [0. , 0.000062, 0.999938],\n", " [0.99808 , 0.001917, 0.000003],\n", " [0. , 0.000004, 0.999996],\n", " [0.969283, 0.030717, 0. ],\n", " [0. , 0.004236, 0.995764],\n", " [0.001178, 0.998807, 0.000014],\n", " [0. , 0. , 1. ],\n", " [0. , 0.014023, 0.985977],\n", " [0. , 0. , 1. ],\n", " [0.033577, 0.966423, 0. ],\n", " [0.99808 , 0.001917, 0.000003],\n", " [0. , 0.000062, 0.999938],\n", " [0.099592, 0.900408, 0. ],\n", " [0.000177, 0.999787, 0.000036],\n", " [0.997699, 0.002301, 0. ],\n", " [0.930462, 0.069418, 0.000121],\n", " [0.00248 , 0.99752 , 0. ],\n", " [0.00248 , 0.99752 , 0. ],\n", " [0.238042, 0.760375, 0.001583],\n", " [0.101768, 0.898232, 0. ],\n", " [0.029989, 0.970011, 0. ],\n", " [0.98255 , 0.01745 , 0. ],\n", " [0. , 0.004285, 0.995715],\n", " [0.000332, 0.999668, 0. ],\n", " [0.999328, 0.000672, 0. ],\n", " [0. , 0.023981, 0.976019],\n", " [0.997777, 0.002223, 0. ],\n", " [0.949387, 0.050613, 0. ],\n", " [0.930462, 0.069418, 0.000121],\n", " [0.00028 , 0.99972 , 0. ],\n", " [0. , 0.004236, 0.995764],\n", " [0. , 0. , 1. ],\n", " [0.998712, 0.000937, 0.000351],\n", " [0. , 0.000038, 0.999962],\n", " [0.000898, 0.99788 , 0.001222],\n", " [0.91181 , 0.045917, 0.042272],\n", " [0.984437, 0.015558, 0.000005],\n", " [0.000003, 0.000326, 0.999672],\n", " [0.099592, 0.900408, 0. ],\n", " [0.033577, 0.966423, 0. ],\n", " [0.969283, 0.030717, 0. ],\n", " [0.999328, 0.000672, 0. ],\n", " [0. , 0. , 1. ],\n", " [0. , 0.008068, 0.991932],\n", " [0. , 0.000298, 0.999702],\n", " [0.003572, 0.996428, 0. ],\n", " [0.997777, 0.002223, 0. ],\n", " [0. , 0.000002, 0.999998],\n", " [0.000063, 0.040843, 0.959093],\n", " [0.947557, 0.052443, 0. ],\n", " [0.907855, 0.092061, 0.000084],\n", " [0.00145 , 0.99855 , 0. ],\n", " [0.101768, 0.898232, 0. ],\n", " [0. , 0.009143, 0.990857],\n", " [0.861344, 0.134685, 0.00397 ],\n", " [0. , 0.004236, 0.995764],\n", " [0.990975, 0.009025, 0. ],\n", " [0.870813, 0.129142, 0.000044],\n", " [0.995587, 0.000049, 0.004364],\n", " [0. , 0.000298, 0.999702],\n", " [0.942577, 0.018654, 0.038768],\n", " [0. , 0.000005, 0.999995],\n", " [0.000898, 0.99788 , 0.001222],\n", " [0.99808 , 0.001917, 0.000003],\n", " [0.099592, 0.900408, 0. ],\n", " [0.099592, 0.900408, 0. ],\n", " [0.999979, 0.000021, 0. ],\n", " [0.995587, 0.000049, 0.004364],\n", " [0.001398, 0.9986 , 0.000002],\n", " [0.000003, 0.000326, 0.999672],\n", " [0.999889, 0.000075, 0.000035],\n", " [0.861344, 0.134685, 0.00397 ],\n", " [0.000003, 0.000326, 0.999672],\n", " [0.969283, 0.030717, 0. ],\n", " [0.238042, 0.760375, 0.001583],\n", " [0.000977, 0.999023, 0. ],\n", " [0.002141, 0.997859, 0. ],\n", " [0.000005, 0.000077, 0.999918],\n", " [0.997699, 0.002301, 0. ],\n", " [0. , 0.004236, 0.995764],\n", " [0. , 0. , 1. ],\n", " [0.999425, 0.000575, 0. ],\n", " [0.974593, 0.025407, 0. ],\n", " [0.002453, 0.997528, 0.000019],\n", " [0.84426 , 0.15574 , 0. ],\n", " [0.000898, 0.99788 , 0.001222],\n", " [0.949387, 0.050613, 0. ],\n", " [0. , 0.000032, 0.999968],\n", " [0.000332, 0.999668, 0. ],\n", " [0.99808 , 0.001917, 0.000003],\n", " [0.05124 , 0.948155, 0.000606],\n", " [0.999889, 0.000075, 0.000035],\n", " [0. , 0. , 1. ],\n", " [0.099592, 0.900408, 0. ],\n", " [0. , 0.000298, 0.999702],\n", " [0.99808 , 0.001917, 0.000003],\n", " [0. , 0.000298, 0.999702],\n", " [0.598003, 0.391491, 0.010506],\n", " [0.002141, 0.997859, 0. ],\n", " [0.001321, 0.998655, 0.000024],\n", " [0.129826, 0.870174, 0. ],\n", " [0.033577, 0.966423, 0. ],\n", " [0.101768, 0.898232, 0. ],\n", " [0.999979, 0.000021, 0. ],\n", " [0.870813, 0.129142, 0.000044],\n", " [0.002453, 0.997528, 0.000019],\n", " [0.129826, 0.870174, 0. ],\n", " [0. , 0.000032, 0.999968],\n", " [0. , 0.004285, 0.995715],\n", " [0. , 0.000004, 0.999996],\n", " [0. , 0.000345, 0.999655],\n", " [0.033577, 0.966423, 0. ],\n", " [0.055579, 0.94417 , 0.000251],\n", " [0.99922 , 0.00078 , 0. ],\n", " [0.997699, 0.002301, 0. ],\n", " [0. , 0.014023, 0.985977],\n", " [0. , 0.008068, 0.991932],\n", " [0.998712, 0.000937, 0.000351],\n", " [0.000637, 0.999363, 0. ],\n", " [0.001321, 0.998655, 0.000024],\n", " [0.930462, 0.069418, 0.000121],\n", " [0.001398, 0.9986 , 0.000002],\n", " [0.002453, 0.997528, 0.000019],\n", " [0.05124 , 0.948155, 0.000606],\n", " [0. , 0. , 1. ],\n", " [0.001111, 0.998889, 0. ],\n", " [0. , 0.000203, 0.999797],\n", " [0.000063, 0.040843, 0.959093],\n", " [0.930462, 0.069418, 0.000121],\n", " [0.019763, 0.980237, 0. ],\n", " [0.033577, 0.966423, 0. ],\n", " [0.659404, 0.319765, 0.020831],\n", " [0. , 0. , 1. ],\n", " [0.999979, 0.000021, 0. ],\n", " [0. , 0.002178, 0.997822],\n", " [0. , 0.000085, 0.999915],\n", " [0.974593, 0.025407, 0. ],\n", " [0.98255 , 0.01745 , 0. ],\n", " [0.000308, 0.999692, 0. ],\n", " [0. , 0. , 1. ],\n", " [0.974593, 0.025407, 0. ],\n", " [0.947557, 0.052443, 0. ],\n", " [0.913746, 0.086254, 0. ],\n", " [0.015839, 0.984161, 0. ],\n", " [0.974593, 0.025407, 0. ],\n", " [0. , 0. , 1. ],\n", " [0.129826, 0.870174, 0. ],\n", " [0.000177, 0.999787, 0.000036],\n", " [0.238042, 0.760375, 0.001583],\n", " [0.99808 , 0.001917, 0.000003],\n", " [0. , 0. , 1. ],\n", " [0.001213, 0.97811 , 0.020677],\n", " [0. , 0.008068, 0.991932],\n", " [0.999979, 0.000021, 0. ],\n", " [0.001398, 0.9986 , 0.000002],\n", " [0.015839, 0.984161, 0. ],\n", " [0. , 0.021595, 0.978405],\n", " [0.002453, 0.997528, 0.000019],\n", " [0. , 0.000005, 0.999995],\n", " [0.907855, 0.092061, 0.000084],\n", " [0.974593, 0.025407, 0. ],\n", " [0. , 0.000032, 0.999968],\n", " [0.91181 , 0.045917, 0.042272],\n", " [0.000898, 0.99788 , 0.001222],\n", " [0. , 0.002066, 0.997934],\n", " [0.055579, 0.94417 , 0.000251],\n", " [0. , 0.014023, 0.985977],\n", " [0.000029, 0.006112, 0.993859],\n", " [0. , 0.002178, 0.997822],\n", " [0. , 0.023981, 0.976019],\n", " [0.999889, 0.000075, 0.000035],\n", " [0.000308, 0.999692, 0. ]])" ] }, "execution_count": 423, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_prob = classifier.predict_proba(X_test)\n", "y_prob" ] }, { "cell_type": "code", "execution_count": 424, "id": "08f121e4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2, 3, 3, 1, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, 1, 2, 3, 1, 3, 1, 3, 1,\n", " 3, 3, 3, 2, 2, 1, 1, 2, 1, 2, 3, 1, 2, 3, 2, 3, 2, 1, 2, 2, 3, 2,\n", " 3, 3, 1, 3, 3, 2, 2, 2, 2, 3, 2, 3, 3, 3, 1, 3, 3, 1, 3, 1, 3, 1,\n", " 2, 3, 3, 2, 1, 1, 1, 2, 2, 2, 2, 2, 3, 1, 3, 1, 3, 2, 3, 3, 3, 2,\n", " 1, 3, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 3, 2, 1, 3, 1, 1, 1, 2, 3, 3,\n", " 1, 3, 2, 1, 1, 3, 2, 2, 1, 1, 3, 3, 3, 2, 1, 3, 3, 1, 1, 2, 2, 3,\n", " 1, 3, 1, 1, 1, 3, 1, 3, 2, 1, 2, 2, 1, 1, 2, 3, 1, 1, 3, 1, 2, 2,\n", " 2, 3, 1, 3, 3, 1, 1, 2, 1, 2, 1, 3, 2, 1, 2, 1, 3, 2, 3, 1, 3, 1,\n", " 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1, 3, 3, 1, 2, 2,\n", " 1, 2, 2, 2, 3, 2, 3, 3, 1, 2, 2, 1, 3, 1, 3, 3, 1, 1, 2, 3, 1, 1,\n", " 1, 2, 1, 3, 2, 2, 2, 1, 3, 2, 3, 1, 2, 2, 3, 2, 3, 1, 1, 3, 1, 2,\n", " 3, 2, 3, 3, 3, 3, 1, 2], dtype=int64)" ] }, "execution_count": 424, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_pred = classifier.predict(X_test)\n", "y_pred" ] }, { "cell_type": "code", "execution_count": 425, "id": "c876fff8", "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import accuracy_score\n", "from mlxtend.plotting import plot_confusion_matrix\n", "import seaborn as sns\n", "sns.set()\n" ] }, { "cell_type": "code", "execution_count": 426, "id": "cbb6c719", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy : 0.992\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "cm = confusion_matrix(y_test, y_pred)\n", "plot_confusion_matrix(cm)\n", "\n", "acc = accuracy_score(y_test, y_pred)\n", "print('Accuracy',':', acc)" ] }, { "cell_type": "code", "execution_count": 427, "id": "e3c2afe8", "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "colorscale": [ [ 0, "rgb(3, 5, 18)" ], [ 0.09090909090909091, "rgb(25, 25, 51)" ], [ 0.18181818181818182, "rgb(44, 42, 87)" ], [ 0.2727272727272727, "rgb(58, 60, 125)" ], [ 0.36363636363636365, "rgb(62, 83, 160)" ], [ 0.45454545454545453, "rgb(62, 109, 178)" ], [ 0.5454545454545454, "rgb(72, 134, 187)" ], [ 0.6363636363636364, "rgb(89, 159, 196)" ], [ 0.7272727272727273, "rgb(114, 184, 205)" ], [ 0.8181818181818182, "rgb(149, 207, 216)" ], [ 0.9090909090909091, "rgb(192, 229, 232)" ], [ 1, "rgb(234, 252, 253)" ] ], "reversescale": true, "showscale": true, "type": "heatmap", "x": [ "pred_1", "pred_2", "pred_3" ], "y": [ "true_1", "true_2", "true_3" ], "z": [ [ 0, 0, 85 ], [ 2, 87, 0 ], [ 76, 0, 0 ] ] } ], "layout": { "annotations": [ { "font": { "color": "#000000" }, "showarrow": false, "text": "0", "x": "pred_1", "xref": "x", "y": "true_1", "yref": "y" }, { "font": { "color": "#000000" }, "showarrow": false, "text": "0", "x": "pred_2", "xref": "x", "y": "true_1", "yref": "y" }, { "font": { "color": "#FFFFFF" }, "showarrow": false, "text": "85", "x": "pred_3", "xref": "x", "y": "true_1", "yref": "y" }, { "font": { "color": "#000000" }, "showarrow": false, "text": "2", "x": "pred_1", "xref": "x", "y": "true_2", "yref": "y" }, { "font": { "color": "#FFFFFF" }, "showarrow": false, "text": "87", "x": "pred_2", "xref": "x", "y": "true_2", "yref": "y" }, { "font": { "color": "#000000" }, "showarrow": false, "text": "0", "x": "pred_3", "xref": "x", "y": "true_2", "yref": "y" }, { "font": { "color": "#FFFFFF" }, "showarrow": false, "text": "76", "x": "pred_1", "xref": "x", "y": "true_3", "yref": "y" }, { "font": { "color": "#000000" }, "showarrow": false, "text": "0", "x": "pred_2", "xref": "x", "y": "true_3", "yref": "y" }, { "font": { "color": "#000000" }, "showarrow": false, "text": "0", "x": "pred_3", "xref": "x", "y": "true_3", "yref": "y" } ], "font": { "size": 14 }, "height": 400, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Confusion Matrix - Accuracy: 0.9920" }, "width": 400, "xaxis": { "dtick": 1, "gridcolor": "rgb(0, 0, 0)", "side": "top", "ticks": "" }, "yaxis": { "dtick": 1, "ticks": "", "ticksuffix": " " } } } }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def plot_confusion_matrix(cm):\n", " cm = cm[::-1]\n", " cm = pd.DataFrame(cm, columns=['pred_1', 'pred_2', 'pred_3'], index=['true_1', 'true_2', 'true_3'])\n", " fig = ff.create_annotated_heatmap(z = cm.values, x = list(cm.columns), y = list(cm.index), colorscale = 'ice', showscale = True, reversescale = True)\n", " fig.update_layout(width=400, height=400, title='Confusion Matrix - Accuracy: {:.4f}'.format(acc), font_size=14)\n", " fig.show()\n", "plot_confusion_matrix(cm)" ] }, { "cell_type": "code", "execution_count": 435, "id": "a1ffeb65", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " pred_1 0.97 1.00 0.99 76\n", " pred_2 1.00 0.98 0.99 89\n", " pred_3 1.00 1.00 1.00 85\n", "\n", " accuracy 0.99 250\n", " macro avg 0.99 0.99 0.99 250\n", "weighted avg 0.99 0.99 0.99 250\n", "\n" ] } ], "source": [ "from sklearn.metrics import classification_report\n", "print(classification_report(y_test, y_pred, target_names=['pred_1', 'pred_2', 'pred_3']))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }