{ "cells": [ { "cell_type": "markdown", "id": "033f13af", "metadata": {}, "source": [ "This dataset contains information on patients with lung cancer, including their age, gender, air pollution exposure, alcohol use, dust allergy, occupational hazards, genetic risk, chronic lung disease, balanced diet, obesity, smoking, passive smoking, chest pain, coughing of blood, fatigue, weight loss, shortness of breath, wheezing, swallowing difficulty, clubbing of finger nails, and snoring.\n", "\n", "https://www.kaggle.com/datasets/thedevastator/cancer-patients-and-air-pollution-a-new-link/data" ] }, { "cell_type": "code", "execution_count": 5, "id": "7ce53ad1", "metadata": {}, "outputs": [], "source": [ "\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import plotly.figure_factory as ff\n", "import seaborn as sns\n", "sns.set()\n", "import plotly.express as px\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "3b9fd854", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: plotnine in c:\\users\\hp\\anaconda3\\lib\\site-packages (0.12.4)\n", "Requirement already satisfied: matplotlib>=3.6.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (3.8.0)\n", "Requirement already satisfied: mizani<0.10.0,>0.9.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.9.3)\n", "Requirement already satisfied: numpy>=1.23.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (1.26.0)\n", "Requirement already satisfied: pandas>=1.5.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (2.1.1)\n", "Requirement already satisfied: patsy>=0.5.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.5.5)\n", "Requirement already satisfied: scipy>=1.5.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (1.11.4)\n", "Requirement already satisfied: statsmodels>=0.14.0 in 
c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.14.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (1.2.0)\n", "Requirement already satisfied: cycler>=0.10 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (4.25.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (1.4.4)\n", "Requirement already satisfied: packaging>=20.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (23.1)\n", "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (10.0.1)\n", "Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (3.0.9)\n", "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (2.8.2)\n", "Requirement already satisfied: tzdata in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mizani<0.10.0,>0.9.0->plotnine) (2023.3)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas>=1.5.0->plotnine) (2023.3.post1)\n", "Requirement already satisfied: six in c:\\users\\hp\\anaconda3\\lib\\site-packages (from patsy>=0.5.1->plotnine) (1.16.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "[notice] A new release of pip is available: 23.3.2 -> 24.0\n", "[notice] To update, run: python.exe -m pip install --upgrade pip\n" ] } ], "source": [ "pip install plotnine" ] }, { "cell_type": "code", "execution_count": 7, 
"id": "6d369f6b", "metadata": {}, "outputs": [], "source": [ "import plotnine" ] }, { "cell_type": "code", "execution_count": 8, "id": "73edef6d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Patient Id | \n", "Age | \n", "Gender | \n", "Air Pollution | \n", "Alcohol use | \n", "Dust Allergy | \n", "OccuPational Hazards | \n", "Genetic Risk | \n", "chronic Lung Disease | \n", "Balanced Diet | \n", "... | \n", "Fatigue | \n", "Weight Loss | \n", "Shortness of Breath | \n", "Wheezing | \n", "Swallowing Difficulty | \n", "Clubbing of Finger Nails | \n", "Frequent Cold | \n", "Dry Cough | \n", "Snoring | \n", "Level | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
index | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "P1 | \n", "33 | \n", "1 | \n", "2 | \n", "4 | \n", "5 | \n", "4 | \n", "3 | \n", "2 | \n", "2 | \n", "... | \n", "3 | \n", "4 | \n", "2 | \n", "2 | \n", "3 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "Low | \n", "
1 | \n", "P10 | \n", "17 | \n", "1 | \n", "3 | \n", "1 | \n", "5 | \n", "3 | \n", "4 | \n", "2 | \n", "2 | \n", "... | \n", "1 | \n", "3 | \n", "7 | \n", "8 | \n", "6 | \n", "2 | \n", "1 | \n", "7 | \n", "2 | \n", "Medium | \n", "
2 | \n", "P100 | \n", "35 | \n", "1 | \n", "4 | \n", "5 | \n", "6 | \n", "5 | \n", "5 | \n", "4 | \n", "6 | \n", "... | \n", "8 | \n", "7 | \n", "9 | \n", "2 | \n", "1 | \n", "4 | \n", "6 | \n", "7 | \n", "2 | \n", "High | \n", "
3 | \n", "P1000 | \n", "37 | \n", "1 | \n", "7 | \n", "7 | \n", "7 | \n", "7 | \n", "6 | \n", "7 | \n", "7 | \n", "... | \n", "4 | \n", "2 | \n", "3 | \n", "1 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "5 | \n", "High | \n", "
4 | \n", "P101 | \n", "46 | \n", "1 | \n", "6 | \n", "8 | \n", "7 | \n", "7 | \n", "7 | \n", "6 | \n", "7 | \n", "... | \n", "3 | \n", "2 | \n", "4 | \n", "1 | \n", "4 | \n", "2 | \n", "4 | \n", "2 | \n", "3 | \n", "High | \n", "
5 rows × 25 columns
\n", "\n", " | count | \n", "mean | \n", "std | \n", "min | \n", "25% | \n", "50% | \n", "75% | \n", "max | \n", "
---|---|---|---|---|---|---|---|---|
Age | \n", "1000.0 | \n", "37.174 | \n", "12.005493 | \n", "14.0 | \n", "27.75 | \n", "36.0 | \n", "45.0 | \n", "73.0 | \n", "
Gender | \n", "1000.0 | \n", "1.402 | \n", "0.490547 | \n", "1.0 | \n", "1.00 | \n", "1.0 | \n", "2.0 | \n", "2.0 | \n", "
Air Pollution | \n", "1000.0 | \n", "3.840 | \n", "2.030400 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "6.0 | \n", "8.0 | \n", "
Alcohol use | \n", "1000.0 | \n", "4.563 | \n", "2.620477 | \n", "1.0 | \n", "2.00 | \n", "5.0 | \n", "7.0 | \n", "8.0 | \n", "
Dust Allergy | \n", "1000.0 | \n", "5.165 | \n", "1.980833 | \n", "1.0 | \n", "4.00 | \n", "6.0 | \n", "7.0 | \n", "8.0 | \n", "
OccuPational Hazards | \n", "1000.0 | \n", "4.840 | \n", "2.107805 | \n", "1.0 | \n", "3.00 | \n", "5.0 | \n", "7.0 | \n", "8.0 | \n", "
Genetic Risk | \n", "1000.0 | \n", "4.580 | \n", "2.126999 | \n", "1.0 | \n", "2.00 | \n", "5.0 | \n", "7.0 | \n", "7.0 | \n", "
chronic Lung Disease | \n", "1000.0 | \n", "4.380 | \n", "1.848518 | \n", "1.0 | \n", "3.00 | \n", "4.0 | \n", "6.0 | \n", "7.0 | \n", "
Balanced Diet | \n", "1000.0 | \n", "4.491 | \n", "2.135528 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "7.0 | \n", "7.0 | \n", "
Obesity | \n", "1000.0 | \n", "4.465 | \n", "2.124921 | \n", "1.0 | \n", "3.00 | \n", "4.0 | \n", "7.0 | \n", "7.0 | \n", "
Smoking | \n", "1000.0 | \n", "3.948 | \n", "2.495902 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "7.0 | \n", "8.0 | \n", "
Passive Smoker | \n", "1000.0 | \n", "4.195 | \n", "2.311778 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "7.0 | \n", "8.0 | \n", "
Chest Pain | \n", "1000.0 | \n", "4.438 | \n", "2.280209 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "7.0 | \n", "9.0 | \n", "
Coughing of Blood | \n", "1000.0 | \n", "4.859 | \n", "2.427965 | \n", "1.0 | \n", "3.00 | \n", "4.0 | \n", "7.0 | \n", "9.0 | \n", "
Fatigue | \n", "1000.0 | \n", "3.856 | \n", "2.244616 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "5.0 | \n", "9.0 | \n", "
Weight Loss | \n", "1000.0 | \n", "3.855 | \n", "2.206546 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "6.0 | \n", "8.0 | \n", "
Shortness of Breath | \n", "1000.0 | \n", "4.240 | \n", "2.285087 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "6.0 | \n", "9.0 | \n", "
Wheezing | \n", "1000.0 | \n", "3.777 | \n", "2.041921 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "5.0 | \n", "8.0 | \n", "
Swallowing Difficulty | \n", "1000.0 | \n", "3.746 | \n", "2.270383 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "5.0 | \n", "8.0 | \n", "
Clubbing of Finger Nails | \n", "1000.0 | \n", "3.923 | \n", "2.388048 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "5.0 | \n", "9.0 | \n", "
Frequent Cold | \n", "1000.0 | \n", "3.536 | \n", "1.832502 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "5.0 | \n", "7.0 | \n", "
Dry Cough | \n", "1000.0 | \n", "3.853 | \n", "2.039007 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "6.0 | \n", "7.0 | \n", "
Snoring | \n", "1000.0 | \n", "2.926 | \n", "1.474686 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "4.0 | \n", "7.0 | \n", "
\n", " | Smoking | \n", "Alcohol use | \n", "Level | \n", "
---|---|---|---|
index | \n", "\n", " | \n", " | \n", " |
4 | \n", "8 | \n", "8 | \n", "High | \n", "
20 | \n", "8 | \n", "8 | \n", "High | \n", "
22 | \n", "8 | \n", "8 | \n", "High | \n", "
46 | \n", "8 | \n", "8 | \n", "High | \n", "
68 | \n", "8 | \n", "8 | \n", "High | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
989 | \n", "7 | \n", "7 | \n", "High | \n", "
992 | \n", "7 | \n", "7 | \n", "High | \n", "
993 | \n", "7 | \n", "7 | \n", "High | \n", "
994 | \n", "7 | \n", "7 | \n", "High | \n", "
995 | \n", "7 | \n", "7 | \n", "High | \n", "
256 rows × 3 columns
\n", "\n", " | Patient Id | \n", "Age | \n", "Gender | \n", "Air Pollution | \n", "Alcohol use | \n", "Dust Allergy | \n", "OccuPational Hazards | \n", "Genetic Risk | \n", "chronic Lung Disease | \n", "Balanced Diet | \n", "... | \n", "Fatigue | \n", "Weight Loss | \n", "Shortness of Breath | \n", "Wheezing | \n", "Swallowing Difficulty | \n", "Clubbing of Finger Nails | \n", "Frequent Cold | \n", "Dry Cough | \n", "Snoring | \n", "Level | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
index | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "P1 | \n", "33 | \n", "1 | \n", "2 | \n", "4 | \n", "5 | \n", "4 | \n", "3 | \n", "2 | \n", "2 | \n", "... | \n", "3 | \n", "4 | \n", "2 | \n", "2 | \n", "3 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "1 | \n", "
1 | \n", "P10 | \n", "17 | \n", "1 | \n", "3 | \n", "1 | \n", "5 | \n", "3 | \n", "4 | \n", "2 | \n", "2 | \n", "... | \n", "1 | \n", "3 | \n", "7 | \n", "8 | \n", "6 | \n", "2 | \n", "1 | \n", "7 | \n", "2 | \n", "2 | \n", "
2 | \n", "P100 | \n", "35 | \n", "1 | \n", "4 | \n", "5 | \n", "6 | \n", "5 | \n", "5 | \n", "4 | \n", "6 | \n", "... | \n", "8 | \n", "7 | \n", "9 | \n", "2 | \n", "1 | \n", "4 | \n", "6 | \n", "7 | \n", "2 | \n", "3 | \n", "
3 | \n", "P1000 | \n", "37 | \n", "1 | \n", "7 | \n", "7 | \n", "7 | \n", "7 | \n", "6 | \n", "7 | \n", "7 | \n", "... | \n", "4 | \n", "2 | \n", "3 | \n", "1 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "5 | \n", "3 | \n", "
4 | \n", "P101 | \n", "46 | \n", "1 | \n", "6 | \n", "8 | \n", "7 | \n", "7 | \n", "7 | \n", "6 | \n", "7 | \n", "... | \n", "3 | \n", "2 | \n", "4 | \n", "1 | \n", "4 | \n", "2 | \n", "4 | \n", "2 | \n", "3 | \n", "3 | \n", "
5 rows × 25 columns
\n", "LogisticRegression(max_iter=200)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(max_iter=200)