{ "cells": [ { "cell_type": "markdown", "id": "033f13af", "metadata": {}, "source": [ "This dataset contains information on patients with lung cancer, including their age, gender, air pollution exposure, alcohol use, dust allergy, occupational hazards, genetic risk, chronic lung disease, balanced diet, obesity, smoking, passive smoker, chest pain, coughing of blood, fatigue, weight loss, shortness of breath, wheezing, swallowing difficulty, clubbing of finger nails, and snoring.\n", "\n", "Source: https://www.kaggle.com/datasets/thedevastator/cancer-patients-and-air-pollution-a-new-link/data" ] }, { "cell_type": "code", "execution_count": 5, "id": "7ce53ad1", "metadata": {}, "outputs": [], "source": [ "\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import plotly.figure_factory as ff\n", "import seaborn as sns\n", "sns.set()\n", "import plotly.express as px\n" ] }, { "cell_type": "code", "execution_count": null, "id": "3b9fd854", "metadata": {}, "outputs": [], "source": [ "pip install plotnine" ] }, { "cell_type": "code", "execution_count": 7, "id": "6d369f6b", "metadata": {}, "outputs": [], "source": [ "import plotnine" ] }, { "cell_type": "code", "execution_count": 8, "id": "73edef6d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Patient Id | \n", "Age | \n", "Gender | \n", "Air Pollution | \n", "Alcohol use | \n", "Dust Allergy | \n", "OccuPational Hazards | \n", "Genetic Risk | \n", "chronic Lung Disease | \n", "Balanced Diet | \n", "... | \n", "Fatigue | \n", "Weight Loss | \n", "Shortness of Breath | \n", "Wheezing | \n", "Swallowing Difficulty | \n", "Clubbing of Finger Nails | \n", "Frequent Cold | \n", "Dry Cough | \n", "Snoring | \n", "Level | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
index | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "P1 | \n", "33 | \n", "1 | \n", "2 | \n", "4 | \n", "5 | \n", "4 | \n", "3 | \n", "2 | \n", "2 | \n", "... | \n", "3 | \n", "4 | \n", "2 | \n", "2 | \n", "3 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "Low | \n", "
1 | \n", "P10 | \n", "17 | \n", "1 | \n", "3 | \n", "1 | \n", "5 | \n", "3 | \n", "4 | \n", "2 | \n", "2 | \n", "... | \n", "1 | \n", "3 | \n", "7 | \n", "8 | \n", "6 | \n", "2 | \n", "1 | \n", "7 | \n", "2 | \n", "Medium | \n", "
2 | \n", "P100 | \n", "35 | \n", "1 | \n", "4 | \n", "5 | \n", "6 | \n", "5 | \n", "5 | \n", "4 | \n", "6 | \n", "... | \n", "8 | \n", "7 | \n", "9 | \n", "2 | \n", "1 | \n", "4 | \n", "6 | \n", "7 | \n", "2 | \n", "High | \n", "
3 | \n", "P1000 | \n", "37 | \n", "1 | \n", "7 | \n", "7 | \n", "7 | \n", "7 | \n", "6 | \n", "7 | \n", "7 | \n", "... | \n", "4 | \n", "2 | \n", "3 | \n", "1 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "5 | \n", "High | \n", "
4 | \n", "P101 | \n", "46 | \n", "1 | \n", "6 | \n", "8 | \n", "7 | \n", "7 | \n", "7 | \n", "6 | \n", "7 | \n", "... | \n", "3 | \n", "2 | \n", "4 | \n", "1 | \n", "4 | \n", "2 | \n", "4 | \n", "2 | \n", "3 | \n", "High | \n", "
5 rows × 25 columns
\n", "\n", " | count | \n", "mean | \n", "std | \n", "min | \n", "25% | \n", "50% | \n", "75% | \n", "max | \n", "
---|---|---|---|---|---|---|---|---|
Age | \n", "1000.0 | \n", "37.174 | \n", "12.005493 | \n", "14.0 | \n", "27.75 | \n", "36.0 | \n", "45.0 | \n", "73.0 | \n", "
Gender | \n", "1000.0 | \n", "1.402 | \n", "0.490547 | \n", "1.0 | \n", "1.00 | \n", "1.0 | \n", "2.0 | \n", "2.0 | \n", "
Air Pollution | \n", "1000.0 | \n", "3.840 | \n", "2.030400 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "6.0 | \n", "8.0 | \n", "
Alcohol use | \n", "1000.0 | \n", "4.563 | \n", "2.620477 | \n", "1.0 | \n", "2.00 | \n", "5.0 | \n", "7.0 | \n", "8.0 | \n", "
Dust Allergy | \n", "1000.0 | \n", "5.165 | \n", "1.980833 | \n", "1.0 | \n", "4.00 | \n", "6.0 | \n", "7.0 | \n", "8.0 | \n", "
OccuPational Hazards | \n", "1000.0 | \n", "4.840 | \n", "2.107805 | \n", "1.0 | \n", "3.00 | \n", "5.0 | \n", "7.0 | \n", "8.0 | \n", "
Genetic Risk | \n", "1000.0 | \n", "4.580 | \n", "2.126999 | \n", "1.0 | \n", "2.00 | \n", "5.0 | \n", "7.0 | \n", "7.0 | \n", "
chronic Lung Disease | \n", "1000.0 | \n", "4.380 | \n", "1.848518 | \n", "1.0 | \n", "3.00 | \n", "4.0 | \n", "6.0 | \n", "7.0 | \n", "
Balanced Diet | \n", "1000.0 | \n", "4.491 | \n", "2.135528 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "7.0 | \n", "7.0 | \n", "
Obesity | \n", "1000.0 | \n", "4.465 | \n", "2.124921 | \n", "1.0 | \n", "3.00 | \n", "4.0 | \n", "7.0 | \n", "7.0 | \n", "
Smoking | \n", "1000.0 | \n", "3.948 | \n", "2.495902 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "7.0 | \n", "8.0 | \n", "
Passive Smoker | \n", "1000.0 | \n", "4.195 | \n", "2.311778 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "7.0 | \n", "8.0 | \n", "
Chest Pain | \n", "1000.0 | \n", "4.438 | \n", "2.280209 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "7.0 | \n", "9.0 | \n", "
Coughing of Blood | \n", "1000.0 | \n", "4.859 | \n", "2.427965 | \n", "1.0 | \n", "3.00 | \n", "4.0 | \n", "7.0 | \n", "9.0 | \n", "
Fatigue | \n", "1000.0 | \n", "3.856 | \n", "2.244616 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "5.0 | \n", "9.0 | \n", "
Weight Loss | \n", "1000.0 | \n", "3.855 | \n", "2.206546 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "6.0 | \n", "8.0 | \n", "
Shortness of Breath | \n", "1000.0 | \n", "4.240 | \n", "2.285087 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "6.0 | \n", "9.0 | \n", "
Wheezing | \n", "1000.0 | \n", "3.777 | \n", "2.041921 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "5.0 | \n", "8.0 | \n", "
Swallowing Difficulty | \n", "1000.0 | \n", "3.746 | \n", "2.270383 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "5.0 | \n", "8.0 | \n", "
Clubbing of Finger Nails | \n", "1000.0 | \n", "3.923 | \n", "2.388048 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "5.0 | \n", "9.0 | \n", "
Frequent Cold | \n", "1000.0 | \n", "3.536 | \n", "1.832502 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "5.0 | \n", "7.0 | \n", "
Dry Cough | \n", "1000.0 | \n", "3.853 | \n", "2.039007 | \n", "1.0 | \n", "2.00 | \n", "4.0 | \n", "6.0 | \n", "7.0 | \n", "
Snoring | \n", "1000.0 | \n", "2.926 | \n", "1.474686 | \n", "1.0 | \n", "2.00 | \n", "3.0 | \n", "4.0 | \n", "7.0 | \n", "
\n", " | Smoking | \n", "Alcohol use | \n", "Level | \n", "
---|---|---|---|
index | \n", "\n", " | \n", " | \n", " |
4 | \n", "8 | \n", "8 | \n", "High | \n", "
20 | \n", "8 | \n", "8 | \n", "High | \n", "
22 | \n", "8 | \n", "8 | \n", "High | \n", "
46 | \n", "8 | \n", "8 | \n", "High | \n", "
68 | \n", "8 | \n", "8 | \n", "High | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
989 | \n", "7 | \n", "7 | \n", "High | \n", "
992 | \n", "7 | \n", "7 | \n", "High | \n", "
993 | \n", "7 | \n", "7 | \n", "High | \n", "
994 | \n", "7 | \n", "7 | \n", "High | \n", "
995 | \n", "7 | \n", "7 | \n", "High | \n", "
256 rows × 3 columns
\n", "\n", " | Patient Id | \n", "Age | \n", "Gender | \n", "Air Pollution | \n", "Alcohol use | \n", "Dust Allergy | \n", "OccuPational Hazards | \n", "Genetic Risk | \n", "chronic Lung Disease | \n", "Balanced Diet | \n", "... | \n", "Fatigue | \n", "Weight Loss | \n", "Shortness of Breath | \n", "Wheezing | \n", "Swallowing Difficulty | \n", "Clubbing of Finger Nails | \n", "Frequent Cold | \n", "Dry Cough | \n", "Snoring | \n", "Level | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
index | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "P1 | \n", "33 | \n", "1 | \n", "2 | \n", "4 | \n", "5 | \n", "4 | \n", "3 | \n", "2 | \n", "2 | \n", "... | \n", "3 | \n", "4 | \n", "2 | \n", "2 | \n", "3 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "1 | \n", "
1 | \n", "P10 | \n", "17 | \n", "1 | \n", "3 | \n", "1 | \n", "5 | \n", "3 | \n", "4 | \n", "2 | \n", "2 | \n", "... | \n", "1 | \n", "3 | \n", "7 | \n", "8 | \n", "6 | \n", "2 | \n", "1 | \n", "7 | \n", "2 | \n", "2 | \n", "
2 | \n", "P100 | \n", "35 | \n", "1 | \n", "4 | \n", "5 | \n", "6 | \n", "5 | \n", "5 | \n", "4 | \n", "6 | \n", "... | \n", "8 | \n", "7 | \n", "9 | \n", "2 | \n", "1 | \n", "4 | \n", "6 | \n", "7 | \n", "2 | \n", "3 | \n", "
3 | \n", "P1000 | \n", "37 | \n", "1 | \n", "7 | \n", "7 | \n", "7 | \n", "7 | \n", "6 | \n", "7 | \n", "7 | \n", "... | \n", "4 | \n", "2 | \n", "3 | \n", "1 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "5 | \n", "3 | \n", "
4 | \n", "P101 | \n", "46 | \n", "1 | \n", "6 | \n", "8 | \n", "7 | \n", "7 | \n", "7 | \n", "6 | \n", "7 | \n", "... | \n", "3 | \n", "2 | \n", "4 | \n", "1 | \n", "4 | \n", "2 | \n", "4 | \n", "2 | \n", "3 | \n", "3 | \n", "
5 rows × 25 columns
\n", "LogisticRegression(max_iter=200)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(max_iter=200)