{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "orig_nbformat": 2, "kernelspec": { "name": "python385jvsc74a57bd0916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1", "display_name": "Python 3.8.5 64-bit" }, "metadata": { "interpreter": { "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" } } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "\n", "def NormalizeData(data):\n", " for col in data.columns:\n", " if data[col].dtype == object: \n", " data[col] = data[col].str.lower()\n", " if col == 'smoking_status':\n", " data[col] = data[col].str.replace(\" \", \"_\")\n", " if col == 'work_type':\n", " data[col] = data[col].str.replace(\"-\", \"_\")\n", " if col == 'bmi':\n", " bins = [0, 21, 28, 40]\n", " labels=['low','mid','high']\n", " data[col] = pd.cut(data[col], bins=bins, labels=labels)\n", " if col == 'stroke':\n", " data[col] = data[col].replace({1: 'yes'})\n", " data[col] = data[col].replace({0: 'no'})\n", " if col == 'hypertension':\n", " data[col] = data[col].replace({1: 'yes'})\n", " data[col] = data[col].replace({0: 'no'})\n", " if col == 'heart_disease':\n", " data[col] = data[col].replace({1: 'yes'})\n", " data[col] = data[col].replace({0: 'no'})\n", " data = data.dropna()\n", " return data\n", "\n", "data = pd.read_csv(\"healthcare-dataset-stroke-data.csv\")" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id gender age hypertension heart_disease ever_married \\\n", "0 9046 male 67.0 no yes yes \n", "2 31112 male 80.0 no yes yes \n", "3 60182 female 49.0 no no yes \n", "4 1665 female 79.0 yes no yes \n", "5 56669 male 81.0 no no yes \n", "... ... ... ... ... ... ... \n", "5104 14180 female 13.0 no no no \n", "5106 44873 female 81.0 no no yes \n", "5107 19723 female 35.0 no no yes \n", "5108 37544 male 51.0 no no yes \n", "5109 44679 female 44.0 no no yes \n", "\n", " work_type Residence_type avg_glucose_level bmi smoking_status \\\n", "0 private urban 228.69 high formerly_smoked \n", "2 private rural 105.92 high never_smoked \n", "3 private urban 171.23 high smokes \n", "4 self_employed rural 174.12 mid never_smoked \n", "5 private urban 186.21 high formerly_smoked \n", "... ... ... ... ... ... \n", "5104 children rural 103.08 low unknown \n", "5106 self_employed urban 125.20 high never_smoked \n", "5107 self_employed rural 82.99 high never_smoked \n", "5108 private rural 166.29 mid formerly_smoked \n", "5109 govt_job urban 85.28 mid unknown \n", "\n", " stroke \n", "0 yes \n", "2 yes \n", "3 yes \n", "4 yes \n", "5 yes \n", "... ... \n", "5104 no \n", "5106 no \n", "5107 no \n", "5108 no \n", "5109 no \n", "\n", "[4501 rows x 12 columns]" ], "text/html": "
\n | id | \ngender | \nage | \nhypertension | \nheart_disease | \never_married | \nwork_type | \nResidence_type | \navg_glucose_level | \nbmi | \nsmoking_status | \nstroke | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n9046 | \nmale | \n67.0 | \nno | \nyes | \nyes | \nprivate | \nurban | \n228.69 | \nhigh | \nformerly_smoked | \nyes | \n
2 | \n31112 | \nmale | \n80.0 | \nno | \nyes | \nyes | \nprivate | \nrural | \n105.92 | \nhigh | \nnever_smoked | \nyes | \n
3 | \n60182 | \nfemale | \n49.0 | \nno | \nno | \nyes | \nprivate | \nurban | \n171.23 | \nhigh | \nsmokes | \nyes | \n
4 | \n1665 | \nfemale | \n79.0 | \nyes | \nno | \nyes | \nself_employed | \nrural | \n174.12 | \nmid | \nnever_smoked | \nyes | \n
5 | \n56669 | \nmale | \n81.0 | \nno | \nno | \nyes | \nprivate | \nurban | \n186.21 | \nhigh | \nformerly_smoked | \nyes | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
5104 | \n14180 | \nfemale | \n13.0 | \nno | \nno | \nno | \nchildren | \nrural | \n103.08 | \nlow | \nunknown | \nno | \n
5106 | \n44873 | \nfemale | \n81.0 | \nno | \nno | \nyes | \nself_employed | \nurban | \n125.20 | \nhigh | \nnever_smoked | \nno | \n
5107 | \n19723 | \nfemale | \n35.0 | \nno | \nno | \nyes | \nself_employed | \nrural | \n82.99 | \nhigh | \nnever_smoked | \nno | \n
5108 | \n37544 | \nmale | \n51.0 | \nno | \nno | \nyes | \nprivate | \nrural | \n166.29 | \nmid | \nformerly_smoked | \nno | \n
5109 | \n44679 | \nfemale | \n44.0 | \nno | \nno | \nyes | \ngovt_job | \nurban | \n85.28 | \nmid | \nunknown | \nno | \n
4501 rows × 12 columns
\n\n | gender | \never_married | \nResidence_type | \nbmi | \nsmoking_status | \nwork_type | \nstroke | \nhypertension | \nheart_disease | \n
---|---|---|---|---|---|---|---|---|---|
0 | \nmale | \nyes | \nurban | \nhigh | \nformerly_smoked | \nprivate | \nyes | \nno | \nyes | \n
2 | \nmale | \nyes | \nrural | \nhigh | \nnever_smoked | \nprivate | \nyes | \nno | \nyes | \n
3 | \nfemale | \nyes | \nurban | \nhigh | \nsmokes | \nprivate | \nyes | \nno | \nno | \n
4 | \nfemale | \nyes | \nrural | \nmid | \nnever_smoked | \nself_employed | \nyes | \nyes | \nno | \n
5 | \nmale | \nyes | \nurban | \nhigh | \nformerly_smoked | \nprivate | \nyes | \nno | \nno | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
5104 | \nfemale | \nno | \nrural | \nlow | \nunknown | \nchildren | \nno | \nno | \nno | \n
5106 | \nfemale | \nyes | \nurban | \nhigh | \nnever_smoked | \nself_employed | \nno | \nno | \nno | \n
5107 | \nfemale | \nyes | \nrural | \nhigh | \nnever_smoked | \nself_employed | \nno | \nno | \nno | \n
5108 | \nmale | \nyes | \nrural | \nmid | \nformerly_smoked | \nprivate | \nno | \nno | \nno | \n
5109 | \nfemale | \nyes | \nurban | \nmid | \nunknown | \ngovt_job | \nno | \nno | \nno | \n
4501 rows × 9 columns
\n