import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


# Load and normalize the data
def NormalizeData(data):
    """Return a cleaned, categorical copy of the stroke dataset.

    The input frame is not modified; all work happens on a copy.

    Steps applied to the columns that are present:
      * every text column is lower-cased;
      * ``smoking_status``: spaces are replaced with underscores;
      * ``work_type``: hyphens are replaced with underscores;
      * ``bmi``: binned into ``low`` (0, 21], ``mid`` (21, 28], ``high`` (28, 40];
      * ``age``: binned into left-closed decades ``18-29`` ... ``60-69`` and
        an open-ended ``70+``;
      * ``stroke``, ``hypertension``, ``heart_disease``: 1 -> ``yes``, 0 -> ``no``.

    Finally every row containing a missing value is dropped. Because values
    that fall outside the bins become NaN, this also removes rows with
    ``age`` below 18 and rows whose ``bmi`` is missing, <= 0 or > 40.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw dataset as read from the CSV file.

    Returns
    -------
    pandas.DataFrame
        New frame with the transformations above applied and incomplete
        rows removed. The original index labels are kept.
    """
    # Work on a copy so the caller's frame is left untouched.
    data = data.copy()

    # Lower-case every text column (plain object columns as well as columns
    # using pandas' dedicated string dtype).
    for col in data.columns:
        col_dtype = data[col].dtype
        if col_dtype == object or pd.api.types.is_string_dtype(col_dtype):
            data[col] = data[col].str.lower()

    # Literal (non-regex) separator clean-up.
    if 'smoking_status' in data.columns:
        data['smoking_status'] = data['smoking_status'].str.replace(" ", "_", regex=False)
    if 'work_type' in data.columns:
        data['work_type'] = data['work_type'].str.replace("-", "_", regex=False)

    if 'bmi' in data.columns:
        # NOTE(review): BMI above 40 falls outside the last bin, becomes NaN
        # and the row is dropped below -- confirm this exclusion is intended.
        bmi_bins = [0, 21, 28, 40]
        bmi_labels = ['low', 'mid', 'high']
        data['bmi'] = pd.cut(data['bmi'], bins=bmi_bins, labels=bmi_labels)

    if 'age' in data.columns:
        # right=False makes the intervals [18, 30), [30, 40), ... so that they
        # agree with the labels (age 30 belongs to '30-39', not '18-29').
        # The open upper edge matches the meaning of '70+'.
        age_bins = [18, 30, 40, 50, 60, 70, np.inf]
        age_labels = ['18-29', '30-39', '40-49', '50-59', '60-69', '70+']
        data['age'] = pd.cut(data['age'], bins=age_bins, labels=age_labels, right=False)

    # Binary indicator columns -> readable yes/no labels.
    yes_no = {1: 'yes', 0: 'no'}
    for col in ('stroke', 'hypertension', 'heart_disease'):
        if col in data.columns:
            data[col] = data[col].replace(yes_no)

    # Drop rows with any missing value, including values that fell outside
    # the bmi / age bins above.
    data = data.dropna()
    return data


data = pd.read_csv("healthcare-dataset-stroke-data.csv")
data = NormalizeData(data)
\n", - " | id | \n", - "gender | \n", - "age | \n", - "hypertension | \n", - "heart_disease | \n", - "ever_married | \n", - "work_type | \n", - "Residence_type | \n", - "avg_glucose_level | \n", - "bmi | \n", - "smoking_status | \n", - "stroke | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "9046 | \n", - "male | \n", - "60-69 | \n", - "no | \n", - "yes | \n", - "yes | \n", - "private | \n", - "urban | \n", - "228.69 | \n", - "high | \n", - "formerly_smoked | \n", - "yes | \n", - "
2 | \n", - "31112 | \n", - "male | \n", - "70+ | \n", - "no | \n", - "yes | \n", - "yes | \n", - "private | \n", - "rural | \n", - "105.92 | \n", - "high | \n", - "never_smoked | \n", - "yes | \n", - "
3 | \n", - "60182 | \n", - "female | \n", - "40-49 | \n", - "no | \n", - "no | \n", - "yes | \n", - "private | \n", - "urban | \n", - "171.23 | \n", - "high | \n", - "smokes | \n", - "yes | \n", - "
4 | \n", - "1665 | \n", - "female | \n", - "70+ | \n", - "yes | \n", - "no | \n", - "yes | \n", - "self_employed | \n", - "rural | \n", - "174.12 | \n", - "mid | \n", - "never_smoked | \n", - "yes | \n", - "
5 | \n", - "56669 | \n", - "male | \n", - "70+ | \n", - "no | \n", - "no | \n", - "yes | \n", - "private | \n", - "urban | \n", - "186.21 | \n", - "high | \n", - "formerly_smoked | \n", - "yes | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
5102 | \n", - "45010 | \n", - "female | \n", - "50-59 | \n", - "no | \n", - "no | \n", - "yes | \n", - "private | \n", - "rural | \n", - "77.93 | \n", - "mid | \n", - "never_smoked | \n", - "no | \n", - "
5106 | \n", - "44873 | \n", - "female | \n", - "70+ | \n", - "no | \n", - "no | \n", - "yes | \n", - "self_employed | \n", - "urban | \n", - "125.20 | \n", - "high | \n", - "never_smoked | \n", - "no | \n", - "
5107 | \n", - "19723 | \n", - "female | \n", - "30-39 | \n", - "no | \n", - "no | \n", - "yes | \n", - "self_employed | \n", - "rural | \n", - "82.99 | \n", - "high | \n", - "never_smoked | \n", - "no | \n", - "
5108 | \n", - "37544 | \n", - "male | \n", - "50-59 | \n", - "no | \n", - "no | \n", - "yes | \n", - "private | \n", - "rural | \n", - "166.29 | \n", - "mid | \n", - "formerly_smoked | \n", - "no | \n", - "
5109 | \n", - "44679 | \n", - "female | \n", - "40-49 | \n", - "no | \n", - "no | \n", - "yes | \n", - "govt_job | \n", - "urban | \n", - "85.28 | \n", - "mid | \n", - "unknown | \n", - "no | \n", - "
3681 rows × 12 columns
\n", - "