794 lines
36 KiB
Plaintext
794 lines
36 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: kaggle in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.5.12)\n",
|
||
"Requirement already satisfied: tqdm in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (4.63.0)\n",
|
||
"Requirement already satisfied: certifi in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2021.10.8)\n",
|
||
"Requirement already satisfied: six>=1.10 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (1.16.0)\n",
|
||
"Requirement already satisfied: requests in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2.27.1)\n",
|
||
"Requirement already satisfied: python-slugify in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (6.1.1)\n",
|
||
"Requirement already satisfied: urllib3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (1.26.9)\n",
|
||
"Requirement already satisfied: python-dateutil in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2.8.2)\n",
|
||
"Requirement already satisfied: text-unidecode>=1.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
|
||
"Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from requests->kaggle) (2.0.12)\n",
|
||
"Requirement already satisfied: idna<4,>=2.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from requests->kaggle) (3.3)\n",
|
||
"Requirement already satisfied: colorama in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from tqdm->kaggle) (0.4.4)\n",
|
||
"Requirement already satisfied: pandas in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.3.5)\n",
|
||
"Requirement already satisfied: pytz>=2017.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (2022.1)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (2.8.2)\n",
|
||
"Requirement already satisfied: numpy>=1.17.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (1.21.5)\n",
|
||
"Requirement already satisfied: six>=1.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n",
|
||
"Requirement already satisfied: seaborn in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (0.11.2)\n",
|
||
"Requirement already satisfied: scipy>=1.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.7.3)\n",
|
||
"Requirement already satisfied: numpy>=1.15 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.21.5)\n",
|
||
"Requirement already satisfied: matplotlib>=2.2 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (3.5.1)\n",
|
||
"Requirement already satisfied: pandas>=0.23 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.3.5)\n",
|
||
"Requirement already satisfied: fonttools>=4.22.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (4.31.1)\n",
|
||
"Requirement already satisfied: pyparsing>=2.2.1 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (3.0.7)\n",
|
||
"Requirement already satisfied: cycler>=0.10 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n",
|
||
"Requirement already satisfied: packaging>=20.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (21.3)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.7 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n",
|
||
"Requirement already satisfied: pillow>=6.2.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (9.0.1)\n",
|
||
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.4.0)\n",
|
||
"Requirement already satisfied: typing-extensions in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (4.1.1)\n",
|
||
"Requirement already satisfied: pytz>=2017.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas>=0.23->seaborn) (2022.1)\n",
|
||
"Requirement already satisfied: six>=1.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n",
|
||
"Requirement already satisfied: torch in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.11.0)\n",
|
||
"Requirement already satisfied: typing-extensions in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from torch) (4.1.1)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Install the notebook's dependencies into the environment of the running kernel.\n",
|
||
"# %pip (unlike !pip, which runs in a subshell) targets the interpreter behind this kernel.\n",
|
||
"%pip install kaggle\n",
|
||
"%pip install pandas\n",
|
||
"%pip install torch\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"401 - Unauthorized\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 1. Download the dataset archive from Kaggle\n",
|
||
"# NOTE(review): the recorded output is '401 - Unauthorized'; presumably the Kaggle API\n",
|
||
"# credentials were not configured when this cell was last run - confirm.\n",
|
||
"!kaggle datasets download --dataset joniarroba/noshowappointments"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"'unzip' is not recognized as an internal or external command,\n",
|
||
"operable program or batch file.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Unpack the downloaded archive with the standard library instead of the external\n",
|
||
"# `unzip` program, which is not available on every platform (see the recorded error).\n",
|
||
"import zipfile\n",
|
||
"\n",
|
||
"with zipfile.ZipFile('noshowappointments.zip') as archive:\n",
|
||
"    # Extracts into the working directory, replacing files that already exist (like `unzip -o`).\n",
|
||
"    archive.extractall()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>PatientId</th>\n",
|
||
" <th>AppointmentID</th>\n",
|
||
" <th>Gender</th>\n",
|
||
" <th>ScheduledDay</th>\n",
|
||
" <th>AppointmentDay</th>\n",
|
||
" <th>Age</th>\n",
|
||
" <th>Neighbourhood</th>\n",
|
||
" <th>Scholarship</th>\n",
|
||
" <th>Hipertension</th>\n",
|
||
" <th>Diabetes</th>\n",
|
||
" <th>Alcoholism</th>\n",
|
||
" <th>Handcap</th>\n",
|
||
" <th>SMS_received</th>\n",
|
||
" <th>No-show</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2.987250e+13</td>\n",
|
||
" <td>5642903</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-04-29T18:38:08Z</td>\n",
|
||
" <td>2016-04-29T00:00:00Z</td>\n",
|
||
" <td>62</td>\n",
|
||
" <td>JARDIM DA PENHA</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>5.589978e+14</td>\n",
|
||
" <td>5642503</td>\n",
|
||
" <td>M</td>\n",
|
||
" <td>2016-04-29T16:08:27Z</td>\n",
|
||
" <td>2016-04-29T00:00:00Z</td>\n",
|
||
" <td>56</td>\n",
|
||
" <td>JARDIM DA PENHA</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>4.262962e+12</td>\n",
|
||
" <td>5642549</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-04-29T16:19:04Z</td>\n",
|
||
" <td>2016-04-29T00:00:00Z</td>\n",
|
||
" <td>62</td>\n",
|
||
" <td>MATA DA PRAIA</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>8.679512e+11</td>\n",
|
||
" <td>5642828</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-04-29T17:29:31Z</td>\n",
|
||
" <td>2016-04-29T00:00:00Z</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>PONTAL DE CAMBURI</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8.841186e+12</td>\n",
|
||
" <td>5642494</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-04-29T16:07:23Z</td>\n",
|
||
" <td>2016-04-29T00:00:00Z</td>\n",
|
||
" <td>56</td>\n",
|
||
" <td>JARDIM DA PENHA</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110522</th>\n",
|
||
" <td>2.572134e+12</td>\n",
|
||
" <td>5651768</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-05-03T09:15:35Z</td>\n",
|
||
" <td>2016-06-07T00:00:00Z</td>\n",
|
||
" <td>56</td>\n",
|
||
" <td>MARIA ORTIZ</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110523</th>\n",
|
||
" <td>3.596266e+12</td>\n",
|
||
" <td>5650093</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-05-03T07:27:33Z</td>\n",
|
||
" <td>2016-06-07T00:00:00Z</td>\n",
|
||
" <td>51</td>\n",
|
||
" <td>MARIA ORTIZ</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110524</th>\n",
|
||
" <td>1.557663e+13</td>\n",
|
||
" <td>5630692</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-04-27T16:03:52Z</td>\n",
|
||
" <td>2016-06-07T00:00:00Z</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>MARIA ORTIZ</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110525</th>\n",
|
||
" <td>9.213493e+13</td>\n",
|
||
" <td>5630323</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-04-27T15:09:23Z</td>\n",
|
||
" <td>2016-06-07T00:00:00Z</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>MARIA ORTIZ</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110526</th>\n",
|
||
" <td>3.775115e+14</td>\n",
|
||
" <td>5629448</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-04-27T13:30:56Z</td>\n",
|
||
" <td>2016-06-07T00:00:00Z</td>\n",
|
||
" <td>54</td>\n",
|
||
" <td>MARIA ORTIZ</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>110527 rows × 14 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" PatientId AppointmentID Gender ScheduledDay \\\n",
|
||
"0 2.987250e+13 5642903 F 2016-04-29T18:38:08Z \n",
|
||
"1 5.589978e+14 5642503 M 2016-04-29T16:08:27Z \n",
|
||
"2 4.262962e+12 5642549 F 2016-04-29T16:19:04Z \n",
|
||
"3 8.679512e+11 5642828 F 2016-04-29T17:29:31Z \n",
|
||
"4 8.841186e+12 5642494 F 2016-04-29T16:07:23Z \n",
|
||
"... ... ... ... ... \n",
|
||
"110522 2.572134e+12 5651768 F 2016-05-03T09:15:35Z \n",
|
||
"110523 3.596266e+12 5650093 F 2016-05-03T07:27:33Z \n",
|
||
"110524 1.557663e+13 5630692 F 2016-04-27T16:03:52Z \n",
|
||
"110525 9.213493e+13 5630323 F 2016-04-27T15:09:23Z \n",
|
||
"110526 3.775115e+14 5629448 F 2016-04-27T13:30:56Z \n",
|
||
"\n",
|
||
" AppointmentDay Age Neighbourhood Scholarship \\\n",
|
||
"0 2016-04-29T00:00:00Z 62 JARDIM DA PENHA 0 \n",
|
||
"1 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 \n",
|
||
"2 2016-04-29T00:00:00Z 62 MATA DA PRAIA 0 \n",
|
||
"3 2016-04-29T00:00:00Z 8 PONTAL DE CAMBURI 0 \n",
|
||
"4 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 \n",
|
||
"... ... ... ... ... \n",
|
||
"110522 2016-06-07T00:00:00Z 56 MARIA ORTIZ 0 \n",
|
||
"110523 2016-06-07T00:00:00Z 51 MARIA ORTIZ 0 \n",
|
||
"110524 2016-06-07T00:00:00Z 21 MARIA ORTIZ 0 \n",
|
||
"110525 2016-06-07T00:00:00Z 38 MARIA ORTIZ 0 \n",
|
||
"110526 2016-06-07T00:00:00Z 54 MARIA ORTIZ 0 \n",
|
||
"\n",
|
||
" Hipertension Diabetes Alcoholism Handcap SMS_received No-show \n",
|
||
"0 1 0 0 0 0 No \n",
|
||
"1 0 0 0 0 0 No \n",
|
||
"2 0 0 0 0 0 No \n",
|
||
"3 0 0 0 0 0 No \n",
|
||
"4 1 1 0 0 0 No \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"110522 0 0 0 0 1 No \n",
|
||
"110523 0 0 0 0 1 No \n",
|
||
"110524 0 0 0 0 1 No \n",
|
||
"110525 0 0 0 0 1 No \n",
|
||
"110526 0 0 0 0 1 No \n",
|
||
"\n",
|
||
"[110527 rows x 14 columns]"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"# Load the appointment records from the extracted CSV file.\n",
|
||
"no_shows = pd.read_csv(filepath_or_buffer='KaggleV2-May-2016.csv')\n",
|
||
"# Last expression of the cell: the notebook renders a preview of the frame.\n",
|
||
"no_shows"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 2. Train/test split\n",
|
||
"import torch\n",
|
||
"\n",
|
||
"# Fixed seed so that the split is identical on every run.\n",
|
||
"SPLIT_SEED = 42\n",
|
||
"\n",
|
||
"train_size = int(0.8 * len(no_shows))\n",
|
||
"test_size = len(no_shows) - train_size\n",
|
||
"\n",
|
||
"# torch.utils.data.random_split would wrap the DataFrame in Subset objects whose integer\n",
|
||
"# indexing is forwarded to DataFrame[...] (a column lookup), so rows could not be read back.\n",
|
||
"# Shuffle the row positions with torch and slice the DataFrame by position instead.\n",
|
||
"split_generator = torch.Generator().manual_seed(SPLIT_SEED)\n",
|
||
"shuffled_positions = torch.randperm(len(no_shows), generator=split_generator).tolist()\n",
|
||
"no_shows_train = no_shows.iloc[shuffled_positions[:train_size]]\n",
|
||
"no_shows_test = no_shows.iloc[shuffled_positions[train_size:]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Wielkosc zbioru: 110527, podzbiór train: 88421, podzbiór test 22106.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>PatientId</th>\n",
|
||
" <th>AppointmentID</th>\n",
|
||
" <th>Gender</th>\n",
|
||
" <th>ScheduledDay</th>\n",
|
||
" <th>AppointmentDay</th>\n",
|
||
" <th>Age</th>\n",
|
||
" <th>Neighbourhood</th>\n",
|
||
" <th>Scholarship</th>\n",
|
||
" <th>Hipertension</th>\n",
|
||
" <th>Diabetes</th>\n",
|
||
" <th>Alcoholism</th>\n",
|
||
" <th>Handcap</th>\n",
|
||
" <th>SMS_received</th>\n",
|
||
" <th>No-show</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>1.105270e+05</td>\n",
|
||
" <td>1.105270e+05</td>\n",
|
||
" <td>110527</td>\n",
|
||
" <td>110527</td>\n",
|
||
" <td>110527</td>\n",
|
||
" <td>110527.000000</td>\n",
|
||
" <td>110527</td>\n",
|
||
" <td>110527.000000</td>\n",
|
||
" <td>110527.000000</td>\n",
|
||
" <td>110527.000000</td>\n",
|
||
" <td>110527.000000</td>\n",
|
||
" <td>110527.000000</td>\n",
|
||
" <td>110527.000000</td>\n",
|
||
" <td>110527</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>103549</td>\n",
|
||
" <td>27</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>81</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>2016-05-06T07:09:54Z</td>\n",
|
||
" <td>2016-06-06T00:00:00Z</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>JARDIM CAMBURI</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>No</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>71840</td>\n",
|
||
" <td>24</td>\n",
|
||
" <td>4692</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>7717</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>88208</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>1.474963e+14</td>\n",
|
||
" <td>5.675305e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>37.088874</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.098266</td>\n",
|
||
" <td>0.197246</td>\n",
|
||
" <td>0.071865</td>\n",
|
||
" <td>0.030400</td>\n",
|
||
" <td>0.022248</td>\n",
|
||
" <td>0.321026</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>2.560949e+14</td>\n",
|
||
" <td>7.129575e+04</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>23.110205</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.297675</td>\n",
|
||
" <td>0.397921</td>\n",
|
||
" <td>0.258265</td>\n",
|
||
" <td>0.171686</td>\n",
|
||
" <td>0.161543</td>\n",
|
||
" <td>0.466873</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>3.921784e+04</td>\n",
|
||
" <td>5.030230e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>4.172614e+12</td>\n",
|
||
" <td>5.640286e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>18.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>3.173184e+13</td>\n",
|
||
" <td>5.680573e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>37.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>9.439172e+13</td>\n",
|
||
" <td>5.725524e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>55.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>9.999816e+14</td>\n",
|
||
" <td>5.790484e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>115.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>4.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" PatientId AppointmentID Gender ScheduledDay \\\n",
|
||
"count 1.105270e+05 1.105270e+05 110527 110527 \n",
|
||
"unique NaN NaN 2 103549 \n",
|
||
"top NaN NaN F 2016-05-06T07:09:54Z \n",
|
||
"freq NaN NaN 71840 24 \n",
|
||
"mean 1.474963e+14 5.675305e+06 NaN NaN \n",
|
||
"std 2.560949e+14 7.129575e+04 NaN NaN \n",
|
||
"min 3.921784e+04 5.030230e+06 NaN NaN \n",
|
||
"25% 4.172614e+12 5.640286e+06 NaN NaN \n",
|
||
"50% 3.173184e+13 5.680573e+06 NaN NaN \n",
|
||
"75% 9.439172e+13 5.725524e+06 NaN NaN \n",
|
||
"max 9.999816e+14 5.790484e+06 NaN NaN \n",
|
||
"\n",
|
||
" AppointmentDay Age Neighbourhood Scholarship \\\n",
|
||
"count 110527 110527.000000 110527 110527.000000 \n",
|
||
"unique 27 NaN 81 NaN \n",
|
||
"top 2016-06-06T00:00:00Z NaN JARDIM CAMBURI NaN \n",
|
||
"freq 4692 NaN 7717 NaN \n",
|
||
"mean NaN 37.088874 NaN 0.098266 \n",
|
||
"std NaN 23.110205 NaN 0.297675 \n",
|
||
"min NaN -1.000000 NaN 0.000000 \n",
|
||
"25% NaN 18.000000 NaN 0.000000 \n",
|
||
"50% NaN 37.000000 NaN 0.000000 \n",
|
||
"75% NaN 55.000000 NaN 0.000000 \n",
|
||
"max NaN 115.000000 NaN 1.000000 \n",
|
||
"\n",
|
||
" Hipertension Diabetes Alcoholism Handcap \\\n",
|
||
"count 110527.000000 110527.000000 110527.000000 110527.000000 \n",
|
||
"unique NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN \n",
|
||
"mean 0.197246 0.071865 0.030400 0.022248 \n",
|
||
"std 0.397921 0.258265 0.171686 0.161543 \n",
|
||
"min 0.000000 0.000000 0.000000 0.000000 \n",
|
||
"25% 0.000000 0.000000 0.000000 0.000000 \n",
|
||
"50% 0.000000 0.000000 0.000000 0.000000 \n",
|
||
"75% 0.000000 0.000000 0.000000 0.000000 \n",
|
||
"max 1.000000 1.000000 1.000000 4.000000 \n",
|
||
"\n",
|
||
" SMS_received No-show \n",
|
||
"count 110527.000000 110527 \n",
|
||
"unique NaN 2 \n",
|
||
"top NaN No \n",
|
||
"freq NaN 88208 \n",
|
||
"mean 0.321026 NaN \n",
|
||
"std 0.466873 NaN \n",
|
||
"min 0.000000 NaN \n",
|
||
"25% 0.000000 NaN \n",
|
||
"50% 0.000000 NaN \n",
|
||
"75% 1.000000 NaN \n",
|
||
"max 1.000000 NaN "
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 3. Statistics\n",
|
||
"# Size of the full dataset and of the train/test subsets\n",
|
||
"print(f\"Wielkosc zbioru: {len(no_shows)}, podzbiór train: {train_size}, podzbiór test {test_size}.\")\n",
|
||
"# Summary of every column, numeric and non-numeric alike (rendered as the cell output)\n",
|
||
"no_shows.describe(include=\"all\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<AxesSubplot:title={'center':'No-show'}>"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEOCAYAAABrSnsUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQnUlEQVR4nO3df6zddX3H8efL1iLC+H3HpK20G3WuskyxQcD4CxYoopYt/gDnaAjaGFFwwWmdm2QoiUaFiUETIiCgszJ0oUJdR1CM04DcikELIjflR1tBLpQfIgoU3vvjfKrHcm/vqW3vufQ8H8nN/X4/38/3nM9J2vu853vOaVNVSJIG23P6vQBJUv8ZA0mSMZAkGQNJEsZAkoQxkCRhDKQdIsl1Sd7Z73VIvTIGGihJ7kxyX5LdusbemeS6Pi5L6jtjoEE0DTi934uQphJjoEH0KeADSfba/ECSI5LcmOTh9v2I8W4kyfOSfDnJA0keavP375pyYJLvJ/lVkv9Nsl/XuW9Ksrqdd12Sv2rjJyf5Zte825P8V9f+2iQv3baHLz2TMdAgGgauAz7QPZhkH+Bq4DxgX+Ac4Ook+45zO4uBPYHZbf67gd90HX87cDLwp8CMTfeX5EXAV4H3A0PACuCbSWYA3wVeleQ5SQ5o5x3ezvtzYHfg5j/6kUvjMAYaVB8F3pdkqGvsOOD2qrqsqjZW1VeBnwFvHOc2nqQTgYOq6qmqWlVVj3Qdv7iqfl5VvwEuB17axt8GXF1V11TVk8CngV2BI6pqDfCrNvfVwErgF0leDLwG+F5VPb3Nj17azPR+L0Dqh6r6aZKrgKXArW34AOCuzabeBcwESPJo1/h84DI6zwqWtUtOXwY+0n7AA9zbNf8xOr/VP+N+qurpJGs33Q+dZwevBQ5q2w/RCcHhbV/a7nxmoEF2JvAufv9D+BfAgZvNeSGwHqCqdu/6uruqnqyqf6+q+cARwBuAk3q43z+4nyShE5X1bWhTDF7Vtr9LJwavwRhoBzEGGlhVNQJ8DTitDa0AXpTk7UmmJ3kbnWcAV411fpLXJfnrJNOAR+hcNurlEs7lwHFJjkryXOAM4HHgB+34d4HXAbtW1Trge8BCOpekbvojHqo0IWOgQXcWsBtAVT1A57f7M4AHgA8Cb6iq+8c598+AK+iE4FY6P8Qvm+gOq+o24B3A54D76bwm8caqeqId/znwKJ0I0F6HWAN8v6qe+qMepTSB+J/bSJJ8ZiBJMgaSJGMgScIYSJJ4Fn/obL/99qs5c+b0exmS9KyxatWq+6tqaKxjz9oYzJkzh+Hh4X4vQ5KeNZJs/gn73/EykSTJGEiSjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJJ4Fn8C+dlgztKr+72Encqdnziu30uQdlo+M5AkGQNJkjGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJNFjDJL8U5LVSX6a5KtJnpdkbpIbkowk+VqSGW3uLm1/pB2f03U7H27jtyU5pmt8YRsbSbJ0uz9KSdIWTRiDJDOB04AFVXUwMA04AfgkcG5VHQQ8CJzSTjkFeLCNn9vmkWR+O+8lwELg80mmJZkGnA8cC8wHTmxzJUmTpNfLRNOBXZNMB54P3AMcCVzRjl8CHN+2F7V92vGjkqSNL6uqx6vqDmAEOLR9jVTVmqp6AljW5kqSJsmEMaiq9cCngbvpROBhYBXwUFVtbNPWATPb9kxgbTt3Y5u/b/f4ZueMN/4MSZYkGU4yPDo62svjkyT1oJfLRHvT+U19LnAAsBudyzyTrqouqKoFVbVgaGioH0uQpJ1SL5eJ/ha4o6pGq+pJ4BvAK4G92mUjgFnA+ra9HpgN0I7vCTzQPb7ZOeONS5ImSS8xuBs4LMnz27X/o4BbgO8Ab25zFgNXtu3lbZ92/NtVVW38hPZuo7nAPOCHwI3AvPbupBl0XmRevu0PTZ
LUq+kTTaiqG5JcAfwI2AjcBFwAXA0sS/LxNnZhO+VC4LIkI8AGOj/cqarVSS6nE5KNwKlV9RRAkvcCK+m8U+miqlq9/R6iJGkiE8YAoKrOBM7cbHgNnXcCbT73t8Bbxrmds4GzxxhfAazoZS2SpO3PTyBLkoyBJMkYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgSaLHGCTZK8kVSX6W5NYkhyfZJ8k1SW5v3/duc5PkvCQjSW5OckjX7Sxu829Psrhr/OVJftLOOS9Jtv9DlSSNp9dnBp8F/qeqXgz8DXArsBS4tqrmAde2fYBjgXntawnwBYAk+wBnAq8ADgXO3BSQNuddXect3LaHJUnaGhPGIMmewKuBCwGq6omqeghYBFzSpl0CHN+2FwGXVsf1wF5JXgAcA1xTVRuq6kHgGmBhO7ZHVV1fVQVc2nVbkqRJ0Mszg7nAKHBxkpuSfDHJbsD+VXVPm3MvsH/bngms7Tp/XRvb0vi6McafIcmSJMNJhkdHR3tYuiSpF73EYDpwCPCFqnoZ8Gt+f0kIgPYbfW3/5f2hqrqgqhZU1YKhoaEdfXeSNDB6icE6YF1V3dD2r6ATh1+2Szy07/e14+uB2V3nz2pjWxqfNca4JGmSTBiDqroXWJvkL9vQUcAtwHJg0zuCFgNXtu3lwEntXUWHAQ+3y0krgaOT7N1eOD4aWNmOPZLksPYuopO6bkuSNAmm9zjvfcBXkswA1gAn0wnJ5UlOAe4C3trmrgBeD4wAj7W5VNWGJB8DbmzzzqqqDW37PcCXgF2Bb7UvSdIk6SkGVfVjYMEYh44aY24Bp45zOxcBF40xPgwc3MtaJEnbn59AliQZA0mSMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJLEVsQgybQkNyW5qu3PTXJDkpEkX0syo43v0vZH2vE5Xbfx4TZ+W5JjusYXtrGRJEu34+OTJPVga54ZnA7c2rX/SeDcqjoIeBA4pY2fAjzYxs9t80gyHzgBeAmwEPh8C8w04HzgWGA+cGKbK0maJD3FIMks4Djgi20/wJHAFW3KJcDxbXtR26cdP6rNXwQsq6rHq+oOYAQ4tH2NVNWaqnoCWNbmSpImSa/PDP4D+CDwdNvfF3ioqja2/XXAzLY9E1gL0I4/3Ob/bnyzc8Ybf4YkS5IMJxkeHR3tcemSpIlMGIMkbwDuq6pVk7CeLaqqC6pqQVUtGBoa6vdyJGmnMb2HOa8E3pTk9cDzgD2AzwJ7JZnefvufBaxv89cDs4F1SaYDewIPdI1v0n3OeOOSpEkw4TODqvpwVc2qqjl0XgD+dlX9A/Ad4M1t2mLgyra9vO3Tjn+7qqqNn9DebTQXmAf8ELgRmNfenTSj3cfy7fLoJEk96eWZwXg+BCxL8nHgJuDCNn4hcFmSEWADnR/uVNXqJJcDtwAbgVOr6imAJO8FVgLTgIuqavU2rEuStJW2KgZVdR1wXdteQ+edQJvP+S3wlnHOPxs4e4zxFcCKrVmLJGn78RPIkiRjIEkyBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkYHq/FyCpP+YsvbrfS9ip3P
mJ4/q9hG3iMwNJkjGQJPUQgySzk3wnyS1JVic5vY3vk+SaJLe373u38SQ5L8lIkpuTHNJ1W4vb/NuTLO4af3mSn7RzzkuSHfFgJUlj6+WZwUbgjKqaDxwGnJpkPrAUuLaq5gHXtn2AY4F57WsJ8AXoxAM4E3gFcChw5qaAtDnv6jpv4bY/NElSryaMQVXdU1U/atu/Am4FZgKLgEvatEuA49v2IuDS6rge2CvJC4BjgGuqakNVPQhcAyxsx/aoquurqoBLu25LkjQJtuo1gyRzgJcBNwD7V9U97dC9wP5teyawtuu0dW1sS+Prxhgf6/6XJBlOMjw6Oro1S5ckbUHPMUiyO/B14P1V9Uj3sfYbfW3ntT1DVV1QVQuqasHQ0NCOvjtJGhg9xSDJc+mE4CtV9Y02/Mt2iYf2/b42vh6Y3XX6rDa2pfFZY4xLkiZJL+8mCnAhcGtVndN1aDmw6R1Bi4Eru8ZPau8qOgx4uF1OWgkcnWTv9sLx0cDKduyRJIe1+zqp67YkSZOgl08gvxL4R+AnSX7cxv4F+ARweZJTgLuAt7ZjK4DXAyPAY8DJAFW1IcnHgBvbvLOqakPbfg/wJWBX4FvtS5I0SSaMQVX9HzDe+/6PGmN+AaeOc1sXAReNMT4MHDzRWiRJO4afQJYkGQNJkjGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCSxBSKQZKFSW5LMpJkab/XI0mDZErEIMk04HzgWGA+cGKS+f1dlSQNjikRA+BQYKSq1lTVE8AyYFGf1yRJA2N6vxfQzATWdu2vA16x+aQkS4AlbffRJLdNwtoGwX7A/f1exETyyX6vQH3in8/t58DxDkyVGPSkqi4ALuj3OnY2SYarakG/1yGNxT+fk2OqXCZaD8zu2p/VxiRJk2CqxOBGYF6SuUlmACcAy/u8JkkaGFPiMlFVbUzyXmAlMA24qKpW93lZg8RLb5rK/PM5CVJV/V6DJKnPpsplIklSHxkDSZIxkCQZA0lTTJK/SLJL235tktOS7NXnZe30jMGASjIryX8nGU1yX5KvJ5nV73VJwNeBp5IcROedRLOB/+zvknZ+xmBwXUznsxwvAA4AvtnGpH57uqo2An8HfK6q/pnOn1PtQMZgcA1V1cVVtbF9fQkY6veiJODJJCcCi4Gr2thz+7iegWAMBtcDSd6RZFr7egfwQL8XJQEnA4cDZ1fVHUnmApf1eU07PT90NqCSHAh8js5fugJ+AJxWVXf3dWESkGRX4IVV5b9MPEmMgaQpJckbgU8DM6pqbpKXAmdV1Zv6u7KdmzEYMEk+uoXDVVUfm7TFSGNIsgo4Eriuql7Wxn5aVQf3d2U7tynxD9VpUv16jLHdgFOAfQFjoH57sqoeTtI99nS/FjMojMGAqarPbNpO8ifA6XResFsGfGa886QdLckK4FRgdZK3A9OSzANOo/OalnYg3000gJLsk+TjwM10fiE4pKo+VFX39XlpGmwX0/ln7O8EDgYep/Nhs4fp/NKiHcjXDAZMkk8Bf0/nk53nV9WjfV6S9DtJdgf+DVhI5+2km35AVVWd07eFDQAvEw2eM+j8xvWvwEe6rsuGzl+4Pfq1MAl4gs7rWrsAu/P7GGgHMwYDpqq8NKgpKclC4Bw6/0zKIVX1WJ+XNFC8TCRpSkjyPeDd/pe3/WEMJEm+m0iSZAwkSRgDSRLGQJKEMZAkAf8PP9ePQZsYa28AAAAASUVORK5CYII=",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Frequency distribution of the classes in the No-show column, drawn as a bar chart\n",
|
||
"no_shows[\"No-show\"].value_counts().plot.bar(title=\"No-show\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Clean the dataset\n",
|
||
"# Remove the rows with a negative age: select their index labels, then drop them\n",
|
||
"no_shows = no_shows.drop(index=no_shows.index[no_shows[\"Age\"] < 0])\n",
|
||
"\n",
|
||
"# Optionally remove the rows with an unknown age (depends on the use case)\n",
|
||
"# no_shows = no_shows.drop(no_shows[no_shows[\"Age\"] == 0].index)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Data normalisation\n",
|
||
"\n",
|
||
"# Drop the identifier columns PatientId and AppointmentID.\n",
|
||
"# errors=\"ignore\" keeps the cell re-runnable once the columns are already gone.\n",
|
||
"no_shows = no_shows.drop(columns=[\"PatientId\", \"AppointmentID\"], errors=\"ignore\")\n",
|
||
"\n",
|
||
"# Encode the No-show column from Yes/No to 1/0.\n",
|
||
"# The 1 and 0 keys map already-encoded values to themselves, so re-running the cell\n",
|
||
"# does not turn the whole column into NaN.\n",
|
||
"no_shows[\"No-show\"] = no_shows[\"No-show\"].map({'Yes': 1, 'No': 0, 1: 1, 0: 0})\n",
|
||
"\n",
|
||
"# Min-max scale the Age column to the range [0, 1].\n",
|
||
"age_min = no_shows[\"Age\"].min()\n",
|
||
"age_max = no_shows[\"Age\"].max()\n",
|
||
"no_shows[\"Age\"] = (no_shows[\"Age\"] - age_min) / (age_max - age_min)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"interpreter": {
|
||
"hash": "3c12dc341c1078754dffca0e61bfc548ab04f96cfe0a82a85a936b702c4881ab"
|
||
},
|
||
"kernelspec": {
|
||
"display_name": "Python 3.7.11 ('ium')",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.7.11"
|
||
},
|
||
"orig_nbformat": 4
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|