{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.5.12)\n", "Requirement already satisfied: tqdm in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (4.63.0)\n", "Requirement already satisfied: certifi in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2021.10.8)\n", "Requirement already satisfied: six>=1.10 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (1.16.0)\n", "Requirement already satisfied: requests in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2.27.1)\n", "Requirement already satisfied: python-slugify in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (6.1.1)\n", "Requirement already satisfied: urllib3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (1.26.9)\n", "Requirement already satisfied: python-dateutil in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2.8.2)\n", "Requirement already satisfied: text-unidecode>=1.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from requests->kaggle) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from requests->kaggle) (3.3)\n", "Requirement already satisfied: colorama in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from tqdm->kaggle) (0.4.4)\n", "Requirement already satisfied: pandas in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.3.5)\n", "Requirement already satisfied: pytz>=2017.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (2022.1)\n", "Requirement already satisfied: 
python-dateutil>=2.7.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: numpy>=1.17.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (1.21.5)\n", "Requirement already satisfied: six>=1.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n", "Requirement already satisfied: seaborn in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (0.11.2)\n", "Requirement already satisfied: scipy>=1.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.7.3)\n", "Requirement already satisfied: numpy>=1.15 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.21.5)\n", "Requirement already satisfied: matplotlib>=2.2 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (3.5.1)\n", "Requirement already satisfied: pandas>=0.23 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.3.5)\n", "Requirement already satisfied: fonttools>=4.22.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (4.31.1)\n", "Requirement already satisfied: pyparsing>=2.2.1 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (3.0.7)\n", "Requirement already satisfied: cycler>=0.10 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", "Requirement already satisfied: packaging>=20.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (21.3)\n", "Requirement already satisfied: python-dateutil>=2.7 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", "Requirement already satisfied: pillow>=6.2.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (9.0.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in 
c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.4.0)\n", "Requirement already satisfied: typing-extensions in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (4.1.1)\n", "Requirement already satisfied: pytz>=2017.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas>=0.23->seaborn) (2022.1)\n", "Requirement already satisfied: six>=1.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", "Requirement already satisfied: torch in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.11.0)\n", "Requirement already satisfied: typing-extensions in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from torch) (4.1.1)\n" ] } ], "source": [ "# Install the notebook's dependencies.\n", "# %pip (instead of !pip) installs into the environment of the running kernel.\n", "# Versions seen in the last recorded run: kaggle 1.5.12, pandas 1.3.5, torch 1.11.0.\n", "%pip install kaggle pandas torch\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "401 - Unauthorized\n" ] } ], "source": [ "# 1. Download the dataset\n", "# The Kaggle CLI needs API credentials (~/.kaggle/kaggle.json or the\n", "# KAGGLE_USERNAME / KAGGLE_KEY environment variables); the recorded run\n", "# ended with 401 - Unauthorized, i.e. it was executed without valid ones.\n", "!kaggle datasets download -d joniarroba/noshowappointments" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "'unzip' is not recognized as an internal or external command,\n", "operable program or batch file.\n" ] } ], "source": [ "# Extract the archive with the standard library: the recorded run shows that the\n", "# `unzip` command is not available on Windows. extractall() overwrites files that\n", "# already exist, matching `unzip -o`, and raises if the archive is missing.\n", "import zipfile\n", "\n", "with zipfile.ZipFile(\"noshowappointments.zip\") as archive:\n", "    archive.extractall()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PatientIdAppointmentIDGenderScheduledDayAppointmentDayAgeNeighbourhoodScholarshipHipertensionDiabetesAlcoholismHandcapSMS_receivedNo-show
02.987250e+135642903F2016-04-29T18:38:08Z2016-04-29T00:00:00Z62JARDIM DA PENHA010000No
15.589978e+145642503M2016-04-29T16:08:27Z2016-04-29T00:00:00Z56JARDIM DA PENHA000000No
24.262962e+125642549F2016-04-29T16:19:04Z2016-04-29T00:00:00Z62MATA DA PRAIA000000No
38.679512e+115642828F2016-04-29T17:29:31Z2016-04-29T00:00:00Z8PONTAL DE CAMBURI000000No
48.841186e+125642494F2016-04-29T16:07:23Z2016-04-29T00:00:00Z56JARDIM DA PENHA011000No
.............................................
1105222.572134e+125651768F2016-05-03T09:15:35Z2016-06-07T00:00:00Z56MARIA ORTIZ000001No
1105233.596266e+125650093F2016-05-03T07:27:33Z2016-06-07T00:00:00Z51MARIA ORTIZ000001No
1105241.557663e+135630692F2016-04-27T16:03:52Z2016-06-07T00:00:00Z21MARIA ORTIZ000001No
1105259.213493e+135630323F2016-04-27T15:09:23Z2016-06-07T00:00:00Z38MARIA ORTIZ000001No
1105263.775115e+145629448F2016-04-27T13:30:56Z2016-06-07T00:00:00Z54MARIA ORTIZ000001No
\n", "

110527 rows × 14 columns

\n", "
" ], "text/plain": [ " PatientId AppointmentID Gender ScheduledDay \\\n", "0 2.987250e+13 5642903 F 2016-04-29T18:38:08Z \n", "1 5.589978e+14 5642503 M 2016-04-29T16:08:27Z \n", "2 4.262962e+12 5642549 F 2016-04-29T16:19:04Z \n", "3 8.679512e+11 5642828 F 2016-04-29T17:29:31Z \n", "4 8.841186e+12 5642494 F 2016-04-29T16:07:23Z \n", "... ... ... ... ... \n", "110522 2.572134e+12 5651768 F 2016-05-03T09:15:35Z \n", "110523 3.596266e+12 5650093 F 2016-05-03T07:27:33Z \n", "110524 1.557663e+13 5630692 F 2016-04-27T16:03:52Z \n", "110525 9.213493e+13 5630323 F 2016-04-27T15:09:23Z \n", "110526 3.775115e+14 5629448 F 2016-04-27T13:30:56Z \n", "\n", " AppointmentDay Age Neighbourhood Scholarship \\\n", "0 2016-04-29T00:00:00Z 62 JARDIM DA PENHA 0 \n", "1 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 \n", "2 2016-04-29T00:00:00Z 62 MATA DA PRAIA 0 \n", "3 2016-04-29T00:00:00Z 8 PONTAL DE CAMBURI 0 \n", "4 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 \n", "... ... ... ... ... \n", "110522 2016-06-07T00:00:00Z 56 MARIA ORTIZ 0 \n", "110523 2016-06-07T00:00:00Z 51 MARIA ORTIZ 0 \n", "110524 2016-06-07T00:00:00Z 21 MARIA ORTIZ 0 \n", "110525 2016-06-07T00:00:00Z 38 MARIA ORTIZ 0 \n", "110526 2016-06-07T00:00:00Z 54 MARIA ORTIZ 0 \n", "\n", " Hipertension Diabetes Alcoholism Handcap SMS_received No-show \n", "0 1 0 0 0 0 No \n", "1 0 0 0 0 0 No \n", "2 0 0 0 0 0 No \n", "3 0 0 0 0 0 No \n", "4 1 1 0 0 0 No \n", "... ... ... ... ... ... ... \n", "110522 0 0 0 0 1 No \n", "110523 0 0 0 0 1 No \n", "110524 0 0 0 0 1 No \n", "110525 0 0 0 0 1 No \n", "110526 0 0 0 0 1 No \n", "\n", "[110527 rows x 14 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "no_shows=pd.read_csv('KaggleV2-May-2016.csv')\n", "no_shows" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# 2. 
Train/test split\n", "import torch\n", "\n", "# Fixed seed so that the split is reproducible after Restart & Run All.\n", "SPLIT_SEED = 42\n", "\n", "train_size = int(0.8 * len(no_shows))\n", "test_size = len(no_shows) - train_size\n", "\n", "# torch.utils.data.random_split() on a DataFrame returns Subset objects whose integer\n", "# indexing is forwarded to DataFrame.__getitem__ (a column lookup), so no row could be read.\n", "# Shuffle the row positions with torch instead and slice real DataFrames out of the frame.\n", "split_generator = torch.Generator().manual_seed(SPLIT_SEED)\n", "row_order = torch.randperm(len(no_shows), generator=split_generator).tolist()\n", "no_shows_train = no_shows.iloc[row_order[:train_size]]\n", "no_shows_test = no_shows.iloc[row_order[train_size:]]\n", "\n", "# NOTE: the split is taken here, before the cleaning and normalisation cells further down,\n", "# so those later transformations are not reflected in no_shows_train / no_shows_test." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wielkosc zbioru: 110527, podzbiór train: 88421, podzbiór test 22106.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PatientIdAppointmentIDGenderScheduledDayAppointmentDayAgeNeighbourhoodScholarshipHipertensionDiabetesAlcoholismHandcapSMS_receivedNo-show
count1.105270e+051.105270e+05110527110527110527110527.000000110527110527.000000110527.000000110527.000000110527.000000110527.000000110527.000000110527
uniqueNaNNaN210354927NaN81NaNNaNNaNNaNNaNNaN2
topNaNNaNF2016-05-06T07:09:54Z2016-06-06T00:00:00ZNaNJARDIM CAMBURINaNNaNNaNNaNNaNNaNNo
freqNaNNaN71840244692NaN7717NaNNaNNaNNaNNaNNaN88208
mean1.474963e+145.675305e+06NaNNaNNaN37.088874NaN0.0982660.1972460.0718650.0304000.0222480.321026NaN
std2.560949e+147.129575e+04NaNNaNNaN23.110205NaN0.2976750.3979210.2582650.1716860.1615430.466873NaN
min3.921784e+045.030230e+06NaNNaNNaN-1.000000NaN0.0000000.0000000.0000000.0000000.0000000.000000NaN
25%4.172614e+125.640286e+06NaNNaNNaN18.000000NaN0.0000000.0000000.0000000.0000000.0000000.000000NaN
50%3.173184e+135.680573e+06NaNNaNNaN37.000000NaN0.0000000.0000000.0000000.0000000.0000000.000000NaN
75%9.439172e+135.725524e+06NaNNaNNaN55.000000NaN0.0000000.0000000.0000000.0000000.0000001.000000NaN
max9.999816e+145.790484e+06NaNNaNNaN115.000000NaN1.0000001.0000001.0000001.0000004.0000001.000000NaN
\n", "
" ], "text/plain": [ " PatientId AppointmentID Gender ScheduledDay \\\n", "count 1.105270e+05 1.105270e+05 110527 110527 \n", "unique NaN NaN 2 103549 \n", "top NaN NaN F 2016-05-06T07:09:54Z \n", "freq NaN NaN 71840 24 \n", "mean 1.474963e+14 5.675305e+06 NaN NaN \n", "std 2.560949e+14 7.129575e+04 NaN NaN \n", "min 3.921784e+04 5.030230e+06 NaN NaN \n", "25% 4.172614e+12 5.640286e+06 NaN NaN \n", "50% 3.173184e+13 5.680573e+06 NaN NaN \n", "75% 9.439172e+13 5.725524e+06 NaN NaN \n", "max 9.999816e+14 5.790484e+06 NaN NaN \n", "\n", " AppointmentDay Age Neighbourhood Scholarship \\\n", "count 110527 110527.000000 110527 110527.000000 \n", "unique 27 NaN 81 NaN \n", "top 2016-06-06T00:00:00Z NaN JARDIM CAMBURI NaN \n", "freq 4692 NaN 7717 NaN \n", "mean NaN 37.088874 NaN 0.098266 \n", "std NaN 23.110205 NaN 0.297675 \n", "min NaN -1.000000 NaN 0.000000 \n", "25% NaN 18.000000 NaN 0.000000 \n", "50% NaN 37.000000 NaN 0.000000 \n", "75% NaN 55.000000 NaN 0.000000 \n", "max NaN 115.000000 NaN 1.000000 \n", "\n", " Hipertension Diabetes Alcoholism Handcap \\\n", "count 110527.000000 110527.000000 110527.000000 110527.000000 \n", "unique NaN NaN NaN NaN \n", "top NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN \n", "mean 0.197246 0.071865 0.030400 0.022248 \n", "std 0.397921 0.258265 0.171686 0.161543 \n", "min 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 0.000000 \n", "50% 0.000000 0.000000 0.000000 0.000000 \n", "75% 0.000000 0.000000 0.000000 0.000000 \n", "max 1.000000 1.000000 1.000000 4.000000 \n", "\n", " SMS_received No-show \n", "count 110527.000000 110527 \n", "unique NaN 2 \n", "top NaN No \n", "freq NaN 88208 \n", "mean 0.321026 NaN \n", "std 0.466873 NaN \n", "min 0.000000 NaN \n", "25% 0.000000 NaN \n", "50% 0.000000 NaN \n", "75% 1.000000 NaN \n", "max 1.000000 NaN " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 3. 
Statystyki\n", "# Wielkość zbioru i podzbiorów\n", "print(f\"Wielkosc zbioru: {len(no_shows)}, podzbiór train: {train_size}, podzbiór test {test_size}.\")\n", "# Opis parametrów\n", "no_shows.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEOCAYAAABrSnsUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQnUlEQVR4nO3df6zddX3H8efL1iLC+H3HpK20G3WuskyxQcD4CxYoopYt/gDnaAjaGFFwwWmdm2QoiUaFiUETIiCgszJ0oUJdR1CM04DcikELIjflR1tBLpQfIgoU3vvjfKrHcm/vqW3vufQ8H8nN/X4/38/3nM9J2vu853vOaVNVSJIG23P6vQBJUv8ZA0mSMZAkGQNJEsZAkoQxkCRhDKQdIsl1Sd7Z73VIvTIGGihJ7kxyX5LdusbemeS6Pi5L6jtjoEE0DTi934uQphJjoEH0KeADSfba/ECSI5LcmOTh9v2I8W4kyfOSfDnJA0keavP375pyYJLvJ/lVkv9Nsl/XuW9Ksrqdd12Sv2rjJyf5Zte825P8V9f+2iQv3baHLz2TMdAgGgauAz7QPZhkH+Bq4DxgX+Ac4Ook+45zO4uBPYHZbf67gd90HX87cDLwp8CMTfeX5EXAV4H3A0PACuCbSWYA3wVeleQ5SQ5o5x3ezvtzYHfg5j/6kUvjMAYaVB8F3pdkqGvsOOD2qrqsqjZW1VeBnwFvHOc2nqQTgYOq6qmqWlVVj3Qdv7iqfl5VvwEuB17axt8GXF1V11TVk8CngV2BI6pqDfCrNvfVwErgF0leDLwG+F5VPb3Nj17azPR+L0Dqh6r6aZKrgKXArW34AOCuzabeBcwESPJo1/h84DI6zwqWtUtOXwY+0n7AA9zbNf8xOr/VP+N+qurpJGs33Q+dZwevBQ5q2w/RCcHhbV/a7nxmoEF2JvAufv9D+BfAgZvNeSGwHqCqdu/6uruqnqyqf6+q+cARwBuAk3q43z+4nyShE5X1bWhTDF7Vtr9LJwavwRhoBzEGGlhVNQJ8DTitDa0AXpTk7UmmJ3kbnWcAV411fpLXJfnrJNOAR+hcNurlEs7lwHFJjkryXOAM4HHgB+34d4HXAbtW1Trge8BCOpekbvojHqo0IWOgQXcWsBtAVT1A57f7M4AHgA8Cb6iq+8c598+AK+iE4FY6P8Qvm+gOq+o24B3A54D76bwm8caqeqId/znwKJ0I0F6HWAN8v6qe+qMepTSB+J/bSJJ8ZiBJMgaSJGMgScIYSJJ4Fn/obL/99qs5c+b0exmS9KyxatWq+6tqaKxjz9oYzJkzh+Hh4X4vQ5KeNZJs/gn73/EykSTJGEiSjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJJ4Fn8C+dlgztKr+72Encqdnziu30uQdlo+M5AkGQNJkjGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJNFjDJL8U5LVSX6a5KtJnpdkbpIbkowk+VqSGW3uLm1/pB2f03U7H27jtyU5pmt8YRsbSbJ0uz9KSdIWTRiDJDOB04AFV
XUwMA04AfgkcG5VHQQ8CJzSTjkFeLCNn9vmkWR+O+8lwELg80mmJZkGnA8cC8wHTmxzJUmTpNfLRNOBXZNMB54P3AMcCVzRjl8CHN+2F7V92vGjkqSNL6uqx6vqDmAEOLR9jVTVmqp6AljW5kqSJsmEMaiq9cCngbvpROBhYBXwUFVtbNPWATPb9kxgbTt3Y5u/b/f4ZueMN/4MSZYkGU4yPDo62svjkyT1oJfLRHvT+U19LnAAsBudyzyTrqouqKoFVbVgaGioH0uQpJ1SL5eJ/ha4o6pGq+pJ4BvAK4G92mUjgFnA+ra9HpgN0I7vCTzQPb7ZOeONS5ImSS8xuBs4LMnz27X/o4BbgO8Ab25zFgNXtu3lbZ92/NtVVW38hPZuo7nAPOCHwI3AvPbupBl0XmRevu0PTZLUq+kTTaiqG5JcAfwI2AjcBFwAXA0sS/LxNnZhO+VC4LIkI8AGOj/cqarVSS6nE5KNwKlV9RRAkvcCK+m8U+miqlq9/R6iJGkiE8YAoKrOBM7cbHgNnXcCbT73t8Bbxrmds4GzxxhfAazoZS2SpO3PTyBLkoyBJMkYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgSaLHGCTZK8kVSX6W5NYkhyfZJ8k1SW5v3/duc5PkvCQjSW5OckjX7Sxu829Psrhr/OVJftLOOS9Jtv9DlSSNp9dnBp8F/qeqXgz8DXArsBS4tqrmAde2fYBjgXntawnwBYAk+wBnAq8ADgXO3BSQNuddXect3LaHJUnaGhPGIMmewKuBCwGq6omqeghYBFzSpl0CHN+2FwGXVsf1wF5JXgAcA1xTVRuq6kHgGmBhO7ZHVV1fVQVc2nVbkqRJ0Mszg7nAKHBxkpuSfDHJbsD+VXVPm3MvsH/bngms7Tp/XRvb0vi6McafIcmSJMNJhkdHR3tYuiSpF73EYDpwCPCFqnoZ8Gt+f0kIgPYbfW3/5f2hqrqgqhZU1YKhoaEdfXeSNDB6icE6YF1V3dD2r6ATh1+2Szy07/e14+uB2V3nz2pjWxqfNca4JGmSTBiDqroXWJvkL9vQUcAtwHJg0zuCFgNXtu3lwEntXUWHAQ+3y0krgaOT7N1eOD4aWNmOPZLksPYuopO6bkuSNAmm9zjvfcBXkswA1gAn0wnJ5UlOAe4C3trmrgBeD4wAj7W5VNWGJB8DbmzzzqqqDW37PcCXgF2Bb7UvSdIk6SkGVfVjYMEYh44aY24Bp45zOxcBF40xPgwc3MtaJEnbn59AliQZA0mSMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJLEVsQgybQkNyW5qu3PTXJDkpEkX0syo43v0vZH2vE5Xbfx4TZ+W5JjusYXtrGRJEu34+OTJPVga54ZnA7c2rX/SeDcqjoIeBA4pY2fAjzYxs9t80gyHzgBeAmwEPh8C8w04HzgWGA+cGKbK0maJD3FIMks4Djgi20/wJHAFW3KJcDxbXtR26cdP6rNXwQsq6rHq+oOYAQ4tH2NVNWaqnoCWNbmSpImSa/PDP4D+CDwdNvfF3ioqja2/XXAzLY9E1gL0I4/3Ob/bnyzc8Ybf4YkS5IMJxkeHR3tcemSpIlMGIMkbwDuq6pVk7CeLaqqC6pqQVUtGBoa6vdyJGmnMb2HOa8E3pTk9cDzgD2AzwJ7JZnefvufBaxv89cDs4F1SaYDewIPdI1v0n3OeOOSp
Ekw4TODqvpwVc2qqjl0XgD+dlX9A/Ad4M1t2mLgyra9vO3Tjn+7qqqNn9DebTQXmAf8ELgRmNfenTSj3cfy7fLoJEk96eWZwXg+BCxL8nHgJuDCNn4hcFmSEWADnR/uVNXqJJcDtwAbgVOr6imAJO8FVgLTgIuqavU2rEuStJW2KgZVdR1wXdteQ+edQJvP+S3wlnHOPxs4e4zxFcCKrVmLJGn78RPIkiRjIEkyBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkYHq/FyCpP+YsvbrfS9ip3PmJ4/q9hG3iMwNJkjGQJPUQgySzk3wnyS1JVic5vY3vk+SaJLe373u38SQ5L8lIkpuTHNJ1W4vb/NuTLO4af3mSn7RzzkuSHfFgJUlj6+WZwUbgjKqaDxwGnJpkPrAUuLaq5gHXtn2AY4F57WsJ8AXoxAM4E3gFcChw5qaAtDnv6jpv4bY/NElSryaMQVXdU1U/atu/Am4FZgKLgEvatEuA49v2IuDS6rge2CvJC4BjgGuqakNVPQhcAyxsx/aoquurqoBLu25LkjQJtuo1gyRzgJcBNwD7V9U97dC9wP5teyawtuu0dW1sS+Prxhgf6/6XJBlOMjw6Oro1S5ckbUHPMUiyO/B14P1V9Uj3sfYbfW3ntT1DVV1QVQuqasHQ0NCOvjtJGhg9xSDJc+mE4CtV9Y02/Mt2iYf2/b42vh6Y3XX6rDa2pfFZY4xLkiZJL+8mCnAhcGtVndN1aDmw6R1Bi4Eru8ZPau8qOgx4uF1OWgkcnWTv9sLx0cDKduyRJIe1+zqp67YkSZOgl08gvxL4R+AnSX7cxv4F+ARweZJTgLuAt7ZjK4DXAyPAY8DJAFW1IcnHgBvbvLOqakPbfg/wJWBX4FvtS5I0SSaMQVX9HzDe+/6PGmN+AaeOc1sXAReNMT4MHDzRWiRJO4afQJYkGQNJkjGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCSxBSKQZKFSW5LMpJkab/XI0mDZErEIMk04HzgWGA+cGKS+f1dlSQNjikRA+BQYKSq1lTVE8AyYFGf1yRJA2N6vxfQzATWdu2vA16x+aQkS4AlbffRJLdNwtoGwX7A/f1exETyyX6vQH3in8/t58DxDkyVGPSkqi4ALuj3OnY2SYarakG/1yGNxT+fk2OqXCZaD8zu2p/VxiRJk2CqxOBGYF6SuUlmACcAy/u8JkkaGFPiMlFVbUzyXmAlMA24qKpW93lZg8RLb5rK/PM5CVJV/V6DJKnPpsplIklSHxkDSZIxkCQZA0lTTJK/SLJL235tktOS7NXnZe30jMGASjIryX8nGU1yX5KvJ5nV73VJwNeBp5IcROedRLOB/+zvknZ+xmBwXUznsxwvAA4AvtnGpH57uqo2An8HfK6q/pnOn1PtQMZgcA1V1cVVtbF9fQkY6veiJODJJCcCi4Gr2thz+7iegWAMBtcDSd6RZFr7egfwQL8XJQEnA4cDZ1fVHUnmApf1eU07PT90NqCSHAh8js5fugJ+AJxWVXf3dWESkGRX4IVV5b9MPEmMgaQpJckbgU8DM6pqbpKXAmdV1Zv6u7KdmzEYMEk+uoXDVVUfm7TFSGNIsgo4Eriuql7Wxn5aVQf3d2U7tynxD9VpUv16jLHdgFOAfQFjoH57sqoeTtI99nS/FjMojMGAqarPbNpO8ifA6XResFsGfGa886QdLckK4FRgdZK3A
9OSzANOo/OalnYg3000gJLsk+TjwM10fiE4pKo+VFX39XlpGmwX0/ln7O8EDgYep/Nhs4fp/NKiHcjXDAZMkk8Bf0/nk53nV9WjfV6S9DtJdgf+DVhI5+2km35AVVWd07eFDQAvEw2eM+j8xvWvwEe6rsuGzl+4Pfq1MAl4gs7rWrsAu/P7GGgHMwYDpqq8NKgpKclC4Bw6/0zKIVX1WJ+XNFC8TCRpSkjyPeDd/pe3/WEMJEm+m0iSZAwkSRgDSRLGQJKEMZAkAf8PP9ePQZsYa28AAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Rozkład częstości dla klas\n", "no_shows[\"No-show\"].value_counts().plot(kind=\"bar\", title=\"No-show\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Wyczyszczenie zbioru\n", "# Usunięcie negatywnego wieku\n", "no_shows = no_shows.drop(no_shows[no_shows[\"Age\"] < 0].index)\n", "\n", "# Usunięcie niewiadomego wieku (zależy od zastosowania)\n", "# no_shows = no_shows.drop(no_shows[no_shows[\"Age\"] == 0].index)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Normalizacja danych\n", "\n", "# Usunięcie kolumn PatientId oraz AppointmentID\n", "no_shows.drop([\"PatientId\", \"AppointmentID\"], inplace=True, axis=1)\n", "\n", "# Zmiena wartości kolumny No-show z Yes/No na wartość boolowską\n", "no_shows[\"No-show\"] = no_shows[\"No-show\"].map({'Yes': 1, 'No': 0})\n", "\n", "# Normalizacja kolumny Age\n", "no_shows[\"Age\"]=(no_shows[\"Age\"]-no_shows[\"Age\"].min())/(no_shows[\"Age\"].max()-no_shows[\"Age\"].min())" ] } ], "metadata": { "interpreter": { "hash": "3c12dc341c1078754dffca0e61bfc548ab04f96cfe0a82a85a936b702c4881ab" }, "kernelspec": { "display_name": "Python 3.7.11 ('ium')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.11" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }