{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.5.12)\n", "Requirement already satisfied: tqdm in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (4.63.0)\n", "Requirement already satisfied: certifi in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2021.10.8)\n", "Requirement already satisfied: six>=1.10 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (1.16.0)\n", "Requirement already satisfied: requests in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2.27.1)\n", "Requirement already satisfied: python-slugify in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (6.1.1)\n", "Requirement already satisfied: urllib3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (1.26.9)\n", "Requirement already satisfied: python-dateutil in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2.8.2)\n", "Requirement already satisfied: text-unidecode>=1.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from requests->kaggle) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from requests->kaggle) (3.3)\n", "Requirement already satisfied: colorama in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from tqdm->kaggle) (0.4.4)\n", "Requirement already satisfied: pandas in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.3.5)\n", "Requirement already satisfied: pytz>=2017.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (2022.1)\n", "Requirement already satisfied: 
python-dateutil>=2.7.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: numpy>=1.17.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (1.21.5)\n", "Requirement already satisfied: six>=1.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n", "Requirement already satisfied: seaborn in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (0.11.2)\n", "Requirement already satisfied: scipy>=1.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.7.3)\n", "Requirement already satisfied: numpy>=1.15 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.21.5)\n", "Requirement already satisfied: matplotlib>=2.2 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (3.5.1)\n", "Requirement already satisfied: pandas>=0.23 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.3.5)\n", "Requirement already satisfied: fonttools>=4.22.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (4.31.1)\n", "Requirement already satisfied: pyparsing>=2.2.1 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (3.0.7)\n", "Requirement already satisfied: cycler>=0.10 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", "Requirement already satisfied: packaging>=20.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (21.3)\n", "Requirement already satisfied: python-dateutil>=2.7 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", "Requirement already satisfied: pillow>=6.2.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (9.0.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in 
c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.4.0)\n", "Requirement already satisfied: typing-extensions in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (4.1.1)\n", "Requirement already satisfied: pytz>=2017.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas>=0.23->seaborn) (2022.1)\n", "Requirement already satisfied: six>=1.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", "Requirement already satisfied: torch in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.11.0)\n", "Requirement already satisfied: typing-extensions in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from torch) (4.1.1)\n" ] } ], "source": [ "# Use %pip (not !pip) so the packages are installed into the running kernel's environment.\n", "# Versions are pinned to the ones this notebook was originally executed with.\n", "%pip install kaggle==1.5.12\n", "%pip install pandas==1.3.5\n", "%pip install torch==1.11.0\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "401 - Unauthorized\n" ] } ], "source": [ "# 1 Download the dataset\n", "# Needs Kaggle API credentials (kaggle.json or KAGGLE_USERNAME / KAGGLE_KEY);\n", "# without them the CLI answers \"401 - Unauthorized\".\n", "!kaggle datasets download -d joniarroba/noshowappointments" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Extract with the standard library instead of the `unzip` binary, which is not\n", "# available on Windows. Existing files are overwritten, like `unzip -o`.\n", "import zipfile\n", "\n", "with zipfile.ZipFile('noshowappointments.zip') as archive:\n", "    archive.extractall()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | PatientId | \n", "AppointmentID | \n", "Gender | \n", "ScheduledDay | \n", "AppointmentDay | \n", "Age | \n", "Neighbourhood | \n", "Scholarship | \n", "Hipertension | \n", "Diabetes | \n", "Alcoholism | \n", "Handcap | \n", "SMS_received | \n", "No-show | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2.987250e+13 | \n", "5642903 | \n", "F | \n", "2016-04-29T18:38:08Z | \n", "2016-04-29T00:00:00Z | \n", "62 | \n", "JARDIM DA PENHA | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "No | \n", "
1 | \n", "5.589978e+14 | \n", "5642503 | \n", "M | \n", "2016-04-29T16:08:27Z | \n", "2016-04-29T00:00:00Z | \n", "56 | \n", "JARDIM DA PENHA | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "No | \n", "
2 | \n", "4.262962e+12 | \n", "5642549 | \n", "F | \n", "2016-04-29T16:19:04Z | \n", "2016-04-29T00:00:00Z | \n", "62 | \n", "MATA DA PRAIA | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "No | \n", "
3 | \n", "8.679512e+11 | \n", "5642828 | \n", "F | \n", "2016-04-29T17:29:31Z | \n", "2016-04-29T00:00:00Z | \n", "8 | \n", "PONTAL DE CAMBURI | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "No | \n", "
4 | \n", "8.841186e+12 | \n", "5642494 | \n", "F | \n", "2016-04-29T16:07:23Z | \n", "2016-04-29T00:00:00Z | \n", "56 | \n", "JARDIM DA PENHA | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "No | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
110522 | \n", "2.572134e+12 | \n", "5651768 | \n", "F | \n", "2016-05-03T09:15:35Z | \n", "2016-06-07T00:00:00Z | \n", "56 | \n", "MARIA ORTIZ | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "No | \n", "
110523 | \n", "3.596266e+12 | \n", "5650093 | \n", "F | \n", "2016-05-03T07:27:33Z | \n", "2016-06-07T00:00:00Z | \n", "51 | \n", "MARIA ORTIZ | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "No | \n", "
110524 | \n", "1.557663e+13 | \n", "5630692 | \n", "F | \n", "2016-04-27T16:03:52Z | \n", "2016-06-07T00:00:00Z | \n", "21 | \n", "MARIA ORTIZ | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "No | \n", "
110525 | \n", "9.213493e+13 | \n", "5630323 | \n", "F | \n", "2016-04-27T15:09:23Z | \n", "2016-06-07T00:00:00Z | \n", "38 | \n", "MARIA ORTIZ | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "No | \n", "
110526 | \n", "3.775115e+14 | \n", "5629448 | \n", "F | \n", "2016-04-27T13:30:56Z | \n", "2016-06-07T00:00:00Z | \n", "54 | \n", "MARIA ORTIZ | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "No | \n", "
110527 rows × 14 columns
\n", "\n", " | PatientId | \n", "AppointmentID | \n", "Gender | \n", "ScheduledDay | \n", "AppointmentDay | \n", "Age | \n", "Neighbourhood | \n", "Scholarship | \n", "Hipertension | \n", "Diabetes | \n", "Alcoholism | \n", "Handcap | \n", "SMS_received | \n", "No-show | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "1.105270e+05 | \n", "1.105270e+05 | \n", "110527 | \n", "110527 | \n", "110527 | \n", "110527.000000 | \n", "110527 | \n", "110527.000000 | \n", "110527.000000 | \n", "110527.000000 | \n", "110527.000000 | \n", "110527.000000 | \n", "110527.000000 | \n", "110527 | \n", "
unique | \n", "NaN | \n", "NaN | \n", "2 | \n", "103549 | \n", "27 | \n", "NaN | \n", "81 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2 | \n", "
top | \n", "NaN | \n", "NaN | \n", "F | \n", "2016-05-06T07:09:54Z | \n", "2016-06-06T00:00:00Z | \n", "NaN | \n", "JARDIM CAMBURI | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "No | \n", "
freq | \n", "NaN | \n", "NaN | \n", "71840 | \n", "24 | \n", "4692 | \n", "NaN | \n", "7717 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "88208 | \n", "
mean | \n", "1.474963e+14 | \n", "5.675305e+06 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "37.088874 | \n", "NaN | \n", "0.098266 | \n", "0.197246 | \n", "0.071865 | \n", "0.030400 | \n", "0.022248 | \n", "0.321026 | \n", "NaN | \n", "
std | \n", "2.560949e+14 | \n", "7.129575e+04 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "23.110205 | \n", "NaN | \n", "0.297675 | \n", "0.397921 | \n", "0.258265 | \n", "0.171686 | \n", "0.161543 | \n", "0.466873 | \n", "NaN | \n", "
min | \n", "3.921784e+04 | \n", "5.030230e+06 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-1.000000 | \n", "NaN | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "
25% | \n", "4.172614e+12 | \n", "5.640286e+06 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "18.000000 | \n", "NaN | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "
50% | \n", "3.173184e+13 | \n", "5.680573e+06 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "37.000000 | \n", "NaN | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "
75% | \n", "9.439172e+13 | \n", "5.725524e+06 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "55.000000 | \n", "NaN | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "NaN | \n", "
max | \n", "9.999816e+14 | \n", "5.790484e+06 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "115.000000 | \n", "NaN | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "4.000000 | \n", "1.000000 | \n", "NaN | \n", "