diff --git a/02_Dane.ipynb b/02_Dane.ipynb index 7fd4f66..93eaa50 100644 --- a/02_Dane.ipynb +++ b/02_Dane.ipynb @@ -2,131 +2,66 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: kaggle in \\\\files\\students\\s478855\\.appdata\\python\\python38\\site-packages (1.5.12)\n", - "Requirement already satisfied: tqdm in c:\\software\\python3\\lib\\site-packages (from kaggle) (4.62.1)\n", - "Requirement already satisfied: urllib3 in c:\\software\\python3\\lib\\site-packages (from kaggle) (1.26.6)\n", - "Requirement already satisfied: certifi in c:\\software\\python3\\lib\\site-packages (from kaggle) (2021.5.30)\n", - "Requirement already satisfied: python-dateutil in c:\\software\\python3\\lib\\site-packages (from kaggle) (2.8.2)\n", - "Requirement already satisfied: six>=1.10 in c:\\software\\python3\\lib\\site-packages (from kaggle) (1.15.0)\n", - "Requirement already satisfied: python-slugify in \\\\files\\students\\s478855\\.appdata\\python\\python38\\site-packages (from kaggle) (6.1.1)\n", - "Requirement already satisfied: requests in c:\\software\\python3\\lib\\site-packages (from kaggle) (2.26.0)\n", - "Requirement already satisfied: text-unidecode>=1.3 in \\\\files\\students\\s478855\\.appdata\\python\\python38\\site-packages (from python-slugify->kaggle) (1.3)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\software\\python3\\lib\\site-packages (from requests->kaggle) (3.2)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\software\\python3\\lib\\site-packages (from requests->kaggle) (2.0.4)\n", - "Requirement already satisfied: colorama in c:\\software\\python3\\lib\\site-packages (from tqdm->kaggle) (0.4.4)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n", - "You should consider upgrading via the 'c:\\software\\python3\\python3.exe -m pip install --upgrade pip' command.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: pandas in c:\\software\\python3\\lib\\site-packages (1.3.2)\n", - "Requirement already satisfied: pytz>=2017.3 in c:\\software\\python3\\lib\\site-packages (from pandas) (2021.1)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\software\\python3\\lib\\site-packages (from pandas) (2.8.2)\n", - "Requirement already satisfied: numpy>=1.17.3 in c:\\software\\python3\\lib\\site-packages (from pandas) (1.19.5)\n", - "Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n", - "You should consider upgrading via the 'c:\\software\\python3\\python3.exe -m pip install --upgrade pip' command.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n", - "You should consider upgrading via the 'c:\\software\\python3\\python3.exe -m pip install --upgrade pip' command.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Requirement already satisfied: seaborn in \\\\files\\students\\s478855\\.appdata\\python\\python38\\site-packages (0.11.2)\n", - "Requirement already satisfied: numpy>=1.15 in c:\\software\\python3\\lib\\site-packages (from seaborn) (1.19.5)\n", - "Requirement already satisfied: pandas>=0.23 in c:\\software\\python3\\lib\\site-packages (from seaborn) (1.3.2)\n", - "Requirement already satisfied: matplotlib>=2.2 in c:\\software\\python3\\lib\\site-packages (from seaborn) (3.4.3)\n", - "Requirement already satisfied: scipy>=1.0 in c:\\software\\python3\\lib\\site-packages (from seaborn) (1.7.1)\n", - "Requirement already satisfied: python-dateutil>=2.7 in c:\\software\\python3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", - "Requirement already satisfied: pyparsing>=2.2.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", - "Requirement already satisfied: pillow>=6.2.0 in c:\\software\\python3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (8.3.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n", - "Requirement already satisfied: cycler>=0.10 in c:\\software\\python3\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", - "Requirement already satisfied: six in c:\\software\\python3\\lib\\site-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.15.0)\n", - "Requirement already satisfied: pytz>=2017.3 in c:\\software\\python3\\lib\\site-packages (from pandas>=0.23->seaborn) (2021.1)\n", - "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting torch\n", - " Downloading torch-1.11.0-cp38-cp38-win_amd64.whl (158.0 MB)\n", - "Requirement already satisfied: typing-extensions in c:\\software\\python3\\lib\\site-packages (from torch) (3.7.4.3)\n", - "Installing collected packages: torch\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " WARNING: The scripts convert-caffe2-to-onnx.exe, convert-onnx-to-caffe2.exe and torchrun.exe are installed in 'j:\\.AppData\\Python\\Python38\\Scripts' which is not on PATH.\n", - " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n", - "WARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n", - "You should consider upgrading via the 'c:\\software\\python3\\python3.exe -m pip install --upgrade pip' command.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Successfully installed torch-1.11.0\n" + "Requirement already satisfied: kaggle in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.5.12)\n", + "Requirement already satisfied: tqdm in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (4.63.0)\n", + "Requirement already satisfied: certifi in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2021.10.8)\n", + "Requirement already satisfied: six>=1.10 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (1.16.0)\n", + "Requirement already satisfied: requests in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2.27.1)\n", + "Requirement already satisfied: python-slugify in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (6.1.1)\n", + "Requirement already satisfied: urllib3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (1.26.9)\n", + "Requirement already satisfied: python-dateutil in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kaggle) (2.8.2)\n", + "Requirement already satisfied: text-unidecode>=1.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from requests->kaggle) (2.0.12)\n", + "Requirement already satisfied: idna<4,>=2.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from requests->kaggle) (3.3)\n", + "Requirement already satisfied: colorama in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from tqdm->kaggle) (0.4.4)\n", + "Requirement already satisfied: pandas in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.3.5)\n", + "Requirement already satisfied: pytz>=2017.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (2022.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.17.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas) (1.21.5)\n", + "Requirement already satisfied: six>=1.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n", + "Requirement already satisfied: seaborn in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (0.11.2)\n", + "Requirement already satisfied: scipy>=1.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.7.3)\n", + "Requirement already satisfied: numpy>=1.15 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.21.5)\n", + "Requirement already satisfied: matplotlib>=2.2 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (3.5.1)\n", + "Requirement already satisfied: pandas>=0.23 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from seaborn) (1.3.5)\n", + "Requirement already satisfied: fonttools>=4.22.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (4.31.1)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (3.0.7)\n", + "Requirement already satisfied: cycler>=0.10 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)\n", + "Requirement already satisfied: packaging>=20.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (21.3)\n", + "Requirement already satisfied: python-dateutil>=2.7 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (9.0.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from matplotlib>=2.2->seaborn) (1.4.0)\n", + "Requirement already satisfied: typing-extensions in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (4.1.1)\n", + "Requirement already satisfied: pytz>=2017.3 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from pandas>=0.23->seaborn) (2022.1)\n", + "Requirement already satisfied: six>=1.5 in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)\n", + "Requirement already satisfied: torch in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (1.11.0)\n", + "Requirement already satisfied: typing-extensions in c:\\programy\\anaconda3\\envs\\ium\\lib\\site-packages (from torch) (4.1.1)\n" ] } ], "source": [ "!pip install kaggle\n", "!pip install pandas\n", - "!pip install seaborn\n", "!pip install torch\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "'kaggle' is not recognized as an internal or external command,\n", - "operable program or batch file.\n" + "401 - Unauthorized\n" ] } ], @@ -137,15 +72,15 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Archive: noshowappointments.zip\n", - " inflating: KaggleV2-May-2016.csv \n" + "'unzip' is not recognized as an internal or external command,\n", + "operable program or batch file.\n" ] } ], @@ -155,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -431,7 +366,7 @@ "[110527 rows x 14 columns]" ] }, - "execution_count": 9, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -444,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -458,7 +393,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -751,7 +686,7 @@ "max 1.000000 NaN " ] }, - "execution_count": 11, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -766,7 +701,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -775,13 +710,13 @@ "" ] }, - "execution_count": 12, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEOCAYAAABrSnsUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQnUlEQVR4nO3df6zddX3H8efL1iLC+H3HpK20G3WuskyxQcD4CxYoopYt/gDnaAjaGFFwwWmdm2QoiUaFiUETIiCgszJ0oUJdR1CM04DcikELIjflR1tBLpQfIgoU3vvjfKrHcm/vqW3vufQ8H8nN/X4/38/3nM9J2vu853vOaVNVSJIG23P6vQBJUv8ZA0mSMZAkGQNJEsZAkoQxkCRhDKQdIsl1Sd7Z73VIvTIGGihJ7kxyX5LdusbemeS6Pi5L6jtjoEE0DTi934uQphJjoEH0KeADSfba/ECSI5LcmOTh9v2I8W4kyfOSfDnJA0keavP375pyYJLvJ/lVkv9Nsl/XuW9Ksrqdd12Sv2rjJyf5Zte825P8V9f+2iQv3baHLz2TMdAgGgauAz7QPZhkH+Bq4DxgX+Ac4Ook+45zO4uBPYHZbf67gd90HX87cDLwp8CMTfeX5EXAV4H3A0PACuCbSWYA3wVeleQ5SQ5o5x3ezvtzYHfg5j/6kUvjMAYaVB8F3pdkqGvsOOD2qrqsqjZW1VeBnwFvHOc2nqQTgYOq6qmqWlVVj3Qdv7iqfl5VvwEuB17axt8GXF1V11TVk8CngV2BI6pqDfCrNvfVwErgF0leDLwG+F5VPb3Nj17azPR+L0Dqh6r6aZKrgKXArW34AOCuzabeBcwESPJo1/h84DI6zwqWtUtOXwY+0n7AA9zbNf8xOr/VP+N+qurpJGs33Q+dZwevBQ5q2w/RCcHhbV/a7nxmoEF2JvAufv9D+BfAgZvNeSGwHqCqdu/6uruqnqyqf6+q+cARwBuAk3q43z+4nyShE5X1bWhTDF7Vtr9LJwavwRhoBzEGGlhVNQJ8DTitDa0AXpTk7UmmJ3kbnWcAV411fpLXJfnrJNOAR+hcNurlEs7lwHFJjkryXOAM4HHgB+34d4HXAbtW1Trge8BCOpekbvojHqo0IWOgQXcWsBtAVT1A57f7M4AHgA8Cb6iq+8c598+AK+iE4FY6P8Qvm+gOq+o24B3A54D76bwm8caqeqId/znwKJ0I0F6HWAN8v6qe+qMepTSB+J/bSJJ8ZiBJMgaSJGMgScIYSJJ4Fn/obL/99qs5c+b0exmS9KyxatWq+6tqaKxjz9oYzJkzh+Hh4X4vQ5KeNZJs/gn73/EykSTJGEiSjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJJ4Fn8C+dlgztKr+72Encqdnziu30uQdlo+M5AkGQNJkjGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJNFjDJL8U5LVSX6a5KtJnpdkbpIbkowk+VqSGW3uLm1/pB2f03U7H27jtyU5pmt8YRsbSbJ0uz9KSdIWTRiDJDOB04AFVXUwMA04AfgkcG5VHQQ8CJzSTjkFeLCNn9vmkWR+O+8lwELg80mmJZkGnA8cC8wHTmxzJUmTpNfLRNOBXZNMB54P3AMcCVzRjl8CHN+2F7V92vGjkqSNL6uqx6vqDmAEOLR9jVTVmqp6AljW5kqSJsmEMaiq9cCngbvpROBhYBXwUFVtbNPWATPb9kxgbTt3Y5u/b/f4ZueMN/4MSZYkGU4yPDo62svjkyT1oJfLRHvT+U19LnAAsBudyzyTrqouqKoFVbVgaGioH0uQpJ1SL5eJ/ha4o6pGq+pJ4BvAK4G92mUjgFnA+ra9HpgN0I7vCTzQPb7ZOeONS5ImSS8xuBs4LMnz27X/o4BbgO8Ab25zFgNXtu3lbZ92/NtVVW38hPZuo7nAPOCHwI3AvPbupBl0XmRevu0PTZLUq+kTTaiqG5JcAfwI2AjcBFwAXA0sS/LxNnZhO+VC4LIkI8AGOj/cqarVSS6nE5KNwKlV9RRAkvcCK+m8U+miqlq9/R6iJGkiE8YAoKrOBM7cbHgNnXcCbT73t8Bbxrmds4GzxxhfAazoZS2SpO3PTyBLkoyBJMkYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgSaLHGCTZK8kVSX6W5NYkhyfZJ8k1SW5v3/duc5PkvCQjSW5OckjX7Sxu829Psrhr/OVJftLOOS9Jtv9DlSSNp9dnBp8F/qeqXgz8DXArsBS4tqrmAde2fYBjgXntawnwBYAk+wBnAq8ADgXO3BSQNuddXect3LaHJUnaGhPGIMmewKuBCwGq6omqeghYBFzSpl0CHN+2FwGXVsf1wF5JXgAcA1xTVRuq6kHgGmBhO7ZHVV1fVQVc2nVbkqRJ0Mszg7nAKHBxkpuSfDHJbsD+VXVPm3MvsH/bngms7Tp/XRvb0vi6McafIcmSJMNJhkdHR3tYuiSpF73EYDpwCPCFqnoZ8Gt+f0kIgPYbfW3/5f2hqrqgqhZU1YKhoaEdfXeSNDB6icE6YF1V3dD2r6ATh1+2Szy07/e14+uB2V3nz2pjWxqfNca4JGmSTBiDqroXWJvkL9vQUcAtwHJg0zuCFgNXtu3lwEntXUWHAQ+3y0krgaOT7N1eOD4aWNmOPZLksPYuopO6bkuSNAmm9zjvfcBXkswA1gAn0wnJ5UlOAe4C3trmrgBeD4wAj7W5VNWGJB8DbmzzzqqqDW37PcCXgF2Bb7UvSdIk6SkGVfVjYMEYh44aY24Bp45zOxcBF40xPgwc3MtaJEnbn59AliQZA0mSMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJLEVsQgybQkNyW5qu3PTXJDkpEkX0syo43v0vZH2vE5Xbfx4TZ+W5JjusYXtrGRJEu34+OTJPVga54ZnA7c2rX/SeDcqjoIeBA4pY2fAjzYxs9t80gyHzgBeAmwEPh8C8w04HzgWGA+cGKbK0maJD3FIMks4Djgi20/wJHAFW3KJcDxbXtR26cdP6rNXwQsq6rHq+oOYAQ4tH2NVNWaqnoCWNbmSpImSa/PDP4D+CDwdNvfF3ioqja2/XXAzLY9E1gL0I4/3Ob/bnyzc8Ybf4YkS5IMJxkeHR3tcemSpIlMGIMkbwDuq6pVk7CeLaqqC6pqQVUtGBoa6vdyJGmnMb2HOa8E3pTk9cDzgD2AzwJ7JZnefvufBaxv89cDs4F1SaYDewIPdI1v0n3OeOOSpEkw4TODqvpwVc2qqjl0XgD+dlX9A/Ad4M1t2mLgyra9vO3Tjn+7qqqNn9DebTQXmAf8ELgRmNfenTSj3cfy7fLoJEk96eWZwXg+BCxL8nHgJuDCNn4hcFmSEWADnR/uVNXqJJcDtwAbgVOr6imAJO8FVgLTgIuqavU2rEuStJW2KgZVdR1wXdteQ+edQJvP+S3wlnHOPxs4e4zxFcCKrVmLJGn78RPIkiRjIEkyBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkYHq/FyCpP+YsvbrfS9ip3PmJ4/q9hG3iMwNJkjGQJPUQgySzk3wnyS1JVic5vY3vk+SaJLe373u38SQ5L8lIkpuTHNJ1W4vb/NuTLO4af3mSn7RzzkuSHfFgJUlj6+WZwUbgjKqaDxwGnJpkPrAUuLaq5gHXtn2AY4F57WsJ8AXoxAM4E3gFcChw5qaAtDnv6jpv4bY/NElSryaMQVXdU1U/atu/Am4FZgKLgEvatEuA49v2IuDS6rge2CvJC4BjgGuqakNVPQhcAyxsx/aoquurqoBLu25LkjQJtuo1gyRzgJcBNwD7V9U97dC9wP5teyawtuu0dW1sS+Prxhgf6/6XJBlOMjw6Oro1S5ckbUHPMUiyO/B14P1V9Uj3sfYbfW3ntT1DVV1QVQuqasHQ0NCOvjtJGhg9xSDJc+mE4CtV9Y02/Mt2iYf2/b42vh6Y3XX6rDa2pfFZY4xLkiZJL+8mCnAhcGtVndN1aDmw6R1Bi4Eru8ZPau8qOgx4uF1OWgkcnWTv9sLx0cDKduyRJIe1+zqp67YkSZOgl08gvxL4R+AnSX7cxv4F+ARweZJTgLuAt7ZjK4DXAyPAY8DJAFW1IcnHgBvbvLOqakPbfg/wJWBX4FvtS5I0SSaMQVX9HzDe+/6PGmN+AaeOc1sXAReNMT4MHDzRWiRJO4afQJYkGQNJkjGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCSxBSKQZKFSW5LMpJkab/XI0mDZErEIMk04HzgWGA+cGKS+f1dlSQNjikRA+BQYKSq1lTVE8AyYFGf1yRJA2N6vxfQzATWdu2vA16x+aQkS4AlbffRJLdNwtoGwX7A/f1exETyyX6vQH3in8/t58DxDkyVGPSkqi4ALuj3OnY2SYarakG/1yGNxT+fk2OqXCZaD8zu2p/VxiRJk2CqxOBGYF6SuUlmACcAy/u8JkkaGFPiMlFVbUzyXmAlMA24qKpW93lZg8RLb5rK/PM5CVJV/V6DJKnPpsplIklSHxkDSZIxkCQZA0lTTJK/SLJL235tktOS7NXnZe30jMGASjIryX8nGU1yX5KvJ5nV73VJwNeBp5IcROedRLOB/+zvknZ+xmBwXUznsxwvAA4AvtnGpH57uqo2An8HfK6q/pnOn1PtQMZgcA1V1cVVtbF9fQkY6veiJODJJCcCi4Gr2thz+7iegWAMBtcDSd6RZFr7egfwQL8XJQEnA4cDZ1fVHUnmApf1eU07PT90NqCSHAh8js5fugJ+AJxWVXf3dWESkGRX4IVV5b9MPEmMgaQpJckbgU8DM6pqbpKXAmdV1Zv6u7KdmzEYMEk+uoXDVVUfm7TFSGNIsgo4Eriuql7Wxn5aVQf3d2U7tynxD9VpUv16jLHdgFOAfQFjoH57sqoeTtI99nS/FjMojMGAqarPbNpO8ifA6XResFsGfGa886QdLckK4FRgdZK3A9OSzANOo/OalnYg3000gJLsk+TjwM10fiE4pKo+VFX39XlpGmwX0/ln7O8EDgYep/Nhs4fp/NKiHcjXDAZMkk8Bf0/nk53nV9WjfV6S9DtJdgf+DVhI5+2km35AVVWd07eFDQAvEw2eM+j8xvWvwEe6rsuGzl+4Pfq1MAl4gs7rWrsAu/P7GGgHMwYDpqq8NKgpKclC4Bw6/0zKIVX1WJ+XNFC8TCRpSkjyPeDd/pe3/WEMJEm+m0iSZAwkSRgDSRLGQJKEMZAkAf8PP9ePQZsYa28AAAAASUVORK5CYII=", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEOCAYAAABrSnsUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQnUlEQVR4nO3df6zddX3H8efL1iLC+H3HpK20G3WuskyxQcD4CxYoopYt/gDnaAjaGFFwwWmdm2QoiUaFiUETIiCgszJ0oUJdR1CM04DcikELIjflR1tBLpQfIgoU3vvjfKrHcm/vqW3vufQ8H8nN/X4/38/3nM9J2vu853vOaVNVSJIG23P6vQBJUv8ZA0mSMZAkGQNJEsZAkoQxkCRhDKQdIsl1Sd7Z73VIvTIGGihJ7kxyX5LdusbemeS6Pi5L6jtjoEE0DTi934uQphJjoEH0KeADSfba/ECSI5LcmOTh9v2I8W4kyfOSfDnJA0keavP375pyYJLvJ/lVkv9Nsl/XuW9Ksrqdd12Sv2rjJyf5Zte825P8V9f+2iQv3baHLz2TMdAgGgauAz7QPZhkH+Bq4DxgX+Ac4Ook+45zO4uBPYHZbf67gd90HX87cDLwp8CMTfeX5EXAV4H3A0PACuCbSWYA3wVeleQ5SQ5o5x3ezvtzYHfg5j/6kUvjMAYaVB8F3pdkqGvsOOD2qrqsqjZW1VeBnwFvHOc2nqQTgYOq6qmqWlVVj3Qdv7iqfl5VvwEuB17axt8GXF1V11TVk8CngV2BI6pqDfCrNvfVwErgF0leDLwG+F5VPb3Nj17azPR+L0Dqh6r6aZKrgKXArW34AOCuzabeBcwESPJo1/h84DI6zwqWtUtOXwY+0n7AA9zbNf8xOr/VP+N+qurpJGs33Q+dZwevBQ5q2w/RCcHhbV/a7nxmoEF2JvAufv9D+BfAgZvNeSGwHqCqdu/6uruqnqyqf6+q+cARwBuAk3q43z+4nyShE5X1bWhTDF7Vtr9LJwavwRhoBzEGGlhVNQJ8DTitDa0AXpTk7UmmJ3kbnWcAV411fpLXJfnrJNOAR+hcNurlEs7lwHFJjkryXOAM4HHgB+34d4HXAbtW1Trge8BCOpekbvojHqo0IWOgQXcWsBtAVT1A57f7M4AHgA8Cb6iq+8c598+AK+iE4FY6P8Qvm+gOq+o24B3A54D76bwm8caqeqId/znwKJ0I0F6HWAN8v6qe+qMepTSB+J/bSJJ8ZiBJMgaSJGMgScIYSJJ4Fn/obL/99qs5c+b0exmS9KyxatWq+6tqaKxjz9oYzJkzh+Hh4X4vQ5KeNZJs/gn73/EykSTJGEiSjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJJ4Fn8C+dlgztKr+72Encqdnziu30uQdlo+M5AkGQNJkjGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJNFjDJL8U5LVSX6a5KtJnpdkbpIbkowk+VqSGW3uLm1/pB2f03U7H27jtyU5pmt8YRsbSbJ0uz9KSdIWTRiDJDOB04AFVXUwMA04AfgkcG5VHQQ8CJzSTjkFeLCNn9vmkWR+O+8lwELg80mmJZkGnA8cC8wHTmxzJUmTpNfLRNOBXZNMB54P3AMcCVzRjl8CHN+2F7V92vGjkqSNL6uqx6vqDmAEOLR9jVTVmqp6AljW5kqSJsmEMaiq9cCngbvpROBhYBXwUFVtbNPWATPb9kxgbTt3Y5u/b/f4ZueMN/4MSZYkGU4yPDo62svjkyT1oJfLRHvT+U19LnAAsBudyzyTrqouqKoFVbVgaGioH0uQpJ1SL5eJ/ha4o6pGq+pJ4BvAK4G92mUjgFnA+ra9HpgN0I7vCTzQPb7ZOeONS5ImSS8xuBs4LMnz27X/o4BbgO8Ab25zFgNXtu3lbZ92/NtVVW38hPZuo7nAPOCHwI3AvPbupBl0XmRevu0PTZLUq+kTTaiqG5JcAfwI2AjcBFwAXA0sS/LxNnZhO+VC4LIkI8AGOj/cqarVSS6nE5KNwKlV9RRAkvcCK+m8U+miqlq9/R6iJGkiE8YAoKrOBM7cbHgNnXcCbT73t8Bbxrmds4GzxxhfAazoZS2SpO3PTyBLkoyBJMkYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgSaLHGCTZK8kVSX6W5NYkhyfZJ8k1SW5v3/duc5PkvCQjSW5OckjX7Sxu829Psrhr/OVJftLOOS9Jtv9DlSSNp9dnBp8F/qeqXgz8DXArsBS4tqrmAde2fYBjgXntawnwBYAk+wBnAq8ADgXO3BSQNuddXect3LaHJUnaGhPGIMmewKuBCwGq6omqeghYBFzSpl0CHN+2FwGXVsf1wF5JXgAcA1xTVRuq6kHgGmBhO7ZHVV1fVQVc2nVbkqRJ0Mszg7nAKHBxkpuSfDHJbsD+VXVPm3MvsH/bngms7Tp/XRvb0vi6McafIcmSJMNJhkdHR3tYuiSpF73EYDpwCPCFqnoZ8Gt+f0kIgPYbfW3/5f2hqrqgqhZU1YKhoaEdfXeSNDB6icE6YF1V3dD2r6ATh1+2Szy07/e14+uB2V3nz2pjWxqfNca4JGmSTBiDqroXWJvkL9vQUcAtwHJg0zuCFgNXtu3lwEntXUWHAQ+3y0krgaOT7N1eOD4aWNmOPZLksPYuopO6bkuSNAmm9zjvfcBXkswA1gAn0wnJ5UlOAe4C3trmrgBeD4wAj7W5VNWGJB8DbmzzzqqqDW37PcCXgF2Bb7UvSdIk6SkGVfVjYMEYh44aY24Bp45zOxcBF40xPgwc3MtaJEnbn59AliQZA0mSMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJKEMZAkYQwkSRgDSRLGQJLEVsQgybQkNyW5qu3PTXJDkpEkX0syo43v0vZH2vE5Xbfx4TZ+W5JjusYXtrGRJEu34+OTJPVga54ZnA7c2rX/SeDcqjoIeBA4pY2fAjzYxs9t80gyHzgBeAmwEPh8C8w04HzgWGA+cGKbK0maJD3FIMks4Djgi20/wJHAFW3KJcDxbXtR26cdP6rNXwQsq6rHq+oOYAQ4tH2NVNWaqnoCWNbmSpImSa/PDP4D+CDwdNvfF3ioqja2/XXAzLY9E1gL0I4/3Ob/bnyzc8Ybf4YkS5IMJxkeHR3tcemSpIlMGIMkbwDuq6pVk7CeLaqqC6pqQVUtGBoa6vdyJGmnMb2HOa8E3pTk9cDzgD2AzwJ7JZnefvufBaxv89cDs4F1SaYDewIPdI1v0n3OeOOSpEkw4TODqvpwVc2qqjl0XgD+dlX9A/Ad4M1t2mLgyra9vO3Tjn+7qqqNn9DebTQXmAf8ELgRmNfenTSj3cfy7fLoJEk96eWZwXg+BCxL8nHgJuDCNn4hcFmSEWADnR/uVNXqJJcDtwAbgVOr6imAJO8FVgLTgIuqavU2rEuStJW2KgZVdR1wXdteQ+edQJvP+S3wlnHOPxs4e4zxFcCKrVmLJGn78RPIkiRjIEkyBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkjIEkCWMgScIYSJIwBpIkYHq/FyCpP+YsvbrfS9ip3PmJ4/q9hG3iMwNJkjGQJPUQgySzk3wnyS1JVic5vY3vk+SaJLe373u38SQ5L8lIkpuTHNJ1W4vb/NuTLO4af3mSn7RzzkuSHfFgJUlj6+WZwUbgjKqaDxwGnJpkPrAUuLaq5gHXtn2AY4F57WsJ8AXoxAM4E3gFcChw5qaAtDnv6jpv4bY/NElSryaMQVXdU1U/atu/Am4FZgKLgEvatEuA49v2IuDS6rge2CvJC4BjgGuqakNVPQhcAyxsx/aoquurqoBLu25LkjQJtuo1gyRzgJcBNwD7V9U97dC9wP5teyawtuu0dW1sS+Prxhgf6/6XJBlOMjw6Oro1S5ckbUHPMUiyO/B14P1V9Uj3sfYbfW3ntT1DVV1QVQuqasHQ0NCOvjtJGhg9xSDJc+mE4CtV9Y02/Mt2iYf2/b42vh6Y3XX6rDa2pfFZY4xLkiZJL+8mCnAhcGtVndN1aDmw6R1Bi4Eru8ZPau8qOgx4uF1OWgkcnWTv9sLx0cDKduyRJIe1+zqp67YkSZOgl08gvxL4R+AnSX7cxv4F+ARweZJTgLuAt7ZjK4DXAyPAY8DJAFW1IcnHgBvbvLOqakPbfg/wJWBX4FvtS5I0SSaMQVX9HzDe+/6PGmN+AaeOc1sXAReNMT4MHDzRWiRJO4afQJYkGQNJkjGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCShDGQJGEMJEkYA0kSxkCSxBSKQZKFSW5LMpJkab/XI0mDZErEIMk04HzgWGA+cGKS+f1dlSQNjikRA+BQYKSq1lTVE8AyYFGf1yRJA2N6vxfQzATWdu2vA16x+aQkS4AlbffRJLdNwtoGwX7A/f1exETyyX6vQH3in8/t58DxDkyVGPSkqi4ALuj3OnY2SYarakG/1yGNxT+fk2OqXCZaD8zu2p/VxiRJk2CqxOBGYF6SuUlmACcAy/u8JkkaGFPiMlFVbUzyXmAlMA24qKpW93lZg8RLb5rK/PM5CVJV/V6DJKnPpsplIklSHxkDSZIxkCQZA0lTTJK/SLJL235tktOS7NXnZe30jMGASjIryX8nGU1yX5KvJ5nV73VJwNeBp5IcROedRLOB/+zvknZ+xmBwXUznsxwvAA4AvtnGpH57uqo2An8HfK6q/pnOn1PtQMZgcA1V1cVVtbF9fQkY6veiJODJJCcCi4Gr2thz+7iegWAMBtcDSd6RZFr7egfwQL8XJQEnA4cDZ1fVHUnmApf1eU07PT90NqCSHAh8js5fugJ+AJxWVXf3dWESkGRX4IVV5b9MPEmMgaQpJckbgU8DM6pqbpKXAmdV1Zv6u7KdmzEYMEk+uoXDVVUfm7TFSGNIsgo4Eriuql7Wxn5aVQf3d2U7tynxD9VpUv16jLHdgFOAfQFjoH57sqoeTtI99nS/FjMojMGAqarPbNpO8ifA6XResFsGfGa886QdLckK4FRgdZK3A9OSzANOo/OalnYg3000gJLsk+TjwM10fiE4pKo+VFX39XlpGmwX0/ln7O8EDgYep/Nhs4fp/NKiHcjXDAZMkk8Bf0/nk53nV9WjfV6S9DtJdgf+DVhI5+2km35AVVWd07eFDQAvEw2eM+j8xvWvwEe6rsuGzl+4Pfq1MAl4gs7rWrsAu/P7GGgHMwYDpqq8NKgpKclC4Bw6/0zKIVX1WJ+XNFC8TCRpSkjyPeDd/pe3/WEMJEm+m0iSZAwkSRgDSRLGQJKEMZAkAf8PP9ePQZsYa28AAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -799,7 +734,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -813,7 +748,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -849,7 +784,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.11" }, "orig_nbformat": 4 }, diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c60c430 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.7 + +WORKDIR / + +RUN pip install kaggle + +RUN pip install pandas + +RUN pip install sklearn + +COPY KaggleV2-May-2016.csv ./ + +COPY create_data.py ./ + +COPY stats_data.py ./ + +# CMD ["python", "./create_data.py"] + +# CMD ["python", "./stats_data.py"] + +# RUN kaggle datasets download -d joniarroba/noshowappointments + +# RUN unzip -o noshowappointments.zip \ No newline at end of file diff --git a/create_data.ipynb b/create_data.ipynb deleted file mode 100644 index 770f321..0000000 --- a/create_data.ipynb +++ /dev/null @@ -1,94 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install kaggle\n", - "!pip install pandas\n", - "!pip install seaborn\n", - "!pip install torch" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 1 Pobranie zbioru\n", - "!kaggle datasets download -d joniarroba/noshowappointments" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!unzip -o noshowappointments.zip" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "no_shows=pd.read_csv('KaggleV2-May-2016.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Wyczyszczenie zbioru\n", - "# Usunięcie negatywnego wieku\n", - "no_shows = no_shows.drop(no_shows[no_shows[\"Age\"] < 0].index)\n", - "\n", - "# Usunięcie niewiadomego wieku (zależy od zastosowania)\n", - "# no_shows = no_shows.drop(no_shows[no_shows[\"Age\"] == 0].index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Normalizacja danych\n", - "\n", - "# Usunięcie kolumn PatientId oraz AppointmentID\n", - "no_shows.drop([\"PatientId\", \"AppointmentID\"], inplace=True, axis=1)\n", - "\n", - "# Zmiena wartości kolumny No-show z Yes/No na wartość boolowską\n", - "no_shows[\"No-show\"] = no_shows[\"No-show\"].map({'Yes': 1, 'No': 0})\n", - "\n", - "# Normalizacja kolumny Age\n", - "no_shows[\"Age\"]=(no_shows[\"Age\"]-no_shows[\"Age\"].min())/(no_shows[\"Age\"].max()-no_shows[\"Age\"].min())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Zapisanie wyników jako artefakt" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/create_data.py b/create_data.py new file mode 100644 index 0000000..5155a45 --- /dev/null +++ b/create_data.py @@ -0,0 +1,23 @@ +import pandas as pd +from sklearn.model_selection import train_test_split + +# Data preproccesing + +no_shows=pd.read_csv('KaggleV2-May-2016.csv') + +# Usunięcie negatywnego wieku +no_shows = no_shows.drop(no_shows[no_shows["Age"] < 0].index) + +# Usunięcie kolumn PatientId oraz AppointmentID +no_shows.drop(["PatientId", "AppointmentID"], inplace=True, axis=1) + +# Zmiena wartości kolumny No-show z Yes/No na wartość boolowską +no_shows["No-show"] = no_shows["No-show"].map({'Yes': 1, 'No': 0}) + +# Normalizacja kolumny Age +no_shows["Age"]=(no_shows["Age"]-no_shows["Age"].min())/(no_shows["Age"].max()-no_shows["Age"].min()) + +X = no_shows.drop(columns=['No-show']) +y = no_shows['No-show'] + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) \ No newline at end of file diff --git a/preparation.sh b/preparation.sh index dfe1747..43900fd 100644 --- a/preparation.sh +++ b/preparation.sh @@ -1 +1 @@ -echo "Preparation inner" \ No newline at end of file +python create_data.py \ No newline at end of file diff --git a/statistics.sh b/statistics.sh index 65062f9..8abe7c2 100644 --- a/statistics.sh +++ b/statistics.sh @@ -1 +1 @@ -wc -l KaggleV2-May-2016.csv >> statistics.csv \ No newline at end of file +python stats_data.py \ No newline at end of file diff --git a/stats_data.py b/stats_data.py new file mode 100644 index 0000000..abd769d --- /dev/null +++ b/stats_data.py @@ -0,0 +1,11 @@ +import pandas as pd + +# Data description + +no_shows=pd.read_csv('KaggleV2-May-2016.csv') + +# Wielkość zbioru +print(f"Wielkosc zbioru: {len(no_shows)}") + +# Opis parametrów +print(no_shows.describe(include='all')) \ No newline at end of file