{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install dependencies into the kernel's own environment.\n",
    "# %pip (unlike !pip) targets the interpreter this notebook is running on.\n",
    "%pip install kaggle pandas seaborn torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Download the dataset\n",
    "# NOTE: the kaggle CLI needs API credentials to be configured beforehand.\n",
    "!kaggle datasets download -d joniarroba/noshowappointments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Unpack the archive; -o overwrites existing files so the cell can be re-run.\n",
    "!unzip -o noshowappointments.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Keep the raw frame untouched so that every later cell can be re-run safely.\n",
    "no_shows_raw = pd.read_csv('KaggleV2-May-2016.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clean the dataset\n",
    "# Remove rows with a negative age\n",
    "negative_age_rows = no_shows_raw[no_shows_raw[\"Age\"] < 0].index\n",
    "no_shows_clean = no_shows_raw.drop(index=negative_age_rows)\n",
    "\n",
    "# Remove rows with an unknown age (depends on the use case)\n",
    "# no_shows_clean = no_shows_clean.drop(index=no_shows_clean[no_shows_clean[\"Age\"] == 0].index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalize the data\n",
    "# Built from no_shows_clean (never modified in place), so this cell is idempotent.\n",
    "\n",
    "# Drop the PatientId and AppointmentID columns\n",
    "no_shows = no_shows_clean.drop(columns=[\"PatientId\", \"AppointmentID\"])\n",
    "\n",
    "# Convert the No-show column from Yes/No to a 1/0 integer flag\n",
    "no_shows[\"No-show\"] = no_shows[\"No-show\"].map({'Yes': 1, 'No': 0})\n",
    "# Any label other than Yes/No would silently become NaN; fail loudly instead.\n",
    "assert no_shows[\"No-show\"].notna().all(), \"unexpected label in the No-show column\"\n",
    "\n",
    "# Min-max normalize the Age column to the [0, 1] range\n",
    "age_min = no_shows[\"Age\"].min()\n",
    "age_max = no_shows[\"Age\"].max()\n",
    "no_shows[\"Age\"] = (no_shows[\"Age\"] - age_min) / (age_max - age_min)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the results as an artifact\n",
    "# TODO: not implemented yet"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}