From a80462968a15b224b369540011567b0a671a2ed6 Mon Sep 17 00:00:00 2001
From: Halal37 <59863785+Halal37@users.noreply.github.com>
Date: Tue, 21 Mar 2023 11:45:41 +0100
Subject: [PATCH] Lab2
---
ium_lab2.ipynb | 2256 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 2256 insertions(+)
create mode 100644 ium_lab2.ipynb
diff --git a/ium_lab2.ipynb b/ium_lab2.ipynb
new file mode 100644
index 0000000..dae3d3d
--- /dev/null
+++ b/ium_lab2.ipynb
@@ -0,0 +1,2256 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "id": "78e785f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: kaggle in ./jupyter_env/lib/python3.10/site-packages (1.5.13)\n",
+ "Requirement already satisfied: requests in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (2.28.2)\n",
+ "Requirement already satisfied: six>=1.10 in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (1.16.0)\n",
+ "Requirement already satisfied: tqdm in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (4.65.0)\n",
+ "Requirement already satisfied: urllib3 in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (1.26.15)\n",
+ "Requirement already satisfied: certifi in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (2022.12.7)\n",
+ "Requirement already satisfied: python-slugify in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (8.0.1)\n",
+ "Requirement already satisfied: python-dateutil in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (2.8.2)\n",
+ "Requirement already satisfied: text-unidecode>=1.3 in ./jupyter_env/lib/python3.10/site-packages (from python-slugify->kaggle) (1.3)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in ./jupyter_env/lib/python3.10/site-packages (from requests->kaggle) (3.4)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in ./jupyter_env/lib/python3.10/site-packages (from requests->kaggle) (3.1.0)\n",
+ "Requirement already satisfied: pandas in ./jupyter_env/lib/python3.10/site-packages (1.5.3)\n",
+ "Requirement already satisfied: numpy>=1.21.0 in ./jupyter_env/lib/python3.10/site-packages (from pandas) (1.24.2)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in ./jupyter_env/lib/python3.10/site-packages (from pandas) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in ./jupyter_env/lib/python3.10/site-packages (from pandas) (2022.7.1)\n",
+ "Requirement already satisfied: six>=1.5 in ./jupyter_env/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
+ "Requirement already satisfied: unzip in ./jupyter_env/lib/python3.10/site-packages (1.0.0)\n",
+ "Requirement already satisfied: scikit-learn in ./jupyter_env/lib/python3.10/site-packages (1.2.2)\n",
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in ./jupyter_env/lib/python3.10/site-packages (from scikit-learn) (3.1.0)\n",
+ "Requirement already satisfied: numpy>=1.17.3 in ./jupyter_env/lib/python3.10/site-packages (from scikit-learn) (1.24.2)\n",
+ "Requirement already satisfied: joblib>=1.1.1 in ./jupyter_env/lib/python3.10/site-packages (from scikit-learn) (1.2.0)\n",
+ "Requirement already satisfied: scipy>=1.3.2 in ./jupyter_env/lib/python3.10/site-packages (from scikit-learn) (1.10.1)\n",
+ "Requirement already satisfied: seaborn in ./jupyter_env/lib/python3.10/site-packages (0.12.2)\n",
+ "Requirement already satisfied: numpy!=1.24.0,>=1.17 in ./jupyter_env/lib/python3.10/site-packages (from seaborn) (1.24.2)\n",
+ "Requirement already satisfied: pandas>=0.25 in ./jupyter_env/lib/python3.10/site-packages (from seaborn) (1.5.3)\n",
+ "Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in ./jupyter_env/lib/python3.10/site-packages (from seaborn) (3.7.1)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (9.4.0)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.39.2)\n",
+ "Requirement already satisfied: pyparsing>=2.3.1 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.0.9)\n",
+ "Requirement already satisfied: contourpy>=1.0.1 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.7)\n",
+ "Requirement already satisfied: cycler>=0.10 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4)\n",
+ "Requirement already satisfied: packaging>=20.0 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.0)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in ./jupyter_env/lib/python3.10/site-packages (from pandas>=0.25->seaborn) (2022.7.1)\n",
+ "Requirement already satisfied: six>=1.5 in ./jupyter_env/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.16.0)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Install the required packages\n",
+ "!pip install kaggle\n",
+ "!pip install pandas\n",
+ "!pip install unzip\n",
+ "!pip install scikit-learn\n",
+ "!pip install seaborn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "id": "d8fffef2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/user/.kaggle/kaggle.json'\n",
+ "crime-in-baltimore.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Download the dataset\n",
+ "!kaggle datasets download -d sohier/crime-in-baltimore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "id": "febfcbd4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Archive: crime-in-baltimore.zip\n",
+ " inflating: BPD_Part_1_Victim_Based_Crime_Data.csv \n"
+ ]
+ }
+ ],
+ "source": [
+ "!unzip -o crime-in-baltimore.zip"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "id": "11bc16fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! grep -P \"^$\" -n BPD_Part_1_Victim_Based_Crime_Data.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "id": "cb85e933",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "id": "20e6099e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " CrimeDate \n",
+ " CrimeTime \n",
+ " CrimeCode \n",
+ " Location \n",
+ " Description \n",
+ " Inside/Outside \n",
+ " Weapon \n",
+ " Post \n",
+ " District \n",
+ " Neighborhood \n",
+ " Longitude \n",
+ " Latitude \n",
+ " Location 1 \n",
+ " Premise \n",
+ " Total Incidents \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 09/02/2017 \n",
+ " 23:30:00 \n",
+ " 3JK \n",
+ " 4200 AUDREY AVE \n",
+ " ROBBERY - RESIDENCE \n",
+ " I \n",
+ " KNIFE \n",
+ " 913.0 \n",
+ " SOUTHERN \n",
+ " Brooklyn \n",
+ " -76.60541 \n",
+ " 39.22951 \n",
+ " (39.2295100000, -76.6054100000) \n",
+ " ROW/TOWNHO \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 09/02/2017 \n",
+ " 23:00:00 \n",
+ " 7A \n",
+ " 800 NEWINGTON AVE \n",
+ " AUTO THEFT \n",
+ " O \n",
+ " NaN \n",
+ " 133.0 \n",
+ " CENTRAL \n",
+ " Reservoir Hill \n",
+ " -76.63217 \n",
+ " 39.31360 \n",
+ " (39.3136000000, -76.6321700000) \n",
+ " STREET \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 09/02/2017 \n",
+ " 22:53:00 \n",
+ " 9S \n",
+ " 600 RADNOR AV \n",
+ " SHOOTING \n",
+ " Outside \n",
+ " FIREARM \n",
+ " 524.0 \n",
+ " NORTHERN \n",
+ " Winston-Govans \n",
+ " -76.60697 \n",
+ " 39.34768 \n",
+ " (39.3476800000, -76.6069700000) \n",
+ " Street \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 09/02/2017 \n",
+ " 22:50:00 \n",
+ " 4C \n",
+ " 1800 RAMSAY ST \n",
+ " AGG. ASSAULT \n",
+ " I \n",
+ " OTHER \n",
+ " 934.0 \n",
+ " SOUTHERN \n",
+ " Carrollton Ridge \n",
+ " -76.64526 \n",
+ " 39.28315 \n",
+ " (39.2831500000, -76.6452600000) \n",
+ " ROW/TOWNHO \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 09/02/2017 \n",
+ " 22:31:00 \n",
+ " 4E \n",
+ " 100 LIGHT ST \n",
+ " COMMON ASSAULT \n",
+ " O \n",
+ " HANDS \n",
+ " 113.0 \n",
+ " CENTRAL \n",
+ " Downtown West \n",
+ " -76.61365 \n",
+ " 39.28756 \n",
+ " (39.2875600000, -76.6136500000) \n",
+ " STREET \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 276524 \n",
+ " 01/01/2012 \n",
+ " 00:00:00 \n",
+ " 6J \n",
+ " 1400 JOH AVE \n",
+ " LARCENY \n",
+ " I \n",
+ " NaN \n",
+ " 832.0 \n",
+ " SOUTHWESTERN \n",
+ " Violetville \n",
+ " -76.67195 \n",
+ " 39.26132 \n",
+ " (39.2613200000, -76.6719500000) \n",
+ " OTHER - IN \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 276525 \n",
+ " 01/01/2012 \n",
+ " 00:00:00 \n",
+ " 6J \n",
+ " 5500 SINCLAIR LN \n",
+ " LARCENY \n",
+ " O \n",
+ " NaN \n",
+ " 444.0 \n",
+ " NORTHEASTERN \n",
+ " Frankford \n",
+ " -76.53829 \n",
+ " 39.32493 \n",
+ " (39.3249300000, -76.5382900000) \n",
+ " OTHER - OU \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 276526 \n",
+ " 01/01/2012 \n",
+ " 00:00:00 \n",
+ " 6E \n",
+ " 400 N PATTERSON PK AV \n",
+ " LARCENY \n",
+ " O \n",
+ " NaN \n",
+ " 321.0 \n",
+ " EASTERN \n",
+ " CARE \n",
+ " -76.58497 \n",
+ " 39.29573 \n",
+ " (39.2957300000, -76.5849700000) \n",
+ " STREET \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 276527 \n",
+ " 01/01/2012 \n",
+ " 00:00:00 \n",
+ " 5A \n",
+ " 5800 LILLYAN AV \n",
+ " BURGLARY \n",
+ " I \n",
+ " NaN \n",
+ " 425.0 \n",
+ " NORTHEASTERN \n",
+ " Glenham-Belhar \n",
+ " -76.54578 \n",
+ " 39.34701 \n",
+ " (39.3470100000, -76.5457800000) \n",
+ " APT. LOCKE \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 276528 \n",
+ " 01/01/2012 \n",
+ " 00:00:00 \n",
+ " 5A \n",
+ " 1900 GRINNALDS AV \n",
+ " BURGLARY \n",
+ " I \n",
+ " NaN \n",
+ " 831.0 \n",
+ " SOUTHWESTERN \n",
+ " Morrell Park \n",
+ " -76.65094 \n",
+ " 39.26698 \n",
+ " (39.2669800000, -76.6509400000) \n",
+ " ROW/TOWNHO \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
276529 rows × 15 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CrimeDate CrimeTime CrimeCode Location \\\n",
+ "0 09/02/2017 23:30:00 3JK 4200 AUDREY AVE \n",
+ "1 09/02/2017 23:00:00 7A 800 NEWINGTON AVE \n",
+ "2 09/02/2017 22:53:00 9S 600 RADNOR AV \n",
+ "3 09/02/2017 22:50:00 4C 1800 RAMSAY ST \n",
+ "4 09/02/2017 22:31:00 4E 100 LIGHT ST \n",
+ "... ... ... ... ... \n",
+ "276524 01/01/2012 00:00:00 6J 1400 JOH AVE \n",
+ "276525 01/01/2012 00:00:00 6J 5500 SINCLAIR LN \n",
+ "276526 01/01/2012 00:00:00 6E 400 N PATTERSON PK AV \n",
+ "276527 01/01/2012 00:00:00 5A 5800 LILLYAN AV \n",
+ "276528 01/01/2012 00:00:00 5A 1900 GRINNALDS AV \n",
+ "\n",
+ " Description Inside/Outside Weapon Post District \\\n",
+ "0 ROBBERY - RESIDENCE I KNIFE 913.0 SOUTHERN \n",
+ "1 AUTO THEFT O NaN 133.0 CENTRAL \n",
+ "2 SHOOTING Outside FIREARM 524.0 NORTHERN \n",
+ "3 AGG. ASSAULT I OTHER 934.0 SOUTHERN \n",
+ "4 COMMON ASSAULT O HANDS 113.0 CENTRAL \n",
+ "... ... ... ... ... ... \n",
+ "276524 LARCENY I NaN 832.0 SOUTHWESTERN \n",
+ "276525 LARCENY O NaN 444.0 NORTHEASTERN \n",
+ "276526 LARCENY O NaN 321.0 EASTERN \n",
+ "276527 BURGLARY I NaN 425.0 NORTHEASTERN \n",
+ "276528 BURGLARY I NaN 831.0 SOUTHWESTERN \n",
+ "\n",
+ " Neighborhood Longitude Latitude \\\n",
+ "0 Brooklyn -76.60541 39.22951 \n",
+ "1 Reservoir Hill -76.63217 39.31360 \n",
+ "2 Winston-Govans -76.60697 39.34768 \n",
+ "3 Carrollton Ridge -76.64526 39.28315 \n",
+ "4 Downtown West -76.61365 39.28756 \n",
+ "... ... ... ... \n",
+ "276524 Violetville -76.67195 39.26132 \n",
+ "276525 Frankford -76.53829 39.32493 \n",
+ "276526 CARE -76.58497 39.29573 \n",
+ "276527 Glenham-Belhar -76.54578 39.34701 \n",
+ "276528 Morrell Park -76.65094 39.26698 \n",
+ "\n",
+ " Location 1 Premise Total Incidents \n",
+ "0 (39.2295100000, -76.6054100000) ROW/TOWNHO 1 \n",
+ "1 (39.3136000000, -76.6321700000) STREET 1 \n",
+ "2 (39.3476800000, -76.6069700000) Street 1 \n",
+ "3 (39.2831500000, -76.6452600000) ROW/TOWNHO 1 \n",
+ "4 (39.2875600000, -76.6136500000) STREET 1 \n",
+ "... ... ... ... \n",
+ "276524 (39.2613200000, -76.6719500000) OTHER - IN 1 \n",
+ "276525 (39.3249300000, -76.5382900000) OTHER - OU 1 \n",
+ "276526 (39.2957300000, -76.5849700000) STREET 1 \n",
+ "276527 (39.3470100000, -76.5457800000) APT. LOCKE 1 \n",
+ "276528 (39.2669800000, -76.6509400000) ROW/TOWNHO 1 \n",
+ "\n",
+ "[276529 rows x 15 columns]"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore=pd.read_csv('BPD_Part_1_Victim_Based_Crime_Data.csv')\n",
+ "baltimore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "id": "89b1028c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "CrimeDate 0\n",
+ "CrimeTime 0\n",
+ "CrimeCode 0\n",
+ "Location 2207\n",
+ "Description 0\n",
+ "Inside/Outside 10279\n",
+ "Weapon 180952\n",
+ "Post 224\n",
+ "District 80\n",
+ "Neighborhood 2740\n",
+ "Longitude 2204\n",
+ "Latitude 2204\n",
+ "Location 1 2204\n",
+ "Premise 10757\n",
+ "Total Incidents 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 106,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "id": "7109d8f7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Most crimes involve no weapon: fill missing Weapon values with \"None\".\n",
+ "# Assign back rather than chained inplace=True, which pandas copy-on-write ignores.\n",
+ "baltimore[\"Weapon\"] = baltimore[\"Weapon\"].fillna(\"None\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "id": "1c67e681",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "CrimeDate 0\n",
+ "CrimeTime 0\n",
+ "CrimeCode 0\n",
+ "Location 2207\n",
+ "Description 0\n",
+ "Inside/Outside 10279\n",
+ "Weapon 0\n",
+ "Post 224\n",
+ "District 80\n",
+ "Neighborhood 2740\n",
+ "Longitude 2204\n",
+ "Latitude 2204\n",
+ "Location 1 2204\n",
+ "Premise 10757\n",
+ "Total Incidents 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 108,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "id": "31966b62",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Clean the dataset of artifacts (drop rows that still have missing values)\n",
+ "baltimore.dropna(inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "id": "75f39653",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "CrimeDate 0\n",
+ "CrimeTime 0\n",
+ "CrimeCode 0\n",
+ "Location 0\n",
+ "Description 0\n",
+ "Inside/Outside 0\n",
+ "Weapon 0\n",
+ "Post 0\n",
+ "District 0\n",
+ "Neighborhood 0\n",
+ "Longitude 0\n",
+ "Latitude 0\n",
+ "Location 1 0\n",
+ "Premise 0\n",
+ "Total Incidents 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "id": "6cd411df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "id": "8b8b4732",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Normalization: scale Post by its largest absolute value and\n",
+ "# lower-case every text column so spelling variants share one case.\n",
+ "baltimore['Post'] = baltimore['Post'] / baltimore['Post'].abs().max()\n",
+ "text_columns = [\n",
+ "    'Location', 'Description', 'Weapon', 'Premise',\n",
+ "    'District', 'CrimeCode', 'Neighborhood', 'Inside/Outside',\n",
+ "]\n",
+ "for column in text_columns:\n",
+ "    # e.g. Premise holds both 'STREET' and 'Street' in the raw data\n",
+ "    baltimore[column] = baltimore[column].str.lower()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "id": "d9adbe06",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 113,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "baltimore['District'].value_counts().plot(kind=\"bar\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "id": "24b7582f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 114,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import seaborn as sns\n",
+ "sns.set_theme()\n",
+ "sns.relplot(data=baltimore[:20], x='Longitude', y='Latitude', hue='Weapon')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 115,
+ "id": "c9cf1067",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Split into training and test sets\n",
+ "baltimore_train, baltimore_test = train_test_split(baltimore, test_size=0.1, random_state=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 116,
+ "id": "350e7098",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " CrimeDate \n",
+ " CrimeTime \n",
+ " CrimeCode \n",
+ " Location \n",
+ " Description \n",
+ " Inside/Outside \n",
+ " Weapon \n",
+ " Post \n",
+ " District \n",
+ " Neighborhood \n",
+ " Longitude \n",
+ " Latitude \n",
+ " Location 1 \n",
+ " Premise \n",
+ " Total Incidents \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 20700 \n",
+ " 04/10/2017 \n",
+ " 22:26:00 \n",
+ " 4e \n",
+ " 4900 eastern av \n",
+ " common assault \n",
+ " o \n",
+ " hands \n",
+ " 0.256628 \n",
+ " southeastern \n",
+ " greektown \n",
+ " -76.55422 \n",
+ " 39.28706 \n",
+ " (39.2870600000, -76.5542200000) \n",
+ " alley \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 63746 \n",
+ " 06/05/2016 \n",
+ " 20:44:00 \n",
+ " 4e \n",
+ " 3000 s hanover st \n",
+ " common assault \n",
+ " o \n",
+ " hands \n",
+ " 0.977731 \n",
+ " southern \n",
+ " middle branch/reedbird pa \n",
+ " -76.61504 \n",
+ " 39.25134 \n",
+ " (39.2513400000, -76.6150400000) \n",
+ " street \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 169854 \n",
+ " 03/10/2014 \n",
+ " 20:00:00 \n",
+ " 4e \n",
+ " 4100 parkside dr \n",
+ " common assault \n",
+ " o \n",
+ " hands \n",
+ " 0.447508 \n",
+ " northeastern \n",
+ " belair-parkside \n",
+ " -76.56605 \n",
+ " 39.32783 \n",
+ " (39.3278300000, -76.5660500000) \n",
+ " street \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 42473 \n",
+ " 10/31/2016 \n",
+ " 09:30:00 \n",
+ " 4e \n",
+ " 5600 loch raven blvd \n",
+ " common assault \n",
+ " i \n",
+ " hands \n",
+ " 0.440085 \n",
+ " northeastern \n",
+ " loch raven \n",
+ " -76.58856 \n",
+ " 39.35952 \n",
+ " (39.3595200000, -76.5885600000) \n",
+ " hotel/mote \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 86103 \n",
+ " 12/05/2015 \n",
+ " 08:15:00 \n",
+ " 4e \n",
+ " 1100 guilford ave \n",
+ " common assault \n",
+ " i \n",
+ " hands \n",
+ " 0.149523 \n",
+ " central \n",
+ " mid-town belvedere \n",
+ " -76.61194 \n",
+ " 39.30319 \n",
+ " (39.3031900000, -76.6119400000) \n",
+ " apt/condo \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 182763 \n",
+ " 11/20/2013 \n",
+ " 20:00:00 \n",
+ " 6d \n",
+ " 3800 dolfield av \n",
+ " larceny from auto \n",
+ " o \n",
+ " none \n",
+ " 0.681866 \n",
+ " northwestern \n",
+ " dolfield \n",
+ " -76.68090 \n",
+ " 39.33938 \n",
+ " (39.3393800000, -76.6809000000) \n",
+ " street \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 14972 \n",
+ " 05/22/2017 \n",
+ " 03:30:00 \n",
+ " 4c \n",
+ " 3000 w garrison ave \n",
+ " agg. assault \n",
+ " i \n",
+ " other \n",
+ " 0.651113 \n",
+ " northwestern \n",
+ " central park heights \n",
+ " -76.67146 \n",
+ " 39.34863 \n",
+ " (39.3486300000, -76.6714600000) \n",
+ " row/townho \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 44956 \n",
+ " 10/15/2016 \n",
+ " 23:30:00 \n",
+ " 7a \n",
+ " 500 jack st \n",
+ " auto theft \n",
+ " o \n",
+ " none \n",
+ " 0.968187 \n",
+ " southern \n",
+ " brooklyn \n",
+ " -76.60582 \n",
+ " 39.23265 \n",
+ " (39.2326500000, -76.6058200000) \n",
+ " street \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 36873 \n",
+ " 12/08/2016 \n",
+ " 18:30:00 \n",
+ " 4e \n",
+ " 3800 cedarhurst rd \n",
+ " common assault \n",
+ " o \n",
+ " hands \n",
+ " 0.451750 \n",
+ " northeastern \n",
+ " waltherson \n",
+ " -76.56315 \n",
+ " 39.33720 \n",
+ " (39.3372000000, -76.5631500000) \n",
+ " street \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 230084 \n",
+ " 12/06/2012 \n",
+ " 14:00:00 \n",
+ " 4e \n",
+ " 800 s highland av \n",
+ " common assault \n",
+ " i \n",
+ " hands \n",
+ " 0.246023 \n",
+ " southeastern \n",
+ " canton \n",
+ " -76.56878 \n",
+ " 39.28342 \n",
+ " (39.2834200000, -76.5687800000) \n",
+ " school \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
26312 rows × 15 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CrimeDate CrimeTime CrimeCode Location \\\n",
+ "20700 04/10/2017 22:26:00 4e 4900 eastern av \n",
+ "63746 06/05/2016 20:44:00 4e 3000 s hanover st \n",
+ "169854 03/10/2014 20:00:00 4e 4100 parkside dr \n",
+ "42473 10/31/2016 09:30:00 4e 5600 loch raven blvd \n",
+ "86103 12/05/2015 08:15:00 4e 1100 guilford ave \n",
+ "... ... ... ... ... \n",
+ "182763 11/20/2013 20:00:00 6d 3800 dolfield av \n",
+ "14972 05/22/2017 03:30:00 4c 3000 w garrison ave \n",
+ "44956 10/15/2016 23:30:00 7a 500 jack st \n",
+ "36873 12/08/2016 18:30:00 4e 3800 cedarhurst rd \n",
+ "230084 12/06/2012 14:00:00 4e 800 s highland av \n",
+ "\n",
+ " Description Inside/Outside Weapon Post District \\\n",
+ "20700 common assault o hands 0.256628 southeastern \n",
+ "63746 common assault o hands 0.977731 southern \n",
+ "169854 common assault o hands 0.447508 northeastern \n",
+ "42473 common assault i hands 0.440085 northeastern \n",
+ "86103 common assault i hands 0.149523 central \n",
+ "... ... ... ... ... ... \n",
+ "182763 larceny from auto o none 0.681866 northwestern \n",
+ "14972 agg. assault i other 0.651113 northwestern \n",
+ "44956 auto theft o none 0.968187 southern \n",
+ "36873 common assault o hands 0.451750 northeastern \n",
+ "230084 common assault i hands 0.246023 southeastern \n",
+ "\n",
+ " Neighborhood Longitude Latitude \\\n",
+ "20700 greektown -76.55422 39.28706 \n",
+ "63746 middle branch/reedbird pa -76.61504 39.25134 \n",
+ "169854 belair-parkside -76.56605 39.32783 \n",
+ "42473 loch raven -76.58856 39.35952 \n",
+ "86103 mid-town belvedere -76.61194 39.30319 \n",
+ "... ... ... ... \n",
+ "182763 dolfield -76.68090 39.33938 \n",
+ "14972 central park heights -76.67146 39.34863 \n",
+ "44956 brooklyn -76.60582 39.23265 \n",
+ "36873 waltherson -76.56315 39.33720 \n",
+ "230084 canton -76.56878 39.28342 \n",
+ "\n",
+ " Location 1 Premise Total Incidents \n",
+ "20700 (39.2870600000, -76.5542200000) alley 1 \n",
+ "63746 (39.2513400000, -76.6150400000) street 1 \n",
+ "169854 (39.3278300000, -76.5660500000) street 1 \n",
+ "42473 (39.3595200000, -76.5885600000) hotel/mote 1 \n",
+ "86103 (39.3031900000, -76.6119400000) apt/condo 1 \n",
+ "... ... ... ... \n",
+ "182763 (39.3393800000, -76.6809000000) street 1 \n",
+ "14972 (39.3486300000, -76.6714600000) row/townho 1 \n",
+ "44956 (39.2326500000, -76.6058200000) street 1 \n",
+ "36873 (39.3372000000, -76.5631500000) street 1 \n",
+ "230084 (39.2834200000, -76.5687800000) school 1 \n",
+ "\n",
+ "[26312 rows x 15 columns]"
+ ]
+ },
+ "execution_count": 116,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 117,
+ "id": "ed66b750",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "baltimore_train, baltimore_val= train_test_split(baltimore_train, test_size=0.25, random_state=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 118,
+ "id": "3840c547",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " CrimeDate \n",
+ " CrimeTime \n",
+ " CrimeCode \n",
+ " Location \n",
+ " Description \n",
+ " Inside/Outside \n",
+ " Weapon \n",
+ " Post \n",
+ " District \n",
+ " Neighborhood \n",
+ " Longitude \n",
+ " Latitude \n",
+ " Location 1 \n",
+ " Premise \n",
+ " Total Incidents \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 263118 \n",
+ " 263118 \n",
+ " 263118 \n",
+ " 263118 \n",
+ " 263118 \n",
+ " 263118 \n",
+ " 263118 \n",
+ " 263118.000000 \n",
+ " 263118 \n",
+ " 263118 \n",
+ " 263118.000000 \n",
+ " 263118.000000 \n",
+ " 263118 \n",
+ " 263118 \n",
+ " 263118.0 \n",
+ " \n",
+ " \n",
+ " unique \n",
+ " 2072 \n",
+ " 2935 \n",
+ " 80 \n",
+ " 25276 \n",
+ " 15 \n",
+ " 4 \n",
+ " 5 \n",
+ " NaN \n",
+ " 9 \n",
+ " 278 \n",
+ " NaN \n",
+ " NaN \n",
+ " 93543 \n",
+ " 118 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " top \n",
+ " 04/27/2015 \n",
+ " 18:00:00 \n",
+ " 4e \n",
+ " 200 e pratt st \n",
+ " larceny \n",
+ " i \n",
+ " none \n",
+ " NaN \n",
+ " northeastern \n",
+ " downtown \n",
+ " NaN \n",
+ " NaN \n",
+ " (39.3180000000, -76.6582100000) \n",
+ " street \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " freq \n",
+ " 407 \n",
+ " 6483 \n",
+ " 43093 \n",
+ " 632 \n",
+ " 58246 \n",
+ " 131015 \n",
+ " 173175 \n",
+ " NaN \n",
+ " 40842 \n",
+ " 8701 \n",
+ " NaN \n",
+ " NaN \n",
+ " 503 \n",
+ " 102544 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.536416 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.617469 \n",
+ " 39.307456 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.276554 \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.042220 \n",
+ " 0.029537 \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.117709 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.711280 \n",
+ " 39.200410 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.256628 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.648420 \n",
+ " 39.288340 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.541888 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.614010 \n",
+ " 39.303680 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.775186 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.587490 \n",
+ " 39.327890 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.000000 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.529770 \n",
+ " 39.371980 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CrimeDate CrimeTime CrimeCode Location Description \\\n",
+ "count 263118 263118 263118 263118 263118 \n",
+ "unique 2072 2935 80 25276 15 \n",
+ "top 04/27/2015 18:00:00 4e 200 e pratt st larceny \n",
+ "freq 407 6483 43093 632 58246 \n",
+ "mean NaN NaN NaN NaN NaN \n",
+ "std NaN NaN NaN NaN NaN \n",
+ "min NaN NaN NaN NaN NaN \n",
+ "25% NaN NaN NaN NaN NaN \n",
+ "50% NaN NaN NaN NaN NaN \n",
+ "75% NaN NaN NaN NaN NaN \n",
+ "max NaN NaN NaN NaN NaN \n",
+ "\n",
+ " Inside/Outside Weapon Post District Neighborhood \\\n",
+ "count 263118 263118 263118.000000 263118 263118 \n",
+ "unique 4 5 NaN 9 278 \n",
+ "top i none NaN northeastern downtown \n",
+ "freq 131015 173175 NaN 40842 8701 \n",
+ "mean NaN NaN 0.536416 NaN NaN \n",
+ "std NaN NaN 0.276554 NaN NaN \n",
+ "min NaN NaN 0.117709 NaN NaN \n",
+ "25% NaN NaN 0.256628 NaN NaN \n",
+ "50% NaN NaN 0.541888 NaN NaN \n",
+ "75% NaN NaN 0.775186 NaN NaN \n",
+ "max NaN NaN 1.000000 NaN NaN \n",
+ "\n",
+ " Longitude Latitude Location 1 Premise \\\n",
+ "count 263118.000000 263118.000000 263118 263118 \n",
+ "unique NaN NaN 93543 118 \n",
+ "top NaN NaN (39.3180000000, -76.6582100000) street \n",
+ "freq NaN NaN 503 102544 \n",
+ "mean -76.617469 39.307456 NaN NaN \n",
+ "std 0.042220 0.029537 NaN NaN \n",
+ "min -76.711280 39.200410 NaN NaN \n",
+ "25% -76.648420 39.288340 NaN NaN \n",
+ "50% -76.614010 39.303680 NaN NaN \n",
+ "75% -76.587490 39.327890 NaN NaN \n",
+ "max -76.529770 39.371980 NaN NaN \n",
+ "\n",
+ " Total Incidents \n",
+ "count 263118.0 \n",
+ "unique NaN \n",
+ "top NaN \n",
+ "freq NaN \n",
+ "mean 1.0 \n",
+ "std 0.0 \n",
+ "min 1.0 \n",
+ "25% 1.0 \n",
+ "50% 1.0 \n",
+ "75% 1.0 \n",
+ "max 1.0 "
+ ]
+ },
+ "execution_count": 118,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore.describe(include='all')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "id": "06e5c943",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " CrimeDate \n",
+ " CrimeTime \n",
+ " CrimeCode \n",
+ " Location \n",
+ " Description \n",
+ " Inside/Outside \n",
+ " Weapon \n",
+ " Post \n",
+ " District \n",
+ " Neighborhood \n",
+ " Longitude \n",
+ " Latitude \n",
+ " Location 1 \n",
+ " Premise \n",
+ " Total Incidents \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 26312 \n",
+ " 26312 \n",
+ " 26312 \n",
+ " 26312 \n",
+ " 26312 \n",
+ " 26312 \n",
+ " 26312 \n",
+ " 26312.000000 \n",
+ " 26312 \n",
+ " 26312 \n",
+ " 26312.000000 \n",
+ " 26312.000000 \n",
+ " 26312 \n",
+ " 26312 \n",
+ " 26312.0 \n",
+ " \n",
+ " \n",
+ " unique \n",
+ " 2071 \n",
+ " 1513 \n",
+ " 71 \n",
+ " 11180 \n",
+ " 15 \n",
+ " 4 \n",
+ " 5 \n",
+ " NaN \n",
+ " 9 \n",
+ " 276 \n",
+ " NaN \n",
+ " NaN \n",
+ " 18843 \n",
+ " 104 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " top \n",
+ " 04/27/2015 \n",
+ " 18:00:00 \n",
+ " 4e \n",
+ " 1500 russell st \n",
+ " larceny \n",
+ " i \n",
+ " none \n",
+ " NaN \n",
+ " northeastern \n",
+ " downtown \n",
+ " NaN \n",
+ " NaN \n",
+ " (39.3180000000, -76.6582100000) \n",
+ " street \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " freq \n",
+ " 28 \n",
+ " 650 \n",
+ " 4357 \n",
+ " 56 \n",
+ " 5740 \n",
+ " 13248 \n",
+ " 17358 \n",
+ " NaN \n",
+ " 4137 \n",
+ " 853 \n",
+ " NaN \n",
+ " NaN \n",
+ " 49 \n",
+ " 10075 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.535663 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.617518 \n",
+ " 39.307771 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.275572 \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.042479 \n",
+ " 0.029477 \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.117709 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.711220 \n",
+ " 39.200470 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.257688 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.648905 \n",
+ " 39.288490 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.541888 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.614170 \n",
+ " 39.303850 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.766702 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.587170 \n",
+ " 39.328290 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.000000 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.529770 \n",
+ " 39.371970 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CrimeDate CrimeTime CrimeCode Location Description \\\n",
+ "count 26312 26312 26312 26312 26312 \n",
+ "unique 2071 1513 71 11180 15 \n",
+ "top 04/27/2015 18:00:00 4e 1500 russell st larceny \n",
+ "freq 28 650 4357 56 5740 \n",
+ "mean NaN NaN NaN NaN NaN \n",
+ "std NaN NaN NaN NaN NaN \n",
+ "min NaN NaN NaN NaN NaN \n",
+ "25% NaN NaN NaN NaN NaN \n",
+ "50% NaN NaN NaN NaN NaN \n",
+ "75% NaN NaN NaN NaN NaN \n",
+ "max NaN NaN NaN NaN NaN \n",
+ "\n",
+ " Inside/Outside Weapon Post District Neighborhood \\\n",
+ "count 26312 26312 26312.000000 26312 26312 \n",
+ "unique 4 5 NaN 9 276 \n",
+ "top i none NaN northeastern downtown \n",
+ "freq 13248 17358 NaN 4137 853 \n",
+ "mean NaN NaN 0.535663 NaN NaN \n",
+ "std NaN NaN 0.275572 NaN NaN \n",
+ "min NaN NaN 0.117709 NaN NaN \n",
+ "25% NaN NaN 0.257688 NaN NaN \n",
+ "50% NaN NaN 0.541888 NaN NaN \n",
+ "75% NaN NaN 0.766702 NaN NaN \n",
+ "max NaN NaN 1.000000 NaN NaN \n",
+ "\n",
+ " Longitude Latitude Location 1 Premise \\\n",
+ "count 26312.000000 26312.000000 26312 26312 \n",
+ "unique NaN NaN 18843 104 \n",
+ "top NaN NaN (39.3180000000, -76.6582100000) street \n",
+ "freq NaN NaN 49 10075 \n",
+ "mean -76.617518 39.307771 NaN NaN \n",
+ "std 0.042479 0.029477 NaN NaN \n",
+ "min -76.711220 39.200470 NaN NaN \n",
+ "25% -76.648905 39.288490 NaN NaN \n",
+ "50% -76.614170 39.303850 NaN NaN \n",
+ "75% -76.587170 39.328290 NaN NaN \n",
+ "max -76.529770 39.371970 NaN NaN \n",
+ "\n",
+ " Total Incidents \n",
+ "count 26312.0 \n",
+ "unique NaN \n",
+ "top NaN \n",
+ "freq NaN \n",
+ "mean 1.0 \n",
+ "std 0.0 \n",
+ "min 1.0 \n",
+ "25% 1.0 \n",
+ "50% 1.0 \n",
+ "75% 1.0 \n",
+ "max 1.0 "
+ ]
+ },
+ "execution_count": 119,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore_test.describe(include='all')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 120,
+ "id": "1566d1b1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " CrimeDate \n",
+ " CrimeTime \n",
+ " CrimeCode \n",
+ " Location \n",
+ " Description \n",
+ " Inside/Outside \n",
+ " Weapon \n",
+ " Post \n",
+ " District \n",
+ " Neighborhood \n",
+ " Longitude \n",
+ " Latitude \n",
+ " Location 1 \n",
+ " Premise \n",
+ " Total Incidents \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 177604 \n",
+ " 177604 \n",
+ " 177604 \n",
+ " 177604 \n",
+ " 177604 \n",
+ " 177604 \n",
+ " 177604 \n",
+ " 177604.000000 \n",
+ " 177604 \n",
+ " 177604 \n",
+ " 177604.000000 \n",
+ " 177604.000000 \n",
+ " 177604 \n",
+ " 177604 \n",
+ " 177604.0 \n",
+ " \n",
+ " \n",
+ " unique \n",
+ " 2072 \n",
+ " 2435 \n",
+ " 79 \n",
+ " 22781 \n",
+ " 15 \n",
+ " 4 \n",
+ " 5 \n",
+ " NaN \n",
+ " 9 \n",
+ " 278 \n",
+ " NaN \n",
+ " NaN \n",
+ " 74417 \n",
+ " 116 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " top \n",
+ " 04/27/2015 \n",
+ " 18:00:00 \n",
+ " 4e \n",
+ " 200 e pratt st \n",
+ " larceny \n",
+ " i \n",
+ " none \n",
+ " NaN \n",
+ " northeastern \n",
+ " downtown \n",
+ " NaN \n",
+ " NaN \n",
+ " (39.3180000000, -76.6582100000) \n",
+ " street \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " freq \n",
+ " 298 \n",
+ " 4340 \n",
+ " 29065 \n",
+ " 440 \n",
+ " 39287 \n",
+ " 88319 \n",
+ " 116884 \n",
+ " NaN \n",
+ " 27451 \n",
+ " 5877 \n",
+ " NaN \n",
+ " NaN \n",
+ " 337 \n",
+ " 69325 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.536132 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.617452 \n",
+ " 39.307395 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.276695 \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.042192 \n",
+ " 0.029526 \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.117709 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.711280 \n",
+ " 39.200410 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.256628 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.648290 \n",
+ " 39.288330 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.541888 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.613990 \n",
+ " 39.303580 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.775186 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.587500 \n",
+ " 39.327742 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.000000 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.529770 \n",
+ " 39.371970 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CrimeDate CrimeTime CrimeCode Location Description \\\n",
+ "count 177604 177604 177604 177604 177604 \n",
+ "unique 2072 2435 79 22781 15 \n",
+ "top 04/27/2015 18:00:00 4e 200 e pratt st larceny \n",
+ "freq 298 4340 29065 440 39287 \n",
+ "mean NaN NaN NaN NaN NaN \n",
+ "std NaN NaN NaN NaN NaN \n",
+ "min NaN NaN NaN NaN NaN \n",
+ "25% NaN NaN NaN NaN NaN \n",
+ "50% NaN NaN NaN NaN NaN \n",
+ "75% NaN NaN NaN NaN NaN \n",
+ "max NaN NaN NaN NaN NaN \n",
+ "\n",
+ " Inside/Outside Weapon Post District Neighborhood \\\n",
+ "count 177604 177604 177604.000000 177604 177604 \n",
+ "unique 4 5 NaN 9 278 \n",
+ "top i none NaN northeastern downtown \n",
+ "freq 88319 116884 NaN 27451 5877 \n",
+ "mean NaN NaN 0.536132 NaN NaN \n",
+ "std NaN NaN 0.276695 NaN NaN \n",
+ "min NaN NaN 0.117709 NaN NaN \n",
+ "25% NaN NaN 0.256628 NaN NaN \n",
+ "50% NaN NaN 0.541888 NaN NaN \n",
+ "75% NaN NaN 0.775186 NaN NaN \n",
+ "max NaN NaN 1.000000 NaN NaN \n",
+ "\n",
+ " Longitude Latitude Location 1 Premise \\\n",
+ "count 177604.000000 177604.000000 177604 177604 \n",
+ "unique NaN NaN 74417 116 \n",
+ "top NaN NaN (39.3180000000, -76.6582100000) street \n",
+ "freq NaN NaN 337 69325 \n",
+ "mean -76.617452 39.307395 NaN NaN \n",
+ "std 0.042192 0.029526 NaN NaN \n",
+ "min -76.711280 39.200410 NaN NaN \n",
+ "25% -76.648290 39.288330 NaN NaN \n",
+ "50% -76.613990 39.303580 NaN NaN \n",
+ "75% -76.587500 39.327742 NaN NaN \n",
+ "max -76.529770 39.371970 NaN NaN \n",
+ "\n",
+ " Total Incidents \n",
+ "count 177604.0 \n",
+ "unique NaN \n",
+ "top NaN \n",
+ "freq NaN \n",
+ "mean 1.0 \n",
+ "std 0.0 \n",
+ "min 1.0 \n",
+ "25% 1.0 \n",
+ "50% 1.0 \n",
+ "75% 1.0 \n",
+ "max 1.0 "
+ ]
+ },
+ "execution_count": 120,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore_train.describe(include='all')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "id": "02e5bf0c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " CrimeDate \n",
+ " CrimeTime \n",
+ " CrimeCode \n",
+ " Location \n",
+ " Description \n",
+ " Inside/Outside \n",
+ " Weapon \n",
+ " Post \n",
+ " District \n",
+ " Neighborhood \n",
+ " Longitude \n",
+ " Latitude \n",
+ " Location 1 \n",
+ " Premise \n",
+ " Total Incidents \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 59202 \n",
+ " 59202 \n",
+ " 59202 \n",
+ " 59202 \n",
+ " 59202 \n",
+ " 59202 \n",
+ " 59202 \n",
+ " 59202.000000 \n",
+ " 59202 \n",
+ " 59202 \n",
+ " 59202.000000 \n",
+ " 59202.000000 \n",
+ " 59202 \n",
+ " 59202 \n",
+ " 59202.0 \n",
+ " \n",
+ " \n",
+ " unique \n",
+ " 2070 \n",
+ " 1804 \n",
+ " 77 \n",
+ " 16050 \n",
+ " 15 \n",
+ " 4 \n",
+ " 5 \n",
+ " NaN \n",
+ " 9 \n",
+ " 276 \n",
+ " NaN \n",
+ " NaN \n",
+ " 35435 \n",
+ " 112 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " top \n",
+ " 04/27/2015 \n",
+ " 18:00:00 \n",
+ " 4e \n",
+ " 200 e pratt st \n",
+ " larceny \n",
+ " i \n",
+ " none \n",
+ " NaN \n",
+ " northeastern \n",
+ " downtown \n",
+ " NaN \n",
+ " NaN \n",
+ " (39.3180000000, -76.6582100000) \n",
+ " street \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " freq \n",
+ " 81 \n",
+ " 1493 \n",
+ " 9671 \n",
+ " 140 \n",
+ " 13219 \n",
+ " 29448 \n",
+ " 38933 \n",
+ " NaN \n",
+ " 9254 \n",
+ " 1971 \n",
+ " NaN \n",
+ " NaN \n",
+ " 117 \n",
+ " 23144 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.537601 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.617499 \n",
+ " 39.307502 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.276567 \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.042191 \n",
+ " 0.029595 \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.117709 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.711270 \n",
+ " 39.202540 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.257688 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.648500 \n",
+ " 39.288340 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.541888 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.614020 \n",
+ " 39.303930 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 0.775186 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.587592 \n",
+ " 39.328030 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.000000 \n",
+ " NaN \n",
+ " NaN \n",
+ " -76.529770 \n",
+ " 39.371980 \n",
+ " NaN \n",
+ " NaN \n",
+ " 1.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CrimeDate CrimeTime CrimeCode Location Description \\\n",
+ "count 59202 59202 59202 59202 59202 \n",
+ "unique 2070 1804 77 16050 15 \n",
+ "top 04/27/2015 18:00:00 4e 200 e pratt st larceny \n",
+ "freq 81 1493 9671 140 13219 \n",
+ "mean NaN NaN NaN NaN NaN \n",
+ "std NaN NaN NaN NaN NaN \n",
+ "min NaN NaN NaN NaN NaN \n",
+ "25% NaN NaN NaN NaN NaN \n",
+ "50% NaN NaN NaN NaN NaN \n",
+ "75% NaN NaN NaN NaN NaN \n",
+ "max NaN NaN NaN NaN NaN \n",
+ "\n",
+ " Inside/Outside Weapon Post District Neighborhood \\\n",
+ "count 59202 59202 59202.000000 59202 59202 \n",
+ "unique 4 5 NaN 9 276 \n",
+ "top i none NaN northeastern downtown \n",
+ "freq 29448 38933 NaN 9254 1971 \n",
+ "mean NaN NaN 0.537601 NaN NaN \n",
+ "std NaN NaN 0.276567 NaN NaN \n",
+ "min NaN NaN 0.117709 NaN NaN \n",
+ "25% NaN NaN 0.257688 NaN NaN \n",
+ "50% NaN NaN 0.541888 NaN NaN \n",
+ "75% NaN NaN 0.775186 NaN NaN \n",
+ "max NaN NaN 1.000000 NaN NaN \n",
+ "\n",
+ " Longitude Latitude Location 1 Premise \\\n",
+ "count 59202.000000 59202.000000 59202 59202 \n",
+ "unique NaN NaN 35435 112 \n",
+ "top NaN NaN (39.3180000000, -76.6582100000) street \n",
+ "freq NaN NaN 117 23144 \n",
+ "mean -76.617499 39.307502 NaN NaN \n",
+ "std 0.042191 0.029595 NaN NaN \n",
+ "min -76.711270 39.202540 NaN NaN \n",
+ "25% -76.648500 39.288340 NaN NaN \n",
+ "50% -76.614020 39.303930 NaN NaN \n",
+ "75% -76.587592 39.328030 NaN NaN \n",
+ "max -76.529770 39.371980 NaN NaN \n",
+ "\n",
+ " Total Incidents \n",
+ "count 59202.0 \n",
+ "unique NaN \n",
+ "top NaN \n",
+ "freq NaN \n",
+ "mean 1.0 \n",
+ "std 0.0 \n",
+ "min 1.0 \n",
+ "25% 1.0 \n",
+ "50% 1.0 \n",
+ "75% 1.0 \n",
+ "max 1.0 "
+ ]
+ },
+ "execution_count": 121,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "baltimore_val.describe(include='all')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}