1
0
ium_487197/ium_lab2.ipynb

2257 lines
152 KiB
Plaintext
Raw Normal View History

2023-03-21 11:45:41 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 100,
"id": "78e785f1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in ./jupyter_env/lib/python3.10/site-packages (1.5.13)\n",
"Requirement already satisfied: requests in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (2.28.2)\n",
"Requirement already satisfied: six>=1.10 in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: tqdm in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (4.65.0)\n",
"Requirement already satisfied: urllib3 in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (1.26.15)\n",
"Requirement already satisfied: certifi in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (2022.12.7)\n",
"Requirement already satisfied: python-slugify in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (8.0.1)\n",
"Requirement already satisfied: python-dateutil in ./jupyter_env/lib/python3.10/site-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: text-unidecode>=1.3 in ./jupyter_env/lib/python3.10/site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in ./jupyter_env/lib/python3.10/site-packages (from requests->kaggle) (3.4)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in ./jupyter_env/lib/python3.10/site-packages (from requests->kaggle) (3.1.0)\n",
"Requirement already satisfied: pandas in ./jupyter_env/lib/python3.10/site-packages (1.5.3)\n",
"Requirement already satisfied: numpy>=1.21.0 in ./jupyter_env/lib/python3.10/site-packages (from pandas) (1.24.2)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in ./jupyter_env/lib/python3.10/site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in ./jupyter_env/lib/python3.10/site-packages (from pandas) (2022.7.1)\n",
"Requirement already satisfied: six>=1.5 in ./jupyter_env/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
"Requirement already satisfied: unzip in ./jupyter_env/lib/python3.10/site-packages (1.0.0)\n",
"Requirement already satisfied: scikit-learn in ./jupyter_env/lib/python3.10/site-packages (1.2.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in ./jupyter_env/lib/python3.10/site-packages (from scikit-learn) (3.1.0)\n",
"Requirement already satisfied: numpy>=1.17.3 in ./jupyter_env/lib/python3.10/site-packages (from scikit-learn) (1.24.2)\n",
"Requirement already satisfied: joblib>=1.1.1 in ./jupyter_env/lib/python3.10/site-packages (from scikit-learn) (1.2.0)\n",
"Requirement already satisfied: scipy>=1.3.2 in ./jupyter_env/lib/python3.10/site-packages (from scikit-learn) (1.10.1)\n",
"Requirement already satisfied: seaborn in ./jupyter_env/lib/python3.10/site-packages (0.12.2)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.17 in ./jupyter_env/lib/python3.10/site-packages (from seaborn) (1.24.2)\n",
"Requirement already satisfied: pandas>=0.25 in ./jupyter_env/lib/python3.10/site-packages (from seaborn) (1.5.3)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in ./jupyter_env/lib/python3.10/site-packages (from seaborn) (3.7.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (9.4.0)\n",
"Requirement already satisfied: fonttools>=4.22.0 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.39.2)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.0.9)\n",
"Requirement already satisfied: contourpy>=1.0.1 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.7)\n",
"Requirement already satisfied: cycler>=0.10 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4)\n",
"Requirement already satisfied: packaging>=20.0 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.0)\n",
"Requirement already satisfied: python-dateutil>=2.7 in ./jupyter_env/lib/python3.10/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in ./jupyter_env/lib/python3.10/site-packages (from pandas>=0.25->seaborn) (2022.7.1)\n",
"Requirement already satisfied: six>=1.5 in ./jupyter_env/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.16.0)\n"
]
}
],
"source": [
"#instalacja pakietow\n",
"!pip install kaggle\n",
"!pip install pandas\n",
"!pip install unzip\n",
"!pip install scikit-learn\n",
"!pip install seaborn"
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "d8fffef2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/user/.kaggle/kaggle.json'\n",
"crime-in-baltimore.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
]
}
],
"source": [
"#Pobranie zbioru\n",
"!kaggle datasets download -d sohier/crime-in-baltimore"
]
},
{
"cell_type": "code",
"execution_count": 102,
"id": "febfcbd4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: crime-in-baltimore.zip\n",
" inflating: BPD_Part_1_Victim_Based_Crime_Data.csv \n"
]
}
],
"source": [
"!unzip -o crime-in-baltimore.zip"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "11bc16fe",
"metadata": {},
"outputs": [],
"source": [
"! grep -P \"^$\" -n BPD_Part_1_Victim_Based_Crime_Data.csv"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "cb85e933",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 105,
"id": "20e6099e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CrimeDate</th>\n",
" <th>CrimeTime</th>\n",
" <th>CrimeCode</th>\n",
" <th>Location</th>\n",
" <th>Description</th>\n",
" <th>Inside/Outside</th>\n",
" <th>Weapon</th>\n",
" <th>Post</th>\n",
" <th>District</th>\n",
" <th>Neighborhood</th>\n",
" <th>Longitude</th>\n",
" <th>Latitude</th>\n",
" <th>Location 1</th>\n",
" <th>Premise</th>\n",
" <th>Total Incidents</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>09/02/2017</td>\n",
" <td>23:30:00</td>\n",
" <td>3JK</td>\n",
" <td>4200 AUDREY AVE</td>\n",
" <td>ROBBERY - RESIDENCE</td>\n",
" <td>I</td>\n",
" <td>KNIFE</td>\n",
" <td>913.0</td>\n",
" <td>SOUTHERN</td>\n",
" <td>Brooklyn</td>\n",
" <td>-76.60541</td>\n",
" <td>39.22951</td>\n",
" <td>(39.2295100000, -76.6054100000)</td>\n",
" <td>ROW/TOWNHO</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>09/02/2017</td>\n",
" <td>23:00:00</td>\n",
" <td>7A</td>\n",
" <td>800 NEWINGTON AVE</td>\n",
" <td>AUTO THEFT</td>\n",
" <td>O</td>\n",
" <td>NaN</td>\n",
" <td>133.0</td>\n",
" <td>CENTRAL</td>\n",
" <td>Reservoir Hill</td>\n",
" <td>-76.63217</td>\n",
" <td>39.31360</td>\n",
" <td>(39.3136000000, -76.6321700000)</td>\n",
" <td>STREET</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>09/02/2017</td>\n",
" <td>22:53:00</td>\n",
" <td>9S</td>\n",
" <td>600 RADNOR AV</td>\n",
" <td>SHOOTING</td>\n",
" <td>Outside</td>\n",
" <td>FIREARM</td>\n",
" <td>524.0</td>\n",
" <td>NORTHERN</td>\n",
" <td>Winston-Govans</td>\n",
" <td>-76.60697</td>\n",
" <td>39.34768</td>\n",
" <td>(39.3476800000, -76.6069700000)</td>\n",
" <td>Street</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>09/02/2017</td>\n",
" <td>22:50:00</td>\n",
" <td>4C</td>\n",
" <td>1800 RAMSAY ST</td>\n",
" <td>AGG. ASSAULT</td>\n",
" <td>I</td>\n",
" <td>OTHER</td>\n",
" <td>934.0</td>\n",
" <td>SOUTHERN</td>\n",
" <td>Carrollton Ridge</td>\n",
" <td>-76.64526</td>\n",
" <td>39.28315</td>\n",
" <td>(39.2831500000, -76.6452600000)</td>\n",
" <td>ROW/TOWNHO</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>09/02/2017</td>\n",
" <td>22:31:00</td>\n",
" <td>4E</td>\n",
" <td>100 LIGHT ST</td>\n",
" <td>COMMON ASSAULT</td>\n",
" <td>O</td>\n",
" <td>HANDS</td>\n",
" <td>113.0</td>\n",
" <td>CENTRAL</td>\n",
" <td>Downtown West</td>\n",
" <td>-76.61365</td>\n",
" <td>39.28756</td>\n",
" <td>(39.2875600000, -76.6136500000)</td>\n",
" <td>STREET</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>276524</th>\n",
" <td>01/01/2012</td>\n",
" <td>00:00:00</td>\n",
" <td>6J</td>\n",
" <td>1400 JOH AVE</td>\n",
" <td>LARCENY</td>\n",
" <td>I</td>\n",
" <td>NaN</td>\n",
" <td>832.0</td>\n",
" <td>SOUTHWESTERN</td>\n",
" <td>Violetville</td>\n",
" <td>-76.67195</td>\n",
" <td>39.26132</td>\n",
" <td>(39.2613200000, -76.6719500000)</td>\n",
" <td>OTHER - IN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>276525</th>\n",
" <td>01/01/2012</td>\n",
" <td>00:00:00</td>\n",
" <td>6J</td>\n",
" <td>5500 SINCLAIR LN</td>\n",
" <td>LARCENY</td>\n",
" <td>O</td>\n",
" <td>NaN</td>\n",
" <td>444.0</td>\n",
" <td>NORTHEASTERN</td>\n",
" <td>Frankford</td>\n",
" <td>-76.53829</td>\n",
" <td>39.32493</td>\n",
" <td>(39.3249300000, -76.5382900000)</td>\n",
" <td>OTHER - OU</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>276526</th>\n",
" <td>01/01/2012</td>\n",
" <td>00:00:00</td>\n",
" <td>6E</td>\n",
" <td>400 N PATTERSON PK AV</td>\n",
" <td>LARCENY</td>\n",
" <td>O</td>\n",
" <td>NaN</td>\n",
" <td>321.0</td>\n",
" <td>EASTERN</td>\n",
" <td>CARE</td>\n",
" <td>-76.58497</td>\n",
" <td>39.29573</td>\n",
" <td>(39.2957300000, -76.5849700000)</td>\n",
" <td>STREET</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>276527</th>\n",
" <td>01/01/2012</td>\n",
" <td>00:00:00</td>\n",
" <td>5A</td>\n",
" <td>5800 LILLYAN AV</td>\n",
" <td>BURGLARY</td>\n",
" <td>I</td>\n",
" <td>NaN</td>\n",
" <td>425.0</td>\n",
" <td>NORTHEASTERN</td>\n",
" <td>Glenham-Belhar</td>\n",
" <td>-76.54578</td>\n",
" <td>39.34701</td>\n",
" <td>(39.3470100000, -76.5457800000)</td>\n",
" <td>APT. LOCKE</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>276528</th>\n",
" <td>01/01/2012</td>\n",
" <td>00:00:00</td>\n",
" <td>5A</td>\n",
" <td>1900 GRINNALDS AV</td>\n",
" <td>BURGLARY</td>\n",
" <td>I</td>\n",
" <td>NaN</td>\n",
" <td>831.0</td>\n",
" <td>SOUTHWESTERN</td>\n",
" <td>Morrell Park</td>\n",
" <td>-76.65094</td>\n",
" <td>39.26698</td>\n",
" <td>(39.2669800000, -76.6509400000)</td>\n",
" <td>ROW/TOWNHO</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>276529 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" CrimeDate CrimeTime CrimeCode Location \\\n",
"0 09/02/2017 23:30:00 3JK 4200 AUDREY AVE \n",
"1 09/02/2017 23:00:00 7A 800 NEWINGTON AVE \n",
"2 09/02/2017 22:53:00 9S 600 RADNOR AV \n",
"3 09/02/2017 22:50:00 4C 1800 RAMSAY ST \n",
"4 09/02/2017 22:31:00 4E 100 LIGHT ST \n",
"... ... ... ... ... \n",
"276524 01/01/2012 00:00:00 6J 1400 JOH AVE \n",
"276525 01/01/2012 00:00:00 6J 5500 SINCLAIR LN \n",
"276526 01/01/2012 00:00:00 6E 400 N PATTERSON PK AV \n",
"276527 01/01/2012 00:00:00 5A 5800 LILLYAN AV \n",
"276528 01/01/2012 00:00:00 5A 1900 GRINNALDS AV \n",
"\n",
" Description Inside/Outside Weapon Post District \\\n",
"0 ROBBERY - RESIDENCE I KNIFE 913.0 SOUTHERN \n",
"1 AUTO THEFT O NaN 133.0 CENTRAL \n",
"2 SHOOTING Outside FIREARM 524.0 NORTHERN \n",
"3 AGG. ASSAULT I OTHER 934.0 SOUTHERN \n",
"4 COMMON ASSAULT O HANDS 113.0 CENTRAL \n",
"... ... ... ... ... ... \n",
"276524 LARCENY I NaN 832.0 SOUTHWESTERN \n",
"276525 LARCENY O NaN 444.0 NORTHEASTERN \n",
"276526 LARCENY O NaN 321.0 EASTERN \n",
"276527 BURGLARY I NaN 425.0 NORTHEASTERN \n",
"276528 BURGLARY I NaN 831.0 SOUTHWESTERN \n",
"\n",
" Neighborhood Longitude Latitude \\\n",
"0 Brooklyn -76.60541 39.22951 \n",
"1 Reservoir Hill -76.63217 39.31360 \n",
"2 Winston-Govans -76.60697 39.34768 \n",
"3 Carrollton Ridge -76.64526 39.28315 \n",
"4 Downtown West -76.61365 39.28756 \n",
"... ... ... ... \n",
"276524 Violetville -76.67195 39.26132 \n",
"276525 Frankford -76.53829 39.32493 \n",
"276526 CARE -76.58497 39.29573 \n",
"276527 Glenham-Belhar -76.54578 39.34701 \n",
"276528 Morrell Park -76.65094 39.26698 \n",
"\n",
" Location 1 Premise Total Incidents \n",
"0 (39.2295100000, -76.6054100000) ROW/TOWNHO 1 \n",
"1 (39.3136000000, -76.6321700000) STREET 1 \n",
"2 (39.3476800000, -76.6069700000) Street 1 \n",
"3 (39.2831500000, -76.6452600000) ROW/TOWNHO 1 \n",
"4 (39.2875600000, -76.6136500000) STREET 1 \n",
"... ... ... ... \n",
"276524 (39.2613200000, -76.6719500000) OTHER - IN 1 \n",
"276525 (39.3249300000, -76.5382900000) OTHER - OU 1 \n",
"276526 (39.2957300000, -76.5849700000) STREET 1 \n",
"276527 (39.3470100000, -76.5457800000) APT. LOCKE 1 \n",
"276528 (39.2669800000, -76.6509400000) ROW/TOWNHO 1 \n",
"\n",
"[276529 rows x 15 columns]"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore=pd.read_csv('BPD_Part_1_Victim_Based_Crime_Data.csv')\n",
"baltimore"
]
},
{
"cell_type": "code",
"execution_count": 106,
"id": "89b1028c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CrimeDate 0\n",
"CrimeTime 0\n",
"CrimeCode 0\n",
"Location 2207\n",
"Description 0\n",
"Inside/Outside 10279\n",
"Weapon 180952\n",
"Post 224\n",
"District 80\n",
"Neighborhood 2740\n",
"Longitude 2204\n",
"Latitude 2204\n",
"Location 1 2204\n",
"Premise 10757\n",
"Total Incidents 0\n",
"dtype: int64"
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 107,
"id": "7109d8f7",
"metadata": {},
"outputs": [],
"source": [
"# W wiekszosci przestepstw nie uzywa sie broni, zastepujemy\n",
"# puste pola przez None\n",
"baltimore[\"Weapon\"].fillna(\"None\", inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "1c67e681",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CrimeDate 0\n",
"CrimeTime 0\n",
"CrimeCode 0\n",
"Location 2207\n",
"Description 0\n",
"Inside/Outside 10279\n",
"Weapon 0\n",
"Post 224\n",
"District 80\n",
"Neighborhood 2740\n",
"Longitude 2204\n",
"Latitude 2204\n",
"Location 1 2204\n",
"Premise 10757\n",
"Total Incidents 0\n",
"dtype: int64"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 109,
"id": "31966b62",
"metadata": {},
"outputs": [],
"source": [
"#Wyczyszczenie zbioru z artefaktow\n",
"baltimore.dropna(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "75f39653",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CrimeDate 0\n",
"CrimeTime 0\n",
"CrimeCode 0\n",
"Location 0\n",
"Description 0\n",
"Inside/Outside 0\n",
"Weapon 0\n",
"Post 0\n",
"District 0\n",
"Neighborhood 0\n",
"Longitude 0\n",
"Latitude 0\n",
"Location 1 0\n",
"Premise 0\n",
"Total Incidents 0\n",
"dtype: int64"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "6cd411df",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "8b8b4732",
"metadata": {},
"outputs": [],
"source": [
"#Normalizacja\n",
"baltimore['Post'] = baltimore['Post'] /baltimore['Post'].abs().max()\n",
"baltimore['Location']=baltimore['Location'].str.lower()\n",
"baltimore['Description']=baltimore['Description'].str.lower()\n",
"baltimore['Weapon']=baltimore['Weapon'].str.lower()\n",
"baltimore['Premise']=baltimore['Premise'].str.lower()\n",
"baltimore['District']=baltimore['District'].str.lower()\n",
"baltimore['CrimeCode']=baltimore['CrimeCode'].str.lower()\n",
"baltimore['Neighborhood']=baltimore['Neighborhood'].str.lower()\n",
"baltimore['Inside/Outside']=baltimore['Inside/Outside'].str.lower()"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "d9adbe06",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkEAAAH6CAYAAAAA4ZiGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABmnklEQVR4nO3dd1wUd/4/8NcusNhYiiHGLmBEVAhgRQjGEgu25GuJXmLlFHswktN4avASaywo2MDVmGiK5c4UET2NkWjI2VCjydmWGKIiJsouCLKU/f3hjzk3i4m7lGFmXs/Hwwdh57Mz7zdhltfOZ2ZWZTabzSAiIiJSGLXYBRARERGJgSGIiIiIFIkhiIiIiBSJIYiIiIgUiSGIiIiIFIkhiIiIiBSJIYiIiIgUiSGIiIiIFMlR7AJqMrPZjNLS6ruXpFqtqtbtVSf2Jk3sTbrk3B97k6bq6k2tVkGlUj3RWIagP1Baasbdu/erZVuOjmq4u9eF0ZiP4uLSatlmdWFv0sTepEvO/bE3aarO3jw86sLB4clCEKfDiIiISJEYgoiIiEiRGIKIiIhIkRiCiIiISJEYgoiIiEiRGIKIiIhIkRiCiIiISJEYgoiIiEiRGIKIiIhIkRiCiIiISJEYgoiIiEiRGIKIiIhIkRiCiIiISJEYgoiIiEiRGIKIiIhIkRzFLkCO1GoV1GqVTc9xcFBbfLVFaakZpaVmm59HRESkZAxBlUytVsHNrY5dYQYAtNraNj+npKQUOTn5DEJEREQ2YAiqZGq1Cg4OaqzYcRq/3M6t8u01aeCCmFfbQ61WMQQRERHZoEIh6P79++jXrx9u376N3bt3w9/fX1i2a9cubN68GTdv3oSXlxdmzpyJ7t27Wzw/NzcXS5YswaFDh1BUVITnn38e8+bNw9NPP20x7syZM1i2bBl+/PFH1K9fHyNHjsSECROgUv1vyslsNiMpKQkfffQR7t69Cz8/P7z11lsIDAysSIt2++V2Lq7dMIiybSIiIvpzFToxev369SgpKbF6fN++fZg/fz769euHpKQkBAYGYtq0aTh79qzFuOjoaBw/fhyxsbFYsWIFMjIyMGHCBBQXFwtjrl+/jsjISHh6emLTpk0YM2YM1q5diy1btlisKykpCWvXrsXYsWOxadMmeHp6Yvz48cjMzKxIi0RERCRTdoega9eu4aOPPsL06dOtlq1duxb9+/dHdHQ0unTpgn/84x/w9/fHunXrhDHp6ek4duwYFi1ahIiICPTs2RNr1qzBpUuXcPDgQWGcTqeDu7s7Vq1ahZCQEIwdOxbjx4/Hxo0bYTKZAACFhYXYtGkTxo8fj7FjxyIkJASrVq2Cm5sbdDqdvS0SERGRjNkdgt59912MGDECXl5eFo9nZmbip59+Qr9+/Swej4iIQFpamhBcUlNTodVqERoaKozx9vaGn58fUlNThcdSU1PRs2dPaDQai3UZjUakp6cDeDhdlpeXZ7FNjUaDF1980WJdRERERGXsOicoJSUFly9fRnx8PC5evGixTK/XA4BVOPLx8UFRUREyMzPh4+MDvV4PLy8vi/N6gIdBqGwd+fn5uHXrFry9va3GqFQq6PV6dO7cWRj/+3E+Pj7Ytm0bHjx4gFq1atnTKhwdbcuJ9l4VVlFibfdJVOTy/5qOvUmTnHsD5N0fe5OmmtqbzSGooKAAS5cuxcyZM1GvXj2r5QbDw5OBtVqtxeNl35ctNxqNcHFxsXq+q6srLly4AODhidPlrUuj0aB27doW69JoNHB2drbaptlshsFgsCsEqdUquLvXtfl5YrDn0vrqJoUa7cXepEnOvQHy7o+9SVNN683mELRhwwbUr18fQ4YMqYp6apTSUjOMxnybnuPgoBblf7LRWICSktJq3+6TKPuZ1OQa7cXepEnOvQHy7o+9SVN19qbV1n7iI042haAbN25gy5YtWLdunXCUJj8/X/h6//59uLq6Anh4FMfT01N4rtFoBA
BhuVarRVZWltU2DAaDMKbsSFHZtsqYTCYUFBRYrMtkMqGwsNDiaJDRaIRKpRLG2aO4WBq/iCUlpTW+VinUaC/2Jk1y7g2Qd3/sTZpqWm82haBffvkFRUVFmDhxotWy0aNH47nnnsPKlSsBPDw36NFzdPR6PZycnNC0aVMAD8/fSUtLg9lstjgvKCMjA61atQIA1KlTBw0bNhTO+Xl0jNlsFtZf9jUjIwOtW7e22GajRo3sPh+IiIiI5MumM5T8/PzwwQcfWPx76623AAALFy7E22+/jaZNm6JFixZISUmxeG5ycjJCQkKEq7zCw8NhMBiQlpYmjMnIyMAPP/yA8PBw4bHw8HAcPnwYRUVFFuvSarUICgoCAAQHB6NevXrYv3+/MKaoqAgHDx60WBcRERFRGZuOBGm1WnTu3LncZW3btkXbtm0BANOnT0dMTAyaNWuGzp07Izk5GefPn8f27duF8UFBQQgLC8PcuXMxe/ZsODs7Y/Xq1fD19UXv3r2FcZGRkfjiiy8wa9YsjBw5EpcvX4ZOp8PMmTOFQOXs7IyoqCjEx8fDw8MDrVq1wscff4ycnBxERkba/EMhIiIi+auSzw4bMGAACgoKkJSUhMTERHh5eSEhIUE4clMmLi4OS5YswYIFC1BcXIywsDDMmzcPjo7/K6t58+bQ6XRYunQpJk6cCA8PD8yYMQPjx4+3WNeECRNgNpuxZcsW4WMzdDqdMP1GRERE9CiV2Wzmp24+RklJKe7evW/Tcxwd1XB3r4voVV9Xy2eH+TR2RdwbL+Devfs16mSzR5X9TGpyjfZib9Ik594AeffH3qSpOnvz8Kj7xFeH1ay7FhERERFVE4YgIiIiUiSGICIiIlIkhiAiIiJSJIYgIiIiUiSGICIiIlIkhiAiIiJSJIYgIiIiUiSGICIiIlIkhiAiIiJSJIYgIiIiUiSGICIiIlIkhiAiIiJSJIYgIiIiUiSGICIiIlIkhiAiIiJSJIYgIiIiUiSGICIiIlIkhiAiIiJSJIYgIiIiUiRHsQsgaVGrVVCrVTY9x8FBbfHVFqWlZpSWmm1+HhER0Z9hCKInplar4OZWx64wAwBabW2bn1NSUoqcnHwGISIiqnQMQfTE1GoVHBzUWLHjNH65nVvl22vSwAUxr7aHWq1iCCIiokrHEEQ2++V2Lq7dMIhdBhERUYXwxGgiIiJSJIYgIiIiUiSGICIiIlIkhiAiIiJSJIYgIiIiUiSGICIiIlIkm0LQ0aNH8dprr6FLly5o164devbsiSVLliA393/3jJkzZw58fX2t/qWmplqsy2QyYdmyZQgNDUVgYCDGjRsHvV5vtc1r165h3LhxCAwMRGhoKJYvXw6TyWQ1bteuXejTpw/8/f0xaNAgHDlyxJbWiIiISGFsuk9QTk4OAgICMGrUKLi5ueHKlSuIj4/HlStXsGXLFmFc06ZNsWLFCovn+vj4WHz/7rvvIjk5GXPmzEGDBg2wceNGjB07Fvv27YOLiwsAwGAwYMyYMWjRogXi4+Nx+/ZtLF26FA8ePMCCBQuEde3btw/z58/HpEmT0KVLFyQnJ2PatGnYsWMHAgMDbf2ZEBERkQLYFIIGDx5s8X3nzp2h0Wgwf/583L59Gw0aNAAA1KpV6w/DR1ZWFnbv3o23334bQ4cOBQD4+/uje/fu+OSTTzBhwgQAwCeffIL79+8jISEBbm5uAICSkhIsXLgQUVFRwvbWrl2L/v37Izo6GgDQpUsXXL58GevWrUNSUpItLRIREZFCVPicoLJwUlRU9MTPOXbsGEpLS9G3b1+L9YSGhlpMm6WmpiIkJETYBgD069cPpaWlOH78OAAgMzMTP/30E/r162exjYiICKSlpZU7dUZERERk18dmlJSUoLi4GFevXsW6devQo0cPNGnSRFh+/fp1tG/fHoWFhWjVqhWmTJmCXr16Ccv1ej3q168PV1dXi/X6+Phg9+7dFuOGDBliMUar1cLT01M4f6jsq5eXl9
W6ioqKkJmZaTUVZwtHR9tyor0fLlpR1bFdOfdmr7LaanKN9mJv0iXn/tibNNXU3uwKQd27d8ft27cBAM8//zxWrlw
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"baltimore['District'].value_counts().plot(kind=\"bar\")"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "24b7582f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.axisgrid.FacetGrid at 0x7f9756fab6a0>"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkcAAAHkCAYAAAA0I4sqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABtb0lEQVR4nO3dd3hUVf7H8ffMpCeEhBCiEHoJJUAoShcJRURWUFEQRFwLggZ1sYErsu7CAgoKAoKUAIrSbIgggoBlAVFBQRGkCwENQkJ6mczc3x/8Ms6YBAhpk+Tzeh4fnHvPnHu+GZJ8uPfcc02GYRiIiIiICADmsh6AiIiIiDtROBIRERFxonAkIiIi4kThSERERMSJwpGIiIiIE4UjEREREScKRyIiIiJOFI5EREREnCgciYiIiDjxKOsBVCY2m52EhLSyHgYAZrOJatX8SUhIw26vuIukV4Y6VWPFURnqrEg1hoZWKeshSAnRmaNKymw2YTKZMJtNZT2UElUZ6lSNFUdlqLMy1Cjln8KRiIiIiBOFIxEREREnCkciIiIiThSORERERJwoHImIiIg4UTgSERERcaJwJCIiIuJE4UhERETEicKRiIiIiBOFIxEREREnCkciIiIiThSORERERJx4lPUARKRis3iayDJlYLPb8TBZ8DZ8ycmxl/WwREQKpHAkIiXG8Lby6fGv2HjkczJyMgn1D2Fo5AAaV22EKUc/fkTEPemymoiUCMMzh6V7V/HBwY1k5GQC8EfaeWbtiuXHhJ+xeJjKeIQiIvlTOBKREpFmT+O7M/vy3ffOjx+SZcoo5RGJiFwZhSMRKXZms4m4pDMF7k/JSiXTllWKIxIRuXIKRyJS7AzDoIp3QIH7TZjwtGjOkYi4J4UjESl2hgFh/qH4evrku7/1Nc3wJv99IiJlTeFIREqEZ44Pz3Z5BC+Lp8v2MP/q/D1qMKYcSxmNTETk0nReW0RKhGGHMM9reKn38xxJOE582jkaV6vPNf418LD6YBhGWQ9RRCRfCkciUmLsNvC0+RJZtQUtg0zYbAZGtoGBgpGIuC+FIxEpcTabAQpEIlJOaM6RiIiIiBOFIxEREREnCkciIiIiThSORERERJwoHImIiIg4UTgSERERcaJwJCIiIuJE4UhERETEicKRiIiIiBOFIxEREREnCkciIiIiThSORERERJy4VTj64osvuOeee+jYsSORkZH07NmTKVOmkJKS4mhjGAYLFy4kOjqayMhI+vfvz4YNGy7bd0pKCmPGjCE6OppWrVrRsWNHHnzwQfbt21fge+x2O7fffjsRERFs3LixWGoUERER9+ZR1gNwduHCBVq1asXw4cMJCgri8OHDzJ49m8OHDxMbGwvAokWLmDlzJqNHjyYqKoqtW7cyduxYfHx8iI6OLrDv7OxsvLy8GD16NOHh4aSmprJs2TJGjBjB+++/T/369fO8Z+XKlcTHx5dYvSIiIuJ+3CocDRgwwOV1hw4d8PLyYsKECcTHxxMcHMy8efMYPnw4MTExAHTt2pUzZ84wc+bMS4ajkJAQZsyY4bKtc+fOdOjQgU8//ZRRo0a57EtISGDWrFk888wzPPfcc8VUoYiIiLg7t7qslp+goCAArFYrp06dIi0tjS5duri06dq1K7/88gtnzpwpVN9+fn54e3tjtVrz7HvllVfo0KEDHTp0uOqxi4iISPnjVmeOctlsNnJycjhy5Ahz584lOjqa8PBwfv75ZwC8vLxc2ue+Pnr0KDVr1rxk33a7HbvdTkJCAosXL8ZsNjNw4ECXNvv27ePjjz/m448/Lr6iREREpFxwy3DUo0cPx1yfbt26OS6H1alTB5PJxL59+1zO6Pzwww8AJCUlXbbvWbNmMX/+fODipbYFCxZQu3Ztx3673c6LL77I3//+d8LDw4mLiyuusgDw8HCPk3UWi9nlz4qqMtSpGiuOylBnZahRyj+3DEcLFiwgIyODI0eOMG/ePEaNGs
WSJUsICAjg1ltvZdGiRTRp0oSoqCi2bdvG+vXrATCZTJfte+jQofTq1Ys//viDNWvWMHLkSJYuXUqLFi0AWLNmDefOnWPkyJHFXpfZbCI42L/Y+y2KwEDfsh5CqagMdarGiqMy1FkZapTyyy3DUdOmTQFo06YNLVu2ZMCAAWzevJm+ffsyfvx4l/ASHBzM448/zrRp0wgNDb1s32FhYYSFhQFw4403MmjQIF577TXeeOMN0tLSeOWVV/jHP/6B1WrFarWSmpoKQGZmJqmpqQQEBFx1XXa7QXJy+lW/vzhZLGYCA31JTs7AZrOX9XBKTGWoUzVWHJWhzopUo7v9Y1eKj1uGI2cRERF4enpy8uRJ4GIYio2NJT4+nqSkJOrVq8eWLVvw9PSkefPmherbbDbTrFkzdu/eDUBiYiIXLlxg4sSJTJw40aXts88+S/Xq1dm+fXuR6snJca8fBjab3e3GVBIqQ52qseKoDHVWhhql/HL7cLR3716sVivh4eEu23PPANlsNlasWEG/fv0KfVYnJyeHffv2OeYchYaG8uabb7q0OXfuHGPHjmXMmDF07ty5aMWIiIiI23OrcBQTE0NkZCQRERH4+Phw8OBBFi9eTEREBL169QLgo48+Iisrizp16nD27FlWrVpFXFwc06dPd+mrd+/e1KxZk2XLlgGwatUq9u3bR+fOnQkNDeXcuXOsXLmS48ePO84SeXt757l1P3dCdqNGjWjbtm1JfwlERESkjLlVOGrVqhUbNmxgwYIFGIZBrVq1uPPOO3nggQcct+sbhkFsbCxxcXH4+fnRvXt3pk+fTo0aNVz6stls2O1/nrJt1KgRmzZtYvLkySQnJxMaGkrLli159913HXOcREREREyGYRhlPYjKwmazk5CQVtbDAC4uKRAc7E9iYlqFvu5fGepUjRVHeajTZIKi/NYoDzVeqdDQKmU9BCkhbnXmSERE3I/FDL6kY089jz0rA8/ga8i2+JFl068QqZj0N1tERArkYQGvlFPEvzcNe0bq/281EdCmDwEd7yDN5nXJ94uUR1qiVERECuRrT+Hsyv84BSMAg9TvPyX76Dda6VoqJP2tFhGRfFksZjJP7MPIyc53f9KO9/Ax3GMepUhxUjgSEZF8mc0mrOcLfr6kLTURE+V7UrVIfhSOREQkX3a7He+aEQXu9wyphV1TV6UCUjgSEZF82WwGnjUbY/YLzHd/0I3DyDLpAbJS8SgciYhIgTJMAYQN/Tde1zRwbDP7+FOt78MQ2rjcPzxWJD86HyoiIgWy2QzSPYIJGvgslpx0DJsVvPzJsgSQmaM1hKViUjgSEZFLstsNMvAGs/fF6w0GoGAkFZguq4mIiIg40ZkjEREpFywWM95GGqacTExmD6wWP7LtliI9600kPwpHIiLi9rzMOZj/OML5zbHkJJ0Fkxm/xu0J6nEvaeYq2O1KSFJ8dFlNRETcmtlswuPCSf54d+rFYARg2Ek/9A1nV/4bX9LLdoBS4SgciYiIW/Mmk8Rtb+a7LyfpLLZzJzCbTaU8KqnIFI5ERMStWcghO/5Egfszf/1JD8CVYqW/TSIi4tYMTJh9Awrc71G1BoZmZUsxUjgSERG3lmX2J/C6v+W/02TGp0EbcnK0UrcUH4UjERFxazk5Br6RN+LbqJ3rDosHoXc8Q5a54LNKIldDt/KLiIjbS83xpkqvhwnqlkTWb4cx+1TBM6w+mSZ/cuyajC3FS+FIRETKhQy7F3iGYqkfhs0wyLIbFx9lIlLMFI5ERKRcsdk0v0hKluYciYiIiDhROBIRERFxonAkIiIi4kThSERERMSJwpGIiIiIE4UjEREREScKRyIiIiJOFI5EREREnCgciYiIiDhROBIRERFxonAkIiIi4kThSERERMSJwpGIiIiIE4UjEREREScKRyIiIiJOFI5EREREnCgciYiIiDhROBIRERFxonAkIiIi4kThSERERMSJwpGIiIiIE4UjEREREScKRyIiIi
JOFI5EREREnCgciYiIiDhROBIRERFxonAkIiIi4kThSERERMSJwpGIiIiIE4+yHoCzL774goULF3LkyBFSU1MJCwu
"text/plain": [
"<Figure size 610.5x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"sns.set_theme()\n",
"sns.relplot(data=baltimore[:20], x='Longitude', y='Latitude', hue='Weapon')"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "c9cf1067",
"metadata": {},
"outputs": [],
"source": [
"#Podzial na zbiory\n",
"baltimore_train, baltimore_test = train_test_split(baltimore, test_size=0.1, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 116,
"id": "350e7098",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CrimeDate</th>\n",
" <th>CrimeTime</th>\n",
" <th>CrimeCode</th>\n",
" <th>Location</th>\n",
" <th>Description</th>\n",
" <th>Inside/Outside</th>\n",
" <th>Weapon</th>\n",
" <th>Post</th>\n",
" <th>District</th>\n",
" <th>Neighborhood</th>\n",
" <th>Longitude</th>\n",
" <th>Latitude</th>\n",
" <th>Location 1</th>\n",
" <th>Premise</th>\n",
" <th>Total Incidents</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>20700</th>\n",
" <td>04/10/2017</td>\n",
" <td>22:26:00</td>\n",
" <td>4e</td>\n",
" <td>4900 eastern av</td>\n",
" <td>common assault</td>\n",
" <td>o</td>\n",
" <td>hands</td>\n",
" <td>0.256628</td>\n",
" <td>southeastern</td>\n",
" <td>greektown</td>\n",
" <td>-76.55422</td>\n",
" <td>39.28706</td>\n",
" <td>(39.2870600000, -76.5542200000)</td>\n",
" <td>alley</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63746</th>\n",
" <td>06/05/2016</td>\n",
" <td>20:44:00</td>\n",
" <td>4e</td>\n",
" <td>3000 s hanover st</td>\n",
" <td>common assault</td>\n",
" <td>o</td>\n",
" <td>hands</td>\n",
" <td>0.977731</td>\n",
" <td>southern</td>\n",
" <td>middle branch/reedbird pa</td>\n",
" <td>-76.61504</td>\n",
" <td>39.25134</td>\n",
" <td>(39.2513400000, -76.6150400000)</td>\n",
" <td>street</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>169854</th>\n",
" <td>03/10/2014</td>\n",
" <td>20:00:00</td>\n",
" <td>4e</td>\n",
" <td>4100 parkside dr</td>\n",
" <td>common assault</td>\n",
" <td>o</td>\n",
" <td>hands</td>\n",
" <td>0.447508</td>\n",
" <td>northeastern</td>\n",
" <td>belair-parkside</td>\n",
" <td>-76.56605</td>\n",
" <td>39.32783</td>\n",
" <td>(39.3278300000, -76.5660500000)</td>\n",
" <td>street</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42473</th>\n",
" <td>10/31/2016</td>\n",
" <td>09:30:00</td>\n",
" <td>4e</td>\n",
" <td>5600 loch raven blvd</td>\n",
" <td>common assault</td>\n",
" <td>i</td>\n",
" <td>hands</td>\n",
" <td>0.440085</td>\n",
" <td>northeastern</td>\n",
" <td>loch raven</td>\n",
" <td>-76.58856</td>\n",
" <td>39.35952</td>\n",
" <td>(39.3595200000, -76.5885600000)</td>\n",
" <td>hotel/mote</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86103</th>\n",
" <td>12/05/2015</td>\n",
" <td>08:15:00</td>\n",
" <td>4e</td>\n",
" <td>1100 guilford ave</td>\n",
" <td>common assault</td>\n",
" <td>i</td>\n",
" <td>hands</td>\n",
" <td>0.149523</td>\n",
" <td>central</td>\n",
" <td>mid-town belvedere</td>\n",
" <td>-76.61194</td>\n",
" <td>39.30319</td>\n",
" <td>(39.3031900000, -76.6119400000)</td>\n",
" <td>apt/condo</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>182763</th>\n",
" <td>11/20/2013</td>\n",
" <td>20:00:00</td>\n",
" <td>6d</td>\n",
" <td>3800 dolfield av</td>\n",
" <td>larceny from auto</td>\n",
" <td>o</td>\n",
" <td>none</td>\n",
" <td>0.681866</td>\n",
" <td>northwestern</td>\n",
" <td>dolfield</td>\n",
" <td>-76.68090</td>\n",
" <td>39.33938</td>\n",
" <td>(39.3393800000, -76.6809000000)</td>\n",
" <td>street</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14972</th>\n",
" <td>05/22/2017</td>\n",
" <td>03:30:00</td>\n",
" <td>4c</td>\n",
" <td>3000 w garrison ave</td>\n",
" <td>agg. assault</td>\n",
" <td>i</td>\n",
" <td>other</td>\n",
" <td>0.651113</td>\n",
" <td>northwestern</td>\n",
" <td>central park heights</td>\n",
" <td>-76.67146</td>\n",
" <td>39.34863</td>\n",
" <td>(39.3486300000, -76.6714600000)</td>\n",
" <td>row/townho</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44956</th>\n",
" <td>10/15/2016</td>\n",
" <td>23:30:00</td>\n",
" <td>7a</td>\n",
" <td>500 jack st</td>\n",
" <td>auto theft</td>\n",
" <td>o</td>\n",
" <td>none</td>\n",
" <td>0.968187</td>\n",
" <td>southern</td>\n",
" <td>brooklyn</td>\n",
" <td>-76.60582</td>\n",
" <td>39.23265</td>\n",
" <td>(39.2326500000, -76.6058200000)</td>\n",
" <td>street</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36873</th>\n",
" <td>12/08/2016</td>\n",
" <td>18:30:00</td>\n",
" <td>4e</td>\n",
" <td>3800 cedarhurst rd</td>\n",
" <td>common assault</td>\n",
" <td>o</td>\n",
" <td>hands</td>\n",
" <td>0.451750</td>\n",
" <td>northeastern</td>\n",
" <td>waltherson</td>\n",
" <td>-76.56315</td>\n",
" <td>39.33720</td>\n",
" <td>(39.3372000000, -76.5631500000)</td>\n",
" <td>street</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>230084</th>\n",
" <td>12/06/2012</td>\n",
" <td>14:00:00</td>\n",
" <td>4e</td>\n",
" <td>800 s highland av</td>\n",
" <td>common assault</td>\n",
" <td>i</td>\n",
" <td>hands</td>\n",
" <td>0.246023</td>\n",
" <td>southeastern</td>\n",
" <td>canton</td>\n",
" <td>-76.56878</td>\n",
" <td>39.28342</td>\n",
" <td>(39.2834200000, -76.5687800000)</td>\n",
" <td>school</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26312 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" CrimeDate CrimeTime CrimeCode Location \\\n",
"20700 04/10/2017 22:26:00 4e 4900 eastern av \n",
"63746 06/05/2016 20:44:00 4e 3000 s hanover st \n",
"169854 03/10/2014 20:00:00 4e 4100 parkside dr \n",
"42473 10/31/2016 09:30:00 4e 5600 loch raven blvd \n",
"86103 12/05/2015 08:15:00 4e 1100 guilford ave \n",
"... ... ... ... ... \n",
"182763 11/20/2013 20:00:00 6d 3800 dolfield av \n",
"14972 05/22/2017 03:30:00 4c 3000 w garrison ave \n",
"44956 10/15/2016 23:30:00 7a 500 jack st \n",
"36873 12/08/2016 18:30:00 4e 3800 cedarhurst rd \n",
"230084 12/06/2012 14:00:00 4e 800 s highland av \n",
"\n",
" Description Inside/Outside Weapon Post District \\\n",
"20700 common assault o hands 0.256628 southeastern \n",
"63746 common assault o hands 0.977731 southern \n",
"169854 common assault o hands 0.447508 northeastern \n",
"42473 common assault i hands 0.440085 northeastern \n",
"86103 common assault i hands 0.149523 central \n",
"... ... ... ... ... ... \n",
"182763 larceny from auto o none 0.681866 northwestern \n",
"14972 agg. assault i other 0.651113 northwestern \n",
"44956 auto theft o none 0.968187 southern \n",
"36873 common assault o hands 0.451750 northeastern \n",
"230084 common assault i hands 0.246023 southeastern \n",
"\n",
" Neighborhood Longitude Latitude \\\n",
"20700 greektown -76.55422 39.28706 \n",
"63746 middle branch/reedbird pa -76.61504 39.25134 \n",
"169854 belair-parkside -76.56605 39.32783 \n",
"42473 loch raven -76.58856 39.35952 \n",
"86103 mid-town belvedere -76.61194 39.30319 \n",
"... ... ... ... \n",
"182763 dolfield -76.68090 39.33938 \n",
"14972 central park heights -76.67146 39.34863 \n",
"44956 brooklyn -76.60582 39.23265 \n",
"36873 waltherson -76.56315 39.33720 \n",
"230084 canton -76.56878 39.28342 \n",
"\n",
" Location 1 Premise Total Incidents \n",
"20700 (39.2870600000, -76.5542200000) alley 1 \n",
"63746 (39.2513400000, -76.6150400000) street 1 \n",
"169854 (39.3278300000, -76.5660500000) street 1 \n",
"42473 (39.3595200000, -76.5885600000) hotel/mote 1 \n",
"86103 (39.3031900000, -76.6119400000) apt/condo 1 \n",
"... ... ... ... \n",
"182763 (39.3393800000, -76.6809000000) street 1 \n",
"14972 (39.3486300000, -76.6714600000) row/townho 1 \n",
"44956 (39.2326500000, -76.6058200000) street 1 \n",
"36873 (39.3372000000, -76.5631500000) street 1 \n",
"230084 (39.2834200000, -76.5687800000) school 1 \n",
"\n",
"[26312 rows x 15 columns]"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore_test"
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "ed66b750",
"metadata": {},
"outputs": [],
"source": [
"# Guard against re-running this cell: it overwrites baltimore_train with a 75% subset of\n",
"# itself, so a second run would silently split the already-reduced training frame again.\n",
"assert len(baltimore_train) + len(baltimore_test) == len(baltimore), (\n",
"    'baltimore_train is not the full pre-validation training set; '\n",
"    're-run the cells that create baltimore_train and baltimore_test before this one'\n",
")\n",
"\n",
"# Hold out 25% of the training rows as a validation set; the fixed seed keeps the split reproducible.\n",
"baltimore_train, baltimore_val = train_test_split(\n",
"    baltimore_train, test_size=0.25, random_state=1\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "3840c547",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CrimeDate</th>\n",
" <th>CrimeTime</th>\n",
" <th>CrimeCode</th>\n",
" <th>Location</th>\n",
" <th>Description</th>\n",
" <th>Inside/Outside</th>\n",
" <th>Weapon</th>\n",
" <th>Post</th>\n",
" <th>District</th>\n",
" <th>Neighborhood</th>\n",
" <th>Longitude</th>\n",
" <th>Latitude</th>\n",
" <th>Location 1</th>\n",
" <th>Premise</th>\n",
" <th>Total Incidents</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>263118</td>\n",
" <td>263118</td>\n",
" <td>263118</td>\n",
" <td>263118</td>\n",
" <td>263118</td>\n",
" <td>263118</td>\n",
" <td>263118</td>\n",
" <td>263118.000000</td>\n",
" <td>263118</td>\n",
" <td>263118</td>\n",
" <td>263118.000000</td>\n",
" <td>263118.000000</td>\n",
" <td>263118</td>\n",
" <td>263118</td>\n",
" <td>263118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>2072</td>\n",
" <td>2935</td>\n",
" <td>80</td>\n",
" <td>25276</td>\n",
" <td>15</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>9</td>\n",
" <td>278</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>93543</td>\n",
" <td>118</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>04/27/2015</td>\n",
" <td>18:00:00</td>\n",
" <td>4e</td>\n",
" <td>200 e pratt st</td>\n",
" <td>larceny</td>\n",
" <td>i</td>\n",
" <td>none</td>\n",
" <td>NaN</td>\n",
" <td>northeastern</td>\n",
" <td>downtown</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>(39.3180000000, -76.6582100000)</td>\n",
" <td>street</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>407</td>\n",
" <td>6483</td>\n",
" <td>43093</td>\n",
" <td>632</td>\n",
" <td>58246</td>\n",
" <td>131015</td>\n",
" <td>173175</td>\n",
" <td>NaN</td>\n",
" <td>40842</td>\n",
" <td>8701</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>503</td>\n",
" <td>102544</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.536416</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.617469</td>\n",
" <td>39.307456</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.276554</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.042220</td>\n",
" <td>0.029537</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.117709</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.711280</td>\n",
" <td>39.200410</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.256628</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.648420</td>\n",
" <td>39.288340</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.541888</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.614010</td>\n",
" <td>39.303680</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.775186</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.587490</td>\n",
" <td>39.327890</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.529770</td>\n",
" <td>39.371980</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CrimeDate CrimeTime CrimeCode Location Description \\\n",
"count 263118 263118 263118 263118 263118 \n",
"unique 2072 2935 80 25276 15 \n",
"top 04/27/2015 18:00:00 4e 200 e pratt st larceny \n",
"freq 407 6483 43093 632 58246 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" Inside/Outside Weapon Post District Neighborhood \\\n",
"count 263118 263118 263118.000000 263118 263118 \n",
"unique 4 5 NaN 9 278 \n",
"top i none NaN northeastern downtown \n",
"freq 131015 173175 NaN 40842 8701 \n",
"mean NaN NaN 0.536416 NaN NaN \n",
"std NaN NaN 0.276554 NaN NaN \n",
"min NaN NaN 0.117709 NaN NaN \n",
"25% NaN NaN 0.256628 NaN NaN \n",
"50% NaN NaN 0.541888 NaN NaN \n",
"75% NaN NaN 0.775186 NaN NaN \n",
"max NaN NaN 1.000000 NaN NaN \n",
"\n",
" Longitude Latitude Location 1 Premise \\\n",
"count 263118.000000 263118.000000 263118 263118 \n",
"unique NaN NaN 93543 118 \n",
"top NaN NaN (39.3180000000, -76.6582100000) street \n",
"freq NaN NaN 503 102544 \n",
"mean -76.617469 39.307456 NaN NaN \n",
"std 0.042220 0.029537 NaN NaN \n",
"min -76.711280 39.200410 NaN NaN \n",
"25% -76.648420 39.288340 NaN NaN \n",
"50% -76.614010 39.303680 NaN NaN \n",
"75% -76.587490 39.327890 NaN NaN \n",
"max -76.529770 39.371980 NaN NaN \n",
"\n",
" Total Incidents \n",
"count 263118.0 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 1.0 \n",
"std 0.0 \n",
"min 1.0 \n",
"25% 1.0 \n",
"50% 1.0 \n",
"75% 1.0 \n",
"max 1.0 "
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 119,
"id": "06e5c943",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CrimeDate</th>\n",
" <th>CrimeTime</th>\n",
" <th>CrimeCode</th>\n",
" <th>Location</th>\n",
" <th>Description</th>\n",
" <th>Inside/Outside</th>\n",
" <th>Weapon</th>\n",
" <th>Post</th>\n",
" <th>District</th>\n",
" <th>Neighborhood</th>\n",
" <th>Longitude</th>\n",
" <th>Latitude</th>\n",
" <th>Location 1</th>\n",
" <th>Premise</th>\n",
" <th>Total Incidents</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>26312</td>\n",
" <td>26312</td>\n",
" <td>26312</td>\n",
" <td>26312</td>\n",
" <td>26312</td>\n",
" <td>26312</td>\n",
" <td>26312</td>\n",
" <td>26312.000000</td>\n",
" <td>26312</td>\n",
" <td>26312</td>\n",
" <td>26312.000000</td>\n",
" <td>26312.000000</td>\n",
" <td>26312</td>\n",
" <td>26312</td>\n",
" <td>26312.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>2071</td>\n",
" <td>1513</td>\n",
" <td>71</td>\n",
" <td>11180</td>\n",
" <td>15</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>9</td>\n",
" <td>276</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>18843</td>\n",
" <td>104</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>04/27/2015</td>\n",
" <td>18:00:00</td>\n",
" <td>4e</td>\n",
" <td>1500 russell st</td>\n",
" <td>larceny</td>\n",
" <td>i</td>\n",
" <td>none</td>\n",
" <td>NaN</td>\n",
" <td>northeastern</td>\n",
" <td>downtown</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>(39.3180000000, -76.6582100000)</td>\n",
" <td>street</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>28</td>\n",
" <td>650</td>\n",
" <td>4357</td>\n",
" <td>56</td>\n",
" <td>5740</td>\n",
" <td>13248</td>\n",
" <td>17358</td>\n",
" <td>NaN</td>\n",
" <td>4137</td>\n",
" <td>853</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>49</td>\n",
" <td>10075</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.535663</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.617518</td>\n",
" <td>39.307771</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.275572</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.042479</td>\n",
" <td>0.029477</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.117709</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.711220</td>\n",
" <td>39.200470</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.257688</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.648905</td>\n",
" <td>39.288490</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.541888</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.614170</td>\n",
" <td>39.303850</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.766702</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.587170</td>\n",
" <td>39.328290</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.529770</td>\n",
" <td>39.371970</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CrimeDate CrimeTime CrimeCode Location Description \\\n",
"count 26312 26312 26312 26312 26312 \n",
"unique 2071 1513 71 11180 15 \n",
"top 04/27/2015 18:00:00 4e 1500 russell st larceny \n",
"freq 28 650 4357 56 5740 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" Inside/Outside Weapon Post District Neighborhood \\\n",
"count 26312 26312 26312.000000 26312 26312 \n",
"unique 4 5 NaN 9 276 \n",
"top i none NaN northeastern downtown \n",
"freq 13248 17358 NaN 4137 853 \n",
"mean NaN NaN 0.535663 NaN NaN \n",
"std NaN NaN 0.275572 NaN NaN \n",
"min NaN NaN 0.117709 NaN NaN \n",
"25% NaN NaN 0.257688 NaN NaN \n",
"50% NaN NaN 0.541888 NaN NaN \n",
"75% NaN NaN 0.766702 NaN NaN \n",
"max NaN NaN 1.000000 NaN NaN \n",
"\n",
" Longitude Latitude Location 1 Premise \\\n",
"count 26312.000000 26312.000000 26312 26312 \n",
"unique NaN NaN 18843 104 \n",
"top NaN NaN (39.3180000000, -76.6582100000) street \n",
"freq NaN NaN 49 10075 \n",
"mean -76.617518 39.307771 NaN NaN \n",
"std 0.042479 0.029477 NaN NaN \n",
"min -76.711220 39.200470 NaN NaN \n",
"25% -76.648905 39.288490 NaN NaN \n",
"50% -76.614170 39.303850 NaN NaN \n",
"75% -76.587170 39.328290 NaN NaN \n",
"max -76.529770 39.371970 NaN NaN \n",
"\n",
" Total Incidents \n",
"count 26312.0 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 1.0 \n",
"std 0.0 \n",
"min 1.0 \n",
"25% 1.0 \n",
"50% 1.0 \n",
"75% 1.0 \n",
"max 1.0 "
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore_test.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 120,
"id": "1566d1b1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CrimeDate</th>\n",
" <th>CrimeTime</th>\n",
" <th>CrimeCode</th>\n",
" <th>Location</th>\n",
" <th>Description</th>\n",
" <th>Inside/Outside</th>\n",
" <th>Weapon</th>\n",
" <th>Post</th>\n",
" <th>District</th>\n",
" <th>Neighborhood</th>\n",
" <th>Longitude</th>\n",
" <th>Latitude</th>\n",
" <th>Location 1</th>\n",
" <th>Premise</th>\n",
" <th>Total Incidents</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>177604</td>\n",
" <td>177604</td>\n",
" <td>177604</td>\n",
" <td>177604</td>\n",
" <td>177604</td>\n",
" <td>177604</td>\n",
" <td>177604</td>\n",
" <td>177604.000000</td>\n",
" <td>177604</td>\n",
" <td>177604</td>\n",
" <td>177604.000000</td>\n",
" <td>177604.000000</td>\n",
" <td>177604</td>\n",
" <td>177604</td>\n",
" <td>177604.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>2072</td>\n",
" <td>2435</td>\n",
" <td>79</td>\n",
" <td>22781</td>\n",
" <td>15</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>9</td>\n",
" <td>278</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>74417</td>\n",
" <td>116</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>04/27/2015</td>\n",
" <td>18:00:00</td>\n",
" <td>4e</td>\n",
" <td>200 e pratt st</td>\n",
" <td>larceny</td>\n",
" <td>i</td>\n",
" <td>none</td>\n",
" <td>NaN</td>\n",
" <td>northeastern</td>\n",
" <td>downtown</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>(39.3180000000, -76.6582100000)</td>\n",
" <td>street</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>298</td>\n",
" <td>4340</td>\n",
" <td>29065</td>\n",
" <td>440</td>\n",
" <td>39287</td>\n",
" <td>88319</td>\n",
" <td>116884</td>\n",
" <td>NaN</td>\n",
" <td>27451</td>\n",
" <td>5877</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>337</td>\n",
" <td>69325</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.536132</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.617452</td>\n",
" <td>39.307395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.276695</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.042192</td>\n",
" <td>0.029526</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.117709</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.711280</td>\n",
" <td>39.200410</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.256628</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.648290</td>\n",
" <td>39.288330</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.541888</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.613990</td>\n",
" <td>39.303580</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.775186</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.587500</td>\n",
" <td>39.327742</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.529770</td>\n",
" <td>39.371970</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CrimeDate CrimeTime CrimeCode Location Description \\\n",
"count 177604 177604 177604 177604 177604 \n",
"unique 2072 2435 79 22781 15 \n",
"top 04/27/2015 18:00:00 4e 200 e pratt st larceny \n",
"freq 298 4340 29065 440 39287 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" Inside/Outside Weapon Post District Neighborhood \\\n",
"count 177604 177604 177604.000000 177604 177604 \n",
"unique 4 5 NaN 9 278 \n",
"top i none NaN northeastern downtown \n",
"freq 88319 116884 NaN 27451 5877 \n",
"mean NaN NaN 0.536132 NaN NaN \n",
"std NaN NaN 0.276695 NaN NaN \n",
"min NaN NaN 0.117709 NaN NaN \n",
"25% NaN NaN 0.256628 NaN NaN \n",
"50% NaN NaN 0.541888 NaN NaN \n",
"75% NaN NaN 0.775186 NaN NaN \n",
"max NaN NaN 1.000000 NaN NaN \n",
"\n",
" Longitude Latitude Location 1 Premise \\\n",
"count 177604.000000 177604.000000 177604 177604 \n",
"unique NaN NaN 74417 116 \n",
"top NaN NaN (39.3180000000, -76.6582100000) street \n",
"freq NaN NaN 337 69325 \n",
"mean -76.617452 39.307395 NaN NaN \n",
"std 0.042192 0.029526 NaN NaN \n",
"min -76.711280 39.200410 NaN NaN \n",
"25% -76.648290 39.288330 NaN NaN \n",
"50% -76.613990 39.303580 NaN NaN \n",
"75% -76.587500 39.327742 NaN NaN \n",
"max -76.529770 39.371970 NaN NaN \n",
"\n",
" Total Incidents \n",
"count 177604.0 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 1.0 \n",
"std 0.0 \n",
"min 1.0 \n",
"25% 1.0 \n",
"50% 1.0 \n",
"75% 1.0 \n",
"max 1.0 "
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore_train.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 121,
"id": "02e5bf0c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CrimeDate</th>\n",
" <th>CrimeTime</th>\n",
" <th>CrimeCode</th>\n",
" <th>Location</th>\n",
" <th>Description</th>\n",
" <th>Inside/Outside</th>\n",
" <th>Weapon</th>\n",
" <th>Post</th>\n",
" <th>District</th>\n",
" <th>Neighborhood</th>\n",
" <th>Longitude</th>\n",
" <th>Latitude</th>\n",
" <th>Location 1</th>\n",
" <th>Premise</th>\n",
" <th>Total Incidents</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>59202</td>\n",
" <td>59202</td>\n",
" <td>59202</td>\n",
" <td>59202</td>\n",
" <td>59202</td>\n",
" <td>59202</td>\n",
" <td>59202</td>\n",
" <td>59202.000000</td>\n",
" <td>59202</td>\n",
" <td>59202</td>\n",
" <td>59202.000000</td>\n",
" <td>59202.000000</td>\n",
" <td>59202</td>\n",
" <td>59202</td>\n",
" <td>59202.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>2070</td>\n",
" <td>1804</td>\n",
" <td>77</td>\n",
" <td>16050</td>\n",
" <td>15</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>9</td>\n",
" <td>276</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>35435</td>\n",
" <td>112</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>04/27/2015</td>\n",
" <td>18:00:00</td>\n",
" <td>4e</td>\n",
" <td>200 e pratt st</td>\n",
" <td>larceny</td>\n",
" <td>i</td>\n",
" <td>none</td>\n",
" <td>NaN</td>\n",
" <td>northeastern</td>\n",
" <td>downtown</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>(39.3180000000, -76.6582100000)</td>\n",
" <td>street</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>81</td>\n",
" <td>1493</td>\n",
" <td>9671</td>\n",
" <td>140</td>\n",
" <td>13219</td>\n",
" <td>29448</td>\n",
" <td>38933</td>\n",
" <td>NaN</td>\n",
" <td>9254</td>\n",
" <td>1971</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>117</td>\n",
" <td>23144</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.537601</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.617499</td>\n",
" <td>39.307502</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.276567</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.042191</td>\n",
" <td>0.029595</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.117709</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.711270</td>\n",
" <td>39.202540</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.257688</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.648500</td>\n",
" <td>39.288340</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.541888</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.614020</td>\n",
" <td>39.303930</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.775186</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.587592</td>\n",
" <td>39.328030</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-76.529770</td>\n",
" <td>39.371980</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CrimeDate CrimeTime CrimeCode Location Description \\\n",
"count 59202 59202 59202 59202 59202 \n",
"unique 2070 1804 77 16050 15 \n",
"top 04/27/2015 18:00:00 4e 200 e pratt st larceny \n",
"freq 81 1493 9671 140 13219 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" Inside/Outside Weapon Post District Neighborhood \\\n",
"count 59202 59202 59202.000000 59202 59202 \n",
"unique 4 5 NaN 9 276 \n",
"top i none NaN northeastern downtown \n",
"freq 29448 38933 NaN 9254 1971 \n",
"mean NaN NaN 0.537601 NaN NaN \n",
"std NaN NaN 0.276567 NaN NaN \n",
"min NaN NaN 0.117709 NaN NaN \n",
"25% NaN NaN 0.257688 NaN NaN \n",
"50% NaN NaN 0.541888 NaN NaN \n",
"75% NaN NaN 0.775186 NaN NaN \n",
"max NaN NaN 1.000000 NaN NaN \n",
"\n",
" Longitude Latitude Location 1 Premise \\\n",
"count 59202.000000 59202.000000 59202 59202 \n",
"unique NaN NaN 35435 112 \n",
"top NaN NaN (39.3180000000, -76.6582100000) street \n",
"freq NaN NaN 117 23144 \n",
"mean -76.617499 39.307502 NaN NaN \n",
"std 0.042191 0.029595 NaN NaN \n",
"min -76.711270 39.202540 NaN NaN \n",
"25% -76.648500 39.288340 NaN NaN \n",
"50% -76.614020 39.303930 NaN NaN \n",
"75% -76.587592 39.328030 NaN NaN \n",
"max -76.529770 39.371980 NaN NaN \n",
"\n",
" Total Incidents \n",
"count 59202.0 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 1.0 \n",
"std 0.0 \n",
"min 1.0 \n",
"25% 1.0 \n",
"50% 1.0 \n",
"75% 1.0 \n",
"max 1.0 "
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baltimore_val.describe(include='all')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}