{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "advanced-namibia", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in /home/students/s444380/.local/lib/python3.7/site-packages (1.5.12)\n", "Requirement already satisfied: pandas in /usr/lib/python3/dist-packages (0.23.3+dfsg)\n", "Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (0.11.2)\n", "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.8.1)\n", "Requirement already satisfied: urllib3 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.26.3)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from kaggle) (2020.12.5)\n", "Requirement already satisfied: python-slugify in /home/students/s444380/.local/lib/python3.7/site-packages (from kaggle) (6.1.1)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.59.0)\n", "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.15.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.25.1)\n", "Requirement already satisfied: scipy>=1.0 in /usr/local/lib/python3.7/dist-packages (from seaborn) (1.7.3)\n", "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from seaborn) (1.21.5)\n", "Requirement already satisfied: matplotlib>=2.2 in /usr/lib/python3/dist-packages (from seaborn) (3.0.2)\n", "Requirement already satisfied: text-unidecode>=1.3 in /home/students/s444380/.local/lib/python3.7/site-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (2.10)\n", "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) 
(4.0.0)\n" ] } ], "source": [ "!pip install --user kaggle pandas seaborn" ] }, { "cell_type": "code", "execution_count": 2, "id": "lasting-tomorrow", "metadata": {}, "outputs": [], "source": [ "# Wydziałowy jupyter z jakiegoś powodu nie rozpoznaje polecenia kaggle. Dane pobrałem ręcznie\n", "# !kaggle datasets download -d AnalyzeBoston/crimes-in-boston" ] }, { "cell_type": "code", "execution_count": 2, "id": "awful-excerpt", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: archive.zip\n", " inflating: crime.csv \n", " inflating: offense_codes.csv \n" ] } ], "source": [ "!unzip -o archive.zip" ] }, { "cell_type": "code", "execution_count": 3, "id": "stylish-singer", "metadata": {}, "outputs": [], "source": [ "!iconv -f \"windows-1252\" -t \"UTF-8\" crime.csv > crime_conv.csv" ] }, { "cell_type": "code", "execution_count": 4, "id": "excessive-refrigerator", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location\n", "I182070945,00619,Larceny,LARCENY ALL OTHERS,D14,808,,2018-09-02 13:00:00,2018,9,Sunday,13,Part One,LINCOLN ST,42.35779134,-71.13937053,\"(42.35779134, -71.13937053)\"\n", "I182070943,01402,Vandalism,VANDALISM,C11,347,,2018-08-21 00:00:00,2018,8,Tuesday,0,Part Two,HECLA ST,42.30682138,-71.06030035,\"(42.30682138, -71.06030035)\"\n", "I182070941,03410,Towed,TOWED MOTOR VEHICLE,D4,151,,2018-09-03 19:27:00,2018,9,Monday,19,Part Three,CAZENOVE ST,42.34658879,-71.07242943,\"(42.34658879, -71.07242943)\"\n", "I182070940,03114,Investigate Property,INVESTIGATE PROPERTY,D4,272,,2018-09-03 21:16:00,2018,9,Monday,21,Part Three,NEWCOMB ST,42.33418175,-71.07866441,\"(42.33418175, -71.07866441)\"\n" ] } ], "source": [ "!head -n 5 crime_conv.csv" ] }, { "cell_type": "code", "execution_count": 5, "id": "genetic-switch", 
"metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLongLocation
0I182070945619LarcenyLARCENY ALL OTHERSD14808NaN2018-09-02 13:00:0020189Sunday13Part OneLINCOLN ST42.357791-71.139371(42.35779134, -71.13937053)
1I1820709431402VandalismVANDALISMC11347NaN2018-08-21 00:00:0020188Tuesday0Part TwoHECLA ST42.306821-71.060300(42.30682138, -71.06030035)
2I1820709413410TowedTOWED MOTOR VEHICLED4151NaN2018-09-03 19:27:0020189Monday19Part ThreeCAZENOVE ST42.346589-71.072429(42.34658879, -71.07242943)
3I1820709403114Investigate PropertyINVESTIGATE PROPERTYD4272NaN2018-09-03 21:16:0020189Monday21Part ThreeNEWCOMB ST42.334182-71.078664(42.33418175, -71.07866441)
4I1820709383114Investigate PropertyINVESTIGATE PROPERTYB3421NaN2018-09-03 21:05:0020189Monday21Part ThreeDELHI ST42.275365-71.090361(42.27536542, -71.09036101)
5I1820709363820Motor Vehicle Accident ResponseM/V ACCIDENT INVOLVING PEDESTRIAN - INJURYC11398NaN2018-09-03 21:09:0020189Monday21Part ThreeTALBOT AVE42.290196-71.071590(42.29019621, -71.07159012)
6I182070933724Auto TheftAUTO THEFTB2330NaN2018-09-03 21:25:0020189Monday21Part OneNORMANDY ST42.306072-71.082733(42.30607218, -71.08273260)
7I1820709323301Verbal DisputesVERBAL DISPUTEB2584NaN2018-09-03 20:39:3720189Monday20Part ThreeLAWN ST42.327016-71.105551(42.32701648, -71.10555088)
8I182070931301RobberyROBBERY - STREETC6177NaN2018-09-03 20:48:0020189Monday20Part OneMASSACHUSETTS AVE42.331521-71.070853(42.33152148, -71.07085307)
9I1820709293301Verbal DisputesVERBAL DISPUTEC11364NaN2018-09-03 20:38:0020189Monday20Part ThreeLESLIE ST42.295147-71.058608(42.29514664, -71.05860832)
10I1820709283301Verbal DisputesVERBAL DISPUTEC6913NaN2018-09-03 19:55:0020189Monday19Part ThreeOCEAN VIEW DR42.319579-71.040328(42.31957856, -71.04032766)
11I1820709273114Investigate PropertyINVESTIGATE PROPERTYC6936NaN2018-09-03 20:19:0020189Monday20Part ThreeDALESSIO CT42.340115-71.053390(42.34011469, -71.05339029)
12I1820709233108Fire Related ReportsFIRE REPORT - HOUSE, BUILDING, ETC.D4139NaN2018-09-03 19:58:0020189Monday19Part ThreeMARLBOROUGH ST42.350388-71.087853(42.35038760, -71.08785290)
13I1820709222647OtherTHREATS TO DO BODILY HARMB3429NaN2018-09-03 20:39:0020189Monday20Part TwoWOODROW AVE42.286470-71.087147(42.28647012, -71.08714661)
14I1820709213201Property LostPROPERTY - LOSTB3469NaN2018-09-02 14:00:0020189Sunday14Part ThreeMULVEY ST42.279241-71.096674(42.27924052, -71.09667382)
15I1820709203006Medical AssistanceSICK/INJURED/MEDICAL - PERSONNaNNaN2018-09-03 19:43:0020189Monday19Part ThreeNaN42.352875-71.073830(42.35287456, -71.07382970)
16I1820709193301Verbal DisputesVERBAL DISPUTEC11341NaN2018-09-03 18:52:0020189Monday18Part ThreeSTONEHURST ST42.305264-71.066838(42.30526428, -71.06683755)
17I1820709183305Assembly or Gathering ViolationsDEMONSTRATIONS/RIOTD4130NaN2018-09-03 17:00:0020189Monday17Part ThreeHUNTINGTON AVE42.348577-71.077720(42.34857652, -71.07772012)
18I1820709172647OtherTHREATS TO DO BODILY HARMB2901NaN2018-09-03 19:52:0020189Monday19Part TwoHORADAN WAY42.333717-71.096658(42.33371742, -71.09665806)
19I182070915614Larceny From Motor VehicleLARCENY THEFT FROM MV - NON-ACCESSORYB2181NaN2018-09-02 18:00:0020189Sunday18Part OneSHIRLEY ST42.325695-71.068168(42.32569490, -71.06816778)
20I1820709133006Medical AssistanceSICK/INJURED/MEDICAL - PERSONNaNNaN2018-09-03 18:46:0020189Monday18Part ThreeWOLCOTT-1.000000-1.000000(-1.00000000, -1.00000000)
21I1820709113801Motor Vehicle Accident ResponseM/V ACCIDENT - OTHERA169NaN2018-09-03 18:30:0020189Monday18Part ThreeBEACON ST42.355644-71.071681(42.35564426, -71.07168077)
22I1820709103006Medical AssistanceSICK/INJURED/MEDICAL - PERSONB3434NaN2018-09-03 18:42:0020189Monday18Part ThreeCAPEN ST42.283402-71.080797(42.28340243, -71.08079740)
23I1820709093803Motor Vehicle Accident ResponseM/V ACCIDENT - PERSONAL INJURYE5550NaN2018-09-03 18:33:0020189Monday18Part ThreeWASHINGTON ST42.275818-71.139913(42.27581799, -71.13991259)
24I182070908522Residential BurglaryBURGLARY - RESIDENTIAL - NO FORCEB2911NaN2018-09-03 18:38:0020189Monday18Part OneANNUNCIATION RD42.335062-71.093168(42.33506218, -71.09316781)
25I1820709063831Motor Vehicle Accident ResponseM/V - LEAVING SCENE - PROPERTY DAMAGENaNNaN2018-09-03 18:20:0020189Monday18Part ThreeNaN42.283593-71.055657(42.28359328, -71.05565683)
26I1820709053006Medical AssistanceSICK/INJURED/MEDICAL - PERSOND4172NaN2018-09-03 18:50:0020189Monday18Part ThreeMASSACHUSETTS AVE42.333112-71.072764(42.33311189, -71.07276370)
27I182070904802Simple AssaultASSAULT SIMPLE - BATTERYC11242NaN2018-09-03 18:34:0020189Monday18Part TwoANNAPOLIS ST42.317319-71.061509(42.31731905, -71.06150882)
28I1820709042007Restraining Order ViolationsVIOL. OF RESTRAINING ORDER W NO ARRESTC11242NaN2018-09-03 18:34:0020189Monday18Part TwoANNAPOLIS ST42.317319-71.061509(42.31731905, -71.06150882)
29I1820709032900OtherVAL - VIOLATION OF AUTO LAW - OTHERB3463NaN2018-09-03 18:55:0020189Monday18Part TwoBLUE HILL AVE42.295904-71.087733(42.29590385, -71.08773294)
......................................................
319043I110551302-003125Warrant ArrestsWARRANT ARRESTD4171NaN2015-07-22 22:00:0020157Wednesday22Part ThreeHARRISON AVE42.335560-71.074364(42.33555954, -71.07436364)
319044I110551302-00623LarcenyLARCENY SHOPLIFTING $50 TO $199D4171NaN2015-07-22 22:00:0020157Wednesday22Part OneHARRISON AVE42.335560-71.074364(42.33555954, -71.07436364)
319045I110372326-00403Aggravated AssaultASSAULT & BATTERY D/W - OTHERA197NaN2016-06-14 09:40:0020166Tuesday9Part OneSCHOOL ST42.357428-71.058326(42.35742837, -71.05832551)
319046I110372326-003125Warrant ArrestsWARRANT ARRESTA197NaN2016-06-14 09:40:0020166Tuesday9Part ThreeSCHOOL ST42.357428-71.058326(42.35742837, -71.05832551)
319047I110261417-003125Warrant ArrestsWARRANT ARRESTB2324NaN2016-07-29 00:00:0020167Friday0Part ThreeBOWDOIN ST42.307038-71.066153(42.30703835, -71.06615319)
319048I110261417-00619LarcenyLARCENY OTHER $200 & OVERB2324NaN2016-07-29 00:00:0020167Friday0Part OneBOWDOIN ST42.307038-71.066153(42.30703835, -71.06615319)
319049I110177502-003125Warrant ArrestsWARRANT ARRESTB2318NaN2015-10-02 21:00:00201510Friday21Part ThreeHOMESTEAD ST42.311277-71.089093(42.31127726, -71.08909334)
319050I110177502-00802Simple AssaultASSAULT & BATTERYB2318NaN2015-10-02 21:00:00201510Friday21Part TwoHOMESTEAD ST42.311277-71.089093(42.31127726, -71.08909334)
319051I110177502-003125Warrant ArrestsWARRANT ARRESTB2318NaN2015-10-02 21:00:00201510Friday21Part ThreeHOMESTEAD ST42.311277-71.089093(42.31127726, -71.08909334)
319052I100636670-00629LarcenyLARCENY OTHER $50 TO $199D4285NaN2016-06-05 17:23:0020166Sunday17Part OneCOVENTRY ST42.336951-71.085748(42.33695098, -71.08574813)
319053I100636670-003125Warrant ArrestsWARRANT ARRESTD4285NaN2016-06-05 17:23:0020166Sunday17Part ThreeCOVENTRY ST42.336951-71.085748(42.33695098, -71.08574813)
319054I100340225-003125Warrant ArrestsWARRANT ARRESTA177NaN2015-07-27 10:47:0020157Monday10Part ThreeBOWDOIN SQ42.361645-71.062299(42.36164502, -71.06229949)
319055I100340225-00339RobberyROBBERY - UNARMED - STREETA177NaN2015-07-27 10:47:0020157Monday10Part OneBOWDOIN SQ42.361645-71.062299(42.36164502, -71.06229949)
319056I100222105-023125Warrant ArrestsWARRANT ARRESTE13572NaN2015-08-03 16:22:0020158Monday16Part ThreeCOLUMBUS AVE42.313628-71.095603(42.31362799, -71.09560307)
319057I100033064-002907ViolationsVAL - OPERATING AFTER REV/SUSP.B2304NaN2016-07-29 18:20:0020167Friday18Part TwoSLAYTON WAY42.321770-71.097798(42.32177032, -71.09779774)
319058I100033064-002910ViolationsVAL - OPERATING AFTER REV/SUSP.B2304NaN2016-07-29 18:20:0020167Friday18Part TwoSLAYTON WAY42.321770-71.097798(42.32177032, -71.09779774)
319059I090321958-003125Warrant ArrestsWARRANT ARRESTC11355NaN2016-02-01 01:43:0020162Monday1Part ThreeGENEVA AVENaNNaN(0.00000000, 0.00000000)
319060I090321958-003125Warrant ArrestsWARRANT ARRESTC11355NaN2016-02-01 01:43:0020162Monday1Part ThreeGENEVA AVENaNNaN(0.00000000, 0.00000000)
319061I090317057-00403Aggravated AssaultASSAULT & BATTERY D/W - OTHERB3458NaN2015-11-20 11:15:00201511Friday11Part OneBLUE HILL AVE42.301897-71.085549(42.30189690, -71.08554944)
319062I090317057-003125Warrant ArrestsWARRANT ARRESTB3458NaN2015-11-20 11:15:00201511Friday11Part ThreeBLUE HILL AVE42.301897-71.085549(42.30189690, -71.08554944)
319063I080542626-003125Warrant ArrestsWARRANT ARRESTA1111NaN2015-08-12 12:00:0020158Wednesday12Part ThreeBOYLSTON ST42.352312-71.063705(42.35231190, -71.06370510)
319064I080542626-001848Drug ViolationDRUGS - POSS CLASS B - INTENT TO MFR DIST DISPA1111NaN2015-08-12 12:00:0020158Wednesday12Part TwoBOYLSTON ST42.352312-71.063705(42.35231190, -71.06370510)
319065I080542626-001849Drug ViolationDRUGS - POSS CLASS B - COCAINE, ETC.A1111NaN2015-08-12 12:00:0020158Wednesday12Part TwoBOYLSTON ST42.352312-71.063705(42.35231190, -71.06370510)
319066I060168073-001864Drug ViolationDRUGS - POSS CLASS D - INTENT MFR DIST DISPE13912NaN2018-01-27 14:01:0020181Saturday14Part TwoCENTRE ST42.322838-71.100967(42.32283759, -71.10096723)
319067I060168073-003125Warrant ArrestsWARRANT ARRESTE13912NaN2018-01-27 14:01:0020181Saturday14Part ThreeCENTRE ST42.322838-71.100967(42.32283759, -71.10096723)
319068I050310906-003125Warrant ArrestsWARRANT ARRESTD4285NaN2016-06-05 17:25:0020166Sunday17Part ThreeCOVENTRY ST42.336951-71.085748(42.33695098, -71.08574813)
319069I030217815-08111HomicideMURDER, NON-NEGLIGIENT MANSLAUGHTERE18520NaN2015-07-09 13:38:0020157Thursday13Part OneRIVER ST42.255926-71.123172(42.25592648, -71.12317207)
319070I030217815-083125Warrant ArrestsWARRANT ARRESTE18520NaN2015-07-09 13:38:0020157Thursday13Part ThreeRIVER ST42.255926-71.123172(42.25592648, -71.12317207)
319071I010370257-003125Warrant ArrestsWARRANT ARRESTE13569NaN2016-05-31 19:35:0020165Tuesday19Part ThreeNEW WASHINGTON ST42.302333-71.111565(42.30233307, -71.11156487)
3190721420525503125Warrant ArrestsWARRANT ARRESTD4903NaN2015-06-22 00:12:0020156Monday0Part ThreeWASHINGTON ST42.333839-71.080290(42.33383935, -71.08029038)
\n", "

319073 rows × 17 columns

\n", "
" ], "text/plain": [ " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", "0 I182070945 619 Larceny \n", "1 I182070943 1402 Vandalism \n", "2 I182070941 3410 Towed \n", "3 I182070940 3114 Investigate Property \n", "4 I182070938 3114 Investigate Property \n", "5 I182070936 3820 Motor Vehicle Accident Response \n", "6 I182070933 724 Auto Theft \n", "7 I182070932 3301 Verbal Disputes \n", "8 I182070931 301 Robbery \n", "9 I182070929 3301 Verbal Disputes \n", "10 I182070928 3301 Verbal Disputes \n", "11 I182070927 3114 Investigate Property \n", "12 I182070923 3108 Fire Related Reports \n", "13 I182070922 2647 Other \n", "14 I182070921 3201 Property Lost \n", "15 I182070920 3006 Medical Assistance \n", "16 I182070919 3301 Verbal Disputes \n", "17 I182070918 3305 Assembly or Gathering Violations \n", "18 I182070917 2647 Other \n", "19 I182070915 614 Larceny From Motor Vehicle \n", "20 I182070913 3006 Medical Assistance \n", "21 I182070911 3801 Motor Vehicle Accident Response \n", "22 I182070910 3006 Medical Assistance \n", "23 I182070909 3803 Motor Vehicle Accident Response \n", "24 I182070908 522 Residential Burglary \n", "25 I182070906 3831 Motor Vehicle Accident Response \n", "26 I182070905 3006 Medical Assistance \n", "27 I182070904 802 Simple Assault \n", "28 I182070904 2007 Restraining Order Violations \n", "29 I182070903 2900 Other \n", "... ... ... ... 
\n", "319043 I110551302-00 3125 Warrant Arrests \n", "319044 I110551302-00 623 Larceny \n", "319045 I110372326-00 403 Aggravated Assault \n", "319046 I110372326-00 3125 Warrant Arrests \n", "319047 I110261417-00 3125 Warrant Arrests \n", "319048 I110261417-00 619 Larceny \n", "319049 I110177502-00 3125 Warrant Arrests \n", "319050 I110177502-00 802 Simple Assault \n", "319051 I110177502-00 3125 Warrant Arrests \n", "319052 I100636670-00 629 Larceny \n", "319053 I100636670-00 3125 Warrant Arrests \n", "319054 I100340225-00 3125 Warrant Arrests \n", "319055 I100340225-00 339 Robbery \n", "319056 I100222105-02 3125 Warrant Arrests \n", "319057 I100033064-00 2907 Violations \n", "319058 I100033064-00 2910 Violations \n", "319059 I090321958-00 3125 Warrant Arrests \n", "319060 I090321958-00 3125 Warrant Arrests \n", "319061 I090317057-00 403 Aggravated Assault \n", "319062 I090317057-00 3125 Warrant Arrests \n", "319063 I080542626-00 3125 Warrant Arrests \n", "319064 I080542626-00 1848 Drug Violation \n", "319065 I080542626-00 1849 Drug Violation \n", "319066 I060168073-00 1864 Drug Violation \n", "319067 I060168073-00 3125 Warrant Arrests \n", "319068 I050310906-00 3125 Warrant Arrests \n", "319069 I030217815-08 111 Homicide \n", "319070 I030217815-08 3125 Warrant Arrests \n", "319071 I010370257-00 3125 Warrant Arrests \n", "319072 142052550 3125 Warrant Arrests \n", "\n", " OFFENSE_DESCRIPTION DISTRICT \\\n", "0 LARCENY ALL OTHERS D14 \n", "1 VANDALISM C11 \n", "2 TOWED MOTOR VEHICLE D4 \n", "3 INVESTIGATE PROPERTY D4 \n", "4 INVESTIGATE PROPERTY B3 \n", "5 M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY C11 \n", "6 AUTO THEFT B2 \n", "7 VERBAL DISPUTE B2 \n", "8 ROBBERY - STREET C6 \n", "9 VERBAL DISPUTE C11 \n", "10 VERBAL DISPUTE C6 \n", "11 INVESTIGATE PROPERTY C6 \n", "12 FIRE REPORT - HOUSE, BUILDING, ETC. 
D4 \n", "13 THREATS TO DO BODILY HARM B3 \n", "14 PROPERTY - LOST B3 \n", "15 SICK/INJURED/MEDICAL - PERSON NaN \n", "16 VERBAL DISPUTE C11 \n", "17 DEMONSTRATIONS/RIOT D4 \n", "18 THREATS TO DO BODILY HARM B2 \n", "19 LARCENY THEFT FROM MV - NON-ACCESSORY B2 \n", "20 SICK/INJURED/MEDICAL - PERSON NaN \n", "21 M/V ACCIDENT - OTHER A1 \n", "22 SICK/INJURED/MEDICAL - PERSON B3 \n", "23 M/V ACCIDENT - PERSONAL INJURY E5 \n", "24 BURGLARY - RESIDENTIAL - NO FORCE B2 \n", "25 M/V - LEAVING SCENE - PROPERTY DAMAGE NaN \n", "26 SICK/INJURED/MEDICAL - PERSON D4 \n", "27 ASSAULT SIMPLE - BATTERY C11 \n", "28 VIOL. OF RESTRAINING ORDER W NO ARREST C11 \n", "29 VAL - VIOLATION OF AUTO LAW - OTHER B3 \n", "... ... ... \n", "319043 WARRANT ARREST D4 \n", "319044 LARCENY SHOPLIFTING $50 TO $199 D4 \n", "319045 ASSAULT & BATTERY D/W - OTHER A1 \n", "319046 WARRANT ARREST A1 \n", "319047 WARRANT ARREST B2 \n", "319048 LARCENY OTHER $200 & OVER B2 \n", "319049 WARRANT ARREST B2 \n", "319050 ASSAULT & BATTERY B2 \n", "319051 WARRANT ARREST B2 \n", "319052 LARCENY OTHER $50 TO $199 D4 \n", "319053 WARRANT ARREST D4 \n", "319054 WARRANT ARREST A1 \n", "319055 ROBBERY - UNARMED - STREET A1 \n", "319056 WARRANT ARREST E13 \n", "319057 VAL - OPERATING AFTER REV/SUSP. B2 \n", "319058 VAL - OPERATING AFTER REV/SUSP. B2 \n", "319059 WARRANT ARREST C11 \n", "319060 WARRANT ARREST C11 \n", "319061 ASSAULT & BATTERY D/W - OTHER B3 \n", "319062 WARRANT ARREST B3 \n", "319063 WARRANT ARREST A1 \n", "319064 DRUGS - POSS CLASS B - INTENT TO MFR DIST DISP A1 \n", "319065 DRUGS - POSS CLASS B - COCAINE, ETC. 
A1 \n", "319066 DRUGS - POSS CLASS D - INTENT MFR DIST DISP E13 \n", "319067 WARRANT ARREST E13 \n", "319068 WARRANT ARREST D4 \n", "319069 MURDER, NON-NEGLIGIENT MANSLAUGHTER E18 \n", "319070 WARRANT ARREST E18 \n", "319071 WARRANT ARREST E13 \n", "319072 WARRANT ARREST D4 \n", "\n", " REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK \\\n", "0 808 NaN 2018-09-02 13:00:00 2018 9 Sunday \n", "1 347 NaN 2018-08-21 00:00:00 2018 8 Tuesday \n", "2 151 NaN 2018-09-03 19:27:00 2018 9 Monday \n", "3 272 NaN 2018-09-03 21:16:00 2018 9 Monday \n", "4 421 NaN 2018-09-03 21:05:00 2018 9 Monday \n", "5 398 NaN 2018-09-03 21:09:00 2018 9 Monday \n", "6 330 NaN 2018-09-03 21:25:00 2018 9 Monday \n", "7 584 NaN 2018-09-03 20:39:37 2018 9 Monday \n", "8 177 NaN 2018-09-03 20:48:00 2018 9 Monday \n", "9 364 NaN 2018-09-03 20:38:00 2018 9 Monday \n", "10 913 NaN 2018-09-03 19:55:00 2018 9 Monday \n", "11 936 NaN 2018-09-03 20:19:00 2018 9 Monday \n", "12 139 NaN 2018-09-03 19:58:00 2018 9 Monday \n", "13 429 NaN 2018-09-03 20:39:00 2018 9 Monday \n", "14 469 NaN 2018-09-02 14:00:00 2018 9 Sunday \n", "15 NaN 2018-09-03 19:43:00 2018 9 Monday \n", "16 341 NaN 2018-09-03 18:52:00 2018 9 Monday \n", "17 130 NaN 2018-09-03 17:00:00 2018 9 Monday \n", "18 901 NaN 2018-09-03 19:52:00 2018 9 Monday \n", "19 181 NaN 2018-09-02 18:00:00 2018 9 Sunday \n", "20 NaN 2018-09-03 18:46:00 2018 9 Monday \n", "21 69 NaN 2018-09-03 18:30:00 2018 9 Monday \n", "22 434 NaN 2018-09-03 18:42:00 2018 9 Monday \n", "23 550 NaN 2018-09-03 18:33:00 2018 9 Monday \n", "24 911 NaN 2018-09-03 18:38:00 2018 9 Monday \n", "25 NaN 2018-09-03 18:20:00 2018 9 Monday \n", "26 172 NaN 2018-09-03 18:50:00 2018 9 Monday \n", "27 242 NaN 2018-09-03 18:34:00 2018 9 Monday \n", "28 242 NaN 2018-09-03 18:34:00 2018 9 Monday \n", "29 463 NaN 2018-09-03 18:55:00 2018 9 Monday \n", "... ... ... ... ... ... ... 
\n", "319043 171 NaN 2015-07-22 22:00:00 2015 7 Wednesday \n", "319044 171 NaN 2015-07-22 22:00:00 2015 7 Wednesday \n", "319045 97 NaN 2016-06-14 09:40:00 2016 6 Tuesday \n", "319046 97 NaN 2016-06-14 09:40:00 2016 6 Tuesday \n", "319047 324 NaN 2016-07-29 00:00:00 2016 7 Friday \n", "319048 324 NaN 2016-07-29 00:00:00 2016 7 Friday \n", "319049 318 NaN 2015-10-02 21:00:00 2015 10 Friday \n", "319050 318 NaN 2015-10-02 21:00:00 2015 10 Friday \n", "319051 318 NaN 2015-10-02 21:00:00 2015 10 Friday \n", "319052 285 NaN 2016-06-05 17:23:00 2016 6 Sunday \n", "319053 285 NaN 2016-06-05 17:23:00 2016 6 Sunday \n", "319054 77 NaN 2015-07-27 10:47:00 2015 7 Monday \n", "319055 77 NaN 2015-07-27 10:47:00 2015 7 Monday \n", "319056 572 NaN 2015-08-03 16:22:00 2015 8 Monday \n", "319057 304 NaN 2016-07-29 18:20:00 2016 7 Friday \n", "319058 304 NaN 2016-07-29 18:20:00 2016 7 Friday \n", "319059 355 NaN 2016-02-01 01:43:00 2016 2 Monday \n", "319060 355 NaN 2016-02-01 01:43:00 2016 2 Monday \n", "319061 458 NaN 2015-11-20 11:15:00 2015 11 Friday \n", "319062 458 NaN 2015-11-20 11:15:00 2015 11 Friday \n", "319063 111 NaN 2015-08-12 12:00:00 2015 8 Wednesday \n", "319064 111 NaN 2015-08-12 12:00:00 2015 8 Wednesday \n", "319065 111 NaN 2015-08-12 12:00:00 2015 8 Wednesday \n", "319066 912 NaN 2018-01-27 14:01:00 2018 1 Saturday \n", "319067 912 NaN 2018-01-27 14:01:00 2018 1 Saturday \n", "319068 285 NaN 2016-06-05 17:25:00 2016 6 Sunday \n", "319069 520 NaN 2015-07-09 13:38:00 2015 7 Thursday \n", "319070 520 NaN 2015-07-09 13:38:00 2015 7 Thursday \n", "319071 569 NaN 2016-05-31 19:35:00 2016 5 Tuesday \n", "319072 903 NaN 2015-06-22 00:12:00 2015 6 Monday \n", "\n", " HOUR UCR_PART STREET Lat Long \\\n", "0 13 Part One LINCOLN ST 42.357791 -71.139371 \n", "1 0 Part Two HECLA ST 42.306821 -71.060300 \n", "2 19 Part Three CAZENOVE ST 42.346589 -71.072429 \n", "3 21 Part Three NEWCOMB ST 42.334182 -71.078664 \n", "4 21 Part Three DELHI ST 42.275365 -71.090361 \n", "5 21 Part 
Three TALBOT AVE 42.290196 -71.071590 \n", "6 21 Part One NORMANDY ST 42.306072 -71.082733 \n", "7 20 Part Three LAWN ST 42.327016 -71.105551 \n", "8 20 Part One MASSACHUSETTS AVE 42.331521 -71.070853 \n", "9 20 Part Three LESLIE ST 42.295147 -71.058608 \n", "10 19 Part Three OCEAN VIEW DR 42.319579 -71.040328 \n", "11 20 Part Three DALESSIO CT 42.340115 -71.053390 \n", "12 19 Part Three MARLBOROUGH ST 42.350388 -71.087853 \n", "13 20 Part Two WOODROW AVE 42.286470 -71.087147 \n", "14 14 Part Three MULVEY ST 42.279241 -71.096674 \n", "15 19 Part Three NaN 42.352875 -71.073830 \n", "16 18 Part Three STONEHURST ST 42.305264 -71.066838 \n", "17 17 Part Three HUNTINGTON AVE 42.348577 -71.077720 \n", "18 19 Part Two HORADAN WAY 42.333717 -71.096658 \n", "19 18 Part One SHIRLEY ST 42.325695 -71.068168 \n", "20 18 Part Three WOLCOTT -1.000000 -1.000000 \n", "21 18 Part Three BEACON ST 42.355644 -71.071681 \n", "22 18 Part Three CAPEN ST 42.283402 -71.080797 \n", "23 18 Part Three WASHINGTON ST 42.275818 -71.139913 \n", "24 18 Part One ANNUNCIATION RD 42.335062 -71.093168 \n", "25 18 Part Three NaN 42.283593 -71.055657 \n", "26 18 Part Three MASSACHUSETTS AVE 42.333112 -71.072764 \n", "27 18 Part Two ANNAPOLIS ST 42.317319 -71.061509 \n", "28 18 Part Two ANNAPOLIS ST 42.317319 -71.061509 \n", "29 18 Part Two BLUE HILL AVE 42.295904 -71.087733 \n", "... ... ... ... ... ... 
\n", "319043 22 Part Three HARRISON AVE 42.335560 -71.074364 \n", "319044 22 Part One HARRISON AVE 42.335560 -71.074364 \n", "319045 9 Part One SCHOOL ST 42.357428 -71.058326 \n", "319046 9 Part Three SCHOOL ST 42.357428 -71.058326 \n", "319047 0 Part Three BOWDOIN ST 42.307038 -71.066153 \n", "319048 0 Part One BOWDOIN ST 42.307038 -71.066153 \n", "319049 21 Part Three HOMESTEAD ST 42.311277 -71.089093 \n", "319050 21 Part Two HOMESTEAD ST 42.311277 -71.089093 \n", "319051 21 Part Three HOMESTEAD ST 42.311277 -71.089093 \n", "319052 17 Part One COVENTRY ST 42.336951 -71.085748 \n", "319053 17 Part Three COVENTRY ST 42.336951 -71.085748 \n", "319054 10 Part Three BOWDOIN SQ 42.361645 -71.062299 \n", "319055 10 Part One BOWDOIN SQ 42.361645 -71.062299 \n", "319056 16 Part Three COLUMBUS AVE 42.313628 -71.095603 \n", "319057 18 Part Two SLAYTON WAY 42.321770 -71.097798 \n", "319058 18 Part Two SLAYTON WAY 42.321770 -71.097798 \n", "319059 1 Part Three GENEVA AVE NaN NaN \n", "319060 1 Part Three GENEVA AVE NaN NaN \n", "319061 11 Part One BLUE HILL AVE 42.301897 -71.085549 \n", "319062 11 Part Three BLUE HILL AVE 42.301897 -71.085549 \n", "319063 12 Part Three BOYLSTON ST 42.352312 -71.063705 \n", "319064 12 Part Two BOYLSTON ST 42.352312 -71.063705 \n", "319065 12 Part Two BOYLSTON ST 42.352312 -71.063705 \n", "319066 14 Part Two CENTRE ST 42.322838 -71.100967 \n", "319067 14 Part Three CENTRE ST 42.322838 -71.100967 \n", "319068 17 Part Three COVENTRY ST 42.336951 -71.085748 \n", "319069 13 Part One RIVER ST 42.255926 -71.123172 \n", "319070 13 Part Three RIVER ST 42.255926 -71.123172 \n", "319071 19 Part Three NEW WASHINGTON ST 42.302333 -71.111565 \n", "319072 0 Part Three WASHINGTON ST 42.333839 -71.080290 \n", "\n", " Location \n", "0 (42.35779134, -71.13937053) \n", "1 (42.30682138, -71.06030035) \n", "2 (42.34658879, -71.07242943) \n", "3 (42.33418175, -71.07866441) \n", "4 (42.27536542, -71.09036101) \n", "5 (42.29019621, -71.07159012) \n", "6 (42.30607218, 
-71.08273260) \n", "7 (42.32701648, -71.10555088) \n", "8 (42.33152148, -71.07085307) \n", "9 (42.29514664, -71.05860832) \n", "10 (42.31957856, -71.04032766) \n", "11 (42.34011469, -71.05339029) \n", "12 (42.35038760, -71.08785290) \n", "13 (42.28647012, -71.08714661) \n", "14 (42.27924052, -71.09667382) \n", "15 (42.35287456, -71.07382970) \n", "16 (42.30526428, -71.06683755) \n", "17 (42.34857652, -71.07772012) \n", "18 (42.33371742, -71.09665806) \n", "19 (42.32569490, -71.06816778) \n", "20 (-1.00000000, -1.00000000) \n", "21 (42.35564426, -71.07168077) \n", "22 (42.28340243, -71.08079740) \n", "23 (42.27581799, -71.13991259) \n", "24 (42.33506218, -71.09316781) \n", "25 (42.28359328, -71.05565683) \n", "26 (42.33311189, -71.07276370) \n", "27 (42.31731905, -71.06150882) \n", "28 (42.31731905, -71.06150882) \n", "29 (42.29590385, -71.08773294) \n", "... ... \n", "319043 (42.33555954, -71.07436364) \n", "319044 (42.33555954, -71.07436364) \n", "319045 (42.35742837, -71.05832551) \n", "319046 (42.35742837, -71.05832551) \n", "319047 (42.30703835, -71.06615319) \n", "319048 (42.30703835, -71.06615319) \n", "319049 (42.31127726, -71.08909334) \n", "319050 (42.31127726, -71.08909334) \n", "319051 (42.31127726, -71.08909334) \n", "319052 (42.33695098, -71.08574813) \n", "319053 (42.33695098, -71.08574813) \n", "319054 (42.36164502, -71.06229949) \n", "319055 (42.36164502, -71.06229949) \n", "319056 (42.31362799, -71.09560307) \n", "319057 (42.32177032, -71.09779774) \n", "319058 (42.32177032, -71.09779774) \n", "319059 (0.00000000, 0.00000000) \n", "319060 (0.00000000, 0.00000000) \n", "319061 (42.30189690, -71.08554944) \n", "319062 (42.30189690, -71.08554944) \n", "319063 (42.35231190, -71.06370510) \n", "319064 (42.35231190, -71.06370510) \n", "319065 (42.35231190, -71.06370510) \n", "319066 (42.32283759, -71.10096723) \n", "319067 (42.32283759, -71.10096723) \n", "319068 (42.33695098, -71.08574813) \n", "319069 (42.25592648, -71.12317207) \n", "319070 
(42.25592648, -71.12317207) \n", "319071 (42.30233307, -71.11156487) \n", "319072 (42.33383935, -71.08029038) \n", "\n", "[319073 rows x 17 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "crime = pd.read_csv('crime_conv.csv')\n", "crime" ] }, { "cell_type": "code", "execution_count": 6, "id": "instant-monitor", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLongLocation
count319073319073.0000003190733190733173083190731019319073319073.000000319073.000000319073319073.000000318983308202299074.000000299074.000000319073
unique282517NaN67244128791233229NaNNaN7NaN44657NaNNaN18194
topI162030584NaNMotor Vehicle Accident ResponseSICK/INJURED/MEDICAL - PERSONB2Y2017-06-01 00:00:00NaNNaNFridayNaNPart ThreeWASHINGTON STNaNNaN(0.00000000, 0.00000000)
freq13NaN37132187834994520250101929NaNNaN48495NaN15855314194NaNNaN19999
meanNaN2317.546956NaNNaNNaNNaNNaNNaN2016.5605866.609719NaN13.118205NaNNaN42.214381-70.908272NaN
stdNaN1185.285543NaNNaNNaNNaNNaNNaN0.9963443.273691NaN6.294205NaNNaN2.1597663.493618NaN
minNaN111.000000NaNNaNNaNNaNNaNNaN2015.0000001.000000NaN0.000000NaNNaN-1.000000-71.178674NaN
25%NaN1001.000000NaNNaNNaNNaNNaNNaN2016.0000004.000000NaN9.000000NaNNaN42.297442-71.097135NaN
50%NaN2907.000000NaNNaNNaNNaNNaNNaN2017.0000007.000000NaN14.000000NaNNaN42.325538-71.077524NaN
75%NaN3201.000000NaNNaNNaNNaNNaNNaN2017.0000009.000000NaN18.000000NaNNaN42.348624-71.062467NaN
maxNaN3831.000000NaNNaNNaNNaNNaNNaN2018.00000012.000000NaN23.000000NaNNaN42.395042-1.000000NaN
\n", "
" ], "text/plain": [ " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", "count 319073 319073.000000 319073 \n", "unique 282517 NaN 67 \n", "top I162030584 NaN Motor Vehicle Accident Response \n", "freq 13 NaN 37132 \n", "mean NaN 2317.546956 NaN \n", "std NaN 1185.285543 NaN \n", "min NaN 111.000000 NaN \n", "25% NaN 1001.000000 NaN \n", "50% NaN 2907.000000 NaN \n", "75% NaN 3201.000000 NaN \n", "max NaN 3831.000000 NaN \n", "\n", " OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING \\\n", "count 319073 317308 319073 1019 \n", "unique 244 12 879 1 \n", "top SICK/INJURED/MEDICAL - PERSON B2 Y \n", "freq 18783 49945 20250 1019 \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n", "\n", " OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK \\\n", "count 319073 319073.000000 319073.000000 319073 \n", "unique 233229 NaN NaN 7 \n", "top 2017-06-01 00:00:00 NaN NaN Friday \n", "freq 29 NaN NaN 48495 \n", "mean NaN 2016.560586 6.609719 NaN \n", "std NaN 0.996344 3.273691 NaN \n", "min NaN 2015.000000 1.000000 NaN \n", "25% NaN 2016.000000 4.000000 NaN \n", "50% NaN 2017.000000 7.000000 NaN \n", "75% NaN 2017.000000 9.000000 NaN \n", "max NaN 2018.000000 12.000000 NaN \n", "\n", " HOUR UCR_PART STREET Lat \\\n", "count 319073.000000 318983 308202 299074.000000 \n", "unique NaN 4 4657 NaN \n", "top NaN Part Three WASHINGTON ST NaN \n", "freq NaN 158553 14194 NaN \n", "mean 13.118205 NaN NaN 42.214381 \n", "std 6.294205 NaN NaN 2.159766 \n", "min 0.000000 NaN NaN -1.000000 \n", "25% 9.000000 NaN NaN 42.297442 \n", "50% 14.000000 NaN NaN 42.325538 \n", "75% 18.000000 NaN NaN 42.348624 \n", "max 23.000000 NaN NaN 42.395042 \n", "\n", " Long Location \n", "count 299074.000000 319073 \n", "unique NaN 18194 \n", "top NaN (0.00000000, 0.00000000) \n", "freq NaN 19999 \n", "mean -70.908272 NaN \n", "std 3.493618 NaN \n", "min -71.178674 NaN \n", "25% 
-71.097135 NaN \n", "50% -71.077524 NaN \n", "75% -71.062467 NaN \n", "max -1.000000 NaN " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime.describe(include=\"all\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "still-sweet", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLong
0I182070945619larcenylarceny all othersD14808N2018-09-02 13:00:0020189sunday13part onelincoln st42.357791-71.139371
1I1820709431402vandalismvandalismC11347N2018-08-21 00:00:0020188tuesday0part twohecla st42.306821-71.060300
2I1820709413410towedtowed motor vehicleD4151N2018-09-03 19:27:0020189monday19part threecazenove st42.346589-71.072429
3I1820709403114investigate propertyinvestigate propertyD4272N2018-09-03 21:16:0020189monday21part threenewcomb st42.334182-71.078664
4I1820709383114investigate propertyinvestigate propertyB3421N2018-09-03 21:05:0020189monday21part threedelhi st42.275365-71.090361
5I1820709363820motor vehicle accident responsem/v accident involving pedestrian - injuryC11398N2018-09-03 21:09:0020189monday21part threetalbot ave42.290196-71.071590
6I182070933724auto theftauto theftB2330N2018-09-03 21:25:0020189monday21part onenormandy st42.306072-71.082733
7I1820709323301verbal disputesverbal disputeB2584N2018-09-03 20:39:3720189monday20part threelawn st42.327016-71.105551
8I182070931301robberyrobbery - streetC6177N2018-09-03 20:48:0020189monday20part onemassachusetts ave42.331521-71.070853
9I1820709293301verbal disputesverbal disputeC11364N2018-09-03 20:38:0020189monday20part threeleslie st42.295147-71.058608
10I1820709283301verbal disputesverbal disputeC6913N2018-09-03 19:55:0020189monday19part threeocean view dr42.319579-71.040328
11I1820709273114investigate propertyinvestigate propertyC6936N2018-09-03 20:19:0020189monday20part threedalessio ct42.340115-71.053390
12I1820709233108fire related reportsfire report - house, building, etc.D4139N2018-09-03 19:58:0020189monday19part threemarlborough st42.350388-71.087853
13I1820709222647otherthreats to do bodily harmB3429N2018-09-03 20:39:0020189monday20part twowoodrow ave42.286470-71.087147
14I1820709213201property lostproperty - lostB3469N2018-09-02 14:00:0020189sunday14part threemulvey st42.279241-71.096674
16I1820709193301verbal disputesverbal disputeC11341N2018-09-03 18:52:0020189monday18part threestonehurst st42.305264-71.066838
17I1820709183305assembly or gathering violationsdemonstrations/riotD4130N2018-09-03 17:00:0020189monday17part threehuntington ave42.348577-71.077720
18I1820709172647otherthreats to do bodily harmB2901N2018-09-03 19:52:0020189monday19part twohoradan way42.333717-71.096658
19I182070915614larceny from motor vehiclelarceny theft from mv - non-accessoryB2181N2018-09-02 18:00:0020189sunday18part oneshirley st42.325695-71.068168
21I1820709113801motor vehicle accident responsem/v accident - otherA169N2018-09-03 18:30:0020189monday18part threebeacon st42.355644-71.071681
22I1820709103006medical assistancesick/injured/medical - personB3434N2018-09-03 18:42:0020189monday18part threecapen st42.283402-71.080797
23I1820709093803motor vehicle accident responsem/v accident - personal injuryE5550N2018-09-03 18:33:0020189monday18part threewashington st42.275818-71.139913
24I182070908522residential burglaryburglary - residential - no forceB2911N2018-09-03 18:38:0020189monday18part oneannunciation rd42.335062-71.093168
26I1820709053006medical assistancesick/injured/medical - personD4172N2018-09-03 18:50:0020189monday18part threemassachusetts ave42.333112-71.072764
27I182070904802simple assaultassault simple - batteryC11242N2018-09-03 18:34:0020189monday18part twoannapolis st42.317319-71.061509
28I1820709042007restraining order violationsviol. of restraining order w no arrestC11242N2018-09-03 18:34:0020189monday18part twoannapolis st42.317319-71.061509
29I1820709032900otherval - violation of auto law - otherB3463N2018-09-03 18:55:0020189monday18part twoblue hill ave42.295904-71.087733
30I1820709012907violationsval - operating after rev/susp.B3428N2018-09-03 18:41:0020189monday18part twoclarkwood st42.280137-71.090798
31I1820709002629harassmentharassmentB3464N2018-09-03 18:17:0020189monday18part twohansborough st42.288104-71.091533
32I182070898802simple assaultassault simple - batteryC11351N2018-09-03 19:11:0020189monday19part twosalisbury park42.299284-71.059172
...................................................
319040I110694557-003125warrant arrestswarrant arrestB3436N2016-01-22 09:45:0020161friday9part threewithington st42.288767-71.072897
319041I110694557-003115investigate personinvestigate personB3436N2016-01-22 09:45:0020161friday9part threewithington st42.288767-71.072897
319043I110551302-003125warrant arrestswarrant arrestD4171N2015-07-22 22:00:0020157wednesday22part threeharrison ave42.335560-71.074364
319044I110551302-00623larcenylarceny shoplifting $50 to $199D4171N2015-07-22 22:00:0020157wednesday22part oneharrison ave42.335560-71.074364
319045I110372326-00403aggravated assaultassault & battery d/w - otherA197N2016-06-14 09:40:0020166tuesday9part oneschool st42.357428-71.058326
319046I110372326-003125warrant arrestswarrant arrestA197N2016-06-14 09:40:0020166tuesday9part threeschool st42.357428-71.058326
319047I110261417-003125warrant arrestswarrant arrestB2324N2016-07-29 00:00:0020167friday0part threebowdoin st42.307038-71.066153
319048I110261417-00619larcenylarceny other $200 & overB2324N2016-07-29 00:00:0020167friday0part onebowdoin st42.307038-71.066153
319049I110177502-003125warrant arrestswarrant arrestB2318N2015-10-02 21:00:00201510friday21part threehomestead st42.311277-71.089093
319050I110177502-00802simple assaultassault & batteryB2318N2015-10-02 21:00:00201510friday21part twohomestead st42.311277-71.089093
319051I110177502-003125warrant arrestswarrant arrestB2318N2015-10-02 21:00:00201510friday21part threehomestead st42.311277-71.089093
319052I100636670-00629larcenylarceny other $50 to $199D4285N2016-06-05 17:23:0020166sunday17part onecoventry st42.336951-71.085748
319053I100636670-003125warrant arrestswarrant arrestD4285N2016-06-05 17:23:0020166sunday17part threecoventry st42.336951-71.085748
319054I100340225-003125warrant arrestswarrant arrestA177N2015-07-27 10:47:0020157monday10part threebowdoin sq42.361645-71.062299
319055I100340225-00339robberyrobbery - unarmed - streetA177N2015-07-27 10:47:0020157monday10part onebowdoin sq42.361645-71.062299
319056I100222105-023125warrant arrestswarrant arrestE13572N2015-08-03 16:22:0020158monday16part threecolumbus ave42.313628-71.095603
319057I100033064-002907violationsval - operating after rev/susp.B2304N2016-07-29 18:20:0020167friday18part twoslayton way42.321770-71.097798
319058I100033064-002910violationsval - operating after rev/susp.B2304N2016-07-29 18:20:0020167friday18part twoslayton way42.321770-71.097798
319061I090317057-00403aggravated assaultassault & battery d/w - otherB3458N2015-11-20 11:15:00201511friday11part oneblue hill ave42.301897-71.085549
319062I090317057-003125warrant arrestswarrant arrestB3458N2015-11-20 11:15:00201511friday11part threeblue hill ave42.301897-71.085549
319063I080542626-003125warrant arrestswarrant arrestA1111N2015-08-12 12:00:0020158wednesday12part threeboylston st42.352312-71.063705
319064I080542626-001848drug violationdrugs - poss class b - intent to mfr dist dispA1111N2015-08-12 12:00:0020158wednesday12part twoboylston st42.352312-71.063705
319065I080542626-001849drug violationdrugs - poss class b - cocaine, etc.A1111N2015-08-12 12:00:0020158wednesday12part twoboylston st42.352312-71.063705
319066I060168073-001864drug violationdrugs - poss class d - intent mfr dist dispE13912N2018-01-27 14:01:0020181saturday14part twocentre st42.322838-71.100967
319067I060168073-003125warrant arrestswarrant arrestE13912N2018-01-27 14:01:0020181saturday14part threecentre st42.322838-71.100967
319068I050310906-003125warrant arrestswarrant arrestD4285N2016-06-05 17:25:0020166sunday17part threecoventry st42.336951-71.085748
319069I030217815-08111homicidemurder, non-negligient manslaughterE18520N2015-07-09 13:38:0020157thursday13part oneriver st42.255926-71.123172
319070I030217815-083125warrant arrestswarrant arrestE18520N2015-07-09 13:38:0020157thursday13part threeriver st42.255926-71.123172
319071I010370257-003125warrant arrestswarrant arrestE13569N2016-05-31 19:35:0020165tuesday19part threenew washington st42.302333-71.111565
3190721420525503125warrant arrestswarrant arrestD4903N2015-06-22 00:12:0020156monday0part threewashington st42.333839-71.080290
\n", "

296421 rows × 16 columns

\n", "
" ], "text/plain": [ " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", "0 I182070945 619 larceny \n", "1 I182070943 1402 vandalism \n", "2 I182070941 3410 towed \n", "3 I182070940 3114 investigate property \n", "4 I182070938 3114 investigate property \n", "5 I182070936 3820 motor vehicle accident response \n", "6 I182070933 724 auto theft \n", "7 I182070932 3301 verbal disputes \n", "8 I182070931 301 robbery \n", "9 I182070929 3301 verbal disputes \n", "10 I182070928 3301 verbal disputes \n", "11 I182070927 3114 investigate property \n", "12 I182070923 3108 fire related reports \n", "13 I182070922 2647 other \n", "14 I182070921 3201 property lost \n", "16 I182070919 3301 verbal disputes \n", "17 I182070918 3305 assembly or gathering violations \n", "18 I182070917 2647 other \n", "19 I182070915 614 larceny from motor vehicle \n", "21 I182070911 3801 motor vehicle accident response \n", "22 I182070910 3006 medical assistance \n", "23 I182070909 3803 motor vehicle accident response \n", "24 I182070908 522 residential burglary \n", "26 I182070905 3006 medical assistance \n", "27 I182070904 802 simple assault \n", "28 I182070904 2007 restraining order violations \n", "29 I182070903 2900 other \n", "30 I182070901 2907 violations \n", "31 I182070900 2629 harassment \n", "32 I182070898 802 simple assault \n", "... ... ... ... 
\n", "319040 I110694557-00 3125 warrant arrests \n", "319041 I110694557-00 3115 investigate person \n", "319043 I110551302-00 3125 warrant arrests \n", "319044 I110551302-00 623 larceny \n", "319045 I110372326-00 403 aggravated assault \n", "319046 I110372326-00 3125 warrant arrests \n", "319047 I110261417-00 3125 warrant arrests \n", "319048 I110261417-00 619 larceny \n", "319049 I110177502-00 3125 warrant arrests \n", "319050 I110177502-00 802 simple assault \n", "319051 I110177502-00 3125 warrant arrests \n", "319052 I100636670-00 629 larceny \n", "319053 I100636670-00 3125 warrant arrests \n", "319054 I100340225-00 3125 warrant arrests \n", "319055 I100340225-00 339 robbery \n", "319056 I100222105-02 3125 warrant arrests \n", "319057 I100033064-00 2907 violations \n", "319058 I100033064-00 2910 violations \n", "319061 I090317057-00 403 aggravated assault \n", "319062 I090317057-00 3125 warrant arrests \n", "319063 I080542626-00 3125 warrant arrests \n", "319064 I080542626-00 1848 drug violation \n", "319065 I080542626-00 1849 drug violation \n", "319066 I060168073-00 1864 drug violation \n", "319067 I060168073-00 3125 warrant arrests \n", "319068 I050310906-00 3125 warrant arrests \n", "319069 I030217815-08 111 homicide \n", "319070 I030217815-08 3125 warrant arrests \n", "319071 I010370257-00 3125 warrant arrests \n", "319072 142052550 3125 warrant arrests \n", "\n", " OFFENSE_DESCRIPTION DISTRICT \\\n", "0 larceny all others D14 \n", "1 vandalism C11 \n", "2 towed motor vehicle D4 \n", "3 investigate property D4 \n", "4 investigate property B3 \n", "5 m/v accident involving pedestrian - injury C11 \n", "6 auto theft B2 \n", "7 verbal dispute B2 \n", "8 robbery - street C6 \n", "9 verbal dispute C11 \n", "10 verbal dispute C6 \n", "11 investigate property C6 \n", "12 fire report - house, building, etc. 
D4 \n", "13 threats to do bodily harm B3 \n", "14 property - lost B3 \n", "16 verbal dispute C11 \n", "17 demonstrations/riot D4 \n", "18 threats to do bodily harm B2 \n", "19 larceny theft from mv - non-accessory B2 \n", "21 m/v accident - other A1 \n", "22 sick/injured/medical - person B3 \n", "23 m/v accident - personal injury E5 \n", "24 burglary - residential - no force B2 \n", "26 sick/injured/medical - person D4 \n", "27 assault simple - battery C11 \n", "28 viol. of restraining order w no arrest C11 \n", "29 val - violation of auto law - other B3 \n", "30 val - operating after rev/susp. B3 \n", "31 harassment B3 \n", "32 assault simple - battery C11 \n", "... ... ... \n", "319040 warrant arrest B3 \n", "319041 investigate person B3 \n", "319043 warrant arrest D4 \n", "319044 larceny shoplifting $50 to $199 D4 \n", "319045 assault & battery d/w - other A1 \n", "319046 warrant arrest A1 \n", "319047 warrant arrest B2 \n", "319048 larceny other $200 & over B2 \n", "319049 warrant arrest B2 \n", "319050 assault & battery B2 \n", "319051 warrant arrest B2 \n", "319052 larceny other $50 to $199 D4 \n", "319053 warrant arrest D4 \n", "319054 warrant arrest A1 \n", "319055 robbery - unarmed - street A1 \n", "319056 warrant arrest E13 \n", "319057 val - operating after rev/susp. B2 \n", "319058 val - operating after rev/susp. B2 \n", "319061 assault & battery d/w - other B3 \n", "319062 warrant arrest B3 \n", "319063 warrant arrest A1 \n", "319064 drugs - poss class b - intent to mfr dist disp A1 \n", "319065 drugs - poss class b - cocaine, etc. 
A1 \n", "319066 drugs - poss class d - intent mfr dist disp E13 \n", "319067 warrant arrest E13 \n", "319068 warrant arrest D4 \n", "319069 murder, non-negligient manslaughter E18 \n", "319070 warrant arrest E18 \n", "319071 warrant arrest E13 \n", "319072 warrant arrest D4 \n", "\n", " REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK \\\n", "0 808 N 2018-09-02 13:00:00 2018 9 sunday \n", "1 347 N 2018-08-21 00:00:00 2018 8 tuesday \n", "2 151 N 2018-09-03 19:27:00 2018 9 monday \n", "3 272 N 2018-09-03 21:16:00 2018 9 monday \n", "4 421 N 2018-09-03 21:05:00 2018 9 monday \n", "5 398 N 2018-09-03 21:09:00 2018 9 monday \n", "6 330 N 2018-09-03 21:25:00 2018 9 monday \n", "7 584 N 2018-09-03 20:39:37 2018 9 monday \n", "8 177 N 2018-09-03 20:48:00 2018 9 monday \n", "9 364 N 2018-09-03 20:38:00 2018 9 monday \n", "10 913 N 2018-09-03 19:55:00 2018 9 monday \n", "11 936 N 2018-09-03 20:19:00 2018 9 monday \n", "12 139 N 2018-09-03 19:58:00 2018 9 monday \n", "13 429 N 2018-09-03 20:39:00 2018 9 monday \n", "14 469 N 2018-09-02 14:00:00 2018 9 sunday \n", "16 341 N 2018-09-03 18:52:00 2018 9 monday \n", "17 130 N 2018-09-03 17:00:00 2018 9 monday \n", "18 901 N 2018-09-03 19:52:00 2018 9 monday \n", "19 181 N 2018-09-02 18:00:00 2018 9 sunday \n", "21 69 N 2018-09-03 18:30:00 2018 9 monday \n", "22 434 N 2018-09-03 18:42:00 2018 9 monday \n", "23 550 N 2018-09-03 18:33:00 2018 9 monday \n", "24 911 N 2018-09-03 18:38:00 2018 9 monday \n", "26 172 N 2018-09-03 18:50:00 2018 9 monday \n", "27 242 N 2018-09-03 18:34:00 2018 9 monday \n", "28 242 N 2018-09-03 18:34:00 2018 9 monday \n", "29 463 N 2018-09-03 18:55:00 2018 9 monday \n", "30 428 N 2018-09-03 18:41:00 2018 9 monday \n", "31 464 N 2018-09-03 18:17:00 2018 9 monday \n", "32 351 N 2018-09-03 19:11:00 2018 9 monday \n", "... ... ... ... ... ... ... 
\n", "319040 436 N 2016-01-22 09:45:00 2016 1 friday \n", "319041 436 N 2016-01-22 09:45:00 2016 1 friday \n", "319043 171 N 2015-07-22 22:00:00 2015 7 wednesday \n", "319044 171 N 2015-07-22 22:00:00 2015 7 wednesday \n", "319045 97 N 2016-06-14 09:40:00 2016 6 tuesday \n", "319046 97 N 2016-06-14 09:40:00 2016 6 tuesday \n", "319047 324 N 2016-07-29 00:00:00 2016 7 friday \n", "319048 324 N 2016-07-29 00:00:00 2016 7 friday \n", "319049 318 N 2015-10-02 21:00:00 2015 10 friday \n", "319050 318 N 2015-10-02 21:00:00 2015 10 friday \n", "319051 318 N 2015-10-02 21:00:00 2015 10 friday \n", "319052 285 N 2016-06-05 17:23:00 2016 6 sunday \n", "319053 285 N 2016-06-05 17:23:00 2016 6 sunday \n", "319054 77 N 2015-07-27 10:47:00 2015 7 monday \n", "319055 77 N 2015-07-27 10:47:00 2015 7 monday \n", "319056 572 N 2015-08-03 16:22:00 2015 8 monday \n", "319057 304 N 2016-07-29 18:20:00 2016 7 friday \n", "319058 304 N 2016-07-29 18:20:00 2016 7 friday \n", "319061 458 N 2015-11-20 11:15:00 2015 11 friday \n", "319062 458 N 2015-11-20 11:15:00 2015 11 friday \n", "319063 111 N 2015-08-12 12:00:00 2015 8 wednesday \n", "319064 111 N 2015-08-12 12:00:00 2015 8 wednesday \n", "319065 111 N 2015-08-12 12:00:00 2015 8 wednesday \n", "319066 912 N 2018-01-27 14:01:00 2018 1 saturday \n", "319067 912 N 2018-01-27 14:01:00 2018 1 saturday \n", "319068 285 N 2016-06-05 17:25:00 2016 6 sunday \n", "319069 520 N 2015-07-09 13:38:00 2015 7 thursday \n", "319070 520 N 2015-07-09 13:38:00 2015 7 thursday \n", "319071 569 N 2016-05-31 19:35:00 2016 5 tuesday \n", "319072 903 N 2015-06-22 00:12:00 2015 6 monday \n", "\n", " HOUR UCR_PART STREET Lat Long \n", "0 13 part one lincoln st 42.357791 -71.139371 \n", "1 0 part two hecla st 42.306821 -71.060300 \n", "2 19 part three cazenove st 42.346589 -71.072429 \n", "3 21 part three newcomb st 42.334182 -71.078664 \n", "4 21 part three delhi st 42.275365 -71.090361 \n", "5 21 part three talbot ave 42.290196 -71.071590 \n", "6 21 part one 
normandy st 42.306072 -71.082733 \n", "7 20 part three lawn st 42.327016 -71.105551 \n", "8 20 part one massachusetts ave 42.331521 -71.070853 \n", "9 20 part three leslie st 42.295147 -71.058608 \n", "10 19 part three ocean view dr 42.319579 -71.040328 \n", "11 20 part three dalessio ct 42.340115 -71.053390 \n", "12 19 part three marlborough st 42.350388 -71.087853 \n", "13 20 part two woodrow ave 42.286470 -71.087147 \n", "14 14 part three mulvey st 42.279241 -71.096674 \n", "16 18 part three stonehurst st 42.305264 -71.066838 \n", "17 17 part three huntington ave 42.348577 -71.077720 \n", "18 19 part two horadan way 42.333717 -71.096658 \n", "19 18 part one shirley st 42.325695 -71.068168 \n", "21 18 part three beacon st 42.355644 -71.071681 \n", "22 18 part three capen st 42.283402 -71.080797 \n", "23 18 part three washington st 42.275818 -71.139913 \n", "24 18 part one annunciation rd 42.335062 -71.093168 \n", "26 18 part three massachusetts ave 42.333112 -71.072764 \n", "27 18 part two annapolis st 42.317319 -71.061509 \n", "28 18 part two annapolis st 42.317319 -71.061509 \n", "29 18 part two blue hill ave 42.295904 -71.087733 \n", "30 18 part two clarkwood st 42.280137 -71.090798 \n", "31 18 part two hansborough st 42.288104 -71.091533 \n", "32 19 part two salisbury park 42.299284 -71.059172 \n", "... ... ... ... ... ... 
\n", "319040 9 part three withington st 42.288767 -71.072897 \n", "319041 9 part three withington st 42.288767 -71.072897 \n", "319043 22 part three harrison ave 42.335560 -71.074364 \n", "319044 22 part one harrison ave 42.335560 -71.074364 \n", "319045 9 part one school st 42.357428 -71.058326 \n", "319046 9 part three school st 42.357428 -71.058326 \n", "319047 0 part three bowdoin st 42.307038 -71.066153 \n", "319048 0 part one bowdoin st 42.307038 -71.066153 \n", "319049 21 part three homestead st 42.311277 -71.089093 \n", "319050 21 part two homestead st 42.311277 -71.089093 \n", "319051 21 part three homestead st 42.311277 -71.089093 \n", "319052 17 part one coventry st 42.336951 -71.085748 \n", "319053 17 part three coventry st 42.336951 -71.085748 \n", "319054 10 part three bowdoin sq 42.361645 -71.062299 \n", "319055 10 part one bowdoin sq 42.361645 -71.062299 \n", "319056 16 part three columbus ave 42.313628 -71.095603 \n", "319057 18 part two slayton way 42.321770 -71.097798 \n", "319058 18 part two slayton way 42.321770 -71.097798 \n", "319061 11 part one blue hill ave 42.301897 -71.085549 \n", "319062 11 part three blue hill ave 42.301897 -71.085549 \n", "319063 12 part three boylston st 42.352312 -71.063705 \n", "319064 12 part two boylston st 42.352312 -71.063705 \n", "319065 12 part two boylston st 42.352312 -71.063705 \n", "319066 14 part two centre st 42.322838 -71.100967 \n", "319067 14 part three centre st 42.322838 -71.100967 \n", "319068 17 part three coventry st 42.336951 -71.085748 \n", "319069 13 part one river st 42.255926 -71.123172 \n", "319070 13 part three river st 42.255926 -71.123172 \n", "319071 19 part three new washington st 42.302333 -71.111565 \n", "319072 0 part three washington st 42.333839 -71.080290 \n", "\n", "[296421 rows x 16 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Większość danych o strzelaninach jest pusta więc zakładam, że są to inceydenty bez 
strzelanin\n",
 "crime[\"SHOOTING\"] = crime[\"SHOOTING\"].fillna(\"N\")\n",
 "\n",
 "# The Location column only repeats the values of Lat and Long.\n",
 "# errors=\"ignore\" keeps this cell re-runnable once the column is already gone.\n",
 "crime = crime.drop(columns=[\"Location\"], errors=\"ignore\")\n",
 "\n",
 "# Drop rows with invalid or missing coordinates (NaN fails both comparisons).\n",
 "# .copy() yields an independent frame, so the column assignments below do not\n",
 "# write into a slice of the previous frame (SettingWithCopyWarning).\n",
 "crime = crime[(crime[\"Lat\"] > 35) & (crime[\"Long\"] < -65)].copy()\n",
 "\n",
 "# Lowercase the text columns.\n",
 "for column in [\"OFFENSE_CODE_GROUP\", \"OFFENSE_DESCRIPTION\", \"DAY_OF_WEEK\", \"UCR_PART\", \"STREET\"]:\n",
 "    crime[column] = crime[column].str.lower()\n",
 "\n",
 "# Drop the remaining rows that contain nulls. dropna() returns a new frame,\n",
 "# so the result has to be assigned back; otherwise the null rows stay in `crime`\n",
 "# and leak into the train/dev/test splits.\n",
 "crime = crime.dropna()\n",
 "crime"
 ] }, { "cell_type": "code", "execution_count": 10, "id": "lucky-compression", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: scikit-learn in /usr/lib/python3/dist-packages (0.20.2)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [
 "%pip install --user scikit-learn"
 ] }, { "cell_type": "code", "execution_count": 11, "id": "further-asset", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(238329, 16)\n", "(30000, 16)\n", "(30000, 16)\n" ] } ], "source": [
 "from sklearn.model_selection import train_test_split\n",
 "\n",
 "# The data set is fairly large (about 300k rows after cleaning), so an 8:1:1 split\n",
 "# is used, i.e. roughly 30k rows each for dev and test.\n",
 "# First hold out 60k rows, then halve that holdout into test and dev.\n",
 "crime_train, crime_holdout = train_test_split(crime, test_size=60000, random_state=1)\n",
 "crime_test, crime_dev = train_test_split(crime_holdout, test_size=30000, random_state=1)\n",
 "print(crime_train.shape)\n",
 "print(crime_test.shape)\n",
 "print(crime_dev.shape)"
 ] }, { "cell_type": "code", "execution_count": 12, "id": "united-belly", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLong
count238329238329.000000238329238329237585238329238329238329238329.000000238329.000000238329238329.000000238253236876238329.000000238329.000000
unique215646NaN66233128782183782NaNNaN7NaN43760NaNNaN
topI162030584NaNmotor vehicle accident responsesick/injured/medical - personB2111N2016-08-01 00:00:00NaNNaNfridayNaNpart threewashington stNaNNaN
freq11NaN247831438036780185123757522NaNNaN36273NaN11749211305NaNNaN
meanNaN2296.198717NaNNaNNaNNaNNaNNaN2016.5517546.615175NaN13.129107NaNNaN42.322310-71.082836
stdNaN1182.831284NaNNaNNaNNaNNaNNaN1.0010313.277604NaN6.278188NaNNaN0.0318910.029754
minNaN111.000000NaNNaNNaNNaNNaNNaN2015.0000001.000000NaN0.000000NaNNaN42.232413-71.178674
25%NaN802.000000NaNNaNNaNNaNNaNNaN2016.0000004.000000NaN9.000000NaNNaN42.297555-71.097193
50%NaN2907.000000NaNNaNNaNNaNNaNNaN2017.0000007.000000NaN14.000000NaNNaN42.325629-71.077551
75%NaN3201.000000NaNNaNNaNNaNNaNNaN2017.0000009.000000NaN18.000000NaNNaN42.348624-71.062563
maxNaN3831.000000NaNNaNNaNNaNNaNNaN2018.00000012.000000NaN23.000000NaNNaN42.395042-70.963676
\n", "
" ], "text/plain": [ " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", "count 238329 238329.000000 238329 \n", "unique 215646 NaN 66 \n", "top I162030584 NaN motor vehicle accident response \n", "freq 11 NaN 24783 \n", "mean NaN 2296.198717 NaN \n", "std NaN 1182.831284 NaN \n", "min NaN 111.000000 NaN \n", "25% NaN 802.000000 NaN \n", "50% NaN 2907.000000 NaN \n", "75% NaN 3201.000000 NaN \n", "max NaN 3831.000000 NaN \n", "\n", " OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING \\\n", "count 238329 237585 238329 238329 \n", "unique 233 12 878 2 \n", "top sick/injured/medical - person B2 111 N \n", "freq 14380 36780 1851 237575 \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n", "\n", " OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK \\\n", "count 238329 238329.000000 238329.000000 238329 \n", "unique 183782 NaN NaN 7 \n", "top 2016-08-01 00:00:00 NaN NaN friday \n", "freq 22 NaN NaN 36273 \n", "mean NaN 2016.551754 6.615175 NaN \n", "std NaN 1.001031 3.277604 NaN \n", "min NaN 2015.000000 1.000000 NaN \n", "25% NaN 2016.000000 4.000000 NaN \n", "50% NaN 2017.000000 7.000000 NaN \n", "75% NaN 2017.000000 9.000000 NaN \n", "max NaN 2018.000000 12.000000 NaN \n", "\n", " HOUR UCR_PART STREET Lat Long \n", "count 238329.000000 238253 236876 238329.000000 238329.000000 \n", "unique NaN 4 3760 NaN NaN \n", "top NaN part three washington st NaN NaN \n", "freq NaN 117492 11305 NaN NaN \n", "mean 13.129107 NaN NaN 42.322310 -71.082836 \n", "std 6.278188 NaN NaN 0.031891 0.029754 \n", "min 0.000000 NaN NaN 42.232413 -71.178674 \n", "25% 9.000000 NaN NaN 42.297555 -71.097193 \n", "50% 14.000000 NaN NaN 42.325629 -71.077551 \n", "75% 18.000000 NaN NaN 42.348624 -71.062563 \n", "max 23.000000 NaN NaN 42.395042 -70.963676 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"crime_train.describe(include=\"all\")" ] }, { "cell_type": "code", "execution_count": 13, "id": "instrumental-procurement", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLong
count3000030000.00000030000300002992530000300003000030000.00000030000.0000003000030000.000000299952983630000.00000030000.000000
unique29560NaN6218312869228717NaNNaN7NaN42646NaNNaN
topI162050329NaNmotor vehicle accident responsesick/injured/medical - personB2111N2015-12-16 20:00:00NaNNaNfridayNaNpart threewashington stNaNNaN
freq4NaN308718004659229298896NaNNaN4667NaN146981374NaNNaN
meanNaN2289.794500NaNNaNNaNNaNNaNNaN2016.5606336.584300NaN13.063833NaNNaN42.322205-71.082903
stdNaN1182.877414NaNNaNNaNNaNNaNNaN0.9961713.295642NaN6.281078NaNNaN0.0318850.029818
minNaN111.000000NaNNaNNaNNaNNaNNaN2015.0000001.000000NaN0.000000NaNNaN42.233157-71.176805
25%NaN802.000000NaNNaNNaNNaNNaNNaN2016.0000004.000000NaN9.000000NaNNaN42.297344-71.097374
50%NaN2906.000000NaNNaNNaNNaNNaNNaN2017.0000007.000000NaN14.000000NaNNaN42.325474-71.077720
75%NaN3201.000000NaNNaNNaNNaNNaNNaN2017.0000009.000000NaN18.000000NaNNaN42.348610-71.062570
maxNaN3831.000000NaNNaNNaNNaNNaNNaN2018.00000012.000000NaN23.000000NaNNaN42.395042-70.996769
\n", "
" ], "text/plain": [ " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", "count 30000 30000.000000 30000 \n", "unique 29560 NaN 62 \n", "top I162050329 NaN motor vehicle accident response \n", "freq 4 NaN 3087 \n", "mean NaN 2289.794500 NaN \n", "std NaN 1182.877414 NaN \n", "min NaN 111.000000 NaN \n", "25% NaN 802.000000 NaN \n", "50% NaN 2906.000000 NaN \n", "75% NaN 3201.000000 NaN \n", "max NaN 3831.000000 NaN \n", "\n", " OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING \\\n", "count 30000 29925 30000 30000 \n", "unique 183 12 869 2 \n", "top sick/injured/medical - person B2 111 N \n", "freq 1800 4659 229 29889 \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n", "\n", " OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK \\\n", "count 30000 30000.000000 30000.000000 30000 \n", "unique 28717 NaN NaN 7 \n", "top 2015-12-16 20:00:00 NaN NaN friday \n", "freq 6 NaN NaN 4667 \n", "mean NaN 2016.560633 6.584300 NaN \n", "std NaN 0.996171 3.295642 NaN \n", "min NaN 2015.000000 1.000000 NaN \n", "25% NaN 2016.000000 4.000000 NaN \n", "50% NaN 2017.000000 7.000000 NaN \n", "75% NaN 2017.000000 9.000000 NaN \n", "max NaN 2018.000000 12.000000 NaN \n", "\n", " HOUR UCR_PART STREET Lat Long \n", "count 30000.000000 29995 29836 30000.000000 30000.000000 \n", "unique NaN 4 2646 NaN NaN \n", "top NaN part three washington st NaN NaN \n", "freq NaN 14698 1374 NaN NaN \n", "mean 13.063833 NaN NaN 42.322205 -71.082903 \n", "std 6.281078 NaN NaN 0.031885 0.029818 \n", "min 0.000000 NaN NaN 42.233157 -71.176805 \n", "25% 9.000000 NaN NaN 42.297344 -71.097374 \n", "50% 14.000000 NaN NaN 42.325474 -71.077720 \n", "75% 18.000000 NaN NaN 42.348610 -71.062570 \n", "max 23.000000 NaN NaN 42.395042 -70.996769 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime_test.describe(include=\"all\")" ] }, { 
"cell_type": "code", "execution_count": 13, "id": "piano-gambling", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLong
count3000030000.00000030000300002992230000300003000030000.00000030000.0000003000030000.000000299912983630000.00000030000.000000
unique29561NaN6118912868228778NaNNaN7NaN42689NaNNaN
topI162010747NaNmotor vehicle accident responseinvestigate personB2111N2017-06-01 00:00:00NaNNaNthursdayNaNpart threewashington stNaNNaN
freq4NaN314519084762258298826NaNNaN4425NaN149101434NaNNaN
meanNaN2303.541933NaNNaNNaNNaNNaNNaN2016.5607006.626700NaN13.157033NaNNaN42.322292-71.082911
stdNaN1185.561127NaNNaNNaNNaNNaNNaN0.9988743.264563NaN6.282363NaNNaN0.0318040.029857
minNaN111.000000NaNNaNNaNNaNNaNNaN2015.0000001.000000NaN0.000000NaNNaN42.232656-71.178674
25%NaN802.000000NaNNaNNaNNaNNaNNaN2016.0000004.000000NaN9.000000NaNNaN42.297555-71.097193
50%NaN2907.000000NaNNaNNaNNaNNaNNaN2017.0000007.000000NaN14.000000NaNNaN42.325834-71.077564
75%NaN3201.000000NaNNaNNaNNaNNaNNaN2017.0000009.000000NaN18.000000NaNNaN42.348610-71.062607
maxNaN3831.000000NaNNaNNaNNaNNaNNaN2018.00000012.000000NaN23.000000NaNNaN42.395042-70.996769
\n", "
" ], "text/plain": [ " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", "count 30000 30000.000000 30000 \n", "unique 29561 NaN 61 \n", "top I162010747 NaN motor vehicle accident response \n", "freq 4 NaN 3145 \n", "mean NaN 2303.541933 NaN \n", "std NaN 1185.561127 NaN \n", "min NaN 111.000000 NaN \n", "25% NaN 802.000000 NaN \n", "50% NaN 2907.000000 NaN \n", "75% NaN 3201.000000 NaN \n", "max NaN 3831.000000 NaN \n", "\n", " OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING \\\n", "count 30000 29922 30000 30000 \n", "unique 189 12 868 2 \n", "top investigate person B2 111 N \n", "freq 1908 4762 258 29882 \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n", "\n", " OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK \\\n", "count 30000 30000.000000 30000.000000 30000 \n", "unique 28778 NaN NaN 7 \n", "top 2017-06-01 00:00:00 NaN NaN thursday \n", "freq 6 NaN NaN 4425 \n", "mean NaN 2016.560700 6.626700 NaN \n", "std NaN 0.998874 3.264563 NaN \n", "min NaN 2015.000000 1.000000 NaN \n", "25% NaN 2016.000000 4.000000 NaN \n", "50% NaN 2017.000000 7.000000 NaN \n", "75% NaN 2017.000000 9.000000 NaN \n", "max NaN 2018.000000 12.000000 NaN \n", "\n", " HOUR UCR_PART STREET Lat Long \n", "count 30000.000000 29991 29836 30000.000000 30000.000000 \n", "unique NaN 4 2689 NaN NaN \n", "top NaN part three washington st NaN NaN \n", "freq NaN 14910 1434 NaN NaN \n", "mean 13.157033 NaN NaN 42.322292 -71.082911 \n", "std 6.282363 NaN NaN 0.031804 0.029857 \n", "min 0.000000 NaN NaN 42.232656 -71.178674 \n", "25% 9.000000 NaN NaN 42.297555 -71.097193 \n", "50% 14.000000 NaN NaN 42.325834 -71.077564 \n", "75% 18.000000 NaN NaN 42.348610 -71.062607 \n", "max 23.000000 NaN NaN 42.395042 -70.996769 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime_dev.describe(include=\"all\")" ] }, { "cell_type": 
"code", "execution_count": 14, "id": "historical-rebecca", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3006 14380\n", "3115 14336\n", "1402 11677\n", "3831 11635\n", "802 11229\n", "3301 10373\n", "3410 8577\n", "3114 8462\n", "617 7114\n", "2647 7010\n", "3201 6734\n", "614 6614\n", "613 6250\n", "3125 6060\n", "619 4490\n", "3802 4308\n", "413 3625\n", "3502 3470\n", "1102 3461\n", "2629 3159\n", "3803 3123\n", "3501 2975\n", "3207 2763\n", "724 2676\n", "1106 2466\n", "2610 2461\n", "301 2182\n", "423 2162\n", "520 2039\n", "2900 1973\n", " ... \n", "1302 2\n", "2910 2\n", "1002 2\n", "803 2\n", "2672 2\n", "629 2\n", "1866 2\n", "633 2\n", "123 2\n", "770 2\n", "1807 2\n", "627 1\n", "349 1\n", "624 1\n", "112 1\n", "402 1\n", "527 1\n", "637 1\n", "530 1\n", "1620 1\n", "2609 1\n", "404 1\n", "1105 1\n", "547 1\n", "335 1\n", "315 1\n", "714 1\n", "1864 1\n", "1863 1\n", "639 1\n", "Name: OFFENSE_CODE, Length: 215, dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime_train[\"OFFENSE_CODE\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 15, "id": "advance-profile", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "B2 36780\n", "C11 32723\n", "D4 30659\n", "B3 26864\n", "A1 25879\n", "C6 17285\n", "D14 15259\n", "E18 13366\n", "E13 13297\n", "A7 10412\n", "E5 10149\n", "A15 4912\n", "Name: DISTRICT, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime_train[\"DISTRICT\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 15, "id": "three-horizontal", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2017 74671\n", "2016 73720\n", "2018 48922\n", "2015 41016\n", "Name: YEAR, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime_train[\"YEAR\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 17, "id": 
"certain-opera", "metadata": {}, "outputs": [], "source": [ "crime_test.to_csv(\"crime_test.csv\", encoding=\"utf-8\", index=False)\n", "crime_dev.to_csv(\"crime_dev.csv\", encoding=\"utf-8\", index=False)\n", "crime_train.to_csv(\"crime_train.csv\", encoding=\"utf-8\", index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 5 }