A few random Pandas commands from the SFPD.CSV File

In [1]:
import pandas as pd
In [2]:
# read in the csv file that was created in the Python Example
sfpd = pd.read_csv('sfpd.csv', sep='\t')
In [3]:
# review size and shape
sfpd.shape
Out[3]:
(2170785, 13)
In [4]:
# obtain summary statistics
sfpd.groupby(['category']).size()
Out[4]:
category
ARSON                            3839
ASSAULT                        190394
BAD CHECKS                        914
BRIBERY                           797
BURGLARY                        89528
DISORDERLY CONDUCT               9950
DRIVING UNDER THE INFLUENCE      5593
DRUG/NARCOTIC                  118260
DRUNKENNESS                      9746
EMBEZZLEMENT                     2927
EXTORTION                         725
FAMILY OFFENSES                  1171
FORGERY/COUNTERFEITING          22839
FRAUD                           40733
GAMBLING                          341
KIDNAPPING                       5275
LARCENY/THEFT                  467657
LIQUOR LAWS                      4069
LOITERING                        2414
MISSING PERSON                  63706
NON-CRIMINAL                   233323
OTHER OFFENSES                 304042
PORNOGRAPHY/OBSCENE MAT            55
PROSTITUTION                    16652
RECOVERED VEHICLE                8714
ROBBERY                         54854
RUNAWAY                          4355
SECONDARY CODES                 25223
SEX OFFENSES, FORCIBLE          11411
SEX OFFENSES, NON FORCIBLE        420
STOLEN PROPERTY                 11645
SUICIDE                          1275
SUSPICIOUS OCC                  78823
TREA                               14
TRESPASS                        18959
VANDALISM                      113436
VEHICLE THEFT                  125209
WARRANTS                        99799
WEAPON LAWS                     21698
dtype: int64
In [5]:
# show column names
sfpd.columns
Out[5]:
Index(['Unnamed: 0', 'unique_key', 'category', 'descript', 'dayofweek',
       'pddistrict', 'resolution', 'address', 'longitude', 'latitude',
       'location', 'pdid', 'timestamp'],
      dtype='object')
In [6]:
# Create variale to filter df
family = sfpd['category'] == 'FAMILY OFFENSES'
In [7]:
# Create variale to filter df
suicide= sfpd['category'] == 'SUICIDE'
In [8]:
# Create DF
family_db = sfpd[family]
In [9]:
# Create DF
suicide_db = sfpd[suicide]
In [10]:
# Drop columns 
ColList = ['descript', 'dayofweek','location', 'resolution','Unnamed: 0','unique_key','pdid','timestamp']
family_db.drop(ColList, axis = 1, inplace = True)
/anaconda3/envs/PythonData/lib/python3.7/site-packages/pandas/core/frame.py:3930: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
In [11]:
ColList = ['descript', 'dayofweek','location', 'resolution','Unnamed: 0','unique_key','pdid','timestamp']
suicide_db.drop(ColList, axis = 1, inplace = True)
In [12]:
# Append the two DF's together
sf_db = suicide_db.append(family_db)
In [22]:
sf_db.to_csv("../sf_db.csv", index=False)
In [31]:
# How many times did Loitering take place on Monday's
df = len(sfpd.groupby(['category','dayofweek']).groups['LOITERING','Sunday'])
df
Out[31]:
289
In [32]:
# What are the top three categories sorted by count of occurences 
df = sfpd.groupby(['category','dayofweek'])['unique_key'].count().sort_values(ascending=False)
df[:21]
Out[32]:
category        dayofweek
LARCENY/THEFT   Friday       72722
                Saturday     72580
                Thursday     65815
                Wednesday    65420
                Sunday       64467
                Tuesday      64127
                Monday       62526
OTHER OFFENSES  Wednesday    47605
                Tuesday      45477
                Friday       45049
                Thursday     44250
                Monday       42933
                Saturday     41634
                Sunday       37094
NON-CRIMINAL    Friday       35622
                Saturday     35171
                Wednesday    32715
                Thursday     32651
                Monday       32634
                Tuesday      32270
                Sunday       32260
Name: unique_key, dtype: int64
In [33]:
# filter the data to Larceny and sort it by day of week
df = sfpd[sfpd['category']=='LARCENY/THEFT'].sort_values(by='dayofweek')
df.head()
Out[33]:
Unnamed: 0 unique_key category descript dayofweek pddistrict resolution address longitude latitude location pdid timestamp
1649774 1649774 160169152 LARCENY/THEFT PETTY THEFT SHOPLIFTING Friday SOUTHERN ARREST, BOOKED 700 Block of MISSION ST -122.403070 37.785269 (37.78526863090873, -122.40307000084677) 16016915206361 2016-02-26 16:21:00+00:00
1492763 1492763 170308572 LARCENY/THEFT THEFT OF COMPUTERS OR CELL PHONES Friday SOUTHERN NONE 0 Block of DORE ST -122.412933 37.773927 (37.773927452481935, -122.41293306238357) 17030857206126 2017-04-14 18:00:00+00:00
1729070 1729070 90295534 LARCENY/THEFT GRAND THEFT FROM LOCKED AUTO Friday SOUTHERN NONE 4TH ST / BRYANT ST -122.398063 37.779566 (37.7795658460293, -122.398063445395) 9029553406244 2009-03-20 22:43:00+00:00
802204 802204 166282449 LARCENY/THEFT GRAND THEFT FROM LOCKED AUTO Friday MISSION NONE 2300 Block of 16TH ST -122.409530 37.765718 (37.76571843952822, -122.40952991327822) 16628244906244 2016-12-02 19:25:00+00:00
802207 802207 126137577 LARCENY/THEFT GRAND THEFT FROM LOCKED AUTO Friday MISSION NONE 1700 Block of HARRISON ST -122.413492 37.769083 (37.7690826019573, -122.413492224653) 12613757706244 2012-09-07 18:00:00+00:00
In [34]:
# what are all the crimes that occured on Monday or Friday
df = sfpd[(sfpd['dayofweek'] =='Monday') | (sfpd['dayofweek'] == 'Friday')]
df.head()
Out[34]:
Unnamed: 0 unique_key category descript dayofweek pddistrict resolution address longitude latitude location pdid timestamp
10 10 60616120 BURGLARY BURGLARY, UNLAWFUL ENTRY Friday PARK NONE 1100 Block of HAYES ST -122.435311 37.775255 (37.7752551506123, -122.435310896707) 6061612005073 2006-06-09 21:30:00+00:00
13 13 40119229 WEAPON LAWS POSS OF PROHIBITED WEAPON Friday PARK ARREST, BOOKED 0 Block of KEZAR DR -122.455127 37.769520 (37.7695200227183, -122.45512727517) 4011922912120 2004-01-30 09:30:00+00:00
15 15 51249067 BURGLARY BURGLARY OF APARTMENT HOUSE, FORCIBLE ENTRY Friday PARK NONE 300 Block of WARREN DR -122.461414 37.755607 (37.7556066306132, -122.461414342148) 5124906705011 2005-11-04 11:30:00+00:00
16 16 30429179 RUNAWAY RUNAWAY Friday PARK NONE 1200 Block of PAGE ST -122.441394 37.771727 (37.7717273141771, -122.441393701114) 3042917929100 2003-04-11 18:00:00+00:00
25 25 90856926 WARRANTS ENROUTE TO DEPARTMENT OF CORRECTIONS Friday PARK ARREST, BOOKED 1700 Block of HAIGHT ST -122.451288 37.769397 (37.7693967099165, -122.451288061231) 9085692662020 2009-08-21 15:31:00+00:00
In [35]:
# count the number of occurences per crime per district
df = sfpd.groupby(['category','dayofweek','pddistrict'])['dayofweek'].count().unstack(0)
df.head()
Out[35]:
category ARSON ASSAULT BAD CHECKS BRIBERY BURGLARY DISORDERLY CONDUCT DRIVING UNDER THE INFLUENCE DRUG/NARCOTIC DRUNKENNESS EMBEZZLEMENT ... SEX OFFENSES, NON FORCIBLE STOLEN PROPERTY SUICIDE SUSPICIOUS OCC TREA TRESPASS VANDALISM VEHICLE THEFT WARRANTS WEAPON LAWS
dayofweek pddistrict
Friday BAYVIEW 133.0 3508.0 9.0 25.0 1658.0 81.0 58.0 1393.0 66.0 48.0 ... 11.0 133.0 13.0 1491.0 NaN 247.0 2045.0 2657.0 1442.0 662.0
CENTRAL 38.0 2504.0 21.0 4.0 2040.0 134.0 63.0 603.0 176.0 68.0 ... 2.0 202.0 23.0 1141.0 1.0 346.0 1944.0 1596.0 980.0 211.0
INGLESIDE 75.0 3050.0 12.0 13.0 1366.0 49.0 69.0 814.0 78.0 22.0 ... 6.0 136.0 21.0 1177.0 NaN 152.0 2090.0 3143.0 879.0 459.0
MISSION 59.0 4091.0 16.0 20.0 1512.0 345.0 135.0 2626.0 275.0 48.0 ... 9.0 223.0 21.0 1526.0 NaN 417.0 2158.0 2627.0 2345.0 502.0
NORTHERN 55.0 2959.0 12.0 9.0 2331.0 150.0 100.0 1392.0 107.0 63.0 ... 1.0 243.0 34.0 1282.0 NaN 293.0 2266.0 2392.0 1411.0 295.0

5 rows × 39 columns

In [36]:
from mapboxgl.utils import create_color_stops, df_to_geojson
from mapboxgl.viz import CircleViz
In [37]:
category_color_stops = [['SUICIDE', 'rgb(211,47,47)'],  
                        ['FAMILY OFFENSES', 'rgb(34,139,34)']]
In [39]:
df_to_geojson(sf_db,
             filename='markers.geojson',
             lat='latitude', lon='longitude', precision=3)

token= "pk.Removed for Privacy "

viz = CircleViz('markers.geojson', access_token=token,
               radius=2, center = (sf_db.longitude.mean(), sf_db.latitude.mean()),
               zoom=12,
               style='mapbox://styles/mapbox/streets-v11',
               color_property='category', 
               color_function_type='match',
               color_stops=category_color_stops,
               label_color= 'blue',
               color_default = 'red',
               stroke_color = 'black')

viz.show()

MapBox Map Image From Code Above