I found this dataset of SFPD incidents on SF OpenData yesterday. To get all of the observations (over a million), I had to filter on the beginning and end dates before downloading.
After importing it into SAS, I took a look at the variables and became interested in the frequency of drug and narcotics incidents by police district. I also wanted to be able to run analyses by year. Since the values of the variable “Date” were entered in the form 01/01/2003, I needed to make some modifications.
(Current district map. New boundaries are being considered).
/* Point the FREQ libref at the desktop folder; the permanent data sets
   created below are stored there. */
libname freq '\\psf\Home\Desktop\';

/* Import the SFPD incident CSV into FREQ.SFPD, replacing any existing copy.
   GETNAMES=YES takes the variable names from the first row of the file.
   GUESSINGROWS=MAX makes PROC IMPORT scan every row before it decides each
   column's type and length. By default only a small number of leading rows
   is scanned, so in a file this large a longer text value further down can
   be truncated, or a column can be typed incorrectly. */
proc import datafile="\\psf\Home\Desktop\all.csv"
            out=freq.sfpd
            dbms=csv
            replace;
    getnames=yes;
    guessingrows=max;
run;
/* Send the PROC CONTENTS listing for the imported data set to 01.pdf. */
ods pdf file='\\psf\Home\Desktop\01.pdf' style=SASdocprinter;
options nodate;
title Original Data Set: SFPD;

proc contents data=freq.sfpd;
run;

/* Clear the title so later output does not inherit it, then close the PDF. */
title;
ods pdf close;
/* Build FREQ.SFPD_01 from FREQ.SFPD, adding three numeric columns
   (month, day, year) derived from the Date column. */
data freq.sfpd_01;
    set freq.sfpd;
    month = month(Date);
    day   = day(Date);
    year  = year(Date);
run;
/* Eyeball the first 20 observations of the new data set. */
proc print data=freq.sfpd_01(obs=20);
run;
/* Summarize the Date column: count of non-missing and missing values,
   the earliest, latest, and median date (shown as mm/dd/yyyy), and the range. */
proc tabulate data=freq.sfpd_01;
    var Date;
    table Date,
          n nmiss (min max median)*f=mmddyy10. range;
run;
/* Frequency tables for month, day, and year. The original author used these
   to confirm that each variable accounts for 1816480 instances. */
proc freq data=freq.sfpd_01;
    table month day year / nocol nopercent;
run;
/* Convert month, day, and year from numeric to character, in place.
   The numeric columns are read in under temporary names, character versions
   are created under the original names, and the temporary numerics are
   dropped. */
data freq.sfpd_01;
    set freq.sfpd_01(rename=(month=num_month day=num_day year=num_year));

    month = put(num_month, 2.);
    day   = put(num_day, 2.);
    year  = put(num_year, 4.);

    drop num_month num_day num_year;
run;
/* Update the metadata of FREQ.SFPD_01 without rewriting the data:
   character formats/informats for the new date parts, and a label for
   every variable. */
proc datasets library=freq;
    modify sfpd_01;

    format   month $2. day $2. year $4.;
    informat month $2. day $2. year $4.;

    label
        Address    = "Address"
        Category   = "Category"
        Date       = "Date"
        DayOfWeek  = "Day Of Week"
        Descript   = "Description"
        IncidntNum = "Incident Number"
        Location   = "Location"
        PdDistrict = "Police Dept. District"
        PdId       = "Police Dept. ID"
        Resolution = "Resolution"
        Time       = "Time"
        X          = "X Coordinate"
        Y          = "Y Coordinate"
        month      = "Month"
        day        = "Day"
        year       = "Year"
    ;
run;
quit;
/* Rewrite five text columns in proper case (first letter of each word
   capitalized), in place. */
data freq.sfpd_01;
    set freq.sfpd_01;

    /* The array must follow SET so it picks up the existing character columns. */
    array text_cols{*} PdDistrict Category Descript Resolution Address;
    do _idx = 1 to dim(text_cols);
        text_cols{_idx} = propcase(text_cols{_idx});
    end;

    drop _idx;
run;
/* Send the PROC CONTENTS listing for the modified data set to 02.pdf. */
ods pdf file='\\psf\Home\Desktop\02.pdf' style=SASdocprinter;
options nodate;
title Modified Data Set: SFPD;

proc contents data=freq.sfpd_01;
run;

/* Clear the title so later output does not inherit it, then close the PDF. */
title;
ods pdf close;
/* Eyeball the first 20 observations after the modifications. */
proc print data=freq.sfpd_01(obs=20);
run;
/* Report on drug/narcotic incidents by police district: a frequency table
   followed by a bar chart. The PDF destination opened here (03.pdf) stays
   open across both procedures and is closed by the final statement. */
ods pdf file='\\psf\Home\Desktop\03.pdf' style=SASdocprinter; options nodate; title Drug/Narcotic Incidents by PD District, All Years;
/* One-way frequency table of PdDistrict. The WHERE comparison is an exact
   string match, so it relies on Category having been proper-cased by the
   earlier PROPCASE step. */
proc freq data = freq.sfpd_01; where Category='Drug/Narcotic'; tables PdDistrict/nocum nocol norow; run; title;
/* Graphics setup for the chart. GOPTIONS RESET=ALL comes before the TITLE
   statement below, so that title is set after the reset. */
option gstyle; ods listing style=statistical; goptions reset=all device=gif hsize=20cm vsize=10cm;
/* NOTE(review): PROC GCHART is a SAS/GRAPH procedure; ODS GRAPHICS ON/OFF
   presumably has no effect on it - confirm before relying on it. */
ods graphics on;
/* NOTE(review): this title mixes unquoted text with a quoted date range;
   confirm that it renders as intended. */
title Drug/Narcotic Incidents by PD District "January 1,2003-September 14,2015";
/* Vertical bar chart with one bar per police district, same filter as above. */
proc gchart data=freq.sfpd_01; where Category='Drug/Narcotic'; vbar PdDistrict; run; quit;
/* Clear the title, turn ODS Graphics off, and close 03.pdf. */
title; ods graphics off; ods pdf close;
To be continued.