-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoverview_data_cleaning.R
126 lines (100 loc) · 4.79 KB
/
overview_data_cleaning.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# ------------------------------------- Loading Packages and Data ------------------------------------------
# Loading Libraries
library(lubridate)
library(janitor)
library(zoo)
library(tidyverse)
# Paths to the raw inputs; each is resolved through here::here() when read
pd_sizes_link <- "original_data/overview_original_data/pe_1960_2022.csv"
agency_locations_link <- "original_data/misconduct_original_data/data_agency-reference-list.csv"
pd_references_link <- "original_data/overview_original_data/ICPSR_35158/DS0001/35158-0001-Data.rda"
# CSV inputs: police employment counts and the agency reference list
pd_sizes <- pd_sizes_link %>%
  here::here() %>%
  read_csv()
agency_locations <- agency_locations_link %>%
  here::here() %>%
  read_csv()
# The .rda file is restored with load(); the script then reads the object
# named `da35158.0001` that the load makes available
load(file = here::here(pd_references_link))
pd_references <- da35158.0001
# ------------------------------------- Cleaning Data Process ----------------------------------------------
# Keep only the agency identifier and agency name from the reference table,
# renaming them to `ori` and `agency_full_name` in the same step
pd_references <- select(pd_references, ori = ORI9, agency_full_name = NAME)
# Show the trimmed reference table
pd_references
# Defining agency type key words
# Each vector holds lower-case regex fragments; the fragments of one vector are
# OR-ed together and matched against the lower-cased agency name.
constable <- c("constable")
marshal <- c("marshal")
police_department <- c("department", "police department")
sheriff <- c("sheriff")
# Campus acronyms are anchored on word boundaries so that they no longer match
# inside ordinary place names (e.g. "ula" inside "ponchatoula" or "tallulah").
# "\\blsu" is anchored only at the start so longer LSU acronyms still match.
university <- c(
  "university", "college", "campus",
  "\\blsu", "\\buno\\b", "\\busl\\b", "\\bula\\b", "lsuhc"
)

# TRUE where the lower-cased name matches at least one of `keywords`
# (NA where the name itself is NA)
has_keyword <- function(name, keywords) {
  str_detect(tolower(name), paste(keywords, collapse = "|"))
}

# Defining the type of agency
# Rules are tried top to bottom and the first match wins, so a campus police
# department is labelled as campus police rather than "Police Department".
agency_locations <- agency_locations %>%
  mutate(
    agency_type = case_when(
      # Keep missing names as NA instead of falling through to "Other"
      is.na(agency_name) ~ NA_character_,
      has_keyword(agency_name, university) ~ "University or Campus Police",
      has_keyword(agency_name, marshal) ~ "Marshal's Office",
      has_keyword(agency_name, constable) ~ "Constable's Office",
      has_keyword(agency_name, sheriff) ~ "Sheriff's Office",
      has_keyword(agency_name, police_department) ~ "Police Department",
      TRUE ~ "Other Law Enforcement Agency"
    )
  )
# Connecting department references with police department sizes
# Restricts the employment data to Louisiana, attaches the reference name for
# each `ori`, and builds a cleaned `agency_name` column.
la_pd_sizes <- pd_sizes %>%
  filter(state_abbr == "LA") %>%
  left_join(pd_references, by = "ori") %>%
  mutate(
    agency_name = str_trim(str_to_title(agency_full_name)),
    # Fall back to the published name when the reference table has no match
    agency_name = ifelse(is.na(agency_name), pub_agency_name, agency_name),
    # Expand the first whole-word "Dept", "Dept." or "Pd" abbreviation.
    # The period is consumed together with "Dept" so it is not left behind
    # mid-name, the word boundaries stop "Pd" matching inside longer words,
    # and a preceding "Police" is absorbed so "Police Dept" does not turn
    # into "Police Police Department".
    agency_name = str_replace(
      agency_name,
      "\\b(?:Police\\s+)?Dept\\b\\.?|\\bPd\\b",
      "Police Department"
    ),
    # Drop a trailing period, if any
    agency_name = str_remove(agency_name, "\\.$")
  )
# Filtering data for just 2022
la_pd_sizes_2022 <- la_pd_sizes %>%
  filter(data_year == "2022")
# ------------------------------------- Data Analysis Process ----------------------------------------------
# Columns used for mapping agencies: name, derived type and location
agency_map <- select(agency_locations, agency_name, agency_type, location)
# Row count for each agency type (count column is `n`)
agency_distribution <- count(agency_locations, agency_type)
# Count recorded for the "Sheriff's Office" type
n_so <- pull(
  filter(agency_distribution, agency_type == "Sheriff's Office"),
  n
)
# Number of officers over time
# Builds one row per agency with one column per year holding `officer_ct`.
# Year columns are picked by name (four-digit column names) instead of by
# fixed positions, so the table keeps working when the data covers a different
# number of years.
officers_over_time <- la_pd_sizes %>%
  # Every non-year column acts as an identifier here, so the widened rows
  # only carry the year value(s) of their own source record
  pivot_wider(names_from = data_year, values_from = officer_ct) %>%
  select(agency_name, matches("^[0-9]{4}$")) %>%
  group_by(agency_name) %>%
  # Copy each agency's yearly values onto all of its rows ...
  fill(matches("^[0-9]{4}$"), .direction = "updown") %>%
  # ... then keep a single, fully populated row per agency
  distinct(agency_name, .keep_all = TRUE) %>%
  # Agency first, followed by the year columns in reversed order.
  # The result stays grouped by `agency_name`, as before.
  select(agency_name, rev(matches("^[0-9]{4}$"))) %>%
  arrange(agency_name)
# Mean `total_pe_ct` per county for 2022. A record whose `county_name` lists
# several counties separated by ", " is split so it counts towards each one;
# county names are title-cased after averaging.
average_agency_map <- la_pd_sizes_2022 %>%
  separate_rows(county_name, sep = ", ") %>%
  group_by(county_name) %>%
  summarise(pct_per_county = mean(total_pe_ct)) %>%
  mutate(county_name = str_to_title(county_name))
# Mean `total_pe_ct` per agency record in the first and last year (1960, 2022)
average_increase <- la_pd_sizes %>%
  filter(data_year %in% c("1960", "2022")) %>%
  group_by(data_year) %>%
  summarise(ave_officers = mean(total_pe_ct))
# Yearly mean of the per-1,000 rate, rescaled to a per-100,000 residents rate
officers_per_residents <- la_pd_sizes %>%
  group_by(data_year) %>%
  summarise(ave_per_hundredthousand = mean(pe_ct_per_1000) * 100)
# Distinct agency names present in the 2022 data
n_agencies_2022 <- n_distinct(la_pd_sizes_2022$agency_name)
# Sum of `total_pe_ct` over the 2022 data
n_officers_2022 <- la_pd_sizes_2022 %>%
  pull(total_pe_ct) %>%
  sum()
# Distinct agency names across all years
n_agencies <- n_distinct(la_pd_sizes$agency_name)
# Count recorded for the "Police Department" type
n_pd <- pull(
  filter(agency_distribution, agency_type == "Police Department"),
  n
)