-
Notifications
You must be signed in to change notification settings - Fork 2
/
summarize.R
70 lines (63 loc) · 3.43 KB
/
summarize.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
##############################################################
# Summarize cleaned eviction data for Shiny app #
# Authors: Jacob Goldstein-Greenwood, Michele Claibourn #
# GitHub: jacob-gg, mclaibourn #
# Last revised: 2023-02-22 #
##############################################################
######################## Instructions ########################
# This script reads the cleaned eviction case data (produced by
# clean.R) and aggregates it to the plaintiff, plaintiff-by-year,
# and plaintiff-by-month levels for display in the Virginia
# Evictors Catalog Shiny app
# Modifiable user presets
input_data_directory <- 'processed-data'        # folder holding the input file
input_data_file <- 'cases_residential_only.txt' # cleaned cases, read as CSV
output_data_directory <- 'va-evictors-catalog'  # folder receiving the summaries
##############################################################
# Packages
# Names handed one by one to handle_package(); an entry containing "/" is
# treated there as a GitHub repository
required <- c('stringi', 'tidyverse', 'lubridate')
# Attach a package, installing it first when necessary.
#
# pkg: a single package name. A name containing "/" is treated as a GitHub
#      repository reference and is passed to devtools::install_github() on
#      every call; any other name is installed with install.packages() only
#      when it is not among the installed package names.
# Returns whatever library() returns after attaching the package.
handle_package <- function(pkg) {
  if (grepl(x = pkg, pattern = '\\/')) {
    devtools::install_github(pkg)
  } else if (!(pkg %in% rownames(installed.packages()))) {
    # installed.packages() is a character matrix with one row per package.
    # Compare against its row names (the package names) only: a bare %in% on
    # the matrix would also match cells such as Version, Depends or Imports
    # and could report a missing package as installed.
    install.packages(pkg)
  }
  # Drop everything up to the last "/" so library() gets the bare package name
  pkg <- sub(x = pkg, pattern = '.+\\/', replacement = '')
  library(pkg, character.only = TRUE)
}
# Install (when needed) and attach every package listed in `required`.
# Only the side effect matters, so the list returned by lapply() is discarded
# instead of being echoed to the console.
invisible(lapply(required, handle_package))
# Load
# Every column is read as character, so each value keeps the exact text found
# in the file; later steps convert where needed (e.g. as.numeric(filed_year)).
cases <- read.csv(
  file.path(input_data_directory, input_data_file),
  colClasses = 'character'
)
# Summarize by plaintiff
# Produces one row per county/plaintiff pair, pooling all filing years
# Note that defendant_zips drops NA, so we report it as "Known Virginia Defendant ZIP Codes"
plaintiff_aggregated <- cases %>%
  group_by(county, plaintiff_name) %>%
  summarize(
    cases_filed = n(),
    # Comparisons against a missing value give NA; na.rm = TRUE leaves those
    # rows out of the count instead of making the whole sum NA
    plaintiff_judgments = sum(judgment == 'Plaintiff', na.rm = TRUE),
    serial_filings = sum(serial_filing == TRUE, na.rm = TRUE),
    # Distinct filing years in ascending order, as one comma-separated string
    filing_years = paste0(
      sort(unique(as.numeric(filed_year))),
      collapse = ', '
    ),
    # Distinct non-missing ZIP codes, in order of first appearance
    defendant_zips = paste0(na.omit(unique(defendant_zip)), collapse = ', '),
    # Drop all grouping explicitly rather than relying on summarize()'s
    # default, which keeps `county` grouped and prints a message about it
    .groups = 'drop'
  ) %>%
  relocate(filing_years, .after = last_col())
# Summarize by plaintiff and year
# Produces one row per county/filing-year/plaintiff combination; filed_year is
# moved to the last column of the result
plaintiff_aggregated_yearly <- cases %>%
  group_by(county, filed_year, plaintiff_name) %>%
  summarize(
    cases_filed = n(),
    # Comparisons against a missing value give NA; na.rm = TRUE leaves those
    # rows out of the count instead of making the whole sum NA
    plaintiff_judgments = sum(judgment == 'Plaintiff', na.rm = TRUE),
    serial_filings = sum(serial_filing == TRUE, na.rm = TRUE),
    # Distinct non-missing ZIP codes, in order of first appearance
    defendant_zips = paste0(na.omit(unique(defendant_zip)), collapse = ', '),
    # Drop all grouping explicitly rather than relying on summarize()'s
    # default, which keeps part of the grouping and prints a message about it
    .groups = 'drop'
  ) %>%
  relocate(filed_year, .after = last_col())
# Summarize by plaintiff, month, and year
# Produces one row per county/filing-month/plaintiff combination; filing_month
# is a "YYYY-MM" string and is moved to the last column of the result
plaintiff_aggregated_monthly <- cases %>%
  mutate(
    # Paste year and month together, parse that with ym(), then format the
    # resulting date as zero-padded '%Y-%m'
    # NOTE(review): filed_date is read in as character, so year()/month() rely
    # on implicit date parsing -- confirm the input file stores it in an
    # unambiguous format
    filing_month = format(
      ym(paste0(year(filed_date), '-', month(filed_date))),
      format = '%Y-%m'
    )
  ) %>%
  group_by(county, filing_month, plaintiff_name) %>%
  summarize(
    cases_filed = n(),
    # Comparisons against a missing value give NA; na.rm = TRUE leaves those
    # rows out of the count instead of making the whole sum NA
    plaintiff_judgments = sum(judgment == 'Plaintiff', na.rm = TRUE),
    serial_filings = sum(serial_filing == TRUE, na.rm = TRUE),
    # Distinct non-missing ZIP codes, in order of first appearance
    defendant_zips = paste0(na.omit(unique(defendant_zip)), collapse = ', '),
    # Drop all grouping explicitly rather than relying on summarize()'s
    # default, which keeps part of the grouping and prints a message about it
    .groups = 'drop'
  ) %>%
  relocate(filing_month, .after = last_col())
# Export for Shiny app
# Each summary table is written as a CSV file (without row names) into
# output_data_directory; the list names are the file-name suffixes appended
# to that directory. local() keeps the helper variables out of the workspace.
local({
  exports <- list(
    '/data-plaintiff-aggregated.txt' = plaintiff_aggregated,
    '/data-yearly-plaintiff-aggregated.txt' = plaintiff_aggregated_yearly,
    '/data-monthly-plaintiff-aggregated.txt' = plaintiff_aggregated_monthly
  )
  for (suffix in names(exports)) {
    write.csv(
      exports[[suffix]],
      file = paste0(output_data_directory, suffix),
      row.names = FALSE
    )
  }
})