-
Notifications
You must be signed in to change notification settings - Fork 5
/
_check.R
23 lines (22 loc) · 1.19 KB
/
_check.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Checks computed on `expanded`:
# 1. row totals: the overall row count and the count of distinct rows
#    NOTE(review): nothing here compares these against the theoretical
#    unnest count; only the raw totals are gathered and later printed.
# 2. number of rows with NA in each key column
#    (lat, lng, bbl, alt_st_name, zipcode)
"RUNNING CHECKS" %>% print

# Every tally lives in one named list so the reporting lines that follow
# can look each value up by name.
checks <- list(
  missing_lats    = nrow(filter(expanded, is.na(lat))),
  missing_lons    = nrow(filter(expanded, is.na(lng))),
  missing_bbls    = nrow(filter(expanded, is.na(bbl))),
  missing_streets = nrow(filter(expanded, is.na(alt_st_name))),
  missing_zips    = nrow(filter(expanded, is.na(zipcode))),
  total_rows      = nrow(expanded),
  distinct_rows   = nrow(distinct(expanded))
)

# Run a garbage collection once the tallies are done
# (presumably to release the intermediate filtered/distinct copies).
gc()
# Build the status line for one "missing value" tally.
#
#   n     - number of rows missing the value (a single non-negative count)
#   label - upper-case plural field name, inserted verbatim in the message
#
# Returns a warning line that includes the count when n is non-zero, and a
# check-mark line otherwise. A plain if/else is used because n is a scalar;
# ifelse() is meant for element-wise work on vectors.
format_missing <- function(n, label) {
  if (n > 0) {
    paste("✗ WARNING!", n, "MISSING", label)
  } else {
    paste("✓", label)
  }
}

# One status line per NA tally stored in `checks`.
checks$missing_lats %>% format_missing("LATITUDES") %>% print
checks$missing_lons %>% format_missing("LONGITUDES") %>% print
checks$missing_bbls %>% format_missing("BBLS") %>% print
checks$missing_streets %>% format_missing("STREETS") %>% print
checks$missing_zips %>% format_missing("ZIPCODES") %>% print

# Raw row totals, printed for manual inspection.
checks$total_rows %>% paste("TOTAL ROWS:", .) %>% print
checks$distinct_rows %>% paste("DISTINCT ROWS:", .) %>% print

# Final garbage collection after reporting.
gc()