-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
setup for initial build of new cdbg data product (#1355)
* add cdbg build and test to github actions * cdbg product folder and dbt project setup * ingest template for hud data * cdbg recipe, sources.yml * cdbg transformation logic * cdbg export script --------- Co-authored-by: Finn van Krieken <[email protected]>
- Loading branch information
1 parent
47f2592
commit 2bac1f3
Showing
23 changed files
with
753 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Reusable (workflow_call) workflow that plans, loads, builds, exports and
# uploads the CDBG data product inside the build-base container.
name: CDBG - 🏗️ Build
on:
  workflow_call:
    inputs:
      image_tag:
        type: string
        required: false
      build_name:
        type: string
        required: true
      recipe_file:
        type: string
        required: true
      plan_command:
        type: string
        default: recipe
      dev_bucket:
        type: string
        required: false

jobs:
  build:
    name: Build CDBG
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: bash
        working-directory: products/cdbg
    container:
      # Falls back to the `latest` tag when no image_tag input is supplied.
      image: nycplanning/build-base:${{ inputs.image_tag || 'latest' }}
    env:
      BUILD_ENGINE_DB: db-cdbg
      BUILD_NAME: ${{ inputs.build_name }}
      # A dev_bucket input, when given, replaces both buckets and sets DEV_FLAG to 'true'.
      RECIPES_BUCKET: ${{ inputs.dev_bucket || 'edm-recipes' }}
      PUBLISHING_BUCKET: ${{ inputs.dev_bucket || 'edm-publishing' }}
      DEV_FLAG: ${{ inputs.dev_bucket && 'true' || 'false' }}
    steps:
      - uses: actions/checkout@v4

      # export-env publishes the resolved op:// references as environment variables.
      - name: Load Secrets
        uses: 1password/load-secrets-action@v1
        with:
          export-env: true
        env:
          OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
          AWS_S3_ENDPOINT: "op://Data Engineering/DO_keys/AWS_S3_ENDPOINT"
          AWS_SECRET_ACCESS_KEY: "op://Data Engineering/DO_keys/AWS_SECRET_ACCESS_KEY"
          AWS_ACCESS_KEY_ID: "op://Data Engineering/DO_keys/AWS_ACCESS_KEY_ID"
          BUILD_ENGINE_SERVER: "op://Data Engineering/EDM_DATA/server_url"
          BUILD_ENGINE_HOST: "op://Data Engineering/EDM_DATA/server"
          BUILD_ENGINE_USER: "op://Data Engineering/EDM_DATA/username"
          BUILD_ENGINE_PASSWORD: "op://Data Engineering/EDM_DATA/password"
          BUILD_ENGINE_PORT: "op://Data Engineering/EDM_DATA/port"

      # Setup scripts live at the repository root, so override the job's default directory.
      - name: Setup build environment
        working-directory: ./
        run: |
          ./bash/docker_container_setup.sh
          ./bash/build_env_setup.sh

      - name: Plan build
        run: python3 -m dcpy.lifecycle.builds.plan ${{ inputs.plan_command }}

      # Uses python3 like every other step in this job.
      - name: Dataloading
        run: python3 -m dcpy.lifecycle.builds.load load --recipe-path ${{ inputs.recipe_file }}.lock.yml

      - name: Build
        run: |
          dbt debug
          dbt build

      - name: Export
        run: ./bash/export.sh

      - name: Upload
        run: python3 -m dcpy.connectors.edm.publishing upload --product db-cdbg --acl public-read
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
dcpy/lifecycle/ingest/templates/hud_lowmodincomebyblockgroup.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Ingest template for the HUD low-to-moderate income by block group dataset,
# read from a CSV already staged in S3.
id: hud_lowmodincomebyblockgroup
acl: public-read

attributes:
  name: HUD Low to Moderate Income Population by Block Group
  # NOTE(review): the description cites the 2011-2015 ACS, while the source key
  # under `ingestion` names an ACS-2020 file - confirm which vintage applies.
  description: >-
    This particular version of this dataset has come from OMB. We haven't fully assessed whether
    we could just pull from the linked feature service instead and filter to NYC or if OMB has
    done any other preprocessing.
    This service identifies U.S. Census Block Groups in which 51% or more of the households earn
    less than 80 percent of the Area Median Income (AMI). The Community Development Block Grant
    (CDBG) program requires that each CDBG funded activity must either principally benefit low-
    and moderate-income persons, aid in the prevention or elimination of slums or blight, or meet
    a community development need having a particular urgency because existing conditions pose a
    serious and immediate threat to the health or welfare of the community and other financial
    resources are not available to meet that need. With respect to activities that principally
    benefit low- and moderate-income persons, at least 51 percent of the activity's beneficiaries
    must be low and moderate income.
    The Community Development Block Grant (CDBG) program requires that each CDBG funded activity
    must either principally benefit low- and moderate-income persons, aid in the prevention or
    elimination of slums or blight, or meet a community development need having a particular urgency
    because existing conditions pose a serious and immediate threat to the health or welfare of
    the community and other financial resources are not available to meet that need. With respect
    to activities that principally benefit low- and moderate-income persons, at least 51 percent
    of the activity's beneficiaries must be low and moderate income. For CDBG, a person is considered
    to be of low income only if he or she is a member of a household whose income would qualify as
    "very low income" under the Section 8 Housing Assistance Payments program. Generally, these
    Section 8 limits are based on 50% of area median. Similarly, CDBG moderate income relies on
    Section 8 "lower income" limits, which are generally tied to 80% of area median. These data
    are from the 2011-2015 American Community Survey (ACS).
  url: https://hudgis-hud.opendata.arcgis.com/datasets/HUD::low-to-moderate-income-population-by-block-group/about

ingestion:
  source:
    type: s3
    bucket: edm-recipes
    key: inbox/omb/20241227/ACS-2020-Low-Mod-Summarized-All-Block-Groups-2023.csv
  file_format:
    type: csv

# Column list is currently disabled (kept commented out, as in the original).
#columns:
#- id: CDBGUOGID
#- id: GEOID
#- id: CDBGNAME
#- id: STUSAB
#- id: STATE
#- id: CDBGTYPE
#- id: Geoname
#- id: COUNTY
#- id: TRACT
#- id: BLKGRP
#- id: LOW
#- id: LOWMOD
#- id: LMMI
#- id: LOWMODUNIV
#- id: LOWMOD_PCT
#- id: MOE_LOWMODPCT
#- id: Column1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/bash
# Exports the CDBG product tables plus build metadata into ./output and zips the result.
# All paths are relative to the current working directory.
# set_error_traps and csv_export are not defined in this script; presumably they
# come from the sourced utils.sh - verify there.
source ../../bash/utils.sh
set_error_traps

# Start from a clean output directory.
rm -rf output

echo "Export product tables"
mkdir -p output && (
    cd output

    echo "Copy metadata files"
    for metadata_file in source_data_versions.csv build_metadata.json; do
        cp "../${metadata_file}" .
    done

    # Each product table is exported under its own name.
    for table in cdbg_block_groups cdbg_tracts; do
        echo "export ${table}.csv ..."
        csv_export "${table}" "${table}"
    done
)

zip -r output/output.zip output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# dbt project configuration for the CDBG data product.
name: "cdbg"

profile: "dcp-de-postgres"

model-paths: ["models"]

# Failing test rows are stored, in a schema suffixed "_tests".
# `+` marks these keys as configs rather than resource paths, consistently
# with the `+materialized` entries below.
tests:
  +store_failures: true
  +schema: "_tests"

models:
  cdbg:
    staging:
      +materialized: view
    intermediate:
      +materialized: table
    product:
      +materialized: table

flags:
  fail-fast: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{#
    Generic dbt test: returns one row per `group_by` group whose summed
    `target_column`, rounded to `precision` decimal places, differs from `val`.

    Args:
        model: relation under test
        group_by: column (or expression) to group on
        target_column: column whose values are summed within each group
        val: expected sum for every group
        precision: number of decimals the sum is rounded to before comparing (default 4)

    Returned columns: the group key, the raw `sum`, and `vals` (all summed values).

    IS DISTINCT FROM is used instead of <> so that a group whose sum is NULL
    (every target value NULL) is reported as a failure rather than dropped
    because the comparison evaluates to NULL.
#}
{% test sum_by(model, group_by, target_column, val, precision=4) %}

SELECT
    {{ group_by }},
    sum({{ target_column }}) AS sum,
    array_agg({{ target_column }}) AS vals
FROM {{ model }}
GROUP BY {{ group_by }}
HAVING round(sum({{ target_column }})::numeric, {{ precision }}) IS DISTINCT FROM {{ val }}

{% endtest %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# dbt source declarations for the CDBG build; the schema is read from the
# BUILD_ENGINE_SCHEMA environment variable.
version: 2

sources:
  - name: recipe_sources
    schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}"
    tables:
      - name: dcp_mappluto_clipped
        columns:
          - name: bbl
            tests:
              - not_null
          - name: wkb_geometry
            tests:
              - not_null
      # The remaining sources are declared without column-level tests.
      - name: dcp_cb2020_wi
      - name: dcp_ct2020_wi
      - name: hud_lowmodincomebyblockgroup
68 changes: 68 additions & 0 deletions
68
products/cdbg/models/intermediate/_intermediate_models.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Documentation and tests for the intermediate CDBG models.
version: 2

models:
  - name: int__block_groups
    description: residential area and low-to-moderate income data aggregated by census block group
    columns:
      - name: geoid
        tests: [unique, not_null]
      - name: borough_name
      - name: tract
      - name: block_group
      - name: total_floor_area
      - name: residential_floor_area
      - name: residential_floor_area_percentage
      - name: total_population
      # Names below match the aliases emitted by int__block_groups.sql.
      - name: low_mod_income_population
      - name: low_mod_income_population_percentage

  - name: int__lot_block_groups_details
    description: int__lot_block_groups joined to pluto for lot info
    columns:
      - name: bbl
      - name: block_group_geoid
      - name: overlap_ratio
      - name: bldgarea
      - name: bldgarea_in_block_group
      - name: resarea
      - name: resarea_in_block_group

  - name: int__lot_block_groups_raw
    description: unique intersections of pluto lots and census block groups with proportion of lot in block group
    columns:
      - name: bbl
      - name: block_group_geoid
      - name: overlap_ratio

  - name: int__lot_block_groups
    description: >-
      unique intersections of pluto lots and census block groups with proportion of lot in block group,
      corrected to assign lots fully to block groups that contain 90%+ of a lot
    columns:
      - name: bbl
      - name: block_group_geoid
      - name: overlap_ratio
    tests:
      - dbt_utils.unique_combination_of_columns:
          combination_of_columns: [bbl, block_group_geoid]
      # Overlap ratios of a lot should add up to 1; a mismatch only warns.
      - sum_by:
          group_by: bbl
          target_column: overlap_ratio
          val: 1
          config:
            severity: warn

  - name: int__tracts
    description: residential area and low-to-moderate income data aggregated by census tract
    columns:
      - name: geoid
        tests: [unique, not_null]
      - name: borough_name
      - name: tract
      # NOTE(review): a block_group column on a tract-level model looks copied
      # from int__block_groups - confirm against int__tracts.sql.
      - name: block_group
      - name: total_floor_area
      - name: residential_floor_area
      - name: residential_floor_area_percentage
      - name: total_population
      - name: lowmod_population
      - name: lowmod_population_percentage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
-- One row per census block group: floor-area totals summed from
-- int__lot_block_groups_details, joined to the low/moderate income figures in
-- stg__low_mod_by_block_group. Block groups with no income match keep NULLs
-- in the income-derived columns (LEFT JOIN).
WITH lots_in_block_groups AS (
    SELECT * FROM {{ ref("int__lot_block_groups_details") }}
),

low_mod_income AS (
    SELECT * FROM {{ ref("stg__low_mod_by_block_group") }}
),

-- sums bldgarea_in_block_group and resarea_in_block_group per block_group_geoid
floor_area_totals AS (
    SELECT
        lots.block_group_geoid AS geoid,
        sum(lots.bldgarea_in_block_group) AS total_floor_area,
        sum(lots.resarea_in_block_group) AS residential_floor_area
    FROM lots_in_block_groups AS lots
    GROUP BY lots.block_group_geoid
)

SELECT
    fa.geoid,
    inc.boro AS borough_name,
    inc.tract,
    inc.block_group,
    fa.total_floor_area,
    fa.residential_floor_area,
    -- a zero total yields 0 rather than a division-by-zero error
    CASE fa.total_floor_area
        WHEN 0 THEN 0
        ELSE (fa.residential_floor_area / fa.total_floor_area) * 100
    END AS residential_floor_area_percentage,
    inc.total_population,
    inc.lowmod_population AS low_mod_income_population,
    inc.lowmod_pct AS low_mod_income_population_percentage
FROM floor_area_totals AS fa
LEFT JOIN low_mod_income AS inc
    ON fa.geoid = inc.geoid
35 changes: 35 additions & 0 deletions
35
products/cdbg/models/intermediate/int__lot_block_groups.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
-- Resolves lot / block-group overlaps from int__lot_block_groups_raw:
--   * rows with a NULL overlap_ratio are discarded;
--   * a lot whose overlap with one block group is strictly greater than 0.9
--     is assigned entirely to that block group (overlap_ratio = 1);
--   * every other lot keeps its raw rows and ratios.
WITH lot_block_groups AS (
    SELECT
        bbl,
        block_group_geoid,
        overlap_ratio
    FROM {{ ref("int__lot_block_groups_raw") }}
),

valid_lot_block_groups AS (
    SELECT
        bbl,
        block_group_geoid,
        overlap_ratio
    FROM lot_block_groups
    WHERE overlap_ratio IS NOT null
),

-- lots that sit (almost) entirely inside a single block group
lots_easy AS (
    SELECT
        bbl,
        block_group_geoid,
        1 AS overlap_ratio
    FROM valid_lot_block_groups
    WHERE overlap_ratio > 0.9
),

-- all rows of lots that were not fully assigned above.
-- NOT EXISTS rather than NOT IN: NOT IN returns no rows at all as soon as the
-- subquery yields a single NULL bbl, which would silently drop every split lot.
lots_split AS (
    SELECT
        valid.bbl,
        valid.block_group_geoid,
        valid.overlap_ratio
    FROM valid_lot_block_groups AS valid
    WHERE NOT EXISTS (
        SELECT 1
        FROM lots_easy AS easy
        WHERE easy.bbl = valid.bbl
    )
),

-- explicit column lists keep the two UNION ALL branches aligned by name, not position
lots AS (
    SELECT
        bbl,
        block_group_geoid,
        overlap_ratio
    FROM lots_easy
    UNION ALL
    SELECT
        bbl,
        block_group_geoid,
        overlap_ratio
    FROM lots_split
)

SELECT * FROM lots
Oops, something went wrong.