Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add cdbg data product #1355

Merged
merged 25 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
10c2df7
add cdbg build and test to github actions
damonmcc Dec 31, 2024
812820b
add cdbg product folder and dbt project
damonmcc Dec 31, 2024
600d11b
start cdbg product models
damonmcc Dec 31, 2024
24a60f1
add low to mod income by block group recipe ds
fvankrieken Dec 31, 2024
8a71d00
add cdbg recipe
fvankrieken Dec 31, 2024
d533d82
add barebones cdbg sources.yml
fvankrieken Dec 31, 2024
2479661
fix product models config
damonmcc Dec 31, 2024
a36c007
fix pluto source name
damonmcc Dec 31, 2024
8245c50
start lot to tracts model
damonmcc Dec 31, 2024
7c0a332
add census block group staging table
fvankrieken Dec 31, 2024
d614b52
stg model to clean hud data
fvankrieken Dec 31, 2024
fd15ea9
materialize all intermediate models as tables
damonmcc Dec 31, 2024
32c6767
add intermediate models
damonmcc Dec 31, 2024
74659d7
add columns to staging table
damonmcc Dec 31, 2024
32a9359
add int models for block groups and tracts
damonmcc Dec 31, 2024
cd92b5d
add product models for block groups and tracts
damonmcc Dec 31, 2024
e131514
add borough name to tracts models
damonmcc Dec 31, 2024
824608a
less enfored product models
damonmcc Dec 31, 2024
8468ffe
add export to cdbg
damonmcc Dec 31, 2024
f45e638
do build and export in cdbg action
damonmcc Dec 31, 2024
e78eb53
rm product/cdbg_boroughs
fvankrieken Dec 31, 2024
4098aba
sqlfluff
fvankrieken Dec 31, 2024
5fecb18
add intermediate models yml
fvankrieken Dec 31, 2024
7d3b0bc
add packages.yml
fvankrieken Dec 31, 2024
5514820
add test to ensure all of lots area assigned
fvankrieken Dec 31, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ on:
options:
- template
- cbbr
- cdbg
- checkbook
- colp
- cpdb
Expand Down Expand Up @@ -128,6 +129,17 @@ jobs:
logging_level: ${{ inputs.logging_level }}
build_note: ${{ inputs.build_note }}
dev_bucket: ${{ inputs.dev_bucket && format('de-dev-{0}', inputs.dev_bucket) || '' }}
# CDBG build job: runs after health_check and delegates to the reusable
# cdbg_build.yml workflow when the requested dataset is 'cdbg' or 'all'.
cdbg:
needs: health_check
if: inputs.dataset_name == 'cdbg' || inputs.dataset_name == 'all'
uses: ./.github/workflows/cdbg_build.yml
secrets: inherit
with:
# image tag, build name and plan command all come from health_check outputs
image_tag: ${{ needs.health_check.outputs.tag }}
recipe_file: ${{ inputs.recipe_file }}
build_name: ${{ needs.health_check.outputs.build_name }}
plan_command: ${{ needs.health_check.outputs.plan_command }}
# becomes 'de-dev-<name>' when a dev bucket is requested, otherwise an empty string
dev_bucket: ${{ inputs.dev_bucket && format('de-dev-{0}', inputs.dev_bucket) || '' }}
checkbook:
needs: health_check
if: inputs.dataset_name == 'checkbook' || inputs.dataset_name == 'all'
Expand Down
76 changes: 76 additions & 0 deletions .github/workflows/cdbg_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Reusable workflow (workflow_call) that plans, loads, builds, exports and uploads
# the CDBG data product from the products/cdbg folder.
name: CDBG - 🏗️ Build
on:
workflow_call:
inputs:
# tag of the nycplanning/build-base image; 'latest' is used when omitted
image_tag:
type: string
required: false
build_name:
type: string
required: true
# recipe path without the '.lock.yml' suffix (the suffix is appended in the Dataloading step)
recipe_file:
type: string
required: true
# argument passed to dcpy.lifecycle.builds.plan
plan_command:
type: string
default: recipe
# when set, replaces both the recipes and publishing buckets and turns DEV_FLAG on
dev_bucket:
type: string
required: false

jobs:
build:
name: Build CDBG
runs-on: ubuntu-22.04
defaults:
run:
shell: bash
# steps run from the product folder unless they override working-directory
working-directory: products/cdbg
container:
image: nycplanning/build-base:${{ inputs.image_tag || 'latest' }}
env:
BUILD_ENGINE_DB: db-cdbg
BUILD_NAME: ${{ inputs.build_name }}
RECIPES_BUCKET: ${{ inputs.dev_bucket || 'edm-recipes' }}
PUBLISHING_BUCKET: ${{ inputs.dev_bucket || 'edm-publishing' }}
DEV_FLAG: ${{ inputs.dev_bucket && 'true' || 'false' }}
steps:
- uses: actions/checkout@v4

# Resolve 1Password references into environment variables for later steps
- name: Load Secrets
uses: 1password/load-secrets-action@v1
with:
export-env: true
env:
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
AWS_S3_ENDPOINT: "op://Data Engineering/DO_keys/AWS_S3_ENDPOINT"
AWS_SECRET_ACCESS_KEY: "op://Data Engineering/DO_keys/AWS_SECRET_ACCESS_KEY"
AWS_ACCESS_KEY_ID: "op://Data Engineering/DO_keys/AWS_ACCESS_KEY_ID"
BUILD_ENGINE_SERVER: "op://Data Engineering/EDM_DATA/server_url"
BUILD_ENGINE_HOST: "op://Data Engineering/EDM_DATA/server"
BUILD_ENGINE_USER: "op://Data Engineering/EDM_DATA/username"
BUILD_ENGINE_PASSWORD: "op://Data Engineering/EDM_DATA/password"
BUILD_ENGINE_PORT: "op://Data Engineering/EDM_DATA/port"

# Runs from the repository root because the setup scripts live in ./bash there
- name: Setup build environment
working-directory: ./
run: |
./bash/docker_container_setup.sh
./bash/build_env_setup.sh

- name: Plan build
run: python3 -m dcpy.lifecycle.builds.plan ${{ inputs.plan_command }}

# NOTE(review): this step invokes `python` while the other steps use `python3` — confirm both resolve to the same interpreter in the image
- name: Dataloading
run: python -m dcpy.lifecycle.builds.load load --recipe-path ${{ inputs.recipe_file }}.lock.yml

- name: Build
run: |
dbt debug
dbt build

# products/cdbg/bash/export.sh, resolved relative to the default working directory
- name: Export
run: ./bash/export.sh

- name: Upload
run: python3 -m dcpy.connectors.edm.publishing upload --product db-cdbg --acl public-read
1 change: 1 addition & 0 deletions .github/workflows/test_helper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ jobs:
project:
- green_fast_track
- zoningtaxlots
- cdbg
steps:
- uses: actions/checkout@v4
- name: setup
Expand Down
60 changes: 60 additions & 0 deletions dcpy/lifecycle/ingest/templates/hud_lowmodincomebyblockgroup.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Ingest template for the HUD low-to-moderate income by block group dataset.
# The source is a CSV file read from S3 (see the ingestion section).
id: hud_lowmodincomebyblockgroup
acl: public-read

# NOTE(review): the description repeats the CDBG program paragraph twice and cites the
# 2011-2015 ACS, while the source key below references "ACS-2020" — confirm which is
# correct and trim the duplicated text.
attributes:
name: HUD Low to Moderate Income Population by Block Group
description: >-
This particular version of this dataset has come from OMB. We haven't fully assessed whether
we could just pull from the linked feature service instead and filter to NYC or if OMB has
done any other preprocessing.

This service identifies U.S. Census Block Groups in which 51% or more of the households earn
less than 80 percent of the Area Median Income (AMI). The Community Development Block Grant
(CDBG) program requires that each CDBG funded activity must either principally benefit low-
and moderate-income persons, aid in the prevention or elimination of slums or blight, or meet
a community development need having a particular urgency because existing conditions pose a
serious and immediate threat to the health or welfare of the community and other financial
resources are not available to meet that need. With respect to activities that principally
benefit low- and moderate-income persons, at least 51 percent of the activity's beneficiaries
must be low and moderate income.

The Community Development Block Grant (CDBG) program requires that each CDBG funded activity
must either principally benefit low- and moderate-income persons, aid in the prevention or
elimination of slums or blight, or meet a community development need having a particular urgency
because existing conditions pose a serious and immediate threat to the health or welfare of
the community and other financial resources are not available to meet that need. With respect
to activities that principally benefit low- and moderate-income persons, at least 51 percent
of the activity's beneficiaries must be low and moderate income. For CDBG, a person is considered
to be of low income only if he or she is a member of a household whose income would qualify as
"very low income" under the Section 8 Housing Assistance Payments program. Generally, these
Section 8 limits are based on 50% of area median. Similarly, CDBG moderate income relies on
Section 8 "lower income" limits, which are generally tied to 80% of area median. These data
are from the 2011-2015 American Community Survey (ACS).
url: https://hudgis-hud.opendata.arcgis.com/datasets/HUD::low-to-moderate-income-population-by-block-group/about

# The file is a one-off drop under a dated inbox key rather than a live feed.
ingestion:
source:
type: s3
bucket: edm-recipes
key: inbox/omb/20241227/ACS-2020-Low-Mod-Summarized-All-Block-Groups-2023.csv
file_format:
type: csv

# Column list is currently disabled; presumably these are the CSV headers — verify against the file.
#columns:
#- id: CDBGUOGID
#- id: GEOID
#- id: CDBGNAME
#- id: STUSAB
#- id: STATE
#- id: CDBGTYPE
#- id: Geoname
#- id: COUNTY
#- id: TRACT
#- id: BLKGRP
#- id: LOW
#- id: LOWMOD
#- id: LMMI
#- id: LOWMODUNIV
#- id: LOWMOD_PCT
#- id: MOE_LOWMODPCT
#- id: Column1
22 changes: 22 additions & 0 deletions products/cdbg/bash/export.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Export the CDBG product tables and build metadata into ./output, then zip the folder.
# Expects to be run from products/cdbg (all paths are relative to the working directory);
# csv_export and set_error_traps are provided by the shared utils.sh.
source ../../bash/utils.sh
set_error_traps

# Always start from a clean output folder
rm -rf output

echo "Export product tables"
mkdir -p output && (
    cd output

    echo "Copy metadata files"
    for metadata_file in source_data_versions.csv build_metadata.json; do
        cp "../${metadata_file}" .
    done

    # Each product table is exported to a CSV of the same name
    for product_table in cdbg_block_groups cdbg_tracts; do
        echo "export ${product_table}.csv ..."
        csv_export "${product_table}" "${product_table}"
    done
)

zip -r output/output.zip output
21 changes: 21 additions & 0 deletions products/cdbg/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# dbt project definition for the CDBG data product.
name: "cdbg"

profile: "dcp-de-postgres"

model-paths: ["models"]

# Test configuration: store_failures is enabled and the schema is set to "_tests"
tests:
+store_failures: true
schema: "_tests"

# Staging models are views; intermediate and product models are materialized as tables
models:
cdbg:
staging:
+materialized: view
intermediate:
+materialized: table
product:
+materialized: table

# NOTE(review): dbt's project flags documentation spells this key `fail_fast` (underscore) —
# confirm the hyphenated form is actually honored.
flags:
fail-fast: true
9 changes: 9 additions & 0 deletions products/cdbg/macros/test_sum.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{% test sum_by(model, group_by, target_column, val, precision=4) %}
{#
    Generic test: for every `group_by` group in `model`, the sum of `target_column`,
    rounded to `precision` decimal places, must equal `val`.

    Returns one row per failing group with the offending sum and the individual values.

    The comparison uses IS DISTINCT FROM rather than <> so that a group whose sum is
    NULL (every value in the group is NULL) is reported as a failure; with <> the
    predicate evaluates to NULL and such a group would silently pass.
#}

SELECT
    {{ group_by }},
    sum({{ target_column }}) AS sum,
    array_agg({{ target_column }}) AS vals
FROM {{ model }}
GROUP BY {{ group_by }}
HAVING round(sum({{ target_column }})::numeric, {{ precision }}) IS DISTINCT FROM {{ val }}

{% endtest %}
17 changes: 17 additions & 0 deletions products/cdbg/models/_sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# dbt source declarations for the CDBG product.
version: 2

sources:
# All source tables are read from the schema named by the BUILD_ENGINE_SCHEMA environment variable
- name: recipe_sources
schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}"
tables:
# The only source with column tests: bbl and wkb_geometry must be populated
- name: dcp_mappluto_clipped
columns:
- name: bbl
tests:
- not_null
- name: wkb_geometry
tests:
- not_null
# The remaining sources are declared without column-level tests
- name: dcp_cb2020_wi
- name: dcp_ct2020_wi
- name: hud_lowmodincomebyblockgroup
68 changes: 68 additions & 0 deletions products/cdbg/models/intermediate/_intermediate_models.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
version: 2

models:
# Column names mirror the aliases emitted by int__block_groups.sql.
- name: int__block_groups
  description: residential area and low-to-moderate income data aggregated by census block group
  columns:
    - name: geoid
      tests: [unique, not_null]
    - name: borough_name
    - name: tract
    - name: block_group
    - name: total_floor_area
    - name: residential_floor_area
    - name: residential_floor_area_percentage
    - name: total_population
    - name: low_mod_income_population
    - name: low_mod_income_population_percentage

# Lot / block group intersections enriched with lot attributes from pluto.
# NOTE(review): the *_in_block_group columns are presumably the lot value scaled by overlap_ratio — confirm in the model SQL.
- name: int__lot_block_groups_details
description: int__lot_block_groups joined to pluto for lot info
columns:
- name: bbl
- name: block_group_geoid
- name: overlap_ratio
- name: bldgarea
- name: bldgarea_in_block_group
- name: resarea
- name: resarea_in_block_group

# Uncorrected lot / block group intersections; int__lot_block_groups reads from this model.
- name: int__lot_block_groups_raw
description: unique intersections of pluto lots and census block groups with proportion of lot in block group
columns:
- name: bbl
- name: block_group_geoid
- name: overlap_ratio

# Corrected lot / block group intersections.
- name: int__lot_block_groups
description: >-
unique intersections of pluto lots and census block groups with proportion of lot in block group,
corrected to assign lots fully to block groups that contain 90%+ of a lot
columns:
- name: bbl
- name: block_group_geoid
- name: overlap_ratio
tests:
# Each (bbl, block_group_geoid) pair may appear only once
- dbt_utils.unique_combination_of_columns:
combination_of_columns: [bbl, block_group_geoid]
# Overlap ratios of a lot should sum to 1; a mismatch is reported as a warning only
- sum_by:
group_by: bbl
target_column: overlap_ratio
val: 1
config:
severity: warn

# Tract-level counterpart of int__block_groups.
# NOTE(review): this entry lists a block_group column on a tract-level model, and the
# lowmod_* names differ from the low_mod_income_* aliases used by int__block_groups.sql —
# confirm both against the int__tracts SQL.
- name: int__tracts
description: residential area and low-to-moderate income data aggregated by census tract
columns:
- name: geoid
tests: [unique, not_null]
- name: borough_name
- name: tract
- name: block_group
- name: total_floor_area
- name: residential_floor_area
- name: residential_floor_area_percentage
- name: total_population
- name: lowmod_population
- name: lowmod_population_percentage
39 changes: 39 additions & 0 deletions products/cdbg/models/intermediate/int__block_groups.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
-- Floor area per census block group, joined to the low/moderate income figures
-- from the staging model. One output row per block group that has lot floor area.
WITH lot_details AS (
    SELECT
        block_group_geoid,
        bldgarea_in_block_group,
        resarea_in_block_group
    FROM {{ ref("int__lot_block_groups_details") }}
),

income AS (
    SELECT
        geoid,
        boro,
        tract,
        block_group,
        total_population,
        lowmod_population,
        lowmod_pct
    FROM {{ ref("stg__low_mod_by_block_group") }}
),

-- Sum the lot-level floor areas attributed to each block group
floor_area AS (
    SELECT
        block_group_geoid AS geoid,
        sum(bldgarea_in_block_group) AS total_floor_area,
        sum(resarea_in_block_group) AS residential_floor_area
    FROM lot_details
    GROUP BY block_group_geoid
)

SELECT
    floor_area.geoid,
    income.boro AS borough_name,
    income.tract,
    income.block_group,
    floor_area.total_floor_area,
    floor_area.residential_floor_area,
    -- Residential share as a percentage; 0 when the block group has no floor area at all
    CASE
        WHEN floor_area.total_floor_area = 0
            THEN 0
        ELSE (floor_area.residential_floor_area / floor_area.total_floor_area) * 100
    END AS residential_floor_area_percentage,
    income.total_population,
    income.lowmod_population AS low_mod_income_population,
    income.lowmod_pct AS low_mod_income_population_percentage
FROM floor_area
-- Left join: block groups without a matching income record keep NULL income columns
LEFT JOIN income
    ON floor_area.geoid = income.geoid
35 changes: 35 additions & 0 deletions products/cdbg/models/intermediate/int__lot_block_groups.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
-- Corrected lot / block group intersections.
-- A lot with more than 90% of its area inside one block group is assigned to that
-- block group entirely (overlap_ratio = 1); every other lot keeps its raw split.
WITH lot_block_groups AS (
    SELECT
        bbl,
        block_group_geoid,
        overlap_ratio
    FROM {{ ref("int__lot_block_groups_raw") }}
),

-- Intersections without an overlap ratio cannot be apportioned, so they are dropped
valid_lot_block_groups AS (
    SELECT
        bbl,
        block_group_geoid,
        overlap_ratio
    FROM lot_block_groups
    WHERE overlap_ratio IS NOT null
),

-- Lots that sit (almost) entirely in one block group; note the threshold is strict (> 0.9)
lots_easy AS (
    SELECT
        bbl,
        block_group_geoid,
        1 AS overlap_ratio
    FROM valid_lot_block_groups
    WHERE overlap_ratio > 0.9
),

-- All remaining lots keep their raw ratios.
-- NOT EXISTS is used instead of NOT IN: with NOT IN, a single NULL bbl in lots_easy
-- makes the predicate NULL for every row and this CTE would silently return nothing.
lots_split AS (
    SELECT
        valid_lot_block_groups.bbl,
        valid_lot_block_groups.block_group_geoid,
        valid_lot_block_groups.overlap_ratio
    FROM valid_lot_block_groups
    WHERE NOT EXISTS (
        SELECT 1
        FROM lots_easy
        WHERE lots_easy.bbl = valid_lot_block_groups.bbl
    )
),

lots AS (
    SELECT
        bbl,
        block_group_geoid,
        overlap_ratio
    FROM lots_easy
    UNION ALL
    SELECT
        bbl,
        block_group_geoid,
        overlap_ratio
    FROM lots_split
)

SELECT
    bbl,
    block_group_geoid,
    overlap_ratio
FROM lots
Loading