From 10c2df76308d70178bd21165c360ade691745dbf Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 09:35:18 -0500 Subject: [PATCH 01/25] add cdbg build and test to github actions --- .github/workflows/build.yml | 12 +++++ .github/workflows/cdbg_build.yml | 74 +++++++++++++++++++++++++++++++ .github/workflows/test_helper.yml | 1 + 3 files changed, 87 insertions(+) create mode 100644 .github/workflows/cdbg_build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e900f61f9..cb2131a76 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,6 +11,7 @@ on: options: - template - cbbr + - cdbg - checkbook - colp - cpdb @@ -128,6 +129,17 @@ jobs: logging_level: ${{ inputs.logging_level }} build_note: ${{ inputs.build_note }} dev_bucket: ${{ inputs.dev_bucket && format('de-dev-{0}', inputs.dev_bucket) || '' }} + cdbg: + needs: health_check + if: inputs.dataset_name == 'cdbg' || inputs.dataset_name == 'all' + uses: ./.github/workflows/cdbg_build.yml + secrets: inherit + with: + image_tag: ${{ needs.health_check.outputs.tag }} + recipe_file: ${{ inputs.recipe_file }} + build_name: ${{ needs.health_check.outputs.build_name }} + plan_command: ${{ needs.health_check.outputs.plan_command }} + dev_bucket: ${{ inputs.dev_bucket && format('de-dev-{0}', inputs.dev_bucket) || '' }} checkbook: needs: health_check if: inputs.dataset_name == 'checkbook' || inputs.dataset_name == 'all' diff --git a/.github/workflows/cdbg_build.yml b/.github/workflows/cdbg_build.yml new file mode 100644 index 000000000..e81d27789 --- /dev/null +++ b/.github/workflows/cdbg_build.yml @@ -0,0 +1,74 @@ +name: CDBG - 🏗️ Build +on: + workflow_call: + inputs: + image_tag: + type: string + required: false + build_name: + type: string + required: true + recipe_file: + type: string + required: true + plan_command: + type: string + default: recipe + dev_bucket: + type: string + required: false + +jobs: + build: + name: Build CDBG + runs-on: 
ubuntu-22.04 + defaults: + run: + shell: bash + working-directory: products/cdbg + container: + image: nycplanning/build-base:${{ inputs.image_tag || 'latest' }} + env: + BUILD_ENGINE_DB: db-cdbg + BUILD_NAME: ${{ inputs.build_name }} + RECIPES_BUCKET: ${{ inputs.dev_bucket || 'edm-recipes' }} + PUBLISHING_BUCKET: ${{ inputs.dev_bucket || 'edm-publishing' }} + DEV_FLAG: ${{ inputs.dev_bucket && 'true' || 'false' }} + steps: + - uses: actions/checkout@v4 + + - name: Load Secrets + uses: 1password/load-secrets-action@v1 + with: + export-env: true + env: + OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} + AWS_S3_ENDPOINT: "op://Data Engineering/DO_keys/AWS_S3_ENDPOINT" + AWS_SECRET_ACCESS_KEY: "op://Data Engineering/DO_keys/AWS_SECRET_ACCESS_KEY" + AWS_ACCESS_KEY_ID: "op://Data Engineering/DO_keys/AWS_ACCESS_KEY_ID" + BUILD_ENGINE_SERVER: "op://Data Engineering/EDM_DATA/server_url" + BUILD_ENGINE_HOST: "op://Data Engineering/EDM_DATA/server" + BUILD_ENGINE_USER: "op://Data Engineering/EDM_DATA/username" + BUILD_ENGINE_PASSWORD: "op://Data Engineering/EDM_DATA/password" + BUILD_ENGINE_PORT: "op://Data Engineering/EDM_DATA/port" + + - name: Setup build environment + working-directory: ./ + run: | + ./bash/docker_container_setup.sh + ./bash/build_env_setup.sh + + - name: Plan build + run: python3 -m dcpy.lifecycle.builds.plan ${{ inputs.plan_command }} + + - name: Dataloading + run: python -m dcpy.lifecycle.builds.load load --recipe-path ${{ inputs.recipe_file }}.lock.yml + +# - name: Build +# run: ./bash/build.sh +# +# - name: Export +# run: ./bash/export.sh +# +# - name: Upload +# run: python3 -m dcpy.connectors.edm.publishing upload --product db-green-fast-track --acl public-read diff --git a/.github/workflows/test_helper.yml b/.github/workflows/test_helper.yml index ed08779e4..ea08e99f4 100644 --- a/.github/workflows/test_helper.yml +++ b/.github/workflows/test_helper.yml @@ -88,6 +88,7 @@ jobs: project: - green_fast_track - zoningtaxlots + - cdbg 
steps: - uses: actions/checkout@v4 - name: setup From 812820b18b6c51e6eb0112619ef858ceacdaefee Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 09:34:57 -0500 Subject: [PATCH 02/25] add cdbg product folder and dbt project --- products/cdbg/dbt_project.yml | 21 +++++++++++++++++++++ products/cdbg/profiles.yml | 11 +++++++++++ 2 files changed, 32 insertions(+) create mode 100644 products/cdbg/dbt_project.yml create mode 100644 products/cdbg/profiles.yml diff --git a/products/cdbg/dbt_project.yml b/products/cdbg/dbt_project.yml new file mode 100644 index 000000000..84891832c --- /dev/null +++ b/products/cdbg/dbt_project.yml @@ -0,0 +1,21 @@ +name: "cdbg" + +profile: "dcp-de-postgres" + +model-paths: ["models"] + +tests: + +store_failures: true + schema: "_tests" + +models: + cdbg: + staging: + +materialized: view + intermediate: + +materialized: view + product: + +materialized: table + +flags: + fail-fast: true diff --git a/products/cdbg/profiles.yml b/products/cdbg/profiles.yml new file mode 100644 index 000000000..671252586 --- /dev/null +++ b/products/cdbg/profiles.yml @@ -0,0 +1,11 @@ +dcp-de-postgres: + target: dev + outputs: + dev: + type: postgres + host: "{{ env_var('BUILD_ENGINE_HOST') }}" + user: "{{ env_var('BUILD_ENGINE_USER') }}" + password: "{{ env_var('BUILD_ENGINE_PASSWORD') }}" + port: "{{ env_var('BUILD_ENGINE_PORT') | as_number }}" + dbname: "{{ env_var('BUILD_ENGINE_DB') }}" + schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" From 600d11bedc0821e9a47046c7ffdbd835f4d335d9 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 10:05:44 -0500 Subject: [PATCH 03/25] start cdbg product models --- .../cdbg/models/product/_product_models.yml | 142 ++++++++++++++++++ .../cdbg/models/product/cdbg_block_groups.sql | 0 .../cdbg/models/product/cdbg_boroughs.sql | 0 products/cdbg/models/product/cdbg_tracts.sql | 0 4 files changed, 142 insertions(+) create mode 100644 products/cdbg/models/product/_product_models.yml create mode 
100644 products/cdbg/models/product/cdbg_block_groups.sql create mode 100644 products/cdbg/models/product/cdbg_boroughs.sql create mode 100644 products/cdbg/models/product/cdbg_tracts.sql diff --git a/products/cdbg/models/product/_product_models.yml b/products/cdbg/models/product/_product_models.yml new file mode 100644 index 000000000..cc06a1dce --- /dev/null +++ b/products/cdbg/models/product/_product_models.yml @@ -0,0 +1,142 @@ +version: 2 + +models: + - name: cdbg_tracts + description: Census tracts and their Community Development Block Grant (CDBG) eligibility details + config: + contract: + enforced: true + + columns: + - name: geoid + data_type: string + tests: + - not_null + - unique + + - name: borough_name + data_type: string + tests: [not_null] + + - name: borough_code + data_type: integer + tests: [not_null] + + - name: borough_tract + data_type: string + tests: [not_null] + + - name: tract + data_type: string + tests: [not_null] + + - name: total_floor_area + data_type: integer + tests: [not_null] + + - name: residential_floor_area + data_type: integer + tests: [not_null] + + - name: low_mod_income_population + data_type: integer + tests: [not_null] + + - name: low_mod_income_population_percentage + data_type: float + tests: [not_null] + + - name: eligibility_flag + data_type: boolean + tests: [not_null] + + - name: eligibility + data_type: string + tests: [not_null] + + - name: cdbg_block_groups + description: Census block groups and their Community Development Block Grant (CDBG) eligibility details + config: + contract: + enforced: false + + columns: + - name: geoid + data_type: string + tests: + - not_null + - unique + + - name: borough_name + data_type: string + tests: [not_null] + + - name: borough_code + data_type: integer + tests: [not_null] + + - name: borough_tract + data_type: string + tests: [not_null] + + - name: tract + data_type: string + tests: [not_null] + + - name: borough_tract_block_group + data_type: string + tests: [not_null] + + 
- name: block_group + data_type: string + tests: [not_null] + + - name: total_floor_area + data_type: integer + tests: [not_null] + + - name: residential_floor_area + data_type: integer + tests: [not_null] + + - name: low_mod_income_population + data_type: integer + tests: [not_null] + + - name: low_mod_income_population_percentage + data_type: float + tests: [not_null] + + - name: eligibility_flag + data_type: boolean + tests: [not_null] + + - name: eligibility + data_type: string + tests: [not_null] + + - name: cdbg_borough + description: Borough and city-wide Community Development Block Grant (CDBG) details + config: + contract: + enforced: true + + - name: borough_name + data_type: string + tests: [not_null] + + - name: total_floor_area + data_type: integer + tests: [not_null] + + - name: residential_floor_area + data_type: integer + tests: [not_null] + + - name: low_mod_income_population + data_type: integer + tests: [not_null] + + - name: low_mod_income_population_percentage + data_type: float + tests: [not_null] diff --git a/products/cdbg/models/product/cdbg_block_groups.sql b/products/cdbg/models/product/cdbg_block_groups.sql new file mode 100644 index 000000000..e69de29bb diff --git a/products/cdbg/models/product/cdbg_boroughs.sql b/products/cdbg/models/product/cdbg_boroughs.sql new file mode 100644 index 000000000..e69de29bb diff --git a/products/cdbg/models/product/cdbg_tracts.sql b/products/cdbg/models/product/cdbg_tracts.sql new file mode 100644 index 000000000..e69de29bb From 24a60f1db3d5bae7e084d5036bab2a1a896c7847 Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 10:45:10 -0500 Subject: [PATCH 04/25] add low to mod income by block group recipe ds --- .../hud_lowmodincomebyblockgroup.yml | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 dcpy/lifecycle/ingest/templates/hud_lowmodincomebyblockgroup.yml diff --git a/dcpy/lifecycle/ingest/templates/hud_lowmodincomebyblockgroup.yml 
b/dcpy/lifecycle/ingest/templates/hud_lowmodincomebyblockgroup.yml new file mode 100644 index 000000000..1ff10dd6a --- /dev/null +++ b/dcpy/lifecycle/ingest/templates/hud_lowmodincomebyblockgroup.yml @@ -0,0 +1,60 @@ +id: hud_lowmodincomebyblockgroup +acl: public-read + +attributes: + name: HUD Low to Moderate Income Population by Block Group + description: >- + This particular version of this dataset has come from OMB. We haven't fully assessed whether + we could just pull from the linked feature service instead and filter to NYC or if OMB has + done any other preprocessing. + + This service identifies U.S. Census Block Groups in which 51% or more of the households earn + less than 80 percent of the Area Median Income (AMI). The Community Development Block Grant + (CDBG) program requires that each CDBG funded activity must either principally benefit low- + and moderate-income persons, aid in the prevention or elimination of slums or blight, or meet + a community development need having a particular urgency because existing conditions pose a + serious and immediate threat to the health or welfare of the community and other financial + resources are not available to meet that need. With respect to activities that principally + benefit low- and moderate-income persons, at least 51 percent of the activity's beneficiaries + must be low and moderate income. + + The Community Development Block Grant (CDBG) program requires that each CDBG funded activity + must either principally benefit low- and moderate-income persons, aid in the prevention or + elimination of slums or blight, or meet a community development need having a particular urgency + because existing conditions pose a serious and immediate threat to the health or welfare of + the community and other financial resources are not available to meet that need. 
With respect + to activities that principally benefit low- and moderate-income persons, at least 51 percent + of the activity's beneficiaries must be low and moderate income. For CDBG, a person is considered + to be of low income only if he or she is a member of a household whose income would qualify as + "very low income" under the Section 8 Housing Assistance Payments program. Generally, these + Section 8 limits are based on 50% of area median. Similarly, CDBG moderate income relies on + Section 8 "lower income" limits, which are generally tied to 80% of area median. These data + are from the 2011-2015 American Community Survey (ACS). + url: https://hudgis-hud.opendata.arcgis.com/datasets/HUD::low-to-moderate-income-population-by-block-group/about + +ingestion: + source: + type: s3 + bucket: edm-recipes + key: inbox/omb/20241227/ACS-2020-Low-Mod-Summarized-All-Block-Groups-2023.csv + file_format: + type: csv + +#columns: +#- id: CDBGUOGID +#- id: GEOID +#- id: CDBGNAME +#- id: STUSAB +#- id: STATE +#- id: CDBGTYPE +#- id: Geoname +#- id: COUNTY +#- id: TRACT +#- id: BLKGRP +#- id: LOW +#- id: LOWMOD +#- id: LMMI +#- id: LOWMODUNIV +#- id: LOWMOD_PCT +#- id: MOE_LOWMODPCT +#- id: Column1 From 8a71d0013aaa3f797c8a31cb24f52c538590a22a Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 10:45:40 -0500 Subject: [PATCH 05/25] add cdbg recipe --- products/cdbg/recipe.yml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 products/cdbg/recipe.yml diff --git a/products/cdbg/recipe.yml b/products/cdbg/recipe.yml new file mode 100644 index 000000000..b944ba3b7 --- /dev/null +++ b/products/cdbg/recipe.yml @@ -0,0 +1,9 @@ +name: Community Development Block Grant +product: db-cdbg +inputs: + missing_versions_strategy: find_latest + datasets: + - name: dcp_mappluto_clipped + - name: dcp_cb2020_wi # maybe not needed. 
including for now in case it's helpful for block groups + - name: dcp_ct2020_wi + - name: hud_lowmodincomebyblockgroup From d533d82566ef8b1fdac22058a35ff7cba2834c17 Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 10:47:25 -0500 Subject: [PATCH 06/25] add barebones cdbg sources.yml --- products/cdbg/models/_sources.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 products/cdbg/models/_sources.yml diff --git a/products/cdbg/models/_sources.yml b/products/cdbg/models/_sources.yml new file mode 100644 index 000000000..363b9b199 --- /dev/null +++ b/products/cdbg/models/_sources.yml @@ -0,0 +1,17 @@ +version: 2 + +sources: + - name: recipe_sources + schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" + tables: + - name: dcp_mappluto_wi + columns: + - name: bbl + tests: + - not_null + - name: wkb_geometry + tests: + - not_null + - name: dcp_cb2020_wi + - name: dcp_ct2020_wi + - name: hud_lowmodincomebyblockgroup From 2479661f3a7129b916f604e88f225633845019f5 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 11:19:54 -0500 Subject: [PATCH 07/25] fix product models config --- products/cdbg/models/product/_product_models.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/products/cdbg/models/product/_product_models.yml b/products/cdbg/models/product/_product_models.yml index cc06a1dce..aff7939f4 100644 --- a/products/cdbg/models/product/_product_models.yml +++ b/products/cdbg/models/product/_product_models.yml @@ -121,6 +121,7 @@ models: contract: enforced: true + columns: - name: borough_name data_type: string tests: [not_null] From a36c007b203f271b25a3c8c1ebc47997340044f8 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 11:26:34 -0500 Subject: [PATCH 08/25] fix pluto source name --- products/cdbg/models/_sources.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/cdbg/models/_sources.yml b/products/cdbg/models/_sources.yml index 363b9b199..47127204f 
100644 --- a/products/cdbg/models/_sources.yml +++ b/products/cdbg/models/_sources.yml @@ -4,7 +4,7 @@ sources: - name: recipe_sources schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" tables: - - name: dcp_mappluto_wi + - name: dcp_mappluto_clipped columns: - name: bbl tests: From 8245c50f058e3b66996c4e538d4cd7561f176ae9 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 11:26:59 -0500 Subject: [PATCH 09/25] start lot to tracts model --- .../models/intermediate/int__lot_tracts.sql | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 products/cdbg/models/intermediate/int__lot_tracts.sql diff --git a/products/cdbg/models/intermediate/int__lot_tracts.sql b/products/cdbg/models/intermediate/int__lot_tracts.sql new file mode 100644 index 000000000..1187b2a1f --- /dev/null +++ b/products/cdbg/models/intermediate/int__lot_tracts.sql @@ -0,0 +1,25 @@ +with pluto as ( + select + bbl, + wkb_geometry + from {{ source("recipe_sources", "dcp_mappluto_clipped") }} +), + +tracts as ( + select + * + from {{ source("recipe_sources", "dcp_ct2020_wi") }} +), + +lot_tracts as ( + select + pluto.bbl, + pluto.wkb_geometry as lot_geometry, + tracts.geoid, + tracts.wkb_geometry as tract_geometry + from pluto + left join tracts + on ST_Intersects(pluto.wkb_geometry, tracts.wkb_geometry) +) + +select * from lot_tracts \ No newline at end of file From 7c0a3324ffd21d4ddac7495261dceec4ddb1b273 Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 11:28:25 -0500 Subject: [PATCH 10/25] add census block group staging table --- .../cdbg/models/staging/_staging_models.yml | 4 +++ .../staging/stg__census_block_groups.sql | 26 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 products/cdbg/models/staging/_staging_models.yml create mode 100644 products/cdbg/models/staging/stg__census_block_groups.sql diff --git a/products/cdbg/models/staging/_staging_models.yml b/products/cdbg/models/staging/_staging_models.yml new file 
mode 100644 index 000000000..0c521d5d0 --- /dev/null +++ b/products/cdbg/models/staging/_staging_models.yml @@ -0,0 +1,4 @@ +version: 2 + +models: + - name: stg__census_block_groups diff --git a/products/cdbg/models/staging/stg__census_block_groups.sql b/products/cdbg/models/staging/stg__census_block_groups.sql new file mode 100644 index 000000000..a66c52538 --- /dev/null +++ b/products/cdbg/models/staging/stg__census_block_groups.sql @@ -0,0 +1,26 @@ +{{ config( + materialized = 'table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'}, + {'columns': ['geoid']}, + ] +) }} + +WITH census_blocks AS ( + SELECT + left(geoid, 12) AS block_group_geoid, + * + FROM {{ source("recipe_sources", "dcp_cb2020_wi") }} +) +SELECT + borocode, + boroname, + ct2020, + block_group_geoid AS geoid, + st_union(wkb_geometry) AS geom +FROM census_blocks +GROUP BY + borocode, + boroname, + ct2020, + block_group_geoid From d614b52ebdbddf024df43b287aa843126ed2e6e7 Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 11:53:04 -0500 Subject: [PATCH 11/25] stg model to clean hud data --- products/cdbg/models/staging/_staging_models.yml | 1 + .../models/staging/stg__low_mod_by_block_group.sql | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 products/cdbg/models/staging/stg__low_mod_by_block_group.sql diff --git a/products/cdbg/models/staging/_staging_models.yml b/products/cdbg/models/staging/_staging_models.yml index 0c521d5d0..dfce87a32 100644 --- a/products/cdbg/models/staging/_staging_models.yml +++ b/products/cdbg/models/staging/_staging_models.yml @@ -2,3 +2,4 @@ version: 2 models: - name: stg__census_block_groups + - name: stg__low_mod_by_block_group diff --git a/products/cdbg/models/staging/stg__low_mod_by_block_group.sql b/products/cdbg/models/staging/stg__low_mod_by_block_group.sql new file mode 100644 index 000000000..2ecd8074d --- /dev/null +++ b/products/cdbg/models/staging/stg__low_mod_by_block_group.sql @@ -0,0 +1,12 @@ +{{ config( + 
materialized = 'table', + indexes=[{'columns': ['geoid']}] +) }} + +SELECT + "GEOID"::text AS geoid, -- TODO: coerce to text in ingest + "BORO" as boro, + "TRACT" as tract, + "BLKGRP" as block_group, + RTRIM("LOWMOD_PCT", '%')::numeric AS lowmod_pct +FROM {{ source("recipe_sources", "hud_lowmodincomebyblockgroup") }} From fd15ea968dbde3cece84430ed5ddb398485fcd1e Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 12:54:51 -0500 Subject: [PATCH 12/25] materialize all intermediate models as tables --- products/cdbg/dbt_project.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/cdbg/dbt_project.yml b/products/cdbg/dbt_project.yml index 84891832c..35b7e354b 100644 --- a/products/cdbg/dbt_project.yml +++ b/products/cdbg/dbt_project.yml @@ -13,7 +13,7 @@ models: staging: +materialized: view intermediate: - +materialized: view + +materialized: table product: +materialized: table From 32c67676f133c76fc40fa2179afa30481d6ec14d Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 12:55:17 -0500 Subject: [PATCH 13/25] add intermediate models --- .../intermediate/int__lot_block_groups.sql | 36 +++++++++++++ .../int__lot_block_groups_details.sql | 37 ++++++++++++++ .../int__lot_block_groups_raw.sql | 51 +++++++++++++++++++ .../models/intermediate/int__lot_tracts.sql | 25 --------- 4 files changed, 124 insertions(+), 25 deletions(-) create mode 100644 products/cdbg/models/intermediate/int__lot_block_groups.sql create mode 100644 products/cdbg/models/intermediate/int__lot_block_groups_details.sql create mode 100644 products/cdbg/models/intermediate/int__lot_block_groups_raw.sql delete mode 100644 products/cdbg/models/intermediate/int__lot_tracts.sql diff --git a/products/cdbg/models/intermediate/int__lot_block_groups.sql b/products/cdbg/models/intermediate/int__lot_block_groups.sql new file mode 100644 index 000000000..2db80da43 --- /dev/null +++ b/products/cdbg/models/intermediate/int__lot_block_groups.sql @@ -0,0 
+1,36 @@ +with lot_block_groups as ( + select + bbl, + block_group_geoid, + overlap_ratio + from {{ ref("int__lot_block_groups_raw") }} +), + +valid_lot_block_groups as ( + select * from lot_block_groups + where overlap_ratio is not null +), + +lots_easy as ( + select + bbl, + block_group_geoid, + 1 as overlap_ratio + from valid_lot_block_groups + where overlap_ratio > 0.9 +), + +lots_split as ( + select + * + from valid_lot_block_groups + where bbl not in (select bbl from lots_easy) +), + +lots as ( + select * from lots_easy + union all + select * from lots_split +) + +select * from lots diff --git a/products/cdbg/models/intermediate/int__lot_block_groups_details.sql b/products/cdbg/models/intermediate/int__lot_block_groups_details.sql new file mode 100644 index 000000000..955b98331 --- /dev/null +++ b/products/cdbg/models/intermediate/int__lot_block_groups_details.sql @@ -0,0 +1,37 @@ +with lot_block_groups as ( + select * from {{ ref("int__lot_block_groups") }} +), + +pluto as ( + select + bbl, + bldgarea, + resarea + from {{ source("recipe_sources", "dcp_mappluto_clipped") }} +), + +details as ( + select + pluto.bbl, + lot_block_groups.block_group_geoid, + pluto.bldgarea, + pluto.resarea, + lot_block_groups.overlap_ratio + from lot_block_groups + left join pluto + on lot_block_groups.bbl = pluto.bbl +), + +ratio_details as ( + select + bbl, + block_group_geoid, + overlap_ratio, + bldgarea, + bldgarea * overlap_ratio as bldgarea_in_block_group, + resarea, + resarea * overlap_ratio as resarea_in_block_group + from details +) + +select * from ratio_details diff --git a/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql b/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql new file mode 100644 index 000000000..79c15f0cd --- /dev/null +++ b/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql @@ -0,0 +1,51 @@ +with pluto as ( + select + bbl, + wkb_geometry + from {{ source("recipe_sources", "dcp_mappluto_clipped") }} +), + 
+block_groups as ( + select + geoid, + geom + from {{ ref("stg__census_block_groups") }} +), + +lot_block_group_intersections as ( + select + pluto.bbl as bbl, + pluto.wkb_geometry as lot_geometry, + ST_AREA(pluto.wkb_geometry) as lot_area_sqft, + block_groups.geoid as block_group_geoid, + block_groups.geom as block_group_geometry + from pluto + left join block_groups + on ST_INTERSECTS(pluto.wkb_geometry, block_groups.geom) +), + +intersection_calculations as ( + select + bbl, + lot_geometry, + lot_area_sqft, + block_group_geoid, + block_group_geometry, + ST_AREA( + CASE + WHEN ST_COVEREDBY(lot_geometry, block_group_geometry) THEN lot_geometry + ELSE ST_INTERSECTION(lot_geometry, block_group_geometry) + END + ) AS area_of_intersection_sqft + from lot_block_group_intersections +), + +intersection_ratios as ( + select + bbl, + block_group_geoid, + area_of_intersection_sqft / lot_area_sqft as overlap_ratio + from intersection_calculations +) + +select * from intersection_ratios diff --git a/products/cdbg/models/intermediate/int__lot_tracts.sql b/products/cdbg/models/intermediate/int__lot_tracts.sql deleted file mode 100644 index 1187b2a1f..000000000 --- a/products/cdbg/models/intermediate/int__lot_tracts.sql +++ /dev/null @@ -1,25 +0,0 @@ -with pluto as ( - select - bbl, - wkb_geometry - from {{ source("recipe_sources", "dcp_mappluto_clipped") }} -), - -tracts as ( - select - * - from {{ source("recipe_sources", "dcp_ct2020_wi") }} -), - -lot_tracts as ( - select - pluto.bbl, - pluto.wkb_geometry as lot_geometry, - tracts.geoid, - tracts.wkb_geometry as tract_geometry - from pluto - left join tracts - on ST_Intersects(pluto.wkb_geometry, tracts.wkb_geometry) -) - -select * from lot_tracts \ No newline at end of file From 74659d70a159a610bed20aaf7e88c1866ff45b8a Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 14:01:39 -0500 Subject: [PATCH 14/25] add columns to staging table --- products/cdbg/models/staging/stg__low_mod_by_block_group.sql | 2 ++ 
1 file changed, 2 insertions(+) diff --git a/products/cdbg/models/staging/stg__low_mod_by_block_group.sql b/products/cdbg/models/staging/stg__low_mod_by_block_group.sql index 2ecd8074d..ba4724403 100644 --- a/products/cdbg/models/staging/stg__low_mod_by_block_group.sql +++ b/products/cdbg/models/staging/stg__low_mod_by_block_group.sql @@ -8,5 +8,7 @@ SELECT "BORO" as boro, "TRACT" as tract, "BLKGRP" as block_group, + REPLACE("LOWMODUNIV", ',', '')::numeric as total_population, + REPLACE("LOWMOD", ',', '')::numeric as lowmod_population, RTRIM("LOWMOD_PCT", '%')::numeric AS lowmod_pct FROM {{ source("recipe_sources", "hud_lowmodincomebyblockgroup") }} From 32a93593018a799ea8f380c8d6dfca7a60cedaca Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 14:01:58 -0500 Subject: [PATCH 15/25] add int models for block groups and tracts --- .../models/intermediate/int__block_groups.sql | 39 +++++++++++++++++++ .../cdbg/models/intermediate/int__tracts.sql | 35 +++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 products/cdbg/models/intermediate/int__block_groups.sql create mode 100644 products/cdbg/models/intermediate/int__tracts.sql diff --git a/products/cdbg/models/intermediate/int__block_groups.sql b/products/cdbg/models/intermediate/int__block_groups.sql new file mode 100644 index 000000000..605659504 --- /dev/null +++ b/products/cdbg/models/intermediate/int__block_groups.sql @@ -0,0 +1,39 @@ +with lot_block_groups as ( + select * from {{ ref("int__lot_block_groups_details") }} +), + +block_groups_income as ( + select * from {{ ref("stg__low_mod_by_block_group") }} +), + +block_groups_floor_area as ( + select + block_group_geoid as geoid, + sum(bldgarea_in_block_group) as total_floor_area, + sum(resarea_in_block_group) as residential_floor_area + from lot_block_groups + group by geoid +), + +block_group_details as ( + select + block_groups_floor_area.geoid, + block_groups_income.boro as borough_name, + block_groups_income.tract, + 
block_groups_income.block_group, + total_floor_area, + residential_floor_area, + case + when total_floor_area = 0 + then 0 + else (residential_floor_area / total_floor_area) * 100 + end as residential_floor_area_percentage, + block_groups_income.total_population, + block_groups_income.lowmod_population as low_mod_income_population, + block_groups_income.lowmod_pct as low_mod_income_population_percentage + from block_groups_floor_area + left join block_groups_income + on block_groups_floor_area.geoid = block_groups_income.geoid +) + +select * from block_group_details \ No newline at end of file diff --git a/products/cdbg/models/intermediate/int__tracts.sql b/products/cdbg/models/intermediate/int__tracts.sql new file mode 100644 index 000000000..d47290679 --- /dev/null +++ b/products/cdbg/models/intermediate/int__tracts.sql @@ -0,0 +1,35 @@ +with block_groups as ( + select + *, + left(geoid, -1) as tract_id + from {{ ref("int__block_groups") }} +), + +tracts as ( + select + tract_id as tract, + sum(total_floor_area) as total_floor_area, + sum(residential_floor_area) as residential_floor_area, + sum(total_population) as total_population, + sum(low_mod_income_population) as low_mod_income_population + from block_groups + group by tract +), + +tracts_calculation as ( + select + *, + case + when total_floor_area = 0 + then 0 + else (residential_floor_area / total_floor_area) * 100 + end as residential_floor_area_percentage, + case + when total_population = 0 + then 0 + else (low_mod_income_population / total_population) * 100 + end as low_mod_income_population_percentage + from tracts +) + +select * from tracts_calculation \ No newline at end of file From cd92b5d11778ee8bef3aa5c70eb5f887722e534b Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 14:02:11 -0500 Subject: [PATCH 16/25] add product models for block groups and tracts --- .../cdbg/models/product/_product_models.yml | 12 ++++++++++ .../cdbg/models/product/cdbg_block_groups.sql | 22 
+++++++++++++++++++ products/cdbg/models/product/cdbg_tracts.sql | 22 +++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/products/cdbg/models/product/_product_models.yml b/products/cdbg/models/product/_product_models.yml index aff7939f4..99072104e 100644 --- a/products/cdbg/models/product/_product_models.yml +++ b/products/cdbg/models/product/_product_models.yml @@ -38,6 +38,10 @@ models: data_type: integer tests: [not_null] + - name: residential_floor_area_percentage + data_type: float + tests: [not_null] + - name: low_mod_income_population data_type: integer tests: [not_null] @@ -99,6 +103,10 @@ models: data_type: integer tests: [not_null] + - name: residential_floor_area_percentage + data_type: float + tests: [not_null] + - name: low_mod_income_population data_type: integer tests: [not_null] @@ -134,6 +142,10 @@ models: data_type: integer tests: [not_null] + - name: residential_floor_area_percentage + data_type: float + tests: [not_null] + - name: low_mod_income_population data_type: integer tests: [not_null] diff --git a/products/cdbg/models/product/cdbg_block_groups.sql b/products/cdbg/models/product/cdbg_block_groups.sql index e69de29bb..64efa03eb 100644 --- a/products/cdbg/models/product/cdbg_block_groups.sql +++ b/products/cdbg/models/product/cdbg_block_groups.sql @@ -0,0 +1,22 @@ +with block_groups as ( + select * from {{ ref("int__block_groups") }} +), + +eligibility_calculation as ( + select + *, + low_mod_income_population_percentage > 51 and residential_floor_area_percentage > 50 as eligibility_flag + from block_groups +), + +eligibility as ( + select + *, + case + when eligibility_flag then 'CD Eligible' + else 'Ineligible' + end as eligibility + from eligibility_calculation +) + +select * from eligibility \ No newline at end of file diff --git a/products/cdbg/models/product/cdbg_tracts.sql b/products/cdbg/models/product/cdbg_tracts.sql index e69de29bb..db35d59e4 100644 --- a/products/cdbg/models/product/cdbg_tracts.sql +++ 
b/products/cdbg/models/product/cdbg_tracts.sql @@ -0,0 +1,22 @@ +with tracts as ( + select * from {{ ref("int__tracts") }} +), + +eligibility_calculation as ( + select + *, + low_mod_income_population_percentage > 51 and residential_floor_area_percentage > 50 as eligibility_flag + from tracts +), + +eligibility as ( + select + *, + case + when eligibility_flag then 'CD Eligible' + else 'Ineligible' + end as eligibility + from eligibility_calculation +) + +select * from eligibility From e131514f6c7e2558f7d7836b55b97997b6eb0731 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 14:08:55 -0500 Subject: [PATCH 17/25] add borough name to tracts models --- products/cdbg/models/intermediate/int__tracts.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/products/cdbg/models/intermediate/int__tracts.sql b/products/cdbg/models/intermediate/int__tracts.sql index d47290679..e5c373198 100644 --- a/products/cdbg/models/intermediate/int__tracts.sql +++ b/products/cdbg/models/intermediate/int__tracts.sql @@ -7,13 +7,14 @@ with block_groups as ( tracts as ( select - tract_id as tract, + tract_id as geoid, + max(borough_name) as borough_name, sum(total_floor_area) as total_floor_area, sum(residential_floor_area) as residential_floor_area, sum(total_population) as total_population, sum(low_mod_income_population) as low_mod_income_population from block_groups - group by tract + group by tract_id ), tracts_calculation as ( From 824608a91ff7cbfe3feb11b0377c435da56a3ef6 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 14:04:15 -0500 Subject: [PATCH 18/25] less enforced product models --- .../cdbg/models/product/_product_models.yml | 122 +++++++++--------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/products/cdbg/models/product/_product_models.yml b/products/cdbg/models/product/_product_models.yml index 99072104e..0ffacd967 100644 --- a/products/cdbg/models/product/_product_models.yml +++ 
b/products/cdbg/models/product/_product_models.yml @@ -5,7 +5,7 @@ models: description: Census tracts and their Community Development Block Grant (CDBG) eligibility details config: contract: - enforced: true + enforced: false columns: - name: geoid @@ -18,17 +18,17 @@ models: data_type: string tests: [not_null] - - name: borough_code - data_type: integer - tests: [not_null] - - - name: borough_tract - data_type: string - tests: [not_null] - - - name: tract - data_type: string - tests: [not_null] +# - name: borough_code +# data_type: integer +# tests: [not_null] +# +# - name: borough_tract +# data_type: string +# tests: [not_null] +# +# - name: tract +# data_type: string +# tests: [not_null] - name: total_floor_area data_type: integer @@ -75,25 +75,25 @@ models: data_type: string tests: [not_null] - - name: borough_code - data_type: integer - tests: [not_null] - - - name: borough_tract - data_type: string - tests: [not_null] - - - name: tract - data_type: string - tests: [not_null] - - - name: borough_tract_block_group - data_type: string - tests: [not_null] - - - name: block_group - data_type: string - tests: [not_null] +# - name: borough_code +# data_type: integer +# tests: [not_null] +# +# - name: borough_tract +# data_type: string +# tests: [not_null] +# +# - name: tract +# data_type: string +# tests: [not_null] +# +# - name: borough_tract_block_group +# data_type: string +# tests: [not_null] +# +# - name: block_group +# data_type: string +# tests: [not_null] - name: total_floor_area data_type: integer @@ -123,33 +123,33 @@ models: data_type: string tests: [not_null] - - name: cdbg_borough - description: Borough and city-wide Community Development Block Grant (CDBG) details - config: - contract: - enforced: true - - columns: - - name: borough_name - data_type: string - tests: [not_null] - - - name: total_floor_area - data_type: integer - tests: [not_null] - - - name: residential_floor_area - data_type: integer - tests: [not_null] - - - name: 
residential_floor_area_percentage - data_type: float - tests: [not_null] - - - name: low_mod_income_population - data_type: integer - tests: [not_null] - - - name: low_mod_income_population_percentage - data_type: float - tests: [not_null] +# - name: cdbg_borough +# description: Borough and city-wide Community Development Block Grant (CDBG) details +# config: +# contract: +# enforced: true +# +# columns: +# - name: borough_name +# data_type: string +# tests: [not_null] +# +# - name: total_floor_area +# data_type: integer +# tests: [not_null] +# +# - name: residential_floor_area +# data_type: integer +# tests: [not_null] +# +# - name: residential_floor_area_percentage +# data_type: float +# tests: [not_null] +# +# - name: low_mod_income_population +# data_type: integer +# tests: [not_null] +# +# - name: low_mod_income_population_percentage +# data_type: float +# tests: [not_null] From 8468ffe2b9ce1705e1b2e57ad75ab5b972fba538 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 14:09:09 -0500 Subject: [PATCH 19/25] add export to cdbg --- products/cdbg/bash/export.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100755 products/cdbg/bash/export.sh diff --git a/products/cdbg/bash/export.sh b/products/cdbg/bash/export.sh new file mode 100755 index 000000000..1650a4476 --- /dev/null +++ b/products/cdbg/bash/export.sh @@ -0,0 +1,22 @@ +#!/bin/bash +source ../../bash/utils.sh +set_error_traps + +rm -rf output + +echo "Export product tables" +mkdir -p output && ( + cd output + + echo "Copy metadata files" + cp ../source_data_versions.csv . + cp ../build_metadata.json . + + echo "export cdbg_block_groups.csv ..." + csv_export cdbg_block_groups cdbg_block_groups + + echo "export cdbg_tracts.csv ..." 
+ csv_export cdbg_tracts cdbg_tracts +) + +zip -r output/output.zip output From f45e6386430fc31c431b88dbbc5923d3df64803a Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 31 Dec 2024 14:10:20 -0500 Subject: [PATCH 20/25] do build and export in cdbg action --- .github/workflows/cdbg_build.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cdbg_build.yml b/.github/workflows/cdbg_build.yml index e81d27789..127b61aaa 100644 --- a/.github/workflows/cdbg_build.yml +++ b/.github/workflows/cdbg_build.yml @@ -64,11 +64,13 @@ jobs: - name: Dataloading run: python -m dcpy.lifecycle.builds.load load --recipe-path ${{ inputs.recipe_file }}.lock.yml -# - name: Build -# run: ./bash/build.sh -# -# - name: Export -# run: ./bash/export.sh -# -# - name: Upload -# run: python3 -m dcpy.connectors.edm.publishing upload --product db-green-fast-track --acl public-read + - name: Build + run: | + dbt debug + dbt build + + - name: Export + run: ./bash/export.sh + + - name: Upload + run: python3 -m dcpy.connectors.edm.publishing upload --product db-cdbg --acl public-read From e78eb53eedb3db62fa90cd1321b768b461d1c6db Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 14:28:58 -0500 Subject: [PATCH 21/25] rm product/cdbg_boroughs --- products/cdbg/models/product/cdbg_boroughs.sql | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 products/cdbg/models/product/cdbg_boroughs.sql diff --git a/products/cdbg/models/product/cdbg_boroughs.sql b/products/cdbg/models/product/cdbg_boroughs.sql deleted file mode 100644 index e69de29bb..000000000 From 4098aba71823a7dcc132b5186d9c3a60a0f6f260 Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 14:29:06 -0500 Subject: [PATCH 22/25] sqlfluff --- .../models/intermediate/int__block_groups.sql | 62 +++++++------- .../intermediate/int__lot_block_groups.sql | 51 ++++++------ .../int__lot_block_groups_details.sql | 58 ++++++------- 
.../int__lot_block_groups_raw.sql | 82 +++++++++---------- .../cdbg/models/intermediate/int__tracts.sql | 60 +++++++------- .../cdbg/models/product/cdbg_block_groups.sql | 32 ++++---- products/cdbg/models/product/cdbg_tracts.sql | 32 ++++---- .../staging/stg__low_mod_by_block_group.sql | 10 +-- 8 files changed, 193 insertions(+), 194 deletions(-) diff --git a/products/cdbg/models/intermediate/int__block_groups.sql b/products/cdbg/models/intermediate/int__block_groups.sql index 605659504..955abd722 100644 --- a/products/cdbg/models/intermediate/int__block_groups.sql +++ b/products/cdbg/models/intermediate/int__block_groups.sql @@ -1,39 +1,39 @@ -with lot_block_groups as ( - select * from {{ ref("int__lot_block_groups_details") }} +WITH lot_block_groups AS ( + SELECT * FROM {{ ref("int__lot_block_groups_details") }} ), -block_groups_income as ( - select * from {{ ref("stg__low_mod_by_block_group") }} +block_groups_income AS ( + SELECT * FROM {{ ref("stg__low_mod_by_block_group") }} ), -block_groups_floor_area as ( - select - block_group_geoid as geoid, - sum(bldgarea_in_block_group) as total_floor_area, - sum(resarea_in_block_group) as residential_floor_area - from lot_block_groups - group by geoid +block_groups_floor_area AS ( + SELECT + block_group_geoid AS geoid, + sum(bldgarea_in_block_group) AS total_floor_area, + sum(resarea_in_block_group) AS residential_floor_area + FROM lot_block_groups + GROUP BY geoid ), -block_group_details as ( - select - block_groups_floor_area.geoid, - block_groups_income.boro as borough_name, - block_groups_income.tract, - block_groups_income.block_group, - total_floor_area, - residential_floor_area, - case - when total_floor_area = 0 - then 0 - else (residential_floor_area / total_floor_area) * 100 - end as residential_floor_area_percentage, - block_groups_income.total_population, - block_groups_income.lowmod_population as low_mod_income_population, - block_groups_income.lowmod_pct as low_mod_income_population_percentage - from 
block_groups_floor_area - left join block_groups_income - on block_groups_floor_area.geoid = block_groups_income.geoid +block_group_details AS ( + SELECT + block_groups_floor_area.geoid, + block_groups_income.boro AS borough_name, + block_groups_income.tract, + block_groups_income.block_group, + total_floor_area, + residential_floor_area, + CASE + WHEN total_floor_area = 0 + THEN 0 + ELSE (residential_floor_area / total_floor_area) * 100 + END AS residential_floor_area_percentage, + block_groups_income.total_population, + block_groups_income.lowmod_population AS low_mod_income_population, + block_groups_income.lowmod_pct AS low_mod_income_population_percentage + FROM block_groups_floor_area + LEFT JOIN block_groups_income + ON block_groups_floor_area.geoid = block_groups_income.geoid ) -select * from block_group_details \ No newline at end of file +SELECT * FROM block_group_details diff --git a/products/cdbg/models/intermediate/int__lot_block_groups.sql b/products/cdbg/models/intermediate/int__lot_block_groups.sql index 2db80da43..95a710b75 100644 --- a/products/cdbg/models/intermediate/int__lot_block_groups.sql +++ b/products/cdbg/models/intermediate/int__lot_block_groups.sql @@ -1,36 +1,35 @@ -with lot_block_groups as ( - select - bbl, - block_group_geoid, - overlap_ratio - from {{ ref("int__lot_block_groups_raw") }} +WITH lot_block_groups AS ( + SELECT + bbl, + block_group_geoid, + overlap_ratio + FROM {{ ref("int__lot_block_groups_raw") }} ), -valid_lot_block_groups as ( - select * from lot_block_groups - where overlap_ratio is not null +valid_lot_block_groups AS ( + SELECT * FROM lot_block_groups + WHERE overlap_ratio IS NOT null ), -lots_easy as ( - select - bbl, - block_group_geoid, - 1 as overlap_ratio - from valid_lot_block_groups - where overlap_ratio > 0.9 +lots_easy AS ( + SELECT + bbl, + block_group_geoid, + 1 AS overlap_ratio + FROM valid_lot_block_groups + WHERE overlap_ratio > 0.9 ), -lots_split as ( - select - * - from valid_lot_block_groups - 
where bbl not in (select bbl from lots_easy) +lots_split AS ( + SELECT * + FROM valid_lot_block_groups + WHERE bbl NOT IN (SELECT bbl FROM lots_easy) ), -lots as ( - select * from lots_easy - union all - select * from lots_split +lots AS ( + SELECT * FROM lots_easy + UNION ALL + SELECT * FROM lots_split ) -select * from lots +SELECT * FROM lots diff --git a/products/cdbg/models/intermediate/int__lot_block_groups_details.sql b/products/cdbg/models/intermediate/int__lot_block_groups_details.sql index 955b98331..9150e36cc 100644 --- a/products/cdbg/models/intermediate/int__lot_block_groups_details.sql +++ b/products/cdbg/models/intermediate/int__lot_block_groups_details.sql @@ -1,37 +1,37 @@ -with lot_block_groups as ( - select * from {{ ref("int__lot_block_groups") }} +WITH lot_block_groups AS ( + SELECT * FROM {{ ref("int__lot_block_groups") }} ), -pluto as ( - select - bbl, - bldgarea, - resarea - from {{ source("recipe_sources", "dcp_mappluto_clipped") }} +pluto AS ( + SELECT + bbl, + bldgarea, + resarea + FROM {{ source("recipe_sources", "dcp_mappluto_clipped") }} ), -details as ( - select - pluto.bbl, - lot_block_groups.block_group_geoid, - pluto.bldgarea, - pluto.resarea, - lot_block_groups.overlap_ratio - from lot_block_groups - left join pluto - on lot_block_groups.bbl = pluto.bbl +details AS ( + SELECT + pluto.bbl, + lot_block_groups.block_group_geoid, + pluto.bldgarea, + pluto.resarea, + lot_block_groups.overlap_ratio + FROM lot_block_groups + LEFT JOIN pluto + ON lot_block_groups.bbl = pluto.bbl ), -ratio_details as ( - select - bbl, - block_group_geoid, - overlap_ratio, - bldgarea, - bldgarea * overlap_ratio as bldgarea_in_block_group, - resarea, - resarea * overlap_ratio as resarea_in_block_group - from details +ratio_details AS ( + SELECT + bbl, + block_group_geoid, + overlap_ratio, + bldgarea, + bldgarea * overlap_ratio AS bldgarea_in_block_group, + resarea, + resarea * overlap_ratio AS resarea_in_block_group + FROM details ) -select * from 
ratio_details +SELECT * FROM ratio_details diff --git a/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql b/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql index 79c15f0cd..5e0284e37 100644 --- a/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql +++ b/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql @@ -1,51 +1,51 @@ -with pluto as ( - select - bbl, - wkb_geometry - from {{ source("recipe_sources", "dcp_mappluto_clipped") }} +WITH pluto AS ( + SELECT + bbl, + wkb_geometry + FROM {{ source("recipe_sources", "dcp_mappluto_clipped") }} ), -block_groups as ( - select - geoid, - geom - from {{ ref("stg__census_block_groups") }} +block_groups AS ( + SELECT + geoid, + geom + FROM {{ ref("stg__census_block_groups") }} ), -lot_block_group_intersections as ( - select - pluto.bbl as bbl, - pluto.wkb_geometry as lot_geometry, - ST_AREA(pluto.wkb_geometry) as lot_area_sqft, - block_groups.geoid as block_group_geoid, - block_groups.geom as block_group_geometry - from pluto - left join block_groups - on ST_INTERSECTS(pluto.wkb_geometry, block_groups.geom) +lot_block_group_intersections AS ( + SELECT + pluto.bbl, + pluto.wkb_geometry AS lot_geometry, + ST_AREA(pluto.wkb_geometry) AS lot_area_sqft, + block_groups.geoid AS block_group_geoid, + block_groups.geom AS block_group_geometry + FROM pluto + LEFT JOIN block_groups + ON ST_INTERSECTS(pluto.wkb_geometry, block_groups.geom) ), -intersection_calculations as ( - select - bbl, - lot_geometry, - lot_area_sqft, - block_group_geoid, - block_group_geometry, - ST_AREA( - CASE - WHEN ST_COVEREDBY(lot_geometry, block_group_geometry) THEN lot_geometry - ELSE ST_INTERSECTION(lot_geometry, block_group_geometry) - END - ) AS area_of_intersection_sqft - from lot_block_group_intersections +intersection_calculations AS ( + SELECT + bbl, + lot_geometry, + lot_area_sqft, + block_group_geoid, + block_group_geometry, + ST_AREA( + CASE + WHEN ST_COVEREDBY(lot_geometry, block_group_geometry) 
THEN lot_geometry + ELSE ST_INTERSECTION(lot_geometry, block_group_geometry) + END + ) AS area_of_intersection_sqft + FROM lot_block_group_intersections ), -intersection_ratios as ( - select - bbl, - block_group_geoid, - area_of_intersection_sqft / lot_area_sqft as overlap_ratio - from intersection_calculations +intersection_ratios AS ( + SELECT + bbl, + block_group_geoid, + area_of_intersection_sqft / lot_area_sqft AS overlap_ratio + FROM intersection_calculations ) -select * from intersection_ratios +SELECT * FROM intersection_ratios diff --git a/products/cdbg/models/intermediate/int__tracts.sql b/products/cdbg/models/intermediate/int__tracts.sql index e5c373198..760783824 100644 --- a/products/cdbg/models/intermediate/int__tracts.sql +++ b/products/cdbg/models/intermediate/int__tracts.sql @@ -1,36 +1,36 @@ -with block_groups as ( - select - *, - left(geoid, -1) as tract_id - from {{ ref("int__block_groups") }} +WITH block_groups AS ( + SELECT + *, + left(geoid, -1) AS tract_id + FROM {{ ref("int__block_groups") }} ), -tracts as ( - select - tract_id as geoid, - max(borough_name) as borough_name, - sum(total_floor_area) as total_floor_area, - sum(residential_floor_area) as residential_floor_area, - sum(total_population) as total_population, - sum(low_mod_income_population) as low_mod_income_population - from block_groups - group by tract_id +tracts AS ( + SELECT + tract_id AS geoid, + max(borough_name) AS borough_name, + sum(total_floor_area) AS total_floor_area, + sum(residential_floor_area) AS residential_floor_area, + sum(total_population) AS total_population, + sum(low_mod_income_population) AS low_mod_income_population + FROM block_groups + GROUP BY tract_id ), -tracts_calculation as ( - select - *, - case - when total_floor_area = 0 - then 0 - else (residential_floor_area / total_floor_area) * 100 - end as residential_floor_area_percentage, - case - when total_population = 0 - then 0 - else (low_mod_income_population / total_population) * 100 - end as 
low_mod_income_population_percentage - from tracts +tracts_calculation AS ( + SELECT + *, + CASE + WHEN total_floor_area = 0 + THEN 0 + ELSE (residential_floor_area / total_floor_area) * 100 + END AS residential_floor_area_percentage, + CASE + WHEN total_population = 0 + THEN 0 + ELSE (low_mod_income_population / total_population) * 100 + END AS low_mod_income_population_percentage + FROM tracts ) -select * from tracts_calculation \ No newline at end of file +SELECT * FROM tracts_calculation diff --git a/products/cdbg/models/product/cdbg_block_groups.sql b/products/cdbg/models/product/cdbg_block_groups.sql index 64efa03eb..1934d6034 100644 --- a/products/cdbg/models/product/cdbg_block_groups.sql +++ b/products/cdbg/models/product/cdbg_block_groups.sql @@ -1,22 +1,22 @@ -with block_groups as ( - select * from {{ ref("int__block_groups") }} +WITH block_groups AS ( + SELECT * FROM {{ ref("int__block_groups") }} ), -eligibility_calculation as ( - select - *, - low_mod_income_population_percentage > 51 and residential_floor_area_percentage > 50 as eligibility_flag - from block_groups +eligibility_calculation AS ( + SELECT + *, + low_mod_income_population_percentage > 51 AND residential_floor_area_percentage > 50 AS eligibility_flag + FROM block_groups ), -eligibility as ( - select - *, - case - when eligibility_flag then 'CD Eligible' - else 'Ineligible' - end as eligibility - from eligibility_calculation +eligibility AS ( + SELECT + *, + CASE + WHEN eligibility_flag THEN 'CD Eligible' + ELSE 'Ineligible' + END AS eligibility + FROM eligibility_calculation ) -select * from eligibility \ No newline at end of file +SELECT * FROM eligibility diff --git a/products/cdbg/models/product/cdbg_tracts.sql b/products/cdbg/models/product/cdbg_tracts.sql index db35d59e4..374b14005 100644 --- a/products/cdbg/models/product/cdbg_tracts.sql +++ b/products/cdbg/models/product/cdbg_tracts.sql @@ -1,22 +1,22 @@ -with tracts as ( - select * from {{ ref("int__tracts") }} +WITH tracts AS ( + 
SELECT * FROM {{ ref("int__tracts") }} ), -eligibility_calculation as ( - select - *, - low_mod_income_population_percentage > 51 and residential_floor_area_percentage > 50 as eligibility_flag - from tracts +eligibility_calculation AS ( + SELECT + *, + low_mod_income_population_percentage > 51 AND residential_floor_area_percentage > 50 AS eligibility_flag + FROM tracts ), -eligibility as ( - select - *, - case - when eligibility_flag then 'CD Eligible' - else 'Ineligible' - end as eligibility - from eligibility_calculation +eligibility AS ( + SELECT + *, + CASE + WHEN eligibility_flag THEN 'CD Eligible' + ELSE 'Ineligible' + END AS eligibility + FROM eligibility_calculation ) -select * from eligibility +SELECT * FROM eligibility diff --git a/products/cdbg/models/staging/stg__low_mod_by_block_group.sql b/products/cdbg/models/staging/stg__low_mod_by_block_group.sql index ba4724403..190f3a8d4 100644 --- a/products/cdbg/models/staging/stg__low_mod_by_block_group.sql +++ b/products/cdbg/models/staging/stg__low_mod_by_block_group.sql @@ -5,10 +5,10 @@ SELECT "GEOID"::text AS geoid, -- TODO: coerce to text in ingest - "BORO" as boro, - "TRACT" as tract, - "BLKGRP" as block_group, - REPLACE("LOWMODUNIV", ',', '')::numeric as total_population, - REPLACE("LOWMOD", ',', '')::numeric as lowmod_population, + "BORO" AS boro, + "TRACT" AS tract, + "BLKGRP" AS block_group, + REPLACE("LOWMODUNIV", ',', '')::numeric AS total_population, + REPLACE("LOWMOD", ',', '')::numeric AS lowmod_population, RTRIM("LOWMOD_PCT", '%')::numeric AS lowmod_pct FROM {{ source("recipe_sources", "hud_lowmodincomebyblockgroup") }} From 5fecb18faa9cd724079c6e21f29a30cb99848c5b Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 14:43:16 -0500 Subject: [PATCH 23/25] add intermediate models yml --- .../intermediate/_intermediate_models.yml | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 products/cdbg/models/intermediate/_intermediate_models.yml diff --git 
a/products/cdbg/models/intermediate/_intermediate_models.yml b/products/cdbg/models/intermediate/_intermediate_models.yml new file mode 100644 index 000000000..c200e5511 --- /dev/null +++ b/products/cdbg/models/intermediate/_intermediate_models.yml @@ -0,0 +1,62 @@ +version: 2 + +models: +- name: int__block_groups + description: residential area and low-to-moderate income data aggregated by census block group + columns: + - name: geoid + tests: [unique, not_null] + - name: borough_name + - name: tract + - name: block_group + - name: total_floor_area + - name: residential_floor_area + - name: residential_floor_area_percentage + - name: total_population + - name: lowmod_population + - name: lowmod_population_percentage + +- name: int__lot_block_groups_details + description: int__lot_block_groups joined to pluto for lot info + columns: + - name: bbl + - name: block_group_geoid + - name: overlap_ratio + - name: bldgarea + - name: bldgarea_in_block_group + - name: resarea + - name: resarea_in_block_group + +- name: int__lot_block_groups_raw + description: unique intersections of pluto lots and census block groups with proportion of lot in block group + columns: + - name: bbl + - name: block_group_geoid + - name: overlap_ratio + +- name: int__lot_block_groups + description: >- + unique intersections of pluto lots and census block groups with proportion of lot in block group, + corrected to assign lots fully to block groups that contain 90%+ of a lot + columns: + - name: bbl + - name: block_group_geoid + - name: overlap_ratio + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: [bbl, block_group_geoid] + +- name: int__tracts + description: residential area and low-to-moderate income data aggregated by census tract + columns: + - name: geoid + tests: [unique, not_null] + - name: borough_name + - name: tract + - name: block_group + - name: total_floor_area + - name: residential_floor_area + - name: residential_floor_area_percentage + - name: 
total_population + - name: lowmod_population + - name: lowmod_population_percentage From 7d3b0bcae2a32e3eb435d2ddd03115c3158f3484 Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 14:56:28 -0500 Subject: [PATCH 24/25] add packages.yml --- products/cdbg/packages.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 products/cdbg/packages.yml diff --git a/products/cdbg/packages.yml b/products/cdbg/packages.yml new file mode 100644 index 000000000..e124278c7 --- /dev/null +++ b/products/cdbg/packages.yml @@ -0,0 +1,5 @@ +packages: + - package: dbt-labs/dbt_utils + version: 1.1.1 + - package: calogica/dbt_expectations + version: 0.10.3 \ No newline at end of file From 5514820b62b5305506a9a30d45f2ba6de5fd2564 Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Tue, 31 Dec 2024 15:23:14 -0500 Subject: [PATCH 25/25] add test to ensure all of lots area assigned --- products/cdbg/macros/test_sum.sql | 9 +++++++++ .../cdbg/models/intermediate/_intermediate_models.yml | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 products/cdbg/macros/test_sum.sql diff --git a/products/cdbg/macros/test_sum.sql b/products/cdbg/macros/test_sum.sql new file mode 100644 index 000000000..ffc9f76df --- /dev/null +++ b/products/cdbg/macros/test_sum.sql @@ -0,0 +1,9 @@ +{% test sum_by(model, group_by, target_column, val, precision=4) %} + +SELECT + {{ group_by }}, sum({{ target_column }}) AS sum, array_agg( {{target_column }}) AS vals +FROM {{ model }} +GROUP BY {{ group_by }} +HAVING round(sum({{ target_column }})::numeric, {{ precision }}) <> {{ val }} + +{% endtest %} diff --git a/products/cdbg/models/intermediate/_intermediate_models.yml b/products/cdbg/models/intermediate/_intermediate_models.yml index c200e5511..d5937f1f9 100644 --- a/products/cdbg/models/intermediate/_intermediate_models.yml +++ b/products/cdbg/models/intermediate/_intermediate_models.yml @@ -33,7 +33,7 @@ models: - name: bbl - name: block_group_geoid - name: 
overlap_ratio - + - name: int__lot_block_groups description: >- unique intersections of pluto lots and census block groups with proportion of lot in block group, @@ -45,6 +45,12 @@ models: tests: - dbt_utils.unique_combination_of_columns: combination_of_columns: [bbl, block_group_geoid] + - sum_by: + group_by: bbl + target_column: overlap_ratio + val: 1 + config: + severity: warn - name: int__tracts description: residential area and low-to-moderate income data aggregated by census tract