From 1d9372c564516ad4676bbf3ad0ef3ea11a8189fd Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Thu, 2 Jan 2025 09:54:49 -0500 Subject: [PATCH 1/4] add census data to cdbg --- products/cdbg/models/_sources.yml | 1 + products/cdbg/models/staging/stg__census_data_blocks.sql | 7 +++++++ products/cdbg/recipe.yml | 2 ++ 3 files changed, 10 insertions(+) create mode 100644 products/cdbg/models/staging/stg__census_data_blocks.sql diff --git a/products/cdbg/models/_sources.yml b/products/cdbg/models/_sources.yml index 47127204f..4cf761b9d 100644 --- a/products/cdbg/models/_sources.yml +++ b/products/cdbg/models/_sources.yml @@ -14,4 +14,5 @@ sources: - not_null - name: dcp_cb2020_wi - name: dcp_ct2020_wi + - name: dcp_censusdata_blocks - name: hud_lowmodincomebyblockgroup diff --git a/products/cdbg/models/staging/stg__census_data_blocks.sql b/products/cdbg/models/staging/stg__census_data_blocks.sql new file mode 100644 index 000000000..14d9bdc10 --- /dev/null +++ b/products/cdbg/models/staging/stg__census_data_blocks.sql @@ -0,0 +1,7 @@ +SELECT + geoid20 AS bctcb2020, + borocode AS borough_code, + geogname AS borough_name, + "pop1.1"::numeric AS total_population +FROM {{ source("recipe_sources", "dcp_censusdata_blocks") }} +WHERE geogtype = 'CB2020' diff --git a/products/cdbg/recipe.yml b/products/cdbg/recipe.yml index b944ba3b7..d1a61f7aa 100644 --- a/products/cdbg/recipe.yml +++ b/products/cdbg/recipe.yml @@ -6,4 +6,6 @@ inputs: - name: dcp_mappluto_clipped - name: dcp_cb2020_wi # maybe not needed. 
including for now in case it's helpful for block groups - name: dcp_ct2020_wi + - name: dcp_censusdata_blocks + version: "2020" - name: hud_lowmodincomebyblockgroup From aa90c07b544f0b9dd674367214c913cf778fea98 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Thu, 2 Jan 2025 15:09:48 -0500 Subject: [PATCH 2/4] create block groups in an intermediate model `dcp_censusdata_blocks` source data must be joined to `dcp_cb2020_wi` --- .../intermediate/_intermediate_models.yml | 15 +++++++ .../intermediate/int__block_groups_raw.sql | 42 +++++++++++++++++++ .../int__lot_block_groups_raw.sql | 2 +- .../cdbg/models/staging/_staging_models.yml | 20 ++++++++- .../staging/stg__census_block_groups.sql | 26 ------------ .../models/staging/stg__census_blocks.sql | 11 +++++ 6 files changed, 88 insertions(+), 28 deletions(-) create mode 100644 products/cdbg/models/intermediate/int__block_groups_raw.sql delete mode 100644 products/cdbg/models/staging/stg__census_block_groups.sql create mode 100644 products/cdbg/models/staging/stg__census_blocks.sql diff --git a/products/cdbg/models/intermediate/_intermediate_models.yml b/products/cdbg/models/intermediate/_intermediate_models.yml index d5937f1f9..c8e19b546 100644 --- a/products/cdbg/models/intermediate/_intermediate_models.yml +++ b/products/cdbg/models/intermediate/_intermediate_models.yml @@ -16,11 +16,26 @@ models: - name: lowmod_population - name: lowmod_population_percentage +- name: int__block_groups_raw + description: census block group geoemtries and demographic data + columns: + - name: geoid + tests: + - not_null + - unique + - name: total_population + tests: [not_null] + - name: int__lot_block_groups_details description: int__lot_block_groups joined to pluto for lot info columns: - name: bbl - name: block_group_geoid + tests: + - not_null + - relationships: + to: ref('int__block_groups_raw') + field: geoid - name: overlap_ratio - name: bldgarea - name: bldgarea_in_block_group diff --git 
a/products/cdbg/models/intermediate/int__block_groups_raw.sql b/products/cdbg/models/intermediate/int__block_groups_raw.sql new file mode 100644 index 000000000..82fefe816 --- /dev/null +++ b/products/cdbg/models/intermediate/int__block_groups_raw.sql @@ -0,0 +1,42 @@ +{{ config( + materialized = 'table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'}, + {'columns': ['geoid']}, + ] +) }} + +WITH census_blocks AS ( + SELECT * FROM {{ ref("stg__census_blocks") }} +), + +census_data_blocks AS ( + SELECT * FROM {{ ref("stg__census_data_blocks") }} +), + +blocks AS ( + SELECT + census_blocks.block_group_geoid, + census_data_blocks.borough_code, + census_data_blocks.borough_name, + census_blocks.ct2020, + census_data_blocks.total_population, + census_blocks.wkb_geometry + FROM census_blocks + LEFT JOIN census_data_blocks + ON census_blocks.bctcb2020 = census_data_blocks.bctcb2020 +) + +SELECT + block_group_geoid AS geoid, + borough_code, + borough_name, + ct2020, + sum(total_population) AS total_population, + st_union(wkb_geometry) AS geom +FROM blocks +GROUP BY + block_group_geoid, + borough_code, + borough_name, + ct2020 diff --git a/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql b/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql index 5e0284e37..4525837b4 100644 --- a/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql +++ b/products/cdbg/models/intermediate/int__lot_block_groups_raw.sql @@ -9,7 +9,7 @@ block_groups AS ( SELECT geoid, geom - FROM {{ ref("stg__census_block_groups") }} + FROM {{ ref("int__block_groups_raw") }} ), lot_block_group_intersections AS ( diff --git a/products/cdbg/models/staging/_staging_models.yml b/products/cdbg/models/staging/_staging_models.yml index dfce87a32..1ed36e7db 100644 --- a/products/cdbg/models/staging/_staging_models.yml +++ b/products/cdbg/models/staging/_staging_models.yml @@ -1,5 +1,23 @@ version: 2 models: - - name: stg__census_block_groups + - name: stg__census_blocks + 
columns: + - name: geoid + tests: + - not_null + - unique + + - name: stg__census_data_blocks + columns: + - name: bctcb2020 + tests: + - not_null + - unique + - name: stg__low_mod_by_block_group + columns: + - name: geoid + tests: + - not_null + - unique diff --git a/products/cdbg/models/staging/stg__census_block_groups.sql b/products/cdbg/models/staging/stg__census_block_groups.sql deleted file mode 100644 index a66c52538..000000000 --- a/products/cdbg/models/staging/stg__census_block_groups.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - materialized = 'table', - indexes=[ - {'columns': ['geom'], 'type': 'gist'}, - {'columns': ['geoid']}, - ] -) }} - -WITH census_blocks AS ( - SELECT - left(geoid, 12) AS block_group_geoid, - * - FROM {{ source("recipe_sources", "dcp_cb2020_wi") }} -) -SELECT - borocode, - boroname, - ct2020, - block_group_geoid AS geoid, - st_union(wkb_geometry) AS geom -FROM census_blocks -GROUP BY - borocode, - boroname, - ct2020, - block_group_geoid diff --git a/products/cdbg/models/staging/stg__census_blocks.sql b/products/cdbg/models/staging/stg__census_blocks.sql new file mode 100644 index 000000000..96f3cc3a8 --- /dev/null +++ b/products/cdbg/models/staging/stg__census_blocks.sql @@ -0,0 +1,11 @@ +{{ config( + materialized = 'table', + indexes=[ + {'columns': ['wkb_geometry'], 'type': 'gist'}, + {'columns': ['geoid']}, + ] +) }} +SELECT + left(geoid, 12) AS block_group_geoid, + * +FROM {{ source("recipe_sources", "dcp_cb2020_wi") }} From 8ceaf558627c89fcb9084f90da3a65e476d82d57 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Thu, 2 Jan 2025 15:10:50 -0500 Subject: [PATCH 3/4] use new source of total population --- .../intermediate/_intermediate_models.yml | 2 + .../models/intermediate/int__block_groups.sql | 51 ++++++++++++++----- .../staging/stg__low_mod_by_block_group.sql | 6 +-- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/products/cdbg/models/intermediate/_intermediate_models.yml 
b/products/cdbg/models/intermediate/_intermediate_models.yml index c8e19b546..e1085e3fd 100644 --- a/products/cdbg/models/intermediate/_intermediate_models.yml +++ b/products/cdbg/models/intermediate/_intermediate_models.yml @@ -13,8 +13,10 @@ models: - name: residential_floor_area - name: residential_floor_area_percentage - name: total_population + - name: potential_lowmod_population - name: lowmod_population - name: lowmod_population_percentage + - name: low_mod_income_population_percentage_source - name: int__block_groups_raw description: census block group geoemtries and demographic data diff --git a/products/cdbg/models/intermediate/int__block_groups.sql b/products/cdbg/models/intermediate/int__block_groups.sql index 955abd722..469c03d4f 100644 --- a/products/cdbg/models/intermediate/int__block_groups.sql +++ b/products/cdbg/models/intermediate/int__block_groups.sql @@ -2,10 +2,30 @@ WITH lot_block_groups AS ( SELECT * FROM {{ ref("int__lot_block_groups_details") }} ), +block_groups_population AS ( + SELECT * FROM {{ ref("int__block_groups_raw") }} +), + block_groups_income AS ( SELECT * FROM {{ ref("stg__low_mod_by_block_group") }} ), +block_groups_demographics AS ( + SELECT + block_groups_population.geoid, + block_groups_population.borough_code, + block_groups_population.borough_name, + block_groups_population.total_population, + block_groups_income.tract, + block_groups_income.block_group, + block_groups_income.potential_lowmod_population, + block_groups_income.low_mod_income_population, + block_groups_income.low_mod_income_population_percentage + FROM block_groups_population + LEFT JOIN block_groups_income + ON block_groups_population.geoid = block_groups_income.geoid +), + block_groups_floor_area AS ( SELECT block_group_geoid AS geoid, @@ -18,22 +38,29 @@ block_groups_floor_area AS ( block_group_details AS ( SELECT block_groups_floor_area.geoid, - block_groups_income.boro AS borough_name, - block_groups_income.tract, - block_groups_income.block_group, - 
total_floor_area, - residential_floor_area, + block_groups_demographics.borough_name, + block_groups_demographics.tract, + block_groups_demographics.block_group, + block_groups_floor_area.total_floor_area, + block_groups_floor_area.residential_floor_area, CASE - WHEN total_floor_area = 0 + WHEN block_groups_floor_area.total_floor_area = 0 THEN 0 - ELSE (residential_floor_area / total_floor_area) * 100 + ELSE (block_groups_floor_area.residential_floor_area / block_groups_floor_area.total_floor_area) * 100 END AS residential_floor_area_percentage, - block_groups_income.total_population, - block_groups_income.lowmod_population AS low_mod_income_population, - block_groups_income.lowmod_pct AS low_mod_income_population_percentage + block_groups_demographics.total_population, + block_groups_demographics.potential_lowmod_population, + block_groups_demographics.low_mod_income_population, + CASE + WHEN block_groups_demographics.total_population = 0 + THEN 0 + ELSE + (block_groups_demographics.low_mod_income_population / block_groups_demographics.total_population) * 100 + END AS low_mod_income_population_percentage, + block_groups_demographics.low_mod_income_population_percentage AS low_mod_income_population_percentage_source FROM block_groups_floor_area - LEFT JOIN block_groups_income - ON block_groups_floor_area.geoid = block_groups_income.geoid + LEFT JOIN block_groups_demographics + ON block_groups_floor_area.geoid = block_groups_demographics.geoid ) SELECT * FROM block_group_details diff --git a/products/cdbg/models/staging/stg__low_mod_by_block_group.sql b/products/cdbg/models/staging/stg__low_mod_by_block_group.sql index 190f3a8d4..b3f26d002 100644 --- a/products/cdbg/models/staging/stg__low_mod_by_block_group.sql +++ b/products/cdbg/models/staging/stg__low_mod_by_block_group.sql @@ -8,7 +8,7 @@ SELECT "BORO" AS boro, "TRACT" AS tract, "BLKGRP" AS block_group, - REPLACE("LOWMODUNIV", ',', '')::numeric AS total_population, - REPLACE("LOWMOD", ',', '')::numeric AS 
lowmod_population, - RTRIM("LOWMOD_PCT", '%')::numeric AS lowmod_pct + REPLACE("LOWMODUNIV", ',', '')::numeric AS potential_lowmod_population, + REPLACE("LOWMOD", ',', '')::numeric AS low_mod_income_population, + RTRIM("LOWMOD_PCT", '%')::numeric AS low_mod_income_population_percentage FROM {{ source("recipe_sources", "hud_lowmodincomebyblockgroup") }} From 9f1014ad06d69754dc9a74a62d668c61fba59500 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Thu, 2 Jan 2025 19:32:48 -0500 Subject: [PATCH 4/4] ignore sqlfluff rule RF01 a column name with a period in it seems unfixable with single/double quotes --- products/cdbg/models/staging/stg__census_data_blocks.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/cdbg/models/staging/stg__census_data_blocks.sql b/products/cdbg/models/staging/stg__census_data_blocks.sql index 14d9bdc10..5f2860891 100644 --- a/products/cdbg/models/staging/stg__census_data_blocks.sql +++ b/products/cdbg/models/staging/stg__census_data_blocks.sql @@ -2,6 +2,6 @@ SELECT geoid20 AS bctcb2020, borocode AS borough_code, geogname AS borough_name, - "pop1.1"::numeric AS total_population + "pop1.1"::numeric AS total_population -- noqa: RF01 FROM {{ source("recipe_sources", "dcp_censusdata_blocks") }} WHERE geogtype = 'CB2020'