From cabf0e18201897baaaee7b4fb38861c0bfa48db2 Mon Sep 17 00:00:00 2001 From: nlebovits Date: Thu, 10 Oct 2024 19:12:07 -0400 Subject: [PATCH 1/4] filter li complaints and violations for only blight-related instances --- data/src/data_utils/l_and_i.py | 65 +++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/data/src/data_utils/l_and_i.py b/data/src/data_utils/l_and_i.py index 20cc9790..27f28147 100644 --- a/data/src/data_utils/l_and_i.py +++ b/data/src/data_utils/l_and_i.py @@ -1,18 +1,43 @@ import pandas as pd +import geopandas as gpd +from typing import List from classes.featurelayer import FeatureLayer from constants.services import COMPLAINTS_SQL_QUERY, VIOLATIONS_SQL_QUERY +def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Process L&I (Licenses and Inspections) data for complaints and violations. + + This function filters and processes L&I complaints and violations data, + joining it with the primary feature layer based on spatial relationships + and OPA (Office of Property Assessment) identifiers. + + Args: + primary_featurelayer (FeatureLayer): The primary feature layer to join L&I data to. + + Returns: + FeatureLayer: The primary feature layer updated with L&I data. 
+ """ + keywords: List[str] = [ + 'dumping', 'blight', 'rubbish', 'weeds', 'graffiti', + 'abandoned', 'sanitation', 'litter', 'vacant', 'trash', + 'unsafe' + ] -def l_and_i(primary_featurelayer): # Load complaints data from L&I - l_and_i_complaints = FeatureLayer( + l_and_i_complaints: FeatureLayer = FeatureLayer( name="LI Complaints", carto_sql_queries=COMPLAINTS_SQL_QUERY ) - # filter for only Status = 'Open' + # Filter for rows where 'subject' contains any of the keywords l_and_i_complaints.gdf = l_and_i_complaints.gdf[ - l_and_i_complaints.gdf["status"] == "Open" + l_and_i_complaints.gdf["subject"].str.lower().str.contains('|'.join(keywords)) + ] + + # Filter for only Status = 'Open' + l_and_i_complaints.gdf = l_and_i_complaints.gdf[ + l_and_i_complaints.gdf["status"].str.lower() == "open" ] # Group by geometry and concatenate the violationcodetitle values into a list with a semicolon separator @@ -30,13 +55,18 @@ def l_and_i(primary_featurelayer): ) # Load data for violations from L&I - l_and_i_violations = FeatureLayer( + l_and_i_violations: FeatureLayer = FeatureLayer( name="LI Violations", carto_sql_queries=VIOLATIONS_SQL_QUERY, from_xy=True ) - all_violations_count_df = ( + # Filter for rows where 'violationcodetitle' contains any of the keywords, handling NaN values + l_and_i_violations.gdf = l_and_i_violations.gdf[ + l_and_i_violations.gdf["violationcodetitle"].fillna('').str.lower().str.contains('|'.join(keywords)) + ] + + all_violations_count_df: pd.DataFrame = ( l_and_i_violations.gdf.groupby("opa_account_num") .count() .reset_index()[["opa_account_num", "violationnumber", "geometry"]] @@ -45,11 +75,11 @@ def l_and_i(primary_featurelayer): columns={"violationnumber": "all_violations_past_year"} ) # filter for only cases where the casestatus is 'IN VIOLATION' or 'UNDER INVESTIGATION' - violations_gdf = l_and_i_violations.gdf[ - (l_and_i_violations.gdf["violationstatus"] == "OPEN") + violations_gdf: gpd.GeoDataFrame = l_and_i_violations.gdf[ +
(l_and_i_violations.gdf["violationstatus"].str.lower() == "open") ] - open_violations_count_df = ( + open_violations_count_df: pd.DataFrame = ( violations_gdf.groupby("opa_account_num") .count() .reset_index()[["opa_account_num", "violationnumber", "geometry"]] @@ -58,7 +88,7 @@ def l_and_i(primary_featurelayer): columns={"violationnumber": "open_violations_past_year"} ) # join the all_violations_count_df and open_violations_count_df dataframes on opa_account_num - violations_count_gdf = all_violations_count_df.merge( + violations_count_gdf: gpd.GeoDataFrame = all_violations_count_df.merge( open_violations_count_df, how="left", on="opa_account_num" ) @@ -96,7 +126,7 @@ def l_and_i(primary_featurelayer): ) # Complaints need a spatial join, but we need to take special care to merge on just the parcel geoms first to get opa_id - complaints_with_opa_id = primary_featurelayer.gdf.sjoin( + complaints_with_opa_id: gpd.GeoDataFrame = primary_featurelayer.gdf.sjoin( l_and_i_complaints.gdf, how="left", predicate="contains" ) complaints_with_opa_id.drop(columns=["index_right"], inplace=True) @@ -109,7 +139,16 @@ def l_and_i(primary_featurelayer): ) # Clean up the NaN values in the li_complaints column - def remove_nan_strings(x): + def remove_nan_strings(x: str) -> str | None: + """ + Remove 'nan' strings from the input. + + Args: + x (str): Input string. + + Returns: + str | None: Cleaned string, or None if the input is 'nan' or contains 'nan;'.
+ """ if x == "nan" or ("nan;" in x): return None else: @@ -136,4 +175,4 @@ def remove_nan_strings(x): .astype(int) ) - return primary_featurelayer + return primary_featurelayer \ No newline at end of file From 2a7b1307d0c9aa2a0a6849aac12e291cd92af573 Mon Sep 17 00:00:00 2001 From: nlebovits Date: Thu, 10 Oct 2024 19:29:00 -0400 Subject: [PATCH 2/4] update owner types to include public --- data/src/data_utils/llc_owner.py | 16 ---------- data/src/data_utils/owner_type.py | 37 ++++++++++++++++++++++ data/src/script.py | 4 +-- src/components/FilterView.tsx | 4 +-- src/components/Filters/DimensionFilter.tsx | 7 ++-- 5 files changed, 45 insertions(+), 23 deletions(-) delete mode 100644 data/src/data_utils/llc_owner.py create mode 100644 data/src/data_utils/owner_type.py diff --git a/data/src/data_utils/llc_owner.py b/data/src/data_utils/llc_owner.py deleted file mode 100644 index 181c5772..00000000 --- a/data/src/data_utils/llc_owner.py +++ /dev/null @@ -1,16 +0,0 @@ -def llc_owner(primary_featurelayer): - llc_owners = [] - - for _, row in primary_featurelayer.gdf.iterrows(): - # Extracting owner1 and owner2 from the row - owner1 = str(row["owner_1"]).lower() - owner2 = str(row["owner_2"]).lower() - - # Checking if " llc" is in either owner1 or owner2 - if " llc" in owner1 or " llc" in owner2: - llc_owners.append("Yes") - else: - llc_owners.append("No") - - primary_featurelayer.gdf["llc_owner"] = llc_owners - return primary_featurelayer diff --git a/data/src/data_utils/owner_type.py b/data/src/data_utils/owner_type.py new file mode 100644 index 00000000..291364df --- /dev/null +++ b/data/src/data_utils/owner_type.py @@ -0,0 +1,37 @@ +import pandas as pd +from classes.featurelayer import FeatureLayer + +def owner_type(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Determines the ownership type for each property in the primary feature layer based on + the 'owner_1', 'owner_2', and 'city_owner_agency' columns. 
The ownership type is set as: + - "Public" if 'city_owner_agency' is not NA. + - "Business (LLC)" if 'city_owner_agency' is NA and "LLC" is found in 'owner_1' or 'owner_2'. + - "Individual" if 'city_owner_agency' is NA and "LLC" is not found in 'owner_1' or 'owner_2'. + + Args: + primary_featurelayer (FeatureLayer): The feature layer containing property ownership data. + + Returns: + FeatureLayer: The updated feature layer with the 'owner_type' column added. + """ + owner_types = [] + + for _, row in primary_featurelayer.gdf.iterrows(): + # Extract owner1, owner2, and city_owner_agency + owner1 = str(row["owner_1"]).lower() + owner2 = str(row["owner_2"]).lower() + city_owner_agency = row["city_owner_agency"] + + # Determine ownership type based on the conditions + if pd.notna(city_owner_agency): + owner_types.append("Public") + elif " llc" in owner1 or " llc" in owner2: + owner_types.append("Business (LLC)") + else: + owner_types.append("Individual") + + # Add the 'owner_type' column to the GeoDataFrame + primary_featurelayer.gdf["owner_type"] = owner_types + + return primary_featurelayer diff --git a/data/src/script.py b/data/src/script.py index 763de925..c5f3a8aa 100644 --- a/data/src/script.py +++ b/data/src/script.py @@ -16,7 +16,7 @@ from data_utils.gun_crimes import gun_crimes from data_utils.imm_dang_buildings import imm_dang_buildings from data_utils.l_and_i import l_and_i -from data_utils.llc_owner import llc_owner +from data_utils.owner_type import owner_type from data_utils.nbhoods import nbhoods from data_utils.negligent_devs import negligent_devs from data_utils.opa_properties import opa_properties @@ -50,7 +50,7 @@ imm_dang_buildings, tactical_urbanism, conservatorship, - llc_owner, + owner_type, community_gardens, park_priority, ppr_properties, diff --git a/src/components/FilterView.tsx b/src/components/FilterView.tsx index fab565d7..2666f12c 100644 --- a/src/components/FilterView.tsx +++ b/src/components/FilterView.tsx @@ -51,9 +51,9 @@ const 
filters = [ type: 'buttonGroup', }, { - property: 'llc_owner', + property: 'owner_type', display: 'Owner', - options: ['Yes', 'No'], + options: ['Public', 'Business (LLC)', 'Individual'], type: 'buttonGroup', }, ]; diff --git a/src/components/Filters/DimensionFilter.tsx b/src/components/Filters/DimensionFilter.tsx index fdd1a391..0c7f374a 100644 --- a/src/components/Filters/DimensionFilter.tsx +++ b/src/components/Filters/DimensionFilter.tsx @@ -19,9 +19,10 @@ type OptionDisplayMapping = { }; const optionsDisplayMapping: OptionDisplayMapping = { - llc_owner: { - Yes: 'Business', - No: 'Individual', + owner_type: { + Public: 'Public', + 'Business (LLC)': 'Business (LLC)', + Individual: 'Individual', }, }; From 5397e2e1be4116c21824491b0ac029fbc45236b0 Mon Sep 17 00:00:00 2001 From: nlebovits Date: Thu, 10 Oct 2024 20:56:42 -0400 Subject: [PATCH 3/4] lint and format --- data/src/data_utils/access_process.py | 30 +++++++++++++++++--- data/src/data_utils/city_owned_properties.py | 25 +++++++++++++++- data/src/data_utils/conservatorship.py | 2 +- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/data/src/data_utils/access_process.py b/data/src/data_utils/access_process.py index d1d2deb9..c888f5cc 100644 --- a/data/src/data_utils/access_process.py +++ b/data/src/data_utils/access_process.py @@ -1,4 +1,20 @@ -def access_process(dataset): +from typing import Any + +def access_process(dataset: Any) -> Any: + """ + Process a dataset to determine the access process for each property based on + city ownership and market value. The result is added as a new column in the dataset. + + Args: + dataset (Any): The dataset containing a GeoDataFrame named `gdf` with + columns "city_owner_agency" and "market_value". + + Returns: + Any: The updated dataset with an additional "access_process" column. + + Side Effects: + Prints the distribution of the "access_process" column. 
+ """ access_processes = [] for _, row in dataset.gdf.iterrows(): @@ -9,9 +25,9 @@ def access_process(dataset): ) # Simplified decision logic - if city_owner_agency == "PLB": - access_process = "Land Bank" - elif city_owner_agency in ["PRA", "PHDC"]: + if city_owner_agency == "Land Bank (PHDC)": + access_process = "Go through Land Bank" + elif city_owner_agency == "PRA": access_process = "Do Nothing" else: if market_value_over_1000: @@ -22,4 +38,10 @@ def access_process(dataset): access_processes.append(access_process) dataset.gdf["access_process"] = access_processes + + # Print the distribution of "access_process" + distribution = dataset.gdf["access_process"].value_counts() + print("Distribution of access process:") + print(distribution) + return dataset diff --git a/data/src/data_utils/city_owned_properties.py b/data/src/data_utils/city_owned_properties.py index 602871d7..b4277ca8 100644 --- a/data/src/data_utils/city_owned_properties.py +++ b/data/src/data_utils/city_owned_properties.py @@ -1,8 +1,22 @@ +from typing import Any from classes.featurelayer import FeatureLayer from constants.services import CITY_OWNED_PROPERTIES_TO_LOAD +def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: + """ + Processes city-owned property data by joining it with the primary feature layer, + renaming columns, and updating access information for properties based on ownership. + All instances where the "city_owner_agency" is "PLB" are changed to "Land Bank (PHDC)". + The function also prints the counts of properties with "PLB" and "Land Bank (PHDC)" agencies. -def city_owned_properties(primary_featurelayer): + Args: + primary_featurelayer (FeatureLayer): The primary feature layer to which city-owned + property data will be joined. + + Returns: + FeatureLayer: The updated primary feature layer with processed city ownership + information. 
+ """ city_owned_properties = FeatureLayer( name="City Owned Properties", esri_rest_urls=CITY_OWNED_PROPERTIES_TO_LOAD, @@ -60,4 +74,13 @@ def city_owned_properties(primary_featurelayer): "side_yard_eligible" ].fillna("No") + # Update all instances where city_owner_agency is "PLB" to "Land Bank (PHDC)" + primary_featurelayer.gdf.loc[ + primary_featurelayer.gdf["city_owner_agency"] == "PLB", "city_owner_agency" + ] = "Land Bank (PHDC)" + + # Print the counts for "PLB" and "Land Bank (PHDC)" + plb_count = primary_featurelayer.gdf["city_owner_agency"].eq("PLB").sum() + land_bank_count = primary_featurelayer.gdf["city_owner_agency"].eq("Land Bank (PHDC)").sum() + return primary_featurelayer diff --git a/data/src/data_utils/conservatorship.py b/data/src/data_utils/conservatorship.py index 349c9e44..5f9c9793 100644 --- a/data/src/data_utils/conservatorship.py +++ b/data/src/data_utils/conservatorship.py @@ -44,7 +44,7 @@ def conservatorship(primary_featurelayer): sale_date_6_months_ago = False # Simplified decision logic - if city_owner_agency == "PLB" or ( + if city_owner_agency == "Land Bank (PHDC)" or ( not sale_date_6_months_ago and market_value_over_1000 ): conservatorship = "No" From 5b9f4aae1808832a49a7f6fc2e032a4547a56ac1 Mon Sep 17 00:00:00 2001 From: nlebovits Date: Thu, 10 Oct 2024 20:59:08 -0400 Subject: [PATCH 4/4] remove unneeded print statements --- data/src/data_utils/city_owned_properties.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/data/src/data_utils/city_owned_properties.py b/data/src/data_utils/city_owned_properties.py index b4277ca8..a5b21980 100644 --- a/data/src/data_utils/city_owned_properties.py +++ b/data/src/data_utils/city_owned_properties.py @@ -7,7 +7,6 @@ def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: Processes city-owned property data by joining it with the primary feature layer, renaming columns, and updating access information for properties based on ownership. 
All instances where the "city_owner_agency" is "PLB" are changed to "Land Bank (PHDC)". - The function also prints the counts of properties with "PLB" and "Land Bank (PHDC)" agencies. Args: primary_featurelayer (FeatureLayer): The primary feature layer to which city-owned @@ -79,8 +78,4 @@ def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer: primary_featurelayer.gdf["city_owner_agency"] == "PLB", "city_owner_agency" ] = "Land Bank (PHDC)" - # Print the counts for "PLB" and "Land Bank (PHDC)" - plb_count = primary_featurelayer.gdf["city_owner_agency"].eq("PLB").sum() - land_bank_count = primary_featurelayer.gdf["city_owner_agency"].eq("Land Bank (PHDC)").sum() - return primary_featurelayer