Skip to content

Commit

Permalink
Merge pull request #952 from CodeForPhilly/staging
Browse files Browse the repository at this point in the history
Weekly PR from Staging to Main
CodeWritingCow authored Oct 14, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
2 parents e18127b + c13fec8 commit 52ae6ca
Showing 9 changed files with 143 additions and 42 deletions.
30 changes: 26 additions & 4 deletions data/src/data_utils/access_process.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
def access_process(dataset):
from typing import Any

def access_process(dataset: Any) -> Any:
"""
Process a dataset to determine the access process for each property based on
city ownership and market value. The result is added as a new column in the dataset.
Args:
dataset (Any): The dataset containing a GeoDataFrame named `gdf` with
columns "city_owner_agency" and "market_value".
Returns:
Any: The updated dataset with an additional "access_process" column.
Side Effects:
Prints the distribution of the "access_process" column.
"""
access_processes = []

for _, row in dataset.gdf.iterrows():
@@ -9,9 +25,9 @@ def access_process(dataset):
)

# Simplified decision logic
if city_owner_agency == "PLB":
access_process = "Land Bank"
elif city_owner_agency in ["PRA", "PHDC"]:
if city_owner_agency == "Land Bank (PHDC)":
access_process = "Go through Land Bank"
elif city_owner_agency == "PRA":
access_process = "Do Nothing"
else:
if market_value_over_1000:
@@ -22,4 +38,10 @@ def access_process(dataset):
access_processes.append(access_process)

dataset.gdf["access_process"] = access_processes

# Print the distribution of "access_process"
distribution = dataset.gdf["access_process"].value_counts()
print("Distribution of access process:")
print(distribution)

return dataset
20 changes: 19 additions & 1 deletion data/src/data_utils/city_owned_properties.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
from typing import Any
from classes.featurelayer import FeatureLayer
from constants.services import CITY_OWNED_PROPERTIES_TO_LOAD

def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer:
"""
Processes city-owned property data by joining it with the primary feature layer,
renaming columns, and updating access information for properties based on ownership.
All instances where the "city_owner_agency" is "PLB" are changed to "Land Bank (PHDC)".
def city_owned_properties(primary_featurelayer):
Args:
primary_featurelayer (FeatureLayer): The primary feature layer to which city-owned
property data will be joined.
Returns:
FeatureLayer: The updated primary feature layer with processed city ownership
information.
"""
city_owned_properties = FeatureLayer(
name="City Owned Properties",
esri_rest_urls=CITY_OWNED_PROPERTIES_TO_LOAD,
@@ -60,4 +73,9 @@ def city_owned_properties(primary_featurelayer):
"side_yard_eligible"
].fillna("No")

# Update all instances where city_owner_agency is "PLB" to "Land Bank (PHDC)"
primary_featurelayer.gdf.loc[
primary_featurelayer.gdf["city_owner_agency"] == "PLB", "city_owner_agency"
] = "Land Bank (PHDC)"

return primary_featurelayer
2 changes: 1 addition & 1 deletion data/src/data_utils/conservatorship.py
Original file line number Diff line number Diff line change
@@ -44,7 +44,7 @@ def conservatorship(primary_featurelayer):
sale_date_6_months_ago = False

# Simplified decision logic
if city_owner_agency == "PLB" or (
if city_owner_agency == "Land Bank (PHDC)" or (
not sale_date_6_months_ago and market_value_over_1000
):
conservatorship = "No"
65 changes: 52 additions & 13 deletions data/src/data_utils/l_and_i.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,43 @@
import pandas as pd
import geopandas as gpd
from typing import List
from classes.featurelayer import FeatureLayer
from constants.services import COMPLAINTS_SQL_QUERY, VIOLATIONS_SQL_QUERY

def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer:
"""
Process L&I (Licenses and Inspections) data for complaints and violations.
This function filters and processes L&I complaints and violations data,
joining it with the primary feature layer based on spatial relationships
and OPA (Office of Property Assessment) identifiers.
Args:
primary_featurelayer (FeatureLayer): The primary feature layer to join L&I data to.
Returns:
FeatureLayer: The primary feature layer updated with L&I data.
"""
keywords: List[str] = [
'dumping', 'blight', 'rubbish', 'weeds', 'graffiti',
'abandoned', 'sanitation', 'litter', 'vacant', 'trash',
'unsafe'
]

def l_and_i(primary_featurelayer):
# Load complaints data from L&I
l_and_i_complaints = FeatureLayer(
l_and_i_complaints: FeatureLayer = FeatureLayer(
name="LI Complaints",
carto_sql_queries=COMPLAINTS_SQL_QUERY
)

# filter for only Status = 'Open'
# Filter for rows where 'subject' contains any of the keywords
l_and_i_complaints.gdf = l_and_i_complaints.gdf[
l_and_i_complaints.gdf["status"] == "Open"
l_and_i_complaints.gdf["subject"].str.lower().str.contains('|'.join(keywords))
]

# Filter for only Status = 'Open'
l_and_i_complaints.gdf = l_and_i_complaints.gdf[
l_and_i_complaints.gdf["status"].str.lower() == "open"
]

# Group by geometry and concatenate the violationcodetitle values into a list with a semicolon separator
@@ -30,13 +55,18 @@ def l_and_i(primary_featurelayer):
)

# Load data for violations from L&I
l_and_i_violations = FeatureLayer(
l_and_i_violations: FeatureLayer = FeatureLayer(
name="LI Violations",
carto_sql_queries=VIOLATIONS_SQL_QUERY,
from_xy=True
)

all_violations_count_df = (
# Filter for rows where 'violationcodetitle' contains any of the keywords (case-insensitive); NaN titles are treated as empty strings
l_and_i_violations.gdf = l_and_i_violations.gdf[
l_and_i_violations.gdf["violationcodetitle"].fillna('').str.lower().str.contains('|'.join(keywords))
]

all_violations_count_df: pd.DataFrame = (
l_and_i_violations.gdf.groupby("opa_account_num")
.count()
.reset_index()[["opa_account_num", "violationnumber", "geometry"]]
@@ -45,11 +75,11 @@ def l_and_i(primary_featurelayer):
columns={"violationnumber": "all_violations_past_year"}
)
# Filter for only violations whose violationstatus is open
violations_gdf = l_and_i_violations.gdf[
(l_and_i_violations.gdf["violationstatus"] == "OPEN")
violations_gdf: gpd.GeoDataFrame = l_and_i_violations.gdf[
(l_and_i_violations.gdf["violationstatus"].str.lower() == "open")
]

open_violations_count_df = (
open_violations_count_df: pd.DataFrame = (
violations_gdf.groupby("opa_account_num")
.count()
.reset_index()[["opa_account_num", "violationnumber", "geometry"]]
@@ -58,7 +88,7 @@ def l_and_i(primary_featurelayer):
columns={"violationnumber": "open_violations_past_year"}
)
# join the all_violations_count_df and open_violations_count_df dataframes on opa_account_num
violations_count_gdf = all_violations_count_df.merge(
violations_count_gdf: gpd.GeoDataFrame = all_violations_count_df.merge(
open_violations_count_df, how="left", on="opa_account_num"
)

@@ -96,7 +126,7 @@ def l_and_i(primary_featurelayer):
)

# Complaints need a spatial join, but we need to take special care to merge on just the parcel geoms first to get opa_id
complaints_with_opa_id = primary_featurelayer.gdf.sjoin(
complaints_with_opa_id: gpd.GeoDataFrame = primary_featurelayer.gdf.sjoin(
l_and_i_complaints.gdf, how="left", predicate="contains"
)
complaints_with_opa_id.drop(columns=["index_right"], inplace=True)
@@ -109,7 +139,16 @@ def l_and_i(primary_featurelayer):
)

# Clean up the NaN values in the li_complaints column
def remove_nan_strings(x):
def remove_nan_strings(x: str) -> str | None:
"""
Remove 'nan' strings from the input.
Args:
x (str): Input string.
Returns:
str | None: Cleaned string or None if only 'nan' values.
"""
if x == "nan" or ("nan;" in x):
return None
else:
@@ -136,4 +175,4 @@ def remove_nan_strings(x):
.astype(int)
)

return primary_featurelayer
return primary_featurelayer
16 changes: 0 additions & 16 deletions data/src/data_utils/llc_owner.py

This file was deleted.

37 changes: 37 additions & 0 deletions data/src/data_utils/owner_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import pandas as pd
from classes.featurelayer import FeatureLayer

def owner_type(primary_featurelayer: FeatureLayer) -> FeatureLayer:
    """
    Classify the owner of each property and store it in an 'owner_type' column.

    The classification reads the 'owner_1', 'owner_2', and 'city_owner_agency'
    columns of ``primary_featurelayer.gdf`` and applies these rules in order:

    - "Public" if 'city_owner_agency' is not NA.
    - "Business (LLC)" if 'city_owner_agency' is NA and " llc" (case-insensitive,
      with a leading space) appears in 'owner_1' or 'owner_2'.
    - "Individual" otherwise.

    Args:
        primary_featurelayer (FeatureLayer): The feature layer whose ``gdf``
            holds the property ownership columns.

    Returns:
        FeatureLayer: The same feature layer, with the 'owner_type' column
        added to its ``gdf`` in place.
    """
    gdf = primary_featurelayer.gdf

    def classify(owner_1, owner_2, city_owner_agency) -> str:
        # Public ownership takes precedence over anything in the owner names.
        if pd.notna(city_owner_agency):
            return "Public"
        # str() turns missing owner values into "nan"/"none", neither of which
        # can match. The leading space in " llc" means a name that merely
        # starts with "LLC" is not treated as a business.
        if " llc" in str(owner_1).lower() or " llc" in str(owner_2).lower():
            return "Business (LLC)"
        return "Individual"

    # Walk the three relevant columns in lockstep instead of using iterrows(),
    # which materialises a full Series for every row.
    gdf["owner_type"] = [
        classify(owner_1, owner_2, city_owner_agency)
        for owner_1, owner_2, city_owner_agency in zip(
            gdf["owner_1"], gdf["owner_2"], gdf["city_owner_agency"]
        )
    ]

    return primary_featurelayer
4 changes: 2 additions & 2 deletions data/src/script.py
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@
from data_utils.gun_crimes import gun_crimes
from data_utils.imm_dang_buildings import imm_dang_buildings
from data_utils.l_and_i import l_and_i
from data_utils.llc_owner import llc_owner
from data_utils.owner_type import owner_type
from data_utils.nbhoods import nbhoods
from data_utils.negligent_devs import negligent_devs
from data_utils.opa_properties import opa_properties
@@ -50,7 +50,7 @@
imm_dang_buildings,
tactical_urbanism,
conservatorship,
llc_owner,
owner_type,
community_gardens,
park_priority,
ppr_properties,
4 changes: 2 additions & 2 deletions src/components/FilterView.tsx
Original file line number Diff line number Diff line change
@@ -51,9 +51,9 @@ const filters = [
type: 'buttonGroup',
},
{
property: 'llc_owner',
property: 'owner_type',
display: 'Owner',
options: ['Yes', 'No'],
options: ['Public', 'Business (LLC)', 'Individual'],
type: 'buttonGroup',
},
];
7 changes: 4 additions & 3 deletions src/components/Filters/DimensionFilter.tsx
Original file line number Diff line number Diff line change
@@ -19,9 +19,10 @@ type OptionDisplayMapping = {
};

const optionsDisplayMapping: OptionDisplayMapping = {
llc_owner: {
Yes: 'Business',
No: 'Individual',
owner_type: {
Public: 'Public',
'Business (LLC)': 'Business (LLC)',
Individual: 'Individual',
},
};

0 comments on commit 52ae6ca

Please sign in to comment.