Skip to content

Commit

Permalink
Merge pull request #975 from CodeForPhilly/staging
Browse files (browse the repository at this point in the history)
Weekly PR from Staging to Main
  • Loading branch information
CodeWritingCow authored Oct 29, 2024
2 parents 0da3b21 + ad413d4 commit f313207
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ data/src/tmp
.DS_Store
/data/src/local_outputs/
/data/notebooks/
/data/reports/

## App

Expand Down
14 changes: 8 additions & 6 deletions data/src/data_utils/kde.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,13 @@ def apply_kde_to_primary(primary_featurelayer, name, query, resolution=resolutio


def label_percentile(value):
    """Return an ordinal percentile label such as ``"21st Percentile"``.

    The suffix follows English ordinal rules: values ending in 1, 2 or 3
    take "st", "nd" or "rd", except those ending in 11, 12 or 13, which
    take "th" like every other value.

    Args:
        value: Percentile rank. Integral floats (e.g. ``1.0``) are rendered
            without the trailing ``.0``.

    Returns:
        The label string in the form ``"<value><suffix> Percentile"``.
    """
    # Render integral floats as ints so 1.0 yields "1st", not "1.0st".
    if isinstance(value, float) and value.is_integer():
        value = int(value)

    # 11, 12 and 13 (and 111, 212, ...) are the exceptions to the
    # last-digit rule, so they are checked before looking at the last digit.
    if 11 <= value % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")
    return f"{value}{suffix} Percentile"
48 changes: 45 additions & 3 deletions data/src/data_utils/negligent_devs.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,38 @@ def create_standardized_address(row):

def negligent_devs(primary_featurelayer):
devs = primary_featurelayer.gdf
city_owners = devs.loc[~devs["city_owner_agency"].isna()].copy()
non_city_owners = devs.loc[devs["city_owner_agency"].isna()].copy()

print("Columns in 'devs' DataFrame:", devs.columns)

print("Initial properties data:")
print(devs[['opa_id', 'city_owner_agency', 'mailing_street']].head(10))

city_owners = devs.loc[~devs["city_owner_agency"].isna() & (devs["city_owner_agency"] != "")].copy()
non_city_owners = devs.loc[devs["city_owner_agency"].isna() | (devs["city_owner_agency"] == "")].copy()

print(f"City owners shape: {city_owners.shape}, Non-city owners shape: {non_city_owners.shape}")

# Log before standardizing addresses
print("Non-city owners mailing streets before standardization:")
print(non_city_owners[['opa_id', 'mailing_street']].head(10))

non_city_owners.loc[:, "mailing_street"] = (
non_city_owners["mailing_street"].astype(str).apply(standardize_street)
)

print("Non-city owners mailing streets after standardization:")
print(non_city_owners[['opa_id', 'mailing_street']].head(10))

for term in ["ST", "AVE", "RD", "BLVD"]:
non_city_owners.loc[:, "mailing_street"] = non_city_owners[
"mailing_street"
].replace(regex={f"{term}.*": term})

# Log after applying term replacement
print("Non-city owners mailing streets after term replacement:")
print(non_city_owners[['opa_id', 'mailing_street']].head(10))

# Fill missing address components
non_city_owners.loc[:, "mailing_address_1"] = non_city_owners[
"mailing_address_1"
].fillna("")
Expand All @@ -84,33 +104,52 @@ def negligent_devs(primary_featurelayer):
].fillna("")
non_city_owners.loc[:, "mailing_zip"] = non_city_owners["mailing_zip"].fillna("")

# Log addresses before creating standardized address
print("Non-city owners mailing details before creating standardized address:")
print(non_city_owners[['opa_id', 'mailing_street', 'mailing_city_state', 'mailing_zip']].head(10))

non_city_owners.loc[:, "standardized_address"] = non_city_owners.apply(
create_standardized_address, axis=1
)

# Log standardized addresses and counts
print("Standardized addresses with counts:")
address_counts = (
non_city_owners.groupby("standardized_address")
.size()
.reset_index(name="property_count")
)
print(address_counts.head(10))

sorted_address_counts = address_counts.sort_values(
by="property_count", ascending=False
)
print("Top standardized addresses by property count:")
print(sorted_address_counts.head(10))

non_city_owners = non_city_owners.merge(
sorted_address_counts, on="standardized_address", how="left"
)

# Log merged data for city owners
city_owner_counts = (
city_owners.groupby("city_owner_agency")
.size()
.reset_index(name="property_count")
)
print("City owner counts:")
print(city_owner_counts.head(10))

city_owners = city_owners.merge(
city_owner_counts, on="city_owner_agency", how="left"
)

devs_combined = pd.concat([city_owners, non_city_owners], axis=0)

# Final check on the merged data before updating primary_featurelayer
print("Combined data with property counts:")
print(devs_combined[['opa_id', 'property_count']].head(10))

primary_featurelayer.gdf = primary_featurelayer.gdf.merge(
devs_combined[["opa_id", "property_count"]], on="opa_id", how="left"
)
Expand All @@ -119,6 +158,9 @@ def negligent_devs(primary_featurelayer):
)
primary_featurelayer.gdf.loc[:, "negligent_dev"] = (
primary_featurelayer.gdf["n_properties_owned"] > 5
) & (primary_featurelayer.gdf["city_owner_agency"].isna())
) & (primary_featurelayer.gdf["city_owner_agency"].isna() | (primary_featurelayer.gdf["city_owner_agency"] == ""))

print("Final feature layer data with negligent_dev flag:")
print(primary_featurelayer.gdf[['opa_id', 'n_properties_owned', 'negligent_dev']].head(10))

return primary_featurelayer

0 comments on commit f313207

Please sign in to comment.