Skip to content

Commit

Permalink
Merge pull request #961 from CodeForPhilly/staging
Browse files Browse the repository at this point in the history
Staging
  • Loading branch information
nlebovits authored Oct 18, 2024
2 parents ecdf029 + 397635d commit 1832ac0
Show file tree
Hide file tree
Showing 10 changed files with 329 additions and 245 deletions.
1 change: 1 addition & 0 deletions data/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ services:
- CLEAN_GREEN_GOOGLE_KEY
- PYTHONUNBUFFERED=1
- GOOGLE_CLOUD_BUCKET_NAME
- GOOGLE_CLOUD_PROJECT
- CAGP_SLACK_API_TOKEN
volumes:
- ./src:/usr/src/app
Expand Down
3 changes: 2 additions & 1 deletion data/src/classes/featurelayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ def google_cloud_bucket() -> Bucket:

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
bucket_name = os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl")
project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly")

storage_client = storage.Client(project="clean-and-green-philly")
storage_client = storage.Client(project=project_name)
return storage_client.bucket(bucket_name)


Expand Down
1 change: 1 addition & 0 deletions data/src/data_utils/access_process.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any


def access_process(dataset: Any) -> Any:
"""
Process a dataset to determine the access process for each property based on
Expand Down
123 changes: 4 additions & 119 deletions data/src/data_utils/drug_crimes.py
Original file line number Diff line number Diff line change
@@ -1,125 +1,10 @@
import mapclassify
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from awkde.awkde import GaussianKDE
from classes.featurelayer import FeatureLayer
from config.config import USE_CRS
from constants.services import DRUGCRIME_SQL_QUERY
from rasterio.transform import Affine


def drug_crimes(primary_featurelayer):
# Initialize drug_crimes object
drug_crimes = FeatureLayer(
name="Drug Crimes", carto_sql_queries=DRUGCRIME_SQL_QUERY
)

# Extract x, y coordinates from geometry
x = np.array([])
y = np.array([])

for geom in drug_crimes.gdf.geometry:
coords = np.array(geom.xy)
x = np.concatenate([x, coords[0]])
y = np.concatenate([y, coords[1]])

# Prepare data for KDE
X = np.array(list(zip(x, y)))

# Generate grid for plotting
grid_length = 2500

x_grid, y_grid = (
np.linspace(x.min(), x.max(), grid_length),
np.linspace(y.min(), y.max(), grid_length),
)
xx, yy = np.meshgrid(x_grid, y_grid)
grid_points = np.array([xx.ravel(), yy.ravel()]).T

# Compute adaptive KDE values
print("fitting KDE for drug crime data")
kde = GaussianKDE(glob_bw=0.1, alpha=0.999, diag_cov=True)
kde.fit(X)

z = kde.predict(grid_points)
zz = z.reshape(xx.shape)

# Calculate resolutions and min values
x_res = (x.max() - x.min()) / (len(x_grid) - 1)
y_res = (y.max() - y.min()) / (len(y_grid) - 1)
min_x, min_y = x.min(), y.min()

# Save the plot in tmp folder
plt.pcolormesh(xx, yy, zz)
plt.scatter(x, y, c="red", s=0.005)
plt.colorbar()
plt.savefig("tmp/kde.png")

# Define the affine transform
transform = Affine.translation(min_x, min_y) * Affine.scale(x_res, y_res)

# Export as raster
with rasterio.open(
"tmp/drug_crimes.tif",
"w",
driver="GTiff",
height=zz.shape[0],
width=zz.shape[1],
count=1,
dtype=zz.dtype,
crs=USE_CRS,
transform=transform,
) as dst:
dst.write(zz, 1)
from data_utils.kde import apply_kde_to_primary

primary_featurelayer.gdf["centroid"] = primary_featurelayer.gdf.geometry.centroid

coord_list = [
(x, y)
for x, y in zip(
primary_featurelayer.gdf["centroid"].x,
primary_featurelayer.gdf["centroid"].y,
)
]

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["centroid"])

src = rasterio.open("tmp/drug_crimes.tif")
sampled_values = [x[0] for x in src.sample(coord_list)]

primary_featurelayer.gdf["drugcrime_density"] = sampled_values

percentile_breaks = list(range(101)) # [0, 1, 2, ..., 100]

drugcrime_classifier = mapclassify.Percentiles(
primary_featurelayer.gdf["drugcrime_density"], pct=percentile_breaks
)

primary_featurelayer.gdf["drugcrime_density_percentile"] = primary_featurelayer.gdf[
"drugcrime_density"
].apply(drugcrime_classifier)

def label_percentile(value):
    """Return an ordinal percentile label such as "21st Percentile".

    Args:
        value: Numeric percentile value to label.

    Returns:
        A string made of the value, its English ordinal suffix
        ("st", "nd", "rd" or "th") and the word "Percentile".
    """
    # Literal returns are kept for 1, 2 and 3 so these outputs stay exactly
    # what they were before the suffix logic was generalized.
    if value == 1:
        return "1st Percentile"
    elif value == 2:
        return "2nd Percentile"
    elif value == 3:
        return "3rd Percentile"

    # 11, 12 and 13 take "th" even though they end in 1, 2 and 3.
    if 11 <= value % 100 <= 13:
        suffix = "th"
    else:
        # Otherwise the last digit decides: 21st, 22nd, 23rd, 24th, ...
        last_digit = value % 10
        if last_digit == 1:
            suffix = "st"
        elif last_digit == 2:
            suffix = "nd"
        elif last_digit == 3:
            suffix = "rd"
        else:
            suffix = "th"

    return f"{value}{suffix} Percentile"

primary_featurelayer.gdf["drugcrime_density_label"] = primary_featurelayer.gdf[
"drugcrime_density_percentile"
].apply(label_percentile)

primary_featurelayer.gdf["drugcrime_density_percentile"] = primary_featurelayer.gdf[
"drugcrime_density_percentile"
].astype(float)

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(
columns=["drugcrime_density"]
def drug_crimes(primary_featurelayer):
    """Run the shared KDE step for the "Drug Crimes" layer.

    Passes ``primary_featurelayer`` to ``apply_kde_to_primary`` together with
    the layer name ``"Drug Crimes"`` and ``DRUGCRIME_SQL_QUERY``, and returns
    that call's result unchanged.

    NOTE(review): what the helper computes and returns is defined in
    ``data_utils.kde`` and is not visible here -- confirm against that module.
    """
    layer_name = "Drug Crimes"
    result = apply_kde_to_primary(
        primary_featurelayer, layer_name, DRUGCRIME_SQL_QUERY
    )
    return result

return primary_featurelayer
121 changes: 3 additions & 118 deletions data/src/data_utils/gun_crimes.py
Original file line number Diff line number Diff line change
@@ -1,123 +1,8 @@
import mapclassify
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from awkde.awkde import GaussianKDE
from classes.featurelayer import FeatureLayer
from config.config import USE_CRS
from constants.services import GUNCRIME_SQL_QUERY
from rasterio.transform import Affine


def gun_crimes(primary_featurelayer):
# Initialize gun_crimes object
gun_crimes = FeatureLayer(name="Gun Crimes", carto_sql_queries=GUNCRIME_SQL_QUERY)

# Extract x, y coordinates from geometry
x = np.array([])
y = np.array([])

for geom in gun_crimes.gdf.geometry:
coords = np.array(geom.xy)
x = np.concatenate([x, coords[0]])
y = np.concatenate([y, coords[1]])

# Prepare data for KDE
X = np.array(list(zip(x, y)))

# Generate grid for plotting
grid_length = 2500

x_grid, y_grid = (
np.linspace(x.min(), x.max(), grid_length),
np.linspace(y.min(), y.max(), grid_length),
)
xx, yy = np.meshgrid(x_grid, y_grid)
grid_points = np.array([xx.ravel(), yy.ravel()]).T

# Compute adaptive KDE values
print("fitting KDE for gun crime data")
kde = GaussianKDE(glob_bw=0.1, alpha=0.999, diag_cov=True)
kde.fit(X)

z = kde.predict(grid_points)
zz = z.reshape(xx.shape)

# Calculate resolutions and min values
x_res = (x.max() - x.min()) / (len(x_grid) - 1)
y_res = (y.max() - y.min()) / (len(y_grid) - 1)
min_x, min_y = x.min(), y.min()

# Save the plot in tmp folder
plt.pcolormesh(xx, yy, zz)
plt.scatter(x, y, c="red", s=0.005)
plt.colorbar()
plt.savefig("tmp/kde.png")

# Define the affine transform
transform = Affine.translation(min_x, min_y) * Affine.scale(x_res, y_res)

# Export as raster
with rasterio.open(
"tmp/gun_crimes.tif",
"w",
driver="GTiff",
height=zz.shape[0],
width=zz.shape[1],
count=1,
dtype=zz.dtype,
crs=USE_CRS,
transform=transform,
) as dst:
dst.write(zz, 1)
from data_utils.kde import apply_kde_to_primary

primary_featurelayer.gdf["centroid"] = primary_featurelayer.gdf.geometry.centroid

coord_list = [
(x, y)
for x, y in zip(
primary_featurelayer.gdf["centroid"].x,
primary_featurelayer.gdf["centroid"].y,
)
]

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["centroid"])

src = rasterio.open("tmp/gun_crimes.tif")
sampled_values = [x[0] for x in src.sample(coord_list)]

primary_featurelayer.gdf["guncrime_density"] = sampled_values

percentile_breaks = list(range(101)) # [0, 1, 2, ..., 100]

guncrime_classifier = mapclassify.Percentiles(
primary_featurelayer.gdf["guncrime_density"], pct=percentile_breaks
)

primary_featurelayer.gdf["guncrime_density_percentile"] = primary_featurelayer.gdf[
"guncrime_density"
].apply(guncrime_classifier)

def label_percentile(value):
    """Return an ordinal percentile label such as "21st Percentile".

    Args:
        value: Numeric percentile value to label.

    Returns:
        A string made of the value, its English ordinal suffix
        ("st", "nd", "rd" or "th") and the word "Percentile".
    """
    # Literal returns are kept for 1, 2 and 3 so these outputs stay exactly
    # what they were before the suffix logic was generalized.
    if value == 1:
        return "1st Percentile"
    elif value == 2:
        return "2nd Percentile"
    elif value == 3:
        return "3rd Percentile"

    # 11, 12 and 13 take "th" even though they end in 1, 2 and 3.
    if 11 <= value % 100 <= 13:
        suffix = "th"
    else:
        # Otherwise the last digit decides: 21st, 22nd, 23rd, 24th, ...
        last_digit = value % 10
        if last_digit == 1:
            suffix = "st"
        elif last_digit == 2:
            suffix = "nd"
        elif last_digit == 3:
            suffix = "rd"
        else:
            suffix = "th"

    return f"{value}{suffix} Percentile"

primary_featurelayer.gdf["guncrime_density_label"] = primary_featurelayer.gdf[
"guncrime_density_percentile"
].apply(label_percentile)

primary_featurelayer.gdf["guncrime_density_percentile"] = primary_featurelayer.gdf[
"guncrime_density_percentile"
].astype(float)

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(
columns=["guncrime_density"]
)

return primary_featurelayer
def gun_crimes(primary_featurelayer):
    """Run the shared KDE step for the "Gun Crimes" layer.

    Passes ``primary_featurelayer`` to ``apply_kde_to_primary`` together with
    the layer name ``"Gun Crimes"`` and ``GUNCRIME_SQL_QUERY``, and returns
    that call's result unchanged.

    NOTE(review): what the helper computes and returns is defined in
    ``data_utils.kde`` and is not visible here -- confirm against that module.
    """
    layer_name = "Gun Crimes"
    result = apply_kde_to_primary(
        primary_featurelayer, layer_name, GUNCRIME_SQL_QUERY
    )
    return result
Loading

0 comments on commit 1832ac0

Please sign in to comment.