Is your feature request related to a problem? Please describe.
We've discussed adding satellite image-derived NDVI and NDBI to our ETL pipeline to facilitate the identification of vacant land and vacant buildings.
Describe the solution you'd like
Using the pystac API and Sentinel-2 imagery, calculate NDVI and NDBI from a composite of the 3 most recent Sentinel-2 scenes (the revisit time is roughly 10 days, so this should correspond to about 1 month), clipped to a bounding box covering Philadelphia. Then, extract those NDVI and NDBI raster values to a point on the surface (not the centroid) of every polygon in our full dataset of the 500k+ properties in Philadelphia (the one created in new_etl via main.py, not the script.py pipeline).
Additional context
Here's Python code that should get you started:
# Import required libraries
import odc.stac
import pandas as pd
import planetary_computer
import pystac_client
import xarray as xr
import hvplot.xarray
import panel as pn
from shapely.geometry import box
import geopandas as gpd
# Enable Panel for interactive visualizations
pn.extension()
# Step 1: Connect to Planetary Computer Catalog
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,  # Automatically signs asset requests
)
# List available collections
all_collections = [i.id for i in catalog.get_collections()]
sentinel_collections = [collection for collection in all_collections if "sentinel" in collection]
print("Available Sentinel Collections:", sentinel_collections)
# Query Sentinel-2 L2A scenes over Philadelphia with a cloud cover filter
bbox = [-75.2803, 39.8670, -74.9557, 40.1379] # Philadelphia bounding box
datetime = "2024-06-01/2024-08-31" # Summer 2024
cloudy_less_than = 10 # Percent cloud cover threshold
search = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=bbox,
    datetime=datetime,
    query={"eo:cloud_cover": {"lt": cloudy_less_than}},
)
items = search.item_collection()
# Print Results
print(f"Returned {len(items)} Items:")
if items:
    for item in items:
        print(item.id)
else:
    print("No items found with the current filter.")
selected_item = items[0]
selected_item
# Bands of interest, by common name (odc-stac maps these to the Sentinel-2
# assets, e.g. red=B04, nir=B08, swir16=B11)
bands_of_interest = ["red", "green", "blue", "nir", "swir16"]
# Load all selected items (tiles) into a list of datasets
datasets = []
for item in items:
    ds_tile = odc.stac.stac_load(
        [item],
        bands=bands_of_interest,
        bbox=bbox,
        chunks={},  # Lazy-load with Dask for memory efficiency
    )
    datasets.append(ds_tile)
# Merge all datasets into one composite dataset
ds_merged = xr.concat(datasets, dim="time")
# Create a composite by taking the median across the `time` dimension
ds_composite = ds_merged.median(dim="time")
# note that this chunk may take about a minute to run
da = ds_composite.to_array(dim="band").compute()
da.attrs = selected_item.properties
da.attrs["crs"] = f"epsg:{selected_item.properties['proj:epsg']}"
da.attrs["crs"]
import matplotlib.pyplot as plt
import numpy as np
# Extract the individual RGB bands
red = da.sel(band="red").values
green = da.sel(band="green").values
blue = da.sel(band="blue").values
# Normalize bands, handling NaN values
red_norm = red / np.nanmax(red)
green_norm = green / np.nanmax(green)
blue_norm = blue / np.nanmax(blue)
# Stack into an RGB image
rgb_image = np.dstack((red_norm, green_norm, blue_norm))
# Plot the image
plt.figure(figsize=(10, 10))
plt.imshow(rgb_image)
plt.title("RGB Composite")
plt.axis("off")
plt.show()
red = da.sel(band="red").values
nir = da.sel(band="nir").values
print("Red Band Range:", red.min(), red.max())
print("NIR Band Range:", nir.min(), nir.max())
# Sentinel-2 L2A reflectance is stored as integers scaled by 10,000;
# rescale to 0-1 if the bands still look like raw digital numbers
if red.max() > 1:
    red = red / 10000.0
if nir.max() > 1:
    nir = nir / 10000.0
# Calculate NDVI
ndvi = (nir - red) / (nir + red)
# Replace invalid pixels (NaN from divide-by-zero) with a -9999 placeholder
ndvi = np.nan_to_num(ndvi, nan=-9999)
plt.figure(figsize=(10, 8))
plt.imshow(
    ndvi,
    cmap="RdYlGn",
    vmin=-1,
    vmax=1,  # keep the -9999 placeholder from skewing the color scale
    extent=[da["x"].values.min(), da["x"].values.max(),
            da["y"].values.min(), da["y"].values.max()],
)
plt.colorbar(label="NDVI")
plt.title("NDVI Map")
plt.xlabel("Easting (m)")  # the composite is in the scene's UTM CRS, not lon/lat
plt.ylabel("Northing (m)")
plt.show()
swir = da.sel(band="swir16").values
# Rescale to 0-1 reflectance if needed (nir was already rescaled above)
if swir.max() > 1:
    swir = swir / 10000.0
if nir.max() > 1:
    nir = nir / 10000.0
# Calculate NDBI and replace invalid pixels with the -9999 placeholder
ndbi = (swir - nir) / (swir + nir)
ndbi = np.nan_to_num(ndbi, nan=-9999)
# Plot NDBI
plt.figure(figsize=(10, 8))
plt.imshow(
    ndbi,
    cmap="gray",
    vmin=-1,
    vmax=1,  # keep the -9999 placeholder from skewing the color scale
    extent=[da["x"].values.min(), da["x"].values.max(),
            da["y"].values.min(), da["y"].values.max()],
)
plt.colorbar(label="NDBI")
plt.title("NDBI (Built-up Index)")
plt.xlabel("Easting (m)")
plt.ylabel("Northing (m)")
plt.show()
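The starter code above stops at the NDVI/NDBI rasters. For the second half of the request (attaching those values to a point on the surface of each parcel polygon), here's a rough sketch of one way to do it. It reuses the gpd and xr imports and the ndvi, ndbi, and da objects from above; the parcels file path is a placeholder for however the new_etl dataset gets loaded:
# --- Sketch: sample NDVI/NDBI at a representative point of each parcel ---
# Assumption (not part of the original snippet): `parcels` is the 500k+ property
# GeoDataFrame from the new_etl pipeline, read here from a placeholder path.
parcels = gpd.read_file("path/to/parcels.geojson")  # placeholder path

# Wrap the NDVI/NDBI numpy arrays back onto the composite's x/y coordinates
ndvi_da = xr.DataArray(ndvi, coords={"y": da["y"], "x": da["x"]}, dims=("y", "x"))
ndbi_da = xr.DataArray(ndbi, coords={"y": da["y"], "x": da["x"]}, dims=("y", "x"))

# A representative point is guaranteed to lie on the polygon surface (unlike the
# centroid); reproject the points into the composite's UTM CRS before sampling
points = parcels.geometry.representative_point().to_crs(da.attrs["crs"])
xs = xr.DataArray(points.x.values, dims="parcel")
ys = xr.DataArray(points.y.values, dims="parcel")

# Nearest-neighbor sample of each raster at every parcel point
parcels["ndvi"] = ndvi_da.sel(x=xs, y=ys, method="nearest").values
parcels["ndbi"] = ndbi_da.sel(x=xs, y=ys, method="nearest").values
Note that the nearest-neighbor lookup assumes every parcel falls inside the Philadelphia bounding box used for the composite; zonal statistics over each polygon would be a heavier but more robust alternative if point sampling proves too noisy.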