Is your feature request related to a problem? Please describe.
We've discussed adding satellite image-derived NDVI and NDBI to our ETL pipeline to facilitate the identification of vacant land and vacant buildings.
Describe the solution you'd like
Using the pystac API and Sentinel-2 imagery, calculate NDVI and NDBI from a composite of the 3 most recent Sentinel-2 scenes (the revisit time is roughly 10 days, so this should correspond to about 1 month), clipped to a bounding box covering Philadelphia. Then, extract those NDVI and NDBI raster values to a point on the surface (not the centroid) of every polygon in our full dataset of the 500k+ properties in Philadelphia (the one created in new_etl via main.py, not the script.py pipeline).
Additional context
Here's Python code that should get you started:
# Import required libraries
import odc.stac
import pandas as pd
import planetary_computer
import pystac_client
import xarray as xr
import hvplot.xarray
import panel as pn
from shapely.geometry import box
import geopandas as gpd
# Enable Panel for interactive visualizations
pn.extension()
# Step 1: Connect to Planetary Computer Catalog
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,  # Automatically signs asset requests
)
# List available collections
all_collections = [i.id for i in catalog.get_collections()]
sentinel_collections = [collection for collection in all_collections if "sentinel" in collection]
print("Available Sentinel Collections:", sentinel_collections)
# Query Sentinel-2 L2A scenes over Philadelphia with a cloud cover filter
bbox = [-75.2803, 39.8670, -74.9557, 40.1379] # Philadelphia bounding box
datetime = "2024-06-01/2024-08-31" # Summer 2024
cloudy_less_than = 10 # Percent cloud cover threshold
search = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=bbox,
    datetime=datetime,
    query={"eo:cloud_cover": {"lt": cloudy_less_than}},
)
items = search.item_collection()
# Print Results
print(f"Returned {len(items)} Items:")
if items:
    for item in items:
        print(item.id)
else:
    print("No items found with the current filter.")
selected_item = items[0]
selected_item
# Bands of interest, by common name (odc-stac maps these to the Sentinel-2
# assets, e.g. red=B04, nir=B08, swir16=B11)
bands_of_interest = ["red", "green", "blue", "nir", "swir16"]
# Load all selected items (tiles) into a list of datasets
datasets = []
for item in items:
    ds_tile = odc.stac.stac_load(
        [item],
        bands=bands_of_interest,
        bbox=bbox,
        chunks={},  # Lazy-load with Dask for memory efficiency
    )
    datasets.append(ds_tile)
# Merge all datasets into one composite dataset
ds_merged = xr.concat(datasets, dim="time")
# Create a composite by taking the median across the `time` dimension
ds_composite = ds_merged.median(dim="time")
# note that this chunk may take about a minute to run
da = ds_composite.to_array(dim="band").compute()
da.attrs = selected_item.properties
da.attrs["crs"] = f"epsg:{selected_item.properties['proj:epsg']}"
da.attrs["crs"]
import matplotlib.pyplot as plt
import numpy as np
# Extract the individual RGB bands
red = da.sel(band="red").values
green = da.sel(band="green").values
blue = da.sel(band="blue").values
# Normalize bands, handling NaN values
red_norm = red / np.nanmax(red)
green_norm = green / np.nanmax(green)
blue_norm = blue / np.nanmax(blue)
# Stack into an RGB image
rgb_image = np.dstack((red_norm, green_norm, blue_norm))
# Plot the image
plt.figure(figsize=(10, 10))
plt.imshow(rgb_image)
plt.title("RGB Composite")
plt.axis("off")
plt.show()
red = da.sel(band="red").values
nir = da.sel(band="nir").values
print("Red Band Range:", red.min(), red.max())
print("NIR Band Range:", nir.min(), nir.max())
# Sentinel-2 L2A reflectance is stored as integers scaled by 10,000;
# rescale to 0-1 if the bands still look like raw digital numbers
if red.max() > 1:
    red = red / 10000.0
if nir.max() > 1:
    nir = nir / 10000.0
# Calculate NDVI
ndvi = (nir - red) / (nir + red)
# Replace invalid pixels (NaN from divide-by-zero) with a -9999 placeholder
ndvi = np.nan_to_num(ndvi, nan=-9999)
plt.figure(figsize=(10, 8))
plt.imshow(
    ndvi,
    cmap="RdYlGn",
    vmin=-1,
    vmax=1,  # keep the -9999 placeholder from skewing the color scale
    extent=[da["x"].values.min(), da["x"].values.max(),
            da["y"].values.min(), da["y"].values.max()],
)
plt.colorbar(label="NDVI")
plt.title("NDVI Map")
plt.xlabel("Easting (m)")  # the composite is in the scene's UTM CRS, not lon/lat
plt.ylabel("Northing (m)")
plt.show()
swir = da.sel(band="swir16").values
# Rescale to 0-1 reflectance if needed (nir was already rescaled above)
if swir.max() > 1:
    swir = swir / 10000.0
if nir.max() > 1:
    nir = nir / 10000.0
# Calculate NDBI and replace invalid pixels with the -9999 placeholder
ndbi = (swir - nir) / (swir + nir)
ndbi = np.nan_to_num(ndbi, nan=-9999)
# Plot NDBI
plt.figure(figsize=(10, 8))
plt.imshow(
    ndbi,
    cmap="gray",
    vmin=-1,
    vmax=1,  # keep the -9999 placeholder from skewing the color scale
    extent=[da["x"].values.min(), da["x"].values.max(),
            da["y"].values.min(), da["y"].values.max()],
)
plt.colorbar(label="NDBI")
plt.title("NDBI (Built-up Index)")
plt.xlabel("Easting (m)")
plt.ylabel("Northing (m)")
plt.show()
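The starter code above stops at the NDVI/NDBI rasters. For the second half of the request (attaching those values to a point on the surface of each parcel polygon), here's a rough sketch of one way to do it. It reuses the gpd and xr imports and the ndvi, ndbi, and da objects from above; the parcels file path is a placeholder for however the new_etl dataset gets loaded:
# --- Sketch: sample NDVI/NDBI at a representative point of each parcel ---
# Assumption (not part of the original snippet): `parcels` is the 500k+ property
# GeoDataFrame from the new_etl pipeline, read here from a placeholder path.
parcels = gpd.read_file("path/to/parcels.geojson")  # placeholder path

# Wrap the NDVI/NDBI numpy arrays back onto the composite's x/y coordinates
ndvi_da = xr.DataArray(ndvi, coords={"y": da["y"], "x": da["x"]}, dims=("y", "x"))
ndbi_da = xr.DataArray(ndbi, coords={"y": da["y"], "x": da["x"]}, dims=("y", "x"))

# A representative point is guaranteed to lie on the polygon surface (unlike the
# centroid); reproject the points into the composite's UTM CRS before sampling
points = parcels.geometry.representative_point().to_crs(da.attrs["crs"])
xs = xr.DataArray(points.x.values, dims="parcel")
ys = xr.DataArray(points.y.values, dims="parcel")

# Nearest-neighbor sample of each raster at every parcel point
parcels["ndvi"] = ndvi_da.sel(x=xs, y=ys, method="nearest").values
parcels["ndbi"] = ndbi_da.sel(x=xs, y=ys, method="nearest").values
Note that the nearest-neighbor lookup assumes every parcel falls inside the Philadelphia bounding box used for the composite; zonal statistics over each polygon would be a heavier but more robust alternative if point sampling proves too noisy.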