Skip to content

Commit

Permalink
Move writing geojsons into resample script; use existing run_gdal_subcommand helper
Browse files Browse the repository at this point in the history
  • Loading branch information
dmannarino committed Dec 10, 2024
1 parent 96dca40 commit 0ffabfc
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 36 deletions.
12 changes: 10 additions & 2 deletions batch/python/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from gdal_utils import from_vsi_path
from gfw_pixetl.grids import grid_factory
from logging_utils import listener_configurer, log_client_configurer, log_listener
from tiles_geojson import generate_geojson
from tiles_geojson import generate_geojsons

from pyproj import CRS, Transformer
from shapely.geometry import MultiPolygon, Polygon, shape
Expand Down Expand Up @@ -667,14 +667,22 @@ def resample(
extent_output_file = "extent.geojson"

logger.log(logging.INFO, f"Generating geojsons")
generate_geojson(
tiles_fc, extent_fc = generate_geojsons(
tile_paths,
tiles_output_file,
extent_output_file,
min(16, NUM_PROCESSES)
)
logger.log(logging.INFO, f"Finished generating geojsons")

tiles_txt = json.dumps(tiles_fc, indent=2)
with open(tiles_output_file, "w") as f:
print(tiles_txt, file=f)

extent_txt = json.dumps(extent_fc, indent=2)
with open(extent_output_file, "w") as f:
print(extent_txt, file=f)

logger.log(logging.INFO, f"Uploading geojsons to {target_prefix}")
upload_s3(tiles_output_file, bucket, os.path.join(target_prefix, "geotiff", tiles_output_file))
upload_s3(extent_output_file, bucket, os.path.join(target_prefix, "geotiff", extent_output_file))
Expand Down
53 changes: 19 additions & 34 deletions batch/python/tiles_geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from shapely.geometry import Polygon
from shapely.ops import unary_union

from batch.python.errors import GDALError
from gdal_utils import run_gdal_subcommand


def to_4326(crs: CRS, x: float, y: float) -> Tuple[float, float]:
transformer = Transformer.from_crs(
Expand All @@ -16,21 +19,6 @@ def to_4326(crs: CRS, x: float, y: float) -> Tuple[float, float]:
return transformer.transform(x, y)


def run_gdalinfo(file_path: str) -> Dict[str, Any]:
    """Run ``gdalinfo -json`` on a file and return the parsed JSON report.

    Args:
        file_path: Path handed verbatim to ``gdalinfo``.

    Returns:
        The decoded JSON document that ``gdalinfo`` printed on stdout.

    Raises:
        RuntimeError: If ``gdalinfo`` cannot be started or exits with a
            non-zero status. The underlying exception is chained as
            ``__cause__`` so the original traceback is preserved.
    """
    try:
        result = subprocess.run(
            ["gdalinfo", "-json", file_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"Failed to run gdalinfo on {file_path}: {e.stderr}"
        ) from e
    except OSError as e:
        # The executable is missing or cannot be launched; report it the same
        # way as a failed run instead of leaking FileNotFoundError.
        raise RuntimeError(f"Failed to run gdalinfo on {file_path}: {e}") from e

    # JSON decoding errors propagate unchanged, as before.
    return json.loads(result.stdout)

def extract_metadata_from_gdalinfo(gdalinfo_json: Dict[str, Any]) -> Dict[str, Any]:
"""Extract necessary metadata from the gdalinfo JSON output."""
corner_coordinates = gdalinfo_json["cornerCoordinates"]
Expand All @@ -54,15 +42,26 @@ def extract_metadata_from_gdalinfo(gdalinfo_json: Dict[str, Any]) -> Dict[str, A
def process_file(file_path: str) -> Dict[str, Any]:
    """Run gdalinfo on a single file and extract its metadata.

    Args:
        file_path: Path of the file passed to ``gdalinfo -json``.

    Returns:
        The result of ``extract_metadata_from_gdalinfo`` applied to the
        parsed gdalinfo JSON.

    Raises:
        RuntimeError: If ``run_gdal_subcommand`` raises ``GDALError``; the
            original error is chained as ``__cause__``.
    """
    print(f"Running gdalinfo on {file_path}")
    try:
        # Only stdout carries the JSON report; stderr is not needed here.
        stdout, _ = run_gdal_subcommand(["gdalinfo", "-json", file_path])
    except GDALError as e:
        raise RuntimeError(f"Failed to run gdalinfo on {file_path}: {e}") from e

    gdalinfo_json: Dict[str, Any] = json.loads(stdout)
    return extract_metadata_from_gdalinfo(gdalinfo_json)


def generate_geojson(geotiffs: List[str], tiles_fn: str, extent_fn: str, max_workers: int = None):
def generate_geojsons(
geotiffs: List[str],
tiles_fn: str,
extent_fn: str,
max_workers: int = None
) -> Tuple[FeatureCollection, FeatureCollection]:
"""Generate tiles.geojson and extent.geojson files."""
features = []
polygons = []
errors = []

with ProcessPoolExecutor(max_workers=max_workers) as executor:
future_to_file = {executor.submit(process_file, file): file for file in geotiffs}
Expand All @@ -88,27 +87,13 @@ def generate_geojson(geotiffs: List[str], tiles_fn: str, extent_fn: str, max_wor
# Collect for union
polygons.append(polygon)
except Exception as e:
print(f"Error processing file {file}: {e}")
errors.append(f"File {file}: {e}")

if errors:
raise RuntimeError(f"Failed to process the following files:\n" + "\n".join(errors))
raise RuntimeError(f"Error processing file {file}: {e}")

# Write tiles.geojson
tiles_fc = FeatureCollection(features)
tiles_txt = json.dumps(tiles_fc, indent=2)

with open(tiles_fn, "w") as f:
print(tiles_txt, file=f)

# Create and write extent.geojson
union_geometry = unary_union(polygons)
extent_fc = FeatureCollection([
Feature(geometry=union_geometry.__geo_interface__, properties={})
])
extent_txt = json.dumps(extent_fc, indent=2)
print(f"extent.geojson:\n", extent_txt)

with open(extent_fn, "w") as f:
print(extent_txt, file=f)
print(f"GeoJSON written to {extent_fn}")
return tiles_fc, extent_fc

0 comments on commit 0ffabfc

Please sign in to comment.