diff --git a/maxarcat/catalog.py b/maxarcat/catalog.py
index f5a13cd..71c80d8 100644
--- a/maxarcat/catalog.py
+++ b/maxarcat/catalog.py
@@ -10,7 +10,7 @@
 import logging
 from contextlib import contextmanager
 from datetime import datetime
-from typing import Union
+from typing import Iterator, Union
 
 import requests
 
@@ -18,6 +18,9 @@ import maxarcat_client.rest
 
 from maxarcat.exceptions import CatalogError
 
+# Page size to request per search call when paging through query results
+MAX_LIMIT = 100
+
 
 class Catalog:
     default_catalog_url = 'https://api.content.maxar.com/catalog'
@@ -265,32 +267,80 @@ def search(self, collections: list = None, bbox: list = None, intersects: dict =
         return self._call_api(self._stac_api.post_search_stac_with_http_info, body=body)
 
-    def query(self, collections: list = None, bbox: list = None, intersects: dict = None,
-              start_datetime: datetime = None, end_datetime: datetime = None,
-              item_ids: list = None, where: str = None, orderby: str = None,
-              limit: int = None):
-        """
-        Generator that performs a query on the catalog with the given filters, requesting
-        additional pages as necessary.
+    def query(
+        self,
+        collections: list = None,
+        bbox: list = None,
+        intersects: dict = None,
+        start_datetime: datetime = None,
+        end_datetime: datetime = None,
+        item_ids: list = None,
+        where: str = None,
+        orderby: str = None,
+        limit: int = None,
+        complete: bool = None,
+    ) -> Iterator[maxarcat_client.models.Item]:
+        """Query the Maxar catalog, requesting additional pages as necessary.
+
+        :param collections: A list of collections to query against.
+        :param bbox: Bounding box in degrees to search by. Format is a sequence of the form [west, south, east, north]
+            or [west, south, zmin, east, north, zmax]. Optional.
+        :param intersects: Geometry to search by. Dict of GeoJSON. Optional.
+        :param start_datetime: Start of datetime range to search. Optional.
+        :param end_datetime: End of datetime range to search. Optional.
+        :param item_ids: List of item IDs to query.
+        :param where: STAC item properties filter.
+        :param orderby: Columns to order result by.
+        :param limit: Maximum number of items to return.
+        :param complete: If False then include incomplete features in the search. These are features
+            added to the catalog but with incomplete metadata. Most users should only request complete features.
+        :return: Generator yielding the catalog items matching the query.
         """
-        page = 0
-        feature_count = 0
-        while True:
-            # Using this logic we make one more request than we have to. But this way
-            # we don't have to know what the service's page size limit is.
-            page += 1
-            Catalog.logger.info(f'Query page {page}')
-            feature_coll = self.search(collections=collections, bbox=bbox, intersects=intersects,
-                                       start_datetime=start_datetime, end_datetime=end_datetime,
-                                       item_ids=item_ids, where=where, orderby=orderby,
-                                       limit=limit, page=page)
-            for feature in feature_coll.features:
-                yield feature
-            num_features = len(feature_coll.features)
-            feature_count += num_features
-            if not num_features:
-                Catalog.logger.info(f'Total features returned: {feature_count}')
-                return
+        if limit is not None and limit <= MAX_LIMIT:
+            # The whole result fits in a single page, so one request suffices.
+            feature_coll = self.search(
+                collections=collections,
+                bbox=bbox,
+                intersects=intersects,
+                start_datetime=start_datetime,
+                end_datetime=end_datetime,
+                item_ids=item_ids,
+                where=where,
+                orderby=orderby,
+                limit=limit,
+                page=1,
+                complete=complete,
+            )
+            yield from feature_coll.features
+        else:
+            page = 0
+            feature_count = 0
+            while limit is None or feature_count < limit:
+                # Request full pages of MAX_LIMIT items; a short page means the results
+                # are exhausted, so we can stop without issuing an extra request.
+                page += 1
+                Catalog.logger.info(f'Query page {page}')
+                feature_coll = self.search(
+                    collections=collections,
+                    bbox=bbox,
+                    intersects=intersects,
+                    start_datetime=start_datetime,
+                    end_datetime=end_datetime,
+                    item_ids=item_ids,
+                    where=where,
+                    orderby=orderby,
+                    limit=MAX_LIMIT,
+                    page=page,
+                    complete=complete,
+                )
+                num_features = len(feature_coll.features)
+                # Never yield more than the caller's limit, even if the page holds more.
+                num_to_yield = num_features if limit is None else min(limit - feature_count, num_features)
+                yield from feature_coll.features[:num_to_yield]
+                feature_count += num_to_yield
+                if num_features < MAX_LIMIT:
+                    Catalog.logger.info(f'Total features returned: {feature_count}')
+                    return
 
     def get_url(self, url: str) -> bytes:
         """
diff --git a/tests/test_maxarcat.py b/tests/test_maxarcat.py
index f96b4c1..e356b43 100644
--- a/tests/test_maxarcat.py
+++ b/tests/test_maxarcat.py
@@ -62,9 +62,9 @@ def test_search_datetime_both(catalog):
     start = datetime(year=2020, month=1, day=1, hour=12)
     end = datetime(year=2020, month=1, day=2, hour=15)
     collection = 'wv02'
-    features = catalog.query(collections=[collection], start_datetime=start, end_datetime=end)
+    feature_coll = catalog.search(collections=[collection], start_datetime=start, end_datetime=end)
     count = 0
-    for feature in features:
+    for feature in feature_coll.features:
         dt = TestCatalog.parse_datetime_iso8601(feature.properties['datetime'])
         assert start <= dt < end
         assert feature.collection == collection
@@ -199,9 +199,9 @@ def test_query(catalog):
     # per page.  Assume that's still the case and perform a query
     # that should return thousands of items.  Stop after we've read
    # what should be a few pages.
-    feature_coll = catalog.search(bbox=[-100, 40, -105, 45])
+    features = catalog.query(bbox=[-100, 40, -105, 45])
     count = 0
-    for _ in feature_coll.features:
+    for _ in features:
         count += 1
         if count >= 500:
             break
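
Example usage of the reworked query generator (an illustrative sketch, not part of the
patch; how a Catalog instance is constructed and authenticated is not shown in this
diff, so the setup line below is hypothetical):

    from maxarcat import Catalog

    catalog = Catalog()  # hypothetical setup; credential/URL handling is not shown here

    # limit <= MAX_LIMIT (100): a single page-1 search request is issued.
    for item in catalog.query(collections=['wv02'], limit=50):
        print(item.properties['datetime'])

    # limit > MAX_LIMIT, or no limit: pages of MAX_LIMIT items are fetched lazily,
    # and iteration stops after `limit` items or when a short page is returned.
    items = catalog.query(bbox=[-105, 40, -100, 45], limit=250)
    assert sum(1 for _ in items) <= 250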