Skip to content

Commit

Permalink
Fix a cursor-tracking bug in paginated results that led to data loss. (#54)
Browse files Browse the repository at this point in the history
* Break out of the pagination loop only after the fetched data has been cached in memory

* Do not log whether pagination is enabled or disabled on every scrape; log it only once, in the main function
  • Loading branch information
ubiquitousbyte authored Dec 2, 2024
1 parent 8658e75 commit fe52004
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 14 deletions.
15 changes: 13 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,17 @@ def metrics():
url=url, headers=headers, max_retries=max_retries, logger=logger
).get_health_check()

##
# NOTIFY IF PAGINATION IS ENABLED
#
enable_pagination = str(os.getenv("PAGINATION_ENABLED", "True")) == "True"
pagination_limit = int(os.getenv("PAGINATION_LIMIT", 200))
if enable_pagination:
logger.info("Pagination is enabled")
logger.info(f"Pagination limit is {pagination_limit}")
else:
logger.info("Pagination is disabled")

# Create an instance of the PrefectMetrics class
metrics = PrefectMetrics(
url=url,
Expand All @@ -48,8 +59,8 @@ def metrics():
csrf_enabled=str(os.getenv("PREFECT_CSRF_ENABLED", "False")) == "True",
logger=logger,
# Enable pagination if not specified to avoid breaking existing deployments
enable_pagination=str(os.getenv("PAGINATION_ENABLED", "True")) == "True",
pagination_limit=int(os.getenv("PAGINATION_LIMIT", 200)),
enable_pagination=enable_pagination,
pagination_limit=pagination_limit,
)

# Register the metrics with Prometheus
Expand Down
10 changes: 6 additions & 4 deletions metrics/api_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,17 @@ def _get_with_pagination(self, base_data: Optional[dict] = None) -> list:

curr_page_items = resp.json()

# If pagination is not used, break the loop
if not enable_pagination:
break

# If the current page is empty, break the loop
if not curr_page_items:
break

# The page has items. Extend the item set.
all_items.extend(curr_page_items)

# If pagination is not used, break the loop
if not enable_pagination:
break

offset += limit

return all_items
9 changes: 1 addition & 8 deletions metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,7 @@ def collect(self):
self.csrf_token_expiration = token_information.expiration
self.headers["Prefect-Csrf-Token"] = self.csrf_token
self.headers["Prefect-Csrf-Client"] = self.client_id
##
# NOTIFY IF PAGINATION IS ENABLED
#
if self.enable_pagination:
self.logger.info("Pagination is enabled")
self.logger.info(f"Pagination limit is {self.pagination_limit}")
else:
self.logger.info("Pagination is disabled")

##
# PREFECT GET RESOURCES
#
Expand Down

0 comments on commit fe52004

Please sign in to comment.