Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect the Retry-After header #225

Merged
merged 11 commits into from
Oct 25, 2023
14 changes: 12 additions & 2 deletions commcare_export/commcare_hq_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@

import copy
import logging
import time
from collections import OrderedDict
from math import ceil
from urllib.parse import urlencode

import backoff
import requests
from requests.auth import AuthBase, HTTPDigestAuth

import backoff
import commcare_export
from commcare_export.repeatable_iterator import RepeatableIterator

Expand All @@ -30,6 +32,12 @@

def on_backoff(details):
_log_backoff(details, 'Waiting for retry.')
response = details["exception"].response
if response.status_code == 429:
retry_after = response.headers.get("Retry-After", 0.0)
retry_after = ceil(float(retry_after))
logger.warning(f"Sleeping for {retry_after} seconds")
time.sleep(retry_after)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this isn't what I had in mind but I guess it works.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another way is to use the wait_gen parameter to give it the time to wait for, but it would need to be context aware. Currently the decorator is not. Do do this, we can wrap this whole get method along with the decorator inside an instance method. That way we can get a wait_gen generator that is able to determine how long to wait for, depending on the Retry-After value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I was thinking of something like that. It might get ugly though. We'd also need to modify the response.raise_for_status() call so that we don't do double backoff.

I'm fine with this for now if you would rather try the other approach later.

Copy link
Contributor Author

@SmittieC SmittieC Oct 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thoughts on this approach rather? 388a507

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good to me



def on_giveup(details):
Expand Down Expand Up @@ -120,7 +128,9 @@ def api_url(self):
)
def get(self, resource, params=None):
"""
Gets the named resource.
Gets the named resource. When the server returns a 429 (too many requests), the process will sleep for
the amount of seconds specified in the Retry-After header from the response, after which it will raise
an exception to trigger the retry action.

Currently a bit of a vulnerable stub that works for this
particular use case in the hands of a trusted user; would likely
Expand Down
16 changes: 12 additions & 4 deletions commcare_export/commcare_minilinq.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
'ucr',
}

DEFAULT_PAGE_SIZE = 1000
DEFAULT_UCR_PAGE_SIZE = 10000


class PaginationMode(Enum):
date_indexed = "date_indexed"
Expand Down Expand Up @@ -102,7 +105,7 @@ def __call__(self, since, until):

def get_paginator(
resource,
page_size=1000,
page_size=None,
pagination_mode=PaginationMode.date_indexed,
):
return {
Expand Down Expand Up @@ -134,7 +137,7 @@ class CommCareHqEnv(DictEnv):
CommCareHq API.
"""

def __init__(self, commcare_hq_client, until=None, page_size=1000):
def __init__(self, commcare_hq_client, page_size=None, until=None):
self.commcare_hq_client = commcare_hq_client
self.until = until
self.page_size = page_size
Expand Down Expand Up @@ -177,7 +180,8 @@ class SimplePaginator(object):
Paginate based on the 'next' URL provided in the API response.
"""

def __init__(self, page_size=1000, params=None):
def __init__(self, page_size=None, params=None):
page_size = page_size if page_size else 1000
SmittieC marked this conversation as resolved.
Show resolved Hide resolved
self.page_size = page_size
self.params = params

Expand Down Expand Up @@ -224,7 +228,8 @@ class DatePaginator(SimplePaginator):

DEFAULT_PARAMS = object()

def __init__(self, since_field, page_size=1000, params=DEFAULT_PARAMS):
def __init__(self, since_field, page_size=None, params=DEFAULT_PARAMS):
page_size = page_size if page_size else DEFAULT_PAGE_SIZE
params = DATE_PARAMS[
since_field] if params is DatePaginator.DEFAULT_PARAMS else params
super(DatePaginator, self).__init__(page_size, params)
Expand Down Expand Up @@ -268,6 +273,9 @@ def get_since_date(self, batch):


class UCRPaginator(SimplePaginator):
def __init__(self, page_size=None, *args, **kwargs):
page_size = page_size if page_size else DEFAULT_UCR_PAGE_SIZE
super().__init__(page_size, *args, **kwargs)

def next_page_params_from_batch(self, batch):
params = super(UCRPaginator, self).next_page_params_from_batch(batch)
Expand Down
Binary file added reports.zip
Binary file not shown.