import time


class HttpException(Exception):
    """Raised when a GitHub request still fails after every retry attempt."""


# Total number of requests `authed_get` makes before giving up.
_MAX_ATTEMPTS = 3

# Transient GitHub gateway failures that are worth retrying as-is.
_RETRYABLE_STATUSES = (502, 503)

# Statuses GitHub uses when rejecting a request for exceeding the rate limit.
_RATE_LIMITED_STATUSES = (403, 429)


def rate_limit_wait_seconds(status_code, headers, now=None):
    """Return how long to wait before retrying a rate-limited response.

    A response only counts as rate limited when it was actually rejected
    (403/429) *and* reports ``X-RateLimit-Remaining: 0``. A successful
    response that merely consumed the last quota slot is still valid and
    must not be thrown away.

    Args:
        status_code: HTTP status code of the response.
        headers: Mapping of response headers (``.get`` is the only method
            used).
        now: Current epoch time in seconds; defaults to ``time.time()``.
            Exposed so the computation is deterministic under test.

    Returns:
        ``None`` when the response is not a rate-limit rejection, otherwise
        the non-negative number of seconds until the limit resets. When the
        reset header is missing, 60 seconds is assumed.
    """
    if status_code not in _RATE_LIMITED_STATUSES:
        return None
    if headers.get('X-RateLimit-Remaining') != '0':
        return None

    if now is None:
        now = time.time()
    reset_at = float(headers.get('X-RateLimit-Reset', now + 60))
    # Clamp at zero: if the reset moment has already passed, a negative
    # value would make time.sleep() raise ValueError.
    return max(0.0, reset_at - now)


def authed_get(source, url, headers=None):
    """GET ``url`` through the shared session, retrying transient failures.

    Up to ``_MAX_ATTEMPTS`` requests are made. Rate-limit rejections wait
    until the advertised reset time before the next attempt; 502/503
    responses are retried immediately.

    Args:
        source: Name passed to ``metrics.http_request_timer`` for the
            request metric.
        url: Absolute URL to fetch.
        headers: Optional mapping merged into the session headers before
            the request.

    Returns:
        The response object of the first attempt that is neither rate
        limited nor a retryable gateway error.

    Raises:
        AuthException: On a 401, or a 403 that is not a rate-limit
            rejection.
        NotFoundException: On a 404.
        HttpException: When every attempt was rate limited or returned
            502/503.
    """
    if headers:
        session.headers.update(headers)

    resp = None
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        wait = None
        with metrics.http_request_timer(source) as timer:
            resp = session.request(method='get', url=url)
            # Tag while the timer is still open.
            # NOTE(review): this assumes the timer reports its metric when
            # the `with` block exits, so a tag set afterwards would be
            # lost - confirm against singer.metrics.
            timer.tags[metrics.Tag.http_status_code] = resp.status_code

            wait = rate_limit_wait_seconds(resp.status_code, resp.headers)
            if wait is None and resp.status_code not in _RETRYABLE_STATUSES:
                # Raise inside the timer so the failure is visible to it.
                if resp.status_code in (401, 403):
                    raise AuthException(resp.text)
                if resp.status_code == 404:
                    raise NotFoundException(resp.text)
                return resp

        # Only retryable outcomes reach this point. Sleep outside the timer
        # so the wait does not inflate the recorded request duration, and
        # skip it after the final attempt since no retry would follow.
        if wait is not None and attempt < _MAX_ATTEMPTS:
            time.sleep(wait)

    raise HttpException(resp.text)
@@ def do_discover(): def get_all_releases(schemas, repo_path, state, mdata): # Releases doesn't seem to have an `updated_at` property, yet can be edited. - # For this reason and since the volume of release can safely be considered low, + # For this reason and since the volume of release can safely be considered low, # bookmarks were ignored for releases. with metrics.record_counter('releases') as counter: