From ceb338275bbb43959a3ac0141c8f7e6ee66ae758 Mon Sep 17 00:00:00 2001 From: Martin Redolatti Date: Mon, 20 May 2019 13:05:52 -0300 Subject: [PATCH 1/7] handle rate limiter --- tap_github.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/tap_github.py b/tap_github.py index 8bff0a3c..69d58b96 100644 --- a/tap_github.py +++ b/tap_github.py @@ -83,15 +83,25 @@ def get_bookmark(state, repo, stream_name, bookmark_key): return None def authed_get(source, url, headers={}): - with metrics.http_request_timer(source) as timer: - session.headers.update(headers) - resp = session.request(method='get', url=url) - if resp.status_code == 401: - raise AuthException(resp.text) - if resp.status_code == 403: - raise AuthException(resp.text) - if resp.status_code == 404: - raise NotFoundException(resp.text) + + for _ in range(0, 3): # 3 attempts + with metrics.http_request_timer(source) as timer: + session.headers.update(headers) + resp = session.request(method='get', url=url) + + # Handle github's rate limited responses + remaining = resp.headers.get('X-RateLimit-Remaining') + time_to_reset = resp.headers.get('X-RateLimit-Reset', 60) + if remaining is not None and remaining == 0: + time.sleep(time.now() - time_to_reset) + continue + + if resp.status_code == 401: + raise AuthException(resp.text) + if resp.status_code == 403: + raise AuthException(resp.text) + if resp.status_code == 404: + raise NotFoundException(resp.text) timer.tags[metrics.Tag.http_status_code] = resp.status_code return resp @@ -187,7 +197,7 @@ def do_discover(): def get_all_releases(schemas, repo_path, state, mdata): # Releases doesn't seem to have an `updated_at` property, yet can be edited. - # For this reason and since the volume of release can safely be considered low, + # For this reason and since the volume of release can safely be considered low, # bookmarks were ignored for releases. with metrics.record_counter('releases') as counter: From 3be1c34a0c5daab138b22937577a250ea435b78b Mon Sep 17 00:00:00 2001 From: Martin Redolatti Date: Fri, 24 May 2019 15:14:37 -0300 Subject: [PATCH 2/7] handle 502 & 503 errors as well --- tap_github.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tap_github.py b/tap_github.py index 69d58b96..1e75c5a0 100644 --- a/tap_github.py +++ b/tap_github.py @@ -94,7 +94,11 @@ def authed_get(source, url, headers={}): time_to_reset = resp.headers.get('X-RateLimit-Reset', 60) if remaining is not None and remaining == 0: time.sleep(time.now() - time_to_reset) - continue + continue # next attempt + + # Handle github's possible failures as retries + if resp.status_code == 502 or resp.status_code == 503: + continue # next attempt if resp.status_code == 401: raise AuthException(resp.text) From 7fa7927ce992353275a52296ae26918cf4b33274 Mon Sep 17 00:00:00 2001 From: Martin Redolatti Date: Fri, 24 May 2019 17:48:48 -0300 Subject: [PATCH 3/7] add exception if we run out of attempts --- tap_github.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tap_github.py b/tap_github.py index 1e75c5a0..adef116f 100644 --- a/tap_github.py +++ b/tap_github.py @@ -33,6 +33,9 @@ class AuthException(Exception): class NotFoundException(Exception): pass +class HttpException(Exception): + pass + def translate_state(state, catalog, repositories): ''' This tap used to only support a single repository, in which case the @@ -110,6 +113,8 @@ def authed_get(source, url, headers={}): timer.tags[metrics.Tag.http_status_code] = resp.status_code return resp + raise HttpException(resp.text) + def authed_get_all_pages(source, url, headers={}): while True: r = authed_get(source, url, headers) From b63218804aba86dbd20d0fe8305daf93ffb866b7 Mon Sep 17 00:00:00 2001 From: Martin Redolatti Date: Fri, 24 May 2019 18:20:06 -0300 Subject: [PATCH 4/7] set a proper default time to reset --- tap_github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github.py b/tap_github.py index adef116f..a7335997 100644 --- a/tap_github.py +++ b/tap_github.py @@ -94,7 +94,7 @@ def authed_get(source, url, headers={}): # Handle github's rate limited responses remaining = resp.headers.get('X-RateLimit-Remaining') - time_to_reset = resp.headers.get('X-RateLimit-Reset', 60) + time_to_reset = resp.headers.get('X-RateLimit-Reset', time.now() + 60) if remaining is not None and remaining == 0: time.sleep(time.now() - time_to_reset) continue # next attempt From f6e2b3dc8fa69f2ac7bb7e8c87edefdcb76518ac Mon Sep 17 00:00:00 2001 From: Martin Redolatti Date: Mon, 27 May 2019 15:34:20 -0300 Subject: [PATCH 5/7] fix missing import --- tap_github.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tap_github.py b/tap_github.py index a7335997..eb36c3af 100644 --- a/tap_github.py +++ b/tap_github.py @@ -6,6 +6,7 @@ import singer.bookmarks as bookmarks import singer.metrics as metrics import collections +import time from singer import metadata @@ -86,7 +87,6 @@ def get_bookmark(state, repo, stream_name, bookmark_key): return None def authed_get(source, url, headers={}): - for _ in range(0, 3): # 3 attempts with metrics.http_request_timer(source) as timer: session.headers.update(headers) @@ -94,9 +94,9 @@ def authed_get(source, url, headers={}): # Handle github's rate limited responses remaining = resp.headers.get('X-RateLimit-Remaining') - time_to_reset = resp.headers.get('X-RateLimit-Reset', time.now() + 60) + time_to_reset = resp.headers.get('X-RateLimit-Reset', time.time() + 60) if remaining is not None and remaining == 0: - time.sleep(time.now() - time_to_reset) + time.sleep(time.time() - time_to_reset) continue # next attempt # Handle github's possible failures as retries From 634b5a525b3abef754f2624ce5a3abcf0dacb94d Mon Sep 17 00:00:00 2001 From: Martin Redolatti Date: Thu, 30 May 2019 19:01:51 -0300 Subject: [PATCH 6/7] fix sleep statement --- tap_github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github.py b/tap_github.py index eb36c3af..b0a366be 100644 --- a/tap_github.py +++ b/tap_github.py @@ -96,7 +96,7 @@ def authed_get(source, url, headers={}): remaining = resp.headers.get('X-RateLimit-Remaining') time_to_reset = resp.headers.get('X-RateLimit-Reset', time.time() + 60) if remaining is not None and remaining == 0: - time.sleep(time.time() - time_to_reset) + time.sleep(time_to_reset - time.time()) continue # next attempt # Handle github's possible failures as retries From 00bfa130d14ddd4dc03f57a08e162acc4e83de88 Mon Sep 17 00:00:00 2001 From: Martin Redolatti Date: Fri, 31 May 2019 17:57:55 -0300 Subject: [PATCH 7/7] fix waiting logic --- tap_github.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_github.py b/tap_github.py index b0a366be..a87a190e 100644 --- a/tap_github.py +++ b/tap_github.py @@ -95,8 +95,8 @@ def authed_get(source, url, headers={}): # Handle github's rate limited responses remaining = resp.headers.get('X-RateLimit-Remaining') time_to_reset = resp.headers.get('X-RateLimit-Reset', time.time() + 60) - if remaining is not None and remaining == 0: - time.sleep(time_to_reset - time.time()) + if remaining is not None and remaining == '0': + time.sleep(float(time_to_reset) - time.time()) continue # next attempt # Handle github's possible failures as retries