From f06b70e7eebb47970bbd2fdeeb3622c8c67fca5f Mon Sep 17 00:00:00 2001
From: Elizabeth Alpert
Date: Tue, 29 Nov 2022 11:37:56 +1000
Subject: [PATCH] Add ability to configure max retries for v1.1 server errors

Given the increasing instability of the v1.1 API, it's helpful to be
able to tune the number of retries for server errors.

- Add a max_server_error_retries param to the v1.1 client, retaining
  the old value of 30 as the default
- Pass max_server_error_retries through the rate_limit decorator
- Pass max_server_error_retries through uses of Twarc.get() in client.py
---
 twarc/client.py     | 77 +++++++++++++++++++++++++++++++++++++--------
 twarc/decorators.py |  6 ++--
 2 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/twarc/client.py b/twarc/client.py
index 00b30533..bff3c323 100644
--- a/twarc/client.py
+++ b/twarc/client.py
@@ -51,6 +51,7 @@ def __init__(
         access_token_secret=None,
         connection_errors=0,
         http_errors=0,
+        max_server_error_retries=30,
         config=None,
         profile="",
         protected=False,
@@ -76,6 +77,7 @@ def __init__(
         self.access_token_secret = access_token_secret
         self.connection_errors = connection_errors
         self.http_errors = http_errors
+        self.max_server_error_retries = max_server_error_retries
         self.profile = profile
         self.client = None
         self.last_response = None
@@ -144,7 +146,11 @@ def search(
             if max_id:
                 params["max_id"] = max_id

-            resp = self.get(url, params=params)
+            resp = self.get(
+                url,
+                params=params,
+                max_server_error_retries=self.max_server_error_retries,
+            )
             retrieved_pages += 1

             statuses = resp.json()["statuses"]
@@ -237,7 +243,11 @@ def premium_search(
         count = 0
         stop = False
         while not stop:
-            resp = self.get(url, params=params)
+            resp = self.get(
+                url,
+                params=params,
+                max_server_error_retries=self.max_server_error_retries,
+            )
             if resp.status_code == 200:
                 data = resp.json()
                 for tweet in data["results"]:
@@ -293,7 +303,12 @@ def timeline(
                 params["max_id"] = max_id

             try:
-                resp = self.get(url, params=params, allow_404=True)
+                resp = self.get(
+                    url,
+                    params=params,
+                    allow_404=True,
+                    max_server_error_retries=self.max_server_error_retries,
+                )
                 retrieved_pages += 1
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
@@ -355,7 +370,12 @@ def do_lookup():
             url = "https://api.twitter.com/1.1/users/lookup.json"
             params = {id_type: ids_str}
             try:
-                resp = self.get(url, params=params, allow_404=True)
+                resp = self.get(
+                    url,
+                    params=params,
+                    allow_404=True,
+                    max_server_error_retries=self.max_server_error_retries,
+                )
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
                     log.warning("no users matching %s", ids_str)
@@ -391,7 +411,12 @@ def follower_ids(self, user, max_pages=None):

         while params["cursor"] != 0:
             try:
-                resp = self.get(url, params=params, allow_404=True)
+                resp = self.get(
+                    url,
+                    params=params,
+                    allow_404=True,
+                    max_server_error_retries=self.max_server_error_retries,
+                )
                 retrieved_pages += 1
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
@@ -424,7 +449,12 @@ def friend_ids(self, user, max_pages=None):

         while params["cursor"] != 0:
             try:
-                resp = self.get(url, params=params, allow_404=True)
+                resp = self.get(
+                    url,
+                    params=params,
+                    allow_404=True,
+                    max_server_error_retries=self.max_server_error_retries,
+                )
                 retrieved_pages += 1
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
@@ -658,7 +688,12 @@ def retweets(self, tweet_ids):
                 tweet_id
             )
             try:
-                resp = self.get(url, params={"count": 100}, allow_404=True)
+                resp = self.get(
+                    url,
+                    params={"count": 100},
+                    allow_404=True,
+                    max_server_error_retries=self.max_server_error_retries,
+                )
                 for tweet in resp.json():
                     yield tweet
             except requests.exceptions.HTTPError as e:
@@ -671,7 +706,7 @@ def trends_available(self):
         """
         url = "https://api.twitter.com/1.1/trends/available.json"
         try:
-            resp = self.get(url)
+            resp = self.get(url, max_server_error_retries=self.max_server_error_retries)
         except requests.exceptions.HTTPError as e:
             raise e
         return resp.json()
@@ -687,7 +722,12 @@ def trends_place(self, woeid, exclude=None):
         if exclude:
             params["exclude"] = exclude
         try:
-            resp = self.get(url, params=params, allow_404=True)
+            resp = self.get(
+                url,
+                params=params,
+                allow_404=True,
+                max_server_error_retries=self.max_server_error_retries,
+            )
         except requests.exceptions.HTTPError as e:
             if e.response.status_code == 404:
                 log.info("no region matching WOEID %s", woeid)
@@ -701,7 +741,11 @@ def trends_closest(self, lat, lon):
         url = "https://api.twitter.com/1.1/trends/closest.json"
         params = {"lat": lat, "long": lon}
         try:
-            resp = self.get(url, params=params)
+            resp = self.get(
+                url,
+                params=params,
+                max_server_error_retries=self.max_server_error_retries,
+            )
         except requests.exceptions.HTTPError as e:
             raise e
         return resp.json()
@@ -789,7 +833,12 @@ def list_members(

         while params["cursor"] != 0:
             try:
-                resp = self.get(url, params=params, allow_404=True)
+                resp = self.get(
+                    url,
+                    params=params,
+                    allow_404=True,
+                    max_server_error_retries=self.max_server_error_retries,
+                )
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
                     log.error("no matching list")
@@ -813,7 +862,9 @@ def oembed(self, tweet_url, **params):

         url = "https://publish.twitter.com/oembed"
         params["url"] = tweet_url
-        resp = self.get(url, params=params)
+        resp = self.get(
+            url, params=params, max_server_error_retries=self.max_server_error_retries
+        )

         return resp.json()
@@ -1010,7 +1061,7 @@ def validate_keys(self):
             # Need to explicitly reconnect to confirm the current creds
             # are used in the session object.
             self.connect()
-            self.get(url)
+            self.get(url, max_server_error_retries=self.max_server_error_retries)
             return True
         except requests.HTTPError as e:
             if e.response.status_code == 401:
diff --git a/twarc/decorators.py b/twarc/decorators.py
index 93f0f3df..88e526e5 100644
--- a/twarc/decorators.py
+++ b/twarc/decorators.py
@@ -15,7 +15,7 @@ def rate_limit(f):
     issue the API call again.
     """

-    def new_f(*args, **kwargs):
+    def new_f(*args, max_server_error_retries=30, **kwargs):
         errors = 0
         while True:
             resp = f(*args, **kwargs)
@@ -56,7 +56,7 @@ def new_f(*args, **kwargs):
                 resp.url.startswith("https://api.twitter.com/2/tweets/search/all")
             ):
                 errors += 1
-                if errors > 30:
+                if errors > max_server_error_retries:
                     log.warning("too many errors from Twitter, giving up")
                     resp.raise_for_status()
                 # Shorter wait time than other endpoints for this specific case. Also
@@ -76,7 +76,7 @@ def new_f(*args, **kwargs):
                 time.sleep(seconds)
             elif resp.status_code >= 500:
                 errors += 1
-                if errors > 30:
+                if errors > max_server_error_retries:
                     log.warning("too many errors from Twitter, giving up")
                     resp.raise_for_status()
                 seconds = 60 * errors
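
A minimal usage sketch of the new parameter, assuming the patched client
(the credential strings and the search query below are placeholders, not
part of the patch):

    from twarc import Twarc

    t = Twarc(
        consumer_key="...",           # placeholder credentials
        consumer_secret="...",
        access_token="...",
        access_token_secret="...",
        # New knob from this patch: retry server errors at most 5 times
        # instead of the previously hard-coded 30.
        max_server_error_retries=5,
    )

    # search() now forwards self.max_server_error_retries into Twarc.get(),
    # where the rate_limit decorator uses it as the retry ceiling.
    for tweet in t.search("placeholder query"):
        print(tweet["id_str"])

Because new_f() takes max_server_error_retries as a keyword-only argument
defaulting to 30, callers that never pass it keep the pre-patch retry
behavior.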