From 882edec15795c1963f88e8bea3940da51bc9bd59 Mon Sep 17 00:00:00 2001 From: Mark Major <32452238+mark-major@users.noreply.github.com> Date: Wed, 20 Nov 2024 09:26:30 +0100 Subject: [PATCH] Boto Glue standard retry policy with configuration (#1307) * boto glue standard retry policy configurable max retry * Update configuration.md Co-authored-by: Fokko Driesprong * Update glue.py Co-authored-by: Fokko Driesprong * boto glue retry mode configurable --------- Co-authored-by: Fokko Driesprong --- mkdocs/docs/configuration.md | 20 +++++++++++--------- pyiceberg/catalog/glue.py | 22 +++++++++++++++++++++- tests/catalog/test_glue.py | 1 - 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 2404f28b30..133f02060a 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -331,16 +331,18 @@ catalog: -| Key | Example | Description | -| ---------------------- | ------------------------------------ | ------------------------------------------------------------------------------- | -| glue.id | 111111111111 | Configure the 12-digit ID of the Glue Catalog | -| glue.skip-archive | true | Configure whether to skip the archival of older table versions. Default to true | +| Key | Example | Description | +|------------------------|----------------------------------------|---------------------------------------------------------------------------------| +| glue.id | 111111111111 | Configure the 12-digit ID of the Glue Catalog | +| glue.skip-archive | true | Configure whether to skip the archival of older table versions. Default to true | | glue.endpoint | | Configure an alternative endpoint of the Glue service for GlueCatalog to access | -| glue.profile-name | default | Configure the static profile used to access the Glue Catalog | -| glue.region | us-east-1 | Set the region of the Glue Catalog | -| glue.access-key-id | admin | Configure the static access key id used to access the Glue Catalog | -| glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog | -| glue.session-token | AQoDYXdzEJr... | Configure the static session token used to access the Glue Catalog | +| glue.profile-name | default | Configure the static profile used to access the Glue Catalog | +| glue.region | us-east-1 | Set the region of the Glue Catalog | +| glue.access-key-id | admin | Configure the static access key id used to access the Glue Catalog | +| glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog | +| glue.session-token | AQoDYXdzEJr... | Configure the static session token used to access the Glue Catalog | +| glue.max-retries | 10 | Configure the maximum number of retries for the Glue service calls | +| glue.retry-mode | standard | Configure the retry mode for the Glue service. Default to standard. | diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 5e79c99ab8..4c575f6d59 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -29,6 +29,7 @@ ) import boto3 +from botocore.config import Config from mypy_boto3_glue.client import GlueClient from mypy_boto3_glue.type_defs import ( ColumnTypeDef, @@ -128,6 +129,14 @@ GLUE_ACCESS_KEY_ID = "glue.access-key-id" GLUE_SECRET_ACCESS_KEY = "glue.secret-access-key" GLUE_SESSION_TOKEN = "glue.session-token" +GLUE_MAX_RETRIES = "glue.max-retries" +GLUE_RETRY_MODE = "glue.retry-mode" + +MAX_RETRIES = 10 +STANDARD_RETRY_MODE = "standard" +ADAPTIVE_RETRY_MODE = "adaptive" +LEGACY_RETRY_MODE = "legacy" +EXISTING_RETRY_MODES = [STANDARD_RETRY_MODE, ADAPTIVE_RETRY_MODE, LEGACY_RETRY_MODE] def _construct_parameters( @@ -297,6 +306,8 @@ class GlueCatalog(MetastoreCatalog): def __init__(self, name: str, **properties: Any): super().__init__(name, **properties) + retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE) + session = boto3.Session( profile_name=properties.get(GLUE_PROFILE_NAME), region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), @@ -305,7 +316,16 @@ def __init__(self, name: str, **properties: Any): aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN), ) - self.glue: GlueClient = session.client("glue", endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT)) + self.glue: GlueClient = session.client( + "glue", + endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT), + config=Config( + retries={ + "max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES), + "mode": retry_mode_prop_value if retry_mode_prop_value in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE, + } + ), + ) if glue_catalog_id := properties.get(GLUE_ID): _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id) diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 2013ed914d..825ac681d5 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -442,7 +442,6 @@ def test_list_tables( moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, - table_name: str, table_list: List[str], ) -> None: test_catalog = GlueCatalog("glue", **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}/"})