From 68d5b386a8a1f62a2a36593c77953525fc5fa99a Mon Sep 17 00:00:00 2001 From: jreadey Date: Fri, 7 Jun 2024 09:27:41 -0500 Subject: [PATCH 1/6] use s3 list_keys_v2 --- hsds/util/s3Client.py | 4 +++- pyproject.toml | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hsds/util/s3Client.py b/hsds/util/s3Client.py index 7a402afb..513455ba 100644 --- a/hsds/util/s3Client.py +++ b/hsds/util/s3Client.py @@ -627,8 +627,10 @@ async def list_keys( session = self._app["session"] self._renewToken() kwargs = self._get_client_kwargs() + if prefix and prefix[-1] != "/": + prefix += "/" # list_v2 requires prefix end with slash async with session.create_client("s3", **kwargs) as _client: - paginator = _client.get_paginator("list_objects") + paginator = _client.get_paginator("list_objects_v2") # use a dictionary to hold return values if stats are needed key_names = {} if include_stats else [] diff --git a/pyproject.toml b/pyproject.toml index b130919d..acc295b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,12 +35,11 @@ version = "0.8.5" dependencies = [ "aiohttp == 3.9.4", - "aiobotocore == 2.5.0", + "aiobotocore == 2.13.0", "aiohttp_cors", "aiofiles", "azure-storage-blob", "bitshuffle", - "botocore", "cryptography", "h5py >= 3.6.0", "numcodecs", From ad98de285e192e24fcc4b3b3c24c53ebea12de1e Mon Sep 17 00:00:00 2001 From: jreadey Date: Fri, 7 Jun 2024 09:37:07 -0500 Subject: [PATCH 2/6] sort key names for list_keys_v2 --- hsds/util/s3Client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hsds/util/s3Client.py b/hsds/util/s3Client.py index 513455ba..36eaa780 100644 --- a/hsds/util/s3Client.py +++ b/hsds/util/s3Client.py @@ -669,6 +669,10 @@ async def list_keys( msg += f"but got {len(key_names)}" log.warning(msg) + if not include_stats: + # list_keys_v2 does not return keys in lexicographic order, so sort here + key_names.sort() + return key_names async def releaseClient(self): From 311eabd2de014665d53ec05c3b1ae6facdd7e16b Mon Sep 17 00:00:00 2001 
From: jreadey Date: Mon, 10 Jun 2024 09:40:45 -0500 Subject: [PATCH 3/6] update s3 config for updated boto version --- hsds/util/s3Client.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/hsds/util/s3Client.py b/hsds/util/s3Client.py index 36eaa780..fa071816 100644 --- a/hsds/util/s3Client.py +++ b/hsds/util/s3Client.py @@ -113,8 +113,10 @@ def __init__(self, app): except KeyError: pass - self._aio_config = AioConfig(max_pool_connections=max_pool_connections, - signature_version=signature_version) + kwargs = {"max_pool_connections": max_pool_connections} + if signature_version: + kwargs["signature_version"] = signature_version + self._aio_config = AioConfig(**kwargs) log.debug(f"S3Client init - aws_region {self._aws_region}") @@ -144,9 +146,15 @@ def _get_client_kwargs(self): kwargs["region_name"] = self._aws_region kwargs["aws_secret_access_key"] = self._aws_secret_access_key kwargs["aws_access_key_id"] = self._aws_access_key_id - kwargs["aws_session_token"] = self._aws_session_token - kwargs["endpoint_url"] = self._s3_gateway - kwargs["use_ssl"] = self._use_ssl + if self._aws_session_token: + kwargs["aws_session_token"] = self._aws_session_token + if self._s3_gateway and not self._s3_gateway.endswith("amazonaws.com"): + # let boto sort out the endpoint if it's on aws + # for third party s3 compatible services (e.g. 
minio), set it here + kwargs["endpoint_url"] = self._s3_gateway + if self._use_ssl: + kwargs["use_ssl"] = self._use_ssl + kwargs["config"] = self._aio_config # log.debug(f"s3 kwargs: {kwargs}") return kwargs From b02ac86fb5af161542c71c18e7029afb7580dbd7 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 10 Jun 2024 09:55:46 -0500 Subject: [PATCH 4/6] fix urlparsing security warning --- hsds/util/s3Client.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hsds/util/s3Client.py b/hsds/util/s3Client.py index fa071816..b6062dd8 100644 --- a/hsds/util/s3Client.py +++ b/hsds/util/s3Client.py @@ -17,6 +17,7 @@ import datetime import json import time +from urllib.parse import urlparse from aiobotocore.config import AioConfig from aiobotocore.session import get_session from botocore.exceptions import ClientError @@ -148,10 +149,12 @@ def _get_client_kwargs(self): kwargs["aws_access_key_id"] = self._aws_access_key_id if self._aws_session_token: kwargs["aws_session_token"] = self._aws_session_token - if self._s3_gateway and not self._s3_gateway.endswith("amazonaws.com"): - # let boto sort out the endpoint if it's on aws - # for third party s3 compatible services (e.g. minio), set it here - kwargs["endpoint_url"] = self._s3_gateway + if self._s3_gateway: + host = urlparse(self._s3_gateway).hostname + if not host.endswith("amazonaws.com"): + # let boto sort out the endpoint if it's on aws + # for third party s3 compatible services (e.g. 
minio), set it here + kwargs["endpoint_url"] = self._s3_gateway if self._use_ssl: kwargs["use_ssl"] = self._use_ssl From b960932380c4eb8c707251023038f1cb0529093e Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 10 Jun 2024 16:28:57 -0500 Subject: [PATCH 5/6] add dot before dns name --- hsds/util/s3Client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hsds/util/s3Client.py b/hsds/util/s3Client.py index b6062dd8..4c6fe9e2 100644 --- a/hsds/util/s3Client.py +++ b/hsds/util/s3Client.py @@ -151,7 +151,7 @@ def _get_client_kwargs(self): kwargs["aws_session_token"] = self._aws_session_token if self._s3_gateway: host = urlparse(self._s3_gateway).hostname - if not host.endswith("amazonaws.com"): + if not host.endswith(".amazonaws.com"): # let boto sort out the endpoint if it's on aws # for third party s3 compatible services (e.g. minio), set it here kwargs["endpoint_url"] = self._s3_gateway From a2e0281956187afb243f931e1fe8923baf593b6b Mon Sep 17 00:00:00 2001 From: jreadey Date: Tue, 11 Jun 2024 13:28:51 +0100 Subject: [PATCH 6/6] update requirements.txt to match toml --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 04749dba..d16cb14b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,9 @@ aiohttp==3.9.4 -aiobotocore==2.5.0 +aiobotocore==2.13.0 aiohttp_cors aiofiles azure-storage-blob bitshuffle -botocore cryptography h5py>=3.6.0 numcodecs