Skip to content

Commit

Permalink
Listkeysv2 (#371)
Browse files Browse the repository at this point in the history
* use s3 list_keys_v2

* sort key names for list_keys_v2

* update s3 config for updated boto version

* fix URL-parsing security warning

* add dot before dns name

* update requirements.txt to match toml
  • Loading branch information
jreadey authored Jun 11, 2024
1 parent 131119d commit 960be48
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 10 deletions.
29 changes: 23 additions & 6 deletions hsds/util/s3Client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import datetime
import json
import time
from urllib.parse import urlparse
from aiobotocore.config import AioConfig
from aiobotocore.session import get_session
from botocore.exceptions import ClientError
Expand Down Expand Up @@ -113,8 +114,10 @@ def __init__(self, app):
except KeyError:
pass

self._aio_config = AioConfig(max_pool_connections=max_pool_connections,
signature_version=signature_version)
kwargs = {"max_pool_connections": max_pool_connections}
if signature_version:
kwargs["signature_version"] = signature_version
self._aio_config = AioConfig(**kwargs)

log.debug(f"S3Client init - aws_region {self._aws_region}")

Expand Down Expand Up @@ -144,9 +147,17 @@ def _get_client_kwargs(self):
kwargs["region_name"] = self._aws_region
kwargs["aws_secret_access_key"] = self._aws_secret_access_key
kwargs["aws_access_key_id"] = self._aws_access_key_id
kwargs["aws_session_token"] = self._aws_session_token
kwargs["endpoint_url"] = self._s3_gateway
kwargs["use_ssl"] = self._use_ssl
if self._aws_session_token:
kwargs["aws_session_token"] = self._aws_session_token
if self._s3_gateway:
host = urlparse(self._s3_gateway).hostname
if not host.endswith(".amazonaws.com"):
# let boto sort out the endpoint if it's on aws
# for third party s3 compatible services (e.g. minio), set it here
kwargs["endpoint_url"] = self._s3_gateway
if self._use_ssl:
kwargs["use_ssl"] = self._use_ssl

kwargs["config"] = self._aio_config
# log.debug(f"s3 kwargs: {kwargs}")
return kwargs
Expand Down Expand Up @@ -627,8 +638,10 @@ async def list_keys(
session = self._app["session"]
self._renewToken()
kwargs = self._get_client_kwargs()
if prefix and prefix[-1] != "/":
prefix += "/" # list_v2 requires prefix end with slash
async with session.create_client("s3", **kwargs) as _client:
paginator = _client.get_paginator("list_objects")
paginator = _client.get_paginator("list_objects_v2")

# use a dictionary to hold return values if stats are needed
key_names = {} if include_stats else []
Expand Down Expand Up @@ -667,6 +680,10 @@ async def list_keys(
msg += f"but got {len(key_names)}"
log.warning(msg)

if not include_stats:
# list_keys_v2 does not return keys in lexicographic order, so sort here
key_names.sort()

return key_names

async def releaseClient(self):
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,11 @@ version = "0.8.5"

dependencies = [
"aiohttp == 3.9.4",
"aiobotocore == 2.5.0",
"aiobotocore == 2.13.0",
"aiohttp_cors",
"aiofiles",
"azure-storage-blob",
"bitshuffle",
"botocore",
"cryptography",
"h5py >= 3.6.0",
"numcodecs",
Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
aiohttp==3.9.4
aiobotocore==2.5.0
aiobotocore==2.13.0
aiohttp_cors
aiofiles
azure-storage-blob
bitshuffle
botocore
cryptography
h5py>=3.6.0
numcodecs
Expand Down

0 comments on commit 960be48

Please sign in to comment.