Skip to content

Commit

Permalink
[2/2] [a] Fix: Can't use curl to download a single manifest in one in…
Browse files Browse the repository at this point in the history
…vocation (#5918)

Add a wait parameter option to the manifest endpoint
  • Loading branch information
dsotirho-ucsc committed Aug 20, 2024
1 parent 0b60723 commit 99cbd30
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 31 deletions.
70 changes: 46 additions & 24 deletions lambdas/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,13 @@ def validate_json_param(name: str, value: str) -> MutableJSON:
raise BRE(f'The {name!r} parameter is not valid JSON')


def validate_wait(wait: str | None):
if wait is None or wait in ['0', '1']:
pass
else:
raise ValueError


class Mandatory:
"""
Validation wrapper signifying that a parameter is mandatory.
Expand Down Expand Up @@ -1313,6 +1320,29 @@ def get_summary():
authentication=request.authentication)


def wait_parameter_spec(*, default: int | None = None) -> JSON:
valid_values = [0, 1]
assert default in (None, *valid_values), default
return params.query(
'wait',
schema.optional(
schema.enum(*valid_values)
if default is None else
schema.with_default(default, type_=schema.enum(*valid_values))
),
description=fd('''
If 0, the client is responsible for honoring the waiting period
specified in the `Retry-After` response header. If 1, the server
will delay the response in order to consume as much of that waiting
period as possible. This parameter should only be set to 1 by
clients who can't honor the `Retry-After` header, preventing them
from quickly exhausting the maximum number of redirects. If the
server cannot wait the full amount, any amount of wait time left
will still be returned in the `Retry-After` header of the response.
''')
)


post_manifest_example_url = (
f'{app.base_url}/manifest/files'
f'?catalog={list(config.catalogs.keys())[0]}'
Expand Down Expand Up @@ -1347,7 +1377,8 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
'parameters': [
params.path('token', str, description=fd('''
An opaque string representing the manifest preparation job
'''))
''')),
*([] if fetch else [wait_parameter_spec()])
]
},
method_spec={
Expand Down Expand Up @@ -1465,6 +1496,7 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
'''),
'parameters': [
catalog_param_spec,
*([wait_parameter_spec(default=1)] if curl else []),
filters_param_spec,
params.query(
'format',
Expand Down Expand Up @@ -1660,24 +1692,32 @@ def _file_manifest(fetch: bool, token_or_key: Optional[str] = None):
and request.headers.get('content-type') == 'application/x-www-form-urlencoded'
and request.raw_body != b''
):
raise BRE('The body must be empty for a POST request of content-type '
'`application/x-www-form-urlencoded` to this endpoint')
raise BRE('POST requests to this endpoint must have an empty body if '
'they specify a `Content-Type` header of '
'`application/x-www-form-urlencoded`')
query_params = request.query_params or {}
_hoist_parameters(query_params, request)
if post:
query_params.setdefault('wait', '1')
if token_or_key is None:
query_params.setdefault('filters', '{}')
# We list the `catalog` validator first so that the catalog is validated
# before any other potentially catalog-dependent validators are invoked
validate_params(query_params,
catalog=validate_catalog,
format=validate_manifest_format,
filters=validate_filters)
filters=validate_filters,
**({'wait': validate_wait} if post else {}))
# Now that the catalog is valid, we can provide the default format that
# depends on it
default_format = app.metadata_plugin.manifest_formats[0].value
query_params.setdefault('format', default_format)
else:
validate_params(query_params)
validate_params(query_params,
# If the initial request was a POST to the non-fetch
# endpoint, the 'wait' param will be carried over to
# each subsequent GET request to the non-fetch endpoint.
**({'wait': validate_wait} if not fetch else {}))
return app.manifest_controller.get_manifest_async(query_params=query_params,
token_or_key=token_or_key,
fetch=fetch,
Expand Down Expand Up @@ -1724,21 +1764,7 @@ def generate_manifest(event: AnyJSON, _context: LambdaContext):
made. If that fails, the UUID of the file will be used instead.
''')
),
params.query(
'wait',
schema.optional(schema.with_default(0)),
description=fd('''
If 0, the client is responsible for honoring the waiting period
specified in the Retry-After response header. If 1, the server
will delay the response in order to consume as much of that
waiting period as possible. This parameter should only be set to
1 by clients who can't honor the `Retry-After` header,
preventing them from quickly exhausting the maximum number of
redirects. If the server cannot wait the full amount, any amount
of wait time left will still be returned in the Retry-After
header of the response.
''')
),
wait_parameter_spec(default=0),
params.query(
'replica',
schema.optional(str),
Expand Down Expand Up @@ -1881,10 +1907,6 @@ def validate_replica(replica: str) -> None:
if replica not in ('aws', 'gcp'):
raise ValueError

def validate_wait(wait: str) -> None:
if wait not in ['0', '1']:
raise ValueError

def validate_version(version: str) -> None:
# This function exists so the repository plugin can be lazily loaded
# instead of being loaded before `validate_params()` can run. This is
Expand Down
41 changes: 39 additions & 2 deletions lambdas/service/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -9631,6 +9631,21 @@
},
"description": "The name of the catalog to query."
},
{
"name": "wait",
"in": "query",
"required": false,
"schema": {
"type": "integer",
"format": "int64",
"enum": [
0,
1
],
"default": 1
},
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
},
{
"name": "filters",
"in": "query",
Expand Down Expand Up @@ -10988,6 +11003,20 @@
"type": "string"
},
"description": "\nAn opaque string representing the manifest preparation job\n"
},
{
"name": "wait",
"in": "query",
"required": false,
"schema": {
"type": "integer",
"format": "int64",
"enum": [
0,
1
]
},
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
}
],
"get": {
Expand Down Expand Up @@ -12541,9 +12570,13 @@
"schema": {
"type": "integer",
"format": "int64",
"enum": [
0,
1
],
"default": 0
},
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
},
{
"name": "replica",
Expand Down Expand Up @@ -12677,9 +12710,13 @@
"schema": {
"type": "integer",
"format": "int64",
"enum": [
0,
1
],
"default": 0
},
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
},
{
"name": "replica",
Expand Down
16 changes: 15 additions & 1 deletion src/azul/service/manifest_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def get_manifest_async(self,
query_params: Mapping[str, str],
fetch: bool,
authentication: Optional[Authentication]):
wait = query_params.get('wait')
if token_or_key is None:
token, manifest_key = None, None
else:
Expand Down Expand Up @@ -187,7 +188,9 @@ def get_manifest_async(self,
body: dict[str, int | str | FlatJSON]

if manifest is None:
url = self.manifest_url_func(fetch=fetch, token_or_key=token.encode())
url = self.manifest_url_func(fetch=fetch,
token_or_key=token.encode(),
**({} if wait is None else {'wait': wait}))
body = {
'Status': 301,
'Location': str(url),
Expand Down Expand Up @@ -225,6 +228,17 @@ def get_manifest_async(self,
'CommandLine': self.service.command_lines(manifest, url, authentication)
}

if wait is not None:
if wait == '0':
pass
elif wait == '1':
retry_after = body.get('Retry-After')
if retry_after is not None:
time_slept = self.server_side_sleep(retry_after)
body['Retry-After'] = round(retry_after - time_slept)
else:
assert False, wait

# Note: Response objects returned without a 'Content-Type' header will
# be given one of type 'application/json' as default by Chalice.
# https://aws.github.io/chalice/tutorials/basicrestapi.html#customizing-the-http-response
Expand Down
14 changes: 10 additions & 4 deletions test/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,8 +602,9 @@ def _test_other_endpoints(self):
def _test_manifest(self, catalog: CatalogName):
supported_formats = self.metadata_plugin(catalog).manifest_formats
assert supported_formats
for curl, format in chain(
product([False, True], [None, *supported_formats])
for curl, wait, format in chain(
product([False], [None], [None, *supported_formats]),
product([True], [None, 0, 1], [None, *supported_formats])
):
filters = self._manifest_filters(catalog)
if curl:
Expand All @@ -614,9 +615,10 @@ def _test_manifest(self, catalog: CatalogName):
fetch_modes = [first_fetch, not first_fetch]
for fetch in fetch_modes:
with self.subTest('manifest', catalog=catalog, format=format,
fetch=fetch, curl=curl):
fetch=fetch, curl=curl, wait=wait):
args = dict(catalog=catalog,
filters=json.dumps(filters))
filters=json.dumps(filters),
**({} if wait is None else {'wait': wait}))
if format is None:
format = first(supported_formats)
else:
Expand Down Expand Up @@ -892,6 +894,10 @@ def _get_url_content(self, method: str, url: furl) -> bytes:
retry_after = response.headers.get('Retry-After')
if retry_after is not None:
retry_after = float(retry_after)
if url.args.get('wait') == 1:
# The wait should have happened server-side and been
# subtracted from the retry-after that was returned.
self.assertEqual(0.0, retry_after)
log.info('Sleeping %.3fs to honor Retry-After header', retry_after)
time.sleep(retry_after)
url = furl(response.headers['Location'])
Expand Down

0 comments on commit 99cbd30

Please sign in to comment.