Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Request/Response Checksum Behavior Updates #3277

Closed
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
cdcb6b0
Add Flexible Checksum v2 Config Options (#3271)
jonathan343 Oct 8, 2024
c38b116
Add support for CRC64NVME when the CRT is available. (#3273)
jonathan343 Oct 8, 2024
02fac6d
Make CRC32 the default checksum and remove MD5
jonathan343 Oct 8, 2024
50d7142
Add tests for
jonathan343 Oct 8, 2024
0a2971f
Fix CRT only tests
jonathan343 Oct 8, 2024
bf16ed7
Actually fix CRT only tests
jonathan343 Oct 8, 2024
88eb03b
PR Feedback
jonathan343 Oct 22, 2024
878cb93
Ignore x-amz-checksum-algorithm when checking for user supplied check…
jonathan343 Oct 24, 2024
7563334
Revert "Ignore x-amz-checksum-algorithm when checking for user suppli…
jonathan343 Oct 28, 2024
860dc3b
enable response checksum validation by default
jonathan343 Oct 28, 2024
2e24f7f
PR feedback
jonathan343 Oct 29, 2024
e9eade8
Condense conditional; Update private function warning to docstring
jonathan343 Oct 29, 2024
ce46e4a
Avoid importing _CHECKSUM_CLS directly.
jonathan343 Nov 1, 2024
d44bb57
Add Flexible Checksum v2 Config Options (#3271)
jonathan343 Oct 8, 2024
dc710ec
Add support for CRC64NVME when the CRT is available. (#3273)
jonathan343 Oct 8, 2024
0569615
merge flexible-checksums-v2
jonathan343 Nov 6, 2024
626c08a
merge flexible-checksums-v2
jonathan343 Nov 13, 2024
8f26df1
Make "handle_request_validation_mode_member" private.
jonathan343 Nov 20, 2024
253928d
Set the header targeted by the "requestAlgorithmMember" to the defaul…
jonathan343 Nov 20, 2024
ff4f122
Minor test cleanup changes.
jonathan343 Nov 20, 2024
a2b19bd
Resolve the "requestAlgorithmMember" header in "apply_request_checksum".
jonathan343 Nov 21, 2024
503ead6
PR Feedback - Make requestAlgorithmMember context more specific.
jonathan343 Nov 22, 2024
509440a
fix testing issue; Assert actual checksum values in tests.
jonathan343 Nov 22, 2024
43ba7d0
Handle unsigned and "s3" signature_version requests.
jonathan343 Nov 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions botocore/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@
from botocore.utils import (
SAFE_CHARS,
ArnParser,
conditionally_calculate_checksum,
conditionally_calculate_md5,
percent_encode,
switch_host_with_param,
)
Expand Down Expand Up @@ -1293,6 +1291,17 @@ def add_query_compatibility_header(model, params, **kwargs):
params['headers']['x-amzn-query-mode'] = 'true'


def _handle_request_validation_mode_member(params, model, **kwargs):
client_config = kwargs.get("context", {}).get("client_config")
if client_config is None:
return
response_checksum_validation = client_config.response_checksum_validation
http_checksum = model.http_checksum
mode_member = http_checksum.get("requestValidationModeMember")
if mode_member and response_checksum_validation == "when_supported":
params.setdefault(mode_member, "ENABLED")


# This is a list of (event_name, handler).
# When a Session is created, everything in this list will be
# automatically registered with that Session.
Expand Down Expand Up @@ -1325,6 +1334,7 @@ def add_query_compatibility_header(model, params, **kwargs):
('before-parse.s3.*', handle_expires_header),
('before-parse.s3.*', _handle_200_error, REGISTER_FIRST),
('before-parameter-build', generate_idempotent_uuid),
('before-parameter-build', _handle_request_validation_mode_member),
('before-parameter-build.s3', validate_bucket_name),
('before-parameter-build.s3', remove_bucket_from_url_paths_from_model),
(
Expand Down Expand Up @@ -1358,10 +1368,7 @@ def add_query_compatibility_header(model, params, **kwargs):
('before-call.s3', add_expect_header),
('before-call.glacier', add_glacier_version),
('before-call.apigateway', add_accept_header),
('before-call.s3.PutObject', conditionally_calculate_checksum),
('before-call.s3.UploadPart', conditionally_calculate_md5),
('before-call.s3.DeleteObjects', escape_xml_payload),
('before-call.s3.DeleteObjects', conditionally_calculate_checksum),
('before-call.s3.PutBucketLifecycleConfiguration', escape_xml_payload),
('before-call.glacier.UploadArchive', add_glacier_checksums),
('before-call.glacier.UploadMultipartPart', add_glacier_checksums),
Expand Down
105 changes: 70 additions & 35 deletions botocore/httpchecksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,15 @@
from binascii import crc32
from hashlib import sha1, sha256

from botocore.compat import HAS_CRT
from botocore.compat import HAS_CRT, urlparse
from botocore.exceptions import (
AwsChunkedWrapperError,
FlexibleChecksumError,
MissingDependencyException,
)
from botocore.model import StructureShape
from botocore.response import StreamingBody
from botocore.utils import (
conditionally_calculate_md5,
determine_content_length,
)
from botocore.utils import determine_content_length, has_checksum_header

if HAS_CRT:
from awscrt import checksums as crt_checksums
Expand All @@ -44,6 +42,8 @@

logger = logging.getLogger(__name__)

DEFAULT_CHECKSUM_ALGORITHM = "CRC32"


class BaseChecksum:
_CHUNK_SIZE = 1024 * 1024
Expand Down Expand Up @@ -259,7 +259,19 @@ def resolve_request_checksum_algorithm(
params,
supported_algorithms=None,
):
# If the header is already set by the customer, skip calculation
if has_checksum_header(request):
return

extra_headers = {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a rough idea of what expansion we're expecting here? Generally grab-bag style containers like this tend to grow in unexpected ways. It may be worth constraining to the exact header we're expecting since it's tightly coupled to the checksum we're creating.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated this to instead store the following in our request checksum context:

"request_algorithm_header": {
    "name": "foo",
    "value": "bar",
},

We can later check if "request_algorithm_header" in request["context"]["checksum"] and apply the header if it exists. Let me know if that's better.

request_checksum_calculation = request["context"][
SamRemis marked this conversation as resolved.
Show resolved Hide resolved
"client_config"
].request_checksum_calculation
http_checksum = operation_model.http_checksum
request_checksum_required = (
operation_model.http_checksum_required
SamRemis marked this conversation as resolved.
Show resolved Hide resolved
or http_checksum.get("requestChecksumRequired")
)
algorithm_member = http_checksum.get("requestAlgorithmMember")
if algorithm_member and algorithm_member in params:
# If the client has opted into using flexible checksums and the
Expand All @@ -280,35 +292,59 @@ def resolve_request_checksum_algorithm(
raise FlexibleChecksumError(
error_msg=f"Unsupported checksum algorithm: {algorithm_name}"
)
elif request_checksum_required or (
algorithm_member and request_checksum_calculation == "when_supported"
):
algorithm_name = DEFAULT_CHECKSUM_ALGORITHM.lower()
algorithm_member_header = _get_request_algorithm_member_header(
operation_model, request, algorithm_member
)
if algorithm_member_header is not None:
extra_headers[algorithm_member_header] = DEFAULT_CHECKSUM_ALGORITHM
else:
return

location_type = "header"
if operation_model.has_streaming_input:
# Operations with streaming input must support trailers.
if request["url"].startswith("https:"):
# We only support unsigned trailer checksums currently. As this
# disables payload signing we'll only use trailers over TLS.
location_type = "trailer"

algorithm = {
"algorithm": algorithm_name,
"in": location_type,
"name": f"x-amz-checksum-{algorithm_name}",
}
location_type = "header"
if (
operation_model.has_streaming_input
and urlparse(request["url"]).scheme == "https"
):
# Operations with streaming input must support trailers.
# We only support unsigned trailer checksums currently. As this
# disables payload signing we'll only use trailers over TLS.
location_type = "trailer"
pass
jonathan343 marked this conversation as resolved.
Show resolved Hide resolved

algorithm = {
"algorithm": algorithm_name,
"in": location_type,
"name": f"x-amz-checksum-{algorithm_name}",
}
if extra_headers:
algorithm["extra_headers"] = extra_headers

if algorithm["name"] in request["headers"]:
# If the header is already set by the customer, skip calculation
return
checksum_context = request["context"].get("checksum", {})
checksum_context["request_algorithm"] = algorithm
request["context"]["checksum"] = checksum_context

checksum_context = request["context"].get("checksum", {})
checksum_context["request_algorithm"] = algorithm
request["context"]["checksum"] = checksum_context
elif operation_model.http_checksum_required or http_checksum.get(
"requestChecksumRequired"
):
# Otherwise apply the old http checksum behavior via Content-MD5
checksum_context = request["context"].get("checksum", {})
checksum_context["request_algorithm"] = "conditional-md5"
request["context"]["checksum"] = checksum_context

def _get_request_algorithm_member_header(
operation_model, request, algorithm_member
):
"""Get the name of the header targeted by the "requestAlgorithmMember"."""
operation_input_shape = operation_model.input_shape
if not isinstance(operation_input_shape, StructureShape):
return

algorithm_member_shape = operation_input_shape.members.get(
algorithm_member
)

return (
algorithm_member_shape.serialization.get("name")
if algorithm_member_shape
else None
)


def apply_request_checksum(request):
Expand All @@ -318,17 +354,16 @@ def apply_request_checksum(request):
if not algorithm:
return

if algorithm == "conditional-md5":
# Special case to handle the http checksum required trait
conditionally_calculate_md5(request)
elif algorithm["in"] == "header":
if algorithm["in"] == "header":
_apply_request_header_checksum(request)
elif algorithm["in"] == "trailer":
_apply_request_trailer_checksum(request)
else:
raise FlexibleChecksumError(
error_msg="Unknown checksum variant: {}".format(algorithm["in"])
)
if "extra_headers" in algorithm:
request["headers"].update(algorithm["extra_headers"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this work if the user already supplied these headers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The header we're worried about would only get added when the default checksum is used. This means that the user didn't supply the "requestAlgorithmMember" member. The user's input would always take precedence.
I'll update this logic to be less generic as suggested by your other comment.



def _apply_request_header_checksum(request):
Expand Down
29 changes: 20 additions & 9 deletions botocore/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3224,6 +3224,7 @@ def get_encoding_from_headers(headers, default='ISO-8859-1'):


def calculate_md5(body, **kwargs):
"""This function has been deprecated, but is kept for backwards compatibility."""
if isinstance(body, (bytes, bytearray)):
binary_md5 = _calculate_md5_from_bytes(body)
else:
Expand All @@ -3232,11 +3233,13 @@ def calculate_md5(body, **kwargs):


def _calculate_md5_from_bytes(body_bytes):
"""This function has been deprecated, but is kept for backwards compatibility."""
md5 = get_md5(body_bytes)
return md5.digest()


def _calculate_md5_from_file(fileobj):
"""This function has been deprecated, but is kept for backwards compatibility."""
start_position = fileobj.tell()
md5 = get_md5()
for chunk in iter(lambda: fileobj.read(1024 * 1024), b''):
Expand All @@ -3252,15 +3255,17 @@ def _is_s3express_request(params):
return endpoint_properties.get('backend') == 'S3Express'


def _has_checksum_header(params):
def has_checksum_header(params):
"""
Checks if a header starting with "x-amz-checksum-" is provided in a request.

This class is considered private and subject to abrupt breaking changes or
removal without prior announcement. Please do not use it directly.
"""
headers = params['headers']
# If a user provided Content-MD5 is present,
# don't try to compute a new one.
if 'Content-MD5' in headers:
return True

# If a header matching the x-amz-checksum-* pattern is present, we
# assume a checksum has already been provided and an md5 is not needed
# assume a checksum has already been provided by the user.
for header in headers:
if CHECKSUM_HEADER_PATTERN.match(header):
return True
Expand All @@ -3269,12 +3274,14 @@ def _has_checksum_header(params):


def conditionally_calculate_checksum(params, **kwargs):
nateprewitt marked this conversation as resolved.
Show resolved Hide resolved
if not _has_checksum_header(params):
"""This function has been deprecated, but is kept for backwards compatibility."""
if not has_checksum_header(params):
conditionally_calculate_md5(params, **kwargs)
conditionally_enable_crc32(params, **kwargs)


def conditionally_enable_crc32(params, **kwargs):
"""This function has been deprecated, but is kept for backwards compatibility."""
checksum_context = params.get('context', {}).get('checksum', {})
checksum_algorithm = checksum_context.get('request_algorithm')
if (
Expand All @@ -3292,15 +3299,19 @@ def conditionally_enable_crc32(params, **kwargs):


def conditionally_calculate_md5(params, **kwargs):
"""Only add a Content-MD5 if the system supports it."""
"""
This function has been deprecated, but is kept for backwards compatibility.

Only add a Content-MD5 if the system supports it.
"""
body = params['body']
checksum_context = params.get('context', {}).get('checksum', {})
checksum_algorithm = checksum_context.get('request_algorithm')
if checksum_algorithm and checksum_algorithm != 'conditional-md5':
# Skip for requests that will have a flexible checksum applied
return

if _has_checksum_header(params):
if has_checksum_header(params):
# Don't add a new header if one is already available.
return

Expand Down
12 changes: 12 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from botocore import credentials, utils
from botocore.awsrequest import AWSResponse
from botocore.compat import HAS_CRT, parse_qs, urlparse
from botocore.httpchecksum import _CHECKSUM_CLS, DEFAULT_CHECKSUM_ALGORITHM
from botocore.stub import Stubber

_LOADER = botocore.loaders.Loader()
Expand Down Expand Up @@ -597,3 +598,14 @@ def mock_load_service_model(service_name, type_name, api_version=None):

loader = session.get_component('data_loader')
monkeypatch.setattr(loader, 'load_service_model', mock_load_service_model)


def get_checksum_cls(algorithm=DEFAULT_CHECKSUM_ALGORITHM.lower()):
"""
This pass through is grabbing our internally supported list of checksums
to ensure we stay in sync, while not exposing them publicly.

Returns a checksum algorithm class. The default checksum class is used
if one isn't specified.
"""
return _CHECKSUM_CLS[algorithm]
Loading
Loading