Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Request/Response Checksum Behavior Updates #3277

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
cdcb6b0
Add Flexible Checksum v2 Config Options (#3271)
jonathan343 Oct 8, 2024
c38b116
Add support for CRC64NVME when the CRT is available. (#3273)
jonathan343 Oct 8, 2024
02fac6d
Make CRC32 the default checksum and remove MD5
jonathan343 Oct 8, 2024
50d7142
Add tests for
jonathan343 Oct 8, 2024
0a2971f
Fix CRT only tests
jonathan343 Oct 8, 2024
bf16ed7
Actually fix CRT only tests
jonathan343 Oct 8, 2024
88eb03b
PR Feedback
jonathan343 Oct 22, 2024
878cb93
Ignore x-amz-checksum-algorithm when checking for user supplied check…
jonathan343 Oct 24, 2024
7563334
Revert "Ignore x-amz-checksum-algorithm when checking for user suppli…
jonathan343 Oct 28, 2024
860dc3b
enable response checksum validation by default
jonathan343 Oct 28, 2024
2e24f7f
PR feedback
jonathan343 Oct 29, 2024
e9eade8
Condense conditional; Update private function warning to docstring
jonathan343 Oct 29, 2024
ce46e4a
Avoid importing _CHECKSUM_CLS directly.
jonathan343 Nov 1, 2024
d44bb57
Add Flexible Checksum v2 Config Options (#3271)
jonathan343 Oct 8, 2024
dc710ec
Add support for CRC64NVME when the CRT is available. (#3273)
jonathan343 Oct 8, 2024
0569615
merge flexible-checksums-v2
jonathan343 Nov 6, 2024
626c08a
merge flexible-checksums-v2
jonathan343 Nov 13, 2024
8f26df1
Make "handle_request_validation_mode_member" private.
jonathan343 Nov 20, 2024
253928d
Set the header targeted by the "requestAlgorithmMember" to the defaul…
jonathan343 Nov 20, 2024
ff4f122
Minor test cleanup changes.
jonathan343 Nov 20, 2024
a2b19bd
Resolve the "requestAlgorithmMember" header in "apply_request_checksum".
jonathan343 Nov 21, 2024
503ead6
PR Feedback - Make requestAlgorithmMember context more specific.
jonathan343 Nov 22, 2024
509440a
fix testing issue; Assert actual checksum values in tests.
jonathan343 Nov 22, 2024
43ba7d0
Handle unsigned and "s3" signature_version requests.
jonathan343 Nov 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions botocore/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@
from botocore.utils import (
SAFE_CHARS,
ArnParser,
conditionally_calculate_checksum,
conditionally_calculate_md5,
percent_encode,
switch_host_with_param,
)
Expand Down Expand Up @@ -1293,6 +1291,17 @@ def add_query_compatibility_header(model, params, **kwargs):
params['headers']['x-amzn-query-mode'] = 'true'


def _handle_request_validation_mode_member(params, model, **kwargs):
client_config = kwargs.get("context", {}).get("client_config")
if client_config is None:
return
response_checksum_validation = client_config.response_checksum_validation
http_checksum = model.http_checksum
mode_member = http_checksum.get("requestValidationModeMember")
if mode_member and response_checksum_validation == "when_supported":
params.setdefault(mode_member, "ENABLED")


# This is a list of (event_name, handler).
# When a Session is created, everything in this list will be
# automatically registered with that Session.
Expand Down Expand Up @@ -1325,6 +1334,7 @@ def add_query_compatibility_header(model, params, **kwargs):
('before-parse.s3.*', handle_expires_header),
('before-parse.s3.*', _handle_200_error, REGISTER_FIRST),
('before-parameter-build', generate_idempotent_uuid),
('before-parameter-build', _handle_request_validation_mode_member),
('before-parameter-build.s3', validate_bucket_name),
('before-parameter-build.s3', remove_bucket_from_url_paths_from_model),
(
Expand Down Expand Up @@ -1358,10 +1368,7 @@ def add_query_compatibility_header(model, params, **kwargs):
('before-call.s3', add_expect_header),
('before-call.glacier', add_glacier_version),
('before-call.apigateway', add_accept_header),
('before-call.s3.PutObject', conditionally_calculate_checksum),
('before-call.s3.UploadPart', conditionally_calculate_md5),
('before-call.s3.DeleteObjects', escape_xml_payload),
('before-call.s3.DeleteObjects', conditionally_calculate_checksum),
('before-call.s3.PutBucketLifecycleConfiguration', escape_xml_payload),
('before-call.glacier.UploadArchive', add_glacier_checksums),
('before-call.glacier.UploadMultipartPart', add_glacier_checksums),
Expand Down
113 changes: 79 additions & 34 deletions botocore/httpchecksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,16 @@
from binascii import crc32
from hashlib import sha1, sha256

from botocore.compat import HAS_CRT
from botocore import UNSIGNED
from botocore.compat import HAS_CRT, urlparse
from botocore.exceptions import (
AwsChunkedWrapperError,
FlexibleChecksumError,
MissingDependencyException,
)
from botocore.model import StructureShape
from botocore.response import StreamingBody
from botocore.utils import (
conditionally_calculate_md5,
determine_content_length,
)
from botocore.utils import determine_content_length, has_checksum_header

if HAS_CRT:
from awscrt import checksums as crt_checksums
Expand All @@ -44,6 +43,8 @@

logger = logging.getLogger(__name__)

DEFAULT_CHECKSUM_ALGORITHM = "CRC32"


class BaseChecksum:
_CHUNK_SIZE = 1024 * 1024
Expand Down Expand Up @@ -259,7 +260,19 @@ def resolve_request_checksum_algorithm(
params,
supported_algorithms=None,
):
# If the header is already set by the customer, skip calculation
if has_checksum_header(request):
return

checksum_context = request["context"].get("checksum", {})
request_checksum_calculation = request["context"][
SamRemis marked this conversation as resolved.
Show resolved Hide resolved
"client_config"
].request_checksum_calculation
http_checksum = operation_model.http_checksum
request_checksum_required = (
operation_model.http_checksum_required
SamRemis marked this conversation as resolved.
Show resolved Hide resolved
or http_checksum.get("requestChecksumRequired")
)
algorithm_member = http_checksum.get("requestAlgorithmMember")
if algorithm_member and algorithm_member in params:
# If the client has opted into using flexible checksums and the
Expand All @@ -280,35 +293,62 @@ def resolve_request_checksum_algorithm(
raise FlexibleChecksumError(
error_msg=f"Unsupported checksum algorithm: {algorithm_name}"
)
elif request_checksum_required or (
algorithm_member and request_checksum_calculation == "when_supported"
):
# Don't use a default checksum for presigned requests.
if request["context"].get("is_presign_request"):
return
algorithm_name = DEFAULT_CHECKSUM_ALGORITHM.lower()
algorithm_member_header = _get_request_algorithm_member_header(
operation_model, request, algorithm_member
)
if algorithm_member_header is not None:
checksum_context["request_algorithm_header"] = {
"name": algorithm_member_header,
"value": DEFAULT_CHECKSUM_ALGORITHM,
}
else:
return

location_type = "header"
if operation_model.has_streaming_input:
location_type = "header"
if (
operation_model.has_streaming_input
and urlparse(request["url"]).scheme == "https"
):
if request["context"]["client_config"].signature_version != 's3':
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: would we prefer this if condition to be added to the parent if viaand?

# Operations with streaming input must support trailers.
if request["url"].startswith("https:"):
# We only support unsigned trailer checksums currently. As this
# disables payload signing we'll only use trailers over TLS.
location_type = "trailer"

algorithm = {
"algorithm": algorithm_name,
"in": location_type,
"name": f"x-amz-checksum-{algorithm_name}",
}
# We only support unsigned trailer checksums currently. As this
# disables payload signing we'll only use trailers over TLS.
location_type = "trailer"

algorithm = {
"algorithm": algorithm_name,
"in": location_type,
"name": f"x-amz-checksum-{algorithm_name}",
}

if algorithm["name"] in request["headers"]:
# If the header is already set by the customer, skip calculation
return
checksum_context["request_algorithm"] = algorithm
request["context"]["checksum"] = checksum_context

checksum_context = request["context"].get("checksum", {})
checksum_context["request_algorithm"] = algorithm
request["context"]["checksum"] = checksum_context
elif operation_model.http_checksum_required or http_checksum.get(
"requestChecksumRequired"
):
# Otherwise apply the old http checksum behavior via Content-MD5
checksum_context = request["context"].get("checksum", {})
checksum_context["request_algorithm"] = "conditional-md5"
request["context"]["checksum"] = checksum_context

def _get_request_algorithm_member_header(
operation_model, request, algorithm_member
):
"""Get the name of the header targeted by the "requestAlgorithmMember"."""
operation_input_shape = operation_model.input_shape
if not isinstance(operation_input_shape, StructureShape):
return

algorithm_member_shape = operation_input_shape.members.get(
algorithm_member
)

return (
algorithm_member_shape.serialization.get("name")
if algorithm_member_shape
else None
)


def apply_request_checksum(request):
Expand All @@ -318,17 +358,19 @@ def apply_request_checksum(request):
if not algorithm:
return

if algorithm == "conditional-md5":
# Special case to handle the http checksum required trait
conditionally_calculate_md5(request)
elif algorithm["in"] == "header":
if algorithm["in"] == "header":
_apply_request_header_checksum(request)
elif algorithm["in"] == "trailer":
_apply_request_trailer_checksum(request)
else:
raise FlexibleChecksumError(
error_msg="Unknown checksum variant: {}".format(algorithm["in"])
)
if "request_algorithm_header" in checksum_context:
request_algorithm_header = checksum_context["request_algorithm_header"]
request["headers"][request_algorithm_header["name"]] = (
request_algorithm_header["value"]
)


def _apply_request_header_checksum(request):
Expand Down Expand Up @@ -371,6 +413,9 @@ def _apply_request_trailer_checksum(request):
# services such as S3 may require the decoded content length
headers["X-Amz-Decoded-Content-Length"] = str(content_length)

if request["context"]["client_config"].signature_version == UNSIGNED:
headers["X-Amz-Content-SHA256"] = "STREAMING-UNSIGNED-PAYLOAD-TRAILER"

if isinstance(body, (bytes, bytearray)):
body = io.BytesIO(body)

Expand Down
29 changes: 20 additions & 9 deletions botocore/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3224,6 +3224,7 @@ def get_encoding_from_headers(headers, default='ISO-8859-1'):


def calculate_md5(body, **kwargs):
"""This function has been deprecated, but is kept for backwards compatibility."""
if isinstance(body, (bytes, bytearray)):
binary_md5 = _calculate_md5_from_bytes(body)
else:
Expand All @@ -3232,11 +3233,13 @@ def calculate_md5(body, **kwargs):


def _calculate_md5_from_bytes(body_bytes):
"""This function has been deprecated, but is kept for backwards compatibility."""
md5 = get_md5(body_bytes)
return md5.digest()


def _calculate_md5_from_file(fileobj):
"""This function has been deprecated, but is kept for backwards compatibility."""
start_position = fileobj.tell()
md5 = get_md5()
for chunk in iter(lambda: fileobj.read(1024 * 1024), b''):
Expand All @@ -3252,15 +3255,17 @@ def _is_s3express_request(params):
return endpoint_properties.get('backend') == 'S3Express'


def _has_checksum_header(params):
def has_checksum_header(params):
"""
Checks if a header starting with "x-amz-checksum-" is provided in a request.

This class is considered private and subject to abrupt breaking changes or
removal without prior announcement. Please do not use it directly.
"""
headers = params['headers']
# If a user provided Content-MD5 is present,
# don't try to compute a new one.
if 'Content-MD5' in headers:
return True

# If a header matching the x-amz-checksum-* pattern is present, we
# assume a checksum has already been provided and an md5 is not needed
# assume a checksum has already been provided by the user.
for header in headers:
if CHECKSUM_HEADER_PATTERN.match(header):
return True
Expand All @@ -3269,12 +3274,14 @@ def _has_checksum_header(params):


def conditionally_calculate_checksum(params, **kwargs):
nateprewitt marked this conversation as resolved.
Show resolved Hide resolved
if not _has_checksum_header(params):
"""This function has been deprecated, but is kept for backwards compatibility."""
if not has_checksum_header(params):
conditionally_calculate_md5(params, **kwargs)
conditionally_enable_crc32(params, **kwargs)


def conditionally_enable_crc32(params, **kwargs):
"""This function has been deprecated, but is kept for backwards compatibility."""
checksum_context = params.get('context', {}).get('checksum', {})
checksum_algorithm = checksum_context.get('request_algorithm')
if (
Expand All @@ -3292,15 +3299,19 @@ def conditionally_enable_crc32(params, **kwargs):


def conditionally_calculate_md5(params, **kwargs):
"""Only add a Content-MD5 if the system supports it."""
"""
This function has been deprecated, but is kept for backwards compatibility.

Only add a Content-MD5 if the system supports it.
"""
body = params['body']
checksum_context = params.get('context', {}).get('checksum', {})
checksum_algorithm = checksum_context.get('request_algorithm')
if checksum_algorithm and checksum_algorithm != 'conditional-md5':
# Skip for requests that will have a flexible checksum applied
return

if _has_checksum_header(params):
if has_checksum_header(params):
# Don't add a new header if one is already available.
return

Expand Down
12 changes: 12 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from botocore import credentials, utils
from botocore.awsrequest import AWSResponse
from botocore.compat import HAS_CRT, parse_qs, urlparse
from botocore.httpchecksum import _CHECKSUM_CLS, DEFAULT_CHECKSUM_ALGORITHM
from botocore.stub import Stubber

_LOADER = botocore.loaders.Loader()
Expand Down Expand Up @@ -597,3 +598,14 @@ def mock_load_service_model(service_name, type_name, api_version=None):

loader = session.get_component('data_loader')
monkeypatch.setattr(loader, 'load_service_model', mock_load_service_model)


def get_checksum_cls(algorithm=DEFAULT_CHECKSUM_ALGORITHM.lower()):
"""
This pass through is grabbing our internally supported list of checksums
to ensure we stay in sync, while not exposing them publicly.

Returns a checksum algorithm class. The default checksum class is used
if one isn't specified.
"""
return _CHECKSUM_CLS[algorithm]
Loading
Loading