Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v2] S3 200 errors #9055

Merged
merged 12 commits into from
Nov 8, 2024
5 changes: 5 additions & 0 deletions .changes/next-release/enhancement-s3-70184.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "enhancement",
"category": "``s3``",
"description": "Handle HTTP 200 responses with error information for all supported s3 operations."
}
1 change: 0 additions & 1 deletion awscli/botocore/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ def compute_client_args(self, service_model, client_config,
if raw_value is not None:
parameter_validation = ensure_boolean(raw_value)


s3_config = self.compute_s3_config(client_config)

configured_endpoint_url = self._compute_configured_endpoint_url(
Expand Down
85 changes: 50 additions & 35 deletions awscli/botocore/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,41 +114,13 @@ def escape_xml_payload(params, **kwargs):
params['body'] = body


def check_for_200_error(response, **kwargs):
    """Rewrite an S3 200-with-embedded-error response to a 500.

    Per the S3 CopyObject documentation
    (http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html),
    an error that occurs *after* the copy operation has started is
    embedded in the body of a 200 OK response, so a 200 can carry either
    a success or an error document.  Such a response should conceptually
    be treated exactly like a 500 (raised, retried, etc.).  This handler
    is connected *before* the other retry handlers, so rewriting the
    status code to 500 is enough for the normal retry logic to kick in.

    :param response: ``(http_response, parsed)`` tuple, or ``None`` when
        an exception was raised while retrieving the response (see
        ``Endpoint._get_response()``).
    """
    if response is None:
        # Nothing to inspect; the request never produced a response.
        return
    http_response, parsed = response
    if not _looks_like_special_case_error(http_response):
        return
    logger.debug("Error found for response with 200 status code, "
                 "errors: %s, changing status code to "
                 "500.", parsed)
    http_response.status_code = 500


def _looks_like_special_case_error(http_response):
if http_response.status_code == 200:
def _looks_like_special_case_error(status_code, body):
if status_code == 200 and body:
try:
parser = ETree.XMLParser(
target=ETree.TreeBuilder(),
encoding='utf-8')
parser.feed(http_response.content)
parser.feed(body)
root = parser.close()
except XMLParseError:
# In cases of network disruptions, we may end up with a partial
Expand Down Expand Up @@ -1134,6 +1106,51 @@ def document_expires_shape(section, event_name, **kwargs):
)


def _handle_200_error(operation_model, response_dict, **kwargs):
    """Rewrite an S3 200 response carrying an embedded error to a 500.

    S3 can return a 200 response whose body is actually an error
    document.  When that happens the status code in ``response_dict`` is
    changed to 500 so that ``_update_status_code`` can propagate it for
    retry resolution.  Operations excluded by
    ``_should_handle_200_error`` (e.g. streaming response blobs) are
    skipped, as their bodies can't be reliably distinguished from an S3
    error document.
    """
    if not _should_handle_200_error(operation_model, response_dict):
        return
    status_code = response_dict['status_code']
    body = response_dict['body']
    if _looks_like_special_case_error(status_code, body):
        response_dict['status_code'] = 500
        logger.debug(
            f"Error found for response with 200 status code: {response_dict['body']}."
        )


def _should_handle_200_error(operation_model, response_dict):
output_shape = operation_model.output_shape
if (
not response_dict
or operation_model.has_event_stream_output
or not output_shape
):
return False
payload = output_shape.serialization.get('payload')
if payload is not None:
payload_shape = output_shape.members[payload]
if payload_shape.type_name in ('blob', 'string'):
return False
return True


def _update_status_code(response, **kwargs):
# Update the http_response status code when the parsed response has been
# modified in a handler. This enables retries for cases like ``_handle_200_error``.
if response is None:
return
http_response, parsed = response
parsed_status_code = parsed.get('ResponseMetadata', {}).get(
'HTTPStatusCode', http_response.status_code
)
if http_response.status_code != parsed_status_code:
http_response.status_code = parsed_status_code


# This is a list of (event_name, handler).
# When a Session is created, everything in this list will be
# automatically registered with that Session.
Expand All @@ -1158,6 +1175,7 @@ def document_expires_shape(section, event_name, **kwargs):
('after-call.cloudformation.GetTemplate', json_decode_template_body),
('after-call.s3.GetBucketLocation', parse_get_bucket_location),
('before-parse.s3.*', handle_expires_header),
('before-parse.s3.*', _handle_200_error, REGISTER_FIRST),
('before-parameter-build', generate_idempotent_uuid),

('before-parameter-build.s3', validate_bucket_name),
Expand Down Expand Up @@ -1196,10 +1214,7 @@ def document_expires_shape(section, event_name, **kwargs):
('before-call.glacier.UploadMultipartPart', add_glacier_checksums),
('before-call.ec2.CopySnapshot', inject_presigned_url_ec2),
('request-created.machine-learning.Predict', switch_host_machinelearning),
('needs-retry.s3.UploadPartCopy', check_for_200_error, REGISTER_FIRST),
('needs-retry.s3.CopyObject', check_for_200_error, REGISTER_FIRST),
('needs-retry.s3.CompleteMultipartUpload', check_for_200_error,
REGISTER_FIRST),
('needs-retry.s3.*', _update_status_code, REGISTER_FIRST),
('choose-signer.cognito-identity.GetId', disable_signing),
('choose-signer.cognito-identity.GetOpenIdToken', disable_signing),
('choose-signer.cognito-identity.UnlinkIdentity', disable_signing),
Expand Down
84 changes: 75 additions & 9 deletions tests/functional/botocore/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
UnsupportedS3ConfigurationError,
UnsupportedS3AccesspointConfigurationError,
)
from botocore.parsers import ResponseParserError
from botocore.loaders import Loader
from botocore import UNSIGNED

Expand Down Expand Up @@ -358,12 +357,12 @@ def create_stubbed_s3_client(self, **kwargs):
http_stubber.start()
return client, http_stubber

def test_s3_copy_object_with_empty_response(self):
def test_s3_copy_object_with_incomplete_response(self):
self.client, self.http_stubber = self.create_stubbed_s3_client(
region_name='us-east-1'
)

empty_body = b''
incomplete_body = b'<?xml version="1.0" encoding="UTF-8"?>\n\n\n'
complete_body = (
b'<?xml version="1.0" encoding="UTF-8"?>\n\n'
b'<CopyObjectResult '
Expand All @@ -372,7 +371,7 @@ def test_s3_copy_object_with_empty_response(self):
b'<ETag>&quot;s0mEcH3cK5uM&quot;</ETag></CopyObjectResult>'
)

self.http_stubber.add_response(status=200, body=empty_body)
self.http_stubber.add_response(status=200, body=incomplete_body)
self.http_stubber.add_response(status=200, body=complete_body)
response = self.client.copy_object(
Bucket='bucket',
Expand All @@ -385,19 +384,86 @@ def test_s3_copy_object_with_empty_response(self):
self.assertEqual(response['ResponseMetadata']['HTTPStatusCode'], 200)
self.assertTrue('CopyObjectResult' in response)

def test_s3_copy_object_with_incomplete_response(self):

class TestS3200ErrorResponse(BaseS3OperationTest):
def create_s3_client(self, **kwargs):
    """Create an S3 client in ``self.region``; ``kwargs`` override defaults."""
    final_kwargs = {"region_name": self.region, **kwargs}
    return self.session.create_client("s3", **final_kwargs)

def create_stubbed_s3_client(self, **kwargs):
    """Create an S3 client with a started ``ClientHTTPStubber`` attached.

    Returns a ``(client, stubber)`` tuple; the stubber is already started.
    """
    stubbed_client = self.create_s3_client(**kwargs)
    stubber = ClientHTTPStubber(stubbed_client)
    stubber.start()
    return stubbed_client, stubber

def test_s3_200_with_error_response(self):
self.client, self.http_stubber = self.create_stubbed_s3_client(
region_name='us-east-1'
)

incomplete_body = b'<?xml version="1.0" encoding="UTF-8"?>\n\n\n'
self.http_stubber.add_response(status=200, body=incomplete_body)
with self.assertRaises(ResponseParserError):
error_body = (
b"<Error>"
b"<Code>SlowDown</Code>"
b"<Message>Please reduce your request rate.</Message>"
b"</Error>"
)
# Queue 3 SlowDown responses to validate that the two allowed
# retries are exhausted and an exception is then raised.
for i in range(3):
self.http_stubber.add_response(status=200, body=error_body)
with self.assertRaises(botocore.exceptions.ClientError) as context:
self.client.copy_object(
Bucket='bucket',
CopySource='other-bucket/test.txt',
Key='test.txt',
)
self.assertEqual(len(self.http_stubber.requests), 3)
self.assertEqual(
context.exception.response["ResponseMetadata"]["HTTPStatusCode"],
500,
)
self.assertEqual(
context.exception.response["Error"]["Code"], "SlowDown"
)

def test_s3_200_with_no_error_response(self):
    """A 200 whose body is not an S3 error document stays a 200."""
    client, stubber = self.create_stubbed_s3_client(region_name="us-east-1")
    self.client, self.http_stubber = client, stubber
    stubber.add_response(status=200, body=b"<NotAnError/>")

    response = client.copy_object(
        Bucket="bucket",
        CopySource="other-bucket/test.txt",
        Key="test.txt",
    )

    # Exactly one request was made and the 200 status code is untouched.
    self.assertEqual(len(stubber.requests), 1)
    self.assertEqual(response["ResponseMetadata"]["HTTPStatusCode"], 200)

def test_s3_200_with_error_response_on_streaming_operation(self):
    """Streaming operations are exempt from 200-with-error handling."""
    client, stubber = self.create_stubbed_s3_client(region_name="us-east-1")
    self.client, self.http_stubber = client, stubber
    stubber.add_response(status=200, body=b"<Error/>")

    response = client.get_object(Bucket="bucket", Key="test.txt")

    # 200-with-error responses are not processed on streaming operations,
    # so a single request is made and the status code remains 200.
    self.assertEqual(len(stubber.requests), 1)
    self.assertEqual(response["ResponseMetadata"]["HTTPStatusCode"], 200)

def test_s3_200_response_with_no_body(self):
    """Operations whose responses carry no body keep their 200 status."""
    client, stubber = self.create_stubbed_s3_client(region_name="us-east-1")
    self.client, self.http_stubber = client, stubber
    stubber.add_response(status=200)

    response = client.head_object(Bucket="bucket", Key="test.txt")

    # One request, and the 200 status code is preserved without a body.
    self.assertEqual(len(stubber.requests), 1)
    self.assertEqual(response["ResponseMetadata"]["HTTPStatusCode"], 200)


class TestAccesspointArn(BaseS3ClientConfigurationTest):
Expand Down
1 change: 0 additions & 1 deletion tests/functional/s3/test_mv_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from awscrt.s3 import S3RequestType

from awscli.compat import BytesIO
from awscli.customizations.s3.utils import S3PathResolver
from awscli.testutils import mock
from tests.functional.s3 import (
BaseS3TransferCommandTest, BaseCRTTransferClientTest
Expand Down
33 changes: 3 additions & 30 deletions tests/unit/botocore/test_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,33 +429,6 @@ def test_presigned_url_casing_changed_for_rds(self):
params['PreSignedUrl'])
self.assertIn('X-Amz-Signature', params['PreSignedUrl'])

def test_500_status_code_set_for_200_response(self):
http_response = mock.Mock()
http_response.status_code = 200
http_response.content = """
<Error>
<Code>AccessDenied</Code>
<Message>Access Denied</Message>
<RequestId>id</RequestId>
<HostId>hostid</HostId>
</Error>
"""
handlers.check_for_200_error((http_response, {}))
self.assertEqual(http_response.status_code, 500)

def test_200_response_with_no_error_left_untouched(self):
http_response = mock.Mock()
http_response.status_code = 200
http_response.content = "<NotAnError></NotAnError>"
handlers.check_for_200_error((http_response, {}))
# We don't touch the status code since there are no errors present.
self.assertEqual(http_response.status_code, 200)

def test_500_response_can_be_none(self):
# A 500 response can raise an exception, which means the response
# object is None. We need to handle this case.
handlers.check_for_200_error(None)

def test_route53_resource_id(self):
event = 'before-parameter-build.route-53.GetHostedZone'
params = {'Id': '/hostedzone/ABC123',
Expand Down Expand Up @@ -1138,14 +1111,14 @@ def test_s3_special_case_is_before_other_retry(self):
response=(mock.Mock(), mock.Mock()), endpoint=mock.Mock(),
operation=operation, attempts=1, caught_exception=None)
# This is implementation specific, but we're trying to verify that
# the check_for_200_error is before any of the retry logic in
# the _update_status_code is before any of the retry logic in
# botocore.retries.*.
# Technically, as long as the relative order is preserved, we don't
# care about the absolute order.
names = self.get_handler_names(responses)
self.assertIn('check_for_200_error', names)
self.assertIn('_update_status_code', names)
self.assertIn('needs_retry', names)
s3_200_handler = names.index('check_for_200_error')
s3_200_handler = names.index('_update_status_code')
general_retry_handler = names.index('needs_retry')
self.assertTrue(s3_200_handler < general_retry_handler,
"S3 200 error handler was supposed to be before "
Expand Down
Loading