From 66464f0e6a3c386e50d46f8958cba1323a009a66 Mon Sep 17 00:00:00 2001 From: nl_0 Date: Fri, 23 Feb 2024 11:15:03 +0100 Subject: [PATCH] modern -> chunked --- lambdas/pkgpush/tests/test_index.py | 2 +- lambdas/s3hash/pytest.ini | 2 +- .../s3hash/src/t4_lambda_s3hash/__init__.py | 32 +++++++++---------- lambdas/s3hash/tests/test_compute_checksum.py | 10 +++--- .../tests/test_get_compliant_checksum.py | 24 +++++++------- 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/lambdas/pkgpush/tests/test_index.py b/lambdas/pkgpush/tests/test_index.py index a189f7c34e6..8fc53bf42c0 100644 --- a/lambdas/pkgpush/tests/test_index.py +++ b/lambdas/pkgpush/tests/test_index.py @@ -902,7 +902,7 @@ def test_calculate_pkg_hashes_too_large_file_error(self): def test_calculate_pkg_entry_hash(self): with mock.patch( "t4_lambda_pkgpush.invoke_hash_lambda", - return_value=Checksum(type=ChecksumType.MODERN, value="base64hash"), + return_value=Checksum(type=ChecksumType.SHA256_CHUNKED, value="base64hash"), ) as invoke_hash_lambda_mock: t4_lambda_pkgpush.calculate_pkg_entry_hash(self.entry_without_hash, CREDENTIALS) diff --git a/lambdas/s3hash/pytest.ini b/lambdas/s3hash/pytest.ini index 315cd5be1fa..8c50524e46f 100644 --- a/lambdas/s3hash/pytest.ini +++ b/lambdas/s3hash/pytest.ini @@ -2,5 +2,5 @@ asyncio_mode=auto env = MPU_CONCURRENCY=1000 - MODERN_CHECKSUMS=true + CHUNKED_CHECKSUMS=true SERVICE_BUCKET=service-bucket diff --git a/lambdas/s3hash/src/t4_lambda_s3hash/__init__.py b/lambdas/s3hash/src/t4_lambda_s3hash/__init__.py index 15279261c4a..9df26c7daee 100644 --- a/lambdas/s3hash/src/t4_lambda_s3hash/__init__.py +++ b/lambdas/s3hash/src/t4_lambda_s3hash/__init__.py @@ -37,7 +37,7 @@ logger.setLevel(os.environ.get("QUILT_LOG_LEVEL", "WARNING")) MPU_CONCURRENCY = int(os.environ["MPU_CONCURRENCY"]) -MODERN_CHECKSUMS = os.environ["MODERN_CHECKSUMS"] == "true" +CHUNKED_CHECKSUMS = os.environ["CHUNKED_CHECKSUMS"] == "true" SERVICE_BUCKET = os.environ["SERVICE_BUCKET"] SCRATCH_KEY_SERVICE = "user-requests/checksum-upload-tmp" @@ -70,22 +70,22 @@ async def aio_context(credentials: AWSCredentials): class Checksum(ChecksumBase): @classmethod - def legacy(cls, value: bytes): - return cls(value=value.hex(), type=ChecksumType.LEGACY) + def sha256(cls, value: bytes): + return cls(value=value.hex(), type=ChecksumType.SHA256) @classmethod - def modern(cls, value: bytes): - return cls(value=base64.b64encode(value).decode(), type=ChecksumType.MODERN) + def sha256_chunked(cls, value: bytes): + return cls(value=base64.b64encode(value).decode(), type=ChecksumType.SHA256_CHUNKED) @classmethod def for_parts(cls, checksums: T.Sequence[bytes]): - return cls.modern(hash_parts(checksums)) + return cls.sha256_chunked(hash_parts(checksums)) _EMPTY_HASH = hashlib.sha256().digest() @classmethod def empty(cls): - return cls.modern(cls._EMPTY_HASH) if MODERN_CHECKSUMS else cls.legacy(cls._EMPTY_HASH) + return cls.sha256_chunked(cls._EMPTY_HASH) if CHUNKED_CHECKSUMS else cls.sha256(cls._EMPTY_HASH) # 8 MiB -- boto3 default: @@ -134,7 +134,7 @@ def get_compliant_checksum(attrs: GetObjectAttributesOutputTypeDef) -> T.Optiona part_size = get_part_size(attrs["ObjectSize"]) object_parts = attrs.get("ObjectParts") - if not MODERN_CHECKSUMS or part_size is None: + if not CHUNKED_CHECKSUMS or part_size is None: if object_parts is not None: assert "TotalPartsCount" in object_parts if object_parts["TotalPartsCount"] != 1: @@ -147,9 +147,9 @@ def get_compliant_checksum(attrs: GetObjectAttributesOutputTypeDef) -> T.Optiona return ( # double-hash - Checksum.modern(hashlib.sha256(checksum_bytes).digest()) - if MODERN_CHECKSUMS - else Checksum.legacy(checksum_bytes) + Checksum.sha256_chunked(hashlib.sha256(checksum_bytes).digest()) + if CHUNKED_CHECKSUMS + else Checksum.sha256(checksum_bytes) ) if object_parts is None: @@ -160,7 +160,7 @@ def get_compliant_checksum(attrs: GetObjectAttributesOutputTypeDef) -> T.Optiona # Make sure we have _all_ parts. assert len(object_parts["Parts"]) == num_parts if all(part.get("Size") == part_size for part in object_parts["Parts"][:-1]): - return Checksum.modern(base64.b64decode(checksum_value)) + return Checksum.sha256_chunked(base64.b64decode(checksum_value)) return None @@ -311,7 +311,7 @@ async def compute_checksum_legacy(location: S3ObjectSource) -> Checksum: async for chunk in stream.content.iter_any(): hashobj.update(chunk) - return Checksum.legacy(hashobj.digest()) + return Checksum.sha256(hashobj.digest()) async def compute_checksum(location: S3ObjectSource) -> ChecksumResult: @@ -332,11 +332,11 @@ async def compute_checksum(location: S3ObjectSource) -> ChecksumResult: if total_size == 0: return ChecksumResult(checksum=Checksum.empty()) - if not MODERN_CHECKSUMS and total_size > MAX_PART_SIZE: + if not CHUNKED_CHECKSUMS and total_size > MAX_PART_SIZE: checksum = await compute_checksum_legacy(location) return ChecksumResult(checksum=checksum) - part_defs = get_parts_for_size(total_size) if MODERN_CHECKSUMS else PARTS_SINGLE + part_defs = get_parts_for_size(total_size) if CHUNKED_CHECKSUMS else PARTS_SINGLE async with create_mpu() as mpu: part_checksums = await compute_part_checksums( @@ -346,7 +346,7 @@ async def compute_checksum(location: S3ObjectSource) -> ChecksumResult: part_defs, ) - checksum = Checksum.for_parts(part_checksums) if MODERN_CHECKSUMS else Checksum.legacy(part_checksums[0]) + checksum = Checksum.for_parts(part_checksums) if CHUNKED_CHECKSUMS else Checksum.sha256(part_checksums[0]) return ChecksumResult(checksum=checksum) diff --git a/lambdas/s3hash/tests/test_compute_checksum.py b/lambdas/s3hash/tests/test_compute_checksum.py index d8ca30a84a1..455c08543ac 100644 --- a/lambdas/s3hash/tests/test_compute_checksum.py +++ b/lambdas/s3hash/tests/test_compute_checksum.py @@ -89,7 +89,7 @@ async def test_compliant(s3_stub: Stubber): res = await s3hash.compute_checksum(LOC) - assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.modern(base64.b64decode(checksum_hash))) + assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.sha256_chunked(base64.b64decode(checksum_hash))) async def test_empty(s3_stub: Stubber): @@ -149,12 +149,12 @@ async def test_legacy(s3_stub: Stubber, mocker: MockerFixture): LOC.boto_args, ) - mocker.patch("t4_lambda_s3hash.MODERN_CHECKSUMS", False) + mocker.patch("t4_lambda_s3hash.CHUNKED_CHECKSUMS", False) res = await s3hash.compute_checksum(LOC) checksum_hex = bytes.fromhex("d9d865cc54ec60678f1b119084ad79ae7f9357d1c4519c6457de3314b7fbba8a") - assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.legacy(checksum_hex)) + assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.sha256(checksum_hex)) async def test_mpu_fail(s3_stub: Stubber): @@ -226,7 +226,7 @@ async def test_mpu_single(s3_stub: Stubber): res = await s3hash.compute_checksum(LOC) - assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.modern(CHECKSUM_HASH)) + assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.sha256_chunked(CHECKSUM_HASH)) async def test_mpu_multi(s3_stub: Stubber): @@ -289,4 +289,4 @@ async def test_mpu_multi(s3_stub: Stubber): res = await s3hash.compute_checksum(LOC) - assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.modern(CHECKSUM_TOP)) + assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.sha256_chunked(CHECKSUM_TOP)) diff --git a/lambdas/s3hash/tests/test_get_compliant_checksum.py b/lambdas/s3hash/tests/test_get_compliant_checksum.py index 7d984721ce2..9666f5523dd 100644 --- a/lambdas/s3hash/tests/test_get_compliant_checksum.py +++ b/lambdas/s3hash/tests/test_get_compliant_checksum.py @@ -18,22 +18,22 @@ def test_no_sha256(obj_attrs): @pytest.mark.parametrize( - "obj_attrs, legacy, modern", + "obj_attrs, plain, chunked", [ ( { "Checksum": {"ChecksumSHA256": "MOFJVevxNSJm3C/4Bn5oEEYH51CrudOzZYK4r5Cfy1g="}, "ObjectSize": 1048576, # below the threshold }, - Checksum.legacy(base64.b64decode("MOFJVevxNSJm3C/4Bn5oEEYH51CrudOzZYK4r5Cfy1g=")), - Checksum.modern(base64.b64decode("WZ1xAz1wCsiSoOSPphsSXS9ZlBu0XaGQlETUPG7gurI=")), + Checksum.sha256(base64.b64decode("MOFJVevxNSJm3C/4Bn5oEEYH51CrudOzZYK4r5Cfy1g=")), + Checksum.sha256_chunked(base64.b64decode("WZ1xAz1wCsiSoOSPphsSXS9ZlBu0XaGQlETUPG7gurI=")), ), ( { "Checksum": {"ChecksumSHA256": "La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ="}, "ObjectSize": 8388608, # above the threshold }, - Checksum.legacy(base64.b64decode("La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ=")), + Checksum.sha256(base64.b64decode("La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ=")), None, ), ( @@ -55,8 +55,8 @@ def test_no_sha256(obj_attrs): }, "ObjectSize": 8388608, # above the threshold }, - Checksum.legacy(base64.b64decode("La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ=")), - Checksum.modern(base64.b64decode("MIsGKY+ykqN4CPj3gGGu4Gv03N7OWKWpsZqEf+OrGJs=")), + Checksum.sha256(base64.b64decode("La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ=")), + Checksum.sha256_chunked(base64.b64decode("MIsGKY+ykqN4CPj3gGGu4Gv03N7OWKWpsZqEf+OrGJs=")), ), ( { @@ -110,13 +110,13 @@ def test_no_sha256(obj_attrs): "ObjectSize": 13631488, # above the threshold }, None, - Checksum.modern(base64.b64decode("bGeobZC1xyakKeDkOLWP9khl+vuOditELvPQhrT/R9M=")), + Checksum.sha256_chunked(base64.b64decode("bGeobZC1xyakKeDkOLWP9khl+vuOditELvPQhrT/R9M=")), ), ], ) -def test_single_part(obj_attrs, legacy, modern): - with mock.patch("t4_lambda_s3hash.MODERN_CHECKSUMS", False): - assert get_compliant_checksum(obj_attrs) == legacy +def test_single_part(obj_attrs, plain, chunked): + with mock.patch("t4_lambda_s3hash.CHUNKED_CHECKSUMS", False): + assert get_compliant_checksum(obj_attrs) == plain - with mock.patch("t4_lambda_s3hash.MODERN_CHECKSUMS", True): - assert get_compliant_checksum(obj_attrs) == modern + with mock.patch("t4_lambda_s3hash.CHUNKED_CHECKSUMS", True): + assert get_compliant_checksum(obj_attrs) == chunked