Skip to content

Commit

Permalink
modern -> chunked
Browse files: browse the repository at this point in the history
  • Loading branch information
nl0 committed Feb 23, 2024
1 parent 97f641d commit 66464f0
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 35 deletions.
2 changes: 1 addition & 1 deletion lambdas/pkgpush/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,7 @@ def test_calculate_pkg_hashes_too_large_file_error(self):
def test_calculate_pkg_entry_hash(self):
with mock.patch(
"t4_lambda_pkgpush.invoke_hash_lambda",
return_value=Checksum(type=ChecksumType.MODERN, value="base64hash"),
return_value=Checksum(type=ChecksumType.SHA256_CHUNKED, value="base64hash"),
) as invoke_hash_lambda_mock:
t4_lambda_pkgpush.calculate_pkg_entry_hash(self.entry_without_hash, CREDENTIALS)

Expand Down
2 changes: 1 addition & 1 deletion lambdas/s3hash/pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
asyncio_mode=auto
env =
MPU_CONCURRENCY=1000
MODERN_CHECKSUMS=true
CHUNKED_CHECKSUMS=true
SERVICE_BUCKET=service-bucket
32 changes: 16 additions & 16 deletions lambdas/s3hash/src/t4_lambda_s3hash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
logger.setLevel(os.environ.get("QUILT_LOG_LEVEL", "WARNING"))

MPU_CONCURRENCY = int(os.environ["MPU_CONCURRENCY"])
MODERN_CHECKSUMS = os.environ["MODERN_CHECKSUMS"] == "true"
CHUNKED_CHECKSUMS = os.environ["CHUNKED_CHECKSUMS"] == "true"
SERVICE_BUCKET = os.environ["SERVICE_BUCKET"]

SCRATCH_KEY_SERVICE = "user-requests/checksum-upload-tmp"
Expand Down Expand Up @@ -70,22 +70,22 @@ async def aio_context(credentials: AWSCredentials):

class Checksum(ChecksumBase):
@classmethod
def legacy(cls, value: bytes):
return cls(value=value.hex(), type=ChecksumType.LEGACY)
def sha256(cls, value: bytes):
return cls(value=value.hex(), type=ChecksumType.SHA256)

@classmethod
def modern(cls, value: bytes):
return cls(value=base64.b64encode(value).decode(), type=ChecksumType.MODERN)
def sha256_chunked(cls, value: bytes):
return cls(value=base64.b64encode(value).decode(), type=ChecksumType.SHA256_CHUNKED)

@classmethod
def for_parts(cls, checksums: T.Sequence[bytes]):
return cls.modern(hash_parts(checksums))
return cls.sha256_chunked(hash_parts(checksums))

_EMPTY_HASH = hashlib.sha256().digest()

@classmethod
def empty(cls):
return cls.modern(cls._EMPTY_HASH) if MODERN_CHECKSUMS else cls.legacy(cls._EMPTY_HASH)
return cls.sha256_chunked(cls._EMPTY_HASH) if CHUNKED_CHECKSUMS else cls.sha256(cls._EMPTY_HASH)


# 8 MiB -- boto3 default:
Expand Down Expand Up @@ -134,7 +134,7 @@ def get_compliant_checksum(attrs: GetObjectAttributesOutputTypeDef) -> T.Optiona

part_size = get_part_size(attrs["ObjectSize"])
object_parts = attrs.get("ObjectParts")
if not MODERN_CHECKSUMS or part_size is None:
if not CHUNKED_CHECKSUMS or part_size is None:
if object_parts is not None:
assert "TotalPartsCount" in object_parts
if object_parts["TotalPartsCount"] != 1:
Expand All @@ -147,9 +147,9 @@ def get_compliant_checksum(attrs: GetObjectAttributesOutputTypeDef) -> T.Optiona

return (
# double-hash
Checksum.modern(hashlib.sha256(checksum_bytes).digest())
if MODERN_CHECKSUMS
else Checksum.legacy(checksum_bytes)
Checksum.sha256_chunked(hashlib.sha256(checksum_bytes).digest())
if CHUNKED_CHECKSUMS
else Checksum.sha256(checksum_bytes)
)

if object_parts is None:
Expand All @@ -160,7 +160,7 @@ def get_compliant_checksum(attrs: GetObjectAttributesOutputTypeDef) -> T.Optiona
# Make sure we have _all_ parts.
assert len(object_parts["Parts"]) == num_parts
if all(part.get("Size") == part_size for part in object_parts["Parts"][:-1]):
return Checksum.modern(base64.b64decode(checksum_value))
return Checksum.sha256_chunked(base64.b64decode(checksum_value))

return None

Expand Down Expand Up @@ -311,7 +311,7 @@ async def compute_checksum_legacy(location: S3ObjectSource) -> Checksum:
async for chunk in stream.content.iter_any():
hashobj.update(chunk)

return Checksum.legacy(hashobj.digest())
return Checksum.sha256(hashobj.digest())


async def compute_checksum(location: S3ObjectSource) -> ChecksumResult:
Expand All @@ -332,11 +332,11 @@ async def compute_checksum(location: S3ObjectSource) -> ChecksumResult:
if total_size == 0:
return ChecksumResult(checksum=Checksum.empty())

if not MODERN_CHECKSUMS and total_size > MAX_PART_SIZE:
if not CHUNKED_CHECKSUMS and total_size > MAX_PART_SIZE:
checksum = await compute_checksum_legacy(location)
return ChecksumResult(checksum=checksum)

part_defs = get_parts_for_size(total_size) if MODERN_CHECKSUMS else PARTS_SINGLE
part_defs = get_parts_for_size(total_size) if CHUNKED_CHECKSUMS else PARTS_SINGLE

async with create_mpu() as mpu:
part_checksums = await compute_part_checksums(
Expand All @@ -346,7 +346,7 @@ async def compute_checksum(location: S3ObjectSource) -> ChecksumResult:
part_defs,
)

checksum = Checksum.for_parts(part_checksums) if MODERN_CHECKSUMS else Checksum.legacy(part_checksums[0])
checksum = Checksum.for_parts(part_checksums) if CHUNKED_CHECKSUMS else Checksum.sha256(part_checksums[0])
return ChecksumResult(checksum=checksum)


Expand Down
10 changes: 5 additions & 5 deletions lambdas/s3hash/tests/test_compute_checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ async def test_compliant(s3_stub: Stubber):

res = await s3hash.compute_checksum(LOC)

assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.modern(base64.b64decode(checksum_hash)))
assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.sha256_chunked(base64.b64decode(checksum_hash)))


async def test_empty(s3_stub: Stubber):
Expand Down Expand Up @@ -149,12 +149,12 @@ async def test_legacy(s3_stub: Stubber, mocker: MockerFixture):
LOC.boto_args,
)

mocker.patch("t4_lambda_s3hash.MODERN_CHECKSUMS", False)
mocker.patch("t4_lambda_s3hash.CHUNKED_CHECKSUMS", False)

res = await s3hash.compute_checksum(LOC)

checksum_hex = bytes.fromhex("d9d865cc54ec60678f1b119084ad79ae7f9357d1c4519c6457de3314b7fbba8a")
assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.legacy(checksum_hex))
assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.sha256(checksum_hex))


async def test_mpu_fail(s3_stub: Stubber):
Expand Down Expand Up @@ -226,7 +226,7 @@ async def test_mpu_single(s3_stub: Stubber):

res = await s3hash.compute_checksum(LOC)

assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.modern(CHECKSUM_HASH))
assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.sha256_chunked(CHECKSUM_HASH))


async def test_mpu_multi(s3_stub: Stubber):
Expand Down Expand Up @@ -289,4 +289,4 @@ async def test_mpu_multi(s3_stub: Stubber):

res = await s3hash.compute_checksum(LOC)

assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.modern(CHECKSUM_TOP))
assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.sha256_chunked(CHECKSUM_TOP))
24 changes: 12 additions & 12 deletions lambdas/s3hash/tests/test_get_compliant_checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@ def test_no_sha256(obj_attrs):


@pytest.mark.parametrize(
"obj_attrs, legacy, modern",
"obj_attrs, plain, chunked",
[
(
{
"Checksum": {"ChecksumSHA256": "MOFJVevxNSJm3C/4Bn5oEEYH51CrudOzZYK4r5Cfy1g="},
"ObjectSize": 1048576, # below the threshold
},
Checksum.legacy(base64.b64decode("MOFJVevxNSJm3C/4Bn5oEEYH51CrudOzZYK4r5Cfy1g=")),
Checksum.modern(base64.b64decode("WZ1xAz1wCsiSoOSPphsSXS9ZlBu0XaGQlETUPG7gurI=")),
Checksum.sha256(base64.b64decode("MOFJVevxNSJm3C/4Bn5oEEYH51CrudOzZYK4r5Cfy1g=")),
Checksum.sha256_chunked(base64.b64decode("WZ1xAz1wCsiSoOSPphsSXS9ZlBu0XaGQlETUPG7gurI=")),
),
(
{
"Checksum": {"ChecksumSHA256": "La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ="},
"ObjectSize": 8388608, # above the threshold
},
Checksum.legacy(base64.b64decode("La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ=")),
Checksum.sha256(base64.b64decode("La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ=")),
None,
),
(
Expand All @@ -55,8 +55,8 @@ def test_no_sha256(obj_attrs):
},
"ObjectSize": 8388608, # above the threshold
},
Checksum.legacy(base64.b64decode("La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ=")),
Checksum.modern(base64.b64decode("MIsGKY+ykqN4CPj3gGGu4Gv03N7OWKWpsZqEf+OrGJs=")),
Checksum.sha256(base64.b64decode("La6x82CVtEsxhBCz9Oi12Yncx7sCPRQmxJLasKMFPnQ=")),
Checksum.sha256_chunked(base64.b64decode("MIsGKY+ykqN4CPj3gGGu4Gv03N7OWKWpsZqEf+OrGJs=")),
),
(
{
Expand Down Expand Up @@ -110,13 +110,13 @@ def test_no_sha256(obj_attrs):
"ObjectSize": 13631488, # above the threshold
},
None,
Checksum.modern(base64.b64decode("bGeobZC1xyakKeDkOLWP9khl+vuOditELvPQhrT/R9M=")),
Checksum.sha256_chunked(base64.b64decode("bGeobZC1xyakKeDkOLWP9khl+vuOditELvPQhrT/R9M=")),
),
],
)
def test_single_part(obj_attrs, legacy, modern):
with mock.patch("t4_lambda_s3hash.MODERN_CHECKSUMS", False):
assert get_compliant_checksum(obj_attrs) == legacy
def test_single_part(obj_attrs, plain, chunked):
with mock.patch("t4_lambda_s3hash.CHUNKED_CHECKSUMS", False):
assert get_compliant_checksum(obj_attrs) == plain

with mock.patch("t4_lambda_s3hash.MODERN_CHECKSUMS", True):
assert get_compliant_checksum(obj_attrs) == modern
with mock.patch("t4_lambda_s3hash.CHUNKED_CHECKSUMS", True):
assert get_compliant_checksum(obj_attrs) == chunked

0 comments on commit 66464f0

Please sign in to comment.