Skip to content

Commit

Permalink
🐛 Fix aws checksum issue (ITISFoundation#4924)
Browse files Browse the repository at this point in the history
Co-authored-by: Andrei Neagu <[email protected]>
  • Loading branch information
bisgaard-itis and GitHK authored Oct 25, 2023
1 parent bd2689f commit 9538a3a
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 15 deletions.
9 changes: 5 additions & 4 deletions services/storage/src/simcore_service_storage/s3_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,15 @@ async def create_multipart_upload_links(
file_id: SimcoreS3FileID,
file_size: ByteSize,
expiration_secs: int,
sha256_checksum: SHA256Str | None,
) -> MultiPartUploadLinks:
# NOTE: ensure the bucket/object exists, this will raise if not
await self.client.head_bucket(Bucket=bucket)
# first initiate the multipart upload
response = await self.client.create_multipart_upload(Bucket=bucket, Key=file_id)
create_input: dict[str, Any] = {"Bucket": bucket, "Key": file_id}
if sha256_checksum:
create_input["Metadata"] = {"sha256_checksum": sha256_checksum}
response = await self.client.create_multipart_upload(**create_input)
upload_id = response["UploadId"]
# compute the number of links, based on the announced file size
num_upload_links, chunk_size = compute_num_file_chunks(file_size)
Expand Down Expand Up @@ -242,7 +246,6 @@ async def complete_multipart_upload(
file_id: SimcoreS3FileID,
upload_id: UploadID,
uploaded_parts: list[UploadedPart],
sha256_checksum: SHA256Str | None,
) -> ETag:
inputs: dict[str, Any] = {
"Bucket": bucket,
Expand All @@ -255,8 +258,6 @@ async def complete_multipart_upload(
]
},
}
if sha256_checksum:
inputs["ChecksumSHA256"] = sha256_checksum
response = await self.client.complete_multipart_upload(**inputs)
return response["ETag"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ async def create_file_upload_links(
fmd.file_id,
file_size_bytes,
expiration_secs=self.settings.STORAGE_DEFAULT_PRESIGNED_LINK_EXPIRATION_SECONDS,
sha256_checksum=fmd.sha256_checksum,
)
# update the database so we keep the upload id
fmd.upload_id = multipart_presigned_links.upload_id
Expand Down Expand Up @@ -383,7 +384,6 @@ async def complete_file_upload(
file_id=fmd.file_id,
upload_id=fmd.upload_id,
uploaded_parts=uploaded_parts,
sha256_checksum=fmd.sha256_checksum,
)
async with self.engine.acquire() as conn:
fmd = await self._update_database_from_storage(conn, fmd)
Expand Down
18 changes: 15 additions & 3 deletions services/storage/tests/unit/test_dsm_dsmcleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ async def test_clean_expired_uploads_aborts_dangling_multipart_uploads(
file_id = _faker.file_name()
file_size = parse_obj_as(ByteSize, "100Mib")
upload_links = await storage_s3_client.create_multipart_upload_links(
storage_s3_bucket, file_id, file_size, expiration_secs=3600
storage_s3_bucket,
file_id,
file_size,
expiration_secs=3600,
sha256_checksum=parse_obj_as(SHA256Str, _faker.sha256()),
)

# ensure we have now an upload id
Expand Down Expand Up @@ -319,7 +323,11 @@ async def test_clean_expired_uploads_does_not_clean_multipart_upload_on_creation

upload_links_list: list[MultiPartUploadLinks] = [
await storage_s3_client.create_multipart_upload_links(
storage_s3_bucket, file_id, file_size, expiration_secs=3600
storage_s3_bucket,
file_id,
file_size,
expiration_secs=3600,
sha256_checksum=parse_obj_as(SHA256Str, _faker.sha256()),
)
for file_id in file_ids_to_upload
]
Expand Down Expand Up @@ -388,7 +396,11 @@ async def test_clean_expired_uploads_cleans_dangling_multipart_uploads_if_no_cor
assert fmd_in_db.upload_expires_at
# we create the multipart upload link
upload_links = await storage_s3_client.create_multipart_upload_links(
storage_s3_bucket, simcore_file_id, file_size, expiration_secs=3600
storage_s3_bucket,
simcore_file_id,
file_size,
expiration_secs=3600,
sha256_checksum=parse_obj_as(SHA256Str, _faker.sha256()),
)

# ensure we have now an upload id
Expand Down
15 changes: 8 additions & 7 deletions services/storage/tests/unit/test_s3_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from aiohttp import ClientSession
from faker import Faker
from models_library.api_schemas_storage import UploadedPart
from models_library.basic_types import SHA256Str
from models_library.projects import ProjectID
from models_library.projects_nodes import NodeID
from models_library.projects_nodes_io import SimcoreS3FileID
Expand Down Expand Up @@ -239,7 +240,7 @@ async def test_create_multipart_presigned_upload_link(

# now complete it
received_e_tag = await storage_s3_client.complete_multipart_upload(
storage_s3_bucket, file_id, upload_links.upload_id, uploaded_parts, None
storage_s3_bucket, file_id, upload_links.upload_id, uploaded_parts
)

# check that the multipart upload is not listed anymore
Expand Down Expand Up @@ -284,14 +285,13 @@ async def test_create_multipart_presigned_upload_link_invalid_raises(
file_id,
upload_links.upload_id,
uploaded_parts,
None,
)

wrong_file_id = create_simcore_file_id(uuid4(), uuid4(), faker.file_name())
# with pytest.raises(S3KeyNotFoundError):
# NOTE: this does not raise... and it returns the file_id of the original file...
await storage_s3_client.complete_multipart_upload(
storage_s3_bucket, wrong_file_id, upload_links.upload_id, uploaded_parts, None
storage_s3_bucket, wrong_file_id, upload_links.upload_id, uploaded_parts
)
# call it again triggers
with pytest.raises(S3AccessError):
Expand All @@ -300,7 +300,6 @@ async def test_create_multipart_presigned_upload_link_invalid_raises(
wrong_file_id,
upload_links.upload_id,
uploaded_parts,
None,
)


Expand Down Expand Up @@ -356,12 +355,12 @@ async def test_multiple_completion_of_multipart_upload(

# first completion
await storage_s3_client.complete_multipart_upload(
storage_s3_bucket, file_id, upload_links.upload_id, uploaded_parts, None
storage_s3_bucket, file_id, upload_links.upload_id, uploaded_parts
)

with pytest.raises(S3AccessError):
await storage_s3_client.complete_multipart_upload(
storage_s3_bucket, file_id, upload_links.upload_id, uploaded_parts, None
storage_s3_bucket, file_id, upload_links.upload_id, uploaded_parts
)


Expand All @@ -384,7 +383,7 @@ async def test_break_completion_of_multipart_upload(
with pytest.raises(asyncio.TimeoutError):
await asyncio.wait_for(
storage_s3_client.complete_multipart_upload(
storage_s3_bucket, file_id, upload_links.upload_id, uploaded_parts, None
storage_s3_bucket, file_id, upload_links.upload_id, uploaded_parts
),
timeout=VERY_SHORT_TIMEOUT,
)
Expand Down Expand Up @@ -453,6 +452,7 @@ def upload_file_multipart_presigned_link_without_completion(
storage_s3_client: StorageS3Client,
storage_s3_bucket: S3BucketName,
create_file_of_size: Callable[[ByteSize], Path],
faker: Faker,
) -> Callable[
..., Awaitable[tuple[SimcoreS3FileID, MultiPartUploadLinks, list[UploadedPart]]]
]:
Expand All @@ -468,6 +468,7 @@ async def _uploader(
file_id,
ByteSize(file.stat().st_size),
expiration_secs=DEFAULT_EXPIRATION_SECS,
sha256_checksum=parse_obj_as(SHA256Str, faker.sha256()),
)
assert upload_links

Expand Down

0 comments on commit 9538a3a

Please sign in to comment.