Skip to content

Commit

Permalink
Do not throw exception if md5Hash, size or updated are missing from GCP list response
Browse files Browse the repository at this point in the history
  • Loading branch information
giacomo-alzetta-aiven committed Nov 20, 2023
1 parent 638cebe commit edb35fe
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 4 deletions.
10 changes: 7 additions & 3 deletions rohmu/object_storage/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from typing import Any, BinaryIO, Callable, Collection, Iterable, Iterator, Optional, TextIO, Tuple, TypeVar, Union
from typing_extensions import Protocol

import base64
import codecs
import dataclasses
import errno
Expand Down Expand Up @@ -169,6 +170,9 @@ def report_status(self, stats: StatsClient, status: Union[MediaUploadProgress, M

ResType = TypeVar("ResType")

# In a few edge cases the md5Hash field is missing from the object listing. Provide a
# recognisable placeholder for those entries instead of failing.
# b64encode is used rather than encodebytes: encodebytes is the MIME-style encoder and appends a
# trailing newline, which only decodes correctly when the decoder silently drops stray characters.
_MISSING_MD5_B64_ENCODED = base64.b64encode(b"Missing md5Hash!")


class GoogleTransfer(BaseTransfer[Config]):
config_model = Config
Expand Down Expand Up @@ -384,10 +388,10 @@ def initial_op(domain: Any) -> HttpRequest:
type=KEY_TYPE_OBJECT,
value={
"name": self.format_key_from_backend(item["name"]),
"size": int(item["size"]),
"last_modified": parse_timestamp(item["updated"]),
"size": int(item.get("size", 0)),
"last_modified": parse_timestamp(item.get("updated", "1970-01-01T00:00:00+00:00")),
"metadata": item.get("metadata", {}),
"md5": base64_to_hex(item["md5Hash"]),
"md5": base64_to_hex(item.get("md5Hash", _MISSING_MD5_B64_ENCODED)),
},
)
elif property_name == "prefixes":
Expand Down
68 changes: 67 additions & 1 deletion test/object_storage/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
from __future__ import annotations

from contextlib import ExitStack
from datetime import datetime
from datetime import datetime, UTC
from googleapiclient.http import MediaUploadProgress
from io import BytesIO
from rohmu.common.models import StorageOperation
from rohmu.errors import InvalidByteRangeError
from rohmu.object_storage.base import IterKeyItem
from rohmu.object_storage.google import GoogleTransfer, MediaIoBaseDownloadWithByteRange, Reporter
from tempfile import NamedTemporaryFile
from unittest.mock import ANY, call, MagicMock, Mock, patch
Expand Down Expand Up @@ -211,3 +212,68 @@ def test_media_io_download_with_byte_range_and_very_small_object() -> None:
assert status.progress() == 1.0
assert result.getvalue() == b"lo, World!"
mock_request.http.request.assert_called_once_with(ANY, ANY, headers={"range": "bytes=3-100"})


def test_object_listed_when_missing_md5hash_size_and_updated() -> None:
    """Check that ``iter_key`` still yields an object whose listing entry lacks some fields.

    The mocked list response contains a single item without ``md5Hash``, ``size``, ``updated``
    or ``metadata``. The yielded item is expected to carry a size of 0, the Unix epoch (UTC) as
    its last-modified time, the hex encoding of the bytes ``Missing md5Hash!`` as its MD5 and an
    empty metadata mapping.
    """
    object_name = (
        "aiventest/111aa1aa-1aaa-1111-11a1-11111aaaaa11/a1111111-aaa1-1aaa-aa1a-1a11aaaa11a1"
        "/tiered_storage/ccs/aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    )
    escaped_name = object_name.replace("/", "%2F")
    media_link = (
        "https://storage.googleapis.com/download/storage/v1/b/test-bucket/o/"
        f"{escaped_name}?generation=1111111111111111&alt=media"
    )
    self_link = (
        "https://www.googleapis.com/storage/v1/b/"
        "p812de5da-0bab-4990-90e8-57303eebfd30-99012089cf1d961516b8b3ff6/o/"
        f"{escaped_name}?generation=1111111111111111"
    )
    # A listing entry that deliberately omits md5Hash, size and updated.
    list_response = {
        "items": [
            {
                "bucket": "test-bucket",
                "contentType": "binary/octet-stream",
                "generation": "1111111111111111",
                "id": f"test-bucket/{object_name}/1111111111111111",
                "kind": "storage#object",
                "mediaLink": media_link,
                "metageneration": "1",
                "name": object_name,
                "selfLink": self_link,
                "storageClass": "REGIONAL",
            }
        ]
    }
    expected_item = IterKeyItem(
        type="object",
        value={
            "name": object_name,
            "size": 0,
            "last_modified": datetime(1970, 1, 1, 0, tzinfo=UTC),
            "md5": "4d697373696e67206d64354861736821",
            "metadata": {},
        },
    )
    notifier = MagicMock()

    with ExitStack() as patches:
        patches.enter_context(patch("rohmu.object_storage.google.get_credentials"))
        patches.enter_context(patch("rohmu.object_storage.google.GoogleTransfer.get_or_create_bucket"))
        operation_mock = patches.enter_context(patch("rohmu.common.statsd.StatsClient.operation"))
        transfer = GoogleTransfer(
            project_id="test-project-id",
            bucket_name="test-bucket",
            notifier=notifier,
        )

        # Patch the attribute on the instance itself: a decorator and context managers are in
        # the way of mocking the client any other way.
        object_client = patches.enter_context(patch.object(transfer, "_object_client"))
        entered_client = object_client.return_value.__enter__.return_value
        entered_client.list.return_value.execute.return_value = list_response
        # No further page after the first one.
        entered_client.list_next.return_value = None

        listed = list(
            transfer.iter_key(
                key="testkey",
                with_metadata=False,
                deep=True,
                include_key=False,
            )
        )

        # Exactly one stats operation was recorded, and it was the iter_key one.
        assert operation_mock.call_args_list == [call(operation=StorageOperation.iter_key)]
        # Exactly one item was listed, and it carries the fallback values.
        assert listed == [expected_item]

0 comments on commit edb35fe

Please sign in to comment.