Skip to content

Commit

Permalink
Support binary serialization of ProtoBuf schemas
Browse files Browse the repository at this point in the history
Add support for base64-encoded binary encodings of ProtoBuf schemas,
both when registering new schemas and, via the format=serialized query
parameter, when fetching schemas. Fixes #742
  • Loading branch information
tvainika committed Nov 3, 2023
1 parent f64a56e commit 369698e
Show file tree
Hide file tree
Showing 12 changed files with 614 additions and 82 deletions.
9 changes: 5 additions & 4 deletions karapace/protobuf/enum_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
"""
# Ported from square/wire:
# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt
from __future__ import annotations

from itertools import chain
from karapace.protobuf.compare_result import CompareResult, Modification
from karapace.protobuf.compare_type_storage import CompareTypes
Expand All @@ -12,7 +14,6 @@
from karapace.protobuf.option_element import OptionElement
from karapace.protobuf.type_element import TypeElement
from karapace.protobuf.utils import append_documentation, append_indented
from typing import List


class EnumElement(TypeElement):
Expand All @@ -21,8 +22,8 @@ def __init__(
location: Location,
name: str,
documentation: str = "",
options: List[OptionElement] = None,
constants: List[EnumConstantElement] = None,
options: list[OptionElement] | None = None,
constants: list[EnumConstantElement] | None = None,
) -> None:
# Enums do not allow nested type declarations.
super().__init__(location, name, documentation, options or [], [])
Expand All @@ -47,7 +48,7 @@ def to_schema(self) -> str:
result.append("}\n")
return "".join(result)

def compare(self, other: "EnumElement", result: CompareResult, types: CompareTypes) -> None:
def compare(self, other: EnumElement, result: CompareResult, types: CompareTypes) -> None:
self_tags = {}
other_tags = {}
constant: EnumConstantElement
Expand Down
3 changes: 3 additions & 0 deletions karapace/protobuf/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ def __str__(self) -> str:
result += str(self.column)

return result


DEFAULT_LOCATION = Location("", "")
19 changes: 10 additions & 9 deletions karapace/protobuf/message_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# Ported from square/wire:
# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/MessageElement.kt
# compatibility routine added
from __future__ import annotations

from itertools import chain
from karapace.protobuf.compare_result import CompareResult, Modification
from karapace.protobuf.compare_type_storage import CompareTypes
Expand All @@ -17,7 +19,6 @@
from karapace.protobuf.reserved_element import ReservedElement
from karapace.protobuf.type_element import TypeElement
from karapace.protobuf.utils import append_documentation, append_indented
from typing import List


class MessageElement(TypeElement):
Expand All @@ -26,13 +27,13 @@ def __init__(
location: Location,
name: str,
documentation: str = "",
nested_types: List[TypeElement] = None,
options: List[OptionElement] = None,
reserveds: List[ReservedElement] = None,
fields: List[FieldElement] = None,
one_ofs: List[OneOfElement] = None,
extensions: List[ExtensionsElement] = None,
groups: List[GroupElement] = None,
nested_types: list[TypeElement] | None = None,
options: list[OptionElement] | None = None,
reserveds: list[ReservedElement] | None = None,
fields: list[FieldElement] | None = None,
one_ofs: list[OneOfElement] | None = None,
extensions: list[ExtensionsElement] | None = None,
groups: list[GroupElement] | None = None,
) -> None:
super().__init__(location, name, documentation, options or [], nested_types or [])
self.reserveds = reserveds or []
Expand Down Expand Up @@ -83,7 +84,7 @@ def to_schema(self) -> str:
result.append("}\n")
return "".join(result)

def compare(self, other: "MessageElement", result: CompareResult, types: CompareTypes) -> None:
def compare(self, other: MessageElement, result: CompareResult, types: CompareTypes) -> None:
from karapace.protobuf.compare_type_lists import compare_type_lists

if types.lock_message(self):
Expand Down
57 changes: 34 additions & 23 deletions karapace/protobuf/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
Copyright (c) 2023 Aiven Ltd
See LICENSE for details
"""
from __future__ import annotations

from karapace.dataclasses import default_dataclass

# Ported from square/wire:
Expand All @@ -12,16 +14,17 @@
from karapace.protobuf.enum_element import EnumElement
from karapace.protobuf.exception import IllegalArgumentException
from karapace.protobuf.known_dependency import DependenciesHardcoded, KnownDependency
from karapace.protobuf.location import Location
from karapace.protobuf.location import DEFAULT_LOCATION
from karapace.protobuf.message_element import MessageElement
from karapace.protobuf.one_of_element import OneOfElement
from karapace.protobuf.option_element import OptionElement
from karapace.protobuf.proto_file_element import ProtoFileElement
from karapace.protobuf.proto_parser import ProtoParser
from karapace.protobuf.serialization import deserialize, is_base64_str, serialize
from karapace.protobuf.type_element import TypeElement
from karapace.protobuf.utils import append_documentation, append_indented
from karapace.schema_references import Reference
from typing import Iterable, List, Mapping, Optional, Sequence, Set, Tuple
from typing import Iterable, Mapping, Sequence

import itertools

Expand Down Expand Up @@ -126,10 +129,10 @@ class SourceFileReference:
@default_dataclass
class TypeTree:
token: str
children: List["TypeTree"]
source_reference: Optional[SourceFileReference]
children: list[TypeTree]
source_reference: SourceFileReference | None

def source_reference_tree_recursive(self) -> Iterable[Optional[SourceFileReference]]:
def source_reference_tree_recursive(self) -> Iterable[SourceFileReference | None]:
sources = [] if self.source_reference is None else [self.source_reference]
for child in self.children:
sources = itertools.chain(sources, child.source_reference_tree())
Expand Down Expand Up @@ -201,7 +204,7 @@ def __repr__(self) -> str:

def _add_new_type_recursive(
parent_tree: TypeTree,
remaining_tokens: List[str],
remaining_tokens: list[str],
file: str,
inserted_elements: int,
) -> None:
Expand Down Expand Up @@ -244,23 +247,28 @@ def add_new_type(


class ProtobufSchema:
DEFAULT_LOCATION = Location("", "")

def __init__(
self,
schema: str,
references: Optional[Sequence[Reference]] = None,
dependencies: Optional[Mapping[str, Dependency]] = None,
schema: str | None,
references: Sequence[Reference] | None = None,
dependencies: Mapping[str, Dependency] | None = None,
proto_file_element: ProtoFileElement | None = None,
) -> None:
if type(schema).__name__ != "str":
if schema is not None and type(schema).__name__ != "str":
raise IllegalArgumentException("Non str type of schema string")
self.dirty = schema
self.cache_string = ""
self.proto_file_element = ProtoParser.parse(self.DEFAULT_LOCATION, schema)

if proto_file_element is not None:
self.proto_file_element = proto_file_element
elif schema != "" and is_base64_str(schema):
self.proto_file_element = deserialize(schema)
else:
self.proto_file_element = ProtoParser.parse(DEFAULT_LOCATION, schema)

self.references = references
self.dependencies = dependencies

def type_in_tree(self, tree: TypeTree, remaining_tokens: List[str]) -> Optional[TypeTree]:
def type_in_tree(self, tree: TypeTree, remaining_tokens: list[str]) -> TypeTree | None:
if remaining_tokens:
to_seek = remaining_tokens.pop()

Expand All @@ -270,10 +278,10 @@ def type_in_tree(self, tree: TypeTree, remaining_tokens: List[str]) -> Optional[
return None
return tree

def type_exist_in_tree(self, tree: TypeTree, remaining_tokens: List[str]) -> bool:
def type_exist_in_tree(self, tree: TypeTree, remaining_tokens: list[str]) -> bool:
return self.type_in_tree(tree, remaining_tokens) is not None

def recursive_imports(self) -> Set[str]:
def recursive_imports(self) -> set[str]:
imports = set(self.proto_file_element.imports)

if self.dependencies:
Expand All @@ -282,7 +290,7 @@ def recursive_imports(self) -> Set[str]:

return imports

def are_type_usage_valid(self, root_type_tree: TypeTree, used_types: List[UsedType]) -> Tuple[bool, Optional[str]]:
def are_type_usage_valid(self, root_type_tree: TypeTree, used_types: list[UsedType]) -> tuple[bool, str | None]:
# Please note that this check only ensures the requested type exists. However, for performance reasons, it works in
# the opposite way of how specificity works in Protobuf. In Protobuf, the type is matched not only to check if it
# exists, but also based on the order of search: local definition comes before imported types. In this code, we
Expand Down Expand Up @@ -408,7 +416,7 @@ def types_tree(self) -> TypeTree:
return root_tree

@staticmethod
def used_type(parent: str, element_type: str) -> List[UsedType]:
def used_type(parent: str, element_type: str) -> list[UsedType]:
if element_type.find("map<") == 0:
end = element_type.find(">")
virgule = element_type.find(",")
Expand All @@ -426,7 +434,7 @@ def dependencies_one_of(
package_name: str,
parent_name: str,
one_of: OneOfElement,
) -> List[UsedType]:
) -> list[UsedType]:
parent = package_name + "." + parent_name
dependencies = []
for field in one_of.fields:
Expand All @@ -438,7 +446,7 @@ def dependencies_one_of(
)
return dependencies

def used_types(self) -> List[UsedType]:
def used_types(self) -> list[UsedType]:
dependencies_used_types = []
if self.dependencies:
for key in self.dependencies:
Expand Down Expand Up @@ -469,7 +477,7 @@ def nested_used_type(
package_name: str,
parent_name: str,
element_type: TypeElement,
) -> List[str]:
) -> list[str]:
used_types = []

if isinstance(element_type, MessageElement):
Expand Down Expand Up @@ -540,10 +548,13 @@ def to_schema(self) -> str:

return "".join(strings)

def compare(self, other: "ProtobufSchema", result: CompareResult) -> CompareResult:
def compare(self, other: ProtobufSchema, result: CompareResult) -> CompareResult:
return self.proto_file_element.compare(
other.proto_file_element,
result,
self_dependencies=self.dependencies,
other_dependencies=other.dependencies,
)

def serialize(self) -> str:
return serialize(self.proto_file_element)
Loading

0 comments on commit 369698e

Please sign in to comment.