diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 35778a8fa..bf1bd7f8f 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -4,6 +4,8 @@ """ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt +from __future__ import annotations + from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -12,7 +14,6 @@ from karapace.protobuf.option_element import OptionElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import List class EnumElement(TypeElement): @@ -21,8 +22,8 @@ def __init__( location: Location, name: str, documentation: str = "", - options: List[OptionElement] = None, - constants: List[EnumConstantElement] = None, + options: list[OptionElement] | None = None, + constants: list[EnumConstantElement] | None = None, ) -> None: # Enums do not allow nested type declarations. super().__init__(location, name, documentation, options or [], []) @@ -47,7 +48,7 @@ def to_schema(self) -> str: result.append("}\n") return "".join(result) - def compare(self, other: "EnumElement", result: CompareResult, types: CompareTypes) -> None: + def compare(self, other: EnumElement, result: CompareResult, types: CompareTypes) -> None: self_tags = {} other_tags = {} constant: EnumConstantElement diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py index a608ef68a..eba9ad2b1 100644 --- a/karapace/protobuf/location.py +++ b/karapace/protobuf/location.py @@ -48,3 +48,6 @@ def __str__(self) -> str: result += str(self.column) return result + + +DEFAULT_LOCATION = Location("", "") diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index 8a2fb03f4..6d904d8c2 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -5,6 +5,8 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/MessageElement.kt # compatibility routine added +from __future__ import annotations + from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -17,7 +19,6 @@ from karapace.protobuf.reserved_element import ReservedElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import List class MessageElement(TypeElement): @@ -26,13 +27,13 @@ def __init__( location: Location, name: str, documentation: str = "", - nested_types: List[TypeElement] = None, - options: List[OptionElement] = None, - reserveds: List[ReservedElement] = None, - fields: List[FieldElement] = None, - one_ofs: List[OneOfElement] = None, - extensions: List[ExtensionsElement] = None, - groups: List[GroupElement] = None, + nested_types: list[TypeElement] | None = None, + options: list[OptionElement] | None = None, + reserveds: list[ReservedElement] | None = None, + fields: list[FieldElement] | None = None, + one_ofs: list[OneOfElement] | None = None, + extensions: list[ExtensionsElement] | None = None, + groups: list[GroupElement] | None = None, ) -> None: super().__init__(location, name, documentation, options or [], nested_types or []) self.reserveds = reserveds or [] @@ -42,7 +43,7 @@ def __init__( self.groups = groups or [] def to_schema(self) -> str: - result = [] + result: list[str] = [] append_documentation(result, self.documentation) result.append(f"message {self.name} {{") if self.reserveds: @@ -83,7 +84,7 @@ def to_schema(self) -> str: result.append("}\n") return "".join(result) - def compare(self, other: "MessageElement", result: CompareResult, types: CompareTypes) -> None: + def compare(self, other: MessageElement, result: CompareResult, types: CompareTypes) -> None: from karapace.protobuf.compare_type_lists import compare_type_lists if types.lock_message(self): diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 157eb5447..9c407bdcd 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -15,17 +15,19 @@ from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.known_dependency import DependenciesHardcoded, KnownDependency -from karapace.protobuf.location import Location +from karapace.protobuf.location import DEFAULT_LOCATION from karapace.protobuf.message_element import MessageElement from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_file_element import ProtoFileElement from karapace.protobuf.proto_parser import ProtoParser +from karapace.protobuf.serialization import deserialize, serialize from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented from karapace.schema_references import Reference from typing import Iterable, Mapping, Sequence +import binascii import itertools @@ -247,19 +249,25 @@ def add_new_type( class ProtobufSchema: - DEFAULT_LOCATION = Location("", "") - def __init__( self, schema: str, references: Sequence[Reference] | None = None, dependencies: Mapping[str, Dependency] | None = None, + proto_file_element: ProtoFileElement | None = None, ) -> None: if type(schema).__name__ != "str": raise IllegalArgumentException("Non str type of schema string") - self.dirty = schema self.cache_string = "" - self.proto_file_element = ProtoParser.parse(self.DEFAULT_LOCATION, schema) + + if proto_file_element is not None: + self.proto_file_element = proto_file_element + else: + try: + self.proto_file_element = deserialize(schema) + except binascii.Error: # If not base64 formatted + self.proto_file_element = ProtoParser.parse(DEFAULT_LOCATION, schema) + self.references = references self.dependencies = dependencies @@ -573,3 +581,6 @@ def compare(self, other: ProtobufSchema, result: CompareResult) -> CompareResult self_dependencies=self.dependencies, other_dependencies=other.dependencies, ) + + def serialize(self) -> str: + return serialize(self.proto_file_element) diff --git a/karapace/protobuf/serialization.py b/karapace/protobuf/serialization.py new file mode 100644 index 000000000..6bea8c7b2 --- /dev/null +++ b/karapace/protobuf/serialization.py @@ -0,0 +1,335 @@ +""" +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" +from __future__ import annotations + +from karapace.protobuf.enum_constant_element import EnumConstantElement +from karapace.protobuf.enum_element import EnumElement +from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.kotlin_wrapper import KotlinRange +from karapace.protobuf.location import DEFAULT_LOCATION +from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.one_of_element import OneOfElement +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.proto_file_element import PackageName, ProtoFileElement, TypeName +from karapace.protobuf.reserved_element import ReservedElement +from karapace.protobuf.syntax import Syntax +from karapace.protobuf.type_element import TypeElement +from types import MappingProxyType +from typing import Any + +import base64 +import google.protobuf.descriptor +import google.protobuf.descriptor_pb2 + +_TYPE_MAP = MappingProxyType( + { + google.protobuf.descriptor.FieldDescriptor.TYPE_DOUBLE: "double", + google.protobuf.descriptor.FieldDescriptor.TYPE_FLOAT: "float", + google.protobuf.descriptor.FieldDescriptor.TYPE_INT32: "int32", + google.protobuf.descriptor.FieldDescriptor.TYPE_INT64: "int64", + google.protobuf.descriptor.FieldDescriptor.TYPE_UINT32: "uint32", + google.protobuf.descriptor.FieldDescriptor.TYPE_UINT64: "uint64", + google.protobuf.descriptor.FieldDescriptor.TYPE_SINT32: "sint32", + google.protobuf.descriptor.FieldDescriptor.TYPE_SINT64: "sint64", + google.protobuf.descriptor.FieldDescriptor.TYPE_FIXED32: "fixed32", + google.protobuf.descriptor.FieldDescriptor.TYPE_FIXED64: "fixed64", + google.protobuf.descriptor.FieldDescriptor.TYPE_SFIXED32: "sfixed32", + google.protobuf.descriptor.FieldDescriptor.TYPE_SFIXED64: "sfixed64", + google.protobuf.descriptor.FieldDescriptor.TYPE_BOOL: "bool", + google.protobuf.descriptor.FieldDescriptor.TYPE_STRING: "string", + google.protobuf.descriptor.FieldDescriptor.TYPE_BYTES: "bytes", + } +) +_REVERSE_TYPE_MAP = MappingProxyType({v: k for k, v in _TYPE_MAP.items()}) + + +def _deserialize_field(field: Any) -> FieldElement: + if field.type not in _TYPE_MAP: + raise NotImplementedError(f"Unsupported field type {field.type}") + + label = None + if (field.HasField("proto3_optional") and field.proto3_optional) or Field.Label(field.label) != Field.Label.OPTIONAL: + label = Field.Label(field.label) + if field.HasField("type_name"): + element_type = field.type_name + else: + assert field.HasField("type") + element_type = _TYPE_MAP[field.type] + return FieldElement(DEFAULT_LOCATION, label=label, element_type=element_type, name=field.name, tag=field.number) + + +def _deserialize_enum(enumtype: Any) -> EnumElement: + constants: list[EnumConstantElement] = [] + for c in enumtype.value: + options: list[OptionElement] = list() + if c.options.deprecated: + options.append(OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true")) + constants.append(EnumConstantElement(DEFAULT_LOCATION, c.name, c.number, "", options)) + return EnumElement(DEFAULT_LOCATION, enumtype.name, "", None, constants) + + +def _deserialize_msg(msgtype: Any) -> MessageElement: + reserved_values: list[str | int | KotlinRange] = [] + reserveds: list[ReservedElement] = [] + nested_types: list[TypeElement] = [] + fields: list[FieldElement] = [] + + for reserved in msgtype.reserved_range: + if reserved.end == reserved.start + 1: + reserved_values.append(reserved.start) + else: + reserved_values.append(KotlinRange(reserved.start, reserved.end - 1)) + reserved_values += msgtype.reserved_name + if len(reserved_values) > 0: + reserveds.append(ReservedElement(location=DEFAULT_LOCATION, values=reserved_values)) + + for nested in msgtype.nested_type: + nested_types.append(_deserialize_msg(nested)) + for nested_enum in msgtype.enum_type: + nested_types.append(_deserialize_enum(nested_enum)) + + one_ofs: list[OneOfElement] = [OneOfElement(oneof.name) for oneof in msgtype.oneof_decl] + + for f in msgtype.field: + sf = _deserialize_field(f) + if f.HasField("oneof_index"): + one_ofs[f.oneof_index].fields.append(sf) + else: + fields.append(sf) + + return MessageElement( + DEFAULT_LOCATION, msgtype.name, nested_types=nested_types, reserveds=reserveds, fields=fields, one_ofs=one_ofs + ) + + +def _deserialize_options(options: Any) -> list[OptionElement]: + result: list[OptionElement] = [] + if options.HasField("java_package"): + result.append(OptionElement("java_package", OptionElement.Kind.STRING, options.java_package)) + if options.HasField("java_outer_classname"): + result.append(OptionElement("java_outer_classname", OptionElement.Kind.STRING, options.java_outer_classname)) + if options.HasField("optimize_for"): + result.append(OptionElement("optimize_for", OptionElement.Kind.ENUM, options.optimize_for)) + if options.HasField("java_multiple_files"): + result.append(OptionElement("java_multiple_files", OptionElement.Kind.BOOLEAN, options.java_multiple_files)) + if options.HasField("go_package"): + result.append(OptionElement("go_package", OptionElement.Kind.STRING, options.go_package)) + if options.HasField("cc_generic_services"): + result.append(OptionElement("cc_generic_services", OptionElement.Kind.BOOLEAN, options.cc_generic_services)) + if options.HasField("java_generic_services"): + result.append(OptionElement("java_generic_services", OptionElement.Kind.BOOLEAN, options.java_generic_services)) + if options.HasField("py_generic_services"): + result.append(OptionElement("py_generic_services", OptionElement.Kind.BOOLEAN, options.py_generic_services)) + if options.HasField("java_generate_equals_and_hash"): + result.append( + OptionElement("java_generate_equals_and_hash", OptionElement.Kind.BOOLEAN, options.java_generate_equals_and_hash) + ) + if options.HasField("deprecated"): + result.append(OptionElement("deprecated", OptionElement.Kind.BOOLEAN, options.deprecated)) + if options.HasField("java_string_check_utf8"): + result.append(OptionElement("java_string_check_utf8", OptionElement.Kind.BOOLEAN, options.java_string_check_utf8)) + if options.HasField("cc_enable_arenas"): + result.append(OptionElement("cc_enable_arenas", OptionElement.Kind.BOOLEAN, options.cc_enable_arenas)) + if options.HasField("objc_class_prefix"): + result.append(OptionElement("objc_class_prefix", OptionElement.Kind.STRING, options.objc_class_prefix)) + if options.HasField("csharp_namespace"): + result.append(OptionElement("csharp_namespace", OptionElement.Kind.STRING, options.csharp_namespace)) + if options.HasField("swift_prefix"): + result.append(OptionElement("swift_prefix", OptionElement.Kind.STRING, options.swift_prefix)) + if options.HasField("php_class_prefix"): + result.append(OptionElement("php_class_prefix", OptionElement.Kind.STRING, options.php_class_prefix)) + if options.HasField("php_namespace"): + result.append(OptionElement("php_namespace", OptionElement.Kind.STRING, options.php_namespace)) + if options.HasField("php_generic_services"): + result.append(OptionElement("php_generic_services", OptionElement.Kind.BOOLEAN, options.php_generic_services)) + if options.HasField("php_metadata_namespace"): + result.append(OptionElement("php_metadata_namespace", OptionElement.Kind.STRING, options.php_metadata_namespace)) + if options.HasField("ruby_package"): + result.append(OptionElement("ruby_package", OptionElement.Kind.STRING, options.ruby_package)) + return result + + +def deserialize(schema_b64: str) -> ProtoFileElement: + serialized_pb = base64.b64decode(schema_b64, validate=True) + proto = google.protobuf.descriptor_pb2.FileDescriptorProto() + proto.ParseFromString(serialized_pb) + imports: list[TypeName] = [] + public_imports: list[TypeName] = [] + for index, dep in enumerate(proto.dependency): + if index in proto.public_dependency: + public_imports.append(TypeName(dep)) + else: + imports.append(TypeName(dep)) + types: list[TypeElement] = [] + for enumtype in proto.enum_type: + types.append(_deserialize_enum(enumtype)) + for msgtype in proto.message_type: + types.append(_deserialize_msg(msgtype)) + options: list[OptionElement] = _deserialize_options(proto.options) + syntax = None + if proto.syntax: + syntax = Syntax(proto.syntax) + + return ProtoFileElement( + DEFAULT_LOCATION, + package_name=PackageName(proto.package), + syntax=syntax, + imports=imports, + public_imports=public_imports, + types=types, + options=options, + ) + + +_LABEL_MAP = MappingProxyType( + { + Field.Label.OPTIONAL: google.protobuf.descriptor_pb2.FieldDescriptorProto.LABEL_OPTIONAL, + Field.Label.REQUIRED: google.protobuf.descriptor_pb2.FieldDescriptorProto.LABEL_REQUIRED, + Field.Label.REPEATED: google.protobuf.descriptor_pb2.FieldDescriptorProto.LABEL_REPEATED, + } +) + + +def _serialize_field_label(label: Field.Label) -> google.protobuf.descriptor_pb2.FieldDescriptorProto.Label.ValueType: + if label not in _LABEL_MAP: + raise NotImplementedError(f"Unsupported field label {label}") + return _LABEL_MAP[label] + + +def _serialize_field(field: FieldElement) -> google.protobuf.descriptor_pb2.FieldDescriptorProto: + d = google.protobuf.descriptor_pb2.FieldDescriptorProto() + if field.label is not None: + d.label = _serialize_field_label(field.label) + if field.label == Field.Label.OPTIONAL: + d.proto3_optional = True + else: + d.label = _serialize_field_label(Field.Label.OPTIONAL) + if field.element_type in _REVERSE_TYPE_MAP: + d.type = _REVERSE_TYPE_MAP[field.element_type] + else: + d.type_name = field.element_type + if field.name is not None: + d.name = field.name + if field.tag is not None: + d.number = field.tag + return d + + +def _serialize_enumtype(e: EnumElement) -> google.protobuf.descriptor_pb2.EnumDescriptorProto: + result = google.protobuf.descriptor_pb2.EnumDescriptorProto() + result.name = e.name + for c in e.constants: + c2 = google.protobuf.descriptor_pb2.EnumValueDescriptorProto() + c2.name = c.name + c2.number = c.tag + c2.options.deprecated = any(o.name == "deprecated" and o.value == "true" for o in c.options) + result.value.append(c2) + return result + + +def _serialize_msgtype(t: MessageElement) -> google.protobuf.descriptor_pb2.DescriptorProto: + d = google.protobuf.descriptor_pb2.DescriptorProto() + d.name = t.name + for nt in t.nested_types: + if isinstance(nt, MessageElement): + d.nested_type.append(_serialize_msgtype(nt)) + elif isinstance(nt, EnumElement): + d.enum_type.append(_serialize_enumtype(nt)) + else: + raise NotImplementedError(f"Unsupported nested type of {nt}") + for r in t.reserveds: + for v in r.values: + if isinstance(v, int): + rr = google.protobuf.descriptor_pb2.DescriptorProto.ReservedRange() + rr.start = v + rr.end = v + 1 + d.reserved_range.append(rr) + elif isinstance(v, KotlinRange): + rr = google.protobuf.descriptor_pb2.DescriptorProto.ReservedRange() + rr.start = v.minimum + rr.end = v.maximum + 1 + d.reserved_range.append(rr) + elif isinstance(v, str): + d.reserved_name.append(v) + for field in t.fields: + d.field.append(_serialize_field(field)) + for oneof in t.one_ofs: + oneof2 = google.protobuf.descriptor_pb2.OneofDescriptorProto() + oneof2.name = oneof.name + oneof_index = len(d.oneof_decl) + d.oneof_decl.append(oneof2) + for field in oneof.fields: + sf = _serialize_field(field) + sf.oneof_index = oneof_index + d.field.append(sf) + return d + + +def _serialize_options(options: list[OptionElement], result: google.protobuf.descriptor_pb2.FileOptions) -> None: + for opt in options: + if opt.name == ("java_package"): + result.java_package = opt.value + if opt.name == ("java_outer_classname"): + result.java_outer_classname = opt.value + if opt.name == ("optimize_for"): + result.optimize_for = opt.value + if opt.name == ("java_multiple_files"): + result.java_multiple_files = opt.value + if opt.name == ("go_package"): + result.go_package = opt.value + if opt.name == ("cc_generic_services"): + result.cc_generic_services = opt.value + if opt.name == ("java_generic_services"): + result.java_generic_services = opt.value + if opt.name == ("py_generic_services"): + result.py_generic_services = opt.value + if opt.name == ("java_generate_equals_and_hash"): + result.java_generate_equals_and_hash = opt.value + if opt.name == ("deprecated"): + result.deprecated = opt.value + if opt.name == ("java_string_check_utf8"): + result.java_string_check_utf8 = opt.value + if opt.name == ("cc_enable_arenas"): + result.cc_enable_arenas = opt.value + if opt.name == ("objc_class_prefix"): + result.objc_class_prefix = opt.value + if opt.name == ("csharp_namespace"): + result.csharp_namespace = opt.value + if opt.name == ("swift_prefix"): + result.swift_prefix = opt.value + if opt.name == ("php_class_prefix"): + result.php_class_prefix = opt.value + if opt.name == ("php_namespace"): + result.php_namespace = opt.value + if opt.name == ("php_generic_services"): + result.php_generic_services = opt.value + if opt.name == ("php_metadata_namespace"): + result.php_metadata_namespace = opt.value + if opt.name == ("ruby_package"): + result.ruby_package = opt.value + + +def serialize(schema: ProtoFileElement) -> str: + fd = google.protobuf.descriptor_pb2.FileDescriptorProto() + if schema.syntax is not None: + fd.syntax = schema.syntax.value + if schema.package_name is not None: + fd.package = schema.package_name + if schema.options is not None: + _serialize_options(schema.options, fd.options) + + for index, dep in enumerate(schema.public_imports): + fd.dependency.append(str(dep)) + fd.public_dependency.append(index) + for dep in schema.imports: + fd.dependency.append(str(dep)) + + for t in schema.types: + if isinstance(t, MessageElement): + fd.message_type.append(_serialize_msgtype(t)) + elif isinstance(t, EnumElement): + fd.enum_type.append(_serialize_enumtype(t)) + return base64.b64encode(fd.SerializeToString()).decode("utf-8") diff --git a/karapace/schema_models.py b/karapace/schema_models.py index c81f2e8c1..63f232bf5 100644 --- a/karapace/schema_models.py +++ b/karapace/schema_models.py @@ -291,6 +291,11 @@ def schema(self) -> Draft7Validator | AvroSchema | ProtobufSchema: def get_references(self) -> Sequence[Reference] | None: return self.references + def serialize(self) -> str: + if isinstance(self.schema, ProtobufSchema): + return self.schema.serialize() + return self.schema_str + class ValidatedTypedSchema(ParsedTypedSchema): """Validated schema resource. diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index f4d22cd78..6ad39a4c2 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -506,7 +506,12 @@ def _has_subject_with_id() -> bool: status=HTTPStatus.NOT_FOUND, ) - response_body = {"schema": schema.schema_str} + schema_str = schema.schema_str + format_serialized = request.query.get("format", "").lower() == "serialized" + if format_serialized and schema.schema_type == SchemaType.PROTOBUF: + parsed_schema = ParsedTypedSchema.parse(schema_type=schema.schema_type, schema_str=schema_str) + schema_str = parsed_schema.serialize() + response_body = {"schema": schema_str} if include_subjects: response_body["subjects"] = self.schema_registry.database.subjects_for_schema(parsed_schema_id) diff --git a/mypy.ini b/mypy.ini index e06dfb201..a1b034e08 100644 --- a/mypy.ini +++ b/mypy.ini @@ -23,9 +23,6 @@ disallow_untyped_defs = False disallow_incomplete_defs = False warn_unused_ignores = False -[mypy-karapace.protobuf.kotlin_wrapper] -ignore_errors = True - [mypy-karapace.protobuf.proto_file_element] ignore_errors = True diff --git a/requirements/requirements-typing.in b/requirements/requirements-typing.in index c55f35548..b0179da71 100644 --- a/requirements/requirements-typing.in +++ b/requirements/requirements-typing.in @@ -5,3 +5,4 @@ mypy types-jsonschema sentry-sdk types-cachetools +types-protobuf<4 diff --git a/requirements/requirements-typing.txt b/requirements/requirements-typing.txt index a8f75adfa..f757e763a 100644 --- a/requirements/requirements-typing.txt +++ b/requirements/requirements-typing.txt @@ -4,6 +4,10 @@ # # 'make requirements' # +attrs==23.1.0 + # via + # -c requirements-dev.txt + # referencing certifi==2023.7.22 # via # -c requirements-dev.txt @@ -12,18 +16,25 @@ mypy==1.6.1 # via -r requirements-typing.in mypy-extensions==1.0.0 # via mypy -sentry-sdk==1.31.0 +referencing==0.30.0 + # via + # -c requirements-dev.txt + # types-jsonschema +rpds-py==0.9.2 # via # -c requirements-dev.txt - # -r requirements-typing.in -types-cachetools==5.3.0.6 + # referencing +sentry-sdk==1.31.0 + # via -r requirements-typing.in +types-cachetools==5.3.0.7 + # via -r requirements-typing.in +types-jsonschema==4.19.0.4 # via -r requirements-typing.in -types-jsonschema==4.17.0.10 +types-protobuf==3.20.3 # via -r requirements-typing.in typing-extensions==4.6.3 # via # -c requirements-dev.txt - # -c requirements.txt # mypy urllib3==2.0.7 # via diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 9a89a17cd..c7247188c 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -1104,3 +1104,54 @@ async def test_protobuf_customer_update(registry_async_client: Client) -> None: res = await registry_async_client.post(f"subjects/{subject}/versions", json=body) assert res.status_code == 200 + + +async def test_protobuf_binary_serialized(registry_async_client: Client) -> None: + subject = create_subject_name_factory("test_protobuf_binary_serialized")() + + schema_plain = """\ +syntax = "proto3"; + +message Key { + int32 id = 1; +} +message Dog { + string name = 1; + int32 weight = 2; + repeated string toys = 4; +} +""" + schema_serialized = ( + "Cg5tZXNzYWdlcy5wcm90byIRCgNLZXkSCgoCaWQYASABKAUiMQoDRG9nEgwKBG5hbW" + + "UYASABKAkSDgoGd2VpZ2h0GAIgASgFEgwKBHRveXMYBCADKAliBnByb3RvMw==" + ) + + body = {"schemaType": "PROTOBUF", "schema": schema_serialized} + res = await registry_async_client.post(f"subjects/{subject}/versions", json=body) + + assert res.status_code == 200 + assert "id" in res.json() + schema_id = res.json()["id"] + + body = {"schemaType": "PROTOBUF", "schema": schema_plain} + res = await registry_async_client.post(f"subjects/{subject}/versions", json=body) + + assert res.status_code == 200 + assert "id" in res.json() + assert schema_id == res.json()["id"] + + res = await registry_async_client.get(f"/schemas/ids/{schema_id}") + assert res.status_code == 200 + assert "schema" in res.json() + assert res.json()["schema"] == schema_plain + + res = await registry_async_client.get(f"/schemas/ids/{schema_id}?format=serialized") + assert res.status_code == 200 + assert "schema" in res.json() + assert res.json()["schema"] + + body = {"schemaType": "PROTOBUF", "schema": res.json()["schema"]} + res = await registry_async_client.post(f"subjects/{subject}/versions", json=body) + assert res.status_code == 200 + assert "id" in res.json() + assert schema_id == res.json()["id"] diff --git a/tests/schemas/protobuf.py b/tests/schemas/protobuf.py index 2142efc2a..09d36799f 100644 --- a/tests/schemas/protobuf.py +++ b/tests/schemas/protobuf.py @@ -2,10 +2,12 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -schema_protobuf_plain = """syntax = "proto3"; +schema_protobuf_plain = """\ +syntax = "proto3"; package com.codingharbour.protobuf; option java_outer_classname = "SimpleMessageProtos"; + message SimpleMessage { string content = 1; string date_time = 2; @@ -13,51 +15,64 @@ } """ -schema_protobuf_schema_registry1 = """ -|syntax = "proto3"; -|package com.codingharbour.protobuf; -| -|message SimpleMessage { -| string content = 1; -| string my_string = 2; -| int32 my_int = 3; -|} -| +schema_protobuf_plain_bin = ( + "CgdkZWZhdWx0Ehpjb20uY29kaW5naGFyYm91ci5wcm90b2J1ZiI/Cg1TaW1wbGVNZXNzYW" + + "dlEg0KB2NvbnRlbnQYASgJEg8KCWRhdGVfdGltZRgCKAkSDgoIY29udGVudDIYAygJQh" + + "VCE1NpbXBsZU1lc3NhZ2VQcm90b3NiBnByb3RvMw==" +) + +schema_protobuf_schema_registry1 = """\ +syntax = "proto3"; +package com.codingharbour.protobuf; + +message SimpleMessage { + string content = 1; + string my_string = 2; + int32 my_int = 3; +} + """ -schema_protobuf_order_before = """ -|syntax = "proto3"; -| -|option java_package = "com.codingharbour.protobuf"; -|option java_outer_classname = "TestEnumOrder"; -| -|enum Enum { -| HIGH = 0; -| MIDDLE = 1; -| LOW = 2; -|} -|message Message { -| int32 query = 1; -|} +schema_protobuf_order_before = """\ +syntax = "proto3"; + +option java_package = "com.codingharbour.protobuf"; +option java_outer_classname = "TestEnumOrder"; + +enum Enum { + HIGH = 0; + MIDDLE = 1; + LOW = 2; +} +message Message { + int32 query = 1; +} """ -schema_protobuf_order_after = """ -|syntax = "proto3"; -| -|option java_package = "com.codingharbour.protobuf"; -|option java_outer_classname = "TestEnumOrder"; -| -|message Message { -| int32 query = 1; -|} -|enum Enum { -| HIGH = 0; -| MIDDLE = 1; -| LOW = 2; -|} -| +schema_protobuf_order_after = """\ +syntax = "proto3"; + +option java_package = "com.codingharbour.protobuf"; +option java_outer_classname = "TestEnumOrder"; + +message Message { + int32 query = 1; +} +enum Enum { + HIGH = 0; + MIDDLE = 1; + LOW = 2; +} + """ +schema_protobuf_order_after_bin = ( + "CgdkZWZhdWx0IhYKB01lc3NhZ2USCwoFcXVlcnkYASgFKiUKBEVudW0SCAoESElHSBAAEg" + + "oKBk1JRERMRRABEgcKA0xPVxACQisKGmNvbS5jb2RpbmdoYXJib3VyLnByb3RvYnVmQg" + "1UZXN0RW51bU9yZGVyYgZwcm90bzM=" +) + + schema_protobuf_compare_one = """ |syntax = "proto3"; | @@ -75,3 +90,138 @@ |} | """ + +schema_protobuf_nested_message4 = """\ +syntax = "proto3"; +package fancy.company.in.party.v1; + +message AnotherMessage { + message WowANestedMessage { + message DeeplyNestedMsg { + message AnotherLevelOfNesting { + .fancy.company.in.party.v1.AnotherMessage.WowANestedMessage.BamFancyEnum im_tricky_im_referring_to_the_previous_enum = 1; + } + } + enum BamFancyEnum { + MY_AWESOME_FIELD = 0; + } + } +} +""" + +schema_protobuf_nested_message4_bin = ( + "CgdkZWZhdWx0EhlmYW5jeS5jb21wYW55LmluLnBhcnR5LnYxIvUBCg5Bbm90aGVyTWVzc2" + + "FnZRriAQoRV293QU5lc3RlZE1lc3NhZ2UapgEKD0RlZXBseU5lc3RlZE1zZxqSAQoVQW" + + "5vdGhlckxldmVsT2ZOZXN0aW5nEnkKK2ltX3RyaWNreV9pbV9yZWZlcnJpbmdfdG9fdG" + + "hlX3ByZXZpb3VzX2VudW0YATJILmZhbmN5LmNvbXBhbnkuaW4ucGFydHkudjEuQW5vdG" + + "hlck1lc3NhZ2UuV293QU5lc3RlZE1lc3NhZ2UuQmFtRmFuY3lFbnVtIiQKDEJhbUZhbm" + + "N5RW51bRIUChBNWV9BV0VTT01FX0ZJRUxEEABiBnByb3RvMw==" +) + +schema_protobuf_oneof = """\ +syntax = "proto3"; + +message Goods { + float ff = 5; + + oneof item { + string name_a = 1; + string name_b = 2; + int32 id = 3; + double dd = 4; + } + oneof item2 { + uint64 ui = 6; + bool bbb = 7; + sfixed32 sf = 32; + bytes bye = 33; + sint64 sintti = 42; + } +} +""" + +schema_protobuf_oneof_bin = ( + "CgdkZWZhdWx0Iq4BCgVHb29kcxIQCgZuYW1lX2EYASABKAlIABIQCgZuYW1lX2IYAiABKA" + + "lIABIMCgJpZBgDIAEoBUgAEgwKAmRkGAQgASgBSAASDAoCdWkYBiABKARIARINCgNiYm" + + "IYByABKAhIARIMCgJzZhggIAEoD0gBEg0KA2J5ZRghIAEoDEgBEhAKBnNpbnR0aRgqIA" + + "EoEkgBEggKAmZmGAUoAkIGCgRpdGVtQgcKBWl0ZW0yYgZwcm90bzM=" +) + + +schema_protobuf_container2 = """\ +syntax = "proto3"; +package a1; + +message container { + message H { + int32 s = 1; + } +} +""" + +schema_protobuf_container2_bin = "CgdkZWZhdWx0EgJhMSIZCgljb250YWluZXIaDAoBSBIHCgFzGAEoBWIGcHJvdG8z" + +schema_protobuf_references = """\ +syntax = "proto3"; +package a1; + +import "container2.proto"; + +message TestMessage { + string t = 1; + .a1.TestMessage.V v = 2; + + message V { + .a1.container.H h = 1; + int32 x = 2; + } +} +""" + +schema_protobuf_references_bin = ( + "CgdkZWZhdWx0EgJhMRoQY29udGFpbmVyMi5wcm90byJWCgtUZXN0TWVzc2FnZRIHCgF0GA" + + "EoCRIYCgF2GAIyES5hMS5UZXN0TWVzc2FnZS5WGiQKAVYSFgoBaBgBMg8uYTEuY29udG" + + "FpbmVyLkgSBwoBeBgCKAViBnByb3RvMw==" +) + +schema_protobuf_references2 = """\ +syntax = "proto3"; +package a1; + +import public "container2.proto"; + +message TestMessage { + string t = 1; + .a1.TestMessage.V v = 2; + + message V { + .a1.container.H h = 1; + int32 x = 2; + } +} +""" + +schema_protobuf_references2_bin = ( + "CgdkZWZhdWx0EgJhMRoQY29udGFpbmVyMi5wcm90byJWCgtUZXN0TWVzc2FnZRIHCgF0GA" + + "EoCRIYCgF2GAIyES5hMS5UZXN0TWVzc2FnZS5WGiQKAVYSFgoBaBgBMg8uYTEuY29udG" + + "FpbmVyLkgSBwoBeBgCKAVQAGIGcHJvdG8z" +) + +schema_protobuf_complex = """\ +import "google/protobuf/descriptor.proto"; + +message Foo { + reserved 10, 12 to 14, "foo"; +} +enum Data { + DATA_UNSPECIFIED = 0; + DATA_SEARCH = 1 [deprecated = true]; + DATA_DISPLAY = 2; +} +""" + +schema_protobuf_complex_bin = ( + "CgdkZWZhdWx0GiBnb29nbGUvcHJvdG9idWYvZGVzY3JpcHRvci5wcm90byIWCgNGb29KBA" + + "gKEAtKBAgMEA9SA2ZvbypDCgREYXRhEhQKEERBVEFfVU5TUEVDSUZJRUQQABITCgtEQV" + + "RBX1NFQVJDSBABGgIIARIQCgxEQVRBX0RJU1BMQVkQAg==" +) diff --git a/tests/unit/test_protobuf_binary_serialization.py b/tests/unit/test_protobuf_binary_serialization.py new file mode 100644 index 000000000..6e2870d65 --- /dev/null +++ b/tests/unit/test_protobuf_binary_serialization.py @@ -0,0 +1,88 @@ +""" +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" +from karapace.protobuf.schema import ProtobufSchema +from karapace.protobuf.serialization import deserialize, serialize +from tests.schemas.protobuf import ( + schema_protobuf_complex, + schema_protobuf_complex_bin, + schema_protobuf_container2, + schema_protobuf_container2_bin, + schema_protobuf_nested_message4, + schema_protobuf_nested_message4_bin, + schema_protobuf_oneof, + schema_protobuf_oneof_bin, + schema_protobuf_order_after, + schema_protobuf_order_after_bin, + schema_protobuf_plain, + schema_protobuf_plain_bin, + schema_protobuf_references, + schema_protobuf_references2, + schema_protobuf_references2_bin, + schema_protobuf_references_bin, +) + +import pytest + +schema_serialized1 = ( + "Cg5tZXNzYWdlcy5wcm90byIRCgNLZXkSCgoCaWQYASABKAUiMQoDRG9nEgwKBG5hbW" + + "UYASABKAkSDgoGd2VpZ2h0GAIgASgFEgwKBHRveXMYBCADKAliBnByb3RvMw==" +) + +schema_plain1 = """\ +syntax = "proto3"; + +message Key { + int32 id = 1; +} +message Dog { + string name = 1; + int32 weight = 2; + repeated string toys = 4; +} +""" + +schema_serialized_normalized = ( + "CgdkZWZhdWx0Ig8KA0tleRIICgJpZBgBKAUiLQoDRG9nEgoKBG5hbWUYASgJEgwKBndlaWdodBgCKAUSDAoEdG95cxgEIAMoCWIGcHJvdG8z" +) + + +@pytest.mark.parametrize( + "schema_plain,schema_serialized", + [ + (schema_plain1, schema_serialized1), + (schema_protobuf_plain, schema_protobuf_plain_bin), + (schema_protobuf_order_after, schema_protobuf_order_after_bin), + (schema_protobuf_nested_message4, schema_protobuf_nested_message4_bin), + (schema_protobuf_oneof, schema_protobuf_oneof_bin), + (schema_protobuf_container2, schema_protobuf_container2_bin), + (schema_protobuf_references, schema_protobuf_references_bin), + (schema_protobuf_references2, schema_protobuf_references2_bin), + (schema_protobuf_complex, schema_protobuf_complex_bin), + ], +) +def test_schema_deserialize(schema_plain, schema_serialized): + assert ( + schema_plain.strip() + == ProtobufSchema("", None, None, proto_file_element=deserialize(schema_serialized)).to_schema().strip() + ) + + +@pytest.mark.parametrize( + "schema", + [ + schema_plain1, + schema_protobuf_plain, + schema_protobuf_order_after, + schema_protobuf_nested_message4, + schema_protobuf_oneof, + schema_protobuf_container2, + schema_protobuf_references, + schema_protobuf_references2, + schema_protobuf_complex, + ], +) +def test_simple_schema_serialize(schema): + serialized = serialize(ProtobufSchema(schema).proto_file_element) + assert schema.strip() == ProtobufSchema("", None, None, proto_file_element=deserialize(serialized)).to_schema().strip()