diff --git a/karapace/protobuf/proto_normalizations.py b/karapace/protobuf/proto_normalizations.py index 9f65a2623..2eba3b20a 100644 --- a/karapace/protobuf/proto_normalizations.py +++ b/karapace/protobuf/proto_normalizations.py @@ -93,12 +93,15 @@ class NormalizedOneOfElement(OneOfElement): groups: Sequence[NormalizedGroupElement] -def type_field_element_with_sorted_options(type_field: FieldElement) -> NormalizedFieldElement: +def type_field_element_with_sorted_options(type_field: FieldElement, scope: str) -> NormalizedFieldElement: sorted_options = None if type_field.options is None else list(sorted(type_field.options, key=sort_by_name)) + field_type_normalized = (type_field.element_type + .removeprefix(".") # getting rid of the optional `.` before the package name + .removeprefix(f"{scope}.")) return NormalizedFieldElement( location=type_field.location, label=type_field.label, - element_type=type_field.element_type, + element_type=field_type_normalized, name=type_field.name, default_value=type_field.default_value, json_name=type_field.json_name, @@ -119,13 +122,14 @@ def enum_constant_element_with_sorted_options(enum_constant: EnumConstantElement ) -def enum_element_with_sorted_options(enum_element: EnumElement) -> NormalizedEnumElement: +def enum_element_with_sorted_options(enum_element: EnumElement, scope: str) -> NormalizedEnumElement: sorted_options = None if enum_element.options is None else list(sorted(enum_element.options, key=sort_by_name)) constants_with_sorted_options = ( None if enum_element.constants is None else [enum_constant_element_with_sorted_options(constant) for constant in enum_element.constants] ) + # todo: strip scope. return NormalizedEnumElement( location=enum_element.location, name=enum_element.name, @@ -163,10 +167,11 @@ def one_ofs_with_sorted_options(one_ofs: OneOfElement) -> NormalizedOneOfElement ) -def message_element_with_sorted_options(message_element: MessageElement) -> NormalizedMessageElement: +def message_element_with_sorted_options(message_element: MessageElement, scope: str) -> NormalizedMessageElement: + message_scope = f"{scope}.{message_element.name}" sorted_options = None if message_element.options is None else list(sorted(message_element.options, key=sort_by_name)) - sorted_nested_types = [type_element_with_sorted_options(nested_type) for nested_type in message_element.nested_types] - sorted_fields = [type_field_element_with_sorted_options(field) for field in message_element.fields] + sorted_nested_types = [type_element_with_sorted_options(nested_type, scope) for nested_type in message_element.nested_types] + sorted_fields = [type_field_element_with_sorted_options(field, scope) for field in message_element.fields] sorted_one_ofs = [one_ofs_with_sorted_options(one_of) for one_of in message_element.one_ofs] return NormalizedMessageElement( @@ -183,14 +188,17 @@ def message_element_with_sorted_options(message_element: MessageElement) -> Norm ) -def type_element_with_sorted_options(type_element: TypeElement) -> NormalizedTypeElement: +def type_element_with_sorted_options(type_element: TypeElement, scope: str) -> NormalizedTypeElement: sorted_nested_types: list[TypeElement] = [] + parent_name = type_element.name + message_scope = f"{scope}.{parent_name}" + for nested_type in type_element.nested_types: if isinstance(nested_type, EnumElement): - sorted_nested_types.append(enum_element_with_sorted_options(nested_type)) + sorted_nested_types.append(enum_element_with_sorted_options(nested_type, message_scope)) elif isinstance(nested_type, MessageElement): - sorted_nested_types.append(message_element_with_sorted_options(nested_type)) + sorted_nested_types.append(message_element_with_sorted_options(nested_type, message_scope)) else: raise ValueError("Unknown type element") # tried with assert_never but it did not work @@ -198,10 +206,10 @@ def type_element_with_sorted_options(type_element: TypeElement) -> NormalizedTyp type_element.nested_types = sorted_nested_types if isinstance(type_element, EnumElement): - return enum_element_with_sorted_options(type_element) + return enum_element_with_sorted_options(type_element, scope) if isinstance(type_element, MessageElement): - return message_element_with_sorted_options(type_element) + return message_element_with_sorted_options(type_element, scope) raise ValueError("Unknown type element") # tried with assert_never but it did not work @@ -251,7 +259,7 @@ def service_element_with_sorted_options(service_element: ServiceElement) -> Norm def normalize(proto_file_element: ProtoFileElement) -> NormalizedProtoFileElement: sorted_types: Sequence[NormalizedTypeElement] = [ - type_element_with_sorted_options(type_element) for type_element in proto_file_element.types + type_element_with_sorted_options(type_element, proto_file_element.package_name) for type_element in proto_file_element.types ] sorted_options = list(sorted(proto_file_element.options, key=sort_by_name)) sorted_services: Sequence[NormalizedServiceElement] = [ diff --git a/tests/unit/protobuf/test_protobuf_normalization.py b/tests/unit/protobuf/test_protobuf_normalization.py index b772b293c..5fcec1611 100644 --- a/tests/unit/protobuf/test_protobuf_normalization.py +++ b/tests/unit/protobuf/test_protobuf_normalization.py @@ -2,14 +2,16 @@ Copyright (c) 2024 Aiven Ltd See LICENSE for details """ +from typing import Final + from karapace.protobuf.compare_result import CompareResult -from karapace.protobuf.location import Location +from karapace.protobuf.location import Location, DEFAULT_LOCATION from karapace.protobuf.proto_normalizations import normalize from karapace.protobuf.proto_parser import ProtoParser import pytest -location: Location = Location("some/folder", "file.proto") +LOCATION: Final[Location] = Location("somefolder", "file.proto") # this would be a good case for using a property based test with a well-formed message generator @@ -511,10 +513,85 @@ ), ) def test_differently_ordered_options_normalizes_equally(ordered_schema: str, unordered_schema: str) -> None: - ordered_proto = ProtoParser.parse(location, ordered_schema) - unordered_proto = ProtoParser.parse(location, unordered_schema) + ordered_proto = ProtoParser.parse(LOCATION, ordered_schema) + unordered_proto = ProtoParser.parse(LOCATION, unordered_schema) result = CompareResult() normalize(ordered_proto).compare(normalize(unordered_proto), result) assert result.is_compatible() assert normalize(ordered_proto).to_schema() == normalize(unordered_proto).to_schema() + + + +PROTO_WITH_FULLY_QUALIFIED_PATHS = """\ +syntax = "proto3"; +package my.awesome.customer.v1; + +import "my/awesome/customer/v1/nested_value.proto"; +import "google/protobuf/timestamp.proto"; + +option csharp_namespace = "my.awesome.customer.V1"; +option go_package = "github.com/customer/api/my/awesome/customer/v1;dspv1"; +option java_multiple_files = true; +option java_outer_classname = "EventValueProto"; +option java_package = "com.my.awesome.customer.v1"; +option objc_class_prefix = "TDD"; +option php_metadata_namespace = "My\\Awesome\\Customer\\V1"; +option php_namespace = "My\\Awesome\\Customer\\V1"; +option ruby_package = "My::Awesome::Customer::V1"; + +message EventValue { + .my.awesome.customer.v1.NestedValue nested_value = 1; + .google.protobuf.Timestamp created_at = 2; +} +""" + + + +PROTO_WITH_SIMPLE_NAMES = """\ +syntax = "proto3"; +package my.awesome.customer.v1; + +import "my/awesome/customer/v1/nested_value.proto"; +import "google/protobuf/timestamp.proto"; + +option csharp_namespace = "my.awesome.customer.V1"; +option go_package = "github.com/customer/api/my/awesome/customer/v1;dspv1"; +option java_multiple_files = true; +option java_outer_classname = "EventValueProto"; +option java_package = "com.my.awesome.customer.v1"; +option objc_class_prefix = "TDD"; +option php_metadata_namespace = "My\\Awesome\\Customer\\V1"; +option php_namespace = "My\\Awesome\\Customer\\V1"; +option ruby_package = "My::Awesome::Customer::V1"; + +message EventValue { + NestedValue nested_value = 1; + google.protobuf.Timestamp created_at = 2; +}""" + + +def test_full_path_and_simple_names_are_equal() -> None: + """ + If we consider the specifications provided by buf as valid, we can read the definition of a type reference there: https://protobuf.com/docs/language-spec#type-references. + The current issue in the previous definition its the inability of the current parser to determine the equivalente between a fully-qualified type reference (in a dot notation) and a fully-qualified reference identified + by the name alone. (https://protobuf.com/docs/language-spec#fully-qualified-references). + The current test do not assert anything about the various equal definition that a user can use to define a relative reference (https://protobuf.com/docs/language-spec#relative-references). + The current resolution logic its to strip away the `.package_name` reference from the definition before starting the comparison for the field modifications. + + Even if the `TypeTree` (trie datastructure) can enable the implementation of the relative reference algorithm resolution (https://protobuf.com/docs/language-spec#relative-references). + We wanna avoid implementing such a feature due to the risk of introducing new bugs in the current implementation. + + The plan its to introduce a step of normalization for the protobuf schemas later, we will use the profile descriptors to gather for each type the reference + and once obtained the fully qualified type reference directly from the proto descriptor the "lingua franca" of the protobuf format. + Implementing properly all the variations of the path resolution means implementing the same behaviour of the protoc compiler, the mechanism should rely + on an already processed entity as the proto descriptor. + Due to this the current solution its to implement "just" the fully-qualified in dot notation and the fully-qualified reference identified by the name alone. + """ + + fully_qualitifed_simple_name_notation = ProtoParser.parse(LOCATION, PROTO_WITH_SIMPLE_NAMES) + fully_qualitifed_dot_notation = ProtoParser.parse(LOCATION, PROTO_WITH_FULLY_QUALIFIED_PATHS) + result = CompareResult() + normalize(fully_qualitifed_dot_notation).compare(normalize(fully_qualitifed_simple_name_notation), result) + assert result.is_compatible() + assert normalize(fully_qualitifed_dot_notation).to_schema() == normalize(fully_qualitifed_simple_name_notation).to_schema()