normalization: add normalization of the options #848

Merged · 4 commits · Apr 12, 2024
Changes from 1 commit
1 change: 0 additions & 1 deletion karapace/kafka_rest_apis/consumer_manager.py
@@ -467,7 +467,6 @@ async def fetch(self, internal_name: Tuple[str, str], content_type: str, formats
)
# we get to be more in line with the confluent proxy by doing a bunch of fetches each time and
# respecting the max fetch request size
# pylint: disable=protected-access
max_bytes = (
int(query_params["max_bytes"])
if "max_bytes" in query_params
36 changes: 36 additions & 0 deletions karapace/protobuf/proto_normalizations.py
@@ -0,0 +1,36 @@
"""
Copyright (c) 2024 Aiven Ltd
See LICENSE for details
"""
from karapace.protobuf.proto_file_element import ProtoFileElement
from karapace.typing import StrEnum


class ProtobufNormalisationOptions(StrEnum):
    sort_options = "sort_options"


def normalize_options_ordered(proto_file_element: ProtoFileElement) -> ProtoFileElement:
    sorted_options = (
        None if proto_file_element.options is None else list(sorted(proto_file_element.options, key=lambda x: x.name))
    )
    return ProtoFileElement(
        location=proto_file_element.location,
        package_name=proto_file_element.package_name,
        syntax=proto_file_element.syntax,
        imports=proto_file_element.imports,
        public_imports=proto_file_element.public_imports,
        types=proto_file_element.types,
        services=proto_file_element.services,
        extend_declarations=proto_file_element.extend_declarations,
        options=sorted_options,
    )


# if other normalizations are added we will switch to a more generic approach:
# def normalize_parsed_file(proto_file_element: ProtoFileElement,
#                           normalization: ProtobufNormalisationOptions) -> ProtoFileElement:
#     if normalization == ProtobufNormalisationOptions.sort_options:
#         return normalize_options_ordered(proto_file_element)
#     else:
#         assert_never(normalization)
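
The commented-out block above sketches the generic dispatch the author plans to switch to once more normalizations exist. For reference, a runnable version of that sketch could look like the following; this is hypothetical and not part of this PR, assumes Python 3.11+ for typing.assert_never (typing_extensions provides it on older versions), and assumes it lives next to the definitions above:

from typing import assert_never


def normalize_parsed_file(
    proto_file_element: ProtoFileElement, normalization: ProtobufNormalisationOptions
) -> ProtoFileElement:
    # Pick the normalization to apply; assert_never keeps the dispatch exhaustive
    # if new members are ever added to ProtobufNormalisationOptions.
    if normalization == ProtobufNormalisationOptions.sort_options:
        return normalize_options_ordered(proto_file_element)
    else:
        assert_never(normalization)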
13 changes: 12 additions & 1 deletion karapace/schema_models.py
@@ -19,6 +19,7 @@
ProtobufUnresolvedDependencyException,
SchemaParseException as ProtobufSchemaParseException,
)
from karapace.protobuf.proto_normalizations import normalize_options_ordered
from karapace.protobuf.schema import ProtobufSchema
from karapace.schema_references import Reference
from karapace.schema_type import SchemaType
@@ -62,6 +63,7 @@ def parse_protobuf_schema_definition(
references: Sequence[Reference] | None = None,
dependencies: Mapping[str, Dependency] | None = None,
validate_references: bool = True,
normalize: bool = False,
) -> ProtobufSchema:
"""Parses and validates `schema_definition`.

@@ -74,6 +76,10 @@
result = protobuf_schema.verify_schema_dependencies()
if not result.result:
raise ProtobufUnresolvedDependencyException(f"{result.message}")

if protobuf_schema.proto_file_element is not None and normalize:
protobuf_schema.proto_file_element = normalize_options_ordered(protobuf_schema.proto_file_element)

return protobuf_schema


Expand Down Expand Up @@ -179,6 +185,7 @@ def parse(
validate_avro_names: bool,
references: Sequence[Reference] | None = None,
dependencies: Mapping[str, Dependency] | None = None,
normalize: bool = False,
) -> ParsedTypedSchema:
if schema_type not in [SchemaType.AVRO, SchemaType.JSONSCHEMA, SchemaType.PROTOBUF]:
raise InvalidSchema(f"Unknown parser {schema_type} for {schema_str}")
@@ -203,7 +210,7 @@

elif schema_type is SchemaType.PROTOBUF:
try:
parsed_schema = parse_protobuf_schema_definition(schema_str, references, dependencies)
parsed_schema = parse_protobuf_schema_definition(schema_str, references, dependencies, normalize=normalize)
except (
TypeError,
SchemaError,
@@ -270,6 +277,7 @@ def parse(
schema_str: str,
references: Sequence[Reference] | None = None,
dependencies: Mapping[str, Dependency] | None = None,
normalize: bool = False,
) -> ParsedTypedSchema:
return parse(
schema_type=schema_type,
@@ -278,6 +286,7 @@
validate_avro_names=False,
references=references,
dependencies=dependencies,
normalize=normalize,
)

def __str__(self) -> str:
@@ -352,6 +361,7 @@ def parse(
schema_str: str,
references: Sequence[Reference] | None = None,
dependencies: Mapping[str, Dependency] | None = None,
normalize: bool = False,
) -> ValidatedTypedSchema:
parsed_schema = parse(
schema_type=schema_type,
@@ -360,6 +370,7 @@
validate_avro_names=True,
references=references,
dependencies=dependencies,
normalize=normalize,
)

return cast(ValidatedTypedSchema, parsed_schema)
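
With the normalize flag threaded through schema_models.py, callers can request option sorting directly at parse time. A minimal usage sketch, assuming the imports resolve as in the diff above; the proto string and variable names are illustrative only:

from karapace.schema_models import ValidatedTypedSchema
from karapace.schema_type import SchemaType

proto_definition = """\
syntax = "proto3";
package example;

option java_package = "com.example";
option cc_generic_services = true;

message Foo {
  string code = 1;
}
"""

# normalize=True sorts the file-level options by name before the schema is used,
# so two definitions that differ only in option order parse to the same canonical form.
parsed = ValidatedTypedSchema.parse(
    schema_type=SchemaType.PROTOBUF,
    schema_str=proto_definition,
    normalize=True,
)
print(parsed)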
2 changes: 2 additions & 0 deletions karapace/schema_registry_apis.py
@@ -1191,6 +1191,7 @@ async def subject_post(
self._validate_schema_request_body(content_type, body)
schema_type = self._validate_schema_type(content_type, body)
self._validate_schema_key(content_type, body)
normalize = request.query.get("normalize", "false").lower() == "true"
references = self._validate_references(content_type, schema_type, body)

try:
@@ -1200,6 +1201,7 @@
schema_str=body["schema"],
references=references,
dependencies=resolved_dependencies,
normalize=normalize,
)
except (InvalidReferences, InvalidSchema, InvalidSchemaType) as e:
self.log.warning("Invalid schema: %r", body["schema"], exc_info=True)
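
On the REST side the behaviour is opt-in per request through the normalize query parameter read in subject_post above. A hedged client-side sketch, using the requests library for illustration; the registry URL and subject name are assumptions, while the path and parameter mirror the handler and the integration test below:

import requests

KARAPACE_URL = "http://localhost:8081"  # assumed registry address
SUBJECT = "my-protobuf-subject"  # assumed subject name

schema = """\
syntax = "proto3";
package tc4;

option java_package = "com.example";
option java_multiple_files = true;

message Foo {
  string code = 1;
}
"""

# With normalize=true the registry sorts file-level options before registering the schema,
# so re-posting the same schema with its options in a different order returns the same id.
response = requests.post(
    f"{KARAPACE_URL}/subjects/{SUBJECT}/versions",
    params={"normalize": "true"},
    json={"schemaType": "PROTOBUF", "schema": schema},
)
response.raise_for_status()
print(response.json()["id"])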
50 changes: 50 additions & 0 deletions tests/integration/test_schema_protobuf.py
@@ -1270,3 +1270,53 @@ async def test_protobuf_update_ordering(registry_async_client: Client) -> None:
    assert res.status_code == 200
    assert "id" in res.json()
    assert schema_id != res.json()["id"]


async def test_protobuf_normalization_of_options(registry_async_client: Client) -> None:
    subject = create_subject_name_factory("test_protobuf_normalization")()

    schema_with_option_unordered_1 = """\
syntax = "proto3";
package tc4;

option java_package = "com.example";
option java_outer_classname = "FredProto";
option java_multiple_files = true;
option java_generic_services = true;
option java_generate_equals_and_hash = true;
option java_string_check_utf8 = true;

message Foo {
string code = 1;
}
"""

body = {"schemaType": "PROTOBUF", "schema": schema_with_option_unordered_1}
res = await registry_async_client.post(f"subjects/{subject}/versions?normalize=true", json=body)

assert res.status_code == 200
assert "id" in res.json()
original_schema_id = res.json()["id"]

schema_with_option_unordered_2 = """\
syntax = "proto3";
package tc4;

option java_package = "com.example";
option java_generate_equals_and_hash = true;
option java_string_check_utf8 = true;
option java_multiple_files = true;
option java_outer_classname = "FredProto";
option java_generic_services = true;

message Foo {
string code = 1;
}
"""

body = {"schemaType": "PROTOBUF", "schema": schema_with_option_unordered_2}
res = await registry_async_client.post(f"subjects/{subject}/versions?normalize=true", json=body)

assert res.status_code == 200
assert "id" in res.json()
assert original_schema_id == res.json()["id"]
71 changes: 71 additions & 0 deletions tests/unit/protobuf/test_protobuf_normalization.py
@@ -0,0 +1,71 @@
"""
Copyright (c) 2024 Aiven Ltd
See LICENSE for details
"""
from karapace.protobuf.compare_result import CompareResult
from karapace.protobuf.location import Location
from karapace.protobuf.proto_normalizations import normalize_options_ordered
from karapace.protobuf.proto_parser import ProtoParser

location: Location = Location("some/folder", "file.proto")


def test_different_options_order_its_correctly_normalized() -> None:
    ordered_schema = """\
syntax = "proto3";

package pkg;

option cc_generic_services = true;
option java_generate_equals_and_hash = true;
option java_generic_services = true;
option java_multiple_files = true;
option java_outer_classname = "FooProto";
option java_package = "com.example.foo";
option java_string_check_utf8 = true;
option optimize_for = SPEED;

message Foo {
string fieldA = 1;

string fieldB = 2;

string fieldC = 3;

string fieldX = 4;
}
"""

    unordered_schema = """\
syntax = "proto3";

package pkg;

option java_generic_services = true;
option java_generate_equals_and_hash = true;
option java_package = "com.example.foo";
option java_outer_classname = "FooProto";
option optimize_for = SPEED;
option cc_generic_services = true;
option java_multiple_files = true;
option java_string_check_utf8 = true;

message Foo {
string fieldA = 1;

string fieldB = 2;

string fieldC = 3;

string fieldX = 4;
}
"""

    ordered_proto = ProtoParser.parse(location, ordered_schema)
    unordered_proto = ProtoParser.parse(location, unordered_schema)

    result = CompareResult()
    assert result.is_compatible()
    normalize_options_ordered(ordered_proto).compare(normalize_options_ordered(unordered_proto), result)
    assert result.is_compatible()
    assert normalize_options_ordered(ordered_proto).to_schema() == normalize_options_ordered(unordered_proto).to_schema()