From c069395567e1d7fd4183302d8e0bf44547e50bc7 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Wed, 25 Sep 2024 21:28:21 +0300 Subject: [PATCH 01/18] chore: update tests to use Kafka 3.4.1 --- tests/conftest.py | 2 +- tests/integration/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 99ba55809..960f07191 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,7 +15,7 @@ pytest_plugins = "aiohttp.pytest_plugin" KAFKA_BOOTSTRAP_SERVERS_OPT = "--kafka-bootstrap-servers" KAFKA_VERION_OPT = "--kafka-version" -KAFKA_VERSION = "2.7.0" +KAFKA_VERSION = "3.4.1" LOG_DIR_OPT = "--log-dir" VERSION_REGEX = "([0-9]+[.])*[0-9]+" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 04fbd7aa1..139d1a1d5 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -79,7 +79,7 @@ def fixture_kafka_description(request: SubRequest) -> KafkaDescription: kafka_tgz=RUNTIME_DIR / kafka_tgz, install_dir=kafka_dir, download_url=kafka_url, - protocol_version="2.7", + protocol_version="3.4.1", ) From 8030f8fc09ca715849d4d6c3fc38b0d2e04e0ee2 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Tue, 8 Oct 2024 13:12:31 +0300 Subject: [PATCH 02/18] chore: remove Python 3.8 support --- .github/workflows/tests.yml | 2 +- .pre-commit-config.yaml | 2 +- GNUmakefile | 2 +- mypy.ini | 2 +- pyproject.toml | 5 ++--- runtime.txt | 2 +- website/README.rst | 2 +- 7 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d3ae1c40f..8b151f124 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] + python-version: [ '3.9', '3.10', '3.11', '3.12' ] env: PYTEST_ADDOPTS: >- --log-dir=/tmp/ci-logs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a975b52ec..1161ba0b7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: rev: v3.4.0 hooks: - id: pyupgrade - args: [ --py38-plus ] + args: [ --py39-plus ] - repo: https://github.com/pycqa/autoflake rev: v2.1.1 diff --git a/GNUmakefile b/GNUmakefile index 7f9c90191..0749b7613 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -3,7 +3,7 @@ SHELL := /usr/bin/env bash VENV_DIR ?= $(CURDIR)/venv PIP ?= pip3 --disable-pip-version-check --no-input --require-virtualenv PYTHON ?= python3 -PYTHON_VERSION ?= 3.8 +PYTHON_VERSION ?= 3.9 define PIN_VERSIONS_COMMAND pip install pip-tools && \ diff --git a/mypy.ini b/mypy.ini index 15ab9042f..0a0230c5f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,6 @@ [mypy] mypy_path = $MYPY_CONFIG_FILE_DIR/stubs -python_version = 3.8 +python_version = 3.9 packages = karapace show_error_codes = True pretty = True diff --git a/pyproject.toml b/pyproject.toml index 089668037..7b7efb33e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "karapace" -requires-python = ">= 3.8" +requires-python = ">= 3.9" dynamic = ["version"] readme = "README.rst" license = {file = "LICENSE"} @@ -49,7 +49,6 @@ classifiers=[ "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", 
@@ -107,5 +106,5 @@ include-package-data = true version_file = "src/karapace/version.py" [tool.black] -target-version = ["py38"] +target-version = ["py39"] line-length = 125 diff --git a/runtime.txt b/runtime.txt index 9e9414fda..57f558859 100644 --- a/runtime.txt +++ b/runtime.txt @@ -1 +1 @@ -python-3.8.16 +python-3.9.20 diff --git a/website/README.rst b/website/README.rst index 67e55aeef..c333ba578 100644 --- a/website/README.rst +++ b/website/README.rst @@ -6,7 +6,7 @@ A static HTML site, generated with Sphinx. You can find the website source in th Dependencies ------------ -You need Python 3.8+. Install the dependencies with ``pip``:: +You need Python 3.9+. Install the dependencies with ``pip``:: pip install -r requirements.txt From 3b30796f03fa8f5db074b4fef3e2c0b846420a55 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Tue, 8 Oct 2024 13:14:18 +0300 Subject: [PATCH 03/18] chore: pin-requirements with Python 3.9 --- requirements/requirements-dev.txt | 13 ++----------- requirements/requirements-typing.txt | 11 +---------- requirements/requirements.txt | 11 +---------- 3 files changed, 4 insertions(+), 31 deletions(-) diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 849f7c38f..510bfeef1 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # make pin-requirements @@ -93,10 +93,6 @@ idna==3.8 # yarl importlib-metadata==8.4.0 # via flask -importlib-resources==6.4.4 - # via - # jsonschema - # jsonschema-specifications iniconfig==2.0.0 # via pytest isodate==0.6.1 @@ -135,8 +131,6 @@ packaging==24.1 # pytest pdbpp==0.10.3 # via karapace (/karapace/pyproject.toml) -pkgutil-resolve-name==1.3.10 - # via jsonschema pluggy==1.5.0 # via pytest prometheus-client==0.20.0 @@ -215,7 +209,6 @@ typing-extensions==4.12.2 # via # anyio # karapace (/karapace/pyproject.toml) - # rich ujson==5.10.0 # via karapace (/karapace/pyproject.toml) urllib3==2.2.2 @@ -238,9 +231,7 @@ yarl==1.12.1 # aiohttp # karapace (/karapace/pyproject.toml) zipp==3.20.1 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata zope-event==5.0 # via gevent zope-interface==7.0.2 diff --git a/requirements/requirements-typing.txt b/requirements/requirements-typing.txt index 503061bea..2667aea8d 100644 --- a/requirements/requirements-typing.txt +++ b/requirements/requirements-typing.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # make pin-requirements @@ -45,10 +45,6 @@ idna==3.10 # via # anyio # yarl -importlib-resources==6.4.5 - # via - # jsonschema - # jsonschema-specifications isodate==0.6.1 # via karapace (/karapace/pyproject.toml) jsonschema==4.23.0 @@ -73,8 +69,6 @@ networkx==3.1 # via karapace (/karapace/pyproject.toml) packaging==24.1 # via aiokafka -pkgutil-resolve-name==1.3.10 - # via jsonschema prometheus-client==0.20.0 # via karapace (/karapace/pyproject.toml) protobuf==3.20.3 @@ -122,7 +116,6 @@ typing-extensions==4.12.2 # karapace (/karapace/pyproject.toml) # multidict # mypy - # rich ujson==5.10.0 # via karapace (/karapace/pyproject.toml) urllib3==2.2.2 @@ -135,7 +128,5 @@ yarl==1.12.1 # via # aiohttp # karapace (/karapace/pyproject.toml) -zipp==3.20.2 - # via importlib-resources zstandard==0.23.0 # via karapace 
(/karapace/pyproject.toml) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index d7803e46e..15b787dcf 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # make pin-requirements @@ -43,10 +43,6 @@ idna==3.8 # via # anyio # yarl -importlib-resources==6.4.4 - # via - # jsonschema - # jsonschema-specifications isodate==0.6.1 # via karapace (/karapace/pyproject.toml) jsonschema==4.23.0 @@ -67,8 +63,6 @@ networkx==3.1 # via karapace (/karapace/pyproject.toml) packaging==24.1 # via aiokafka -pkgutil-resolve-name==1.3.10 - # via jsonschema prometheus-client==0.20.0 # via karapace (/karapace/pyproject.toml) protobuf==3.20.3 @@ -103,7 +97,6 @@ typing-extensions==4.12.2 # via # anyio # karapace (/karapace/pyproject.toml) - # rich ujson==5.10.0 # via karapace (/karapace/pyproject.toml) watchfiles==0.23.0 @@ -114,7 +107,5 @@ yarl==1.12.1 # via # aiohttp # karapace (/karapace/pyproject.toml) -zipp==3.20.1 - # via importlib-resources zstandard==0.23.0 # via karapace (/karapace/pyproject.toml) From 06a93c7bd1f0b2c3bd4c6ccf1639afba5677bb0c Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Tue, 8 Oct 2024 13:19:45 +0300 Subject: [PATCH 04/18] chore: remove Python 3.8 related code --- src/karapace/avro_dataclasses/introspect.py | 6 +---- src/karapace/protobuf/proto_normalizations.py | 3 +-- src/karapace/protobuf/type_tree.py | 3 +-- src/karapace/utils.py | 13 --------- tests/unit/test_utils.py | 27 +------------------ 5 files changed, 4 insertions(+), 48 deletions(-) diff --git a/src/karapace/avro_dataclasses/introspect.py b/src/karapace/avro_dataclasses/introspect.py index 64b2e5856..fe569dea5 100644 --- a/src/karapace/avro_dataclasses/introspect.py +++ b/src/karapace/avro_dataclasses/introspect.py @@ -10,11 +10,7 @@ from dataclasses import Field, fields, is_dataclass, MISSING from enum import Enum from functools import lru_cache -from typing import Final, Sequence, TYPE_CHECKING, TypeVar, Union - -# Note: It's important get_args and get_origin are imported from typing_extensions -# until support for Python 3.8 is dropped. 
-from typing_extensions import get_args, get_origin +from typing import Final, get_args, get_origin, Sequence, TYPE_CHECKING, TypeVar, Union import datetime import uuid diff --git a/src/karapace/protobuf/proto_normalizations.py b/src/karapace/protobuf/proto_normalizations.py index 6a5356103..e3539aa46 100644 --- a/src/karapace/protobuf/proto_normalizations.py +++ b/src/karapace/protobuf/proto_normalizations.py @@ -20,7 +20,6 @@ from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.type_tree import TypeTree -from karapace.utils import remove_prefix from typing import Sequence import abc @@ -90,7 +89,7 @@ class NormalizedOneOfElement(OneOfElement): def normalize_type_field_element(type_field: FieldElement, package: str, type_tree: TypeTree) -> NormalizedFieldElement: sorted_options = None if type_field.options is None else list(sorted(type_field.options, key=sort_by_name)) - field_type_normalized = remove_prefix(remove_prefix(type_field.element_type, "."), f"{package}.") + field_type_normalized = type_field.element_type.removeprefix(".").removeprefix(f"{package}.") reference_in_type_tree = type_tree.type_in_tree(field_type_normalized) google_included_type = ( field_type_normalized in KnownDependency.index_simple or field_type_normalized in KnownDependency.index diff --git a/src/karapace/protobuf/type_tree.py b/src/karapace/protobuf/type_tree.py index f9279e864..71fe83b3d 100644 --- a/src/karapace/protobuf/type_tree.py +++ b/src/karapace/protobuf/type_tree.py @@ -6,7 +6,6 @@ from collections.abc import Iterable, Sequence from karapace.dataclasses import default_dataclass -from karapace.utils import remove_prefix import itertools @@ -84,7 +83,7 @@ def _type_in_tree(tree: TypeTree, remaining_tokens: list[str]) -> TypeTree | Non return tree def type_in_tree(self, queried_type: str) -> TypeTree | None: - return TypeTree._type_in_tree(self, remove_prefix(queried_type, ".").split(".")) + return TypeTree._type_in_tree(self, queried_type.removeprefix(".").split(".")) def expand_missing_absolute_path(self) -> Sequence[str]: oldest_import = self.oldest_matching_import() diff --git a/src/karapace/utils.py b/src/karapace/utils.py index 071b3e9d3..10db7bfdb 100644 --- a/src/karapace/utils.py +++ b/src/karapace/utils.py @@ -246,19 +246,6 @@ def log( self.logger.exception("Error in logging") -def remove_prefix(string: str, prefix: str) -> str: - """ - Not available in python 3.8. - """ - i = 0 - while i < len(string) and i < len(prefix): - if string[i] != prefix[i]: - return string - i += 1 - - return string[i:] - - def shutdown(): """ Send a SIGTERM into the current running application process, which should initiate shutdown logic. 
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 51633376c..9a3a33d73 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -3,37 +3,12 @@ See LICENSE for details """ from _pytest.logging import LogCaptureFixture -from karapace.utils import remove_prefix, shutdown +from karapace.utils import shutdown from unittest.mock import patch import logging -def test_remove_prefix_basic() -> None: - result = remove_prefix("hello world", "hello ") - assert result == "world" - - -def test_remove_prefix_empty_prefix() -> None: - result = remove_prefix("hello world", "") - assert result == "hello world" - - -def test_remove_prefix_prefix_not_in_string() -> None: - result = remove_prefix("hello world", "hey ") - assert result == "hello world" - - -def test_remove_prefix_multiple_occurrences_of_prefix() -> None: - result = remove_prefix("hello hello world", "hello ") - assert result == "hello world" - - -def test_remove_prefix_empty_string() -> None: - result = remove_prefix("", "hello ") - assert result == "" - - def test_shutdown(caplog: LogCaptureFixture) -> None: with caplog.at_level(logging.WARNING, logger="karapace.utils"): with patch("karapace.utils.signal") as mock_signal: From 646fb93ad132f1eecbd900a9731cfd9f059d4420 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Tue, 8 Oct 2024 14:45:47 +0300 Subject: [PATCH 05/18] chore: pyupgrade formattin to py39 --- .../schema-registry-schema-post.py | 3 +- .../anonymize_schemas/anonymize_avro.py | 10 +++---- src/karapace/avro_dataclasses/introspect.py | 4 +-- src/karapace/avro_dataclasses/models.py | 3 +- src/karapace/backup/api.py | 4 +-- src/karapace/backup/backends/reader.py | 3 +- src/karapace/backup/backends/v1.py | 5 ++-- src/karapace/backup/backends/v2.py | 7 +++-- src/karapace/backup/backends/v3/backend.py | 3 +- src/karapace/backup/backends/v3/readers.py | 3 +- src/karapace/backup/backends/v3/schema.py | 7 +++-- .../backup/backends/v3/schema_tool.py | 5 ++-- src/karapace/backup/backends/writer.py | 3 +- src/karapace/backup/cli.py | 2 +- src/karapace/backup/safe_writer.py | 3 +- src/karapace/backup/topic_configurations.py | 3 +- src/karapace/client.py | 3 +- .../compatibility/jsonschema/utils.py | 6 ++-- src/karapace/config.py | 3 +- .../coordinator/schema_coordinator.py | 3 +- src/karapace/in_memory_database.py | 2 +- src/karapace/instrumentation/prometheus.py | 3 +- src/karapace/kafka/admin.py | 3 +- src/karapace/kafka/consumer.py | 3 +- .../kafka_rest_apis/consumer_manager.py | 29 +++++++++---------- src/karapace/kafka_rest_apis/schema_cache.py | 7 +++-- src/karapace/kafka_utils.py | 2 +- src/karapace/karapace.py | 3 +- src/karapace/messaging.py | 4 +-- src/karapace/protobuf/compare_result.py | 7 ++--- src/karapace/protobuf/compare_type_lists.py | 2 +- src/karapace/protobuf/compare_type_storage.py | 12 ++++---- src/karapace/protobuf/encoding_variants.py | 5 ++-- src/karapace/protobuf/enum_element.py | 2 +- src/karapace/protobuf/extend_element.py | 2 +- src/karapace/protobuf/group_element.py | 2 +- src/karapace/protobuf/io.py | 5 ++-- src/karapace/protobuf/known_dependency.py | 10 +++---- src/karapace/protobuf/message_element.py | 2 +- src/karapace/protobuf/one_of_element.py | 2 +- src/karapace/protobuf/proto_file_element.py | 12 ++++---- src/karapace/protobuf/proto_normalizations.py | 2 +- src/karapace/protobuf/proto_parser.py | 28 +++++++++--------- src/karapace/protobuf/protobuf_to_dict.py | 3 +- src/karapace/protobuf/protopace/protopace.py | 7 ++--- src/karapace/protobuf/schema.py | 2 +- 
src/karapace/protobuf/serialization.py | 3 +- src/karapace/protobuf/service_element.py | 2 +- src/karapace/protobuf/type_element.py | 3 +- src/karapace/protobuf/utils.py | 8 ++--- src/karapace/rapu.py | 8 ++--- src/karapace/schema_models.py | 5 ++-- src/karapace/schema_reader.py | 3 +- src/karapace/schema_references.py | 5 ++-- src/karapace/schema_registry.py | 2 +- src/karapace/sentry/sentry_client.py | 2 +- src/karapace/sentry/sentry_client_api.py | 2 +- src/karapace/serialization.py | 3 +- src/karapace/statsd.py | 3 +- src/karapace/typing.py | 11 +++---- tests/conftest.py | 8 ++--- tests/integration/backup/test_avro_export.py | 6 ++-- tests/integration/backup/test_v3_backup.py | 3 +- tests/integration/conftest.py | 2 +- tests/integration/test_karapace.py | 2 +- tests/integration/test_rest.py | 3 +- tests/integration/test_schema.py | 5 ++-- tests/integration/test_schema_coordinator.py | 3 +- tests/integration/test_schema_protobuf.py | 6 ++-- tests/integration/test_schema_reader.py | 3 +- .../integration/test_schema_registry_auth.py | 3 +- tests/integration/utils/cluster.py | 2 +- tests/integration/utils/network.py | 2 +- tests/integration/utils/process.py | 4 +-- tests/integration/utils/zookeeper.py | 3 +- .../anonymize_schemas/test_anonymize_avro.py | 4 +-- .../unit/avro_dataclasses/test_introspect.py | 13 +++++---- tests/unit/avro_dataclasses/test_models.py | 8 ++--- tests/unit/backup/backends/v3/conftest.py | 2 +- tests/unit/test_in_memory_database.py | 3 +- tests/unit/test_schema_models.py | 8 ++--- tests/unit/test_schema_reader.py | 10 +++---- tests/utils.py | 4 +-- 83 files changed, 217 insertions(+), 194 deletions(-) diff --git a/performance-test/schema-registry-schema-post.py b/performance-test/schema-registry-schema-post.py index 491bcbcc7..b3fd795ff 100644 --- a/performance-test/schema-registry-schema-post.py +++ b/performance-test/schema-registry-schema-post.py @@ -5,7 +5,6 @@ from dataclasses import dataclass, field from locust import FastHttpUser, task from locust.contrib.fasthttp import ResponseContextManager -from typing import Dict import json import random @@ -17,7 +16,7 @@ @dataclass class TestData: count: int = 0 - schemas: Dict[uuid.UUID, SchemaId] = field(default_factory=dict) + schemas: dict[uuid.UUID, SchemaId] = field(default_factory=dict) SUBJECTS = ["test-subject-1", "test-subject-2"] diff --git a/src/karapace/anonymize_schemas/anonymize_avro.py b/src/karapace/anonymize_schemas/anonymize_avro.py index 56c0b8302..69906c94d 100644 --- a/src/karapace/anonymize_schemas/anonymize_avro.py +++ b/src/karapace/anonymize_schemas/anonymize_avro.py @@ -4,7 +4,7 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -from typing import Any, Dict, List, Union +from typing import Any, Union from typing_extensions import TypeAlias import hashlib @@ -95,7 +95,7 @@ def anonymize_element(m: re.Match) -> str: return NAME_ANONYMIZABLE_PATTERN.sub(anonymize_element, name) -Schema: TypeAlias = Union[str, Dict[str, Any], List[Any]] +Schema: TypeAlias = Union[str, dict[str, Any], list[Any]] def anonymize(input_schema: Schema) -> Schema: @@ -105,10 +105,10 @@ def anonymize(input_schema: Schema) -> Schema: if input_schema in ALL_TYPES: return input_schema return anonymize_name(input_schema) - elif isinstance(input_schema, List): + elif isinstance(input_schema, list): return [anonymize(value) for value in input_schema] - elif isinstance(input_schema, Dict): - output_schema: Dict[str, Any] = {} + elif isinstance(input_schema, dict): + output_schema: dict[str, Any] = {} for key, value 
in input_schema.items(): if key in KEYWORDS: output_schema[key] = anonymize(value) diff --git a/src/karapace/avro_dataclasses/introspect.py b/src/karapace/avro_dataclasses/introspect.py index fe569dea5..be9634493 100644 --- a/src/karapace/avro_dataclasses/introspect.py +++ b/src/karapace/avro_dataclasses/introspect.py @@ -6,11 +6,11 @@ from __future__ import annotations from .schema import AvroType, EnumType, FieldSchema, MapType, RecordSchema -from collections.abc import Mapping +from collections.abc import Mapping, Sequence from dataclasses import Field, fields, is_dataclass, MISSING from enum import Enum from functools import lru_cache -from typing import Final, get_args, get_origin, Sequence, TYPE_CHECKING, TypeVar, Union +from typing import Final, get_args, get_origin, TYPE_CHECKING, TypeVar, Union import datetime import uuid diff --git a/src/karapace/avro_dataclasses/models.py b/src/karapace/avro_dataclasses/models.py index 78a64c8f4..9bcd630cf 100644 --- a/src/karapace/avro_dataclasses/models.py +++ b/src/karapace/avro_dataclasses/models.py @@ -5,10 +5,11 @@ from __future__ import annotations from .introspect import record_schema +from collections.abc import Iterable, Mapping from dataclasses import asdict, fields, is_dataclass from enum import Enum from functools import lru_cache, partial -from typing import Callable, cast, IO, Iterable, Mapping, TYPE_CHECKING, TypeVar, Union +from typing import Callable, cast, IO, TYPE_CHECKING, TypeVar, Union from typing_extensions import get_args, get_origin, Self import avro diff --git a/src/karapace/backup/api.py b/src/karapace/backup/api.py index 7b243586b..d06c99ebe 100644 --- a/src/karapace/backup/api.py +++ b/src/karapace/backup/api.py @@ -22,7 +22,7 @@ from .poll_timeout import PollTimeout from .topic_configurations import ConfigSource, get_topic_configurations from aiokafka.errors import KafkaError, TopicAlreadyExistsError -from collections.abc import Sized +from collections.abc import Iterator, Mapping, Sized from concurrent.futures import Future from confluent_kafka import Message, TopicPartition from enum import Enum @@ -42,7 +42,7 @@ from pathlib import Path from rich.console import Console from tenacity import retry, retry_if_exception_type, RetryCallState, stop_after_delay, wait_fixed -from typing import Callable, Iterator, Literal, Mapping, NewType, TypeVar +from typing import Callable, Literal, NewType, TypeVar import contextlib import datetime diff --git a/src/karapace/backup/backends/reader.py b/src/karapace/backup/backends/reader.py index d4caadda2..d1d32bfe8 100644 --- a/src/karapace/backup/backends/reader.py +++ b/src/karapace/backup/backends/reader.py @@ -4,10 +4,11 @@ """ from __future__ import annotations +from collections.abc import Generator, Iterator, Mapping, Sequence from karapace.dataclasses import default_dataclass from karapace.typing import JsonData, JsonObject from pathlib import Path -from typing import Callable, ClassVar, Final, Generator, IO, Iterator, Mapping, Optional, Sequence, TypeVar, Union +from typing import Callable, ClassVar, Final, IO, Optional, TypeVar, Union from typing_extensions import TypeAlias import abc diff --git a/src/karapace/backup/backends/v1.py b/src/karapace/backup/backends/v1.py index 1b9400a98..186af0c0b 100644 --- a/src/karapace/backup/backends/v1.py +++ b/src/karapace/backup/backends/v1.py @@ -4,16 +4,17 @@ """ from __future__ import annotations +from collections.abc import Generator from karapace.backup.backends.reader import BaseItemsBackupReader from karapace.utils import 
json_decode -from typing import Generator, IO, List +from typing import IO class SchemaBackupV1Reader(BaseItemsBackupReader): @staticmethod def items_from_file(fp: IO[str]) -> Generator[list[str], None, None]: raw_msg = fp.read() - values = json_decode(raw_msg, List[List[str]]) + values = json_decode(raw_msg, list[list[str]]) if not values: return yield from values diff --git a/src/karapace/backup/backends/v2.py b/src/karapace/backup/backends/v2.py index 7472e9b2f..a456d6e39 100644 --- a/src/karapace/backup/backends/v2.py +++ b/src/karapace/backup/backends/v2.py @@ -4,12 +4,13 @@ """ from __future__ import annotations +from collections.abc import Generator, Sequence from karapace.anonymize_schemas import anonymize_avro from karapace.backup.backends.reader import BaseItemsBackupReader from karapace.backup.backends.writer import BaseKVBackupWriter, StdOut from karapace.utils import json_decode, json_encode from pathlib import Path -from typing import Any, ClassVar, Dict, Final, Generator, IO, Sequence +from typing import Any, ClassVar, Final, IO import base64 import contextlib @@ -61,8 +62,8 @@ def serialize_record( # Check that the message has key `schema` and type is Avro schema. # The Avro schemas may have `schemaType` key, if not present the schema is Avro. - key = json_decode(key_bytes, Dict[str, str]) - value = json_decode(value_bytes, Dict[str, str]) + key = json_decode(key_bytes, dict[str, str]) + value = json_decode(value_bytes, dict[str, str]) if value and "schema" in value and value.get("schemaType", "AVRO") == "AVRO": original_schema: Any = json_decode(value["schema"]) diff --git a/src/karapace/backup/backends/v3/backend.py b/src/karapace/backup/backends/v3/backend.py index 25e08cf42..c2aca1f25 100644 --- a/src/karapace/backup/backends/v3/backend.py +++ b/src/karapace/backup/backends/v3/backend.py @@ -9,6 +9,7 @@ from .readers import read_metadata, read_records from .schema import ChecksumAlgorithm, DataFile, Header, Metadata, Record from .writers import write_metadata, write_record +from collections.abc import Generator, Iterator, Mapping, Sequence from confluent_kafka import Message from dataclasses import dataclass from karapace.backup.backends.reader import BaseBackupReader, Instruction, ProducerSend, RestoreTopic @@ -18,7 +19,7 @@ from karapace.utils import assert_never from karapace.version import __version__ from pathlib import Path -from typing import Callable, ContextManager, Final, Generator, IO, Iterator, Mapping, Sequence, TypeVar +from typing import Callable, ContextManager, Final, IO, TypeVar from typing_extensions import TypeAlias import datetime diff --git a/src/karapace/backup/backends/v3/readers.py b/src/karapace/backup/backends/v3/readers.py index afa4aba4a..74b981143 100644 --- a/src/karapace/backup/backends/v3/readers.py +++ b/src/karapace/backup/backends/v3/readers.py @@ -8,8 +8,9 @@ from .constants import V3_MARKER from .errors import InvalidChecksum, InvalidHeader, TooFewRecords, TooManyRecords, UnexpectedEndOfData from .schema import Metadata, Record +from collections.abc import Generator from karapace.avro_dataclasses.models import AvroModel -from typing import Generator, IO, TypeVar +from typing import IO, TypeVar import io import struct diff --git a/src/karapace/backup/backends/v3/schema.py b/src/karapace/backup/backends/v3/schema.py index 9105d1c97..db4cc7862 100644 --- a/src/karapace/backup/backends/v3/schema.py +++ b/src/karapace/backup/backends/v3/schema.py @@ -4,10 +4,11 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from 
collections.abc import Mapping from dataclasses import field from karapace.avro_dataclasses.models import AvroModel from karapace.dataclasses import default_dataclass -from typing import Mapping, Optional, Tuple +from typing import Optional import datetime import enum @@ -56,7 +57,7 @@ class Metadata(AvroModel): partition_count: int = field(metadata={"type": "int"}) replication_factor: int = field(metadata={"type": "int"}) topic_configurations: Mapping[str, str] - data_files: Tuple[DataFile, ...] + data_files: tuple[DataFile, ...] checksum_algorithm: ChecksumAlgorithm = ChecksumAlgorithm.unknown def __post_init__(self) -> None: @@ -78,7 +79,7 @@ class Header(AvroModel): class Record(AvroModel): key: Optional[bytes] value: Optional[bytes] - headers: Tuple[Header, ...] + headers: tuple[Header, ...] offset: int = field(metadata={"type": "long"}) timestamp: int = field(metadata={"type": "long"}) # In order to reduce the impact of checksums on total file sizes, especially diff --git a/src/karapace/backup/backends/v3/schema_tool.py b/src/karapace/backup/backends/v3/schema_tool.py index 340be2477..94fb45db0 100644 --- a/src/karapace/backup/backends/v3/schema_tool.py +++ b/src/karapace/backup/backends/v3/schema_tool.py @@ -6,10 +6,11 @@ """ from . import schema from avro.compatibility import ReaderWriterCompatibilityChecker, SchemaCompatibilityType +from collections.abc import Generator from karapace.avro_dataclasses.introspect import record_schema from karapace.avro_dataclasses.models import AvroModel from karapace.schema_models import parse_avro_schema_definition -from typing import Final, Generator, Tuple, Type +from typing import Final import argparse import json @@ -19,7 +20,7 @@ import sys -def types() -> Generator[Tuple[str, Type[AvroModel]], None, None]: +def types() -> Generator[tuple[str, type[AvroModel]], None, None]: for name, value in schema.__dict__.items(): try: if issubclass(value, AvroModel) and value != AvroModel: diff --git a/src/karapace/backup/backends/writer.py b/src/karapace/backup/backends/writer.py index 7d5ddc287..927077e2b 100644 --- a/src/karapace/backup/backends/writer.py +++ b/src/karapace/backup/backends/writer.py @@ -4,10 +4,11 @@ """ from __future__ import annotations +from collections.abc import Iterator, Mapping, Sequence from confluent_kafka import Message from karapace.backup.safe_writer import bytes_writer, str_writer from pathlib import Path -from typing import ContextManager, Generic, IO, Iterator, Literal, Mapping, Sequence, TypeVar +from typing import ContextManager, Generic, IO, Literal, TypeVar from typing_extensions import TypeAlias import abc diff --git a/src/karapace/backup/cli.py b/src/karapace/backup/cli.py index 8e4b108be..5e3d72854 100644 --- a/src/karapace/backup/cli.py +++ b/src/karapace/backup/cli.py @@ -10,9 +10,9 @@ from .errors import BackupDataRestorationError, StaleConsumerError from .poll_timeout import PollTimeout from aiokafka.errors import BrokerResponseError +from collections.abc import Iterator from karapace.backup.api import VerifyLevel from karapace.config import Config, read_config -from typing import Iterator import argparse import contextlib diff --git a/src/karapace/backup/safe_writer.py b/src/karapace/backup/safe_writer.py index 57970b950..d8338f5ae 100644 --- a/src/karapace/backup/safe_writer.py +++ b/src/karapace/backup/safe_writer.py @@ -4,9 +4,10 @@ """ from __future__ import annotations +from collections.abc import Generator from pathlib import Path from tempfile import mkstemp, TemporaryDirectory -from typing import 
Final, Generator, IO, Literal +from typing import Final, IO, Literal from typing_extensions import TypeAlias import contextlib diff --git a/src/karapace/backup/topic_configurations.py b/src/karapace/backup/topic_configurations.py index 320e2e6ee..5aaf13a52 100644 --- a/src/karapace/backup/topic_configurations.py +++ b/src/karapace/backup/topic_configurations.py @@ -4,9 +4,10 @@ """ from __future__ import annotations +from collections.abc import Container from confluent_kafka.admin import ConfigSource from karapace.kafka.admin import KafkaAdminClient -from typing import Container, Final +from typing import Final ALL_CONFIG_SOURCES: Final = ConfigSource diff --git a/src/karapace/client.py b/src/karapace/client.py index dae79b244..23a9e157a 100644 --- a/src/karapace/client.py +++ b/src/karapace/client.py @@ -5,8 +5,9 @@ See LICENSE for details """ from aiohttp import BasicAuth, ClientSession +from collections.abc import Awaitable, Mapping from karapace.typing import JsonData -from typing import Awaitable, Callable, Mapping, Optional, Union +from typing import Callable, Optional, Union from urllib.parse import urljoin import logging diff --git a/src/karapace/compatibility/jsonschema/utils.py b/src/karapace/compatibility/jsonschema/utils.py index 011b7aa74..486af0719 100644 --- a/src/karapace/compatibility/jsonschema/utils.py +++ b/src/karapace/compatibility/jsonschema/utils.py @@ -5,12 +5,12 @@ from copy import copy from jsonschema import Draft7Validator from karapace.compatibility.jsonschema.types import BooleanSchema, Instance, Keyword, Subschema -from typing import Any, List, Optional, Tuple, Type, TypeVar, Union +from typing import Any, Optional, TypeVar, Union import re T = TypeVar("T") -JSONSCHEMA_TYPES = Union[Instance, Subschema, Keyword, Type[BooleanSchema]] +JSONSCHEMA_TYPES = Union[Instance, Subschema, Keyword, type[BooleanSchema]] def normalize_schema(validator: Draft7Validator) -> Any: @@ -53,7 +53,7 @@ def normalize_schema_rec(validator: Draft7Validator, original_schema: Any) -> An return normalized -def maybe_get_subschemas_and_type(schema: Any) -> Optional[Tuple[List[Any], Subschema]]: +def maybe_get_subschemas_and_type(schema: Any) -> Optional[tuple[list[Any], Subschema]]: """If schema contains `anyOf`, `allOf`, or `oneOf`, return it. 
This will also normalized schemas with a list of types to a `anyOf`, e..g: diff --git a/src/karapace/config.py b/src/karapace/config.py index 2618158a2..3761b0072 100644 --- a/src/karapace/config.py +++ b/src/karapace/config.py @@ -6,11 +6,12 @@ """ from __future__ import annotations +from collections.abc import Mapping from karapace.constants import DEFAULT_AIOHTTP_CLIENT_MAX_SIZE, DEFAULT_PRODUCER_MAX_REQUEST, DEFAULT_SCHEMA_TOPIC from karapace.typing import ElectionStrategy, NameStrategy from karapace.utils import json_decode, json_encode, JSONDecodeError from pathlib import Path -from typing import IO, Mapping +from typing import IO from typing_extensions import NotRequired, TypedDict import logging diff --git a/src/karapace/coordinator/schema_coordinator.py b/src/karapace/coordinator/schema_coordinator.py index ade69be91..151a1db26 100644 --- a/src/karapace/coordinator/schema_coordinator.py +++ b/src/karapace/coordinator/schema_coordinator.py @@ -25,11 +25,12 @@ SyncGroupRequest_v3, ) from aiokafka.util import create_future, create_task +from collections.abc import Coroutine, Sequence from karapace.dataclasses import default_dataclass from karapace.typing import JsonData from karapace.utils import json_decode, json_encode from karapace.version import __version__ -from typing import Any, Coroutine, Final, Sequence +from typing import Any, Final from typing_extensions import TypedDict import aiokafka.errors as Errors diff --git a/src/karapace/in_memory_database.py b/src/karapace/in_memory_database.py index 1192260ba..6692cae33 100644 --- a/src/karapace/in_memory_database.py +++ b/src/karapace/in_memory_database.py @@ -7,12 +7,12 @@ from __future__ import annotations from abc import ABC, abstractmethod +from collections.abc import Iterable, Sequence from dataclasses import dataclass, field from karapace.schema_models import SchemaVersion, TypedSchema, Versioner from karapace.schema_references import Reference, Referents from karapace.typing import SchemaId, Subject, Version from threading import Lock, RLock -from typing import Iterable, Sequence import logging diff --git a/src/karapace/instrumentation/prometheus.py b/src/karapace/instrumentation/prometheus.py index 4e478fdc7..1336b4ab0 100644 --- a/src/karapace/instrumentation/prometheus.py +++ b/src/karapace/instrumentation/prometheus.py @@ -9,9 +9,10 @@ from __future__ import annotations from aiohttp.web import middleware, Request, Response +from collections.abc import Awaitable from karapace.rapu import RestApp from prometheus_client import CollectorRegistry, Counter, Gauge, generate_latest, Histogram -from typing import Awaitable, Callable, Final +from typing import Callable, Final import logging import time diff --git a/src/karapace/kafka/admin.py b/src/karapace/kafka/admin.py index 5b9d9e5ad..fef52ebf5 100644 --- a/src/karapace/kafka/admin.py +++ b/src/karapace/kafka/admin.py @@ -5,7 +5,7 @@ from __future__ import annotations -from collections.abc import Iterable +from collections.abc import Container, Iterable from concurrent.futures import Future from confluent_kafka import TopicPartition from confluent_kafka.admin import ( @@ -27,7 +27,6 @@ single_futmap_result, UnknownTopicOrPartitionError, ) -from typing import Container class KafkaAdminClient(_KafkaConfigMixin, AdminClient): diff --git a/src/karapace/kafka/consumer.py b/src/karapace/kafka/consumer.py index 98e92c5f7..4bf4cde54 100644 --- a/src/karapace/kafka/consumer.py +++ b/src/karapace/kafka/consumer.py @@ -6,11 +6,12 @@ from __future__ import annotations from 
aiokafka.errors import IllegalStateError, KafkaTimeoutError +from collections.abc import Iterable from confluent_kafka import Consumer, Message, TopicPartition from confluent_kafka.admin import PartitionMetadata from confluent_kafka.error import KafkaException from karapace.kafka.common import _KafkaConfigMixin, KafkaClientParams, raise_from_kafkaexception -from typing import Any, Callable, Iterable, TypeVar +from typing import Any, Callable, TypeVar from typing_extensions import Unpack import asyncio diff --git a/src/karapace/kafka_rest_apis/consumer_manager.py b/src/karapace/kafka_rest_apis/consumer_manager.py index a2792303b..b02902e3d 100644 --- a/src/karapace/kafka_rest_apis/consumer_manager.py +++ b/src/karapace/kafka_rest_apis/consumer_manager.py @@ -25,7 +25,6 @@ from karapace.serialization import DeserializationError, InvalidMessageHeader, InvalidPayload, SchemaRegistrySerializer from karapace.utils import convert_to_int, json_decode, JSONDecodeError from struct import error as UnpackError -from typing import Tuple from urllib.parse import urljoin import asyncio @@ -58,7 +57,7 @@ def _assert(cond: bool, code: HTTPStatus, sub_code: int, message: str, content_t if not cond: KarapaceBase.r(content_type=content_type, status=code, body={"message": message, "error_code": sub_code}) - def _assert_consumer_exists(self, internal_name: Tuple[str, str], content_type: str) -> None: + def _assert_consumer_exists(self, internal_name: tuple[str, str], content_type: str) -> None: if internal_name not in self.consumers: KarapaceBase.not_found( message=f"Consumer for {internal_name} not found among {list(self.consumers.keys())}", @@ -116,7 +115,7 @@ def _topic_and_partition_valid(cluster_metadata: dict, topic_data: dict, content ) @staticmethod - def create_internal_name(group_name: str, consumer_name: str) -> Tuple[str, str]: + def create_internal_name(group_name: str, consumer_name: str) -> tuple[str, str]: return group_name, consumer_name @staticmethod @@ -156,7 +155,7 @@ def _illegal_state_fail(message: str, content_type: str) -> None: async def create_consumer(self, group_name: str, request_data: dict, content_type: str): group_name = group_name.strip("/") consumer_name: str = request_data.get("name") or new_name() - internal_name: Tuple[str, str] = self.create_internal_name(group_name, consumer_name) + internal_name: tuple[str, str] = self.create_internal_name(group_name, consumer_name) async with self.consumer_locks[internal_name]: if internal_name in self.consumers: LOG.warning( @@ -238,7 +237,7 @@ async def create_kafka_consumer(self, fetch_min_bytes, group_name, client_id: st raise await asyncio.sleep(1) - async def delete_consumer(self, internal_name: Tuple[str, str], content_type: str): + async def delete_consumer(self, internal_name: tuple[str, str], content_type: str): LOG.info("Deleting consumer for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: @@ -253,7 +252,7 @@ async def delete_consumer(self, internal_name: Tuple[str, str], content_type: st # OFFSETS async def commit_offsets( - self, internal_name: Tuple[str, str], content_type: str, request_data: dict, cluster_metadata: dict + self, internal_name: tuple[str, str], content_type: str, request_data: dict, cluster_metadata: dict ): LOG.info("Committing offsets for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) @@ -283,7 +282,7 @@ async def commit_offsets( KarapaceBase.internal_error(message=f"error sending commit request: 
{e}", content_type=content_type) empty_response() - async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, request_data: dict): + async def get_offsets(self, internal_name: tuple[str, str], content_type: str, request_data: dict): LOG.info("Retrieving offsets for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) self._assert_has_key(request_data, "partitions", content_type) @@ -315,7 +314,7 @@ async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, r KarapaceBase.r(body=response, content_type=content_type) # SUBSCRIPTION - async def set_subscription(self, internal_name: Tuple[str, str], content_type: str, request_data: dict): + async def set_subscription(self, internal_name: tuple[str, str], content_type: str, request_data: dict): LOG.info("Updating subscription for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) topics = request_data.get("topics", []) @@ -343,14 +342,14 @@ async def set_subscription(self, internal_name: Tuple[str, str], content_type: s finally: LOG.info("Done updating subscription") - async def get_subscription(self, internal_name: Tuple[str, str], content_type: str): + async def get_subscription(self, internal_name: tuple[str, str], content_type: str): LOG.info("Retrieving subscription for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: consumer = self.consumers[internal_name].consumer KarapaceBase.r(content_type=content_type, body={"topics": list(consumer.subscription())}) - async def delete_subscription(self, internal_name: Tuple[str, str], content_type: str): + async def delete_subscription(self, internal_name: tuple[str, str], content_type: str): LOG.info("Deleting subscription for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: @@ -358,7 +357,7 @@ async def delete_subscription(self, internal_name: Tuple[str, str], content_type empty_response() # ASSIGNMENTS - async def set_assignments(self, internal_name: Tuple[str, str], content_type: str, request_data: dict): + async def set_assignments(self, internal_name: tuple[str, str], content_type: str, request_data: dict): LOG.info("Updating assignments for %s to %r", internal_name, request_data) self._assert_consumer_exists(internal_name, content_type) self._assert_has_key(request_data, "partitions", content_type) @@ -377,7 +376,7 @@ async def set_assignments(self, internal_name: Tuple[str, str], content_type: st finally: LOG.info("Done updating assignment") - async def get_assignments(self, internal_name: Tuple[str, str], content_type: str): + async def get_assignments(self, internal_name: tuple[str, str], content_type: str): LOG.info("Retrieving assignment for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: @@ -388,7 +387,7 @@ async def get_assignments(self, internal_name: Tuple[str, str], content_type: st ) # POSITIONS - async def seek_to(self, internal_name: Tuple[str, str], content_type: str, request_data: dict): + async def seek_to(self, internal_name: tuple[str, str], content_type: str, request_data: dict): LOG.info("Resetting offsets for %s to %r", internal_name, request_data) self._assert_consumer_exists(internal_name, content_type) self._assert_has_key(request_data, "offsets", content_type) @@ -410,7 +409,7 @@ async def seek_to(self, internal_name: Tuple[str, str], content_type: str, 
reque empty_response() async def seek_limit( - self, internal_name: Tuple[str, str], content_type: str, request_data: dict, beginning: bool = True + self, internal_name: tuple[str, str], content_type: str, request_data: dict, beginning: bool = True ): direction = "beginning" if beginning else "end" LOG.info("Seeking %s offsets", direction) @@ -443,7 +442,7 @@ async def seek_limit( sub_code=RESTErrorCodes.UNKNOWN_TOPIC_OR_PARTITION.value, ) - async def fetch(self, internal_name: Tuple[str, str], content_type: str, formats: dict, query_params: dict): + async def fetch(self, internal_name: tuple[str, str], content_type: str, formats: dict, query_params: dict): LOG.info("Running fetch for name %s with parameters %r and formats %r", internal_name, query_params, formats) self._assert_consumer_exists(internal_name, content_type) async with self.consumer_locks[internal_name]: diff --git a/src/karapace/kafka_rest_apis/schema_cache.py b/src/karapace/kafka_rest_apis/schema_cache.py index bde742e37..6f4d8b45a 100644 --- a/src/karapace/kafka_rest_apis/schema_cache.py +++ b/src/karapace/kafka_rest_apis/schema_cache.py @@ -5,9 +5,10 @@ from abc import ABC, abstractmethod from cachetools import TTLCache +from collections.abc import MutableMapping from karapace.schema_models import TypedSchema from karapace.typing import SchemaId, Subject -from typing import Dict, Final, MutableMapping, Optional +from typing import Final, Optional import hashlib @@ -36,7 +37,7 @@ def get_schema_str(self, schema_id: SchemaId) -> Optional[str]: class TopicSchemaCache: def __init__(self) -> None: - self._topic_cache: Dict[Subject, SchemaCache] = {} + self._topic_cache: dict[Subject, SchemaCache] = {} self._empty_schema_cache: Final = EmptySchemaCache() def get_schema_id(self, topic: Subject, schema: TypedSchema) -> Optional[SchemaId]: @@ -60,7 +61,7 @@ def get_schema_str(self, topic: Subject, schema_id: SchemaId) -> Optional[str]: class SchemaCache(SchemaCacheProtocol): def __init__(self) -> None: - self._schema_hash_str_to_id: Dict[str, SchemaId] = {} + self._schema_hash_str_to_id: dict[str, SchemaId] = {} self._id_to_schema_str: MutableMapping[SchemaId, TypedSchema] = TTLCache(maxsize=100, ttl=600) def get_schema_id(self, schema: TypedSchema) -> Optional[SchemaId]: diff --git a/src/karapace/kafka_utils.py b/src/karapace/kafka_utils.py index 129ad96d4..ede5e7023 100644 --- a/src/karapace/kafka_utils.py +++ b/src/karapace/kafka_utils.py @@ -3,10 +3,10 @@ See LICENSE for details """ from .config import Config +from collections.abc import Iterator from karapace.kafka.admin import KafkaAdminClient from karapace.kafka.consumer import KafkaConsumer from karapace.kafka.producer import KafkaProducer -from typing import Iterator import contextlib diff --git a/src/karapace/karapace.py b/src/karapace/karapace.py index 28e26cf91..75cd96da4 100644 --- a/src/karapace/karapace.py +++ b/src/karapace/karapace.py @@ -8,6 +8,7 @@ from __future__ import annotations from aiohttp.web_request import Request +from collections.abc import Awaitable from functools import partial from http import HTTPStatus from karapace.config import Config @@ -15,7 +16,7 @@ from karapace.typing import JsonObject from karapace.utils import json_encode from karapace.version import __version__ -from typing import Awaitable, Callable, NoReturn +from typing import Callable, NoReturn from typing_extensions import TypeAlias import aiohttp.web diff --git a/src/karapace/messaging.py b/src/karapace/messaging.py index bfdd33665..501047769 100644 --- a/src/karapace/messaging.py 
+++ b/src/karapace/messaging.py @@ -12,7 +12,7 @@ from karapace.offset_watcher import OffsetWatcher from karapace.utils import json_encode from karapace.version import __version__ -from typing import Any, Dict, Final, Optional, Union +from typing import Any, Final, Optional, Union import logging import time @@ -103,7 +103,7 @@ def _send_kafka_message(self, key: Union[bytes, str], value: Union[bytes, str]) ) ) - def send_message(self, *, key: Dict[str, Any], value: Optional[Dict[str, Any]]) -> None: + def send_message(self, *, key: dict[str, Any], value: Optional[dict[str, Any]]) -> None: key_bytes = self._key_formatter.format_key(key) value_bytes: Union[bytes, str] = b"" if value is not None: diff --git a/src/karapace/protobuf/compare_result.py b/src/karapace/protobuf/compare_result.py index fdbdb6bf5..1caffdc24 100644 --- a/src/karapace/protobuf/compare_result.py +++ b/src/karapace/protobuf/compare_result.py @@ -4,7 +4,6 @@ """ from dataclasses import dataclass, field from enum import auto, Enum -from typing import List class Modification(Enum): @@ -68,9 +67,9 @@ def to_str(self) -> str: class CompareResult: def __init__(self) -> None: - self.result: List[ModificationRecord] = [] - self.path: List[str] = [] - self.canonical_name: List[str] = [] + self.result: list[ModificationRecord] = [] + self.path: list[str] = [] + self.canonical_name: list[str] = [] def push_path(self, name_element: str, canonical: bool = False) -> None: if canonical: diff --git a/src/karapace/protobuf/compare_type_lists.py b/src/karapace/protobuf/compare_type_lists.py index d4d181a95..788d272ad 100644 --- a/src/karapace/protobuf/compare_type_lists.py +++ b/src/karapace/protobuf/compare_type_lists.py @@ -4,6 +4,7 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Sequence from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -11,7 +12,6 @@ from karapace.protobuf.exception import IllegalStateException from karapace.protobuf.message_element import MessageElement from karapace.protobuf.type_element import TypeElement -from typing import Sequence def compare_type_lists( diff --git a/src/karapace/protobuf/compare_type_storage.py b/src/karapace/protobuf/compare_type_storage.py index cc1378d02..4ab651f9a 100644 --- a/src/karapace/protobuf/compare_type_storage.py +++ b/src/karapace/protobuf/compare_type_storage.py @@ -7,14 +7,14 @@ from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.proto_type import ProtoType from karapace.protobuf.type_element import TypeElement -from typing import Dict, List, Optional, TYPE_CHECKING, Union +from typing import Optional, TYPE_CHECKING, Union if TYPE_CHECKING: from karapace.protobuf.field_element import FieldElement from karapace.protobuf.message_element import MessageElement -def compute_name(t: ProtoType, result_path: List[str], package_name: str, types: dict) -> Optional[str]: +def compute_name(t: ProtoType, result_path: list[str], package_name: str, types: dict) -> Optional[str]: string = t.string if string.startswith("."): @@ -41,10 +41,10 @@ def __init__(self, self_package_name: str, other_package_name: str, result: Comp self.self_package_name = self_package_name or "" self.other_package_name = other_package_name or "" - self.self_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = {} - self.other_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = {} - self.locked_messages: List["MessageElement"] = 
[] - self.environment: List["MessageElement"] = [] + self.self_types: dict[str, Union[TypeRecord, TypeRecordMap]] = {} + self.other_types: dict[str, Union[TypeRecord, TypeRecordMap]] = {} + self.locked_messages: list["MessageElement"] = [] + self.environment: list["MessageElement"] = [] self.result = result def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, types: dict) -> None: diff --git a/src/karapace/protobuf/encoding_variants.py b/src/karapace/protobuf/encoding_variants.py index 37e1d3cb9..ba1e24232 100644 --- a/src/karapace/protobuf/encoding_variants.py +++ b/src/karapace/protobuf/encoding_variants.py @@ -7,7 +7,6 @@ from io import BytesIO from karapace.protobuf.exception import IllegalArgumentException -from typing import List ZERO_BYTE = b"\x00" @@ -33,7 +32,7 @@ def read_varint(bio: BytesIO) -> int: return varint -def read_indexes(bio: BytesIO) -> List[int]: +def read_indexes(bio: BytesIO) -> list[int]: try: size: int = read_varint(bio) except EOFError: @@ -67,6 +66,6 @@ def write_varint(bio: BytesIO, value: int) -> int: return written_bytes -def write_indexes(bio: BytesIO, indexes: List[int]) -> None: +def write_indexes(bio: BytesIO, indexes: list[int]) -> None: for i in indexes: write_varint(bio, i) diff --git a/src/karapace/protobuf/enum_element.py b/src/karapace/protobuf/enum_element.py index dcee9522c..38c8a87c4 100644 --- a/src/karapace/protobuf/enum_element.py +++ b/src/karapace/protobuf/enum_element.py @@ -6,6 +6,7 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt from __future__ import annotations +from collections.abc import Sequence from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -14,7 +15,6 @@ from karapace.protobuf.option_element import OptionElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence class EnumElement(TypeElement): diff --git a/src/karapace/protobuf/extend_element.py b/src/karapace/protobuf/extend_element.py index da8229650..748d85d7e 100644 --- a/src/karapace/protobuf/extend_element.py +++ b/src/karapace/protobuf/extend_element.py @@ -6,11 +6,11 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ExtendElement.kt from __future__ import annotations +from collections.abc import Sequence from dataclasses import dataclass from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence @dataclass diff --git a/src/karapace/protobuf/group_element.py b/src/karapace/protobuf/group_element.py index 1eeecf31c..0db09e41f 100644 --- a/src/karapace/protobuf/group_element.py +++ b/src/karapace/protobuf/group_element.py @@ -6,12 +6,12 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/GroupElement.kt from __future__ import annotations +from collections.abc import Sequence from dataclasses import dataclass from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence @dataclass diff --git a/src/karapace/protobuf/io.py b/src/karapace/protobuf/io.py 
index 2c87073d3..36c76e491 100644 --- a/src/karapace/protobuf/io.py +++ b/src/karapace/protobuf/io.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import Generator, Iterable from io import BytesIO from karapace.config import Config from karapace.protobuf.encoding_variants import read_indexes, write_indexes @@ -14,7 +15,7 @@ from karapace.protobuf.type_element import TypeElement from multiprocessing import Process, Queue from pathlib import Path -from typing import Dict, Final, Generator, Iterable, Protocol +from typing import Final, Protocol from typing_extensions import Self, TypeAlias import hashlib @@ -209,7 +210,7 @@ def read_in_forked_multiprocess_process( finally: p.join() reader_queue.close() - if isinstance(result, Dict): + if isinstance(result, dict): return result if isinstance(result, BaseException): raise result diff --git a/src/karapace/protobuf/known_dependency.py b/src/karapace/protobuf/known_dependency.py index 5b322929b..bb250707b 100644 --- a/src/karapace/protobuf/known_dependency.py +++ b/src/karapace/protobuf/known_dependency.py @@ -7,7 +7,7 @@ # Support of known dependencies -from typing import Any, Dict, Set +from typing import Any def static_init(cls: Any) -> object: @@ -18,9 +18,9 @@ def static_init(cls: Any) -> object: @static_init # pylint: disable=used-before-assignment class KnownDependency: - index: Dict = dict() - index_simple: Dict = dict() - map: Dict = { + index: dict = dict() + index_simple: dict = dict() + map: dict = { "google/protobuf/any.proto": ["google.protobuf.Any"], "google/protobuf/api.proto": ["google.protobuf.Api", "google.protobuf.Method", "google.protobuf.Mixin"], "google/protobuf/descriptor.proto": [ @@ -108,7 +108,7 @@ def static_init(cls) -> None: class DependenciesHardcoded: - index: Set[str] = { + index: set[str] = { "bool", "bytes", "double", diff --git a/src/karapace/protobuf/message_element.py b/src/karapace/protobuf/message_element.py index c7f2ddb10..d3333b47b 100644 --- a/src/karapace/protobuf/message_element.py +++ b/src/karapace/protobuf/message_element.py @@ -7,6 +7,7 @@ # compatibility routine added from __future__ import annotations +from collections.abc import Sequence from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -19,7 +20,6 @@ from karapace.protobuf.reserved_element import ReservedElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence class MessageElement(TypeElement): diff --git a/src/karapace/protobuf/one_of_element.py b/src/karapace/protobuf/one_of_element.py index 278886e23..8889d0cc7 100644 --- a/src/karapace/protobuf/one_of_element.py +++ b/src/karapace/protobuf/one_of_element.py @@ -7,6 +7,7 @@ from __future__ import annotations +from collections.abc import Sequence from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes @@ -14,7 +15,6 @@ from karapace.protobuf.group_element import GroupElement from karapace.protobuf.option_element import OptionElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence class OneOfElement: diff --git a/src/karapace/protobuf/proto_file_element.py b/src/karapace/protobuf/proto_file_element.py index c9f4be031..ed9f638cd 100644 --- 
a/src/karapace/protobuf/proto_file_element.py +++ b/src/karapace/protobuf/proto_file_element.py @@ -2,6 +2,8 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Sequence + # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElement.kt from karapace.dependency import Dependency @@ -13,12 +15,12 @@ from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax from karapace.protobuf.type_element import TypeElement -from typing import Dict, List, NewType, Optional, Sequence +from typing import NewType, Optional -def _collect_dependencies_types(compare_types: CompareTypes, dependencies: Optional[Dict[str, Dependency]], is_self: bool): +def _collect_dependencies_types(compare_types: CompareTypes, dependencies: Optional[dict[str, Dependency]], is_self: bool): for dep in dependencies.values(): - types: List[TypeElement] = dep.schema.schema.proto_file_element.types + types: list[TypeElement] = dep.schema.schema.proto_file_element.types sub_deps = dep.schema.schema.dependencies package_name = dep.schema.schema.proto_file_element.package_name type_: TypeElement @@ -131,8 +133,8 @@ def compare( self, other: "ProtoFileElement", result: CompareResult, - self_dependencies: Optional[Dict[str, Dependency]] = None, - other_dependencies: Optional[Dict[str, Dependency]] = None, + self_dependencies: Optional[dict[str, Dependency]] = None, + other_dependencies: Optional[dict[str, Dependency]] = None, ) -> CompareResult: from karapace.protobuf.compare_type_lists import compare_type_lists diff --git a/src/karapace/protobuf/proto_normalizations.py b/src/karapace/protobuf/proto_normalizations.py index e3539aa46..9b82cb3f6 100644 --- a/src/karapace/protobuf/proto_normalizations.py +++ b/src/karapace/protobuf/proto_normalizations.py @@ -5,6 +5,7 @@ from __future__ import annotations +from collections.abc import Sequence from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.extend_element import ExtendElement @@ -20,7 +21,6 @@ from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.type_tree import TypeTree -from typing import Sequence import abc diff --git a/src/karapace/protobuf/proto_parser.py b/src/karapace/protobuf/proto_parser.py index f00602ac7..f5a002aa5 100644 --- a/src/karapace/protobuf/proto_parser.py +++ b/src/karapace/protobuf/proto_parser.py @@ -28,7 +28,7 @@ from karapace.protobuf.syntax_reader import SyntaxReader from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import MAX_TAG_VALUE -from typing import List, Optional, Union +from typing import Optional, Union class Context(Enum): @@ -73,17 +73,17 @@ def permits_extend(self) -> bool: class ProtoParser: def __init__(self, location: Location, data: str) -> None: self.location = location - self.imports: List[str] = [] - self.nested_types: List[TypeElement] = [] - self.services: List[str] = [] - self.extends_list: List[str] = [] - self.options: List[str] = [] + self.imports: list[str] = [] + self.nested_types: list[TypeElement] = [] + self.services: list[str] = [] + self.extends_list: list[str] = [] + self.options: list[str] = [] self.declaration_count = 0 self.syntax: Optional[Syntax] = None self.package_name: Optional[str] = None self.prefix = "" self.data = data - self.public_imports: List[str] 
= [] + self.public_imports: list[str] = [] self.reader = SyntaxReader(data, location) def read_proto_file(self) -> ProtoFileElement: @@ -226,13 +226,13 @@ def read_declaration( def read_message(self, location: Location, documentation: str) -> MessageElement: """Reads a message declaration.""" name: str = self.reader.read_name() - fields: List[FieldElement] = [] - one_ofs: List[OneOfElement] = [] - nested_types: List[TypeElement] = [] - extensions: List[ExtensionsElement] = [] - options: List[OptionElement] = [] - reserveds: List[ReservedElement] = [] - groups: List[GroupElement] = [] + fields: list[FieldElement] = [] + one_ofs: list[OneOfElement] = [] + nested_types: list[TypeElement] = [] + extensions: list[ExtensionsElement] = [] + options: list[OptionElement] = [] + reserveds: list[ReservedElement] = [] + groups: list[GroupElement] = [] previous_prefix = self.prefix self.prefix = f"{self.prefix}{name}." diff --git a/src/karapace/protobuf/protobuf_to_dict.py b/src/karapace/protobuf/protobuf_to_dict.py index a9713e523..22fc300d6 100644 --- a/src/karapace/protobuf/protobuf_to_dict.py +++ b/src/karapace/protobuf/protobuf_to_dict.py @@ -12,7 +12,6 @@ from google.protobuf.message import Message from google.protobuf.timestamp_pb2 import Timestamp from types import MappingProxyType -from typing import Dict import datetime @@ -82,7 +81,7 @@ def protobuf_to_dict( use_enum_labels=True, including_default_value_fields=True, lowercase_enum_lables=False, -) -> Dict[object, object]: +) -> dict[object, object]: type_callable_map = TYPE_CALLABLE_MAP result_dict = {} extensions = {} diff --git a/src/karapace/protobuf/protopace/protopace.py b/src/karapace/protobuf/protopace/protopace.py index a65f90582..0f928016e 100644 --- a/src/karapace/protobuf/protopace/protopace.py +++ b/src/karapace/protobuf/protopace/protopace.py @@ -6,7 +6,6 @@ from dataclasses import dataclass, field from functools import cached_property from karapace.errors import InvalidSchema -from typing import Dict, List import ctypes import importlib.util @@ -41,11 +40,11 @@ class FormatResult(ctypes.Structure): class Proto: name: str schema: str - dependencies: List["Proto"] = field(default_factory=list) + dependencies: list["Proto"] = field(default_factory=list) @cached_property - def all_dependencies(self) -> List["Proto"]: - dependencies: Dict[str, "Proto"] = {} + def all_dependencies(self) -> list["Proto"]: + dependencies: dict[str, "Proto"] = {} for dep in self.dependencies: if dep.dependencies: dependencies.update([(d.name, d) for d in dep.all_dependencies]) diff --git a/src/karapace/protobuf/schema.py b/src/karapace/protobuf/schema.py index fdd72a891..1d059447a 100644 --- a/src/karapace/protobuf/schema.py +++ b/src/karapace/protobuf/schema.py @@ -5,6 +5,7 @@ from __future__ import annotations +from collections.abc import Mapping, Sequence from karapace.dataclasses import default_dataclass # Ported from square/wire: @@ -26,7 +27,6 @@ from karapace.protobuf.type_tree import SourceFileReference, TypeTree from karapace.protobuf.utils import append_documentation, append_indented from karapace.schema_references import Reference -from typing import Mapping, Sequence import binascii diff --git a/src/karapace/protobuf/serialization.py b/src/karapace/protobuf/serialization.py index abc01247d..6c3ca61fd 100644 --- a/src/karapace/protobuf/serialization.py +++ b/src/karapace/protobuf/serialization.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import Sequence from karapace.errors import InvalidSchema from 
karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement @@ -19,7 +20,7 @@ from karapace.protobuf.syntax import Syntax from karapace.protobuf.type_element import TypeElement from types import MappingProxyType -from typing import Any, Sequence +from typing import Any import base64 import google.protobuf.descriptor diff --git a/src/karapace/protobuf/service_element.py b/src/karapace/protobuf/service_element.py index ed714c58c..08f365b8b 100644 --- a/src/karapace/protobuf/service_element.py +++ b/src/karapace/protobuf/service_element.py @@ -6,12 +6,12 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ServiceElement.kt from __future__ import annotations +from collections.abc import Sequence from dataclasses import dataclass from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement from karapace.protobuf.rpc_element import RpcElement from karapace.protobuf.utils import append_documentation, append_indented -from typing import Sequence @dataclass diff --git a/src/karapace/protobuf/type_element.py b/src/karapace/protobuf/type_element.py index ec840a801..89e999034 100644 --- a/src/karapace/protobuf/type_element.py +++ b/src/karapace/protobuf/type_element.py @@ -6,9 +6,10 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/TypeElement.kt from __future__ import annotations +from collections.abc import Sequence from dataclasses import dataclass from karapace.protobuf.location import Location -from typing import Sequence, TYPE_CHECKING +from typing import TYPE_CHECKING if TYPE_CHECKING: from karapace.protobuf.compare_result import CompareResult diff --git a/src/karapace/protobuf/utils.py b/src/karapace/protobuf/utils.py index b01e428fe..081a23c87 100644 --- a/src/karapace/protobuf/utils.py +++ b/src/karapace/protobuf/utils.py @@ -4,13 +4,13 @@ """ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/Util.kt -from typing import List, TYPE_CHECKING +from typing import TYPE_CHECKING if TYPE_CHECKING: from karapace.protobuf.option_element import OptionElement -def append_documentation(data: List[str], documentation: str) -> None: +def append_documentation(data: list[str], documentation: str) -> None: if not documentation: return @@ -25,7 +25,7 @@ def append_documentation(data: List[str], documentation: str) -> None: data.append("\n") -def append_options(data: List[str], options: List["OptionElement"]) -> None: +def append_options(data: list[str], options: list["OptionElement"]) -> None: count = len(options) if count == 1: data.append("[") @@ -52,7 +52,7 @@ def try_to_schema(obj: "OptionElement") -> str: raise -def append_indented(data: List[str], value: str) -> None: +def append_indented(data: list[str], value: str) -> None: lines = value.split("\n") if len(lines) > 1 and not lines[-1]: del lines[-1] diff --git a/src/karapace/rapu.py b/src/karapace/rapu.py index c4e4c5ee3..3731abc8a 100644 --- a/src/karapace/rapu.py +++ b/src/karapace/rapu.py @@ -12,7 +12,7 @@ from karapace.statsd import StatsClient from karapace.utils import json_decode, json_encode from karapace.version import __version__ -from typing import Callable, Dict, NoReturn, Optional, overload, Union +from typing import Callable, NoReturn, Optional, overload, Union import aiohttp import aiohttp.web @@ -63,7 +63,7 @@ def __init__( *, url: str, query, - headers: Dict[str, str], + headers: 
dict[str, str], path_for_stats: str, method: str, content_type: Optional[str] = None, @@ -71,7 +71,7 @@ def __init__( ): self.url = url self.headers = headers - self._header_cache: Dict[str, Optional[str]] = {} + self._header_cache: dict[str, Optional[str]] = {} self.query = query self.content_type = content_type self.accepts = accepts @@ -120,7 +120,7 @@ def __init__( *, status: HTTPStatus = HTTPStatus.OK, content_type: Optional[str] = None, - headers: Optional[Dict[str, str]] = None, + headers: Optional[dict[str, str]] = None, ) -> None: self.body = body self.status = status diff --git a/src/karapace/schema_models.py b/src/karapace/schema_models.py index eab1a5c9f..10ffa1d82 100644 --- a/src/karapace/schema_models.py +++ b/src/karapace/schema_models.py @@ -6,6 +6,7 @@ from avro.errors import SchemaParseException from avro.schema import parse as avro_parse, Schema as AvroSchema +from collections.abc import Collection, Mapping, Sequence from dataclasses import dataclass from jsonschema import Draft7Validator from jsonschema.exceptions import SchemaError @@ -26,7 +27,7 @@ from karapace.schema_type import SchemaType from karapace.typing import JsonObject, SchemaId, Subject, Version, VersionTag from karapace.utils import assert_never, json_decode, json_encode, JSONDecodeError -from typing import Any, cast, Collection, Dict, Final, final, Mapping, Sequence +from typing import Any, cast, Final, final import hashlib import logging @@ -126,7 +127,7 @@ def __init__( def to_dict(self) -> JsonObject: if self.schema_type is SchemaType.PROTOBUF: raise InvalidSchema("Protobuf do not support to_dict serialization") - return json_decode(self.schema_str, Dict[str, Any]) + return json_decode(self.schema_str, dict[str, Any]) def fingerprint(self) -> str: if self._fingerprint_cached is None: diff --git a/src/karapace/schema_reader.py b/src/karapace/schema_reader.py index cd04944dc..d7131968f 100644 --- a/src/karapace/schema_reader.py +++ b/src/karapace/schema_reader.py @@ -21,6 +21,7 @@ UnknownTopicOrPartitionError, ) from avro.schema import Schema as AvroSchema +from collections.abc import Mapping, Sequence from confluent_kafka import Message, TopicPartition from contextlib import closing, ExitStack from enum import Enum @@ -45,7 +46,7 @@ from karapace.typing import JsonObject, SchemaId, Subject, Version from karapace.utils import json_decode, JSONDecodeError, shutdown from threading import Event, Thread -from typing import Final, Mapping, Sequence +from typing import Final import json import logging diff --git a/src/karapace/schema_references.py b/src/karapace/schema_references.py index 0eae47141..900568349 100644 --- a/src/karapace/schema_references.py +++ b/src/karapace/schema_references.py @@ -7,11 +7,12 @@ from __future__ import annotations +from collections.abc import Mapping from karapace.dataclasses import default_dataclass from karapace.typing import JsonData, JsonObject, SchemaId, Subject, Version -from typing import cast, List, Mapping, NewType, TypeVar +from typing import cast, NewType, TypeVar -Referents = NewType("Referents", List[SchemaId]) +Referents = NewType("Referents", list[SchemaId]) T = TypeVar("T") diff --git a/src/karapace/schema_registry.py b/src/karapace/schema_registry.py index 6594663ad..67c0fc899 100644 --- a/src/karapace/schema_registry.py +++ b/src/karapace/schema_registry.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import Sequence from contextlib import AsyncExitStack, closing from karapace.compatibility import check_compatibility, 
CompatibilityModes from karapace.compatibility.jsonschema.checks import is_incompatible @@ -29,7 +30,6 @@ from karapace.schema_reader import KafkaSchemaReader from karapace.schema_references import LatestVersionReference, Reference from karapace.typing import JsonObject, Mode, SchemaId, Subject, Version -from typing import Sequence import asyncio import logging diff --git a/src/karapace/sentry/sentry_client.py b/src/karapace/sentry/sentry_client.py index 59143ef04..776214c7f 100644 --- a/src/karapace/sentry/sentry_client.py +++ b/src/karapace/sentry/sentry_client.py @@ -4,8 +4,8 @@ """ from __future__ import annotations +from collections.abc import Mapping from karapace.sentry.sentry_client_api import KarapaceSentryConfig, SentryClientAPI -from typing import Mapping # The Sentry SDK is optional, omit pylint import error import sentry_sdk diff --git a/src/karapace/sentry/sentry_client_api.py b/src/karapace/sentry/sentry_client_api.py index 22f4482d4..4ca9575c8 100644 --- a/src/karapace/sentry/sentry_client_api.py +++ b/src/karapace/sentry/sentry_client_api.py @@ -4,7 +4,7 @@ """ from __future__ import annotations -from typing import Mapping +from collections.abc import Mapping from typing_extensions import TypeAlias KarapaceSentryConfig: TypeAlias = "Mapping[str, object] | None" diff --git a/src/karapace/serialization.py b/src/karapace/serialization.py index 81c51cabc..36509855e 100644 --- a/src/karapace/serialization.py +++ b/src/karapace/serialization.py @@ -7,6 +7,7 @@ from aiohttp import BasicAuth from avro.io import BinaryDecoder, BinaryEncoder, DatumReader, DatumWriter from cachetools import TTLCache +from collections.abc import MutableMapping from functools import lru_cache from google.protobuf.message import DecodeError from jsonschema import ValidationError @@ -20,7 +21,7 @@ from karapace.schema_references import LatestVersionReference, Reference, reference_from_mapping from karapace.typing import NameStrategy, SchemaId, Subject, SubjectType, Version from karapace.utils import json_decode, json_encode -from typing import Any, Callable, MutableMapping +from typing import Any, Callable from urllib.parse import quote import asyncio diff --git a/src/karapace/statsd.py b/src/karapace/statsd.py index 3c32e09d8..39d6a3153 100644 --- a/src/karapace/statsd.py +++ b/src/karapace/statsd.py @@ -10,10 +10,11 @@ """ from __future__ import annotations +from collections.abc import Iterator from contextlib import contextmanager from karapace.config import Config from karapace.sentry import get_sentry_client -from typing import Any, Final, Iterator +from typing import Any, Final import datetime import logging diff --git a/src/karapace/typing.py b/src/karapace/typing.py index 77058cce2..1268db001 100644 --- a/src/karapace/typing.py +++ b/src/karapace/typing.py @@ -4,15 +4,16 @@ """ from __future__ import annotations +from collections.abc import Mapping, Sequence from enum import Enum, unique from karapace.errors import InvalidVersion -from typing import Any, ClassVar, Dict, List, Mapping, NewType, Sequence, Union +from typing import Any, ClassVar, NewType, Union from typing_extensions import TypeAlias import functools -JsonArray: TypeAlias = List["JsonData"] -JsonObject: TypeAlias = Dict[str, "JsonData"] +JsonArray: TypeAlias = list["JsonData"] +JsonObject: TypeAlias = dict[str, "JsonData"] JsonScalar: TypeAlias = Union[str, int, float, None] JsonData: TypeAlias = Union[JsonScalar, JsonObject, JsonArray] @@ -23,8 +24,8 @@ Subject = NewType("Subject", str) VersionTag = Union[str, int] -SchemaMetadata 
= NewType("SchemaMetadata", Dict[str, Any]) -SchemaRuleSet = NewType("SchemaRuleSet", Dict[str, Any]) +SchemaMetadata = NewType("SchemaMetadata", dict[str, Any]) +SchemaRuleSet = NewType("SchemaRuleSet", dict[str, Any]) # note: the SchemaID is a unique id among all the schemas (and each version should be assigned to a different id) # basically the same SchemaID refer always to the same TypedSchema. diff --git a/tests/conftest.py b/tests/conftest.py index 99ba55809..d9e034cf1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ from avro.compatibility import SchemaCompatibilityResult from pathlib import Path from tempfile import mkstemp -from typing import List, Optional +from typing import Optional import json import os @@ -20,14 +20,14 @@ VERSION_REGEX = "([0-9]+[.])*[0-9]+" -def pytest_assertrepr_compare(op, left, right) -> Optional[List[str]]: +def pytest_assertrepr_compare(op, left, right) -> Optional[list[str]]: if isinstance(left, SchemaCompatibilityResult) and isinstance(right, SchemaCompatibilityResult) and op in ("==", "!="): lines = ["Comparing SchemaCompatibilityResult instances:"] def pad(depth: int, *msg: str) -> str: return " " * depth + " ".join(msg) - def list_details(header: str, depth: int, items: List[str]) -> None: + def list_details(header: str, depth: int, items: list[str]) -> None: qty = len(items) if qty == 1: @@ -56,7 +56,7 @@ def compatibility_details(header: str, depth: int, obj: SchemaCompatibilityResul return None -def split_by_comma(arg: str) -> List[str]: +def split_by_comma(arg: str) -> list[str]: return arg.split(",") diff --git a/tests/integration/backup/test_avro_export.py b/tests/integration/backup/test_avro_export.py index 951cc2e57..041023580 100644 --- a/tests/integration/backup/test_avro_export.py +++ b/tests/integration/backup/test_avro_export.py @@ -12,7 +12,7 @@ from pathlib import Path from tests.integration.utils.cluster import RegistryDescription from tests.integration.utils.kafka_server import KafkaServers -from typing import Any, Dict +from typing import Any import base64 import json @@ -73,7 +73,7 @@ EXPECTED_COMPATIBILITY_CHANGE = {"compatibilityLevel": "NONE"} -async def insert_data(c: Client, schemaType: str, subject: str, data: Dict[str, Any]) -> None: +async def insert_data(c: Client, schemaType: str, subject: str, data: dict[str, Any]) -> None: schema_string = json.dumps(data) res = await c.post( f"subjects/{subject}/versions", @@ -83,7 +83,7 @@ async def insert_data(c: Client, schemaType: str, subject: str, data: Dict[str, assert "id" in res.json() -async def insert_compatibility_level_change(c: Client, subject: str, data: Dict[str, Any]) -> None: +async def insert_compatibility_level_change(c: Client, subject: str, data: dict[str, Any]) -> None: res = await c.put( f"config/{subject}", json=data, diff --git a/tests/integration/backup/test_v3_backup.py b/tests/integration/backup/test_v3_backup.py index 8e01365ed..744437be6 100644 --- a/tests/integration/backup/test_v3_backup.py +++ b/tests/integration/backup/test_v3_backup.py @@ -5,6 +5,7 @@ from __future__ import annotations from aiokafka.errors import UnknownTopicOrPartitionError +from collections.abc import Iterator from confluent_kafka import Message, TopicPartition from confluent_kafka.admin import NewTopic from dataclasses import fields @@ -27,7 +28,7 @@ from tempfile import mkdtemp from tests.integration.utils.cluster import RegistryDescription from tests.integration.utils.kafka_server import KafkaServers -from typing import Iterator, NoReturn +from typing 
import NoReturn from unittest.mock import patch import datetime diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index ecc52470a..79cf2ae8c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -9,6 +9,7 @@ from _pytest.fixtures import SubRequest from aiohttp.pytest_plugin import AiohttpClient from aiohttp.test_utils import TestClient +from collections.abc import AsyncGenerator, AsyncIterator, Iterator from confluent_kafka.admin import NewTopic from contextlib import ExitStack from dataclasses import asdict @@ -35,7 +36,6 @@ from tests.integration.utils.synchronization import lock_path_for from tests.integration.utils.zookeeper import configure_and_start_zk from tests.utils import repeat_until_successful_request -from typing import AsyncGenerator, AsyncIterator, Iterator from urllib.parse import urlparse import asyncio diff --git a/tests/integration/test_karapace.py b/tests/integration/test_karapace.py index c6352ecfd..281cd7338 100644 --- a/tests/integration/test_karapace.py +++ b/tests/integration/test_karapace.py @@ -2,13 +2,13 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Iterator from contextlib import closing, contextmanager, ExitStack from karapace.config import set_config_defaults from pathlib import Path from tests.integration.utils.kafka_server import KafkaServers from tests.integration.utils.process import stop_process from tests.utils import popen_karapace_all -from typing import Iterator import json import socket diff --git a/tests/integration/test_rest.py b/tests/integration/test_rest.py index e4949b43d..ee504366b 100644 --- a/tests/integration/test_rest.py +++ b/tests/integration/test_rest.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import Mapping from dataclasses import dataclass from karapace.client import Client from karapace.kafka.admin import KafkaAdminClient @@ -23,7 +24,7 @@ test_objects_avro_evolution, wait_for_topics, ) -from typing import Any, Mapping +from typing import Any import asyncio import base64 diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index dd0502c89..546c19e0b 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -21,7 +21,6 @@ create_subject_name_factory, repeat_until_successful_request, ) -from typing import List, Tuple import asyncio import json @@ -1060,7 +1059,7 @@ async def test_transitive_compatibility(registry_async_client: Client) -> None: assert res_json["error_code"] == 409 -async def assert_schema_versions(client: Client, trail: str, schema_id: int, expected: List[Tuple[str, int]]) -> None: +async def assert_schema_versions(client: Client, trail: str, schema_id: int, expected: list[tuple[str, int]]) -> None: """ Calls /schemas/ids/{schema_id}/versions and asserts the expected results were in the response. 
""" @@ -1084,7 +1083,7 @@ async def assert_schema_versions_failed(client: Client, trail: str, schema_id: i async def register_schema( registry_async_client: Client, trail: str, subject: str, schema_str: str, schema_type: SchemaType = SchemaType.AVRO -) -> Tuple[int, int]: +) -> tuple[int, int]: # Register to get the id payload = {"schema": schema_str} if schema_type == SchemaType.JSONSCHEMA: diff --git a/tests/integration/test_schema_coordinator.py b/tests/integration/test_schema_coordinator.py index e6ccbc699..5af1d17c0 100644 --- a/tests/integration/test_schema_coordinator.py +++ b/tests/integration/test_schema_coordinator.py @@ -18,13 +18,14 @@ SyncGroupResponse_v0 as SyncGroupResponse, ) from aiokafka.util import create_future, create_task +from collections.abc import AsyncGenerator, Iterator from karapace.coordinator.schema_coordinator import Assignment, SchemaCoordinator, SchemaCoordinatorGroupRebalance from karapace.utils import json_encode from karapace.version import __version__ from tenacity import retry, stop_after_delay, TryAgain, wait_fixed from tests.integration.utils.kafka_server import KafkaServers from tests.utils import new_random_name -from typing import AsyncGenerator, Final, Iterator +from typing import Final from unittest import mock import aiokafka.errors as Errors diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 9eae2b994..4b4471cb2 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -12,7 +12,7 @@ from karapace.typing import JsonData, SchemaMetadata, SchemaRuleSet from tests.base_testcase import BaseTestCase from tests.utils import create_subject_name_factory -from typing import List, Optional, Union +from typing import Optional, Union import logging import pytest @@ -472,7 +472,7 @@ class TestCaseSchema: schema_type: SchemaType schema_str: str subject: str - references: Optional[List[JsonData]] = None + references: Optional[list[JsonData]] = None expected: int = 200 expected_msg: str = "" expected_error_code: Optional[int] = None @@ -501,7 +501,7 @@ class TestCaseHardDeleteSchema(TestCaseDeleteSchema): @dataclass class ReferenceTestCase(BaseTestCase): - schemas: List[Union[TestCaseSchema, TestCaseDeleteSchema]] + schemas: list[Union[TestCaseSchema, TestCaseDeleteSchema]] # Base case diff --git a/tests/integration/test_schema_reader.py b/tests/integration/test_schema_reader.py index 738f76498..4d00a5581 100644 --- a/tests/integration/test_schema_reader.py +++ b/tests/integration/test_schema_reader.py @@ -18,7 +18,6 @@ from tests.integration.utils.kafka_server import KafkaServers from tests.schemas.json_schemas import FALSE_SCHEMA, TRUE_SCHEMA from tests.utils import create_group_name_factory, create_subject_name_factory, new_random_name, new_topic -from typing import List, Tuple import asyncio import pytest @@ -204,7 +203,7 @@ async def test_regression_config_for_inexisting_object_should_not_throw( @dataclass class DetectKeyFormatCase(BaseTestCase): - raw_msgs: List[Tuple[bytes, bytes]] + raw_msgs: list[tuple[bytes, bytes]] expected: KeyMode diff --git a/tests/integration/test_schema_registry_auth.py b/tests/integration/test_schema_registry_auth.py index 5f780f3ce..89832355f 100644 --- a/tests/integration/test_schema_registry_auth.py +++ b/tests/integration/test_schema_registry_auth.py @@ -15,7 +15,6 @@ test_objects_avro, wait_for_topics, ) -from typing import List from urllib.parse import quote import aiohttp @@ -205,7 +204,7 @@ async def 
test_sr_ids(registry_async_retry_client_auth: RetryRestClient) -> None async def test_sr_auth_forwarding( - registry_async_auth_pair: List[str], registry_async_retry_client_auth: RetryRestClient + registry_async_auth_pair: list[str], registry_async_retry_client_auth: RetryRestClient ) -> None: auth = aiohttp.BasicAuth("admin", "admin") diff --git a/tests/integration/utils/cluster.py b/tests/integration/utils/cluster.py index 04560b453..0e992499e 100644 --- a/tests/integration/utils/cluster.py +++ b/tests/integration/utils/cluster.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from collections.abc import AsyncIterator from contextlib import asynccontextmanager, ExitStack from dataclasses import dataclass from karapace.config import Config, set_config_defaults, write_config @@ -11,7 +12,6 @@ from tests.integration.utils.network import allocate_port from tests.integration.utils.process import stop_process, wait_for_port_subprocess from tests.utils import new_random_name, popen_karapace_all -from typing import AsyncIterator @dataclass(frozen=True) diff --git a/tests/integration/utils/network.py b/tests/integration/utils/network.py index ef9439e1d..506751012 100644 --- a/tests/integration/utils/network.py +++ b/tests/integration/utils/network.py @@ -2,8 +2,8 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Iterator from contextlib import closing, contextmanager -from typing import Iterator import socket diff --git a/tests/integration/utils/process.py b/tests/integration/utils/process.py index 4c9c93076..e3c36e412 100644 --- a/tests/integration/utils/process.py +++ b/tests/integration/utils/process.py @@ -5,7 +5,7 @@ from karapace.utils import Expiration from subprocess import Popen from tests.integration.utils.network import port_is_listening -from typing import List, Optional +from typing import Optional import os import signal @@ -45,7 +45,7 @@ def stop_process(proc: Optional[Popen]) -> None: pass -def get_java_process_configuration(java_args: List[str]) -> List[str]: +def get_java_process_configuration(java_args: list[str]) -> list[str]: command = [ "/usr/bin/java", "-server", diff --git a/tests/integration/utils/zookeeper.py b/tests/integration/utils/zookeeper.py index cd4aaa00b..5dffcfeca 100644 --- a/tests/integration/utils/zookeeper.py +++ b/tests/integration/utils/zookeeper.py @@ -7,10 +7,9 @@ from tests.integration.utils.config import KafkaDescription, ZKConfig from tests.integration.utils.process import get_java_process_configuration from tests.utils import write_ini -from typing import List -def zk_java_args(cfg_path: Path, kafka_description: KafkaDescription) -> List[str]: +def zk_java_args(cfg_path: Path, kafka_description: KafkaDescription) -> list[str]: msg = f"Couldn't find kafka installation at {kafka_description.install_dir} to run integration tests." 
assert kafka_description.install_dir.exists(), msg java_args = [ diff --git a/tests/unit/anonymize_schemas/test_anonymize_avro.py b/tests/unit/anonymize_schemas/test_anonymize_avro.py index 9b99e16c5..2dc8870dc 100644 --- a/tests/unit/anonymize_schemas/test_anonymize_avro.py +++ b/tests/unit/anonymize_schemas/test_anonymize_avro.py @@ -5,7 +5,7 @@ See LICENSE for details """ from karapace.anonymize_schemas.anonymize_avro import anonymize -from typing import Dict, Union +from typing import Union import json import pytest @@ -565,6 +565,6 @@ [EMPTY_STR, EMPTY_STR], ], ) -def test_anonymize(test_schema: str, expected_schema: Union[str, Dict[str, str]]): +def test_anonymize(test_schema: str, expected_schema: Union[str, dict[str, str]]): res = anonymize(test_schema) assert res == expected_schema diff --git a/tests/unit/avro_dataclasses/test_introspect.py b/tests/unit/avro_dataclasses/test_introspect.py index e9765609a..b816cf128 100644 --- a/tests/unit/avro_dataclasses/test_introspect.py +++ b/tests/unit/avro_dataclasses/test_introspect.py @@ -2,11 +2,12 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Mapping, Sequence from dataclasses import dataclass, Field, field, fields from enum import Enum from karapace.avro_dataclasses.introspect import field_schema, record_schema, UnsupportedAnnotation from karapace.avro_dataclasses.schema import FieldSchema -from typing import Final, Mapping, Optional, Sequence, Tuple +from typing import Final, Optional import datetime import pytest @@ -35,8 +36,8 @@ class ValidRecord: optional_bytes_field: Optional[bytes] enum_field: Symbols dt_field: datetime.datetime - int_array: Tuple[int, ...] - nested_values: Tuple[Nested, ...] + int_array: tuple[int, ...] + nested_values: tuple[Nested, ...] enum_field_default: Symbols = Symbols.a int_field_default: int = 123 @@ -49,9 +50,9 @@ class ValidRecord: @dataclass class InvalidRecord: any_tuple: tuple - homogenous_short_tuple: Tuple[int] - homogenous_bi_tuple: Tuple[int, int] - homogenous_tri_tuple: Tuple[int, int, int] + homogenous_short_tuple: tuple[int] + homogenous_bi_tuple: tuple[int, int] + homogenous_tri_tuple: tuple[int, int, int] any_list: list any_sequence: Sequence diff --git a/tests/unit/avro_dataclasses/test_models.py b/tests/unit/avro_dataclasses/test_models.py index 85eff7df5..4161b84a5 100644 --- a/tests/unit/avro_dataclasses/test_models.py +++ b/tests/unit/avro_dataclasses/test_models.py @@ -4,7 +4,7 @@ """ from dataclasses import dataclass, field from karapace.avro_dataclasses.models import AvroModel -from typing import List, Optional, Tuple +from typing import Optional import datetime import enum @@ -21,7 +21,7 @@ class Symbol(enum.Enum): @dataclass(frozen=True) class NestedModel: bool_field: bool - values: Tuple[int, ...] + values: tuple[int, ...] @dataclass(frozen=True) @@ -29,14 +29,14 @@ class RecordModel(AvroModel): symbol: Symbol height: int = field(metadata={"type": "long"}) name: str - nested: Tuple[NestedModel, ...] + nested: tuple[NestedModel, ...] 
dt: datetime.datetime id: uuid.UUID @dataclass(frozen=True) class HasList(AvroModel): - values: List[NestedModel] + values: list[NestedModel] @dataclass(frozen=True) diff --git a/tests/unit/backup/backends/v3/conftest.py b/tests/unit/backup/backends/v3/conftest.py index 18fc33c88..412a3f2ac 100644 --- a/tests/unit/backup/backends/v3/conftest.py +++ b/tests/unit/backup/backends/v3/conftest.py @@ -2,8 +2,8 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from collections.abc import Iterator from contextlib import closing -from typing import Iterator import contextlib import io diff --git a/tests/unit/test_in_memory_database.py b/tests/unit/test_in_memory_database.py index aa25adf56..a3720940d 100644 --- a/tests/unit/test_in_memory_database.py +++ b/tests/unit/test_in_memory_database.py @@ -5,6 +5,7 @@ from __future__ import annotations from collections import defaultdict +from collections.abc import Iterable, Sequence from confluent_kafka.cimpl import KafkaError from karapace.config import DEFAULTS from karapace.constants import DEFAULT_SCHEMA_TOPIC @@ -17,7 +18,7 @@ from karapace.schema_references import Reference, Referents from karapace.typing import SchemaId, Version from pathlib import Path -from typing import Final, Iterable, Sequence +from typing import Final TEST_DATA_FOLDER: Final = Path("tests/unit/test_data/") diff --git a/tests/unit/test_schema_models.py b/tests/unit/test_schema_models.py index 392738335..313f77daf 100644 --- a/tests/unit/test_schema_models.py +++ b/tests/unit/test_schema_models.py @@ -10,13 +10,13 @@ from karapace.schema_models import parse_avro_schema_definition, SchemaVersion, TypedSchema, Versioner from karapace.schema_type import SchemaType from karapace.typing import Version, VersionTag -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Optional import operator import pytest # Schema versions factory fixture type -SVFCallable = Callable[[None], Callable[[int, Dict[str, Any]], Dict[int, SchemaVersion]]] +SVFCallable = Callable[[None], Callable[[int, dict[str, Any]], dict[int, SchemaVersion]]] class TestVersion: @@ -90,8 +90,8 @@ def schema_versions_factory( self, avro_schema: str, avro_schema_parsed: AvroSchema, - ) -> Callable[[Version, Dict[str, Any]], Dict[Version, SchemaVersion]]: - def schema_versions(version: Version, schema_version_data: Optional[Dict[str, Any]] = None): + ) -> Callable[[Version, dict[str, Any]], dict[Version, SchemaVersion]]: + def schema_versions(version: Version, schema_version_data: Optional[dict[str, Any]] = None): schema_version_data = schema_version_data or dict() base_schema_version_data = dict( subject="test-topic", diff --git a/tests/unit/test_schema_reader.py b/tests/unit/test_schema_reader.py index 5d625931b..f5bb572cf 100644 --- a/tests/unit/test_schema_reader.py +++ b/tests/unit/test_schema_reader.py @@ -27,7 +27,7 @@ from karapace.typing import SchemaId, Version from tests.base_testcase import BaseTestCase from tests.utils import schema_protobuf_invalid_because_corrupted, schema_protobuf_with_invalid_ref -from typing import Callable, List, Tuple +from typing import Callable from unittest.mock import Mock import confluent_kafka @@ -336,8 +336,8 @@ class KafkaMessageHandlingErrorTestCase(BaseTestCase): @pytest.fixture(name="schema_reader_with_consumer_messages_factory") -def fixture_schema_reader_with_consumer_messages_factory() -> Callable[[Tuple[List[Message]]], KafkaSchemaReader]: - def factory(consumer_messages: Tuple[List[Message]]) -> KafkaSchemaReader: +def 
fixture_schema_reader_with_consumer_messages_factory() -> Callable[[tuple[list[Message]]], KafkaSchemaReader]: + def factory(consumer_messages: tuple[list[Message]]) -> KafkaSchemaReader: key_formatter_mock = Mock(spec=KeyFormatter) consumer_mock = Mock(spec=KafkaConsumer) @@ -507,7 +507,7 @@ def factory(key: bytes, value: bytes, offset: int = 1) -> Message: def test_message_error_handling( caplog: LogCaptureFixture, test_case: KafkaMessageHandlingErrorTestCase, - schema_reader_with_consumer_messages_factory: Callable[[Tuple[List[Message]]], KafkaSchemaReader], + schema_reader_with_consumer_messages_factory: Callable[[tuple[list[Message]]], KafkaSchemaReader], message_factory: Callable[[bytes, bytes, int], Message], ) -> None: message = message_factory(key=test_case.key, value=test_case.value) @@ -528,7 +528,7 @@ def test_message_error_handling( def test_message_error_handling_with_invalid_reference_schema_protobuf( caplog: LogCaptureFixture, - schema_reader_with_consumer_messages_factory: Callable[[Tuple[List[Message]]], KafkaSchemaReader], + schema_reader_with_consumer_messages_factory: Callable[[tuple[list[Message]]], KafkaSchemaReader], message_factory: Callable[[bytes, bytes, int], Message], ) -> None: # Given an invalid schema (corrupted) diff --git a/tests/utils.py b/tests/utils.py index 3757e0739..191fba348 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -10,7 +10,7 @@ from karapace.utils import Expiration from pathlib import Path from subprocess import Popen -from typing import Any, Callable, IO, List, Union +from typing import Any, Callable, IO, Union from urllib.parse import quote import asyncio @@ -264,7 +264,7 @@ def new_topic(admin_client: KafkaAdminClient, prefix: str = "topic", *, num_part return topic_name -async def wait_for_topics(rest_async_client: Client, topic_names: List[str], timeout: float, sleep: float) -> None: +async def wait_for_topics(rest_async_client: Client, topic_names: list[str], timeout: float, sleep: float) -> None: for topic in topic_names: expiration = Expiration.from_timeout(timeout=timeout) topic_found = False From 5bf72128a5c1c197c78c5e8edee4fcdb721f0c98 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Thu, 12 Sep 2024 17:14:57 +0300 Subject: [PATCH 06/18] feat: add systemd journal logging configuration --- README.rst | 9 ++++++++ karapace.config.json | 1 + mypy.ini | 3 +++ pyproject.toml | 1 + src/karapace/config.py | 2 ++ src/karapace/karapace_all.py | 44 +++++++++++++++++++++++++++++------- 6 files changed, 52 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index d1bcbd28f..f6e8a6736 100644 --- a/README.rst +++ b/README.rst @@ -478,6 +478,15 @@ Keys to take special care are the ones needed to configure Kafka and advertised_ * - ``use_protobuf_formatter`` - ``false`` - If protobuf formatter should be used on protobuf schemas in order to normalize schemas. The formatter is used on top and independent of regular normalization and schemas will be persisted in a formatted state. + * - ``log_handler`` + - ``stdout`` + - Select the log handler. Default is standard output. Alternative log handler is ``systemd``. + * - ``log_level`` + - ``DEBUG`` + - Logging level. Default level is debug. 
+ * - ``log_format`` + - ``%(name)-20s\t%(threadName)s\t%(levelname)-8s\t%(message)s`` + - Log format Authentication and authorization of Karapace Schema Registry REST API diff --git a/karapace.config.json b/karapace.config.json index 55303ff4d..52a75bef9 100644 --- a/karapace.config.json +++ b/karapace.config.json @@ -9,6 +9,7 @@ "group_id": "schema-registry", "host": "127.0.0.1", "log_level": "DEBUG", + "log_handler": "stdout", "port": 8081, "server_tls_certfile": null, "server_tls_keyfile": null, diff --git a/mypy.ini b/mypy.ini index 0a0230c5f..c4ef8efd1 100644 --- a/mypy.ini +++ b/mypy.ini @@ -85,3 +85,6 @@ ignore_missing_imports = True [mypy-networkx.*] ignore_missing_imports = True + +[mypy-systemd.*] +ignore_missing_imports = True diff --git a/pyproject.toml b/pyproject.toml index 7b7efb33e..9c505b176 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,7 @@ Issues = "https://github.com/Aiven-Open/karapace/issues" [project.optional-dependencies] sentry-sdk = ["sentry-sdk>=1.6.0"] +systemd-logging = ["systemd-python==235"] ujson = ["ujson"] dev = [ # Developer QoL diff --git a/src/karapace/config.py b/src/karapace/config.py index 3761b0072..7f02b7712 100644 --- a/src/karapace/config.py +++ b/src/karapace/config.py @@ -49,6 +49,7 @@ class Config(TypedDict): registry_authfile: str | None rest_authorization: bool rest_base_uri: str | None + log_handler: str | None log_level: str log_format: str master_eligibility: bool @@ -126,6 +127,7 @@ class ConfigDefaults(Config, total=False): "registry_authfile": None, "rest_authorization": False, "rest_base_uri": None, + "log_handler": "stdout", "log_level": "DEBUG", "log_format": "%(name)-20s\t%(threadName)s\t%(levelname)-8s\t%(message)s", "master_eligibility": True, diff --git a/src/karapace/karapace_all.py b/src/karapace/karapace_all.py index 240da1008..ccdb96915 100644 --- a/src/karapace/karapace_all.py +++ b/src/karapace/karapace_all.py @@ -2,10 +2,12 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from __future__ import annotations + from aiohttp.web_log import AccessLogger from contextlib import closing from karapace import version as karapace_version -from karapace.config import read_config +from karapace.config import Config, read_config from karapace.instrumentation.prometheus import PrometheusInstrumentation from karapace.kafka_rest_apis import KafkaRest from karapace.rapu import RestApp @@ -21,6 +23,38 @@ class KarapaceAll(KafkaRest, KarapaceSchemaRegistryController): pass +def _configure_logging(*, config: Config) -> None: + log_level = config.get("log_level", "DEBUG") + log_format = config.get("log_format", "%(name)-20s\t%(threadName)s\t%(levelname)-8s\t%(message)s") + + root_handler: logging.Handler | None = None + log_handler = config.get("log_handler", None) + if "systemd" == log_handler: + from systemd import journal + + root_handler = journal.JournalHandler(SYSLOG_IDENTIFIER="karapace") + elif "stdout" == log_handler or log_handler is None: + root_handler = logging.StreamHandler(stream=sys.stdout) + else: + logging.basicConfig(level=logging.INFO, format=log_format) + logging.getLogger().setLevel(log_level) + logging.warning("Log handler %s not recognized, root handler not set.", log_handler) + + if root_handler is not None: + root_handler.setFormatter(logging.Formatter(log_format)) + root_handler.setLevel(log_level) + root_handler.set_name(name="karapace") + logging.root.addHandler(root_handler) + + logging.root.setLevel(log_level) + + if config.get("access_logs_debug") is True: + 
config["access_log_class"] = DebugAccessLogger + logging.getLogger("aiohttp.access").setLevel(logging.DEBUG) + else: + config["access_log_class"] = AccessLogger + + def main() -> int: parser = argparse.ArgumentParser(prog="karapace", description="Karapace: Your Kafka essentials in one tool") parser.add_argument("--version", action="version", help="show program version", version=karapace_version.__version__) @@ -30,13 +64,7 @@ def main() -> int: with closing(arg.config_file): config = read_config(arg.config_file) - logging.basicConfig(level=logging.INFO, format=config["log_format"]) - logging.getLogger().setLevel(config["log_level"]) - if config.get("access_logs_debug") is True: - config["access_log_class"] = DebugAccessLogger - logging.getLogger("aiohttp.access").setLevel(logging.DEBUG) - else: - config["access_log_class"] = AccessLogger + _configure_logging(config=config) app: RestApp if config["karapace_rest"] and config["karapace_registry"]: From 66db599f016e12f47896f0ff91c31cbaf45e19f1 Mon Sep 17 00:00:00 2001 From: Jonas Keeling Date: Thu, 19 Sep 2024 14:31:57 +0200 Subject: [PATCH 07/18] feat: improve health check to fail if schema_reader raises exceptions --- src/karapace/karapace.py | 18 ++++- src/karapace/schema_reader.py | 40 +++++++++++- src/karapace/schema_registry_apis.py | 13 ++-- stubs/confluent_kafka/__init__.pyi | 2 + stubs/confluent_kafka/admin/__init__.pyi | 3 +- stubs/confluent_kafka/cimpl.pyi | 7 ++ tests/integration/test_health_check.py | 27 ++++++++ tests/unit/test_schema_reader.py | 83 +++++++++++++++++++++++- 8 files changed, 182 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_health_check.py diff --git a/src/karapace/karapace.py b/src/karapace/karapace.py index 75cd96da4..f486b1903 100644 --- a/src/karapace/karapace.py +++ b/src/karapace/karapace.py @@ -12,6 +12,7 @@ from functools import partial from http import HTTPStatus from karapace.config import Config +from karapace.dataclasses import default_dataclass from karapace.rapu import HTTPRequest, HTTPResponse, RestApp from karapace.typing import JsonObject from karapace.utils import json_encode @@ -22,7 +23,14 @@ import aiohttp.web import time -HealthHook: TypeAlias = Callable[[], Awaitable[JsonObject]] + +@default_dataclass +class HealthCheck: + status: JsonObject + healthy: bool + + +HealthHook: TypeAlias = Callable[[], Awaitable[HealthCheck]] class KarapaceBase(RestApp): @@ -96,11 +104,15 @@ async def health(self, _request: Request) -> aiohttp.web.Response: "process_uptime_sec": int(time.monotonic() - self._process_start_time), "karapace_version": __version__, } + status_code = HTTPStatus.OK for hook in self.health_hooks: - resp.update(await hook()) + check = await hook() + resp.update(check.status) + if not check.healthy: + status_code = HTTPStatus.SERVICE_UNAVAILABLE return aiohttp.web.Response( body=json_encode(resp, binary=True, compact=True), - status=HTTPStatus.OK.value, + status=status_code.value, headers={"Content-Type": "application/json"}, ) diff --git a/src/karapace/schema_reader.py b/src/karapace/schema_reader.py index d7131968f..b20487631 100644 --- a/src/karapace/schema_reader.py +++ b/src/karapace/schema_reader.py @@ -22,7 +22,7 @@ ) from avro.schema import Schema as AvroSchema from collections.abc import Mapping, Sequence -from confluent_kafka import Message, TopicPartition +from confluent_kafka import Message, TopicCollection, TopicPartition from contextlib import closing, ExitStack from enum import Enum from jsonschema.validators import Draft7Validator @@ -48,6 +48,7 
@@
 from threading import Event, Thread
 from typing import Final
 
+import asyncio
 import json
 import logging
 import time
@@ -62,6 +63,11 @@
 KAFKA_CLIENT_CREATION_TIMEOUT_SECONDS: Final = 2.0
 SCHEMA_TOPIC_CREATION_TIMEOUT_SECONDS: Final = 5.0
 
+# If handle_messages raises at least UNHEALTHY_CONSECUTIVE_ERRORS consecutive errors
+# spanning UNHEALTHY_TIMEOUT_SECONDS or more, the SchemaReader is reported unhealthy
+UNHEALTHY_TIMEOUT_SECONDS: Final = 10.0
+UNHEALTHY_CONSECUTIVE_ERRORS: Final = 3
+
 # For good startup performance the consumption of multiple
 # records for each consume round is essential.
 # Consumer default is 1 message for each consume call and after
@@ -176,6 +182,9 @@ def __init__(
         self.start_time = time.monotonic()
         self.startup_previous_processed_offset = 0
 
+        self.consecutive_unexpected_errors: int = 0
+        self.consecutive_unexpected_errors_start: float = 0
+
     def close(self) -> None:
         LOG.info("Closing schema_reader")
         self._stop_schema_reader.set()
@@ -249,15 +258,44 @@ def run(self) -> None:
                 self.offset = self._get_beginning_offset()
             try:
                 self.handle_messages()
+                self.consecutive_unexpected_errors = 0
             except ShutdownException:
                 self._stop_schema_reader.set()
                 shutdown()
             except KafkaUnavailableError:
+                self.consecutive_unexpected_errors += 1
                 LOG.warning("Kafka cluster is unavailable or broker can't be resolved.")
             except Exception as e:  # pylint: disable=broad-except
                 self.stats.unexpected_exception(ex=e, where="schema_reader_loop")
+                self.consecutive_unexpected_errors += 1
+                if self.consecutive_unexpected_errors == 1:
+                    self.consecutive_unexpected_errors_start = time.monotonic()
                 LOG.warning("Unexpected exception in schema reader loop - %s", e)
 
+    async def is_healthy(self) -> bool:
+        if (
+            self.consecutive_unexpected_errors >= UNHEALTHY_CONSECUTIVE_ERRORS
+            and (duration := time.monotonic() - self.consecutive_unexpected_errors_start) >= UNHEALTHY_TIMEOUT_SECONDS
+        ):
+            LOG.warning(
+                "Health check failed with %s consecutive errors in %s seconds", self.consecutive_unexpected_errors, duration
+            )
+            return False
+
+        try:
+            # Explicitly check if topic exists.
+            # This needs to be done because in case of missing topic the consumer will not repeat the error
+            # on consecutive consume calls and instead will return empty list.
+ assert self.admin_client is not None + topic = self.config["topic_name"] + res = self.admin_client.describe_topics(TopicCollection([topic])) + await asyncio.wrap_future(res[topic]) + except Exception as e: # pylint: disable=broad-except + LOG.warning("Health check failed with %r", e) + return False + + return True + def _get_beginning_offset(self) -> int: assert self.consumer is not None, "Thread must be started" diff --git a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py index 5a9196087..20aaabc6f 100644 --- a/src/karapace/schema_registry_apis.py +++ b/src/karapace/schema_registry_apis.py @@ -28,13 +28,13 @@ SubjectSoftDeletedException, VersionNotFoundException, ) -from karapace.karapace import KarapaceBase +from karapace.karapace import HealthCheck, KarapaceBase from karapace.protobuf.exception import ProtobufUnresolvedDependencyException from karapace.rapu import HTTPRequest, JSON_CONTENT_TYPE, SERVER_NAME from karapace.schema_models import ParsedTypedSchema, SchemaType, SchemaVersion, TypedSchema, ValidatedTypedSchema, Versioner from karapace.schema_references import LatestVersionReference, Reference, reference_from_mapping from karapace.schema_registry import KarapaceSchemaRegistry -from karapace.typing import JsonData, JsonObject, SchemaId, Subject, Version +from karapace.typing import JsonData, SchemaId, Subject, Version from karapace.utils import JSONDecodeError from typing import Any @@ -98,7 +98,7 @@ def __init__(self, config: Config) -> None: self.app.on_startup.append(self._create_forward_client) self.health_hooks.append(self.schema_registry_health) - async def schema_registry_health(self) -> JsonObject: + async def schema_registry_health(self) -> HealthCheck: resp = {} if self._auth is not None: resp["schema_registry_authfile_timestamp"] = self._auth.authfile_last_modified @@ -115,7 +115,12 @@ async def schema_registry_health(self) -> JsonObject: resp["schema_registry_primary_url"] = cs.primary_url resp["schema_registry_coordinator_running"] = cs.is_running resp["schema_registry_coordinator_generation_id"] = cs.group_generation_id - return resp + + healthy = True + if not await self.schema_registry.schema_reader.is_healthy(): + healthy = False + + return HealthCheck(status=resp, healthy=healthy) async def _start_schema_registry(self, app: aiohttp.web.Application) -> None: # pylint: disable=unused-argument """Callback for aiohttp.Application.on_startup""" diff --git a/stubs/confluent_kafka/__init__.pyi b/stubs/confluent_kafka/__init__.pyi index 175569fb4..e27cf4880 100644 --- a/stubs/confluent_kafka/__init__.pyi +++ b/stubs/confluent_kafka/__init__.pyi @@ -8,6 +8,7 @@ from .cimpl import ( TIMESTAMP_CREATE_TIME, TIMESTAMP_LOG_APPEND_TIME, TIMESTAMP_NOT_AVAILABLE, + TopicCollection, TopicPartition, ) @@ -22,4 +23,5 @@ __all__ = ( "TIMESTAMP_LOG_APPEND_TIME", "TIMESTAMP_NOT_AVAILABLE", "TopicPartition", + "TopicCollection", ) diff --git a/stubs/confluent_kafka/admin/__init__.pyi b/stubs/confluent_kafka/admin/__init__.pyi index 02abcc033..1dafa51b8 100644 --- a/stubs/confluent_kafka/admin/__init__.pyi +++ b/stubs/confluent_kafka/admin/__init__.pyi @@ -4,7 +4,7 @@ from ._listoffsets import ListOffsetsResultInfo, OffsetSpec from ._metadata import BrokerMetadata, ClusterMetadata, PartitionMetadata, TopicMetadata from ._resource import ResourceType from concurrent.futures import Future -from confluent_kafka import IsolationLevel, TopicPartition +from confluent_kafka import IsolationLevel, TopicCollection, TopicPartition from typing import Callable __all__ 
= ( @@ -52,3 +52,4 @@ class AdminClient: def describe_configs( self, resources: list[ConfigResource], request_timeout: float = -1 ) -> dict[ConfigResource, Future[dict[str, ConfigEntry]]]: ... + def describe_topics(self, topics: TopicCollection) -> dict[str, Future]: ... diff --git a/stubs/confluent_kafka/cimpl.pyi b/stubs/confluent_kafka/cimpl.pyi index 6936d10f0..ed163e5fc 100644 --- a/stubs/confluent_kafka/cimpl.pyi +++ b/stubs/confluent_kafka/cimpl.pyi @@ -48,6 +48,13 @@ class TopicPartition: self.leader_epoch: int | None self.error: KafkaError | None +class TopicCollection: + def __init__( + self, + topic_names: list[str], + ) -> None: + self.topic_names: list[str] + class Message: def offset(self) -> int: ... def timestamp(self) -> tuple[int, int]: ... diff --git a/tests/integration/test_health_check.py b/tests/integration/test_health_check.py new file mode 100644 index 000000000..c4958651e --- /dev/null +++ b/tests/integration/test_health_check.py @@ -0,0 +1,27 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" + +from karapace.client import Client +from karapace.kafka.admin import KafkaAdminClient +from tenacity import retry, stop_after_delay, wait_fixed +from tests.integration.utils.cluster import RegistryDescription + +import http + + +async def test_health_check( + registry_cluster: RegistryDescription, registry_async_client: Client, admin_client: KafkaAdminClient +) -> None: + res = await registry_async_client.get("/_health") + assert res.ok + + admin_client.delete_topic(registry_cluster.schemas_topic) + + @retry(stop=stop_after_delay(10), wait=wait_fixed(1), reraise=True) + async def check_health(): + res = await registry_async_client.get("/_health") + assert res.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE, "should report unhealthy after topic has been deleted" + + await check_health() diff --git a/tests/unit/test_schema_reader.py b/tests/unit/test_schema_reader.py index f5bb572cf..552fa0be7 100644 --- a/tests/unit/test_schema_reader.py +++ b/tests/unit/test_schema_reader.py @@ -6,7 +6,7 @@ """ from _pytest.logging import LogCaptureFixture -from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import Future, ThreadPoolExecutor from confluent_kafka import Message from dataclasses import dataclass from karapace.config import DEFAULTS @@ -25,9 +25,10 @@ ) from karapace.schema_type import SchemaType from karapace.typing import SchemaId, Version +from pytest import MonkeyPatch from tests.base_testcase import BaseTestCase from tests.utils import schema_protobuf_invalid_because_corrupted, schema_protobuf_with_invalid_ref -from typing import Callable +from typing import Callable, Optional from unittest.mock import Mock import confluent_kafka @@ -325,6 +326,84 @@ def test_handle_msg_delete_subject_logs(caplog: LogCaptureFixture) -> None: assert log.message == "Hard delete: version: Version(2) for subject: 'test-subject' did not exist, should have" +@dataclass +class HealthCheckTestCase(BaseTestCase): + current_time: float + consecutive_unexpected_errors: int + consecutive_unexpected_errors_start: float + healthy: bool + check_topic_error: Optional[Exception] = None + + +@pytest.mark.parametrize( + "testcase", + [ + HealthCheckTestCase( + test_name="No errors", + current_time=0, + consecutive_unexpected_errors=0, + consecutive_unexpected_errors_start=0, + healthy=True, + ), + HealthCheckTestCase( + test_name="10 errors in 5 seconds", + current_time=5, + consecutive_unexpected_errors=10, + consecutive_unexpected_errors_start=0, + healthy=True, 
+        ),
+        HealthCheckTestCase(
+            test_name="1 error in 20 seconds",
+            current_time=20,
+            consecutive_unexpected_errors=1,
+            consecutive_unexpected_errors_start=0,
+            healthy=True,
+        ),
+        HealthCheckTestCase(
+            test_name="3 errors in 10 seconds",
+            current_time=10,
+            consecutive_unexpected_errors=3,
+            consecutive_unexpected_errors_start=0,
+            healthy=False,
+        ),
+        HealthCheckTestCase(
+            test_name="check topic error",
+            current_time=5,
+            consecutive_unexpected_errors=1,
+            consecutive_unexpected_errors_start=0,
+            healthy=False,
+            check_topic_error=Exception("Something's wrong"),
+        ),
+    ],
+)
+async def test_schema_reader_health_check(testcase: HealthCheckTestCase, monkeypatch: MonkeyPatch) -> None:
+    offset_watcher = OffsetWatcher()
+    key_formatter_mock = Mock()
+    admin_client_mock = Mock()
+
+    empty_future = Future()
+    if testcase.check_topic_error:
+        empty_future.set_exception(testcase.check_topic_error)
+    else:
+        empty_future.set_result(None)
+    admin_client_mock.describe_topics.return_value = {DEFAULTS["topic_name"]: empty_future}
+
+    schema_reader = KafkaSchemaReader(
+        config=DEFAULTS,
+        offset_watcher=offset_watcher,
+        key_formatter=key_formatter_mock,
+        master_coordinator=None,
+        database=InMemoryDatabase(),
+    )
+
+    monkeypatch.setattr(time, "monotonic", lambda: testcase.current_time)
+    schema_reader.admin_client = admin_client_mock
+    schema_reader.consecutive_unexpected_errors = testcase.consecutive_unexpected_errors
+    schema_reader.consecutive_unexpected_errors_start = testcase.consecutive_unexpected_errors_start
+
+    assert await schema_reader.is_healthy() == testcase.healthy
+
+
 @dataclass
 class KafkaMessageHandlingErrorTestCase(BaseTestCase):
     key: bytes
From 260a105ce99fe0dd7436f3bfdbb2f60d59959fa5 Mon Sep 17 00:00:00 2001
From: Davide Armand
Date: Thu, 3 Oct 2024 10:52:37 +0200
Subject: [PATCH 08/18] refactor: reorganize schema compatibility logic

This is in preparation for the fix for [EC-289].
- Move schema compatibility related code from __init__.py to its own module (schema_compatibility.py) - Refactor logic in: - Schema registering endpoint (`/subjects//versions`) - Schema compatibility endpoint (`/compatibility/subjects//versions/latest`) --- src/karapace/compatibility/__init__.py | 133 ------------------ src/karapace/schema_registry.py | 90 +++++++----- src/karapace/schema_registry_apis.py | 122 ++++++++-------- .../unit/compatibility/test_compatibility.py | 11 +- 4 files changed, 128 insertions(+), 228 deletions(-) diff --git a/src/karapace/compatibility/__init__.py b/src/karapace/compatibility/__init__.py index e5f61e710..3984ed9f5 100644 --- a/src/karapace/compatibility/__init__.py +++ b/src/karapace/compatibility/__init__.py @@ -4,22 +4,7 @@ Copyright (c) 2019 Aiven Ltd See LICENSE for details """ -from avro.compatibility import ( - merge, - ReaderWriterCompatibilityChecker as AvroChecker, - SchemaCompatibilityResult, - SchemaCompatibilityType, - SchemaIncompatibilityType, -) -from avro.schema import Schema as AvroSchema from enum import Enum, unique -from jsonschema import Draft7Validator -from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility, incompatible_schema -from karapace.compatibility.protobuf.checks import check_protobuf_schema_compatibility -from karapace.protobuf.schema import ProtobufSchema -from karapace.schema_models import ParsedTypedSchema, ValidatedTypedSchema -from karapace.schema_reader import SchemaType -from karapace.utils import assert_never import logging @@ -54,121 +39,3 @@ def is_transitive(self) -> bool: "FULL_TRANSITIVE", } return self.value in TRANSITIVE_MODES - - -def check_avro_compatibility(reader_schema: AvroSchema, writer_schema: AvroSchema) -> SchemaCompatibilityResult: - return AvroChecker().get_compatibility(reader=reader_schema, writer=writer_schema) - - -def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Validator) -> SchemaCompatibilityResult: - return jsonschema_compatibility(reader, writer) - - -def check_protobuf_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: - return check_protobuf_schema_compatibility(reader, writer) - - -def check_compatibility( - old_schema: ParsedTypedSchema, - new_schema: ValidatedTypedSchema, - compatibility_mode: CompatibilityModes, -) -> SchemaCompatibilityResult: - """Check that `old_schema` and `new_schema` are compatible under `compatibility_mode`.""" - if compatibility_mode is CompatibilityModes.NONE: - LOG.info("Compatibility level set to NONE, no schema compatibility checks performed") - return SchemaCompatibilityResult(SchemaCompatibilityType.compatible) - - if old_schema.schema_type is not new_schema.schema_type: - return incompatible_schema( - incompat_type=SchemaIncompatibilityType.type_mismatch, - message=f"Comparing different schema types: {old_schema.schema_type} with {new_schema.schema_type}", - location=[], - ) - - if old_schema.schema_type is SchemaType.AVRO: - assert isinstance(old_schema.schema, AvroSchema) - assert isinstance(new_schema.schema, AvroSchema) - if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: - result = check_avro_compatibility( - reader_schema=new_schema.schema, - writer_schema=old_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_avro_compatibility( - reader_schema=old_schema.schema, - writer_schema=new_schema.schema, - ) - - elif 
compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = check_avro_compatibility( - reader_schema=new_schema.schema, - writer_schema=old_schema.schema, - ) - result = merge( - result, - check_avro_compatibility( - reader_schema=old_schema.schema, - writer_schema=new_schema.schema, - ), - ) - - elif old_schema.schema_type is SchemaType.JSONSCHEMA: - assert isinstance(old_schema.schema, Draft7Validator) - assert isinstance(new_schema.schema, Draft7Validator) - if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: - result = check_jsonschema_compatibility( - reader=new_schema.schema, - writer=old_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_jsonschema_compatibility( - reader=old_schema.schema, - writer=new_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = check_jsonschema_compatibility( - reader=new_schema.schema, - writer=old_schema.schema, - ) - result = merge( - result, - check_jsonschema_compatibility( - reader=old_schema.schema, - writer=new_schema.schema, - ), - ) - - elif old_schema.schema_type is SchemaType.PROTOBUF: - assert isinstance(old_schema.schema, ProtobufSchema) - assert isinstance(new_schema.schema, ProtobufSchema) - if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: - result = check_protobuf_compatibility( - reader=new_schema.schema, - writer=old_schema.schema, - ) - elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_protobuf_compatibility( - reader=old_schema.schema, - writer=new_schema.schema, - ) - - elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = check_protobuf_compatibility( - reader=new_schema.schema, - writer=old_schema.schema, - ) - result = merge( - result, - check_protobuf_compatibility( - reader=old_schema.schema, - writer=new_schema.schema, - ), - ) - - else: - assert_never(f"Unknown schema_type {old_schema.schema_type}") - - return result diff --git a/src/karapace/schema_registry.py b/src/karapace/schema_registry.py index 67c0fc899..b98246b3e 100644 --- a/src/karapace/schema_registry.py +++ b/src/karapace/schema_registry.py @@ -4,10 +4,12 @@ """ from __future__ import annotations +from avro.compatibility import SchemaCompatibilityResult, SchemaCompatibilityType from collections.abc import Sequence from contextlib import AsyncExitStack, closing -from karapace.compatibility import check_compatibility, CompatibilityModes +from karapace.compatibility import CompatibilityModes from karapace.compatibility.jsonschema.checks import is_incompatible +from karapace.compatibility.schema_compatibility import SchemaCompatibility from karapace.config import Config from karapace.coordinator.master_coordinator import MasterCoordinator from karapace.dependency import Dependency @@ -281,7 +283,7 @@ async def subject_version_referencedby_get( return list(referenced_by) return [] - def _resolve_and_parse(self, schema: TypedSchema) -> ParsedTypedSchema: + def resolve_and_parse(self, schema: TypedSchema) -> ParsedTypedSchema: references, dependencies = self.resolve_references(schema.references) if schema.references else (None, None) return ParsedTypedSchema.parse( schema_type=schema.schema_type, @@ -325,12 +327,8 @@ async def write_new_schema_local( ) else: # First check if any of the 
existing schemas for the subject match - live_schema_versions = { - version_id: schema_version - for version_id, schema_version in all_schema_versions.items() - if schema_version.deleted is False - } - if not live_schema_versions: # Previous ones have been deleted by the user. + live_versions = self.get_live_versions_sorted(all_schema_versions) + if not live_versions: # Previous ones have been deleted by the user. version = self.database.get_next_version(subject=subject) schema_id = self.database.get_schema_id(new_schema) LOG.debug( @@ -351,32 +349,15 @@ async def write_new_schema_local( ) return schema_id - compatibility_mode = self.get_compatibility_mode(subject=subject) + result = self.check_schema_compatibility(new_schema, subject) - # Run a compatibility check between on file schema(s) and the one being submitted now - # the check is either towards the latest one or against all previous ones in case of - # transitive mode - schema_versions = sorted(live_schema_versions) - if compatibility_mode.is_transitive(): - check_against = schema_versions - else: - check_against = [schema_versions[-1]] - - for old_version in check_against: - parsed_old_schema = self._resolve_and_parse(all_schema_versions[old_version].schema) - result = check_compatibility( - old_schema=parsed_old_schema, - new_schema=new_schema, - compatibility_mode=compatibility_mode, + if is_incompatible(result): + message = set(result.messages).pop() if result.messages else "" + LOG.warning( + "Incompatible schema: %s, incompatibilities: %s", result.compatibility, result.incompatibilities ) - if is_incompatible(result): - message = set(result.messages).pop() if result.messages else "" - LOG.warning( - "Incompatible schema: %s, incompatibilities: %s", result.compatibility, result.incompatibilities - ) - raise IncompatibleSchema( - f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}" - ) + compatibility_mode = self.get_compatibility_mode(subject=subject) + raise IncompatibleSchema(f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}") # We didn't find an existing schema and the schema is compatible so go and create one version = self.database.get_next_version(subject=subject) @@ -465,3 +446,48 @@ def send_delete_subject_message(self, subject: Subject, version: Version) -> Non key = {"subject": subject, "magic": 0, "keytype": "DELETE_SUBJECT"} value = {"subject": subject, "version": version.value} self.producer.send_message(key=key, value=value) + + def check_schema_compatibility( + self, + new_schema: ValidatedTypedSchema, + subject: Subject, + ) -> SchemaCompatibilityResult: + result = SchemaCompatibilityResult(SchemaCompatibilityType.compatible) + + compatibility_mode = self.get_compatibility_mode(subject=subject) + all_schema_versions: dict[Version, SchemaVersion] = self.database.find_subject_schemas( + subject=subject, include_deleted=True + ) + live_versions = self.get_live_versions_sorted(all_schema_versions) + + if not live_versions: + old_versions = [] + elif compatibility_mode.is_transitive(): + # Only check against all versions + old_versions = live_versions + else: + # Only check against latest version + old_versions = [live_versions[-1]] + + for old_version in old_versions: + old_parsed_schema = self.resolve_and_parse(all_schema_versions[old_version].schema) + + result = SchemaCompatibility.check_compatibility( + old_schema=old_parsed_schema, + new_schema=new_schema, + compatibility_mode=compatibility_mode, + ) + + if is_incompatible(result): + break + + return 
result + + @staticmethod + def get_live_versions_sorted(all_schema_versions: dict[Version, SchemaVersion]) -> list[Version]: + live_schema_versions = { + version_id: schema_version + for version_id, schema_version in all_schema_versions.items() + if schema_version.deleted is False + } + return sorted(live_schema_versions) diff --git a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py index 20aaabc6f..2f57279fe 100644 --- a/src/karapace/schema_registry_apis.py +++ b/src/karapace/schema_registry_apis.py @@ -9,8 +9,9 @@ from enum import Enum, unique from http import HTTPStatus from karapace.auth import HTTPAuthorizer, Operation, User -from karapace.compatibility import check_compatibility, CompatibilityModes +from karapace.compatibility import CompatibilityModes from karapace.compatibility.jsonschema.checks import is_incompatible +from karapace.compatibility.schema_compatibility import SchemaCompatibility from karapace.config import Config from karapace.errors import ( IncompatibleSchema, @@ -34,7 +35,7 @@ from karapace.schema_models import ParsedTypedSchema, SchemaType, SchemaVersion, TypedSchema, ValidatedTypedSchema, Versioner from karapace.schema_references import LatestVersionReference, Reference, reference_from_mapping from karapace.schema_registry import KarapaceSchemaRegistry -from karapace.typing import JsonData, SchemaId, Subject, Version +from karapace.typing import JsonData, JsonObject, SchemaId, Subject, Version from karapace.utils import JSONDecodeError from typing import Any @@ -380,63 +381,12 @@ def _invalid_version(self, content_type, version): ) async def compatibility_check( - self, content_type: str, *, subject: str, version: str, request: HTTPRequest, user: User | None = None + self, content_type: str, *, subject: Subject, version: str, request: HTTPRequest, user: User | None = None ) -> None: """Check for schema compatibility""" self._check_authorization(user, Operation.Read, f"Subject:{subject}") - body = request.json - schema_type = self._validate_schema_type(content_type=content_type, data=body) - references = self._validate_references(content_type, schema_type, body) - try: - references, new_schema_dependencies = self.schema_registry.resolve_references(references) - new_schema = ValidatedTypedSchema.parse( - schema_type=schema_type, - schema_str=body["schema"], - references=references, - dependencies=new_schema_dependencies, - use_protobuf_formatter=self.config["use_protobuf_formatter"], - ) - except InvalidSchema: - self.r( - body={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Invalid {schema_type} schema", - }, - content_type=content_type, - status=HTTPStatus.UNPROCESSABLE_ENTITY, - ) - try: - old = self.schema_registry.subject_version_get(subject=subject, version=Versioner.V(version)) - except InvalidVersion: - self._invalid_version(content_type, version) - except (VersionNotFoundException, SchemasNotFoundException, SubjectNotFoundException): - self.r( - body={ - "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, - "message": f"Version {version} not found.", - }, - content_type=content_type, - status=HTTPStatus.NOT_FOUND, - ) - old_schema_type = self._validate_schema_type(content_type=content_type, data=old) - try: - old_references = old.get("references", None) - old_dependencies = None - if old_references: - old_references, old_dependencies = self.schema_registry.resolve_references(old_references) - old_schema = ParsedTypedSchema.parse(old_schema_type, old["schema"], old_references, old_dependencies) - 
except InvalidSchema: - self.r( - body={ - "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, - "message": f"Found an invalid {old_schema_type} schema registered", - }, - content_type=content_type, - status=HTTPStatus.UNPROCESSABLE_ENTITY, - ) - try: compatibility_mode = self.schema_registry.get_compatibility_mode(subject=subject) except ValueError as ex: @@ -451,11 +401,11 @@ async def compatibility_check( status=HTTPStatus.INTERNAL_SERVER_ERROR, ) - result = check_compatibility( - old_schema=old_schema, - new_schema=new_schema, - compatibility_mode=compatibility_mode, - ) + new_schema = self.get_new_schema(request.json, content_type) + old_schema = self._get_old_schema(subject, Versioner.V(version), content_type) + + result = SchemaCompatibility.check_compatibility(old_schema, new_schema, compatibility_mode) + if is_incompatible(result): self.r({"is_compatible": False}, content_type) self.r({"is_compatible": True}, content_type) @@ -1370,3 +1320,57 @@ def no_master_error(self, content_type: str) -> None: content_type=content_type, status=HTTPStatus.INTERNAL_SERVER_ERROR, ) + + def get_new_schema(self, body: JsonObject, content_type: str) -> ValidatedTypedSchema: + schema_type = self._validate_schema_type(content_type=content_type, data=body) + references = self._validate_references(content_type, schema_type, body) + try: + references, new_schema_dependencies = self.schema_registry.resolve_references(references) + return ValidatedTypedSchema.parse( + schema_type=schema_type, + schema_str=body["schema"], + references=references, + dependencies=new_schema_dependencies, + use_protobuf_formatter=self.config["use_protobuf_formatter"], + ) + except InvalidSchema: + self.r( + body={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Invalid {schema_type} schema", + }, + content_type=content_type, + status=HTTPStatus.UNPROCESSABLE_ENTITY, + ) + + def _get_old_schema(self, subject: Subject, version: Version, content_type: str) -> ParsedTypedSchema: + try: + old = self.schema_registry.subject_version_get(subject=subject, version=version) + except InvalidVersion: + self._invalid_version(content_type, version) + except (VersionNotFoundException, SchemasNotFoundException, SubjectNotFoundException): + self.r( + body={ + "error_code": SchemaErrorCodes.VERSION_NOT_FOUND.value, + "message": f"Version {version} not found.", + }, + content_type=content_type, + status=HTTPStatus.NOT_FOUND, + ) + old_schema_type = self._validate_schema_type(content_type=content_type, data=old) + try: + old_references = old.get("references", None) + old_dependencies = None + if old_references: + old_references, old_dependencies = self.schema_registry.resolve_references(old_references) + old_schema = ParsedTypedSchema.parse(old_schema_type, old["schema"], old_references, old_dependencies) + return old_schema + except InvalidSchema: + self.r( + body={ + "error_code": SchemaErrorCodes.INVALID_SCHEMA.value, + "message": f"Found an invalid {old_schema_type} schema registered", + }, + content_type=content_type, + status=HTTPStatus.UNPROCESSABLE_ENTITY, + ) diff --git a/tests/unit/compatibility/test_compatibility.py b/tests/unit/compatibility/test_compatibility.py index 641f7df06..76f0e22b9 100644 --- a/tests/unit/compatibility/test_compatibility.py +++ b/tests/unit/compatibility/test_compatibility.py @@ -3,17 +3,20 @@ See LICENSE for details """ from avro.compatibility import SchemaCompatibilityType -from karapace.compatibility import check_compatibility, CompatibilityModes +from karapace.compatibility import 
CompatibilityModes +from karapace.compatibility.schema_compatibility import SchemaCompatibility from karapace.schema_models import SchemaType, ValidatedTypedSchema import json -def test_schema_type_can_change_when_mode_none(): +def test_schema_type_can_change_when_mode_none() -> None: avro_str = json.dumps({"type": "record", "name": "Record1", "fields": [{"name": "field1", "type": "int"}]}) - json_str = '{"type":"array"}' + json_str = '{"type": "array"}' avro_schema = ValidatedTypedSchema.parse(SchemaType.AVRO, avro_str) json_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, json_str) - result = check_compatibility(old_schema=avro_schema, new_schema=json_schema, compatibility_mode=CompatibilityModes.NONE) + result = SchemaCompatibility.check_compatibility( + old_schema=avro_schema, new_schema=json_schema, compatibility_mode=CompatibilityModes.NONE + ) assert result.compatibility is SchemaCompatibilityType.compatible From dfc476cad8891f26fe36980252142110724946d5 Mon Sep 17 00:00:00 2001 From: Davide Armand Date: Wed, 18 Sep 2024 11:12:16 +0200 Subject: [PATCH 09/18] fix: align transitive compatibility checks Make so that the transitive compatibility checks that are done in the schema creation endpoint are also done in the schema validation endpoint. In the creation endpoint (`/subjects//versions`), if the compatibility mode is transient then the new schema is checked against all schemas. In the validation endpoint (`/compatibility/subjects//versions/`): - Before this fix, only the latest schema is checked against (even in case of transitive mode) - After this fix, in case of transitive mode then all schema are checked against. Note that in this case the version provided in the POST request (``) is ignored. --- README.rst | 4 + .../compatibility/schema_compatibility.py | 138 ++++++++++ src/karapace/schema_registry.py | 4 +- src/karapace/schema_registry_apis.py | 13 +- .../integration/test_schema_compatibility.py | 235 ++++++++++++++++++ .../unit/compatibility/test_compatibility.py | 24 ++ tests/unit/test_schema_registry_api.py | 4 +- website/source/quickstart.rst | 4 + 8 files changed, 418 insertions(+), 8 deletions(-) create mode 100644 src/karapace/compatibility/schema_compatibility.py create mode 100644 tests/integration/test_schema_compatibility.py diff --git a/README.rst b/README.rst index f6e8a6736..0cc7489e4 100644 --- a/README.rst +++ b/README.rst @@ -149,6 +149,10 @@ Test the compatibility of a schema with the latest schema under subject "test-ke http://localhost:8081/compatibility/subjects/test-key/versions/latest {"is_compatible":true} +NOTE: if the subject's compatibility mode is transitive (BACKWARD_TRANSITIVE, FORWARD_TRANSITIVE or FULL_TRANSITIVE) then the +compatibility is checked not only against the latest schema, but also against all previous schemas, as it would be done +when trying to register the new schema through the `subjects//versions` endpoint. 
+ Get current global backwards compatibility setting value:: $ curl -X GET http://localhost:8081/config diff --git a/src/karapace/compatibility/schema_compatibility.py b/src/karapace/compatibility/schema_compatibility.py new file mode 100644 index 000000000..07e059d50 --- /dev/null +++ b/src/karapace/compatibility/schema_compatibility.py @@ -0,0 +1,138 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" +from avro.compatibility import ( + merge, + ReaderWriterCompatibilityChecker as AvroChecker, + SchemaCompatibilityResult, + SchemaCompatibilityType, + SchemaIncompatibilityType, +) +from avro.schema import Schema as AvroSchema +from jsonschema import Draft7Validator +from karapace.compatibility import CompatibilityModes +from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility, incompatible_schema +from karapace.compatibility.protobuf.checks import check_protobuf_schema_compatibility +from karapace.protobuf.schema import ProtobufSchema +from karapace.schema_models import ParsedTypedSchema, ValidatedTypedSchema +from karapace.schema_type import SchemaType +from karapace.utils import assert_never + +import logging + +LOG = logging.getLogger(__name__) + + +class SchemaCompatibility: + @staticmethod + def check_compatibility( + old_schema: ParsedTypedSchema, + new_schema: ValidatedTypedSchema, + compatibility_mode: CompatibilityModes, + ) -> SchemaCompatibilityResult: + """Check that `old_schema` and `new_schema` are compatible under `compatibility_mode`.""" + + if compatibility_mode is CompatibilityModes.NONE: + LOG.info("Compatibility level set to NONE, no schema compatibility checks performed") + return SchemaCompatibilityResult(SchemaCompatibilityType.compatible) + + if old_schema.schema_type is not new_schema.schema_type: + return incompatible_schema( + incompat_type=SchemaIncompatibilityType.type_mismatch, + message=f"Comparing different schema types: {old_schema.schema_type} with {new_schema.schema_type}", + location=[], + ) + + if old_schema.schema_type is SchemaType.AVRO: + assert isinstance(old_schema.schema, AvroSchema) + assert isinstance(new_schema.schema, AvroSchema) + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = SchemaCompatibility.check_avro_compatibility( + reader_schema=new_schema.schema, + writer_schema=old_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = SchemaCompatibility.check_avro_compatibility( + reader_schema=old_schema.schema, + writer_schema=new_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: + result = SchemaCompatibility.check_avro_compatibility( + reader_schema=new_schema.schema, + writer_schema=old_schema.schema, + ) + result = merge( + result, + SchemaCompatibility.check_avro_compatibility( + reader_schema=old_schema.schema, + writer_schema=new_schema.schema, + ), + ) + elif old_schema.schema_type is SchemaType.JSONSCHEMA: + assert isinstance(old_schema.schema, Draft7Validator) + assert isinstance(new_schema.schema, Draft7Validator) + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = SchemaCompatibility.check_jsonschema_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = SchemaCompatibility.check_jsonschema_compatibility( 
+ reader=old_schema.schema, + writer=new_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: + result = SchemaCompatibility.check_jsonschema_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + result = merge( + result, + SchemaCompatibility.check_jsonschema_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ), + ) + elif old_schema.schema_type is SchemaType.PROTOBUF: + assert isinstance(old_schema.schema, ProtobufSchema) + assert isinstance(new_schema.schema, ProtobufSchema) + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = SchemaCompatibility.check_protobuf_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = SchemaCompatibility.check_protobuf_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ) + + elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: + result = SchemaCompatibility.check_protobuf_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + result = merge( + result, + SchemaCompatibility.check_protobuf_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ), + ) + else: + assert_never(f"Unknown schema_type {old_schema.schema_type}") + + return result + + @staticmethod + def check_avro_compatibility(reader_schema: AvroSchema, writer_schema: AvroSchema) -> SchemaCompatibilityResult: + return AvroChecker().get_compatibility(reader=reader_schema, writer=writer_schema) + + @staticmethod + def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Validator) -> SchemaCompatibilityResult: + return jsonschema_compatibility(reader, writer) + + @staticmethod + def check_protobuf_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: + return check_protobuf_schema_compatibility(reader, writer) diff --git a/src/karapace/schema_registry.py b/src/karapace/schema_registry.py index b98246b3e..ee0ae88cd 100644 --- a/src/karapace/schema_registry.py +++ b/src/karapace/schema_registry.py @@ -463,7 +463,7 @@ def check_schema_compatibility( if not live_versions: old_versions = [] elif compatibility_mode.is_transitive(): - # Only check against all versions + # Check against all versions old_versions = live_versions else: # Only check against latest version @@ -479,7 +479,7 @@ def check_schema_compatibility( ) if is_incompatible(result): - break + return result return result diff --git a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py index 2f57279fe..f6d4593f7 100644 --- a/src/karapace/schema_registry_apis.py +++ b/src/karapace/schema_registry_apis.py @@ -402,9 +402,14 @@ async def compatibility_check( ) new_schema = self.get_new_schema(request.json, content_type) - old_schema = self._get_old_schema(subject, Versioner.V(version), content_type) - - result = SchemaCompatibility.check_compatibility(old_schema, new_schema, compatibility_mode) + old_schema = self.get_old_schema(subject, Versioner.V(version), content_type) + if compatibility_mode.is_transitive(): + # Ignore the schema version provided in the rest api call (`version`) + # Instead check against all previous versions (including `version` if existing) + result = self.schema_registry.check_schema_compatibility(new_schema, subject) + else: + # Check against the schema version 
provided in the rest api call (`version`) + result = SchemaCompatibility.check_compatibility(old_schema, new_schema, compatibility_mode) if is_incompatible(result): self.r({"is_compatible": False}, content_type) @@ -1343,7 +1348,7 @@ def get_new_schema(self, body: JsonObject, content_type: str) -> ValidatedTypedS status=HTTPStatus.UNPROCESSABLE_ENTITY, ) - def _get_old_schema(self, subject: Subject, version: Version, content_type: str) -> ParsedTypedSchema: + def get_old_schema(self, subject: Subject, version: Version, content_type: str) -> ParsedTypedSchema: try: old = self.schema_registry.subject_version_get(subject=subject, version=version) except InvalidVersion: diff --git a/tests/integration/test_schema_compatibility.py b/tests/integration/test_schema_compatibility.py new file mode 100644 index 000000000..82228ba32 --- /dev/null +++ b/tests/integration/test_schema_compatibility.py @@ -0,0 +1,235 @@ +""" +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" +from __future__ import annotations + +from collections.abc import Coroutine +from dataclasses import dataclass +from karapace.client import Client +from karapace.typing import JsonObject, Subject +from tests.base_testcase import BaseTestCase +from typing import Any, Callable, Final + +import json +import logging +import pytest + +SchemaRegitrationFunc = Callable[[Client, Subject], Coroutine[Any, Any, None]] + +LOG = logging.getLogger(__name__) + +schema_int: Final[JsonObject] = {"type": "record", "name": "schema_name", "fields": [{"type": "int", "name": "field_name"}]} +schema_long: Final[JsonObject] = { + "type": "record", + "name": "schema_name", + "fields": [{"type": "long", "name": "field_name"}], +} +schema_string: Final[JsonObject] = { + "type": "record", + "name": "schema_name", + "fields": [{"type": "string", "name": "field_name"}], +} +schema_double: Final[JsonObject] = { + "type": "record", + "name": "schema_name", + "fields": [{"type": "double", "name": "field_name"}], +} + + +@dataclass +class SchemaCompatibilityTestCase(BaseTestCase): + new_schema: str + compatibility_mode: str + register_baseline_schemas: SchemaRegitrationFunc + expected_is_compatible: bool | None + expected_status_code: int + expected_incompatibilities: str | None + + +async def _register_baseline_schemas_no_incompatibilities(registry_async_client: Client, subject: Subject) -> None: + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_int)}, + ) + assert res.status_code == 200 + + # Changing type from int to long is compatible + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_long)}, + ) + assert res.status_code == 200 + + +async def _register_baseline_schemas_with_incompatibilities(registry_async_client: Client, subject: Subject) -> None: + # Allow registering non backward compatible schemas + await _set_compatibility_mode(registry_async_client, subject, "NONE") + + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_string)}, + ) + assert res.status_code == 200 + + # Changing type from string to double is incompatible + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_double)}, + ) + assert res.status_code == 200 + + +async def _register_baseline_schemas_with_incompatibilities_and_a_deleted_schema( + 
registry_async_client: Client, subject: Subject +) -> None: + await _register_baseline_schemas_with_incompatibilities(registry_async_client, subject) + + # Register schema + # Changing type from double to long is incompatible + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schemaType": "AVRO", "schema": json.dumps(schema_long)}, + ) + assert res.status_code == 200 + + # And delete it + res = await registry_async_client.delete(f"subjects/{subject}/versions/latest") + assert res.status_code == 200 + assert res.json() == 3 + + +async def _register_no_baseline_schemas( + registry_async_client: Client, subject: Subject # pylint: disable=unused-argument +) -> None: + pass + + +async def _set_compatibility_mode(registry_async_client: Client, subject: Subject, compatibility_mode: str) -> None: + res = await registry_async_client.put(f"config/{subject}", json={"compatibility": compatibility_mode}) + assert res.status_code == 200 + + +@pytest.mark.parametrize( + "test_case", + [ + # Case 0 + # New schema compatible with all baseline ones (int --> long, long --> long) + # Transitive mode + # --> No incompatibilities are found + SchemaCompatibilityTestCase( + test_name="case0", + compatibility_mode="BACKWARD", + register_baseline_schemas=_register_baseline_schemas_no_incompatibilities, + new_schema=json.dumps(schema_long), + expected_is_compatible=True, + expected_status_code=200, + expected_incompatibilities=None, + ), + # Case 1 + # Same as previous case, but in non-transitive mode + # --> No incompatibilities are found + SchemaCompatibilityTestCase( + test_name="case1", + compatibility_mode="BACKWARD_TRANSITIVE", + register_baseline_schemas=_register_baseline_schemas_no_incompatibilities, + new_schema=json.dumps(schema_long), + expected_is_compatible=True, + expected_status_code=200, + expected_incompatibilities=None, + ), + # Case 2 + # New schema incompatible with both baseline schemas (string --> int, double --> int) + # Non-transitive mode + # --> Incompatibilies are found only against last baseline schema (double --> int) + SchemaCompatibilityTestCase( + test_name="case2", + compatibility_mode="BACKWARD", + register_baseline_schemas=_register_baseline_schemas_with_incompatibilities, + new_schema=json.dumps(schema_int), + expected_is_compatible=False, + expected_status_code=200, + expected_incompatibilities="reader type: int not compatible with writer type: double", + ), + # Case 3 + # Same as previous case, but in non-transitive mode + # --> Incompatibilies are found in the first baseline schema (string --> int) + SchemaCompatibilityTestCase( + test_name="case3", + compatibility_mode="BACKWARD_TRANSITIVE", + register_baseline_schemas=_register_baseline_schemas_with_incompatibilities, + new_schema=json.dumps(schema_int), + expected_is_compatible=False, + expected_status_code=200, + expected_incompatibilities="reader type: int not compatible with writer type: string", + ), + # Case 4 + # Same as case 2, but with a deleted schema among baseline ones + # Non-transitive mode + # --> The delete schema is ignored + # --> Incompatibilies are found only against last baseline schema (double --> int) + SchemaCompatibilityTestCase( + test_name="case4", + compatibility_mode="BACKWARD", + register_baseline_schemas=_register_baseline_schemas_with_incompatibilities_and_a_deleted_schema, + new_schema=json.dumps(schema_int), + expected_is_compatible=False, + expected_status_code=200, + expected_incompatibilities="reader type: int not compatible with writer type: double", + 
), + # Case 5 + # Same as case 3, but with a deleted schema among baseline ones + # --> The delete schema is ignored + # --> Incompatibilies are found in the first baseline schema (string --> int) + SchemaCompatibilityTestCase( + test_name="case5", + compatibility_mode="BACKWARD_TRANSITIVE", + register_baseline_schemas=_register_baseline_schemas_with_incompatibilities_and_a_deleted_schema, + new_schema=json.dumps(schema_int), + expected_is_compatible=False, + expected_status_code=200, + expected_incompatibilities="reader type: int not compatible with writer type: string", + ), + # Case 6 + # A new schema and no baseline schemas + # Non-transitive mode + # --> No incompatibilities are found + # --> Status code is 404 because `latest` version to check against does not exists + SchemaCompatibilityTestCase( + test_name="case6", + compatibility_mode="BACKWARD", + register_baseline_schemas=_register_no_baseline_schemas, + new_schema=json.dumps(schema_int), + expected_is_compatible=None, + expected_status_code=404, + expected_incompatibilities=None, + ), + # Case 7 + # Same as previous case, but in non-transitive mode + # --> No incompatibilities are found + # --> Status code is 404 because `latest` version to check against does not exists + SchemaCompatibilityTestCase( + test_name="case7", + compatibility_mode="BACKWARD_TRANSITIVE", + register_baseline_schemas=_register_no_baseline_schemas, + new_schema=json.dumps(schema_int), + expected_is_compatible=None, + expected_status_code=404, + expected_incompatibilities=None, + ), + ], +) +async def test_schema_compatibility(test_case: SchemaCompatibilityTestCase, registry_async_client: Client) -> None: + subject = Subject(f"subject_{test_case.test_name}") + + await test_case.register_baseline_schemas(registry_async_client, subject) + await _set_compatibility_mode(registry_async_client, subject, test_case.compatibility_mode) + + LOG.info("Validating new schema: %s", test_case.new_schema) + res = await registry_async_client.post( + f"compatibility/subjects/{subject}/versions/latest", json={"schema": test_case.new_schema} + ) + + assert res.status_code == test_case.expected_status_code + assert res.json().get("is_compatible") == test_case.expected_is_compatible + assert res.json().get("incompatibilities", None) == test_case.expected_incompatibilities diff --git a/tests/unit/compatibility/test_compatibility.py b/tests/unit/compatibility/test_compatibility.py index 76f0e22b9..af41aae99 100644 --- a/tests/unit/compatibility/test_compatibility.py +++ b/tests/unit/compatibility/test_compatibility.py @@ -20,3 +20,27 @@ def test_schema_type_can_change_when_mode_none() -> None: old_schema=avro_schema, new_schema=json_schema, compatibility_mode=CompatibilityModes.NONE ) assert result.compatibility is SchemaCompatibilityType.compatible + + +def test_schema_compatible_in_transitive_mode() -> None: + old_json = '{"type": "array", "name": "name_old"}' + new_json = '{"type": "array", "name": "name_new"}' + old_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, old_json) + new_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, new_json) + + result = SchemaCompatibility.check_compatibility( + old_schema=old_schema, new_schema=new_schema, compatibility_mode=CompatibilityModes.FULL_TRANSITIVE + ) + assert result.compatibility is SchemaCompatibilityType.compatible + + +def test_schema_incompatible_in_transitive_mode() -> None: + old_json = '{"type": "array"}' + new_json = '{"type": "integer"}' + old_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, 
old_json) + new_schema = ValidatedTypedSchema.parse(SchemaType.JSONSCHEMA, new_json) + + result = SchemaCompatibility.check_compatibility( + old_schema=old_schema, new_schema=new_schema, compatibility_mode=CompatibilityModes.FULL_TRANSITIVE + ) + assert result.compatibility is SchemaCompatibilityType.incompatible diff --git a/tests/unit/test_schema_registry_api.py b/tests/unit/test_schema_registry_api.py index 6d850f5fc..7fcecd47e 100644 --- a/tests/unit/test_schema_registry_api.py +++ b/tests/unit/test_schema_registry_api.py @@ -14,7 +14,7 @@ import pytest -async def test_validate_schema_request_body(): +async def test_validate_schema_request_body() -> None: controller = KarapaceSchemaRegistryController(config=set_config_defaults(DEFAULTS)) controller._validate_schema_request_body( # pylint: disable=W0212 @@ -30,7 +30,7 @@ async def test_validate_schema_request_body(): assert str(exc_info.value) == "HTTPResponse 422" -async def test_forward_when_not_ready(): +async def test_forward_when_not_ready() -> None: with patch("karapace.schema_registry_apis.KarapaceSchemaRegistry") as schema_registry_class: schema_reader_mock = Mock(spec=KafkaSchemaReader) ready_property_mock = PropertyMock(return_value=False) diff --git a/website/source/quickstart.rst b/website/source/quickstart.rst index 6e6ecdba6..f640e68d2 100644 --- a/website/source/quickstart.rst +++ b/website/source/quickstart.rst @@ -60,6 +60,10 @@ Test the compatibility of a schema with the latest schema under subject "test-ke $KARAPACE_REGISTRY_URI/compatibility/subjects/test-key/versions/latest {"is_compatible":true} +NOTE: if the subject's compatibility mode is transitive (BACKWARD_TRANSITIVE, FORWARD_TRANSITIVE or FULL_TRANSITIVE) then the +compatibility is checked not only against the latest schema, but also against all previous schemas, as it would be done +when trying to register the new schema through the `subjects//versions` endpoint. + Get current global backwards compatibility setting value:: $ curl -X GET $KARAPACE_REGISTRY_URI/config From 429e18c53007e61d56defc0f4bb37a61ec32240b Mon Sep 17 00:00:00 2001 From: Davide Armand Date: Thu, 26 Sep 2024 13:19:09 +0200 Subject: [PATCH 10/18] feat: return all schema validation errors --- src/karapace/schema_registry.py | 6 ++++-- src/karapace/schema_registry_apis.py | 3 ++- .../test_dependencies_compatibility_protobuf.py | 10 +++++----- tests/integration/test_schema.py | 12 ++++++++---- tests/integration/test_schema_protobuf.py | 4 ++-- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/karapace/schema_registry.py b/src/karapace/schema_registry.py index ee0ae88cd..67f58fddd 100644 --- a/src/karapace/schema_registry.py +++ b/src/karapace/schema_registry.py @@ -352,12 +352,14 @@ async def write_new_schema_local( result = self.check_schema_compatibility(new_schema, subject) if is_incompatible(result): - message = set(result.messages).pop() if result.messages else "" LOG.warning( "Incompatible schema: %s, incompatibilities: %s", result.compatibility, result.incompatibilities ) compatibility_mode = self.get_compatibility_mode(subject=subject) - raise IncompatibleSchema(f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}") + raise IncompatibleSchema( + f"Incompatible schema, compatibility_mode={compatibility_mode.value}. 
" + f"Incompatibilities: {', '.join(result.messages)[:300]}" + ) # We didn't find an existing schema and the schema is compatible so go and create one version = self.database.get_next_version(subject=subject) diff --git a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py index f6d4593f7..a37a3ff9f 100644 --- a/src/karapace/schema_registry_apis.py +++ b/src/karapace/schema_registry_apis.py @@ -412,7 +412,8 @@ async def compatibility_check( result = SchemaCompatibility.check_compatibility(old_schema, new_schema, compatibility_mode) if is_incompatible(result): - self.r({"is_compatible": False}, content_type) + maybe_truncated_error = ", ".join(result.messages)[:300] + self.r({"is_compatible": False, "incompatibilities": maybe_truncated_error}, content_type) self.r({"is_compatible": True}, content_type) async def schemas_list(self, content_type: str, *, request: HTTPRequest, user: User | None = None): diff --git a/tests/integration/test_dependencies_compatibility_protobuf.py b/tests/integration/test_dependencies_compatibility_protobuf.py index 2bacbdf7b..725611b5c 100644 --- a/tests/integration/test_dependencies_compatibility_protobuf.py +++ b/tests/integration/test_dependencies_compatibility_protobuf.py @@ -183,7 +183,7 @@ async def test_protobuf_schema_compatibility_dependencies(registry_async_client: json={"schemaType": "PROTOBUF", "schema": evolved_schema, "references": evolved_references}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False @pytest.mark.parametrize("trail", ["", "/"]) @@ -271,7 +271,7 @@ async def test_protobuf_schema_compatibility_dependencies1(registry_async_client json={"schemaType": "PROTOBUF", "schema": evolved_schema, "references": evolved_references}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False # Do compatibility check when message field is altered from referenced type to google type @@ -339,7 +339,7 @@ async def test_protobuf_schema_compatibility_dependencies1g(registry_async_clien json={"schemaType": "PROTOBUF", "schema": evolved_schema}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False # Do compatibility check when message field is altered from google type to referenced type @@ -407,7 +407,7 @@ async def test_protobuf_schema_compatibility_dependencies1g_otherway(registry_as json={"schemaType": "PROTOBUF", "schema": evolved_schema, "references": container_references}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False @pytest.mark.parametrize("trail", ["", "/"]) @@ -491,7 +491,7 @@ async def test_protobuf_schema_compatibility_dependencies2(registry_async_client json={"schemaType": "PROTOBUF", "schema": evolved_schema, "references": evolved_references}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert res.json().get("is_compatible") is False SIMPLE_SCHEMA = """\ diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index 546c19e0b..bb4448d80 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -332,7 +332,8 @@ async def test_compatibility_endpoint(registry_async_client: Client, trail: str) json={"schema": json.dumps(schema)}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": False} + assert 
res.json().get("is_compatible") is False + assert res.json().get("incompatibilities") == "reader type: string not compatible with writer type: int" @pytest.mark.parametrize("trail", ["", "/"]) @@ -536,7 +537,7 @@ def _test_cases(): json={"schema": json.dumps(schema)}, ) assert res.status_code == 200 - assert res.json() == {"is_compatible": expected} + assert res.json().get("is_compatible") == expected @pytest.mark.parametrize("trail", ["", "/"]) @@ -3243,7 +3244,7 @@ async def test_schema_non_compliant_name_in_existing( json={"schema": json.dumps(evolved_schema)}, ) assert res.status_code == 200 - assert not res.json().get("is_compatible") + assert res.json().get("is_compatible") is False # Post evolved schema, should not be compatible and rejected. res = await registry_async_client.post( @@ -3253,7 +3254,10 @@ async def test_schema_non_compliant_name_in_existing( assert res.status_code == 409 assert res.json() == { "error_code": 409, - "message": "Incompatible schema, compatibility_mode=BACKWARD expected: compliant_name_test.test-schema", + "message": ( + "Incompatible schema, compatibility_mode=BACKWARD. " + "Incompatibilities: expected: compliant_name_test.test-schema" + ), } # Send compatibility configuration for subject that disabled backwards compatibility. diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 4b4471cb2..55825fb92 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -1123,8 +1123,8 @@ async def test_protobuf_error(registry_async_client: Client) -> None: expected=409, expected_msg=( # ACTUALLY THERE NO MESSAGE_DROP!!! - "Incompatible schema, compatibility_mode=BACKWARD " - "Incompatible modification Modification.MESSAGE_DROP found" + "Incompatible schema, compatibility_mode=BACKWARD. 
" + "Incompatibilities: Incompatible modification Modification.MESSAGE_DROP found" ), ) print(f"Adding new schema, subject: '{testdata.subject}'\n{testdata.schema_str}") From e54ad600564965b9e1084963cc8fc02a3d3e69c1 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Wed, 16 Oct 2024 13:46:54 +0300 Subject: [PATCH 11/18] fix: Avro dataclass introspect typing --- src/karapace/avro_dataclasses/introspect.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/karapace/avro_dataclasses/introspect.py b/src/karapace/avro_dataclasses/introspect.py index be9634493..7ba38ab00 100644 --- a/src/karapace/avro_dataclasses/introspect.py +++ b/src/karapace/avro_dataclasses/introspect.py @@ -42,10 +42,17 @@ def _field_type_array(field: Field, origin: type, type_: object) -> AvroType: else: (inner_type,) = get_args(type_) + items: AvroType + if is_dataclass(inner_type): + assert isinstance(inner_type, type) + items = record_schema(inner_type) + else: + items = _field_type(field, inner_type) + return { "name": f"one_of_{field.name}", "type": "array", - "items": (record_schema(inner_type) if is_dataclass(inner_type) else _field_type(field, inner_type)), + "items": items, } @@ -128,7 +135,7 @@ def _field_type(field: Field, type_: object) -> AvroType: # pylint: disable=too T = TypeVar("T", str, int, bool, Enum, None) -def transform_default(type_: type[T], default: T) -> str | int | bool | None: +def transform_default(type_: type[T] | str, default: T) -> str | int | bool | None: if isinstance(default, Enum): assert isinstance(type_, type) assert issubclass(type_, Enum) From a0c664d84fdd4500cb3e3141ecd5f559fe1c0ded Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Wed, 16 Oct 2024 12:18:39 +0300 Subject: [PATCH 12/18] fix: return "messages" instead "incompatibilities" from compatibility API For Confluent Schema Registry client compatibility the response field is changed from "incompatibilities: str" to "messages: list[str]". 
--- src/karapace/schema_registry_apis.py | 3 +-- tests/integration/test_schema.py | 2 +- tests/integration/test_schema_compatibility.py | 12 ++++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/karapace/schema_registry_apis.py b/src/karapace/schema_registry_apis.py index a37a3ff9f..fbb8f5a0c 100644 --- a/src/karapace/schema_registry_apis.py +++ b/src/karapace/schema_registry_apis.py @@ -412,8 +412,7 @@ async def compatibility_check( result = SchemaCompatibility.check_compatibility(old_schema, new_schema, compatibility_mode) if is_incompatible(result): - maybe_truncated_error = ", ".join(result.messages)[:300] - self.r({"is_compatible": False, "incompatibilities": maybe_truncated_error}, content_type) + self.r({"is_compatible": False, "messages": list(result.messages)}, content_type) self.r({"is_compatible": True}, content_type) async def schemas_list(self, content_type: str, *, request: HTTPRequest, user: User | None = None): diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index bb4448d80..668bec657 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -333,7 +333,7 @@ async def test_compatibility_endpoint(registry_async_client: Client, trail: str) ) assert res.status_code == 200 assert res.json().get("is_compatible") is False - assert res.json().get("incompatibilities") == "reader type: string not compatible with writer type: int" + assert res.json().get("messages") == ["reader type: string not compatible with writer type: int"] @pytest.mark.parametrize("trail", ["", "/"]) diff --git a/tests/integration/test_schema_compatibility.py b/tests/integration/test_schema_compatibility.py index 82228ba32..d71237d7c 100644 --- a/tests/integration/test_schema_compatibility.py +++ b/tests/integration/test_schema_compatibility.py @@ -44,7 +44,7 @@ class SchemaCompatibilityTestCase(BaseTestCase): register_baseline_schemas: SchemaRegitrationFunc expected_is_compatible: bool | None expected_status_code: int - expected_incompatibilities: str | None + expected_incompatibilities: list[str] | None async def _register_baseline_schemas_no_incompatibilities(registry_async_client: Client, subject: Subject) -> None: @@ -149,7 +149,7 @@ async def _set_compatibility_mode(registry_async_client: Client, subject: Subjec new_schema=json.dumps(schema_int), expected_is_compatible=False, expected_status_code=200, - expected_incompatibilities="reader type: int not compatible with writer type: double", + expected_incompatibilities=["reader type: int not compatible with writer type: double"], ), # Case 3 # Same as previous case, but in non-transitive mode @@ -161,7 +161,7 @@ async def _set_compatibility_mode(registry_async_client: Client, subject: Subjec new_schema=json.dumps(schema_int), expected_is_compatible=False, expected_status_code=200, - expected_incompatibilities="reader type: int not compatible with writer type: string", + expected_incompatibilities=["reader type: int not compatible with writer type: string"], ), # Case 4 # Same as case 2, but with a deleted schema among baseline ones @@ -175,7 +175,7 @@ async def _set_compatibility_mode(registry_async_client: Client, subject: Subjec new_schema=json.dumps(schema_int), expected_is_compatible=False, expected_status_code=200, - expected_incompatibilities="reader type: int not compatible with writer type: double", + expected_incompatibilities=["reader type: int not compatible with writer type: double"], ), # Case 5 # Same as case 3, but with a deleted schema among baseline ones 
@@ -188,7 +188,7 @@ async def _set_compatibility_mode(registry_async_client: Client, subject: Subjec new_schema=json.dumps(schema_int), expected_is_compatible=False, expected_status_code=200, - expected_incompatibilities="reader type: int not compatible with writer type: string", + expected_incompatibilities=["reader type: int not compatible with writer type: string"], ), # Case 6 # A new schema and no baseline schemas @@ -232,4 +232,4 @@ async def test_schema_compatibility(test_case: SchemaCompatibilityTestCase, regi assert res.status_code == test_case.expected_status_code assert res.json().get("is_compatible") == test_case.expected_is_compatible - assert res.json().get("incompatibilities", None) == test_case.expected_incompatibilities + assert res.json().get("messages") == test_case.expected_incompatibilities From 2891a87b35f5a06b32e70b46eeb1f66c9997bedd Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Wed, 16 Oct 2024 14:46:27 +0300 Subject: [PATCH 13/18] chore: remove schema tool source layout check --- src/karapace/backup/backends/v3/schema_tool.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/karapace/backup/backends/v3/schema_tool.py b/src/karapace/backup/backends/v3/schema_tool.py index 94fb45db0..f5843ceb2 100644 --- a/src/karapace/backup/backends/v3/schema_tool.py +++ b/src/karapace/backup/backends/v3/schema_tool.py @@ -59,17 +59,6 @@ def relative_path(path: pathlib.Path) -> pathlib.Path: return pathlib.Path(str_path[len(cwd) + 1 :]) if str_path.startswith(cwd) else path -def target_has_source_layout(git_target: str) -> bool: - with subprocess.Popen( - ["git", "show", f"{git_target}:src"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) as cp: - if cp.returncode == 128: - return False - return True - - def check_compatibility(git_target: str) -> None: errored = False found_any = False @@ -82,13 +71,8 @@ def check_compatibility(git_target: str) -> None: subprocess.run(["git", "fetch", remote, branch], check=True, capture_output=True) - # Does the target version have source layout - source_layout = target_has_source_layout(git_target) - for file in schema_directory.glob(f"*{extension}"): relative = relative_path(file) - if not source_layout: - relative = pathlib.Path(*relative.parts[1:]) with subprocess.Popen( ["git", "show", f"{git_target}:{relative}"], stdout=subprocess.PIPE, From 4c41189c915a9d108cd048f1e887b551c3d7b528 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Thu, 17 Oct 2024 09:55:50 +0300 Subject: [PATCH 14/18] fix: validate REST Proxy subscription param types --- src/karapace/kafka_rest_apis/consumer_manager.py | 14 ++++++++++++++ tests/integration/test_rest_consumer.py | 10 ++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/karapace/kafka_rest_apis/consumer_manager.py b/src/karapace/kafka_rest_apis/consumer_manager.py index b02902e3d..809478f4c 100644 --- a/src/karapace/kafka_rest_apis/consumer_manager.py +++ b/src/karapace/kafka_rest_apis/consumer_manager.py @@ -150,6 +150,16 @@ def _illegal_state_fail(message: str, content_type: str) -> None: message=message, ) + @staticmethod + def _unprocessable_entity(*, message: str, content_type: str) -> None: + ConsumerManager._assert( + cond=False, + code=HTTPStatus.UNPROCESSABLE_ENTITY, + sub_code=RESTErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, + content_type=content_type, + message=message, + ) + # external api below # CONSUMER async def create_consumer(self, group_name: str, request_data: dict, content_type: str): @@ -318,7 +328,11 @@ async def set_subscription(self, 
internal_name: tuple[str, str], content_type: s LOG.info("Updating subscription for %s", internal_name) self._assert_consumer_exists(internal_name, content_type) topics = request_data.get("topics", []) + if topics and not isinstance(topics, list): + self._unprocessable_entity(message="Topics is expected to be list of strings", content_type=content_type) topics_pattern = request_data.get("topic_pattern") + if topics_pattern and not isinstance(topics_pattern, str): + self._unprocessable_entity(message="Topic patterns is expected to be a string", content_type=content_type) if not (topics or topics_pattern): self._illegal_state_fail( message="Neither topic_pattern nor topics are present in request", content_type=content_type diff --git a/tests/integration/test_rest_consumer.py b/tests/integration/test_rest_consumer.py index 1c5f6083a..f0003dbdd 100644 --- a/tests/integration/test_rest_consumer.py +++ b/tests/integration/test_rest_consumer.py @@ -167,6 +167,16 @@ async def test_subscription(rest_async_client, admin_client, producer, trail): res = await rest_async_client.post(assign_path, headers=REST_HEADERS["json"], json=assign_payload) assert res.status_code == 409, "Expecting status code 409 on assign after subscribe on the same consumer instance" + # topics parameter is expected to be array, 4xx error returned + res = await rest_async_client.post(sub_path, json={"topics": topic_name}, headers=REST_HEADERS["json"]) + assert res.status_code == 422, "Expecting status code 422 on subscription update with invalid topics param" + + # topic pattern parameter is expected to be a string, 4xx error returned + res = await rest_async_client.post( + sub_path, json={"topic_pattern": ["not", "a", "string"]}, headers=REST_HEADERS["json"] + ) + assert res.status_code == 422, "Expecting status code 422 on subscription update with invalid topics param" + @pytest.mark.parametrize("trail", ["", "/"]) async def test_seek(rest_async_client, admin_client, trail): From cfc4e1a2421dc19b3c94aaeb35fd245255703446 Mon Sep 17 00:00:00 2001 From: Davide Armand Date: Fri, 25 Oct 2024 00:19:57 +0200 Subject: [PATCH 15/18] tests,backup: fix flakiness by using current timestamps Using some (old) fixed timestamps when producing messages was causing the messages to be automatically deleted because past message retention. I think most of the time the test was managing to complete successfully before the retention kicked in, making it fail only sometimes. --- tests/integration/backup/test_v3_backup.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/integration/backup/test_v3_backup.py b/tests/integration/backup/test_v3_backup.py index 744437be6..332b09f0a 100644 --- a/tests/integration/backup/test_v3_backup.py +++ b/tests/integration/backup/test_v3_backup.py @@ -119,14 +119,13 @@ def test_roundtrip_from_kafka_state( admin_client.update_topic_config(new_topic.topic, {"max.message.bytes": "999"}) # Populate topic. 
- producer.send( + first_record_fut = producer.send( new_topic.topic, key=b"bar", value=b"foo", partition=0, - timestamp=1683474641, ) - producer.send( + second_record_fut = producer.send( new_topic.topic, key=b"foo", value=b"bar", @@ -135,10 +134,12 @@ def test_roundtrip_from_kafka_state( ("some-header", b"some header value"), ("other-header", b"some other header value"), ], - timestamp=1683474657, ) producer.flush() + first_message_timestamp = first_record_fut.result(timeout=5).timestamp()[1] + second_message_timestamp = second_record_fut.result(timeout=5).timestamp()[1] + topic_config = get_topic_configurations(admin_client, new_topic.topic, {ConfigSource.DYNAMIC_TOPIC_CONFIG}) # Execute backup creation. @@ -212,7 +213,7 @@ def test_roundtrip_from_kafka_state( # Note: This might be unreliable due to not using idempotent producer, i.e. we have # no guarantee against duplicates currently. assert first_record.offset() == 0 - assert first_record.timestamp()[1] == 1683474641 + assert first_record.timestamp()[1] == first_message_timestamp assert first_record.timestamp()[0] == Timestamp.CREATE_TIME assert first_record.key() == b"bar" assert first_record.value() == b"foo" @@ -223,7 +224,7 @@ def test_roundtrip_from_kafka_state( assert second_record.topic() == new_topic.topic assert second_record.partition() == partition assert second_record.offset() == 1 - assert second_record.timestamp()[1] == 1683474657 + assert second_record.timestamp()[1] == second_message_timestamp assert second_record.timestamp()[0] == Timestamp.CREATE_TIME assert second_record.key() == b"foo" assert second_record.value() == b"bar" From 7a3c8395358a34ff80210db8eaa7e95eb13e5dbb Mon Sep 17 00:00:00 2001 From: Jonas Keeling Date: Wed, 30 Oct 2024 16:07:31 +0100 Subject: [PATCH 16/18] fix: proto3 optionals from base64 encoded proto descriptors This fixes deserialization of optionals in proto3 descriptors as they are represented by synthetic oneofs. --- src/karapace/protobuf/serialization.py | 20 ++++++++++++++++--- tests/schemas/protobuf.py | 18 +++++++++++++++++ .../test_protobuf_binary_serialization.py | 4 ++++ 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/src/karapace/protobuf/serialization.py b/src/karapace/protobuf/serialization.py index 6c3ca61fd..123e80c8f 100644 --- a/src/karapace/protobuf/serialization.py +++ b/src/karapace/protobuf/serialization.py @@ -93,17 +93,31 @@ def _deserialize_msg(msgtype: Any) -> MessageElement: for nested_enum in msgtype.enum_type: nested_types.append(_deserialize_enum(nested_enum)) - one_ofs: list[OneOfElement] = [OneOfElement(oneof.name) for oneof in msgtype.oneof_decl] + one_ofs: list[OneOfElement | None] = [OneOfElement(oneof.name) for oneof in msgtype.oneof_decl] for f in msgtype.field: sf = _deserialize_field(f) - if f.HasField("oneof_index"): + is_oneof = f.HasField("oneof_index") + is_proto3_optional = f.HasField("oneof_index") and f.HasField("proto3_optional") and f.proto3_optional + if is_proto3_optional: + # Every proto3 optional field is placed into a one-field oneof, called a "synthetic" oneof, + # as it was not present in the source .proto file. + # This will make sure that we don't interpret those optionals as oneof. 
+ one_ofs[f.oneof_index] = None + fields.append(sf) + elif is_oneof: one_ofs[f.oneof_index].fields.append(sf) else: fields.append(sf) + one_ofs_filtered: list[OneOfElement] = [oneof for oneof in one_ofs if oneof is not None] return MessageElement( - DEFAULT_LOCATION, msgtype.name, nested_types=nested_types, reserveds=reserveds, fields=fields, one_ofs=one_ofs + DEFAULT_LOCATION, + msgtype.name, + nested_types=nested_types, + reserveds=reserveds, + fields=fields, + one_ofs=one_ofs_filtered, ) diff --git a/tests/schemas/protobuf.py b/tests/schemas/protobuf.py index 1bd3f05d5..afbb3f890 100644 --- a/tests/schemas/protobuf.py +++ b/tests/schemas/protobuf.py @@ -261,3 +261,21 @@ "lzdGVyLk1ldGFkYXRhEhYKDmNvbXBhbnlfbnVtYmVyGAIgASgJGhYKCE1ldGFkYXRhEgoK" "AmlkGAEgASgJYgZwcm90bzM=" ) + +schema_protobuf_optionals_bin = ( + "Cgp0ZXN0LnByb3RvIqYBCgpEaW1lbnNpb25zEhEKBHNpemUYASABKAFIAIgBARISCgV3aWR0aBgCIAEoAUgBiAEBEhMKBmhlaWdodBgDIAEo" + + "AUgCiAEBEhMKBmxlbmd0aBgEIAEoAUgDiAEBEhMKBndlaWdodBgFIAEoAUgEiAEBQgcKBV9zaXplQggKBl93aWR0aEIJCgdfaGVpZ2h0Qg" + + "kKB19sZW5ndGhCCQoHX3dlaWdodGIGcHJvdG8z" +) + +schema_protobuf_optionals = """\ +syntax = "proto3"; + +message Dimensions { + optional double size = 1; + optional double width = 2; + optional double height = 3; + optional double length = 4; + optional double weight = 5; +} +""" diff --git a/tests/unit/test_protobuf_binary_serialization.py b/tests/unit/test_protobuf_binary_serialization.py index 6950066d3..99bfe375e 100644 --- a/tests/unit/test_protobuf_binary_serialization.py +++ b/tests/unit/test_protobuf_binary_serialization.py @@ -16,6 +16,8 @@ schema_protobuf_nested_message4_bin_protoc, schema_protobuf_oneof, schema_protobuf_oneof_bin, + schema_protobuf_optionals, + schema_protobuf_optionals_bin, schema_protobuf_order_after, schema_protobuf_order_after_bin, schema_protobuf_plain, @@ -89,6 +91,7 @@ (schema_protobuf_references, schema_protobuf_references_bin), (schema_protobuf_references2, schema_protobuf_references2_bin), (schema_protobuf_complex, schema_protobuf_complex_bin), + (schema_protobuf_optionals, schema_protobuf_optionals_bin), ], ) def test_schema_deserialize(schema_plain, schema_serialized): @@ -125,6 +128,7 @@ def test_protoc_serialized_schema_deserialize(schema_plain, schema_serialized): schema_protobuf_references, schema_protobuf_references2, schema_protobuf_complex, + schema_protobuf_optionals, ], ) def test_simple_schema_serialize(schema): From 6336082170c1045c3c997d53266cab781fc7df7d Mon Sep 17 00:00:00 2001 From: Francesco D'Orlandi Date: Thu, 31 Oct 2024 18:46:52 +0100 Subject: [PATCH 17/18] backup: add command line flag to override replication factor in restore command --- src/karapace/backup/api.py | 12 ++++- src/karapace/backup/cli.py | 10 +++++ tests/integration/backup/test_v3_backup.py | 52 +++++++++++++++++++++- 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/src/karapace/backup/api.py b/src/karapace/backup/api.py index d06c99ebe..3da9a2304 100644 --- a/src/karapace/backup/api.py +++ b/src/karapace/backup/api.py @@ -373,13 +373,20 @@ def _handle_restore_topic( instruction: RestoreTopic, config: Config, skip_topic_creation: bool = False, + override_replication_factor: int | None = None, ) -> None: if skip_topic_creation: return + repl_factor = instruction.replication_factor + if override_replication_factor is not None: + LOG.info( + "Overriding replication factor with: %d (was: %d)", override_replication_factor, instruction.replication_factor + ) + repl_factor = override_replication_factor if not _maybe_create_topic( 
         config=config,
         name=instruction.topic_name,
-        replication_factor=instruction.replication_factor,
+        replication_factor=repl_factor,
         topic_configs=instruction.topic_configs,
     ):
         raise BackupTopicAlreadyExists(f"Topic to restore '{instruction.topic_name}' already exists")
@@ -426,6 +433,7 @@ def restore_backup(
     backup_location: ExistingFile,
     topic_name: TopicName,
     skip_topic_creation: bool = False,
+    override_replication_factor: int | None = None,
 ) -> None:
     """Restores a backup from the specified location into the configured topic.
 
@@ -475,7 +483,7 @@ def _check_producer_exception() -> None:
             _handle_restore_topic_legacy(instruction, config, skip_topic_creation)
             producer = stack.enter_context(_producer(config, instruction.topic_name))
         elif isinstance(instruction, RestoreTopic):
-            _handle_restore_topic(instruction, config, skip_topic_creation)
+            _handle_restore_topic(instruction, config, skip_topic_creation, override_replication_factor)
             producer = stack.enter_context(_producer(config, instruction.topic_name))
         elif isinstance(instruction, ProducerSend):
             if producer is None:
diff --git a/src/karapace/backup/cli.py b/src/karapace/backup/cli.py
index 5e3d72854..7125b1e04 100644
--- a/src/karapace/backup/cli.py
+++ b/src/karapace/backup/cli.py
@@ -76,6 +76,15 @@ def parse_args() -> argparse.Namespace:
         ),
     )
 
+    parser_restore.add_argument(
+        "--override-replication-factor",
+        help=(
+            "Override the replication factor that is saved in the backup. This is needed when restoring a backup from a "
+            "downsized cluster (like scaling down from 6 to 3 nodes). This has an effect only for V3 backups."
+        ),
+        type=int,
+    )
+
     return parser.parse_args()
 
 
@@ -115,6 +124,7 @@ def dispatch(args: argparse.Namespace) -> None:
             backup_location=api.locate_backup_file(location),
             topic_name=api.normalize_topic_name(args.topic, config),
             skip_topic_creation=args.skip_topic_creation,
+            override_replication_factor=args.override_replication_factor,
         )
     except BackupDataRestorationError:
         traceback.print_exc()
diff --git a/tests/integration/backup/test_v3_backup.py b/tests/integration/backup/test_v3_backup.py
index 332b09f0a..6f2e5df35 100644
--- a/tests/integration/backup/test_v3_backup.py
+++ b/tests/integration/backup/test_v3_backup.py
@@ -4,7 +4,7 @@
 """
 from __future__ import annotations
 
-from aiokafka.errors import UnknownTopicOrPartitionError
+from aiokafka.errors import InvalidReplicationFactorError, UnknownTopicOrPartitionError
 from collections.abc import Iterator
 from confluent_kafka import Message, TopicPartition
 from confluent_kafka.admin import NewTopic
@@ -698,6 +698,56 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
     )
 
 
+def test_backup_restoration_override_replication_factor(
+    admin_client: KafkaAdminClient,
+    kafka_servers: KafkaServers,
+    producer: KafkaProducer,
+    new_topic: NewTopic,
+) -> None:
+    backup_directory = Path(__file__).parent.parent.resolve() / "test_data" / "backup_v3_single_partition" / new_topic.topic
+    metadata_path = backup_directory / f"{new_topic.topic}.metadata"
+    config = set_config_defaults(
+        {
+            "bootstrap_uri": kafka_servers.bootstrap_servers,
+        }
+    )
+
+    # populate the topic and create a backup
+    for i in range(10):
+        producer.send(
+            new_topic.topic,
+            key=f"message-key-{i}",
+            value=f"message-value-{i}-" + 1000 * "X",
+        )
+    producer.flush()
+    api.create_backup(
+        config=config,
+        backup_location=backup_directory,
+        topic_name=TopicName(new_topic.topic),
+        version=BackupVersion.V3,
+        replication_factor=6,
+    )
+
+    # make sure the topic doesn't exist beforehand
+    _delete_topic(admin_client, new_topic.topic)
+
+    # assert that the restore would fail without the replication factor override
+    with pytest.raises(InvalidReplicationFactorError):
+        api.restore_backup(
+            config=config,
+            backup_location=metadata_path,
+            topic_name=TopicName(new_topic.topic),
+        )
+
+    # finally restore the backup with the override
+    api.restore_backup(
+        config=config,
+        backup_location=metadata_path,
+        topic_name=TopicName(new_topic.topic),
+        override_replication_factor=1,
+    )
+
+
 def no_color_env() -> dict[str, str]:
     env = os.environ.copy()
     try:

From 0e8e831da788bd14e23bd0c9e87b2395f966aeb2 Mon Sep 17 00:00:00 2001
From: Kevin Michel
Date: Thu, 7 Nov 2024 01:39:57 +0100
Subject: [PATCH 18/18] fix: don't send all aiokafka log.error to sentry

These log events are numerous and costly.

When these `log.error` events are related to a real error, they cause
exceptions that are still properly handled and recorded by Sentry, so
there is no need to also send an individual event for every
`log.error` call in addition to the exceptions.

The same logic was already applied for the `kafka` library.
---
 src/karapace/sentry/sentry_client.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/karapace/sentry/sentry_client.py b/src/karapace/sentry/sentry_client.py
index 776214c7f..c4dc99d33 100644
--- a/src/karapace/sentry/sentry_client.py
+++ b/src/karapace/sentry/sentry_client.py
@@ -41,6 +41,8 @@ def _initialize_sentry(self) -> None:
         # Don't send library logged errors to Sentry as there is also proper return value or raised exception to calling code
         from sentry_sdk.integrations.logging import ignore_logger
 
+        ignore_logger("aiokafka")
+        ignore_logger("aiokafka.*")
         ignore_logger("kafka")
         ignore_logger("kafka.*")
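
Aside on the Sentry change above: a minimal, self-contained sketch (not part of the patch; the DSN, logger name, and messages below are placeholders) of how sentry_sdk's `ignore_logger` behaves. Records emitted through an ignored logger no longer become Sentry events, while explicitly captured exceptions are still reported.

    # Standalone illustration of ignore_logger; assumes only sentry_sdk is installed.
    import logging

    import sentry_sdk
    from sentry_sdk.integrations.logging import ignore_logger

    sentry_sdk.init(dsn="https://public@sentry.invalid/1")  # placeholder DSN

    # After this call, records from the "aiokafka" logger no longer create Sentry events.
    ignore_logger("aiokafka")

    logging.getLogger("aiokafka").error("metadata request failed")  # logged locally only

    try:
        raise RuntimeError("real failure")
    except RuntimeError as exc:
        sentry_sdk.capture_exception(exc)  # exceptions are still sent to Sentry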