feature: Add topic configurations to v3 format
- added `topic_configurations` and `replication_factor` to the
  Metadata schema
- backed-up configurations include all custom topic configs
  and a set of default configurations
- when restoring a backup in format v3, topic creation fails if
  the topic already exists
giuseppelillo committed Jun 1, 2023
1 parent 82ab84a commit 4f30e20
Showing 28 changed files with 404 additions and 83 deletions.
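The headline behavioral change is on restore. A minimal usage sketch, assuming a loaded karapace Config; the config loading and file paths here are illustrative, not part of this commit:

    # Hypothetical driver code; only restore_backup and BackupTopicAlreadyExists
    # come from this codebase, the rest is setup for the example.
    from pathlib import Path

    from karapace.backup.api import restore_backup
    from karapace.backup.errors import BackupTopicAlreadyExists
    from karapace.config import read_config

    with open("karapace.config.json") as f:  # illustrative config path
        config = read_config(f)

    try:
        restore_backup(
            config=config,
            backup_location=Path("schemas.backup"),  # illustrative backup file
            topic_name="_schemas",
        )
    except BackupTopicAlreadyExists:
        # With format v3, restoring into a topic that already exists now fails
        # instead of silently producing into the existing topic.
        raise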
24 changes: 22 additions & 2 deletions karapace/avro_dataclasses/introspect.py
@@ -6,6 +6,7 @@
 from __future__ import annotations
 
 from .schema import AvroType, FieldSchema, RecordSchema
+from collections.abc import Mapping
 from dataclasses import Field, fields, is_dataclass, MISSING
 from enum import Enum
 from functools import lru_cache
@@ -30,7 +31,7 @@ class UnsupportedAnnotation(NotImplementedError):
     ...
 
 
-class UnderspecifiedArray(UnsupportedAnnotation):
+class UnderspecifiedAnnotation(UnsupportedAnnotation):
     ...
 
 
@@ -93,7 +94,7 @@ def _field_type(field: Field, type_: object) -> AvroType: # pylint: disable=too
     if origin in sequence_types:
         return _field_type_array(field, origin, type_)
     if type_ in sequence_types:
-        raise UnderspecifiedArray("Inner type must be specified for sequence types")
+        raise UnderspecifiedAnnotation("Inner type must be specified for sequence types")
 
     # Handle enums.
     if isinstance(type_, type) and issubclass(type_, Enum):
@@ -107,6 +108,25 @@ def _field_type(field: Field, type_: object) -> AvroType: # pylint: disable=too
         }
     )
 
+    # Handle map types.
+    if origin is Mapping:
+        args = get_args(type_)
+        if len(args) != 2:
+            raise UnderspecifiedAnnotation("Key and value types must be specified for map types")
+        if args[0] is not str:
+            raise UnsupportedAnnotation("Key type must be str")
+        return FieldSchema(
+            {
+                "type": "map",
+                "values": _field_type(field, args[1]),
+                **(
+                    {"default": field.default_factory()}
+                    if field.default_factory is not MISSING
+                    else {}  # type: ignore[misc]
+                ),
+            }
+        )
+
     raise NotImplementedError(
         f"Found an unknown type {type_!r} while assembling Avro schema for the field "
         f"{field.name!r}. The Avro dataclasses implementation likely needs to be "
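As a sketch of what the new branch enables, a field annotated with Mapping[str, str] should now introspect into an Avro map schema; the dataclass below is invented for illustration:

    # Illustrative example, not part of the commit.
    from collections.abc import Mapping
    from dataclasses import dataclass, field

    @dataclass
    class ExampleRecord:
        # A string-keyed, string-valued mapping with an empty-map default.
        configs: Mapping[str, str] = field(default_factory=dict)

    # Per the logic above, the `configs` field should introspect to:
    #   {"type": "map", "values": "string", "default": {}}

Non-str key types are rejected with UnsupportedAnnotation, since Avro maps are always keyed by strings.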
10 changes: 9 additions & 1 deletion karapace/avro_dataclasses/schema.py
@@ -4,6 +4,7 @@
 """
 from __future__ import annotations
 
+from collections.abc import Mapping
 from typing import Literal
 from typing_extensions import NotRequired, TypeAlias, TypedDict
 
@@ -29,9 +30,16 @@ class EnumType(TypedDict):
     default: NotRequired[str]
 
 
+class MapType(TypedDict):
+    name: str
+    type: Literal["map"]
+    values: AvroType
+    default: NotRequired[Mapping[str, AvroType]]
+
+
 TypeUnit: TypeAlias = "Primitive | TypeObject"
 UnionType: TypeAlias = "list[TypeUnit]"
-AvroType: TypeAlias = "TypeUnit | UnionType | RecordSchema | ArrayType | EnumType"
+AvroType: TypeAlias = "TypeUnit | UnionType | RecordSchema | ArrayType | EnumType | MapType"
 
 
 class FieldSchema(TypedDict):
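For reference, a value conforming to the new MapType shape, mirroring the Metadata field added later in this commit (illustrative):

    # Illustrative only.
    from karapace.avro_dataclasses.schema import MapType

    topic_configurations_field: MapType = {
        "name": "topic_configurations",
        "type": "map",
        "values": "string",
    }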
104 changes: 72 additions & 32 deletions karapace/backup/api.py
@@ -6,13 +6,14 @@
 """
 from __future__ import annotations
 
-from .backends.reader import BaseBackupReader, BaseItemsBackupReader, ProducerSend, RestoreTopic
+from .backends.reader import BaseBackupReader, BaseItemsBackupReader, ProducerSend, RestoreTopic, RestoreTopicLegacy
 from .backends.v3.constants import V3_MARKER
 from .backends.v3.schema import ChecksumAlgorithm
 from .backends.writer import BackupWriter, StdOut
 from .encoders import encode_key, encode_value
-from .errors import BackupError, EmptyPartition, PartitionCountError, StaleConsumerError
+from .errors import BackupError, BackupTopicAlreadyExists, EmptyPartition, PartitionCountError, StaleConsumerError
 from .poll_timeout import PollTimeout
+from .topic_configurations import ConfigSource, get_topic_configurations
 from enum import Enum
 from functools import partial
 from kafka import KafkaConsumer, KafkaProducer
@@ -27,12 +28,11 @@
 from karapace.config import Config
 from karapace.kafka_utils import kafka_admin_from_config, kafka_consumer_from_config, kafka_producer_from_config
 from karapace.key_format import KeyFormatter
-from karapace.schema_reader import new_schema_topic_from_config
 from karapace.utils import assert_never
 from pathlib import Path
 from rich.console import Console
 from tenacity import retry, retry_if_exception_type, RetryCallState, stop_after_delay, wait_fixed
-from typing import AbstractSet, Callable, Collection, Iterator, Literal, NewType, NoReturn, TypeVar
+from typing import AbstractSet, Callable, Collection, Iterator, Literal, Mapping, NewType, NoReturn, TypeVar
 
 import contextlib
 import datetime
@@ -178,31 +178,27 @@ def _admin(config: Config) -> KafkaAdminClient:
     wait=wait_fixed(1),  # seconds
     retry=retry_if_exception_type(KafkaError),
 )
-def _maybe_create_topic(config: Config, name: str, backup_version: BackupVersion) -> None:
-    if backup_version in {BackupVersion.V1, BackupVersion.V2}:
-        topic = new_schema_topic_from_config(config)
-
-        if topic.name != name:
-            LOG.warning(
-                "Not creating topic, because the name %r from the config and the name %r from the CLI differ.",
-                topic.name,
-                name,
-            )
-            return
-    else:
-        topic = NewTopic(
-            name=name,
-            num_partitions=1,
-            replication_factor=config["replication_factor"],
-            topic_configs={"cleanup.policy": "compact"},
-        )
+def _maybe_create_topic(
+    name: str,
+    *,
+    config: Config,
+    replication_factor: int,
+    topic_configs: Mapping[str, str],
+) -> bool:
+    """Returns True if topic creation was successful, False if topic already exists"""
+    topic = NewTopic(
+        name=name,
+        num_partitions=constants.SCHEMA_TOPIC_NUM_PARTITIONS,
+        replication_factor=replication_factor,
+        topic_configs=topic_configs,
+    )
 
     with _admin(config) as admin:
         try:
             admin.create_topics([topic], timeout_ms=constants.TOPIC_CREATION_TIMEOUT_MS)
         except TopicAlreadyExistsError:
             LOG.debug("Topic %r already exists", topic.name)
-            return
+            return False
 
     LOG.info(
         "Created topic %r (partition count: %s, replication factor: %s, config: %s)",
@@ -211,7 +207,7 @@ def _maybe_create_topic(config: Config, name: str, backup_version: BackupVersion
         topic.replication_factor,
         topic.topic_configs,
     )
-    return
+    return True
 
 
 @contextlib.contextmanager
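The version-specific branching that used to live here moves up into the restore handlers below; the helper itself now just reports whether it created the topic. A sketch of the new contract (topic name and factor invented, `config` assumed to be a loaded karapace Config):

    # Illustrative only.
    created = _maybe_create_topic(
        "example-topic",
        config=config,
        replication_factor=3,
        topic_configs={"cleanup.policy": "compact"},
    )
    if not created:
        print("Topic already existed; nothing was created.")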
@@ -307,18 +303,38 @@ def _write_partition(
 )
 
 
-def _handle_restore_topic(
-    instruction: RestoreTopic,
+def _handle_restore_topic_legacy(
+    instruction: RestoreTopicLegacy,
     config: Config,
-    backup_version: BackupVersion,
 ) -> None:
+    if config["topic_name"] != instruction.topic_name:
+        LOG.warning(
+            "Not creating topic, because the name %r from the config and the name %r from the CLI differ.",
+            config["topic_name"],
+            instruction.topic_name,
+        )
+        return
     _maybe_create_topic(
         config=config,
-        name=instruction.name,
-        backup_version=backup_version,
+        name=instruction.topic_name,
+        replication_factor=config["replication_factor"],
+        topic_configs={"cleanup.policy": "compact"},
     )
 
 
+def _handle_restore_topic(
+    instruction: RestoreTopic,
+    config: Config,
+) -> None:
+    if not _maybe_create_topic(
+        config=config,
+        name=instruction.topic_name,
+        replication_factor=instruction.replication_factor,
+        topic_configs=instruction.topic_configs,
+    ):
+        raise BackupTopicAlreadyExists(f"Topic to restore '{instruction.topic_name}' already exists")
+
+
 def _raise_backup_error(exception: Exception) -> NoReturn:
     raise BackupError("Error while producing restored messages") from exception
 
@@ -347,6 +363,12 @@ def restore_backup(
     backup_location: Path | StdOut,
     topic_name: TopicName,
 ) -> None:
+    """Restores a backup from the specified location into the configured topic.
+
+    :raises Exception: if production fails, concrete exception types are unknown,
+        see Kafka implementation.
+    :raises BackupTopicAlreadyExists: if backup version is V3 and topic already exists
+    """
     if isinstance(backup_location, str):
         raise NotImplementedError("Cannot restore backups from stdin")
 
@@ -377,9 +399,12 @@ def restore_backup(
         producer = None
 
         for instruction in backend.read(backup_location, topic_name):
-            if isinstance(instruction, RestoreTopic):
-                _handle_restore_topic(instruction, config, backup_version=backup_version)
-                producer = stack.enter_context(_producer(config, instruction.name))
+            if isinstance(instruction, RestoreTopicLegacy):
+                _handle_restore_topic_legacy(instruction, config)
+                producer = stack.enter_context(_producer(config, instruction.topic_name))
+            elif isinstance(instruction, RestoreTopic):
+                _handle_restore_topic(instruction, config)
+                producer = stack.enter_context(_producer(config, instruction.topic_name))
             elif isinstance(instruction, ProducerSend):
                 if producer is None:
                     raise RuntimeError("Backend has not yet sent RestoreTopic.")
@@ -396,6 +421,7 @@ def create_backup(
     *,
     poll_timeout: PollTimeout = PollTimeout.default(),
     overwrite: bool = False,
+    replication_factor: int | None = None,
 ) -> None:
     """Creates a backup of the configured topic.
@@ -404,6 +430,9 @@
         if not records are received within that time and the target offset has not
         been reached an exception is raised. Defaults to one minute.
     :param overwrite: the output file if it exists.
+    :param replication_factor: if specified, metadata will contain this value for
+        replication factor, instead of the one that would be retrieved from
+        the current topic state.
     :raises Exception: if consumption fails, concrete exception types are unknown,
         see Kafka implementation.
@@ -416,6 +445,8 @@
     """
     if version is BackupVersion.V3 and not isinstance(backup_location, Path):
         raise RuntimeError("Backup format version 3 does not support writing to stdout.")
+    if version is BackupVersion.V3 and replication_factor is None:
+        raise RuntimeError("Backup format version 3 needs a replication factor to be specified.")
 
     start_time = datetime.datetime.now(datetime.timezone.utc)
     backend = version.writer()
@@ -426,6 +457,10 @@
         version.name,
         topic_name,
     )
+    with _admin(config) as admin:
+        topic_configurations = get_topic_configurations(
+            admin=admin, topic_name=topic_name, config_source_filter={ConfigSource.TOPIC_CONFIG}
+        )
 
     # Note: It's expected that we at some point want to introduce handling of
     # multi-partition topics here. The backend interface is built with that in
@@ -464,6 +499,8 @@
         started_at=start_time,
         finished_at=end_time,
         partition_count=1,
+        replication_factor=replication_factor if replication_factor is not None else config["replication_factor"],
+        topic_configurations=topic_configurations,
         data_files=[data_file] if data_file else [],
     )
 
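A hedged usage sketch of the new parameter (paths and values illustrative; `config` assumed to be a loaded karapace Config). Note the guard above makes replication_factor effectively mandatory for v3 backups:

    # Illustrative only.
    from pathlib import Path

    from karapace.backup.api import BackupVersion, create_backup

    create_backup(
        config=config,
        backup_location=Path("schemas.backup"),
        topic_name="_schemas",
        version=BackupVersion.V3,
        replication_factor=3,  # recorded in the v3 metadata
    )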
@@ -506,6 +543,9 @@ def inspect(backup_location: Path | StdOut) -> None:
             "topic_name": metadata.topic_name,
             "topic_id": None if metadata.topic_id is None else str(metadata.topic_id),
             "partition_count": metadata.partition_count,
+            "record_count": metadata.record_count,
+            "replication_factor": metadata.replication_factor,
+            "topic_configurations": metadata.topic_configurations,
             "checksum_algorithm": metadata.checksum_algorithm.value,
             "data_files": tuple(
                 {
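For a v3 backup taken after this change, the inspected metadata would include the new keys, roughly along these lines (all values invented for illustration):

    # Illustrative output shape only; values are made up.
    {
        "topic_name": "_schemas",
        "topic_id": None,
        "partition_count": 1,
        "record_count": 42,
        "replication_factor": 3,
        "topic_configurations": {"cleanup.policy": "compact"},
        "checksum_algorithm": "xxhash3_64_be",  # algorithm name illustrative
    }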
18 changes: 13 additions & 5 deletions karapace/backup/backends/reader.py
@@ -7,7 +7,7 @@
 from karapace.dataclasses import default_dataclass
 from karapace.typing import JsonData, JsonObject
 from pathlib import Path
-from typing import Callable, ClassVar, Final, Generator, IO, Iterator, Optional, Sequence, TypeVar, Union
+from typing import Callable, ClassVar, Final, Generator, IO, Iterator, Mapping, Optional, Sequence, TypeVar, Union
 from typing_extensions import TypeAlias
 
 import abc
@@ -17,10 +17,18 @@
 PARTITION_ZERO: Final = 0
 
 
+@default_dataclass
+class RestoreTopicLegacy:
+    topic_name: str
+    partition_count: int
+
+
 @default_dataclass
 class RestoreTopic:
-    name: str
+    topic_name: str
     partition_count: int
+    replication_factor: int
+    topic_configs: Mapping[str, str]
 
 
 @default_dataclass
@@ -33,7 +41,7 @@ class ProducerSend:
     timestamp: int | None = None
 
 
-Instruction: TypeAlias = "RestoreTopic | ProducerSend"
+Instruction: TypeAlias = "RestoreTopicLegacy | RestoreTopic | ProducerSend"
 
 
 KeyEncoder: TypeAlias = Callable[[Union[JsonObject, str]], Optional[bytes]]
@@ -78,8 +86,8 @@ def read(
         path: Path,
         topic_name: str,
     ) -> Generator[Instruction, None, None]:
-        yield RestoreTopic(
-            name=topic_name,
+        yield RestoreTopicLegacy(
+            topic_name=topic_name,
             partition_count=1,
        )
         with path.open("r") as buffer:
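To make the split concrete, here is a sketch of the two instruction shapes a backend can now emit (field values invented):

    # Illustrative only.
    from karapace.backup.backends.reader import RestoreTopic, RestoreTopicLegacy

    # v1/v2 backends know only the topic name and partition count...
    legacy = RestoreTopicLegacy(topic_name="_schemas", partition_count=1)

    # ...while v3 backends also carry what is needed to recreate the topic faithfully.
    v3 = RestoreTopic(
        topic_name="_schemas",
        partition_count=1,
        replication_factor=3,
        topic_configs={"cleanup.policy": "compact"},
    )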
11 changes: 11 additions & 0 deletions karapace/backup/backends/v3/avro/Metadata.avsc
@@ -48,6 +48,17 @@
       "name": "partition_count",
       "type": "int"
     },
+    {
+      "name": "replication_factor",
+      "type": "int"
+    },
+    {
+      "name": "topic_configurations",
+      "type": {
+        "type": "map",
+        "values": "string"
+      }
+    },
     {
       "name": "data_files",
       "type": {
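The new topic_configurations field is a plain Avro map, which is always string-keyed. A standalone round-trip sketch of that map type using fastavro, assumed here purely for illustration and not implied to be what Karapace itself uses:

    # Illustrative only.
    import io

    import fastavro

    schema = fastavro.parse_schema({"type": "map", "values": "string"})
    buf = io.BytesIO()
    fastavro.schemaless_writer(buf, schema, {"cleanup.policy": "compact"})
    buf.seek(0)
    assert fastavro.schemaless_reader(buf, schema) == {"cleanup.policy": "compact"}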
