From a504ce68126a07c5a8701ab78556c9b688d1ee7d Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 15 Jul 2024 13:50:45 -0400 Subject: [PATCH] Rename "encoding/decoding" to "serialization/deserialization" to match latest CLP terminology. (#68) --- README.md | 14 +- clp_ffi_py/ir/__init__.py | 6 +- clp_ffi_py/ir/native.pyi | 26 +-- clp_ffi_py/ir/readers.py | 50 +++--- setup.py | 10 +- src/clp_ffi_py/PyObjectCast.hpp | 8 +- src/clp_ffi_py/ir/native/LogEvent.hpp | 2 +- src/clp_ffi_py/ir/native/Metadata.hpp | 6 +- src/clp_ffi_py/ir/native/PyDecoder.cpp | 102 ------------ src/clp_ffi_py/ir/native/PyDeserializer.cpp | 103 ++++++++++++ .../{PyDecoder.hpp => PyDeserializer.hpp} | 14 +- ...derBuffer.cpp => PyDeserializerBuffer.cpp} | 137 +++++++-------- ...derBuffer.hpp => PyDeserializerBuffer.hpp} | 72 ++++---- .../ir/native/PyFourByteEncoder.cpp | 145 ---------------- .../ir/native/PyFourByteSerializer.cpp | 143 ++++++++++++++++ ...teEncoder.hpp => PyFourByteSerializer.hpp} | 16 +- src/clp_ffi_py/ir/native/PyLogEvent.cpp | 4 +- src/clp_ffi_py/ir/native/PyMetadata.cpp | 5 +- src/clp_ffi_py/ir/native/PyQuery.cpp | 8 +- src/clp_ffi_py/ir/native/decoding_methods.hpp | 15 -- ...ethods.cpp => deserialization_methods.cpp} | 123 +++++++------- .../ir/native/deserialization_methods.hpp | 15 ++ src/clp_ffi_py/ir/native/encoding_methods.hpp | 16 -- src/clp_ffi_py/ir/native/error_messages.hpp | 18 +- ..._methods.cpp => serialization_methods.cpp} | 28 ++-- .../ir/native/serialization_methods.hpp | 16 ++ src/clp_ffi_py/modules/ir_native.cpp | 17 +- tests/test_ir/__init__.py | 6 +- .../{test_decoder.py => test_deserializer.py} | 156 ++++++++++-------- ..._buffer.py => test_deserializer_buffer.py} | 34 ++-- tests/test_ir/test_encoder.py | 41 ----- tests/test_ir/test_readers.py | 28 ++-- tests/test_ir/test_serializer.py | 40 +++++ 33 files changed, 732 insertions(+), 692 deletions(-) delete mode 100644 
src/clp_ffi_py/ir/native/PyDecoder.cpp create mode 100644 src/clp_ffi_py/ir/native/PyDeserializer.cpp rename src/clp_ffi_py/ir/native/{PyDecoder.hpp => PyDeserializer.hpp} (62%) rename src/clp_ffi_py/ir/native/{PyDecoderBuffer.cpp => PyDeserializerBuffer.cpp} (68%) rename src/clp_ffi_py/ir/native/{PyDecoderBuffer.hpp => PyDeserializerBuffer.hpp} (73%) delete mode 100644 src/clp_ffi_py/ir/native/PyFourByteEncoder.cpp create mode 100644 src/clp_ffi_py/ir/native/PyFourByteSerializer.cpp rename src/clp_ffi_py/ir/native/{PyFourByteEncoder.hpp => PyFourByteSerializer.hpp} (62%) delete mode 100644 src/clp_ffi_py/ir/native/decoding_methods.hpp rename src/clp_ffi_py/ir/native/{decoding_methods.cpp => deserialization_methods.cpp} (74%) create mode 100644 src/clp_ffi_py/ir/native/deserialization_methods.hpp delete mode 100644 src/clp_ffi_py/ir/native/encoding_methods.hpp rename src/clp_ffi_py/ir/native/{encoding_methods.cpp => serialization_methods.cpp} (82%) create mode 100644 src/clp_ffi_py/ir/native/serialization_methods.hpp rename tests/test_ir/{test_decoder.py => test_deserializer.py} (67%) rename tests/test_ir/{test_decoder_buffer.py => test_deserializer_buffer.py} (70%) delete mode 100644 tests/test_ir/test_encoder.py create mode 100644 tests/test_ir/test_serializer.py diff --git a/README.md b/README.md index 416dc984..cb6bd8e5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This module provides Python packages to interface with [CLP Core Features][1] through CLP's FFI (foreign function interface). At present, this library -supplies built-in functions for encoding/decoding log messages using [CLP][2]. +supplies built-in functions for serializing/deserializing log messages using [CLP][2]. > [!IMPORTANT] > This project is no longer built for Python3.6. @@ -66,12 +66,12 @@ To manually build a package for distribution, follow the steps below. 
## CLP IR Readers -CLP IR Readers provide a convenient interface for CLP IR decoding and search +CLP IR Readers provide a convenient interface for CLP IR deserialization and search methods. ### ClpIrStreamReader -- Read/decode any arbitrary CLP IR stream (as an instance of `IO[bytes]`). +- Read+deserialize any arbitrary CLP IR stream (as an instance of `IO[bytes]`). - Can be used as an iterator that returns each log event as a `LogEvent` object. - Can search target log events by giving a search query: - Searching log events within a certain time range. @@ -159,7 +159,7 @@ wildcard_search_query: Query = query_builder.build() matched_log_messages: List[Tuple[int, str]] = [] # A convenience file reader class is also available to interact with a file that -# represents an encoded CLP IR stream directly. +# represents a CLP IR stream directly. with ClpIrFileReader(Path("example.clp.zst")) as clp_reader: for log_event in clp_reader.search(wildcard_search_query): matched_log_messages.append((log_event.get_timestamp(), log_event.get_log_message())) @@ -180,7 +180,7 @@ help(FullStringWildcardQuery) help(SubstringWildcardQuery) ``` -### Streaming Decode/Search Directly from S3 Remote Storage +### Streaming Deserialize/Search Directly from S3 Remote Storage When working with CLP IR files stored on S3-compatible storage systems, [smart_open][17] can be used to open and read the IR stream for the following @@ -207,7 +207,7 @@ session = boto3.Session( ) url = 's3://clp-example-s3-bucket/example.clp.zst' -# Using `smart_open.open` to stream the encoded CLP IR: +# Using `smart_open.open` to stream the CLP IR byte sequence: with smart_open.open( url, mode="rb", compression="disable", transport_params={"client": session.client("s3")} ) as istream: @@ -231,7 +231,7 @@ closed. ### Parallel Processing The `Query` and `LogEvent` classes can be serialized by [pickle][15]. 
Therefore, -decoding and search can be parallelized across streams/files using libraries +deserializing and searching can be parallelized across streams/files using libraries such as [multiprocessing][13] and [tqlm][14]. ## Testing diff --git a/clp_ffi_py/ir/__init__.py b/clp_ffi_py/ir/__init__.py index 6963a44c..372a9259 100644 --- a/clp_ffi_py/ir/__init__.py +++ b/clp_ffi_py/ir/__init__.py @@ -5,9 +5,9 @@ from typing import List __all__: List[str] = [ - "Decoder", # native - "DecoderBuffer", # native - "FourByteEncoder", # native + "Deserializer", # native + "DeserializerBuffer", # native + "FourByteSerializer", # native "IncompleteStreamError", # native "LogEvent", # native "Metadata", # native diff --git a/clp_ffi_py/ir/native.pyi b/clp_ffi_py/ir/native.pyi index f953a5d0..483cab76 100644 --- a/clp_ffi_py/ir/native.pyi +++ b/clp_ffi_py/ir/native.pyi @@ -3,9 +3,9 @@ from typing import Any, Dict, IO, List, Optional from clp_ffi_py.wildcard_query import WildcardQuery -class DecoderBuffer: +class DeserializerBuffer: def __init__(self, input_stream: IO[bytes], initial_buffer_capacity: int = 4096): ... - def get_num_decoded_log_messages(self) -> int: ... + def get_num_deserialized_log_messages(self) -> int: ... def _test_streaming(self, seed: int) -> bytearray: ... class Metadata: @@ -58,24 +58,26 @@ class Query: def get_wildcard_queries(self) -> Optional[List[WildcardQuery]]: ... def match_log_event(self, log_event: LogEvent) -> bool: ... -class FourByteEncoder: +class FourByteSerializer: @staticmethod - def encode_preamble(ref_timestamp: int, timestamp_format: str, timezone: str) -> bytearray: ... + def serialize_preamble( + ref_timestamp: int, timestamp_format: str, timezone: str + ) -> bytearray: ... @staticmethod - def encode_message_and_timestamp_delta(timestamp_delta: int, msg: bytes) -> bytearray: ... + def serialize_message_and_timestamp_delta(timestamp_delta: int, msg: bytes) -> bytearray: ... @staticmethod - def encode_message(msg: bytes) -> bytearray: ... 
+ def serialize_message(msg: bytes) -> bytearray: ... @staticmethod - def encode_timestamp_delta(timestamp_delta: int) -> bytearray: ... + def serialize_timestamp_delta(timestamp_delta: int) -> bytearray: ... @staticmethod - def encode_end_of_ir() -> bytearray: ... + def serialize_end_of_ir() -> bytearray: ... -class Decoder: +class Deserializer: @staticmethod - def decode_preamble(decoder_buffer: DecoderBuffer) -> Metadata: ... + def deserialize_preamble(decoder_buffer: DeserializerBuffer) -> Metadata: ... @staticmethod - def decode_next_log_event( - decoder_buffer: DecoderBuffer, + def deserialize_next_log_event( + decoder_buffer: DeserializerBuffer, query: Optional[Query] = None, allow_incomplete_stream: bool = False, ) -> Optional[LogEvent]: ... diff --git a/clp_ffi_py/ir/readers.py b/clp_ffi_py/ir/readers.py index e02e5ebf..f615f949 100644 --- a/clp_ffi_py/ir/readers.py +++ b/clp_ffi_py/ir/readers.py @@ -7,29 +7,28 @@ from zstandard import ZstdDecompressionReader, ZstdDecompressor -from clp_ffi_py.ir.native import Decoder, DecoderBuffer, LogEvent, Metadata, Query +from clp_ffi_py.ir.native import Deserializer, DeserializerBuffer, LogEvent, Metadata, Query class ClpIrStreamReader(Iterator[LogEvent]): """ - This class represents a stream reader used to read/decode encoded log events from a CLP IR - stream. It also provides method(s) to instantiate a log event generator with a customized search - query. + This class represents a stream reader used to read/deserialize log events from a CLP IR stream. + It also provides method(s) to instantiate a log event generator with a customized search query. - :param istream: Input stream that contains encoded CLP IR. - :param decoder_buffer_size: Initial size of the decoder buffer. + :param istream: Input stream that contains CLP IR byte sequence. + :param deserializer_buffer_size: Initial size of the deserializer buffer. :param enable_compression: A flag indicating whether the istream is compressed using `zstd`. 
:param allow_incomplete_stream: If set to `True`, an incomplete CLP IR stream is not treated as an error. Instead, encountering such a stream is seen as reaching its end without raising any exceptions. """ - DEFAULT_DECODER_BUFFER_SIZE: int = 65536 + DEFAULT_DESERIALIZER_BUFFER_SIZE: int = 65536 def __init__( self, istream: IO[bytes], - decoder_buffer_size: int = DEFAULT_DECODER_BUFFER_SIZE, + deserializer_buffer_size: int = DEFAULT_DESERIALIZER_BUFFER_SIZE, enable_compression: bool = True, allow_incomplete_stream: bool = False, ): @@ -39,40 +38,42 @@ def __init__( self.__istream = dctx.stream_reader(istream, read_across_frames=True) else: self.__istream = istream - self._decoder_buffer: DecoderBuffer = DecoderBuffer(self.__istream, decoder_buffer_size) + self._deserializer_buffer: DeserializerBuffer = DeserializerBuffer( + self.__istream, deserializer_buffer_size + ) self._metadata: Optional[Metadata] = None self._allow_incomplete_stream: bool = allow_incomplete_stream def read_next_log_event(self) -> Optional[LogEvent]: """ - Reads and decodes the next encoded log event from the IR stream. + Reads and deserializes the next log event from the IR stream. :return: - Next unread log event represented as an instance of LogEvent. - None if the end of IR stream is reached. :raise Exception: - If :meth:`~clp_ffi_py.ir.native.Decoder.decode_next_log_event` fails. + If :meth:`~clp_ffi_py.ir.native.Deserializer.deserialize_next_log_event` fails. """ - return Decoder.decode_next_log_event( - self._decoder_buffer, allow_incomplete_stream=self._allow_incomplete_stream + return Deserializer.deserialize_next_log_event( + self._deserializer_buffer, allow_incomplete_stream=self._allow_incomplete_stream ) def read_preamble(self) -> None: """ - Try to decode the preamble and set `metadata`. If `metadata` has been set already, it will - instantly return. 
It is separated from `__init__` so that the input stream does not need to - be readable on a reader's construction, but until the user starts to iterate logs. + Try to deserialize the preamble and set `metadata`. If `metadata` has been set already, it + will instantly return. It is separated from `__init__` so that the input stream does not + need to be readable on a reader's construction, but until the user starts to iterate logs. :raise Exception: - If :meth:`~clp_ffi_py.ir.native.Decoder.decode_preamble` fails. + If :meth:`~clp_ffi_py.ir.native.Deserializer.deserialize_preamble` fails. """ if self.has_metadata(): return - self._metadata = Decoder.decode_preamble(self._decoder_buffer) + self._metadata = Deserializer.deserialize_preamble(self._deserializer_buffer) def get_metadata(self) -> Metadata: if None is self._metadata: - raise RuntimeError("The metadata has not been successfully decoded yet.") + raise RuntimeError("The metadata has not been successfully deserialized yet.") return self._metadata def has_metadata(self) -> bool: @@ -84,14 +85,13 @@ def search(self, query: Query) -> Generator[LogEvent, None, None]: :param query: The input query object used to match log events. Check the document of :class:`~clp_ffi_py.ir.Query` for more details. - :yield: The next unread encoded log event that matches the given search query from the IR - stream. + :yield: The next unread log event that matches the given search query from the IR stream. 
""" if False is self.has_metadata(): self.read_preamble() while True: - log_event: Optional[LogEvent] = Decoder.decode_next_log_event( - self._decoder_buffer, + log_event: Optional[LogEvent] = Deserializer.deserialize_next_log_event( + self._deserializer_buffer, query=query, allow_incomplete_stream=self._allow_incomplete_stream, ) @@ -135,14 +135,14 @@ class ClpIrFileReader(ClpIrStreamReader): def __init__( self, fpath: Path, - decoder_buffer_size: int = ClpIrStreamReader.DEFAULT_DECODER_BUFFER_SIZE, + deserializer_buffer_size: int = ClpIrStreamReader.DEFAULT_DESERIALIZER_BUFFER_SIZE, enable_compression: bool = True, allow_incomplete_stream: bool = False, ): self._path: Path = fpath super().__init__( open(fpath, "rb"), - decoder_buffer_size=decoder_buffer_size, + deserializer_buffer_size=deserializer_buffer_size, enable_compression=enable_compression, allow_incomplete_stream=allow_incomplete_stream, ) diff --git a/setup.py b/setup.py index 01c41540..172b25e2 100644 --- a/setup.py +++ b/setup.py @@ -31,16 +31,16 @@ f"{clp_src_root}/ReaderInterface.cpp", f"{clp_src_root}/string_utils/string_utils.cpp", - f"{clp_ffi_py_src_root}/ir/native/decoding_methods.cpp", - f"{clp_ffi_py_src_root}/ir/native/encoding_methods.cpp", + f"{clp_ffi_py_src_root}/ir/native/deserialization_methods.cpp", f"{clp_ffi_py_src_root}/ir/native/Metadata.cpp", - f"{clp_ffi_py_src_root}/ir/native/PyDecoder.cpp", - f"{clp_ffi_py_src_root}/ir/native/PyDecoderBuffer.cpp", - f"{clp_ffi_py_src_root}/ir/native/PyFourByteEncoder.cpp", + f"{clp_ffi_py_src_root}/ir/native/PyDeserializer.cpp", + f"{clp_ffi_py_src_root}/ir/native/PyDeserializerBuffer.cpp", + f"{clp_ffi_py_src_root}/ir/native/PyFourByteSerializer.cpp", f"{clp_ffi_py_src_root}/ir/native/PyLogEvent.cpp", f"{clp_ffi_py_src_root}/ir/native/PyMetadata.cpp", f"{clp_ffi_py_src_root}/ir/native/PyQuery.cpp", f"{clp_ffi_py_src_root}/ir/native/Query.cpp", + f"{clp_ffi_py_src_root}/ir/native/serialization_methods.cpp", 
f"{clp_ffi_py_src_root}/modules/ir_native.cpp", f"{clp_ffi_py_src_root}/Py_utils.cpp", f"{clp_ffi_py_src_root}/utils.cpp", diff --git a/src/clp_ffi_py/PyObjectCast.hpp b/src/clp_ffi_py/PyObjectCast.hpp index 731924f9..fbe09c44 100644 --- a/src/clp_ffi_py/PyObjectCast.hpp +++ b/src/clp_ffi_py/PyObjectCast.hpp @@ -112,15 +112,15 @@ auto py_reinterpret_cast(Src* src) noexcept -> Dst* { } namespace ir::native { -class PyDecoder; -class PyDecoderBuffer; +class PyDeserializer; +class PyDeserializerBuffer; class PyLogEvent; class PyMetadata; class PyQuery; } // namespace ir::native -CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyDecoder); -CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyDecoderBuffer); +CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyDeserializer); +CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyDeserializerBuffer); CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyLogEvent); CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyMetadata); CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyQuery); diff --git a/src/clp_ffi_py/ir/native/LogEvent.hpp b/src/clp_ffi_py/ir/native/LogEvent.hpp index ad82768b..8175b5ec 100644 --- a/src/clp_ffi_py/ir/native/LogEvent.hpp +++ b/src/clp_ffi_py/ir/native/LogEvent.hpp @@ -7,7 +7,7 @@ namespace clp_ffi_py::ir::native { /** - * A class that represents a decoded IR log event. Contains ways to access (get or set) the log + * A class that represents a deserialized IR log event. Contains ways to access (get or set) the log * message, the timestamp, and the log event index. */ class LogEvent { diff --git a/src/clp_ffi_py/ir/native/Metadata.hpp b/src/clp_ffi_py/ir/native/Metadata.hpp index 33524281..aaf91007 100644 --- a/src/clp_ffi_py/ir/native/Metadata.hpp +++ b/src/clp_ffi_py/ir/native/Metadata.hpp @@ -8,13 +8,13 @@ namespace clp_ffi_py::ir::native { /** - * A class that represents a decoded IR preamble. Contains ways to access (get) metadata such as the - * timestamp format. After construction, the metadata is readonly. 
+ * A class that represents a deserialized IR preamble. Contains ways to access (get) metadata such + * as the timestamp format. After construction, the metadata is readonly. */ class Metadata { public: /** - * Constructs a new Metadata object by reading values from a JSON object decoded from the + * Constructs a new Metadata object by reading values from a JSON object deserialized from the * preamble. This constructor will validate the JSON data and throw exceptions when failing to * extract required values. * @param metadata JSON data that contains the metadata. diff --git a/src/clp_ffi_py/ir/native/PyDecoder.cpp b/src/clp_ffi_py/ir/native/PyDecoder.cpp deleted file mode 100644 index 05128565..00000000 --- a/src/clp_ffi_py/ir/native/PyDecoder.cpp +++ /dev/null @@ -1,102 +0,0 @@ -#include // Must always be included before any other header files - -#include "PyDecoder.hpp" - -#include -#include -#include - -namespace clp_ffi_py::ir::native { -namespace { -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cDecodePreambleDoc, - "decode_preamble(decoder_buffer)\n" - "--\n\n" - "Decodes the encoded preamble from the IR stream buffered in the given decoder buffer.\n\n" - ":param decoder_buffer: The decoder buffer of the encoded CLP IR stream.\n" - ":raises: Appropriate exceptions with detailed information on any encountered failure.\n" - ":return: The decoded preamble presented as a new instance of Metadata.\n" -); - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cDecodeNextLogEventDoc, - "decode_next_log_event(decoder_buffer, query=None, allow_incomplete_stream=False)\n" - "--\n\n" - "Decodes the next encoded log event from the IR stream buffered in the given decoder " - "buffer. `decoder_buffer` must have been returned by a successfully invocation of " - "`decode_preamble`. 
If `query` is provided, only the next log event matching the query " - "will be returned.\n\n" - ":param decoder_buffer: The decoder buffer of the encoded CLP IR stream.\n" - ":param query: A Query object that filters log events. See `Query` documents for more " - "details.\n" - ":param allow_incomplete_stream: If set to `True`, an incomplete CLP IR stream is not " - "treated as an error. Instead, encountering such a stream is seen as reaching its end, and " - "the function will return None without raising any exceptions.\n" - ":raises: Appropriate exceptions with detailed information on any encountered failure.\n" - ":return:\n" - " - A newly created LogEvent instance representing the next decoded log event from " - " the IR stream (if the query is `None`).\n" - " - A newly created LogEvent instance representing the next decoded log event " - " matched with the given query in the IR stream (if the query is given).\n" - " - None when the end of IR stream is reached or the query search terminates.\n" -); - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyMethodDef PyDecoder_method_table[]{ - {"decode_preamble", - decode_preamble, - METH_O | METH_STATIC, - static_cast(cDecodePreambleDoc)}, - - {"decode_next_log_event", - py_c_function_cast(decode_next_log_event), - METH_VARARGS | METH_KEYWORDS | METH_STATIC, - static_cast(cDecodeNextLogEventDoc)}, - - {nullptr, nullptr, 0, nullptr} -}; - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cPyDecoderDoc, - "Namespace for all CLP IR decoding methods.\n\n" - "Methods decode log events from encoded CLP IR streams. 
This class should never be " - "instantiated since it only contains static methods.\n" -); - -// NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) -PyType_Slot PyDecoder_slots[]{ - {Py_tp_methods, static_cast(PyDecoder_method_table)}, - {Py_tp_doc, const_cast(static_cast(cPyDecoderDoc))}, - {0, nullptr} -}; -// NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) - -/** - * PyDecoder Python type specifications. - */ -PyType_Spec PyDecoder_type_spec{ - "clp_ffi_py.ir.native.Decoder", - sizeof(PyDecoder), - 0, - Py_TPFLAGS_DEFAULT, - static_cast(PyDecoder_slots) -}; -} // namespace - -PyObjectStaticPtr PyDecoder::m_py_type{nullptr}; - -auto PyDecoder::module_level_init(PyObject* py_module) -> bool { - static_assert(std::is_trivially_destructible()); - auto* type{py_reinterpret_cast(PyType_FromSpec(&PyDecoder_type_spec))}; - m_py_type.reset(type); - if (nullptr == type) { - return false; - } - // Explicitly set the tp_new to nullptr to mark this type non-instantiable. 
- type->tp_new = nullptr; - return add_python_type(type, "Decoder", py_module); -} -} // namespace clp_ffi_py::ir::native diff --git a/src/clp_ffi_py/ir/native/PyDeserializer.cpp b/src/clp_ffi_py/ir/native/PyDeserializer.cpp new file mode 100644 index 00000000..26a4be84 --- /dev/null +++ b/src/clp_ffi_py/ir/native/PyDeserializer.cpp @@ -0,0 +1,103 @@ +#include // Must always be included before any other header files + +#include "PyDeserializer.hpp" + +#include +#include +#include + +namespace clp_ffi_py::ir::native { +namespace { +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cDeserializePreambleDoc, + "deserialize_preamble(deserializer_buffer)\n" + "--\n\n" + "Deserializes the preamble from the IR stream buffered in the given deserializer " + "buffer.\n\n" + ":param deserializer_buffer: The deserializer buffer of the serialized CLP IR stream.\n" + ":raises: Appropriate exceptions with detailed information on any encountered failure.\n" + ":return: The deserialized preamble presented as a new instance of Metadata.\n" +); + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cDeserializeNextLogEventDoc, + "deserialize_next_log_event(deserializer_buffer, query=None, allow_incomplete_stream=False)" + "\n--\n\n" + "Deserializes the next serialized log event from the IR stream buffered in the given " + "deserializer buffer. `deserializer_buffer` must have been returned by a successfully " + "invocation of `deserialize_preamble`. If `query` is provided, only the next log event " + "matching the query will be returned.\n\n" + ":param deserializer_buffer: The deserializer buffer of the serialized CLP IR stream.\n" + ":param query: A Query object that filters log events. See `Query` documents for more " + "details.\n" + ":param allow_incomplete_stream: If set to `True`, an incomplete CLP IR stream is not " + "treated as an error. 
Instead, encountering such a stream is seen as reaching its end, and " + "the function will return None without raising any exceptions.\n" + ":raises: Appropriate exceptions with detailed information on any encountered failure.\n" + ":return:\n" + " - A newly created LogEvent instance representing the next deserialized log event " + " from the IR stream (if the query is `None`).\n" + " - A newly created LogEvent instance representing the next deserialized log event " + " matched with the given query in the IR stream (if the query is given).\n" + " - None when the end of IR stream is reached or the query search terminates.\n" +); + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyMethodDef PyDeserializer_method_table[]{ + {"deserialize_preamble", + deserialize_preamble, + METH_O | METH_STATIC, + static_cast(cDeserializePreambleDoc)}, + + {"deserialize_next_log_event", + py_c_function_cast(deserialize_next_log_event), + METH_VARARGS | METH_KEYWORDS | METH_STATIC, + static_cast(cDeserializeNextLogEventDoc)}, + + {nullptr, nullptr, 0, nullptr} +}; + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cPyDeserializerDoc, + "Namespace for all CLP IR deserialization methods.\n\n" + "Methods deserialize log events from serialized CLP IR streams. This class should never be " + "instantiated since it only contains static methods.\n" +); + +// NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) +PyType_Slot PyDeserializer_slots[]{ + {Py_tp_methods, static_cast(PyDeserializer_method_table)}, + {Py_tp_doc, const_cast(static_cast(cPyDeserializerDoc))}, + {0, nullptr} +}; +// NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) + +/** + * PyDeserializer Python type specifications. 
+ */ +PyType_Spec PyDeserializer_type_spec{ + "clp_ffi_py.ir.native.Deserializer", + sizeof(PyDeserializer), + 0, + Py_TPFLAGS_DEFAULT, + static_cast(PyDeserializer_slots) +}; +} // namespace + +PyObjectStaticPtr PyDeserializer::m_py_type{nullptr}; + +auto PyDeserializer::module_level_init(PyObject* py_module) -> bool { + static_assert(std::is_trivially_destructible()); + auto* type{py_reinterpret_cast(PyType_FromSpec(&PyDeserializer_type_spec))}; + m_py_type.reset(type); + if (nullptr == type) { + return false; + } + // Explicitly set the tp_new to nullptr to mark this type non-instantiable. + type->tp_new = nullptr; + return add_python_type(type, "Deserializer", py_module); +} +} // namespace clp_ffi_py::ir::native diff --git a/src/clp_ffi_py/ir/native/PyDecoder.hpp b/src/clp_ffi_py/ir/native/PyDeserializer.hpp similarity index 62% rename from src/clp_ffi_py/ir/native/PyDecoder.hpp rename to src/clp_ffi_py/ir/native/PyDeserializer.hpp index 4080dbd7..d4fe8e2c 100644 --- a/src/clp_ffi_py/ir/native/PyDecoder.hpp +++ b/src/clp_ffi_py/ir/native/PyDeserializer.hpp @@ -1,5 +1,5 @@ -#ifndef CLP_FFI_PY_IR_NATIVE_PYDECODER_HPP -#define CLP_FFI_PY_IR_NATIVE_PYDECODER_HPP +#ifndef CLP_FFI_PY_IR_NATIVE_PYDESERIALIZER_HPP +#define CLP_FFI_PY_IR_NATIVE_PYDESERIALIZER_HPP #include // Must always be included before any other header files @@ -7,14 +7,14 @@ namespace clp_ffi_py::ir::native { /** - * This class provides a Python-level namespace for IR decoding methods. + * This class provides a Python-level namespace for IR deserialization methods. */ -class PyDecoder { +class PyDeserializer { public: /** - * Creates and initializes PyDecoder as a Python type, and then incorporates this type as a + * Creates and initializes PyDeserializer as a Python type, and then incorporates this type as a * Python object into py_module. 
- * @param py_module This is the Python module where the initialized PyDecoder will be + * @param py_module This is the Python module where the initialized PyDeserializer will be * incorporated. * @return true on success. * @return false on failure with the relevant Python exception and error set. @@ -28,4 +28,4 @@ class PyDecoder { }; } // namespace clp_ffi_py::ir::native -#endif // CLP_FFI_PY_IR_NATIVE_PYDECODER_HPP +#endif // CLP_FFI_PY_IR_NATIVE_PYDESERIALIZER_HPP diff --git a/src/clp_ffi_py/ir/native/PyDecoderBuffer.cpp b/src/clp_ffi_py/ir/native/PyDeserializerBuffer.cpp similarity index 68% rename from src/clp_ffi_py/ir/native/PyDecoderBuffer.cpp rename to src/clp_ffi_py/ir/native/PyDeserializerBuffer.cpp index b3e487be..4463e9d8 100644 --- a/src/clp_ffi_py/ir/native/PyDecoderBuffer.cpp +++ b/src/clp_ffi_py/ir/native/PyDeserializerBuffer.cpp @@ -1,6 +1,6 @@ #include // Must always be included before any other header files -#include "PyDecoderBuffer.hpp" +#include "PyDeserializerBuffer.hpp" #include #include @@ -16,7 +16,7 @@ namespace clp_ffi_py::ir::native { namespace { extern "C" { /** - * Callback of PyDecoderBuffer `__init__` method: + * Callback of PyDeserializerBuffer `__init__` method: * __init__(self, input_stream: IO[bytes], initial_buffer_capacity: int = 4096) * Keyword argument parsing is supported. * Assumes `self` is uninitialized and will allocate the underlying memory. If `self` is already @@ -27,7 +27,8 @@ extern "C" { * @return 0 on success. * @return -1 on failure with the relevant Python exception and error set. 
*/ -auto PyDecoderBuffer_init(PyDecoderBuffer* self, PyObject* args, PyObject* keywords) -> int { +auto PyDeserializerBuffer_init(PyDeserializerBuffer* self, PyObject* args, PyObject* keywords) + -> int { static char keyword_input_stream[]{"input_stream"}; static char keyword_initial_buffer_capacity[]{"initial_buffer_capacity"}; static char* keyword_table[]{ @@ -41,7 +42,7 @@ auto PyDecoderBuffer_init(PyDecoderBuffer* self, PyObject* args, PyObject* keywo self->default_init(); PyObject* input_stream{nullptr}; - Py_ssize_t initial_buffer_capacity{PyDecoderBuffer::cDefaultInitialCapacity}; + Py_ssize_t initial_buffer_capacity{PyDeserializerBuffer::cDefaultInitialCapacity}; if (false == static_cast(PyArg_ParseTupleAndKeywords( args, @@ -78,10 +79,10 @@ auto PyDecoderBuffer_init(PyDecoderBuffer* self, PyObject* args, PyObject* keywo } /** - * Callback of PyDecoderBuffer deallocator. + * Callback of PyDeserializerBuffer deallocator. * @param self */ -auto PyDecoderBuffer_dealloc(PyDecoderBuffer* self) -> void { +auto PyDeserializerBuffer_dealloc(PyDeserializerBuffer* self) -> void { self->clean(); PyObject_Del(self); } @@ -94,7 +95,7 @@ auto PyDecoderBuffer_dealloc(PyDecoderBuffer* self) -> void { * @return 0 on success. * @return -1 on failure with the relevant Python exception and error set. */ -auto PyDecoderBuffer_getbuffer(PyDecoderBuffer* self, Py_buffer* view, int flags) -> int { +auto PyDeserializerBuffer_getbuffer(PyDeserializerBuffer* self, Py_buffer* view, int flags) -> int { return self->py_getbuffer(view, flags); } @@ -104,35 +105,39 @@ auto PyDecoderBuffer_getbuffer(PyDecoderBuffer* self, Py_buffer* view, int flags * @param self (unused). * @param view (unused). 
*/ -auto PyDecoderBuffer_releasebuffer(PyDecoderBuffer* Py_UNUSED(self), Py_buffer* Py_UNUSED(view)) - -> void {} +auto PyDeserializerBuffer_releasebuffer( + PyDeserializerBuffer* Py_UNUSED(self), + Py_buffer* Py_UNUSED(view) +) -> void {} // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( - cPyDecoderBufferGetNumDecodedLogMessages, - "get_num_decoded_log_messages(self)\n" + cPyDeserializerBufferGetNumDeserializedLogMessages, + "get_num_deserialized_log_messages(self)\n" "--\n\n" - ":return: Total number of messages decoded so far.\n" + ":return: Total number of messages deserialized so far.\n" ); -auto PyDecoderBuffer_get_num_decoded_log_messages(PyDecoderBuffer* self) -> PyObject* { - return PyLong_FromLongLong(static_cast(self->get_num_decoded_message())); +auto PyDeserializerBuffer_get_num_deserialized_log_messages(PyDeserializerBuffer* self +) -> PyObject* { + return PyLong_FromLongLong(static_cast(self->get_num_deserialized_message())); } // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( - cPyDecoderBufferTestStreamingDoc, + cPyDeserializerBufferTestStreamingDoc, "_test_streaming(self, seed)\n" "--\n\n" - "Tests the functionality of the DecoderBuffer by streaming the entire input stream into " - "a Python bytearray. The stepping size from the read buffer is randomly generated, " + "Tests the functionality of the DeserializerBuffer by streaming the entire input stream " + "into a Python bytearray. 
The stepping size from the read buffer is randomly generated, " "initialized by the given seed.\n\n" "Note: this function should only be used for testing purpose.\n\n" ":param seed_obj: Random seed.\n" ":return: The entire input stream stored in a Python bytearray.\n" ); -auto PyDecoderBuffer_test_streaming(PyDecoderBuffer* self, PyObject* seed_obj) -> PyObject* { +auto PyDeserializerBuffer_test_streaming(PyDeserializerBuffer* self, PyObject* seed_obj) + -> PyObject* { unsigned seed{0}; if (false == parse_py_int(seed_obj, seed)) { return nullptr; @@ -142,16 +147,16 @@ auto PyDecoderBuffer_test_streaming(PyDecoderBuffer* self, PyObject* seed_obj) - } // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyMethodDef PyDecoderBuffer_method_table[]{ - {"get_num_decoded_log_messages", - py_c_function_cast(PyDecoderBuffer_get_num_decoded_log_messages), +PyMethodDef PyDeserializerBuffer_method_table[]{ + {"get_num_deserialized_log_messages", + py_c_function_cast(PyDeserializerBuffer_get_num_deserialized_log_messages), METH_NOARGS, - static_cast(cPyDecoderBufferGetNumDecodedLogMessages)}, + static_cast(cPyDeserializerBufferGetNumDeserializedLogMessages)}, {"_test_streaming", - py_c_function_cast(PyDecoderBuffer_test_streaming), + py_c_function_cast(PyDeserializerBuffer_test_streaming), METH_O, - static_cast(cPyDecoderBufferTestStreamingDoc)}, + static_cast(cPyDeserializerBufferTestStreamingDoc)}, {nullptr} }; @@ -160,60 +165,60 @@ PyMethodDef PyDecoderBuffer_method_table[]{ * Declaration of Python buffer protocol. 
*/ // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyBufferProcs PyDecoderBuffer_as_buffer{ - .bf_getbuffer = py_getbufferproc_cast(PyDecoderBuffer_getbuffer), - .bf_releasebuffer = py_releasebufferproc_cast(PyDecoderBuffer_releasebuffer), +PyBufferProcs PyDeserializerBuffer_as_buffer{ + .bf_getbuffer = py_getbufferproc_cast(PyDeserializerBuffer_getbuffer), + .bf_releasebuffer = py_releasebufferproc_cast(PyDeserializerBuffer_releasebuffer), }; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( - cPyDecoderBufferDoc, - "This class represents a CLP IR Decoder Buffer corresponding to a CLP IR stream. " - "It buffers encoded CLP IR data read from the input stream, which can be consumed by the " - "CLP IR decoding methods to recover encoded log events. An instance of this class is " - "expected to be passed across different calls of CLP IR decoding methods when decoding " - "from the same IR stream.\n\n" + cPyDeserializerBufferDoc, + "This class represents a CLP IR Deserializer Buffer corresponding to a CLP IR stream. " + "It buffers serialized CLP IR data read from the input stream, which can be consumed by " + "the CLP IR deserialization methods to recover serialized log events. An instance of this " + "class is expected to be passed across different calls of CLP IR deserialization methods " + "when deserializing from the same IR stream.\n\n" "The signature of `__init__` method is shown as following:\n\n" "__init__(self, input_stream, initial_buffer_capacity=4096)\n\n" - "Initializes a DecoderBuffer object for the given input IR stream.\n\n" - ":param input_stream: Input stream that contains encoded CLP IR. It should be an instance " - "of type `IO[bytes]` with the method `readinto` supported.\n" + "Initializes a DeserializerBuffer object for the given input IR stream.\n\n" + ":param input_stream: Input stream that contains serialized CLP IR. 
It should be an " + "instance of type `IO[bytes]` with the method `readinto` supported.\n" ":param initial_buffer_capacity: The initial capacity of the underlying byte buffer.\n" ); // NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-*-cast) -PyType_Slot PyDecoderBuffer_slots[]{ +PyType_Slot PyDeserializerBuffer_slots[]{ {Py_tp_alloc, reinterpret_cast(PyType_GenericAlloc)}, - {Py_tp_dealloc, reinterpret_cast(PyDecoderBuffer_dealloc)}, + {Py_tp_dealloc, reinterpret_cast(PyDeserializerBuffer_dealloc)}, {Py_tp_new, reinterpret_cast(PyType_GenericNew)}, - {Py_tp_init, reinterpret_cast(PyDecoderBuffer_init)}, - {Py_tp_methods, static_cast(PyDecoderBuffer_method_table)}, - {Py_tp_doc, const_cast(static_cast(cPyDecoderBufferDoc))}, + {Py_tp_init, reinterpret_cast(PyDeserializerBuffer_init)}, + {Py_tp_methods, static_cast(PyDeserializerBuffer_method_table)}, + {Py_tp_doc, const_cast(static_cast(cPyDeserializerBufferDoc))}, {0, nullptr} }; // NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-*-cast) /** - * PyDecoderBuffer Python type specifications. + * PyDeserializerBuffer Python type specifications. 
*/ -PyType_Spec PyDecoderBuffer_type_spec{ - "clp_ffi_py.ir.native.DecoderBuffer", - sizeof(PyDecoderBuffer), +PyType_Spec PyDeserializerBuffer_type_spec{ + "clp_ffi_py.ir.native.DeserializerBuffer", + sizeof(PyDeserializerBuffer), 0, Py_TPFLAGS_DEFAULT, - static_cast(PyDecoderBuffer_slots) + static_cast(PyDeserializerBuffer_slots) }; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( cPyIncompleteStreamErrorDoc, - "This exception will be raised if the decoder buffer cannot read more data from the " - "input stream while the decoding method expects more bytes.\n" + "This exception will be raised if the deserializer buffer cannot read more data from the " + "input stream while the deserialization method expects more bytes.\n" "Typically, this error indicates the input stream has been truncated.\n" ); } // namespace -auto PyDecoderBuffer::init(PyObject* input_stream, Py_ssize_t buf_capacity) -> bool { +auto PyDeserializerBuffer::init(PyObject* input_stream, Py_ssize_t buf_capacity) -> bool { m_read_buffer_mem_owner = static_cast(PyMem_Malloc(buf_capacity)); if (nullptr == m_read_buffer_mem_owner) { PyErr_NoMemory(); @@ -225,7 +230,7 @@ auto PyDecoderBuffer::init(PyObject* input_stream, Py_ssize_t buf_capacity) -> b return true; } -auto PyDecoderBuffer::populate_read_buffer(Py_ssize_t& num_bytes_read) -> bool { +auto PyDeserializerBuffer::populate_read_buffer(Py_ssize_t& num_bytes_read) -> bool { auto const unconsumed_bytes_in_curr_read_buffer{get_unconsumed_bytes()}; auto const num_unconsumed_bytes{ static_cast(unconsumed_bytes_in_curr_read_buffer.size()) @@ -279,7 +284,7 @@ auto PyDecoderBuffer::populate_read_buffer(Py_ssize_t& num_bytes_read) -> bool { return true; } -auto PyDecoderBuffer::metadata_init(PyMetadata* metadata) -> bool { +auto PyDeserializerBuffer::metadata_init(PyMetadata* metadata) -> bool { if (has_metadata()) { PyErr_SetString(PyExc_RuntimeError, "Metadata has already been initialized."); return false; @@ -293,7 +298,7 @@ auto 
PyDecoderBuffer::metadata_init(PyMetadata* metadata) -> bool { return true; } -auto PyDecoderBuffer::py_getbuffer(Py_buffer* view, int flags) -> int { +auto PyDeserializerBuffer::py_getbuffer(Py_buffer* view, int flags) -> int { // Don't need to set the exception message during the failure. The Python level caller will set // the exception and thus overwrite it. if (false == is_py_buffer_protocol_enabled()) { @@ -310,28 +315,28 @@ auto PyDecoderBuffer::py_getbuffer(Py_buffer* view, int flags) -> int { ); } -auto PyDecoderBuffer::commit_read_buffer_consumption(Py_ssize_t num_bytes_consumed) -> bool { +auto PyDeserializerBuffer::commit_read_buffer_consumption(Py_ssize_t num_bytes_consumed) -> bool { if (get_num_unconsumed_bytes() < num_bytes_consumed) { - PyErr_SetString(PyExc_OverflowError, cDecoderBufferOverflowError); + PyErr_SetString(PyExc_OverflowError, cDeserializerBufferOverflowError); return false; } m_num_current_bytes_consumed += num_bytes_consumed; return true; } -auto PyDecoderBuffer::try_read() -> bool { +auto PyDeserializerBuffer::try_read() -> bool { Py_ssize_t num_bytes_read{0}; if (false == populate_read_buffer(num_bytes_read)) { return false; } if (0 == num_bytes_read) { - PyErr_SetString(get_py_incomplete_stream_error(), cDecoderIncompleteIRError); + PyErr_SetString(get_py_incomplete_stream_error(), cDeserializerIncompleteIRError); return false; } return true; } -auto PyDecoderBuffer::test_streaming(uint32_t seed) -> PyObject* { +auto PyDeserializerBuffer::test_streaming(uint32_t seed) -> PyObject* { std::default_random_engine rand_generator(seed); std::vector read_bytes; bool reach_istream_end{false}; @@ -362,19 +367,19 @@ auto PyDecoderBuffer::test_streaming(uint32_t seed) -> PyObject* { ); } -PyObjectStaticPtr PyDecoderBuffer::m_py_type{nullptr}; -PyObjectStaticPtr PyDecoderBuffer::m_py_incomplete_stream_error{nullptr}; +PyObjectStaticPtr PyDeserializerBuffer::m_py_type{nullptr}; +PyObjectStaticPtr 
PyDeserializerBuffer::m_py_incomplete_stream_error{nullptr}; -auto PyDecoderBuffer::get_py_type() -> PyTypeObject* { +auto PyDeserializerBuffer::get_py_type() -> PyTypeObject* { return m_py_type.get(); } -auto PyDecoderBuffer::get_py_incomplete_stream_error() -> PyObject* { +auto PyDeserializerBuffer::get_py_incomplete_stream_error() -> PyObject* { return m_py_incomplete_stream_error.get(); } -auto PyDecoderBuffer::module_level_init(PyObject* py_module) -> bool { - static_assert(std::is_trivially_destructible()); +auto PyDeserializerBuffer::module_level_init(PyObject* py_module) -> bool { + static_assert(std::is_trivially_destructible()); auto* py_incomplete_stream_error{PyErr_NewExceptionWithDoc( "clp_ffi_py.native.IncompleteStreamError", static_cast(cPyIncompleteStreamErrorDoc), @@ -389,12 +394,12 @@ auto PyDecoderBuffer::module_level_init(PyObject* py_module) -> bool { return false; } - auto* type{py_reinterpret_cast(PyType_FromSpec(&PyDecoderBuffer_type_spec))}; + auto* type{py_reinterpret_cast(PyType_FromSpec(&PyDeserializerBuffer_type_spec))}; m_py_type.reset(type); if (nullptr == type) { return false; } - type->tp_as_buffer = &PyDecoderBuffer_as_buffer; - return add_python_type(get_py_type(), "DecoderBuffer", py_module); + type->tp_as_buffer = &PyDeserializerBuffer_as_buffer; + return add_python_type(get_py_type(), "DeserializerBuffer", py_module); } } // namespace clp_ffi_py::ir::native diff --git a/src/clp_ffi_py/ir/native/PyDecoderBuffer.hpp b/src/clp_ffi_py/ir/native/PyDeserializerBuffer.hpp similarity index 73% rename from src/clp_ffi_py/ir/native/PyDecoderBuffer.hpp rename to src/clp_ffi_py/ir/native/PyDeserializerBuffer.hpp index 2ad4a6ab..7eb4d484 100644 --- a/src/clp_ffi_py/ir/native/PyDecoderBuffer.hpp +++ b/src/clp_ffi_py/ir/native/PyDeserializerBuffer.hpp @@ -1,5 +1,5 @@ -#ifndef CLP_FFI_PY_IR_NATIVE_PYDECODERBUFFER_HPP -#define CLP_FFI_PY_IR_NATIVE_PYDECODERBUFFER_HPP +#ifndef CLP_FFI_PY_IR_NATIVE_PYDESERIALIZERBUFFER_HPP +#define 
CLP_FFI_PY_IR_NATIVE_PYDESERIALIZERBUFFER_HPP #include // Must always be included before any other header files @@ -12,44 +12,44 @@ namespace clp_ffi_py::ir::native { /** - * This Python class is designed to buffer encoded CLP IR bytes that are read from an input stream. - * This object serves a dual purpose: - * - It enables CLP IR decoding methods to access the buffered bytes for the purpose of decoding log - * events. + * This Python class is designed to buffer serialized CLP IR bytes that are read from an input + * stream. This object serves a dual purpose: + * - It enables CLP IR deserialization methods to access the buffered bytes for the purpose of + * deserializing and decoding log events. * - It adheres to the Python buffer protocol, allowing for direct reading from an `IO[bytes]`-like * input stream. * This class encompasses all essential attributes to hold the buffered bytes and monitor the state - * of the buffer. It's meant to be utilized across various CLP IR decoding method calls when - * decoding from the same IR stream. + * of the buffer. It's meant to be utilized across various CLP IR deserialization method calls when + * deserializing from the same IR stream. */ -class PyDecoderBuffer { +class PyDeserializerBuffer { public: static constexpr Py_ssize_t cDefaultInitialCapacity{4096}; /** - * Since the memory allocation of PyDecoderBuffer is handled by CPython's allocator, cpp + * Since the memory allocation of PyDeserializerBuffer is handled by CPython's allocator, cpp * constructors will not be explicitly called. This function serves as the default constructor * to initialize the underlying input IR stream and read buffer. Other data members are assumed * to be zero-initialized by `default-init` method. It has to be manually called whenever - * creating a new PyDecoderBuffer object through CPython APIs. + * creating a new PyDeserializerBuffer object through CPython APIs. * @return true on success. 
* @return false on failure with the relevant Python exception and error set. */ [[nodiscard]] auto init( PyObject* input_stream, - Py_ssize_t buf_capacity = PyDecoderBuffer::cDefaultInitialCapacity + Py_ssize_t buf_capacity = PyDeserializerBuffer::cDefaultInitialCapacity ) -> bool; /** - * Zero-initializes all the data members in PyDecoderBuffer. Should be called once the object is - * allocated. + * Zero-initializes all the data members in PyDeserializerBuffer. Should be called once the + * object is allocated. */ auto default_init() -> void { m_read_buffer_mem_owner = nullptr; m_buffer_size = 0; m_num_current_bytes_consumed = 0; m_ref_timestamp = 0; - m_num_decoded_message = 0; + m_num_deserialized_message = 0; m_py_buffer_protocol_enabled = false; m_input_ir_stream = nullptr; m_metadata = nullptr; @@ -73,15 +73,18 @@ class PyDecoderBuffer { */ auto commit_read_buffer_consumption(Py_ssize_t num_bytes_consumed) -> bool; - [[nodiscard]] auto get_num_decoded_message() const -> size_t { return m_num_decoded_message; } + [[nodiscard]] auto get_num_deserialized_message() const -> size_t { + return m_num_deserialized_message; + } /** - * Increments the number of decoded message counter, and returns the value before increment. + * Increments the number of deserialized message counter, and returns the value before + * increment. */ - [[maybe_unused]] auto get_and_increment_decoded_message_count() -> size_t { - auto current_num_decoded_message{m_num_decoded_message}; - ++m_num_decoded_message; - return current_num_decoded_message; + [[maybe_unused]] auto get_and_increment_deserialized_message_count() -> size_t { + auto current_num_deserialized_message{m_num_deserialized_message}; + ++m_num_deserialized_message; + return current_num_deserialized_message; } [[nodiscard]] auto get_ref_timestamp() const -> clp::ir::epoch_time_ms_t { @@ -140,17 +143,17 @@ class PyDecoderBuffer { } /** - * Attempts to populate the decoder buffer. 
When this function is called, it is expected to have - * more bytes to read from the IR stream. + * Attempts to populate the deserializer buffer. When this function is called, it is expected to + * have more bytes to read from the IR stream. * @return true on success. * @return false on failure. The Python exception and error will be properly set if the error. */ [[nodiscard]] auto try_read() -> bool; /** - * Tests the functionality of the DecoderBuffer by sequentially reading through the input stream - * with randomly sized reads. It will grow the read buffer when necessary until the the entire - * input stream is consumed. All the read bytes will be returned as a Python bytearray. + * Tests the functionality of PyDeserializerBuffer by sequentially reading through the input + * stream with randomly sized reads. It will grow the read buffer when necessary until the + * entire input stream is consumed. All the read bytes will be returned as a Python bytearray. * @param seed Random seed passing from the tester. * @return Python bytearray that contains all the read bytes read from the input stream in * sequence. @@ -159,9 +162,10 @@ class PyDecoderBuffer { [[nodiscard]] auto test_streaming(uint32_t seed) -> PyObject*; /** - * Gets the PyTypeObject that represents PyDecoderBuffer's Python type. This type is dynamically - * created and initialized during the execution of `PyDecoderBuffer::module_level_init`. - * @return Python type object associated with PyDecoderBuffer. + * Gets the PyTypeObject that represents PyDeserializerBuffer's Python type. This type is + * dynamically created and initialized during the execution of + * `PyDeserializerBuffer::module_level_init`. + * @return Python type object associated with PyDeserializerBuffer. 
*/ [[nodiscard]] static auto get_py_type() -> PyTypeObject*; @@ -171,9 +175,9 @@ class PyDecoderBuffer { [[nodiscard]] static auto get_py_incomplete_stream_error() -> PyObject*; /** - * Creates and initializes PyDecoderBuffer as a Python type, and then incorporates this type as - * a Python object into the py_module module. - * @param py_module This is the Python module where the initialized PyDecoderBuffer will be + * Creates and initializes PyDeserializerBuffer as a Python type, and then incorporates this + * type as a Python object into the py_module module. + * @param py_module This is the Python module where the initialized PyDeserializerBuffer will be * incorporated. * @return true on success. * @return false on failure with the relevant Python exception and error set. @@ -210,7 +214,7 @@ class PyDecoderBuffer { clp::ir::epoch_time_ms_t m_ref_timestamp; Py_ssize_t m_buffer_size; Py_ssize_t m_num_current_bytes_consumed; - size_t m_num_decoded_message; + size_t m_num_deserialized_message; bool m_py_buffer_protocol_enabled; static PyObjectStaticPtr m_py_type; @@ -218,4 +222,4 @@ class PyDecoderBuffer { }; } // namespace clp_ffi_py::ir::native -#endif // CLP_FFI_PY_IR_NATIVE_PYDECODERBUFFER_HPP +#endif // CLP_FFI_PY_IR_NATIVE_PYDESERIALIZERBUFFER_HPP diff --git a/src/clp_ffi_py/ir/native/PyFourByteEncoder.cpp b/src/clp_ffi_py/ir/native/PyFourByteEncoder.cpp deleted file mode 100644 index 494a81a2..00000000 --- a/src/clp_ffi_py/ir/native/PyFourByteEncoder.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include // Must always be included before any other header files - -#include "PyFourByteEncoder.hpp" - -#include -#include -#include - -namespace clp_ffi_py::ir::native { -namespace { -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cEncodePreambleDoc, - "encode_preamble(ref_timestamp, timestamp_format, timezone)\n" - "--\n\n" - "Creates the encoded CLP preamble for a stream of encoded log messages" - " using the 4-byte encoding.\n\n" - ":param 
ref_timestamp: Reference timestamp used to calculate deltas emitted with each " - "message.\n" - ":param timestamp_format: Timestamp format to be use when generating the logs with a " - "reader.\n" - ":param timezone: Timezone in TZID format to be use when generating the timestamp " - "from Unix epoch time.\n" - ":raises NotImplementedError: If metadata length too large.\n" - ":return: The encoded preamble.\n" -); - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cEncodeMessageAndTimestampDeltaDoc, - "encode_message_and_timestamp_delta(timestamp_delta, msg)\n" - "--\n\n" - "Encodes the log `msg` along with the timestamp delta using the 4-byte encoding.\n\n" - ":param timestamp_delta: Timestamp difference in milliseconds between the current log " - "message and the previous log message.\n" - ":param msg: Log message to encode.\n" - ":raises NotImplementedError: If the log message failed to encode, or the timestamp delta " - "exceeds the supported size.\n" - ":return: The encoded message and timestamp.\n" -); - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cEncodeMessageDoc, - "encode_message(msg)\n" - "--\n\n" - "Encodes the log `msg` using the 4-byte encoding.\n\n" - ":param msg: Log message to encode.\n" - ":raises NotImplementedError: If the log message failed to encode.\n" - ":return: The encoded message.\n" -); - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cEncodeTimestampDeltaDoc, - "encode_timestamp_delta(timestamp_delta)\n" - "--\n\n" - "Encodes the timestamp using the 4-byte encoding.\n\n" - ":param timestamp_delta: Timestamp difference in milliseconds between the current log " - "message and the previous log message.\n" - ":raises NotImplementedError: If the timestamp failed to encode.\n" - ":return: The encoded timestamp.\n" -); - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cEncodeEndOfIrDoc, - "encode_end_of_ir()\n" - "--\n\n" - "Encodes the byte sequence 
that indicates the end of a CLP IR stream. A stream that does " - "not contain this will be considered as an incomplete IR stream.\n" -); - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyMethodDef PyFourByteEncoder_method_table[]{ - {"encode_preamble", - clp_ffi_py::ir::native::encode_four_byte_preamble, - METH_VARARGS | METH_STATIC, - static_cast(cEncodePreambleDoc)}, - - {"encode_message_and_timestamp_delta", - clp_ffi_py::ir::native::encode_four_byte_message_and_timestamp_delta, - METH_VARARGS | METH_STATIC, - static_cast(cEncodeMessageAndTimestampDeltaDoc)}, - - {"encode_message", - clp_ffi_py::ir::native::encode_four_byte_message, - METH_VARARGS | METH_STATIC, - static_cast(cEncodeMessageDoc)}, - - {"encode_timestamp_delta", - clp_ffi_py::ir::native::encode_four_byte_timestamp_delta, - METH_VARARGS | METH_STATIC, - static_cast(cEncodeTimestampDeltaDoc)}, - - {"encode_end_of_ir", - py_c_function_cast(clp_ffi_py::ir::native::encode_end_of_ir), - METH_NOARGS | METH_STATIC, - static_cast(cEncodeEndOfIrDoc)}, - - {nullptr, nullptr, 0, nullptr} -}; - -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR( - cPyFourByteEncoderDoc, - "Namespace for all CLP four byte IR encoding methods.\n\n" - "Methods encode bytes from the log record to create a CLP log message. This class should " - "never be instantiated since it only contains static methods.\n" -); - -// NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) -PyType_Slot PyFourByteEncoder_slots[]{ - {Py_tp_methods, static_cast(PyFourByteEncoder_method_table)}, - {Py_tp_doc, const_cast(static_cast(cPyFourByteEncoderDoc))}, - {0, nullptr} -}; -// NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) - -/** - * PyFourByteEncoder Python type specifications. 
- */ -PyType_Spec PyFourByteEncoder_type_spec{ - "clp_ffi_py.ir.native.FourByteEncoder", - sizeof(PyFourByteEncoder), - 0, - Py_TPFLAGS_DEFAULT, - static_cast(PyFourByteEncoder_slots) -}; -} // namespace - -PyObjectStaticPtr PyFourByteEncoder::m_py_type{nullptr}; - -auto PyFourByteEncoder::module_level_init(PyObject* py_module) -> bool { - static_assert(std::is_trivially_destructible()); - auto* type{py_reinterpret_cast(PyType_FromSpec(&PyFourByteEncoder_type_spec))}; - m_py_type.reset(type); - if (nullptr == type) { - return false; - } - // Explicitly set the tp_new to nullptr to mark this type non-instantiable. - type->tp_new = nullptr; - return add_python_type(type, "FourByteEncoder", py_module); -} -} // namespace clp_ffi_py::ir::native diff --git a/src/clp_ffi_py/ir/native/PyFourByteSerializer.cpp b/src/clp_ffi_py/ir/native/PyFourByteSerializer.cpp new file mode 100644 index 00000000..b183cb82 --- /dev/null +++ b/src/clp_ffi_py/ir/native/PyFourByteSerializer.cpp @@ -0,0 +1,143 @@ +#include // Must always be included before any other header files + +#include "PyFourByteSerializer.hpp" + +#include +#include +#include + +namespace clp_ffi_py::ir::native { +namespace { +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cSerializePreambleDoc, + "serialize_preamble(ref_timestamp, timestamp_format, timezone)\n" + "--\n\n" + "Serializes the preamble for a 4-byte encoded CLP IR stream.\n\n" + ":param ref_timestamp: Reference timestamp used to calculate deltas emitted with each " + "message.\n" + ":param timestamp_format: Timestamp format to be used when generating the logs with a " + "reader.\n" + ":param timezone: Timezone in TZID format to be used when generating the timestamp " + "from Unix epoch time.\n" + ":raises NotImplementedError: If the metadata length is too large.\n" + ":return: The serialized preamble.\n" +); + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cSerializeMessageAndTimestampDeltaDoc, + 
"serialize_message_and_timestamp_delta(timestamp_delta, msg)\n" + "--\n\n" + "Serializes the log `msg` along with the timestamp delta using the 4-byte encoding.\n\n" + ":param timestamp_delta: Timestamp difference in milliseconds between the current log " + "message and the previous log message.\n" + ":param msg: Log message to serialize.\n" + ":raises NotImplementedError: If the log message failed to serialize.\n" + ":return: The serialized message and timestamp.\n" +); + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cSerializeMessageDoc, + "serialize_message(msg)\n" + "--\n\n" + "Serializes the log `msg` using the 4-byte encoding.\n\n" + ":param msg: Log message to serialize.\n" + ":raises NotImplementedError: If the log message failed to serialize.\n" + ":return: The serialized message.\n" +); + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cSerializeTimestampDeltaDoc, + "serialize_timestamp_delta(timestamp_delta)\n" + "--\n\n" + "Serializes the timestamp using the 4-byte encoding.\n\n" + ":param timestamp_delta: Timestamp difference in milliseconds between the current log " + "message and the previous log message.\n" + ":raises NotImplementedError: If the timestamp failed to serialize.\n" + ":return: The serialized timestamp.\n" +); + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cSerializeEndOfIrDoc, + "serialize_end_of_ir()\n" + "--\n\n" + "Serializes the byte sequence that indicates the end of a CLP IR stream. 
A stream that " + "does not contain this will be considered as an incomplete IR stream.\n" +); + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyMethodDef PyFourByteSerializer_method_table[]{ + {"serialize_preamble", + clp_ffi_py::ir::native::serialize_four_byte_preamble, + METH_VARARGS | METH_STATIC, + static_cast(cSerializePreambleDoc)}, + + {"serialize_message_and_timestamp_delta", + clp_ffi_py::ir::native::serialize_four_byte_message_and_timestamp_delta, + METH_VARARGS | METH_STATIC, + static_cast(cSerializeMessageAndTimestampDeltaDoc)}, + + {"serialize_message", + clp_ffi_py::ir::native::serialize_four_byte_message, + METH_VARARGS | METH_STATIC, + static_cast(cSerializeMessageDoc)}, + + {"serialize_timestamp_delta", + clp_ffi_py::ir::native::serialize_four_byte_timestamp_delta, + METH_VARARGS | METH_STATIC, + static_cast(cSerializeTimestampDeltaDoc)}, + + {"serialize_end_of_ir", + py_c_function_cast(clp_ffi_py::ir::native::serialize_end_of_ir), + METH_NOARGS | METH_STATIC, + static_cast(cSerializeEndOfIrDoc)}, + + {nullptr, nullptr, 0, nullptr} +}; + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) +PyDoc_STRVAR( + cPyFourByteSerializerDoc, + "Namespace for all CLP four byte IR serialization methods.\n\n" + "Methods serialize bytes from the log record to create a CLP log message. This class " + "should never be instantiated since it only contains static methods.\n" +); + +// NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) +PyType_Slot PyFourByteSerializer_slots[]{ + {Py_tp_methods, static_cast(PyFourByteSerializer_method_table)}, + {Py_tp_doc, const_cast(static_cast(cPyFourByteSerializerDoc))}, + {0, nullptr} +}; +// NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) + +/** + * PyFourByteSerializer Python type specifications. 
+ */ +PyType_Spec PyFourByteSerializer_type_spec{ + "clp_ffi_py.ir.native.FourByteSerializer", + sizeof(PyFourByteSerializer), + 0, + Py_TPFLAGS_DEFAULT, + static_cast(PyFourByteSerializer_slots) +}; +} // namespace + +PyObjectStaticPtr PyFourByteSerializer::m_py_type{nullptr}; + +auto PyFourByteSerializer::module_level_init(PyObject* py_module) -> bool { + static_assert(std::is_trivially_destructible()); + auto* type{py_reinterpret_cast(PyType_FromSpec(&PyFourByteSerializer_type_spec))}; + m_py_type.reset(type); + if (nullptr == type) { + return false; + } + // Explicitly set the tp_new to nullptr to mark this type non-instantiable. + type->tp_new = nullptr; + return add_python_type(type, "FourByteSerializer", py_module); +} +} // namespace clp_ffi_py::ir::native diff --git a/src/clp_ffi_py/ir/native/PyFourByteEncoder.hpp b/src/clp_ffi_py/ir/native/PyFourByteSerializer.hpp similarity index 62% rename from src/clp_ffi_py/ir/native/PyFourByteEncoder.hpp rename to src/clp_ffi_py/ir/native/PyFourByteSerializer.hpp index d65b853d..56a179f9 100644 --- a/src/clp_ffi_py/ir/native/PyFourByteEncoder.hpp +++ b/src/clp_ffi_py/ir/native/PyFourByteSerializer.hpp @@ -1,5 +1,5 @@ -#ifndef CLP_FFI_PY_IR_NATIVE_PYFOURBYTEENCODER_HPP -#define CLP_FFI_PY_IR_NATIVE_PYFOURBYTEENCODER_HPP +#ifndef CLP_FFI_PY_IR_NATIVE_PYFOURBYTESERIALIZER_HPP +#define CLP_FFI_PY_IR_NATIVE_PYFOURBYTESERIALIZER_HPP #include // Must always be included before any other header files @@ -7,14 +7,14 @@ namespace clp_ffi_py::ir::native { /** - * This class provides a Python-level namespace for CLP 4-byte IR encoding methods. + * This class provides a Python-level namespace for CLP 4-byte IR serialization methods. */ -class PyFourByteEncoder { +class PyFourByteSerializer { public: /** - * Creates and initializes PyFourByteEncoder as a Python type, and then incorporates this type - * as a Python object into py_module. 
- * @param py_module This is the Python module where the initialized PyFourByteEncoder will be + * Creates and initializes PyFourByteSerializer as a Python type, and then incorporates this + * type as a Python object into py_module. + * @param py_module This is the Python module where the initialized PyFourByteSerializer will be * incorporated. * @return true on success. * @return false on failure with the relevant Python exception and error set. @@ -28,4 +28,4 @@ class PyFourByteEncoder { }; } // namespace clp_ffi_py::ir::native -#endif // CLP_FFI_PY_IR_NATIVE_PYFOURBYTEENCODER_HPP +#endif // CLP_FFI_PY_IR_NATIVE_PYFOURBYTESERIALIZER_HPP diff --git a/src/clp_ffi_py/ir/native/PyLogEvent.cpp b/src/clp_ffi_py/ir/native/PyLogEvent.cpp index b28a9c2e..59a575e5 100644 --- a/src/clp_ffi_py/ir/native/PyLogEvent.cpp +++ b/src/clp_ffi_py/ir/native/PyLogEvent.cpp @@ -386,9 +386,9 @@ PyMethodDef PyLogEvent_method_table[]{ // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( cPyLogEventDoc, - "This class represents a decoded log event and provides ways to access the underlying " + "This class represents a deserialized log event and provides ways to access the underlying " "log data, including the log message, the timestamp, and the log event index. 
" - "Normally, this class will be instantiated by the FFI IR decoding methods.\n" + "Normally, this class will be instantiated by the FFI IR deserialization methods.\n" "However, with the `__init__` method provided below, direct instantiation is also " "possible.\n\n" "The signature of `__init__` method is shown as following:\n\n" diff --git a/src/clp_ffi_py/ir/native/PyMetadata.cpp b/src/clp_ffi_py/ir/native/PyMetadata.cpp index 38b67019..7e79950e 100644 --- a/src/clp_ffi_py/ir/native/PyMetadata.cpp +++ b/src/clp_ffi_py/ir/native/PyMetadata.cpp @@ -181,7 +181,8 @@ PyMethodDef PyMetadata_method_table[]{ PyDoc_STRVAR( cPyMetadataDoc, "This class represents the IR stream preamble and provides ways to access the underlying " - "metadata. Normally, this class will be instantiated by the FFI IR decoding methods.\n" + "metadata. Normally, this class will be instantiated by the FFI IR deserialization " + "methods.\n" "However, with the `__init__` method provided below, direct instantiation is also " "possible.\n\n" "The signature of `__init__` method is shown as following:\n\n" @@ -239,7 +240,7 @@ auto PyMetadata::init(nlohmann::json const& metadata, bool is_four_byte_encoding } catch (ExceptionFFI const& ex) { PyErr_Format( PyExc_RuntimeError, - "Failed to initialize metadata from decoded JSON format preamble. " + "Failed to initialize metadata from deserialized JSON format preamble. " "Error message: %s", ex.what() ); diff --git a/src/clp_ffi_py/ir/native/PyQuery.cpp b/src/clp_ffi_py/ir/native/PyQuery.cpp index 739f2f17..8132971e 100644 --- a/src/clp_ffi_py/ir/native/PyQuery.cpp +++ b/src/clp_ffi_py/ir/native/PyQuery.cpp @@ -560,10 +560,10 @@ PyDoc_STRVAR( "NOTE: When searching an IR stream with a query, ideally, the search would terminate once " "the current log event's timestamp exceeds the upper bound of the query's time range. 
" "However, the timestamps in the IR stream might not be monotonically increasing; they can " - "be locally disordered due to thread contention. To safely stop searching, the decoder " - "needs to ensure that the current timestamp in the IR stream exceeds the query's upper " - "bound timestamp by a reasonable margin. This margin can be specified during the " - "initialization. This margin is set to a default value specified by the static method " + "be locally disordered due to thread contention. To safely stop searching, the " + "deserializer needs to ensure that the current timestamp in the IR stream exceeds the " + "query's upper bound timestamp by a reasonable margin. This margin can be specified during " + "the initialization. This margin is set to a default value specified by the static method " "`default_search_time_termination_margin()`. Users can customized this margin accordingly, " "for example, the margin can be set to 0 if the CLP IR stream is generated from a " "single-threaded program execution.\n\n" diff --git a/src/clp_ffi_py/ir/native/decoding_methods.hpp b/src/clp_ffi_py/ir/native/decoding_methods.hpp deleted file mode 100644 index 88869c1a..00000000 --- a/src/clp_ffi_py/ir/native/decoding_methods.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef CLP_FFI_PY_IR_NATIVE_DECODING_METHODS -#define CLP_FFI_PY_IR_NATIVE_DECODING_METHODS - -#include // Must always be included before any other header files - -// Documentation for these methods is in clp/ir/native/PyDecoder.cpp, as it also serves as the -// documentation for Python. 
-namespace clp_ffi_py::ir::native { -extern "C" { -auto decode_preamble(PyObject* self, PyObject* py_decoder_buffer) -> PyObject*; -auto decode_next_log_event(PyObject* self, PyObject* args, PyObject* keywords) -> PyObject*; -} -} // namespace clp_ffi_py::ir::native - -#endif // CLP_FFI_PY_IR_NATIVE_DECODING_METHODS diff --git a/src/clp_ffi_py/ir/native/decoding_methods.cpp b/src/clp_ffi_py/ir/native/deserialization_methods.cpp similarity index 74% rename from src/clp_ffi_py/ir/native/decoding_methods.cpp rename to src/clp_ffi_py/ir/native/deserialization_methods.cpp index ee2756fd..068ef1e4 100644 --- a/src/clp_ffi_py/ir/native/decoding_methods.cpp +++ b/src/clp_ffi_py/ir/native/deserialization_methods.cpp @@ -1,6 +1,6 @@ #include // Must always be included before any other header files -#include "decoding_methods.hpp" +#include "deserialization_methods.hpp" #include #include @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include @@ -29,10 +29,10 @@ using clp::ffi::ir_stream::IRProtocolErrorCode; namespace { /** * This template defines the function signature of a termination handler required by - * `generic_decode_log_event`. Signature: ( + * `deserialize_log_events`. Signature: ( * ffi::epoch_timestamp_ms timestamp, - * std::string_view decoded_log_message, - * size_t decoded_log_event_idx, + * std::string_view deserialized_log_msg, + * size_t deserialized_log_event_idx, * PyObject*& return_value * ) -> bool; * @tparam TerminateHandler @@ -49,9 +49,9 @@ concept TerminateHandlerSignature = requires(TerminateHandler handler) { /** * Handles the error when IRErrorCode::IRErrorCode_Incomplete_IR is seen. The handler will first - * try to load more data into `decoder_buffer`. If it fails, `allow_incomplete_stream` will be used - * to determine whether to swallow the incomplete IR exception. - * @param decoder_buffer + * try to load more data into `deserializer_buffer`. 
If it fails, `allow_incomplete_stream` will be + * used to determine whether to swallow the incomplete IR exception. + * @param deserializer_buffer * @param allow_incomplete_stream A flag to indicate whether the incomplete stream error should be * ignored. If it is set to true, incomplete stream error. * @param std::nullopt if more data is loaded. @@ -60,15 +60,16 @@ concept TerminateHandlerSignature = requires(TerminateHandler handler) { * and error set. */ [[nodiscard]] auto handle_incomplete_ir_error( - PyDecoderBuffer* decoder_buffer, + PyDeserializerBuffer* deserializer_buffer, bool allow_incomplete_stream ) -> std::optional { - if (decoder_buffer->try_read()) { + if (deserializer_buffer->try_read()) { return std::nullopt; } if (allow_incomplete_stream - && static_cast(PyErr_ExceptionMatches(PyDecoderBuffer::get_py_incomplete_stream_error( - )))) + && static_cast( + PyErr_ExceptionMatches(PyDeserializerBuffer::get_py_incomplete_stream_error()) + )) { PyErr_Clear(); Py_RETURN_NONE; @@ -77,11 +78,11 @@ concept TerminateHandlerSignature = requires(TerminateHandler handler) { } /** - * Decodes the next log event from the CLP IR buffer `decoder_buffer` until terminate handler - * returns true. - * @tparam TerminateHandler Method to determine if the decoding should terminate, and set the return - * value for termination. - * @param decoder_buffer IR decoder buffer of the input IR stream. + * Deserializes the next log event from the CLP IR buffer `deserializer_buffer` until terminate + * handler returns true. + * @tparam TerminateHandler Method to determine if the deserialization should terminate, and set the + * return value for termination. + * @param deserializer_buffer IR deserializer buffer of the input IR stream. * @param allow_incomplete_stream A flag to indicate whether the incomplete stream error should be * ignored. If it is set to true, incomplete stream error should be treated as the IR stream is * terminated. 
@@ -91,20 +92,20 @@ concept TerminateHandlerSignature = requires(TerminateHandler handler) { * @return nullptr on failure with the relevant Python exception and error set. */ template -auto generic_decode_log_events( - PyDecoderBuffer* decoder_buffer, +auto deserialize_log_events( + PyDeserializerBuffer* deserializer_buffer, bool allow_incomplete_stream, TerminateHandler terminate_handler ) -> PyObject* { - std::string decoded_message; + std::string deserialized_message; clp::ir::epoch_time_ms_t timestamp_delta{0}; - auto timestamp{decoder_buffer->get_ref_timestamp()}; + auto timestamp{deserializer_buffer->get_ref_timestamp()}; size_t current_log_event_idx{0}; PyObject* return_value{nullptr}; clp::ffi::ir_stream::encoded_tag_t tag{}; while (true) { - auto const unconsumed_bytes{decoder_buffer->get_unconsumed_bytes()}; + auto const unconsumed_bytes{deserializer_buffer->get_unconsumed_bytes()}; clp::BufferReader ir_buffer{ clp::size_checked_pointer_cast(unconsumed_bytes.data()), unconsumed_bytes.size() @@ -114,11 +115,11 @@ auto generic_decode_log_events( IRErrorCode::IRErrorCode_Success != err) { if (IRErrorCode::IRErrorCode_Incomplete_IR != err) { - PyErr_Format(PyExc_RuntimeError, cDecoderErrorCodeFormatStr, err); + PyErr_Format(PyExc_RuntimeError, cDeserializerErrorCodeFormatStr, err); return nullptr; } if (auto const ret_val{ - handle_incomplete_ir_error(decoder_buffer, allow_incomplete_stream) + handle_incomplete_ir_error(deserializer_buffer, allow_incomplete_stream) }; ret_val.has_value()) { @@ -133,12 +134,12 @@ auto generic_decode_log_events( auto const err{clp::ffi::ir_stream::four_byte_encoding::deserialize_log_event( ir_buffer, tag, - decoded_message, + deserialized_message, timestamp_delta )}; if (IRErrorCode::IRErrorCode_Incomplete_IR == err) { if (auto const ret_val{ - handle_incomplete_ir_error(decoder_buffer, allow_incomplete_stream) + handle_incomplete_ir_error(deserializer_buffer, allow_incomplete_stream) }; ret_val.has_value()) { @@ -147,17 
+148,18 @@ auto generic_decode_log_events( continue; } if (IRErrorCode::IRErrorCode_Success != err) { - PyErr_Format(PyExc_RuntimeError, cDecoderErrorCodeFormatStr, err); + PyErr_Format(PyExc_RuntimeError, cDeserializerErrorCodeFormatStr, err); return nullptr; } timestamp += timestamp_delta; - current_log_event_idx = decoder_buffer->get_and_increment_decoded_message_count(); + current_log_event_idx = deserializer_buffer->get_and_increment_deserialized_message_count(); auto const num_bytes_consumed{static_cast(ir_buffer.get_pos())}; - decoder_buffer->commit_read_buffer_consumption(num_bytes_consumed); + deserializer_buffer->commit_read_buffer_consumption(num_bytes_consumed); - if (terminate_handler(timestamp, decoded_message, current_log_event_idx, return_value)) { - decoder_buffer->set_ref_timestamp(timestamp); + if (terminate_handler(timestamp, deserialized_message, current_log_event_idx, return_value)) + { + deserializer_buffer->set_ref_timestamp(timestamp); break; } } @@ -175,19 +177,22 @@ auto generic_decode_log_events( } // namespace extern "C" { -auto decode_preamble(PyObject* Py_UNUSED(self), PyObject* py_decoder_buffer) -> PyObject* { +auto deserialize_preamble(PyObject* Py_UNUSED(self), PyObject* py_deserializer_buffer) + -> PyObject* { if (false - == static_cast(PyObject_TypeCheck(py_decoder_buffer, PyDecoderBuffer::get_py_type()))) + == static_cast( + PyObject_TypeCheck(py_deserializer_buffer, PyDeserializerBuffer::get_py_type()) + )) { PyErr_SetString(PyExc_TypeError, cPyTypeError); return nullptr; } - auto* decoder_buffer{py_reinterpret_cast(py_decoder_buffer)}; + auto* deserializer_buffer{py_reinterpret_cast(py_deserializer_buffer)}; bool is_four_byte_encoding{false}; size_t ir_buffer_cursor_pos{0}; while (true) { - auto const unconsumed_bytes{decoder_buffer->get_unconsumed_bytes()}; + auto const unconsumed_bytes{deserializer_buffer->get_unconsumed_bytes()}; clp::BufferReader ir_buffer{ clp::size_checked_pointer_cast(unconsumed_bytes.data()), 
unconsumed_bytes.size() @@ -198,16 +203,17 @@ auto decode_preamble(PyObject* Py_UNUSED(self), PyObject* py_decoder_buffer) -> break; } if (IRErrorCode::IRErrorCode_Incomplete_IR != err) { - PyErr_Format(PyExc_RuntimeError, cDecoderErrorCodeFormatStr, err); + PyErr_Format(PyExc_RuntimeError, cDeserializerErrorCodeFormatStr, err); return nullptr; } - if (false == decoder_buffer->try_read()) { + if (false == deserializer_buffer->try_read()) { return nullptr; } } - decoder_buffer->commit_read_buffer_consumption(static_cast(ir_buffer_cursor_pos)); + deserializer_buffer->commit_read_buffer_consumption(static_cast(ir_buffer_cursor_pos + )); if (false == is_four_byte_encoding) { - PyErr_SetString(PyExc_NotImplementedError, "8-byte IR decoding is not supported yet."); + PyErr_SetString(PyExc_NotImplementedError, "8-byte IR encoding is not supported yet."); return nullptr; } @@ -215,7 +221,7 @@ auto decode_preamble(PyObject* Py_UNUSED(self), PyObject* py_decoder_buffer) -> size_t metadata_pos{0}; uint16_t metadata_size{0}; while (true) { - auto const unconsumed_bytes = decoder_buffer->get_unconsumed_bytes(); + auto const unconsumed_bytes = deserializer_buffer->get_unconsumed_bytes(); clp::BufferReader ir_buffer{ clp::size_checked_pointer_cast(unconsumed_bytes.data()), unconsumed_bytes.size() @@ -231,19 +237,20 @@ auto decode_preamble(PyObject* Py_UNUSED(self), PyObject* py_decoder_buffer) -> break; } if (IRErrorCode ::IRErrorCode_Incomplete_IR != err) { - PyErr_Format(PyExc_RuntimeError, cDecoderErrorCodeFormatStr, err); + PyErr_Format(PyExc_RuntimeError, cDeserializerErrorCodeFormatStr, err); return nullptr; } - if (false == decoder_buffer->try_read()) { + if (false == deserializer_buffer->try_read()) { return nullptr; } } - auto const unconsumed_bytes = decoder_buffer->get_unconsumed_bytes(); + auto const unconsumed_bytes = deserializer_buffer->get_unconsumed_bytes(); auto const metadata_buffer{ unconsumed_bytes.subspan(metadata_pos, static_cast(metadata_size)) }; - 
decoder_buffer->commit_read_buffer_consumption(static_cast(ir_buffer_cursor_pos)); + deserializer_buffer->commit_read_buffer_consumption(static_cast(ir_buffer_cursor_pos + )); PyMetadata* metadata{nullptr}; try { // Initialization list should not be used in this case: @@ -282,25 +289,25 @@ auto decode_preamble(PyObject* Py_UNUSED(self), PyObject* py_decoder_buffer) -> PyErr_Format(PyExc_RuntimeError, "Json Parsing Error: %s", ex.what()); return nullptr; } - if (false == decoder_buffer->metadata_init(metadata)) { + if (false == deserializer_buffer->metadata_init(metadata)) { return nullptr; } return py_reinterpret_cast(metadata); } -auto decode_next_log_event(PyObject* Py_UNUSED(self), PyObject* args, PyObject* keywords) +auto deserialize_next_log_event(PyObject* Py_UNUSED(self), PyObject* args, PyObject* keywords) -> PyObject* { - static char keyword_decoder_buffer[]{"decoder_buffer"}; + static char keyword_deserializer_buffer[]{"deserializer_buffer"}; static char keyword_query[]{"query"}; static char keyword_allow_incomplete_stream[]{"allow_incomplete_stream"}; static char* keyword_table[]{ - static_cast(keyword_decoder_buffer), + static_cast(keyword_deserializer_buffer), static_cast(keyword_query), static_cast(keyword_allow_incomplete_stream), nullptr }; - PyDecoderBuffer* decoder_buffer{nullptr}; + PyDeserializerBuffer* deserializer_buffer{nullptr}; PyObject* query_obj{Py_None}; int allow_incomplete_stream{0}; @@ -310,8 +317,8 @@ auto decode_next_log_event(PyObject* Py_UNUSED(self), PyObject* args, PyObject* keywords, "O!|Op", static_cast(keyword_table), - PyDecoderBuffer::get_py_type(), - &decoder_buffer, + PyDeserializerBuffer::get_py_type(), + &deserializer_buffer, &query_obj, &allow_incomplete_stream ))) @@ -327,14 +334,14 @@ auto decode_next_log_event(PyObject* Py_UNUSED(self), PyObject* args, PyObject* return nullptr; } - if (false == decoder_buffer->has_metadata()) { + if (false == deserializer_buffer->has_metadata()) { PyErr_SetString( 
PyExc_RuntimeError, - "The given DecoderBuffer does not have a valid CLP IR metadata decoded." + "The given deserializerBuffer does not have a valid CLP IR metadata deserialized." ); return nullptr; } - auto* metadata{decoder_buffer->get_metadata()}; + auto* metadata{deserializer_buffer->get_metadata()}; if (false == is_query_given) { auto terminate_handler{ @@ -353,8 +360,8 @@ auto decode_next_log_event(PyObject* Py_UNUSED(self), PyObject* args, PyObject* return true; } }; - return generic_decode_log_events( - decoder_buffer, + return deserialize_log_events( + deserializer_buffer, static_cast(allow_incomplete_stream), terminate_handler ); @@ -387,8 +394,8 @@ auto decode_next_log_event(PyObject* Py_UNUSED(self), PyObject* args, PyObject* return true; } }; - return generic_decode_log_events( - decoder_buffer, + return deserialize_log_events( + deserializer_buffer, static_cast(allow_incomplete_stream), query_terminate_handler ); diff --git a/src/clp_ffi_py/ir/native/deserialization_methods.hpp b/src/clp_ffi_py/ir/native/deserialization_methods.hpp new file mode 100644 index 00000000..ecdf56ec --- /dev/null +++ b/src/clp_ffi_py/ir/native/deserialization_methods.hpp @@ -0,0 +1,15 @@ +#ifndef CLP_FFI_PY_IR_NATIVE_DESERIALIZATION_METHODS +#define CLP_FFI_PY_IR_NATIVE_DESERIALIZATION_METHODS + +#include // Must always be included before any other header files + +// Documentation for these methods is in clp/ir/native/PyDeserializer.cpp, as it also serves as the +// documentation for Python. 
+namespace clp_ffi_py::ir::native { +extern "C" { +auto deserialize_preamble(PyObject* self, PyObject* py_deserializer_buffer) -> PyObject*; +auto deserialize_next_log_event(PyObject* self, PyObject* args, PyObject* keywords) -> PyObject*; +} +} // namespace clp_ffi_py::ir::native + +#endif // CLP_FFI_PY_IR_NATIVE_DESERIALIZATION_METHODS diff --git a/src/clp_ffi_py/ir/native/encoding_methods.hpp b/src/clp_ffi_py/ir/native/encoding_methods.hpp deleted file mode 100644 index 8352cdf2..00000000 --- a/src/clp_ffi_py/ir/native/encoding_methods.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef CLP_FFI_PY_IR_NATIVE_PY_ENCODING_METHODS -#define CLP_FFI_PY_IR_NATIVE_PY_ENCODING_METHODS - -#include // Must always be included before any other header files - -// Documentation for these methods is in clp_ffi_py/ir/native/PyFourByteEncoder.cpp, as it also -// serves as the documentation for python. -namespace clp_ffi_py::ir::native { -auto encode_four_byte_preamble(PyObject* self, PyObject* args) -> PyObject*; -auto encode_four_byte_message_and_timestamp_delta(PyObject* self, PyObject* args) -> PyObject*; -auto encode_four_byte_message(PyObject* self, PyObject* args) -> PyObject*; -auto encode_four_byte_timestamp_delta(PyObject* self, PyObject* args) -> PyObject*; -auto encode_end_of_ir(PyObject* self) -> PyObject*; -} // namespace clp_ffi_py::ir::native - -#endif // CLP_FFI_PY_IR_NATIVE_PY_ENCODING_METHODS diff --git a/src/clp_ffi_py/ir/native/error_messages.hpp b/src/clp_ffi_py/ir/native/error_messages.hpp index fdf1ead9..c598475f 100644 --- a/src/clp_ffi_py/ir/native/error_messages.hpp +++ b/src/clp_ffi_py/ir/native/error_messages.hpp @@ -2,13 +2,17 @@ #define CLP_FFI_PY_IR_NATIVE_ERROR_MESSAGES namespace clp_ffi_py::ir::native { -constexpr char const* cDecoderBufferOverflowError = "DecoderBuffer internal read buffer overflows."; -constexpr char const* cDecoderIncompleteIRError = "The IR stream is incomplete."; -constexpr char const* cDecoderErrorCodeFormatStr = "IR decoding method 
failed with error code: %d."; -constexpr char const* cEncodeTimestampError - = "Native encoder cannot encode the given timestamp delta"; -constexpr char const* cEncodePreambleError = "Native encoder cannot encode the given preamble"; -constexpr char const* cEncodeMessageError = "Native encoder cannot encode the given message"; +constexpr char const* cDeserializerBufferOverflowError + = "DeserializerBuffer internal read buffer overflows."; +constexpr char const* cDeserializerIncompleteIRError = "The IR stream is incomplete."; +constexpr char const* cDeserializerErrorCodeFormatStr + = "IR deserialization method failed with error code: %d."; +constexpr char const* cSerializeTimestampError + = "Native serializer cannot serialize the given timestamp delta"; +constexpr char const* cSerializePreambleError + = "Native serializer cannot serialize the given preamble"; +constexpr char const* cSerializeMessageError + = "Native serializer cannot serialize the given message"; } // namespace clp_ffi_py::ir::native #endif // CLP_FFI_PY_IR_NATIVE_ERROR_MESSAGES diff --git a/src/clp_ffi_py/ir/native/encoding_methods.cpp b/src/clp_ffi_py/ir/native/serialization_methods.cpp similarity index 82% rename from src/clp_ffi_py/ir/native/encoding_methods.cpp rename to src/clp_ffi_py/ir/native/serialization_methods.cpp index f54603b1..81a6ae00 100644 --- a/src/clp_ffi_py/ir/native/encoding_methods.cpp +++ b/src/clp_ffi_py/ir/native/serialization_methods.cpp @@ -1,6 +1,6 @@ #include // Must always be included before any other header files -#include "encoding_methods.hpp" +#include "serialization_methods.hpp" #include #include @@ -10,7 +10,7 @@ #include namespace clp_ffi_py::ir::native { -auto encode_four_byte_preamble(PyObject* Py_UNUSED(self), PyObject* args) -> PyObject* { +auto serialize_four_byte_preamble(PyObject* Py_UNUSED(self), PyObject* args) -> PyObject* { clp::ir::epoch_time_ms_t ref_timestamp{}; char const* input_timestamp_format{}; char const* input_timezone{}; @@ -47,7 +47,7 @@ 
auto encode_four_byte_preamble(PyObject* Py_UNUSED(self), PyObject* args) -> PyO ir_buf )) { - PyErr_SetString(PyExc_NotImplementedError, clp_ffi_py::ir::native::cEncodePreambleError); + PyErr_SetString(PyExc_NotImplementedError, clp_ffi_py::ir::native::cSerializePreambleError); return nullptr; } @@ -57,7 +57,7 @@ auto encode_four_byte_preamble(PyObject* Py_UNUSED(self), PyObject* args) -> PyO ); } -auto encode_four_byte_message_and_timestamp_delta(PyObject* Py_UNUSED(self), PyObject* args) +auto serialize_four_byte_message_and_timestamp_delta(PyObject* Py_UNUSED(self), PyObject* args) -> PyObject* { clp::ir::epoch_time_ms_t delta{}; char const* input_buffer{}; @@ -74,12 +74,15 @@ auto encode_four_byte_message_and_timestamp_delta(PyObject* Py_UNUSED(self), PyO ir_buf.reserve(input_buffer_size * 2); if (false == clp::ffi::ir_stream::four_byte_encoding::serialize_message(msg, logtype, ir_buf)) { - PyErr_SetString(PyExc_NotImplementedError, clp_ffi_py::ir::native::cEncodeMessageError); + PyErr_SetString(PyExc_NotImplementedError, clp_ffi_py::ir::native::cSerializeMessageError); return nullptr; } if (false == clp::ffi::ir_stream::four_byte_encoding::serialize_timestamp(delta, ir_buf)) { - PyErr_SetString(PyExc_NotImplementedError, clp_ffi_py::ir::native::cEncodeTimestampError); + PyErr_SetString( + PyExc_NotImplementedError, + clp_ffi_py::ir::native::cSerializeTimestampError + ); return nullptr; } @@ -89,7 +92,7 @@ auto encode_four_byte_message_and_timestamp_delta(PyObject* Py_UNUSED(self), PyO ); } -auto encode_four_byte_message(PyObject* Py_UNUSED(self), PyObject* args) -> PyObject* { +auto serialize_four_byte_message(PyObject* Py_UNUSED(self), PyObject* args) -> PyObject* { char const* input_buffer{}; Py_ssize_t input_buffer_size{}; if (0 == PyArg_ParseTuple(args, "y#", &input_buffer, &input_buffer_size)) { @@ -105,7 +108,7 @@ auto encode_four_byte_message(PyObject* Py_UNUSED(self), PyObject* args) -> PyOb if (false == 
clp::ffi::ir_stream::four_byte_encoding::serialize_message(msg, log_type, ir_buf)) { - PyErr_SetString(PyExc_NotImplementedError, clp_ffi_py::ir::native::cEncodeMessageError); + PyErr_SetString(PyExc_NotImplementedError, clp_ffi_py::ir::native::cSerializeMessageError); return nullptr; } @@ -115,7 +118,7 @@ auto encode_four_byte_message(PyObject* Py_UNUSED(self), PyObject* args) -> PyOb ); } -auto encode_four_byte_timestamp_delta(PyObject* Py_UNUSED(self), PyObject* args) -> PyObject* { +auto serialize_four_byte_timestamp_delta(PyObject* Py_UNUSED(self), PyObject* args) -> PyObject* { clp::ir::epoch_time_ms_t delta{}; if (0 == PyArg_ParseTuple(args, "L", &delta)) { return nullptr; @@ -123,7 +126,10 @@ auto encode_four_byte_timestamp_delta(PyObject* Py_UNUSED(self), PyObject* args) std::vector ir_buf; if (false == clp::ffi::ir_stream::four_byte_encoding::serialize_timestamp(delta, ir_buf)) { - PyErr_SetString(PyExc_NotImplementedError, clp_ffi_py::ir::native::cEncodeTimestampError); + PyErr_SetString( + PyExc_NotImplementedError, + clp_ffi_py::ir::native::cSerializeTimestampError + ); return nullptr; } @@ -133,7 +139,7 @@ auto encode_four_byte_timestamp_delta(PyObject* Py_UNUSED(self), PyObject* args) ); } -auto encode_end_of_ir(PyObject* Py_UNUSED(self)) -> PyObject* { +auto serialize_end_of_ir(PyObject* Py_UNUSED(self)) -> PyObject* { static constexpr char cEof{clp::ffi::ir_stream::cProtocol::Eof}; return PyByteArray_FromStringAndSize(&cEof, sizeof(cEof)); } diff --git a/src/clp_ffi_py/ir/native/serialization_methods.hpp b/src/clp_ffi_py/ir/native/serialization_methods.hpp new file mode 100644 index 00000000..44f83c6c --- /dev/null +++ b/src/clp_ffi_py/ir/native/serialization_methods.hpp @@ -0,0 +1,16 @@ +#ifndef CLP_FFI_PY_IR_NATIVE_SERIALIZATION_METHODS +#define CLP_FFI_PY_IR_NATIVE_SERIALIZATION_METHODS + +#include // Must always be included before any other header files + +// Documentation for these methods is in clp_ffi_py/ir/native/PyFourByteSerializer.cpp, 
as it also +// serves as the documentation for python. +namespace clp_ffi_py::ir::native { +auto serialize_four_byte_preamble(PyObject* self, PyObject* args) -> PyObject*; +auto serialize_four_byte_message_and_timestamp_delta(PyObject* self, PyObject* args) -> PyObject*; +auto serialize_four_byte_message(PyObject* self, PyObject* args) -> PyObject*; +auto serialize_four_byte_timestamp_delta(PyObject* self, PyObject* args) -> PyObject*; +auto serialize_end_of_ir(PyObject* self) -> PyObject*; +} // namespace clp_ffi_py::ir::native + +#endif // CLP_FFI_PY_IR_NATIVE_SERIALIZATION_METHODS diff --git a/src/clp_ffi_py/modules/ir_native.cpp b/src/clp_ffi_py/modules/ir_native.cpp index 7d499c61..0e077ae3 100644 --- a/src/clp_ffi_py/modules/ir_native.cpp +++ b/src/clp_ffi_py/modules/ir_native.cpp @@ -1,8 +1,8 @@ #include // Must always be included before any other header files -#include -#include -#include +#include +#include +#include #include #include #include @@ -10,7 +10,10 @@ namespace { // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) -PyDoc_STRVAR(cModuleDoc, "Python interface to the CLP IR encoding and decoding methods."); +PyDoc_STRVAR( + cModuleDoc, + "Python interface to the CLP IR serialization and deserialization methods." 
+); // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyMethodDef Py_native_method_table[]{{nullptr, nullptr, 0, nullptr}}; @@ -36,7 +39,7 @@ PyMODINIT_FUNC PyInit_native() { return nullptr; } - if (false == clp_ffi_py::ir::native::PyDecoderBuffer::module_level_init(new_module)) { + if (false == clp_ffi_py::ir::native::PyDeserializerBuffer::module_level_init(new_module)) { Py_DECREF(new_module); return nullptr; } @@ -56,12 +59,12 @@ PyMODINIT_FUNC PyInit_native() { return nullptr; } - if (false == clp_ffi_py::ir::native::PyDecoder::module_level_init(new_module)) { + if (false == clp_ffi_py::ir::native::PyDeserializer::module_level_init(new_module)) { Py_DECREF(new_module); return nullptr; } - if (false == clp_ffi_py::ir::native::PyFourByteEncoder::module_level_init(new_module)) { + if (false == clp_ffi_py::ir::native::PyFourByteSerializer::module_level_init(new_module)) { Py_DECREF(new_module); return nullptr; } diff --git a/tests/test_ir/__init__.py b/tests/test_ir/__init__.py index ba56d539..a2a4463c 100644 --- a/tests/test_ir/__init__.py +++ b/tests/test_ir/__init__.py @@ -1,14 +1,14 @@ import unittest from typing import Iterable, Optional, Union -from test_ir.test_decoder import * # noqa -from test_ir.test_decoder_buffer import * # noqa -from test_ir.test_encoder import * # noqa +from test_ir.test_deserializer import * # noqa +from test_ir.test_deserializer_buffer import * # noqa from test_ir.test_log_event import * # noqa from test_ir.test_metadata import * # noqa from test_ir.test_query import * # noqa from test_ir.test_query_builder import * # noqa from test_ir.test_readers import * # noqa +from test_ir.test_serializer import * # noqa from test_ir.test_utils import TestCLPBase diff --git a/tests/test_ir/test_decoder.py b/tests/test_ir/test_deserializer.py similarity index 67% rename from tests/test_ir/test_decoder.py rename to tests/test_ir/test_deserializer.py index 6c86d8b9..063c86d3 100644 --- a/tests/test_ir/test_decoder.py +++ 
b/tests/test_ir/test_deserializer.py @@ -6,9 +6,9 @@ from test_ir.test_utils import get_current_timestamp, LogGenerator, TestCLPBase from clp_ffi_py.ir import ( - Decoder, - DecoderBuffer, - FourByteEncoder, + Deserializer, + DeserializerBuffer, + FourByteSerializer, LogEvent, Metadata, Query, @@ -18,13 +18,13 @@ LOG_DIR: Path = Path("unittest-logs") -class TestCaseDecoderBase(TestCLPBase): +class TestCaseDeserializerBase(TestCLPBase): """ - Class for testing clp_ffi_py.ir.Decoder. + Class for testing clp_ffi_py.ir.Deserializer. """ - encoded_log_path_prefix: str - encoded_log_path_postfix: str + serialized_log_path_prefix: str + serialized_log_path_postfix: str num_test_iterations: int enable_compression: bool has_query: bool @@ -38,8 +38,8 @@ def setUpClass(cls) -> None: # override def setUp(self) -> None: - self.encoded_log_path_prefix: str = f"{self.id()}" - self.encoded_log_path_postfix: str = "clp.zst" if self.enable_compression else "clp" + self.serialized_log_path_prefix: str = f"{self.id()}" + self.serialized_log_path_postfix: str = "clp.zst" if self.enable_compression else "clp" for i in range(self.num_test_iterations): log_path = self._get_log_path(i) if log_path.exists(): @@ -47,11 +47,11 @@ def setUp(self) -> None: def _get_log_path(self, iter: int) -> Path: log_path: Path = LOG_DIR / Path( - f"{self.encoded_log_path_prefix}.{iter}.{self.encoded_log_path_postfix}" + f"{self.serialized_log_path_prefix}.{iter}.{self.serialized_log_path_postfix}" ) return log_path - def _encode_log_stream( + def _serialize_log_stream( self, log_path: Path, metadata: Metadata, log_events: List[LogEvent] ) -> None: """ @@ -59,12 +59,12 @@ def _encode_log_stream( :param log_path: Path on the local file system to write the stream. :param metadata: Metadata of the log stream. - :param log_events: A list of log events to encode. + :param log_events: A list of log events to serialize. 
""" with open(str(log_path), "wb") as ostream: ref_timestamp: int = metadata.get_ref_timestamp() ostream.write( - FourByteEncoder.encode_preamble( + FourByteSerializer.serialize_preamble( ref_timestamp, metadata.get_timestamp_format(), metadata.get_timezone_id() ) ) @@ -74,11 +74,13 @@ def _encode_log_stream( ref_timestamp = curr_ts log_message: str = log_event.get_log_message() ostream.write( - FourByteEncoder.encode_message_and_timestamp_delta(delta, log_message.encode()) + FourByteSerializer.serialize_message_and_timestamp_delta( + delta, log_message.encode() + ) ) - ostream.write(FourByteEncoder.encode_end_of_ir()) + ostream.write(FourByteSerializer.serialize_end_of_ir()) - def _encode_random_log_stream( + def _serialize_random_log_stream( self, log_path: Path, num_log_events_to_generate: int, seed: int ) -> Tuple[Metadata, List[LogEvent]]: """ @@ -93,10 +95,10 @@ def _encode_random_log_stream( log_events: List[LogEvent] metadata, log_events = LogGenerator.generate_random_logs(num_log_events_to_generate) try: - self._encode_log_stream(log_path, metadata, log_events) + self._serialize_log_stream(log_path, metadata, log_events) except Exception as e: self.assertTrue( - False, f"Failed to encode random log stream generated using seed {seed}: {e}" + False, f"Failed to serialize random log stream generated using seed {seed}: {e}" ) return metadata, log_events @@ -116,51 +118,54 @@ def _generate_random_query( """ return Query(), ref_log_events - def _decode_log_stream( + def _deserialize_log_stream( self, log_path: Path, query: Optional[Query] ) -> Tuple[Metadata, List[LogEvent]]: """ - Decodes the log stream specified by `log_path`, using decoding methods provided in - clp_ffi_py.ir.Decoder. + Decodes the log stream specified by `log_path`, using deserialization methods provided in + clp_ffi_py.ir.Deserializer. :param log_path: The path to the log stream. :param query: Optional search query. 
- :return: A tuple that contains the decoded metadata and log events returned from decoding - methods. + :return: A tuple that contains the deserialized metadata and log events returned from + deserialization methods. """ with open(str(log_path), "rb") as istream: - decoder_buffer: DecoderBuffer = DecoderBuffer(istream) - metadata: Metadata = Decoder.decode_preamble(decoder_buffer) + deserializer_buffer: DeserializerBuffer = DeserializerBuffer(istream) + metadata: Metadata = Deserializer.deserialize_preamble(deserializer_buffer) log_events: List[LogEvent] = [] while True: - log_event: Optional[LogEvent] = Decoder.decode_next_log_event(decoder_buffer, query) + log_event: Optional[LogEvent] = Deserializer.deserialize_next_log_event( + deserializer_buffer, query + ) if None is log_event: break log_events.append(log_event) return metadata, log_events - def _validate_decoded_logs( + def _validate_deserialized_logs( self, ref_metadata: Metadata, ref_log_events: List[LogEvent], - decoded_metadata: Metadata, - decoded_log_events: List[LogEvent], + deserialized_metadata: Metadata, + deserialized_log_events: List[LogEvent], log_path: Path, seed: int, ) -> None: """ - Validates decoded logs from the IR stream specified by `log_path`. + Validates deserialized logs from the IR stream specified by `log_path`. :param ref_metadata: Reference metadata. :param ref_log_events: A list of reference log events sequence (order sensitive). - :param decoded_metadata: Metadata decoded from the IR stream. - :param decoded_log_events: A list of log events decoded from the IR stream in sequence. + :param deserialized_metadata: Metadata deserialized from the IR stream. + :param deserialized_log_events: A list of log events deserialized from the IR stream in + sequence. :param log_path: Local path of the IR stream. :param seed: Random seed used to generate the log events sequence. 
""" test_info: str = f"Seed: {seed}, Log Path: {log_path}" self._check_metadata( - decoded_metadata, + deserialized_metadata, ref_metadata.get_ref_timestamp(), ref_metadata.get_timestamp_format(), ref_metadata.get_timezone_id(), @@ -168,24 +173,24 @@ def _validate_decoded_logs( ) ref_num_log_events: int = len(ref_log_events) - decoded_num_log_events: int = len(decoded_log_events) + deserialized_num_log_events: int = len(deserialized_log_events) self.assertEqual( ref_num_log_events, - decoded_num_log_events, - "Number of log events decoded does not match.\n" + test_info, + deserialized_num_log_events, + "Number of log events deserialized does not match.\n" + test_info, ) - for ref_log_event, decoded_log_event in zip(ref_log_events, decoded_log_events): + for ref_log_event, deserialized_log_event in zip(ref_log_events, deserialized_log_events): self._check_log_event( - decoded_log_event, + deserialized_log_event, ref_log_event.get_log_message(), ref_log_event.get_timestamp(), ref_log_event.get_index(), test_info, ) - def test_decoder_with_random_logs(self) -> None: + def test_deserializer_with_random_logs(self) -> None: """ - Tests encoding/decoding methods. + Tests serialization/deserialization methods. Check the TestCase class doc string for more details. 
""" @@ -197,7 +202,7 @@ def test_decoder_with_random_logs(self) -> None: ref_metadata: Metadata ref_log_events: List[LogEvent] - ref_metadata, ref_log_events = self._encode_random_log_stream( + ref_metadata, ref_log_events = self._serialize_random_log_stream( log_path, num_log_events, seed ) @@ -208,20 +213,21 @@ def test_decoder_with_random_logs(self) -> None: metadata: Metadata log_events: List[LogEvent] try: - metadata, log_events = self._decode_log_stream(log_path, query) + metadata, log_events = self._deserialize_log_stream(log_path, query) except Exception as e: self.assertTrue( - False, f"Failed to decode random log stream generated using seed {seed}: {e}" + False, + f"Failed to deserialize random log stream generated using seed {seed}: {e}", ) - self._validate_decoded_logs( + self._validate_deserialized_logs( ref_metadata, ref_log_events, metadata, log_events, log_path, seed ) -class TestCaseDecoderDecompress(TestCaseDecoderBase): +class TestCaseDeserializerDecompress(TestCaseDeserializerBase): """ - Tests encoding/decoding methods against uncompressed IR stream. + Tests serialization/deserialization methods against uncompressed IR stream. """ # override @@ -232,9 +238,9 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderDecompressZstd(TestCaseDecoderBase): +class TestCaseDeserializerDecompressZstd(TestCaseDeserializerBase): """ - Tests encoding/decoding methods against zstd compressed IR stream. + Tests serialization/deserialization methods against zstd compressed IR stream. """ # override @@ -245,9 +251,10 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderDecompressDefaultQuery(TestCaseDecoderBase): +class TestCaseDeserializerDecompressDefaultQuery(TestCaseDeserializerBase): """ - Tests encoding/decoding methods against uncompressed IR stream with the default empty query. + Tests serialization/deserialization methods against uncompressed IR stream with the default + empty query. 
""" # override @@ -258,9 +265,10 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderDecompressZstdDefaultQuery(TestCaseDecoderBase): +class TestCaseDeserializerDecompressZstdDefaultQuery(TestCaseDeserializerBase): """ - Tests encoding/decoding methods against zstd compressed IR stream with the default empty query. + Tests serialization/deserialization methods against zstd compressed IR stream with the default + empty query. """ # override @@ -271,7 +279,7 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderTimeRangeQueryBase(TestCaseDecoderBase): +class TestCaseDeserializerTimeRangeQueryBase(TestCaseDeserializerBase): # override def _generate_random_query( self, ref_log_events: List[LogEvent] @@ -294,10 +302,10 @@ def _generate_random_query( return query, matched_log_events -class TestCaseDecoderTimeRangeQuery(TestCaseDecoderTimeRangeQueryBase): +class TestCaseDeserializerTimeRangeQuery(TestCaseDeserializerTimeRangeQueryBase): """ - Tests encoding/decoding methods against uncompressed IR stream with the query that specifies a - search timestamp. + Tests serialization/deserialization methods against uncompressed IR stream with the query that + specifies a search timestamp. """ # override @@ -308,10 +316,10 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderTimeRangeQueryZstd(TestCaseDecoderTimeRangeQueryBase): +class TestCaseDeserializerTimeRangeQueryZstd(TestCaseDeserializerTimeRangeQueryBase): """ - Tests encoding/decoding methods against zstd compressed IR stream with the query that specifies - a search timestamp. + Tests serialization/deserialization methods against zstd compressed IR stream with the query + that specifies a search timestamp. 
""" # override @@ -322,7 +330,7 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderWildcardQueryBase(TestCaseDecoderBase): +class TestCaseDeserializerWildcardQueryBase(TestCaseDeserializerBase): # override def _generate_random_query( self, ref_log_events: List[LogEvent] @@ -339,10 +347,10 @@ def _generate_random_query( return query, matched_log_events -class TestCaseDecoderWildcardQuery(TestCaseDecoderWildcardQueryBase): +class TestCaseDeserializerWildcardQuery(TestCaseDeserializerWildcardQueryBase): """ - Tests encoding/decoding methods against uncompressed IR stream with the query that specifies - wildcard queries. + Tests serialization/deserialization methods against uncompressed IR stream with the query that + specifies wildcard queries. """ # override @@ -353,10 +361,10 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderWildcardQueryZstd(TestCaseDecoderWildcardQueryBase): +class TestCaseDeserializerWildcardQueryZstd(TestCaseDeserializerWildcardQueryBase): """ - Tests encoding/decoding methods against zstd compressed IR stream with the query that specifies - a wildcard queries. + Tests serialization/deserialization methods against zstd compressed IR stream with the query + that specifies a wildcard queries. """ # override @@ -367,7 +375,7 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderTimeRangeWildcardQueryBase(TestCaseDecoderBase): +class TestCaseDeserializerTimeRangeWildcardQueryBase(TestCaseDeserializerBase): # override def _generate_random_query( self, ref_log_events: List[LogEvent] @@ -394,10 +402,10 @@ def _generate_random_query( return query, matched_log_events -class TestCaseDecoderTimeRangeWildcardQuery(TestCaseDecoderTimeRangeWildcardQueryBase): +class TestCaseDeserializerTimeRangeWildcardQuery(TestCaseDeserializerTimeRangeWildcardQueryBase): """ - Tests encoding/decoding methods against uncompressed IR stream with the query that specifies - both search time range and wildcard queries. 
+ Tests serialization/deserialization methods against uncompressed IR stream with the query that + specifies both search time range and wildcard queries. """ # override @@ -408,10 +416,12 @@ def setUp(self) -> None: super().setUp() -class TestCaseDecoderTimeRangeWildcardQueryZstd(TestCaseDecoderTimeRangeWildcardQueryBase): +class TestCaseDeserializerTimeRangeWildcardQueryZstd( + TestCaseDeserializerTimeRangeWildcardQueryBase +): """ - Tests encoding/decoding methods against zstd compressed IR stream with the query that specifies - both search time range and wildcard queries. + Tests serialization/deserialization methods against zstd compressed IR stream with the query + that specifies both search time range and wildcard queries. """ # override diff --git a/tests/test_ir/test_decoder_buffer.py b/tests/test_ir/test_deserializer_buffer.py similarity index 70% rename from tests/test_ir/test_decoder_buffer.py rename to tests/test_ir/test_deserializer_buffer.py index 84c82a65..dde1c409 100644 --- a/tests/test_ir/test_decoder_buffer.py +++ b/tests/test_ir/test_deserializer_buffer.py @@ -6,12 +6,12 @@ from smart_open import open # type: ignore from test_ir.test_utils import TestCLPBase -from clp_ffi_py.ir import DecoderBuffer +from clp_ffi_py.ir import DeserializerBuffer -class TestCaseDecoderBuffer(TestCLPBase): +class TestCaseDeserializerBuffer(TestCLPBase): """ - Class for testing clp_ffi_py.ir.DecoderBuffer. + Class for testing clp_ffi_py.ir.DeserializerBuffer.
""" input_src_dir: str = "test_data" @@ -22,10 +22,10 @@ def test_buffer_protocol(self) -> None: """ byte_array: bytearray = bytearray(b"Hello, world!") byte_stream: io.BytesIO = io.BytesIO(byte_array) - decoder_buffer: DecoderBuffer = DecoderBuffer(byte_stream) + deserializer_buffer: DeserializerBuffer = DeserializerBuffer(byte_stream) exception_captured: bool = False try: - byte_stream.readinto(decoder_buffer) # type: ignore + byte_stream.readinto(deserializer_buffer) # type: ignore except TypeError: exception_captured = True self.assertTrue( @@ -34,38 +34,38 @@ def test_buffer_protocol(self) -> None: def test_streaming_small_buffer(self) -> None: """ - Tests DecoderBuffer's functionality using the small buffer capacity. + Tests DeserializerBuffer's functionality using the small buffer capacity. """ buffer_capacity: int = 1024 self.__launch_test(buffer_capacity) def test_streaming_default_buffer(self) -> None: """ - Tests DecoderBuffer's functionality using the default buffer capacity. + Tests DeserializerBuffer's functionality using the default buffer capacity. """ self.__launch_test(None) def test_streaming_large_buffer(self) -> None: """ - Tests DecoderBuffer's functionality using the large buffer capacity. + Tests DeserializerBuffer's functionality using the large buffer capacity. """ buffer_capacity: int = 16384 self.__launch_test(buffer_capacity) def __launch_test(self, buffer_capacity: Optional[int]) -> None: """ - Tests the DecoderBuffer by streaming the files inside `test_src_dir`. + Tests the DeserializerBuffer by streaming the files inside `test_src_dir`. :param self - :param buffer_capacity: The buffer capacity used to initialize the decoder buffer. + :param buffer_capacity: The buffer capacity used to initialize the deserializer buffer. 
""" current_dir: Path = Path(__file__).resolve().parent - test_src_dir: Path = current_dir / TestCaseDecoderBuffer.input_src_dir + test_src_dir: Path = current_dir / TestCaseDeserializerBuffer.input_src_dir for file_path in test_src_dir.rglob("*"): if not file_path.is_file(): continue streaming_result: bytearray - decoder_buffer: DecoderBuffer + deserializer_buffer: DeserializerBuffer random_seed: int # Run against 10 different seeds: for _ in range(10): @@ -73,12 +73,12 @@ def __launch_test(self, buffer_capacity: Optional[int]) -> None: with open(str(file_path), "rb") as istream: try: if None is buffer_capacity: - decoder_buffer = DecoderBuffer(istream) + deserializer_buffer = DeserializerBuffer(istream) else: - decoder_buffer = DecoderBuffer( + deserializer_buffer = DeserializerBuffer( initial_buffer_capacity=buffer_capacity, input_stream=istream ) - streaming_result = decoder_buffer._test_streaming(random_seed) + streaming_result = deserializer_buffer._test_streaming(random_seed) except Exception as e: self.assertFalse( True, f"Error on file {file_path} using seed {random_seed}: {e}" @@ -89,10 +89,10 @@ def __assert_streaming_result( self, file_path: Path, streaming_result: bytearray, random_seed: int ) -> None: """ - Validates the streaming result read by the decoder buffer. + Validates the streaming result read by the deserializer buffer. :param file_path: Input stream file Path. - :param streaming_result: Result of DecoderBuffer `_test_streaming` method. + :param streaming_result: Result of DeserializerBuffer `_test_streaming` method. 
""" with open(str(file_path), "rb") as istream: ref_result: bytearray = bytearray(istream.read()) diff --git a/tests/test_ir/test_encoder.py b/tests/test_ir/test_encoder.py deleted file mode 100644 index d1badfaf..00000000 --- a/tests/test_ir/test_encoder.py +++ /dev/null @@ -1,41 +0,0 @@ -from test_ir.test_utils import TestCLPBase - -from clp_ffi_py.ir import FourByteEncoder - - -class TestCaseFourByteEncoder(TestCLPBase): - """ - Class for testing clp_ffi_py.ir.FourByteEncoder. - - The actual functionality should also be covered by the unittest of CLP - Python logging library. - TODO: When the decoder is implemented, add some more tests to ensure the - encoded bytes can be successfully decoded to recover the original log event. - """ - - def test_init(self) -> None: - type_error_exception_captured: bool = False - four_byte_encoder: FourByteEncoder - try: - four_byte_encoder = FourByteEncoder() # noqa - except TypeError: - type_error_exception_captured = True - self.assertEqual( - type_error_exception_captured, True, "FourByteEncoder should be non-instantiable." - ) - - def test_encoding_methods_consistency(self) -> None: - """ - This test checks if the result of encode_message_and_timestamp_delta is consistent with the - combination of encode_message and encode_timestamp_delta. - """ - timestamp_delta: int = -3190 - log_message: str = "This is a test message: Do NOT Reply!" 
- encoded_message_and_ts_delta: bytearray = ( - FourByteEncoder.encode_message_and_timestamp_delta( - timestamp_delta, log_message.encode() - ) - ) - encoded_message: bytearray = FourByteEncoder.encode_message(log_message.encode()) - encoded_ts_delta: bytearray = FourByteEncoder.encode_timestamp_delta(timestamp_delta) - self.assertEqual(encoded_message_and_ts_delta, encoded_message + encoded_ts_delta) diff --git a/tests/test_ir/test_readers.py b/tests/test_ir/test_readers.py index 27a8e803..e7a8ad77 100644 --- a/tests/test_ir/test_readers.py +++ b/tests/test_ir/test_readers.py @@ -1,11 +1,11 @@ from pathlib import Path from typing import List, Optional, Tuple -from test_ir.test_decoder import ( - TestCaseDecoderBase, - TestCaseDecoderTimeRangeQueryBase, - TestCaseDecoderTimeRangeWildcardQueryBase, - TestCaseDecoderWildcardQueryBase, +from test_ir.test_deserializer import ( + TestCaseDeserializerBase, + TestCaseDeserializerTimeRangeQueryBase, + TestCaseDeserializerTimeRangeWildcardQueryBase, + TestCaseDeserializerWildcardQueryBase, ) from test_ir.test_utils import TestCLPBase @@ -37,33 +37,33 @@ def read_log_stream( return metadata, log_events -class TestCaseReaderBase(TestCaseDecoderBase): +class TestCaseReaderBase(TestCaseDeserializerBase): # override - def _decode_log_stream( + def _deserialize_log_stream( self, log_path: Path, query: Optional[Query] ) -> Tuple[Metadata, List[LogEvent]]: return read_log_stream(log_path, query, self.enable_compression) -class TestCaseReaderTimeRangeQueryBase(TestCaseDecoderTimeRangeQueryBase): +class TestCaseReaderTimeRangeQueryBase(TestCaseDeserializerTimeRangeQueryBase): # override - def _decode_log_stream( + def _deserialize_log_stream( self, log_path: Path, query: Optional[Query] ) -> Tuple[Metadata, List[LogEvent]]: return read_log_stream(log_path, query, self.enable_compression) -class TestCaseReaderWildcardQueryBase(TestCaseDecoderWildcardQueryBase): +class 
TestCaseReaderWildcardQueryBase(TestCaseDeserializerWildcardQueryBase): # override - def _decode_log_stream( + def _deserialize_log_stream( self, log_path: Path, query: Optional[Query] ) -> Tuple[Metadata, List[LogEvent]]: return read_log_stream(log_path, query, self.enable_compression) -class TestCaseReaderTimeRangeWildcardQueryBase(TestCaseDecoderTimeRangeWildcardQueryBase): +class TestCaseReaderTimeRangeWildcardQueryBase(TestCaseDeserializerTimeRangeWildcardQueryBase): # override - def _decode_log_stream( + def _deserialize_log_stream( self, log_path: Path, query: Optional[Query] ) -> Tuple[Metadata, List[LogEvent]]: return read_log_stream(log_path, query, self.enable_compression) @@ -205,7 +205,7 @@ def test_incomplete_ir_stream_error(self) -> None: incomplete_stream_error_captured, "Incomplete Stream Error is not properly set." ) self.assertFalse(other_exception_captured, "No other exception should be set.") - self.assertTrue(0 != log_counter, "No logs are decoded.") + self.assertTrue(0 != log_counter, "No logs are deserialized.") def test_allow_incomplete_ir_stream_error(self) -> None: """ diff --git a/tests/test_ir/test_serializer.py b/tests/test_ir/test_serializer.py new file mode 100644 index 00000000..beb4b92f --- /dev/null +++ b/tests/test_ir/test_serializer.py @@ -0,0 +1,40 @@ +from test_ir.test_utils import TestCLPBase + +from clp_ffi_py.ir import FourByteSerializer + + +class TestCaseFourByteSerializer(TestCLPBase): + """ + Class for testing clp_ffi_py.ir.FourByteSerializer. + + The actual functionality should also be covered by the unittest of CLP Python logging library. + """ + + def test_init(self) -> None: + type_error_exception_captured: bool = False + four_byte_serializer: FourByteSerializer + try: + four_byte_serializer = FourByteSerializer() # noqa + except TypeError: + type_error_exception_captured = True + self.assertEqual( + type_error_exception_captured, True, "FourByteSerializer should be non-instantiable." 
+ ) + + def test_serialization_methods_consistency(self) -> None: + """ + This test checks if the result of serialize_message_and_timestamp_delta is consistent with + the combination of serialize_message and serialize_timestamp_delta. + """ + timestamp_delta: int = -3190 + log_message: str = "This is a test message: Do NOT Reply!" + serialized_message_and_ts_delta: bytearray = ( + FourByteSerializer.serialize_message_and_timestamp_delta( + timestamp_delta, log_message.encode() + ) + ) + serialized_message: bytearray = FourByteSerializer.serialize_message(log_message.encode()) + serialized_ts_delta: bytearray = FourByteSerializer.serialize_timestamp_delta( + timestamp_delta + ) + self.assertEqual(serialized_message_and_ts_delta, serialized_message + serialized_ts_delta)