Skip to content

Commit

Permalink
feat: Add support for key-value pair IR format serialization and dese…
Browse files Browse the repository at this point in the history
…rialization. (#86)

Co-authored-by: kirkrodrigues <[email protected]>
  • Loading branch information
LinZhihao-723 and kirkrodrigues authored Nov 16, 2024
1 parent 81ecf85 commit 2b8c23f
Show file tree
Hide file tree
Showing 45 changed files with 3,581 additions and 50 deletions.
4 changes: 1 addition & 3 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,7 @@ IncludeCategories:
- Regex: "^<clp_ffi_py"
Priority: 4
# Third-party headers. Update this list when adding a new third-party library.
- Regex: "^<(clp)"
Priority: 3
- Regex: "<(json)"
- Regex: "<(clp|gsl|json|msgpack|outcome)"
Priority: 3
# C headers
- Regex: "^<.*.h>$"
Expand Down
10 changes: 8 additions & 2 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# References
<!-- Any issues or pull requests relevant to this pull request -->
<!--
Set the PR title to a meaningful commit message that:
- follows the Conventional Commits specification (https://www.conventionalcommits.org).
- is in imperative form.
Example:
fix: Don't add implicit wildcards ('*') at the beginning and the end of a query (fixes #390).
-->

# Description
<!-- Describe what this request will change/fix and provide any details necessary for reviewers -->
Expand Down
23 changes: 23 additions & 0 deletions .github/workflows/pr-title-checks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Workflow that checks pull request titles; its single job is `conventional-commits` (see below).
name: "pr-title-checks"

# Triggered when a pull request targeting `main` is opened, reopened, or edited.
on:
pull_request_target:
types: ["edited", "opened", "reopened"]
branches: ["main"]

# The workflow token is limited to read access on pull requests.
permissions:
pull-requests: "read"

# Runs are grouped by workflow name and ref, so a newer run for the same group replaces an older one.
concurrency:
group: "${{github.workflow}}-${{github.ref}}"

# Cancel in-progress jobs for efficiency
cancel-in-progress: true

jobs:
# Runs the `amannn/action-semantic-pull-request` action with the workflow token.
# NOTE(review): presumably this validates the PR title against the Conventional Commits
# specification -- confirm against the action's documentation.
conventional-commits:
runs-on: "ubuntu-latest"
steps:
- uses: "amannn/action-semantic-pull-request@v5"
env:
GITHUB_TOKEN: "${{secrets.GITHUB_TOKEN}}"
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
[submodule "src/clp"]
path = src/clp
url = https://github.com/y-scope/clp.git
[submodule "src/msgpack"]
path = src/msgpack
url = https://github.com/msgpack/msgpack-c
[submodule "src/GSL"]
path = src/GSL
url = https://github.com/microsoft/GSL.git
38 changes: 37 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ if(DEFINED SKBUILD_PROJECT_NAME)
"The version of the project parsed by scikit-build-core."
FORCE
)
set(CLP_FFI_PY_ENABLE_LINTING OFF)
else()
set(CLP_FFI_PY_INSTALL_LIBS OFF CACHE BOOL "Disable installing the built libraries." FORCE)
set(CLP_FFI_PY_PROJECT_NAME "clp-ffi-py" CACHE STRING "Use a placeholder project name." FORCE)
Expand All @@ -28,6 +29,7 @@ else()
"Enable/Disable output of compile commands during generation."
FORCE
)
set(CLP_FFI_PY_ENABLE_LINTING ON)
endif()

project(${CLP_FFI_PY_PROJECT_NAME} LANGUAGES CXX VERSION ${CLP_FFI_PY_VERSION})
Expand Down Expand Up @@ -57,48 +59,72 @@ set(CLP_FFI_PY_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
set(CLP_FFI_PY_LIB_SRC_DIR "${CLP_FFI_PY_SRC_DIR}/clp_ffi_py")
set(CLP_FFI_PY_CLP_CORE_DIR "${CLP_FFI_PY_SRC_DIR}/clp/components/core")

# Add CLP's string_utils
add_subdirectory(${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/string_utils)

# Add GSL
add_subdirectory(${CLP_FFI_PY_SRC_DIR}/GSL)

# Add msgpack
set(MSGPACK_CXX20 ON CACHE BOOL "Enable C++20 in msgpack" FORCE)
set(MSGPACK_USE_BOOST OFF CACHE BOOL "Disable Boost in msgpack" FORCE)
add_subdirectory(${CLP_FFI_PY_SRC_DIR}/msgpack EXCLUDE_FROM_ALL)

# NOTE: We don't add headers here since CLP core is technically a library we're using, not a part of
# this project.
set(CLP_FFI_PY_CLP_CORE_SOURCES
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/BufferReader.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ffi/ir_stream/decoding_methods.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ffi/ir_stream/encoding_methods.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ffi/ir_stream/Serializer.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ffi/ir_stream/utils.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ffi/encoding_methods.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ffi/SchemaTree.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ffi/KeyValuePairLogEvent.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ir/EncodedTextAst.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ir/parsing.cpp
${CLP_FFI_PY_CLP_CORE_DIR}/src/clp/ReaderInterface.cpp
)

# NOTE: We include headers to ensure IDEs like CLion load the project properly.
set(CLP_FFI_PY_LIB_IR_SOURCES
${CLP_FFI_PY_LIB_SRC_DIR}/api_decoration.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/error_messages.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ExceptionFFI.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/DeserializerBufferReader.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/DeserializerBufferReader.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/deserialization_methods.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/deserialization_methods.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/LogEvent.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/Metadata.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/Metadata.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyDeserializer.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyDeserializer.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyDeserializerBuffer.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyDeserializerBuffer.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyFourByteDeserializer.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyFourByteDeserializer.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyFourByteSerializer.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyFourByteSerializer.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyKeyValuePairLogEvent.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyKeyValuePairLogEvent.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyLogEvent.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyLogEvent.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyMetadata.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyMetadata.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyQuery.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PyQuery.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PySerializer.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/PySerializer.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/Query.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/Query.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/serialization_methods.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/ir/native/serialization_methods.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/modules/ir_native.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/Py_utils.cpp
${CLP_FFI_PY_LIB_SRC_DIR}/Py_utils.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/PyExceptionContext.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/PyObjectCast.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/PyObjectUtils.hpp
${CLP_FFI_PY_LIB_SRC_DIR}/Python.hpp
Expand All @@ -125,7 +151,17 @@ target_include_directories(

target_include_directories(${CLP_FFI_PY_LIB_IR} PRIVATE ${CLP_FFI_PY_SRC_DIR})

target_link_libraries(${CLP_FFI_PY_LIB_IR} PRIVATE clp::string_utils)
if(CLP_FFI_PY_ENABLE_LINTING)
target_compile_definitions(${CLP_FFI_PY_LIB_IR} PRIVATE CLP_FFI_PY_ENABLE_LINTING)
endif()

target_link_libraries(
${CLP_FFI_PY_LIB_IR}
PRIVATE
clp::string_utils
Microsoft.GSL::GSL
msgpack-cxx
)

if(CLP_FFI_PY_INSTALL_LIBS)
install(
Expand Down
3 changes: 3 additions & 0 deletions clp_ffi_py/ir/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@
__all__: List[str] = [
"Decoder", # native_deprecated
"DecoderBuffer", # native_deprecated
"Deserializer", # native
"DeserializerBuffer", # native
"FourByteDeserializer", # native
"FourByteEncoder", # native_deprecated
"FourByteSerializer", # native
"IncompleteStreamError", # native
"KeyValuePairLogEvent", # native
"LogEvent", # native
"Metadata", # native
"Query", # native
"QueryBuilder", # query_builder
"Serializer", # native
"ClpIrFileReader", # readers
"ClpIrStreamReader", # readers
]
32 changes: 31 additions & 1 deletion clp_ffi_py/ir/native.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations

from datetime import tzinfo
from typing import Any, Dict, IO, List, Optional
from types import TracebackType
from typing import Any, Dict, IO, List, Optional, Type

from clp_ffi_py.wildcard_query import WildcardQuery

Expand Down Expand Up @@ -82,4 +85,31 @@ class FourByteDeserializer:
allow_incomplete_stream: bool = False,
) -> Optional[LogEvent]: ...

# Type stub for the native `KeyValuePairLogEvent` class.
# It is constructed from a dictionary, and `to_dict` returns a dictionary.
# NOTE(review): presumably `to_dict` returns the key-value pairs the event was built from --
# confirm against the native implementation.
class KeyValuePairLogEvent:
def __init__(self, dictionary: Dict[Any, Any]): ...
def to_dict(self) -> Dict[Any, Any]: ...

# Type stub for the native `Serializer` class.
# - Constructed around a binary stream (`output_stream`); `buffer_size_limit` defaults to 65536
#   (presumably a size in bytes for an internal buffer -- TODO confirm).
# - Declares `__enter__`/`__exit__`, so instances can be used in a `with` statement.
# - `serialize_log_event_from_msgpack_map` takes a msgpack-encoded map as `bytes` and returns an
#   `int` (presumably the number of bytes serialized for that event -- TODO confirm).
# - `get_num_bytes_serialized` returns an `int`; `flush` and `close` return nothing.
class Serializer:
def __init__(self, output_stream: IO[bytes], buffer_size_limit: int = 65536): ...
def __enter__(self) -> Serializer: ...
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> None: ...
def serialize_log_event_from_msgpack_map(self, msgpack_map: bytes) -> int: ...
def get_num_bytes_serialized(self) -> int: ...
def flush(self) -> None: ...
def close(self) -> None: ...

# Type stub for the native `Deserializer` class.
# - Constructed around a binary stream (`input_stream`); `buffer_capacity` defaults to 65536
#   (presumably a size in bytes for the read buffer -- TODO confirm).
# - `allow_incomplete_stream` defaults to False.
#   NOTE(review): presumably this controls whether a truncated stream raises
#   `IncompleteStreamError` -- confirm against the native implementation.
# - `deserialize_log_event` returns a `KeyValuePairLogEvent` or None (presumably None marks the end
#   of the stream -- TODO confirm).
class Deserializer:
def __init__(
self,
input_stream: IO[bytes],
buffer_capacity: int = 65536,
allow_incomplete_stream: bool = False,
): ...
def deserialize_log_event(self) -> Optional[KeyValuePairLogEvent]: ...

# Exception type exposed by the native module.
# NOTE(review): presumably raised when a stream ends before it is complete and incomplete streams
# are not allowed -- confirm against the native implementation.
class IncompleteStreamError(Exception): ...
11 changes: 11 additions & 0 deletions clp_ffi_py/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from datetime import datetime, tzinfo
from typing import Any, Dict, Optional

Expand Down Expand Up @@ -45,3 +46,13 @@ def serialize_dict_to_msgpack(dictionary: Dict[Any, Any]) -> bytes:
if not isinstance(dictionary, dict):
raise TypeError("The type of the input object must be a dictionary.")
return msgpack.packb(dictionary)


def parse_json_str(json_str: str) -> Any:
    """
    Parses a JSON string into the corresponding Python object by delegating to `json.loads`.

    :param json_str: The JSON string to parse.
    :return: The Python object produced by parsing `json_str`.
    :raise: Whatever `json.loads` raises for invalid input (e.g. `json.JSONDecodeError`).
    """
    parsed_object: Any = json.loads(json_str)
    return parsed_object
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ exclude = [
"clp_ffi_py/ir/__init__.py"
]
line-length = 100
src = ["."]

[tool.ruff.lint]
select = ["E", "I", "F"]
Expand Down
1 change: 1 addition & 0 deletions src/GSL
Submodule GSL added at 87f9d7
14 changes: 12 additions & 2 deletions src/clp_ffi_py/ExceptionFFI.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
#define CLP_FFI_PY_EXCEPTION_FFI

#include <string>
#include <utility>

#include <clp/ErrorCode.hpp>
#include <clp/TraceableException.hpp>

#include <clp_ffi_py/PyExceptionContext.hpp>

namespace clp_ffi_py {
/**
* A class that represents a traceable exception during the native code execution. Note: for
* exceptions of CPython execution, please use CPython interface to set the exception instead.
* A class that represents a traceable exception during the native code execution. It captures any
* Python exceptions set, allowing the handler at the catch site to either restore or discard the
* exception as needed.
*/
class ExceptionFFI : public clp::TraceableException {
public:
Expand All @@ -23,8 +28,13 @@ class ExceptionFFI : public clp::TraceableException {

[[nodiscard]] auto what() const noexcept -> char const* override { return m_message.c_str(); }

[[nodiscard]] auto get_py_exception_context() -> PyExceptionContext& {
return m_py_exception_context;
}

private:
std::string m_message;
PyExceptionContext m_py_exception_context;
};
} // namespace clp_ffi_py

Expand Down
70 changes: 70 additions & 0 deletions src/clp_ffi_py/PyExceptionContext.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#ifndef CLP_FFI_PY_PYEXCEPTIONCONTEXT_HPP
#define CLP_FFI_PY_PYEXCEPTIONCONTEXT_HPP

#include <clp_ffi_py/Python.hpp> // Must always be included before any other header files

namespace clp_ffi_py {
/**
 * Class to get/set Python exception context, designed to capture the current exception state upon
 * instantiation and provide a restore API that allows users to reinstate the exception context when
 * needed. Doc: https://docs.python.org/3/c-api/exceptions.html#c.PyErr_Fetch
 *
 * The object holds the references handed out by `PyErr_Fetch` and releases whatever it still holds
 * when destroyed. It is neither copyable nor movable, so each reference is released exactly once.
 */
class PyExceptionContext {
public:
    // Constructor
    /**
     * Constructs the context by fetching the current raised exception (if any).
     * NOTE: `PyErr_Fetch` clears the interpreter's error indicator; if no exception is set, all
     * three stored pointers are null.
     */
    PyExceptionContext() { PyErr_Fetch(&m_type, &m_value, &m_traceback); }

    // Delete copy/move constructors and assignments
    PyExceptionContext(PyExceptionContext const&) = delete;
    PyExceptionContext(PyExceptionContext&&) = delete;
    auto operator=(PyExceptionContext const&) -> PyExceptionContext& = delete;
    auto operator=(PyExceptionContext&&) -> PyExceptionContext& = delete;

    // Destructor
    /**
     * Releases any references still held by the context (i.e., those not handed back through
     * `restore`).
     */
    ~PyExceptionContext() {
        Py_XDECREF(m_type);
        Py_XDECREF(m_value);
        Py_XDECREF(m_traceback);
    }

    // Methods
    /**
     * @return Whether the context stores an exception.
     */
    [[nodiscard]] auto has_exception() const noexcept -> bool {
        // `PyErr_Fetch` may leave the value (and traceback) null even when an exception type is
        // set, so the type is the only reliable indicator that an exception was captured.
        return nullptr != m_type;
    }

    /**
     * Restores the exception from the context.
     * NOTE:
     * - This method will clear the existing exception if one is set.
     * - The stored context will be cleared after restoration.
     * - If there's no exception in the stored context, the error indicator will be cleared.
     * - This method should be called strictly once, otherwise the error indicator will be cleared.
     * @return Whether an exception has been set by restoring the context.
     */
    [[maybe_unused]] auto restore() noexcept -> bool {
        auto const exception_has_been_set{has_exception()};
        // `PyErr_Restore` takes over the references, so the members must be reset to avoid
        // releasing them again in the destructor.
        PyErr_Restore(m_type, m_value, m_traceback);
        m_type = nullptr;
        m_value = nullptr;
        m_traceback = nullptr;
        return exception_has_been_set;
    }

    /**
     * @return The stored exception type, or nullptr if none is stored. No new reference is
     * created; the context keeps ownership.
     */
    [[nodiscard]] auto get_type() const -> PyObject* { return m_type; }

    /**
     * @return The stored exception value, or nullptr if none is stored. No new reference is
     * created; the context keeps ownership.
     */
    [[nodiscard]] auto get_value() const -> PyObject* { return m_value; }

    /**
     * @return The stored traceback, or nullptr if none is stored. No new reference is created;
     * the context keeps ownership.
     */
    [[nodiscard]] auto get_traceback() const -> PyObject* { return m_traceback; }

private:
    PyObject* m_type{nullptr};
    PyObject* m_value{nullptr};
    PyObject* m_traceback{nullptr};
};
} // namespace clp_ffi_py

#endif // CLP_FFI_PY_PYEXCEPTIONCONTEXT_HPP
8 changes: 8 additions & 0 deletions src/clp_ffi_py/PyObjectCast.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,26 @@ auto py_reinterpret_cast(Src* src) noexcept -> Dst* {
}

namespace ir::native {
class PyDeserializer;
class PyDeserializerBuffer;
class PyFourByteDeserializer;
class PyKeyValuePairLogEvent;
class PyLogEvent;
class PyMetadata;
class PyQuery;
class PySerializer;
} // namespace ir::native

CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyDeserializer);
CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyDeserializerBuffer);
CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyFourByteDeserializer);
CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyKeyValuePairLogEvent);
CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyLogEvent);
CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyMetadata);
CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PyQuery);
CLP_FFI_PY_MARK_AS_PYOBJECT(ir::native::PySerializer);
CLP_FFI_PY_MARK_AS_PYOBJECT(PyBytesObject);
CLP_FFI_PY_MARK_AS_PYOBJECT(PyDictObject);
CLP_FFI_PY_MARK_AS_PYOBJECT(PyTypeObject);
} // namespace clp_ffi_py

Expand Down
Loading

0 comments on commit 2b8c23f

Please sign in to comment.