From dba3db2b41c296623ab9370eda48022daf35ea59 Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 10 Jun 2024 20:45:34 -0400 Subject: [PATCH] Add more explicit interface to differentiate substring and full-string wildcard queries. (#62) --- README.md | 11 +++- clp_ffi_py/ir/query_builder.py | 49 +++++++++++++-- clp_ffi_py/wildcard_query.py | 57 ++++++++++++++++- pyproject.toml | 1 + requirements-dev.txt | 1 + src/clp_ffi_py/ir/native/PyQuery.cpp | 16 ++++- src/clp_ffi_py/ir/native/PyQuery.hpp | 7 +++ tests/test_ir/test_query.py | 39 +++++++++--- tests/test_ir/test_query_builder.py | 91 ++++++++++++++++++++++++---- 9 files changed, 240 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index b57efe0e..416dc984 100644 --- a/README.md +++ b/README.md @@ -141,13 +141,16 @@ from pathlib import Path from typing import List, Tuple from clp_ffi_py.ir import ClpIrFileReader, Query, QueryBuilder +from clp_ffi_py.wildcard_query import FullStringWildcardQuery, SubstringWildcardQuery # Create a QueryBuilder object to build the search query. query_builder: QueryBuilder = QueryBuilder() # Add wildcard patterns to filter log messages: -query_builder.add_wildcard_query("*uid=*,status=failed*") -query_builder.add_wildcard_query("*UID=*,Status=KILLED*", case_sensitive=True) +query_builder.add_wildcard_query(SubstringWildcardQuery("uid=*,status=failed")) +query_builder.add_wildcard_query( + FullStringWildcardQuery("*UID=*,Status=KILLED*", case_sensitive=True) +) # Initialize a Query object using the builder: wildcard_search_query: Query = query_builder.build() @@ -169,10 +172,12 @@ details, use the following code to access the related docstring. ```python from clp_ffi_py.ir import Query, QueryBuilder -from clp_ffi_py import WildcardQuery +from clp_ffi_py import FullStringWildcardQuery, SubstringWildcardQuery, WildcardQuery help(Query) help(QueryBuilder) help(WildcardQuery) +help(FullStringWildcardQuery) +help(SubstringWildcardQuery) ``` ### Streaming Decode/Search Directly from S3 Remote Storage diff --git a/clp_ffi_py/ir/query_builder.py b/clp_ffi_py/ir/query_builder.py index ecce0416..5b216bda 100644 --- a/clp_ffi_py/ir/query_builder.py +++ b/clp_ffi_py/ir/query_builder.py @@ -1,10 +1,17 @@ from __future__ import annotations +import warnings from copy import deepcopy -from typing import List, Optional +from typing import List, Optional, overload, Union + +from deprecated.sphinx import deprecated from clp_ffi_py.ir.native import Query -from clp_ffi_py.wildcard_query import WildcardQuery +from clp_ffi_py.wildcard_query import FullStringWildcardQuery, WildcardQuery + +_add_wildcard_query_deprecation_warning_message: str = "The wildcard query must be explicitly " +"created and passed as a parameter to this function. QueryBuilder should only accept instances of " +"`clp_ffi_py.wildcard_query.WildcardQuery`." class QueryBuilderException(Exception): @@ -78,6 +85,11 @@ def set_search_time_termination_margin(self, ts: int) -> QueryBuilder: self._search_time_termination_margin = ts return self + @overload + @deprecated( + version="0.0.12", + reason=_add_wildcard_query_deprecation_warning_message, + ) def add_wildcard_query(self, wildcard_query: str, case_sensitive: bool = False) -> QueryBuilder: """ Constructs and adds a :class:`~clp_ffi_py.wildcard_query.WildcardQuery` @@ -87,8 +99,37 @@ def add_wildcard_query(self, wildcard_query: str, case_sensitive: bool = False) :param case_sensitive: Whether to perform case-sensitive matching. :return: self. """ - self._wildcard_queries.append(WildcardQuery(wildcard_query, case_sensitive)) - return self + ... + + @overload + def add_wildcard_query(self, wildcard_query: WildcardQuery) -> QueryBuilder: + """ + Adds the given wildcard query to the wildcard query list. + + :param wildcard_query: The wildcard query to add. It can be any derived + class of :class:`~clp_ffi_py.wildcard_query.WildcardQuery`. + :return: self. + """ + ... + + def add_wildcard_query( + self, wildcard_query: Union[str, WildcardQuery], case_sensitive: bool = False + ) -> QueryBuilder: + """ + This method is the implementation of `add_wildcard_query`. + """ + if isinstance(wildcard_query, WildcardQuery): + self._wildcard_queries.append(wildcard_query) + return self + elif isinstance(wildcard_query, str): + warnings.warn( + _add_wildcard_query_deprecation_warning_message, + DeprecationWarning, + ) + self._wildcard_queries.append(FullStringWildcardQuery(wildcard_query, case_sensitive)) + return self + + raise NotImplementedError def add_wildcard_queries(self, wildcard_queries: List[WildcardQuery]) -> QueryBuilder: """ diff --git a/clp_ffi_py/wildcard_query.py b/clp_ffi_py/wildcard_query.py index 973add93..f6737bce 100644 --- a/clp_ffi_py/wildcard_query.py +++ b/clp_ffi_py/wildcard_query.py @@ -1,7 +1,12 @@ +import warnings + +from deprecated.sphinx import deprecated + + class WildcardQuery: """ - This class defines a wildcard query, which includes a wildcard string and a - boolean value to indicate if the match is case-sensitive. + This class defines an abstract wildcard query. It includes a wildcard string + and a boolean value to indicate if the match is case-sensitive. A wildcard string may contain the following types of supported wildcards: @@ -12,6 +17,13 @@ class WildcardQuery: Other characters which are escaped are treated as normal characters. """ + @deprecated( + version="0.0.12", + reason="`clp_ffi_py.wildcard_query.WildcardQuery` is supposed to be an abstract class and" + " should not be used directly. To create a wildcard query, please explicit instantiate" + " `clp_ffi_py.wildcard_query.SubstringWildcardQuery` or" + " `clp_ffi_py.wildcard_query.FullStringWildcardQuery`.", + ) def __init__(self, wildcard_query: str, case_sensitive: bool = False): """ Initializes a wildcard query using the given parameters. @@ -27,7 +39,7 @@ def __str__(self) -> str: :return: The string representation of the WildcardQuery object. """ return ( - f'WildcardQuery(wildcard_query="{self._wildcard_query}",' + f'{self.__class__.__name__}(wildcard_query="{self._wildcard_query}",' f" case_sensitive={self._case_sensitive})" ) @@ -44,3 +56,42 @@ def wildcard_query(self) -> str: @property def case_sensitive(self) -> bool: return self._case_sensitive + + +class SubstringWildcardQuery(WildcardQuery): + """ + This class defines a substring wildcard query. + + It is derived from + :class:`~clp_ffi_py.WildcardQuery`, adding both a prefix and a postfix + wildcard ("*") to the input wildcard string. This allows the query to match + any substring within a log message. + """ + + def __init__(self, substring_wildcard_query: str, case_sensitive: bool = False): + """ + Initializes a substring wildcard query using the given parameters. + + :param substring_wildcard_query: Wildcard query string. + :param case_sensitive: Case sensitive indicator. + """ + substring_wildcard_query = "*" + substring_wildcard_query + "*" + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + super().__init__(substring_wildcard_query, case_sensitive) + + +class FullStringWildcardQuery(WildcardQuery): + """ + This class defines a full string wildcard query. + + It is derived from + :class:`~clp_ffi_py.WildcardQuery`, and uses the input wildcard string + directly to create the query. This ensures that the query matches only the + entire log message. + """ + + def __init__(self, full_string_wildcard_query: str, case_sensitive: bool = False): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + super().__init__(full_string_wildcard_query, case_sensitive) diff --git a/pyproject.toml b/pyproject.toml index 4c9c8a47..4c4d0ffa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ description = "Python interface to the CLP Core Features through CLP's FFI" readme = "README.md" requires-python = ">=3.7" dependencies = [ + "Deprecated >= 1.2.14", "python-dateutil >= 2.7.0", "typing-extensions >= 4.1.1", "zstandard >= 0.18.0", diff --git a/requirements-dev.txt b/requirements-dev.txt index e1988f5a..6a56be30 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,5 +8,6 @@ mypy-extensions>=1.0.0 packaging>=21.3 ruff>=0.4.6 smart_open==6.4.0 +types-Deprecated>=1.2.9 types-python-dateutil>=2.8 zstandard>=0.18.0 diff --git a/src/clp_ffi_py/ir/native/PyQuery.cpp b/src/clp_ffi_py/ir/native/PyQuery.cpp index 0830f404..733cf04b 100644 --- a/src/clp_ffi_py/ir/native/PyQuery.cpp +++ b/src/clp_ffi_py/ir/native/PyQuery.cpp @@ -109,7 +109,7 @@ auto serialize_wildcard_queries(std::vector const& wildcard_queri PyObjectPtr const is_case_sensitive{get_py_bool(wildcard_query.is_case_sensitive() )}; PyObject* py_wildcard_query{PyObject_CallFunction( - PyQuery::get_py_wildcard_query_type(), + PyQuery::get_py_full_string_wildcard_query_type(), "OO", wildcard_py_str, is_case_sensitive.get() @@ -644,6 +644,7 @@ auto PyQuery::init( PyObjectStaticPtr PyQuery::m_py_type{nullptr}; PyObjectStaticPtr PyQuery::m_py_wildcard_query_type{nullptr}; +PyObjectStaticPtr PyQuery::m_py_full_string_wildcard_query_type{nullptr}; auto PyQuery::get_py_type() -> PyTypeObject* { return m_py_type.get(); @@ -653,6 +654,10 @@ auto PyQuery::get_py_wildcard_query_type() -> PyObject* { return m_py_wildcard_query_type.get(); } +auto PyQuery::get_py_full_string_wildcard_query_type() -> PyObject* { + return m_py_full_string_wildcard_query_type.get(); +} + auto PyQuery::module_level_init(PyObject* py_module) -> bool { static_assert(std::is_trivially_destructible()); auto* type{py_reinterpret_cast(PyType_FromSpec(&PyQuery_type_spec))}; @@ -669,11 +674,18 @@ auto PyQuery::module_level_init(PyObject* py_module) -> bool { if (nullptr == py_query) { return false; } - auto* py_wildcard_query_type = PyObject_GetAttrString(py_query, "WildcardQuery"); + auto* py_wildcard_query_type{PyObject_GetAttrString(py_query, "WildcardQuery")}; if (nullptr == py_wildcard_query_type) { return false; } m_py_wildcard_query_type.reset(py_wildcard_query_type); + auto* py_full_string_wildcard_query_type{ + PyObject_GetAttrString(py_query, "FullStringWildcardQuery") + }; + if (nullptr == py_full_string_wildcard_query_type) { + return false; + } + m_py_full_string_wildcard_query_type.reset(py_full_string_wildcard_query_type); return true; } } // namespace clp_ffi_py::ir::native diff --git a/src/clp_ffi_py/ir/native/PyQuery.hpp b/src/clp_ffi_py/ir/native/PyQuery.hpp index 8679429a..8cbde3d7 100644 --- a/src/clp_ffi_py/ir/native/PyQuery.hpp +++ b/src/clp_ffi_py/ir/native/PyQuery.hpp @@ -76,12 +76,19 @@ class PyQuery { */ [[nodiscard]] static auto get_py_wildcard_query_type() -> PyObject*; + /** + * @return PyObject that represents the Python level class + * `FullStringWildcardQuery`. + */ + [[nodiscard]] static auto get_py_full_string_wildcard_query_type() -> PyObject*; + private: PyObject_HEAD; Query* m_query; static PyObjectStaticPtr m_py_type; static PyObjectStaticPtr m_py_wildcard_query_type; + static PyObjectStaticPtr m_py_full_string_wildcard_query_type; }; } // namespace clp_ffi_py::ir::native #endif diff --git a/tests/test_ir/test_query.py b/tests/test_ir/test_query.py index 09bd603a..6be51f38 100644 --- a/tests/test_ir/test_query.py +++ b/tests/test_ir/test_query.py @@ -7,7 +7,7 @@ LogEvent, Query, ) -from clp_ffi_py.wildcard_query import WildcardQuery +from clp_ffi_py.wildcard_query import FullStringWildcardQuery, SubstringWildcardQuery, WildcardQuery class TestCaseWildcardQuery(TestCLPBase): @@ -20,20 +20,41 @@ def test_init(self) -> None: Test the initialization of WildcardQuery object. """ wildcard_string: str + expected_wildcard_string: str wildcard_query: WildcardQuery wildcard_string = "Are you the lord of *Pleiades*?" - wildcard_query = WildcardQuery(wildcard_string) - self._check_wildcard_query(wildcard_query, wildcard_string, False) + expected_wildcard_string = wildcard_string - wildcard_query = WildcardQuery(wildcard_string, True) - self._check_wildcard_query(wildcard_query, wildcard_string, True) + wildcard_query = FullStringWildcardQuery(wildcard_string) + self._check_wildcard_query(wildcard_query, expected_wildcard_string, False) - wildcard_query = WildcardQuery(wildcard_string, case_sensitive=True) - self._check_wildcard_query(wildcard_query, wildcard_string, True) + wildcard_query = FullStringWildcardQuery(wildcard_string, True) + self._check_wildcard_query(wildcard_query, expected_wildcard_string, True) - wildcard_query = WildcardQuery(case_sensitive=True, wildcard_query=wildcard_string) - self._check_wildcard_query(wildcard_query, wildcard_string, True) + wildcard_query = FullStringWildcardQuery(wildcard_string, case_sensitive=True) + self._check_wildcard_query(wildcard_query, expected_wildcard_string, True) + + wildcard_query = FullStringWildcardQuery( + case_sensitive=True, full_string_wildcard_query=wildcard_string + ) + self._check_wildcard_query(wildcard_query, expected_wildcard_string, True) + + expected_wildcard_string = "*" + wildcard_string + "*" + + wildcard_query = SubstringWildcardQuery(wildcard_string) + self._check_wildcard_query(wildcard_query, expected_wildcard_string, False) + + wildcard_query = SubstringWildcardQuery(wildcard_string, True) + self._check_wildcard_query(wildcard_query, expected_wildcard_string, True) + + wildcard_query = SubstringWildcardQuery(wildcard_string, case_sensitive=True) + self._check_wildcard_query(wildcard_query, expected_wildcard_string, True) + + wildcard_query = SubstringWildcardQuery( + case_sensitive=True, substring_wildcard_query=wildcard_string + ) + self._check_wildcard_query(wildcard_query, expected_wildcard_string, True) class TestCaseQuery(TestCLPBase): diff --git a/tests/test_ir/test_query_builder.py b/tests/test_ir/test_query_builder.py index 086c4e71..f6ddf365 100644 --- a/tests/test_ir/test_query_builder.py +++ b/tests/test_ir/test_query_builder.py @@ -1,3 +1,4 @@ +import warnings from typing import List, Optional from test_ir.test_utils import TestCLPBase @@ -7,7 +8,7 @@ QueryBuilder, QueryBuilderException, ) -from clp_ffi_py.wildcard_query import WildcardQuery +from clp_ffi_py.wildcard_query import FullStringWildcardQuery, SubstringWildcardQuery, WildcardQuery class TestCaseQueryBuilder(TestCLPBase): @@ -64,7 +65,7 @@ def test_init(self) -> None: attributes_exception_captured, "search time termination margin should be read-only" ) - query_builder.wildcard_queries.append(WildcardQuery("")) + query_builder.wildcard_queries.append(FullStringWildcardQuery("")) self.assertEqual( len(query_builder.wildcard_queries), 0, "The query list should be size of 0" ) @@ -112,12 +113,16 @@ def test_set_value(self) -> None: search_time_termination_margin, ) - wildcard_queries = [WildcardQuery("aaa*aaa"), WildcardQuery("bbb*bbb", True)] + wildcard_queries = [ + FullStringWildcardQuery("aaa*aaa"), + SubstringWildcardQuery("bbb*bbb", True), + ] for wildcard_query in wildcard_queries: - query_builder.add_wildcard_query( - wildcard_query.wildcard_query, wildcard_query.case_sensitive - ) - extra_wildcard_queries = [WildcardQuery("ccc?ccc", True), WildcardQuery("ddd?ddd")] + query_builder.add_wildcard_query(wildcard_query) + extra_wildcard_queries: List[WildcardQuery] = [ + FullStringWildcardQuery("ccc?ccc", True), + SubstringWildcardQuery("ddd?ddd"), + ] query_builder.add_wildcard_queries(extra_wildcard_queries) wildcard_queries.extend(extra_wildcard_queries) self._check_query( @@ -145,8 +150,8 @@ def test_set_value(self) -> None: wildcard_query_string: str = "eee*?*eee" query_builder.set_search_time_termination_margin( search_time_termination_margin - ).add_wildcard_query(wildcard_query_string) - wildcard_queries.append(WildcardQuery(wildcard_query_string)) + ).add_wildcard_query(wildcard_query=SubstringWildcardQuery(wildcard_query_string)) + wildcard_queries.append(SubstringWildcardQuery(wildcard_query_string)) self._check_query( query_builder.build(), search_time_lower_bound, @@ -175,8 +180,10 @@ def test_set_value(self) -> None: ) query_builder.reset_wildcard_queries() - query_builder.add_wildcard_query(wildcard_query_string) - wildcard_queries = [WildcardQuery(wildcard_query_string)] + query_builder.add_wildcard_query( + wildcard_query=FullStringWildcardQuery(wildcard_query_string) + ) + wildcard_queries = [FullStringWildcardQuery(wildcard_query_string)] self._check_query( query_builder.build(), search_time_lower_bound, @@ -216,3 +223,65 @@ def test_exception(self) -> None: "QueryBuilderException is not triggered when the search time lower bound exceeds the" " search time upper bound", ) + + def test_deprecated(self) -> None: + """ + Tests deprecated methods to ensure they are still functionally correct, + and the deprecation warnings are properly captured. + """ + query_builder: QueryBuilder = QueryBuilder() + wildcard_query_strings: List[str] = ["aaa", "bbb", "ccc", "ddd"] + wildcard_queries: List[WildcardQuery] = [] + for wildcard_query_str in wildcard_query_strings: + wildcard_queries.append(FullStringWildcardQuery(wildcard_query_str, False)) + + deprecation_warn_captured: bool + + deprecation_warn_captured = False + with warnings.catch_warnings(record=True) as caught_warnings: + query_builder.add_wildcard_query(wildcard_query_strings[0]) + self.assertNotEqual(None, caught_warnings) + for warning in caught_warnings: + if issubclass(warning.category, DeprecationWarning): + deprecation_warn_captured = True + break + self.assertTrue(deprecation_warn_captured) + + deprecation_warn_captured = False + with warnings.catch_warnings(record=True) as caught_warnings: + query_builder.add_wildcard_query(wildcard_query_strings[1], False) + self.assertNotEqual(None, caught_warnings) + for warning in caught_warnings: + if issubclass(warning.category, DeprecationWarning): + deprecation_warn_captured = True + break + self.assertTrue(deprecation_warn_captured) + + deprecation_warn_captured = False + with warnings.catch_warnings(record=True) as caught_warnings: + query_builder.add_wildcard_query(wildcard_query_strings[2], case_sensitive=False) + self.assertNotEqual(None, caught_warnings) + for warning in caught_warnings: + if issubclass(warning.category, DeprecationWarning): + deprecation_warn_captured = True + break + self.assertTrue(deprecation_warn_captured) + + deprecation_warn_captured = False + with warnings.catch_warnings(record=True) as caught_warnings: + query_builder.add_wildcard_query( + case_sensitive=False, wildcard_query=wildcard_query_strings[3] + ) + for warning in caught_warnings: + if issubclass(warning.category, DeprecationWarning): + deprecation_warn_captured = True + break + self.assertTrue(deprecation_warn_captured) + + self._check_query( + query_builder.build(), + Query.default_search_time_lower_bound(), + Query.default_search_time_upper_bound(), + wildcard_queries, + 0, + )