From cae135ad260c24da9be11fa303836d3693d7ba09 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 28 Sep 2023 16:33:36 -0400 Subject: [PATCH 1/5] Add support for wildcard query partial match --- clp_ffi_py/wildcard_query.py | 26 ++++++++++-- src/clp_ffi_py/ir/native/PyQuery.cpp | 27 ++++++++---- src/clp_ffi_py/ir/native/Query.hpp | 33 ++++++++++++--- tests/test_ir/test_query.py | 61 +++++++++++++++++++--------- tests/test_ir/test_utils.py | 14 ++++++- 5 files changed, 124 insertions(+), 37 deletions(-) diff --git a/clp_ffi_py/wildcard_query.py b/clp_ffi_py/wildcard_query.py index 973add93..8e539394 100644 --- a/clp_ffi_py/wildcard_query.py +++ b/clp_ffi_py/wildcard_query.py @@ -1,7 +1,8 @@ class WildcardQuery: """ - This class defines a wildcard query, which includes a wildcard string and a - boolean value to indicate if the match is case-sensitive. + This class defines a wildcard query, which includes a wildcard string a + boolean value to indicate if the match is case-sensitive, and a boolean + value to indicate if the match is a partial match. A wildcard string may contain the following types of supported wildcards: @@ -10,17 +11,29 @@ class WildcardQuery: Each wildcard can be escaped using a preceding '\\\\' (a single backslash). Other characters which are escaped are treated as normal characters. + + By default, the wildcard query is set to be a partial match. This means any + log message that contains the given wildcard string will be a match. If the + partial match is set to false, the wildcard query matches only if the + wildcard string matches the entire log message. + + A partial match wildcard query `"${WILDCARD_STRING}"` is equivalent to the + full match wildcard query `*${WILDCARD_STRING}*`. """ - def __init__(self, wildcard_query: str, case_sensitive: bool = False): + def __init__( + self, wildcard_query: str, case_sensitive: bool = False, partial_match: bool = True + ): """ Initializes a wildcard query using the given parameters. :param wildcard_query: Wildcard query string. :param case_sensitive: Case sensitive indicator. + :param partial_match: Partial match indicator. """ self._wildcard_query: str = wildcard_query self._case_sensitive: bool = case_sensitive + self._partial_match: bool = partial_match def __str__(self) -> str: """ @@ -28,7 +41,8 @@ def __str__(self) -> str: """ return ( f'WildcardQuery(wildcard_query="{self._wildcard_query}",' - f" case_sensitive={self._case_sensitive})" + f" case_sensitive={self._case_sensitive})," + f" partial_match={self._partial_match}" ) def __repr__(self) -> str: @@ -44,3 +58,7 @@ def wildcard_query(self) -> str: @property def case_sensitive(self) -> bool: return self._case_sensitive + + @property + def partial_match(self) -> bool: + return self._partial_match diff --git a/src/clp_ffi_py/ir/native/PyQuery.cpp b/src/clp_ffi_py/ir/native/PyQuery.cpp index 0d1cb304..eb515eea 100644 --- a/src/clp_ffi_py/ir/native/PyQuery.cpp +++ b/src/clp_ffi_py/ir/native/PyQuery.cpp @@ -3,8 +3,6 @@ #include "PyQuery.hpp" -#include - #include #include #include @@ -50,6 +48,7 @@ auto deserialize_wildcard_queries( PyErr_SetString(PyExc_TypeError, clp_ffi_py::cPyTypeError); return false; } + auto* wildcard_query_py_str{PyObject_GetAttrString(wildcard_query, "wildcard_query")}; if (nullptr == wildcard_query_py_str) { return false; @@ -58,6 +57,11 @@ auto deserialize_wildcard_queries( if (nullptr == case_sensitive_py_bool) { return false; } + auto* partial_match_py_bool{PyObject_GetAttrString(wildcard_query, "partial_match")}; + if (nullptr == partial_match_py_bool) { + return false; + } + std::string_view wildcard_query_view; if (false == parse_py_string_as_string_view(wildcard_query_py_str, wildcard_query_view)) { return false; @@ -66,9 +70,15 @@ auto deserialize_wildcard_queries( if (-1 == is_case_sensitive && nullptr != PyErr_Occurred()) { return false; } + int const is_partial_match{PyObject_IsTrue(partial_match_py_bool)}; + if (-1 == is_partial_match && nullptr != PyErr_Occurred()) { + return false; + } + wildcard_queries.emplace_back( - clean_up_wildcard_search_string(wildcard_query_view), - static_cast(is_case_sensitive) + std::string{wildcard_query_view}, + static_cast(is_case_sensitive), + static_cast(is_partial_match) ); } return true; @@ -99,7 +109,7 @@ auto serialize_wildcard_queries(std::vector const& wildcard_queri Py_ssize_t idx{0}; for (auto const& wildcard_query : wildcard_queries) { PyObjectPtr const wildcard_py_str_ptr{ - PyUnicode_FromString(wildcard_query.get_wildcard_query().c_str()) + PyUnicode_FromString(wildcard_query.get_uncleaned_wildcard_query().c_str()) }; auto* wildcard_py_str{wildcard_py_str_ptr.get()}; if (nullptr == wildcard_py_str) { @@ -108,11 +118,14 @@ auto serialize_wildcard_queries(std::vector const& wildcard_queri } PyObjectPtr const is_case_sensitive{get_py_bool(wildcard_query.is_case_sensitive() )}; + PyObjectPtr const is_partial_match{get_py_bool(wildcard_query.is_partial_match()) + }; PyObject* py_wildcard_query{PyObject_CallFunction( PyQuery::get_py_wildcard_query_type(), - "OO", + "OOO", wildcard_py_str, - is_case_sensitive.get() + is_case_sensitive.get(), + is_partial_match.get() )}; if (nullptr == py_wildcard_query) { Py_DECREF(py_wildcard_queries); diff --git a/src/clp_ffi_py/ir/native/Query.hpp b/src/clp_ffi_py/ir/native/Query.hpp index 8e8da60a..2901b60c 100644 --- a/src/clp_ffi_py/ir/native/Query.hpp +++ b/src/clp_ffi_py/ir/native/Query.hpp @@ -8,33 +8,54 @@ #include #include +#include #include #include namespace clp_ffi_py::ir::native { /** - * This class defines a wildcard query, which includes a wildcard string and a - * boolean value to indicate if the match is case-sensitive. + * This class defines a wildcard query, which includes a wildcard string a + * boolean value to indicate if the match is case-sensitive, and a boolean value + * to indicate if the query is a partial match. */ class WildcardQuery { public: /** - * Initializes the wildcard query. + * Initializes the wildcard query by cleaning the wildcard string. * @param wildcard_query Wildcard query. * @param case_sensitive Case sensitive indicator. + * @param partial_match Partial match indicator. */ - WildcardQuery(std::string wildcard_query, bool case_sensitive) - : m_wildcard_query(std::move(wildcard_query)), - m_case_sensitive(case_sensitive){}; + WildcardQuery(std::string wildcard_query, bool case_sensitive, bool partial_match) + : m_uncleaned_wildcard_query(std::move(wildcard_query)), + m_case_sensitive(case_sensitive), + m_partial_match(partial_match) { + if (partial_match) { + m_wildcard_query = "*"; + m_wildcard_query += m_uncleaned_wildcard_query; + m_wildcard_query += "*"; + m_wildcard_query = clean_up_wildcard_search_string(m_wildcard_query); + } else { + m_wildcard_query = clean_up_wildcard_search_string(m_uncleaned_wildcard_query); + } + } + + [[nodiscard]] auto get_uncleaned_wildcard_query() const -> std::string const& { + return m_uncleaned_wildcard_query; + } [[nodiscard]] auto get_wildcard_query() const -> std::string const& { return m_wildcard_query; } [[nodiscard]] auto is_case_sensitive() const -> bool { return m_case_sensitive; } + [[nodiscard]] auto is_partial_match() const -> bool { return m_partial_match; } + private: + std::string m_uncleaned_wildcard_query; std::string m_wildcard_query; bool m_case_sensitive; + bool m_partial_match; }; /** diff --git a/tests/test_ir/test_query.py b/tests/test_ir/test_query.py index 09bd603a..fd8c24ba 100644 --- a/tests/test_ir/test_query.py +++ b/tests/test_ir/test_query.py @@ -24,16 +24,19 @@ def test_init(self) -> None: wildcard_string = "Are you the lord of *Pleiades*?" wildcard_query = WildcardQuery(wildcard_string) - self._check_wildcard_query(wildcard_query, wildcard_string, False) + self._check_wildcard_query(wildcard_query, wildcard_string, False, True) wildcard_query = WildcardQuery(wildcard_string, True) - self._check_wildcard_query(wildcard_query, wildcard_string, True) + self._check_wildcard_query(wildcard_query, wildcard_string, True, True) wildcard_query = WildcardQuery(wildcard_string, case_sensitive=True) - self._check_wildcard_query(wildcard_query, wildcard_string, True) + self._check_wildcard_query(wildcard_query, wildcard_string, True, True) wildcard_query = WildcardQuery(case_sensitive=True, wildcard_query=wildcard_string) - self._check_wildcard_query(wildcard_query, wildcard_string, True) + self._check_wildcard_query(wildcard_query, wildcard_string, True, True) + + wildcard_query = WildcardQuery(partial_match=False, wildcard_query=wildcard_string) + self._check_wildcard_query(wildcard_query, wildcard_string, False, False) class TestCaseQuery(TestCLPBase): @@ -173,20 +176,15 @@ def test_init_wildcard_queries(self) -> None: wildcard_queries = [ WildcardQuery("who is \*** pleiades??\\"), - WildcardQuery("a\?m********I?\\"), - WildcardQuery("\g\%\*\??***"), - ] - ref_wildcard_queries = [ - WildcardQuery("who is \** pleiades??"), - WildcardQuery("a\?m*I?"), - WildcardQuery("g%\*\??*"), + WildcardQuery("a\?m********I?\\", case_sensitive=True), + WildcardQuery("\g\%\*\??***", partial_match=False), ] query = Query(wildcard_queries=wildcard_queries) self._check_query( query, Query.default_search_time_lower_bound(), Query.default_search_time_upper_bound(), - ref_wildcard_queries, + wildcard_queries, 0, ) @@ -203,7 +201,7 @@ def test_init_wildcard_queries(self) -> None: query, search_time_lower_bound, search_time_upper_bound, - ref_wildcard_queries, + wildcard_queries, search_time_termination_margin, ) @@ -287,7 +285,7 @@ def test_log_event_match(self) -> None: "Only log events whose message matches the wildcard query should match the query." ) log_event = LogEvent("fhakjhLFISHfashfShfiuSLSZkfSUSFS", 0) - wildcard_query_string = "*JHlfish*SH?IU*s" + wildcard_query_string = "JHlfish*SH?IU*s" query = Query(wildcard_queries=[WildcardQuery(wildcard_query_string)]) self.assertEqual(query.match_log_event(log_event), True, description) self.assertEqual(log_event.match_query(query), True, description) @@ -295,7 +293,7 @@ def test_log_event_match(self) -> None: self.assertEqual(query.match_log_event(log_event), False, description) self.assertEqual(log_event.match_query(query), False, description) log_event = LogEvent("j:flJo;jsf:LSJDFoiASFoasjzFZA", 0) - wildcard_query_string = "*flJo*s?*AS*A" + wildcard_query_string = "flJo*s?*AS" query = Query(wildcard_queries=[WildcardQuery(wildcard_query_string)]) self.assertEqual(query.match_log_event(log_event), True, description) self.assertEqual(log_event.match_query(query), True, description) @@ -307,12 +305,12 @@ def test_log_event_match(self) -> None: "Log event whose messages matches any one of the wildcard queries should be considered" " as a match of the query." ) - wildcard_queries: List[WildcardQuery] = [WildcardQuery("*b&A*"), WildcardQuery("*A|a*")] + wildcard_queries: List[WildcardQuery] = [WildcardQuery("b&A"), WildcardQuery("A|a")] log_event = LogEvent("-----a-A-----", 0) query = Query(wildcard_queries=wildcard_queries) self.assertEqual(query.match_log_event(log_event), False, description) self.assertEqual(log_event.match_query(query), False, description) - wildcard_queries.append(WildcardQuery("*a?a*")) + wildcard_queries.append(WildcardQuery("a?a")) query = Query(wildcard_queries=wildcard_queries) self.assertEqual(query.match_log_event(log_event), True, description) self.assertEqual(log_event.match_query(query), True, description) @@ -323,12 +321,37 @@ def test_log_event_match(self) -> None: description = ( "The match of query requires both timestamp in range and log message matching any one" - " of the wildcard queries." + " of the wildcard queries. (Partial Match)" + ) + query = Query( + search_time_lower_bound=3190, + search_time_upper_bound=3270, + wildcard_queries=[WildcardQuery("q?Q"), WildcardQuery("t?t", True)], + ) + log_event = LogEvent("I'm not matching anything...", 3213) + self.assertEqual(query.match_log_event(log_event), False, description) + self.assertEqual(log_event.match_query(query), False, description) + log_event = LogEvent("I'm not matching anything... T.T", 3213) + self.assertEqual(query.match_log_event(log_event), False, description) + self.assertEqual(log_event.match_query(query), False, description) + log_event = LogEvent("I'm not matching anything... QAQ", 2887) + self.assertEqual(query.match_log_event(log_event), False, description) + self.assertEqual(log_event.match_query(query), False, description) + log_event = LogEvent("I'm finally matching something... QAQ", 3213) + self.assertEqual(query.match_log_event(log_event), True, description) + self.assertEqual(log_event.match_query(query), True, description) + + description = ( + "The match of query requires both timestamp in range and log message matching any one" + " of the wildcard queries. (Full Match)" ) query = Query( search_time_lower_bound=3190, search_time_upper_bound=3270, - wildcard_queries=[WildcardQuery("*q?Q*"), WildcardQuery("*t?t*", True)], + wildcard_queries=[ + WildcardQuery("*q?Q*", partial_match=False), + WildcardQuery("*t?t*", case_sensitive=True, partial_match=False), + ], ) log_event = LogEvent("I'm not matching anything...", 3213) self.assertEqual(query.match_log_event(log_event), False, description) diff --git a/tests/test_ir/test_utils.py b/tests/test_ir/test_utils.py index 2f6808f1..c2103c8c 100644 --- a/tests/test_ir/test_utils.py +++ b/tests/test_ir/test_utils.py @@ -142,7 +142,11 @@ def _check_log_event( ) def _check_wildcard_query( - self, wildcard_query: WildcardQuery, ref_wildcard_string: str, ref_is_case_sensitive: bool + self, + wildcard_query: WildcardQuery, + ref_wildcard_string: str, + ref_is_case_sensitive: bool, + ref_is_partial_match: bool, ) -> None: """ Given a WildcardQuery object, check if the stored data matches the input @@ -151,9 +155,11 @@ def _check_wildcard_query( :param wildcard_query: Input WildcardQuery object. :param ref_wildcard_string: Reference wildcard string. :param ref_is_case_sensitive: Reference case-sensitive indicator. + :param ref_is_partial_match: Reference partial-match indicator. """ wildcard_string: str = wildcard_query.wildcard_query is_case_sensitive: bool = wildcard_query.case_sensitive + is_partial_match: bool = wildcard_query.partial_match self.assertEqual( wildcard_string, ref_wildcard_string, @@ -164,6 +170,11 @@ def _check_wildcard_query( ref_is_case_sensitive, f"Expected case-sensitive indicator: {ref_is_case_sensitive}", ) + self.assertEqual( + is_partial_match, + ref_is_partial_match, + f"Expected partial-match indicator: {ref_is_partial_match}", + ) def _check_query( self, @@ -224,6 +235,7 @@ def _check_query( wildcard_queries[i], ref_wildcard_queries[i].wildcard_query, ref_wildcard_queries[i].case_sensitive, + ref_wildcard_queries[i].partial_match, ) From 38017c637293610d096cef70a0bf204a1daf7fc6 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 28 Sep 2023 16:35:32 -0400 Subject: [PATCH 2/5] Update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f104a242..a47f6a6d 100644 --- a/README.md +++ b/README.md @@ -128,8 +128,8 @@ from clp_ffi_py.ir import ClpIrFileReader, Query, QueryBuilder query_builder: QueryBuilder = QueryBuilder() # Add wildcard patterns to filter log messages: -query_builder.add_wildcard_query("*uid=*,status=failed*") -query_builder.add_wildcard_query("*UID=*,Status=KILLED*", case_sensitive=True) +query_builder.add_wildcard_query("uid=*,status=failed") +query_builder.add_wildcard_query("UID=*,Status=KILLED", case_sensitive=True) # Initialize a Query object using the builder: wildcard_search_query: Query = query_builder.build() From a36235846811201262c61e23a37a5560373ba249 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Thu, 28 Sep 2023 16:43:13 -0400 Subject: [PATCH 3/5] Update query builder --- clp_ffi_py/ir/query_builder.py | 7 +++++-- tests/test_ir/test_query_builder.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/clp_ffi_py/ir/query_builder.py b/clp_ffi_py/ir/query_builder.py index ecce0416..47b11136 100644 --- a/clp_ffi_py/ir/query_builder.py +++ b/clp_ffi_py/ir/query_builder.py @@ -78,16 +78,19 @@ def set_search_time_termination_margin(self, ts: int) -> QueryBuilder: self._search_time_termination_margin = ts return self - def add_wildcard_query(self, wildcard_query: str, case_sensitive: bool = False) -> QueryBuilder: + def add_wildcard_query( + self, wildcard_query: str, case_sensitive: bool = False, partial_match: bool = True + ) -> QueryBuilder: """ Constructs and adds a :class:`~clp_ffi_py.wildcard_query.WildcardQuery` to the wildcard query list. :param wildcard_query: The wildcard query string to add. :param case_sensitive: Whether to perform case-sensitive matching. + :param partial_match: Whether to perform partial matching. :return: self. """ - self._wildcard_queries.append(WildcardQuery(wildcard_query, case_sensitive)) + self._wildcard_queries.append(WildcardQuery(wildcard_query, case_sensitive, partial_match)) return self def add_wildcard_queries(self, wildcard_queries: List[WildcardQuery]) -> QueryBuilder: diff --git a/tests/test_ir/test_query_builder.py b/tests/test_ir/test_query_builder.py index 086c4e71..55eba926 100644 --- a/tests/test_ir/test_query_builder.py +++ b/tests/test_ir/test_query_builder.py @@ -112,10 +112,16 @@ def test_set_value(self) -> None: search_time_termination_margin, ) - wildcard_queries = [WildcardQuery("aaa*aaa"), WildcardQuery("bbb*bbb", True)] + wildcard_queries = [ + WildcardQuery("aaa*aaa"), + WildcardQuery("bbb*bbb", True), + WildcardQuery("full match", True, False), + ] for wildcard_query in wildcard_queries: query_builder.add_wildcard_query( - wildcard_query.wildcard_query, wildcard_query.case_sensitive + wildcard_query.wildcard_query, + wildcard_query.case_sensitive, + wildcard_query.partial_match, ) extra_wildcard_queries = [WildcardQuery("ccc?ccc", True), WildcardQuery("ddd?ddd")] query_builder.add_wildcard_queries(extra_wildcard_queries) From 499ef7d7f41352609f7d3bacad2dd046ddf39cd2 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Fri, 29 Sep 2023 09:10:59 -0400 Subject: [PATCH 4/5] Update readme; rename unclean query string --- README.md | 4 +++- src/clp_ffi_py/ir/native/PyQuery.cpp | 2 +- src/clp_ffi_py/ir/native/Query.hpp | 12 ++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index a47f6a6d..146f3260 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,9 @@ from clp_ffi_py.ir import ClpIrFileReader, Query, QueryBuilder # Create a QueryBuilder object to build the search query. query_builder: QueryBuilder = QueryBuilder() -# Add wildcard patterns to filter log messages: +# Add wildcard patterns to filter log messages.By default, the partial match +# will be executed. Please refer to the documents of `WildcardQuery` object for +# the difference between partial and full matches. query_builder.add_wildcard_query("uid=*,status=failed") query_builder.add_wildcard_query("UID=*,Status=KILLED", case_sensitive=True) diff --git a/src/clp_ffi_py/ir/native/PyQuery.cpp b/src/clp_ffi_py/ir/native/PyQuery.cpp index eb515eea..4629dfcb 100644 --- a/src/clp_ffi_py/ir/native/PyQuery.cpp +++ b/src/clp_ffi_py/ir/native/PyQuery.cpp @@ -109,7 +109,7 @@ auto serialize_wildcard_queries(std::vector const& wildcard_queri Py_ssize_t idx{0}; for (auto const& wildcard_query : wildcard_queries) { PyObjectPtr const wildcard_py_str_ptr{ - PyUnicode_FromString(wildcard_query.get_uncleaned_wildcard_query().c_str()) + PyUnicode_FromString(wildcard_query.get_original_query_string().c_str()) }; auto* wildcard_py_str{wildcard_py_str_ptr.get()}; if (nullptr == wildcard_py_str) { diff --git a/src/clp_ffi_py/ir/native/Query.hpp b/src/clp_ffi_py/ir/native/Query.hpp index 2901b60c..ac2f1097 100644 --- a/src/clp_ffi_py/ir/native/Query.hpp +++ b/src/clp_ffi_py/ir/native/Query.hpp @@ -28,21 +28,21 @@ class WildcardQuery { * @param partial_match Partial match indicator. */ WildcardQuery(std::string wildcard_query, bool case_sensitive, bool partial_match) - : m_uncleaned_wildcard_query(std::move(wildcard_query)), + : m_original_query_string(std::move(wildcard_query)), m_case_sensitive(case_sensitive), m_partial_match(partial_match) { if (partial_match) { m_wildcard_query = "*"; - m_wildcard_query += m_uncleaned_wildcard_query; + m_wildcard_query += m_original_query_string; m_wildcard_query += "*"; m_wildcard_query = clean_up_wildcard_search_string(m_wildcard_query); } else { - m_wildcard_query = clean_up_wildcard_search_string(m_uncleaned_wildcard_query); + m_wildcard_query = clean_up_wildcard_search_string(m_original_query_string); } } - [[nodiscard]] auto get_uncleaned_wildcard_query() const -> std::string const& { - return m_uncleaned_wildcard_query; + [[nodiscard]] auto get_original_query_string() const -> std::string const& { + return m_original_query_string; } [[nodiscard]] auto get_wildcard_query() const -> std::string const& { return m_wildcard_query; } @@ -52,7 +52,7 @@ class WildcardQuery { [[nodiscard]] auto is_partial_match() const -> bool { return m_partial_match; } private: - std::string m_uncleaned_wildcard_query; + std::string m_original_query_string; std::string m_wildcard_query; bool m_case_sensitive; bool m_partial_match; From 0a36df6cbf9eac843e2f9e0e3efab202e2a16221 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Fri, 29 Sep 2023 10:44:46 -0400 Subject: [PATCH 5/5] Discard C++ level wildcard query changes --- clp_ffi_py/wildcard_query.py | 14 +++---- src/clp_ffi_py/ir/native/Metadata.cpp | 9 ++--- src/clp_ffi_py/ir/native/PyDecoder.cpp | 9 ++--- src/clp_ffi_py/ir/native/PyDecoderBuffer.cpp | 16 +++----- .../ir/native/PyFourByteEncoder.cpp | 9 ++--- src/clp_ffi_py/ir/native/PyLogEvent.cpp | 18 +++------ src/clp_ffi_py/ir/native/PyMetadata.cpp | 12 ++---- src/clp_ffi_py/ir/native/PyQuery.cpp | 39 ++++++------------- src/clp_ffi_py/ir/native/Query.hpp | 37 ++++-------------- src/clp_ffi_py/ir/native/decoding_methods.cpp | 6 +-- src/clp_ffi_py/ir/native/encoding_methods.cpp | 3 +- tests/test_ir/test_query.py | 18 +++++---- tests/test_ir/test_query_builder.py | 20 +++++----- tests/test_ir/test_utils.py | 9 ----- 14 files changed, 71 insertions(+), 148 deletions(-) diff --git a/clp_ffi_py/wildcard_query.py b/clp_ffi_py/wildcard_query.py index 8e539394..7d09dfa7 100644 --- a/clp_ffi_py/wildcard_query.py +++ b/clp_ffi_py/wildcard_query.py @@ -31,9 +31,12 @@ def __init__( :param case_sensitive: Case sensitive indicator. :param partial_match: Partial match indicator. """ - self._wildcard_query: str = wildcard_query + self._wildcard_query: str self._case_sensitive: bool = case_sensitive - self._partial_match: bool = partial_match + if partial_match: + self._wildcard_query = "*" + wildcard_query + "*" + else: + self._wildcard_query = wildcard_query def __str__(self) -> str: """ @@ -41,8 +44,7 @@ def __str__(self) -> str: """ return ( f'WildcardQuery(wildcard_query="{self._wildcard_query}",' - f" case_sensitive={self._case_sensitive})," - f" partial_match={self._partial_match}" + f" case_sensitive={self._case_sensitive})" ) def __repr__(self) -> str: @@ -58,7 +60,3 @@ def wildcard_query(self) -> str: @property def case_sensitive(self) -> bool: return self._case_sensitive - - @property - def partial_match(self) -> bool: - return self._partial_match diff --git a/src/clp_ffi_py/ir/native/Metadata.cpp b/src/clp_ffi_py/ir/native/Metadata.cpp index 837947ec..d4cacb83 100644 --- a/src/clp_ffi_py/ir/native/Metadata.cpp +++ b/src/clp_ffi_py/ir/native/Metadata.cpp @@ -31,8 +31,7 @@ Metadata::Metadata(nlohmann::json const& metadata, bool is_four_byte_encoding) { m_is_four_byte_encoding = is_four_byte_encoding; auto const* ref_timestamp_key{ - static_cast(ffi::ir_stream::cProtocol::Metadata::ReferenceTimestampKey) - }; + static_cast(ffi::ir_stream::cProtocol::Metadata::ReferenceTimestampKey)}; if (false == is_valid_json_string_data(metadata, ref_timestamp_key)) { throw ExceptionFFI( ErrorCode_MetadataCorrupted, @@ -49,8 +48,7 @@ Metadata::Metadata(nlohmann::json const& metadata, bool is_four_byte_encoding) { } auto const* timestamp_format_key{ - static_cast(ffi::ir_stream::cProtocol::Metadata::TimestampPatternKey) - }; + static_cast(ffi::ir_stream::cProtocol::Metadata::TimestampPatternKey)}; if (false == is_valid_json_string_data(metadata, timestamp_format_key)) { throw ExceptionFFI( ErrorCode_MetadataCorrupted, @@ -62,8 +60,7 @@ Metadata::Metadata(nlohmann::json const& metadata, bool is_four_byte_encoding) { m_timestamp_format = metadata[timestamp_format_key]; auto const* timezone_id_key{ - static_cast(ffi::ir_stream::cProtocol::Metadata::TimeZoneIdKey) - }; + static_cast(ffi::ir_stream::cProtocol::Metadata::TimeZoneIdKey)}; if (false == is_valid_json_string_data(metadata, timezone_id_key)) { throw ExceptionFFI( ErrorCode_MetadataCorrupted, diff --git a/src/clp_ffi_py/ir/native/PyDecoder.cpp b/src/clp_ffi_py/ir/native/PyDecoder.cpp index 1b4f4a6a..32c7f28b 100644 --- a/src/clp_ffi_py/ir/native/PyDecoder.cpp +++ b/src/clp_ffi_py/ir/native/PyDecoder.cpp @@ -55,8 +55,7 @@ PyMethodDef PyDecoder_method_table[]{ METH_VARARGS | METH_KEYWORDS | METH_STATIC, static_cast(cDecodeNextLogEventDoc)}, - {nullptr, nullptr, 0, nullptr} -}; + {nullptr, nullptr, 0, nullptr}}; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( @@ -70,8 +69,7 @@ PyDoc_STRVAR( PyType_Slot PyDecoder_slots[]{ {Py_tp_methods, static_cast(PyDecoder_method_table)}, {Py_tp_doc, const_cast(static_cast(cPyDecoderDoc))}, - {0, nullptr} -}; + {0, nullptr}}; // NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) /** @@ -82,8 +80,7 @@ PyType_Spec PyDecoder_type_spec{ sizeof(PyDecoder), 0, Py_TPFLAGS_DEFAULT, - static_cast(PyDecoder_slots) -}; + static_cast(PyDecoder_slots)}; } // namespace PyObjectPtr PyDecoder::m_py_type{nullptr}; diff --git a/src/clp_ffi_py/ir/native/PyDecoderBuffer.cpp b/src/clp_ffi_py/ir/native/PyDecoderBuffer.cpp index 0ea7c302..698d6334 100644 --- a/src/clp_ffi_py/ir/native/PyDecoderBuffer.cpp +++ b/src/clp_ffi_py/ir/native/PyDecoderBuffer.cpp @@ -32,8 +32,7 @@ auto PyDecoderBuffer_init(PyDecoderBuffer* self, PyObject* args, PyObject* keywo static char* keyword_table[]{ static_cast(keyword_input_stream), static_cast(keyword_initial_buffer_capacity), - nullptr - }; + nullptr}; // If the argument parsing fails, `self` will be deallocated. We must reset // all pointers to nullptr in advance, otherwise the deallocator might @@ -55,8 +54,8 @@ auto PyDecoderBuffer_init(PyDecoderBuffer* self, PyObject* args, PyObject* keywo return -1; } - PyObjectPtr const readinto_method_obj{PyObject_GetAttrString(input_stream, "readinto") - }; + PyObjectPtr const readinto_method_obj{ + PyObject_GetAttrString(input_stream, "readinto")}; auto* readinto_method{readinto_method_obj.get()}; if (nullptr == readinto_method) { return -1; @@ -154,8 +153,7 @@ PyMethodDef PyDecoderBuffer_method_table[]{ METH_O, static_cast(cPyDecoderBufferTestStreamingDoc)}, - {nullptr} -}; + {nullptr}}; /** * Declaration of Python buffer protocol. @@ -190,8 +188,7 @@ PyType_Slot PyDecoderBuffer_slots[]{ {Py_tp_init, reinterpret_cast(PyDecoderBuffer_init)}, {Py_tp_methods, static_cast(PyDecoderBuffer_method_table)}, {Py_tp_doc, const_cast(static_cast(cPyDecoderBufferDoc))}, - {0, nullptr} -}; + {0, nullptr}}; // NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-*-cast) /** @@ -202,8 +199,7 @@ PyType_Spec PyDecoderBuffer_type_spec{ sizeof(PyDecoderBuffer), 0, Py_TPFLAGS_DEFAULT, - static_cast(PyDecoderBuffer_slots) -}; + static_cast(PyDecoderBuffer_slots)}; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( diff --git a/src/clp_ffi_py/ir/native/PyFourByteEncoder.cpp b/src/clp_ffi_py/ir/native/PyFourByteEncoder.cpp index 12110dc0..5ce775c3 100644 --- a/src/clp_ffi_py/ir/native/PyFourByteEncoder.cpp +++ b/src/clp_ffi_py/ir/native/PyFourByteEncoder.cpp @@ -98,8 +98,7 @@ PyMethodDef PyFourByteEncoder_method_table[]{ METH_NOARGS | METH_STATIC, static_cast(cEncodeEndOfIrDoc)}, - {nullptr, nullptr, 0, nullptr} -}; + {nullptr, nullptr, 0, nullptr}}; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( @@ -113,8 +112,7 @@ PyDoc_STRVAR( PyType_Slot PyFourByteEncoder_slots[]{ {Py_tp_methods, static_cast(PyFourByteEncoder_method_table)}, {Py_tp_doc, const_cast(static_cast(cPyFourByteEncoderDoc))}, - {0, nullptr} -}; + {0, nullptr}}; // NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-const-cast) /** @@ -125,8 +123,7 @@ PyType_Spec PyFourByteEncoder_type_spec{ sizeof(PyFourByteEncoder), 0, Py_TPFLAGS_DEFAULT, - static_cast(PyFourByteEncoder_slots) -}; + static_cast(PyFourByteEncoder_slots)}; } // namespace PyObjectPtr PyFourByteEncoder::m_py_type{nullptr}; diff --git a/src/clp_ffi_py/ir/native/PyLogEvent.cpp b/src/clp_ffi_py/ir/native/PyLogEvent.cpp index 3273cc0c..5f4e7a89 100644 --- a/src/clp_ffi_py/ir/native/PyLogEvent.cpp +++ b/src/clp_ffi_py/ir/native/PyLogEvent.cpp @@ -34,8 +34,7 @@ auto PyLogEvent_init(PyLogEvent* self, PyObject* args, PyObject* keywords) -> in static_cast(keyword_timestamp), static_cast(keyword_message_idx), static_cast(keyword_metadata), - nullptr - }; + nullptr}; // If the argument parsing fails, `self` will be deallocated. We must reset // all pointers to nullptr in advance, otherwise the deallocator might @@ -122,8 +121,7 @@ auto PyLogEvent_getstate(PyLogEvent* self) -> PyObject* { clp_ffi_py::py_utils_get_formatted_timestamp( log_event->get_timestamp(), self->has_metadata() ? self->get_py_metadata()->get_py_timezone() : Py_None - ) - }; + )}; auto* formatted_timestamp_ptr{formatted_timestamp_object.get()}; if (nullptr == formatted_timestamp_ptr) { return nullptr; @@ -381,8 +379,7 @@ PyMethodDef PyLogEvent_method_table[]{ METH_O, static_cast(cPyLogEventSetStateDoc)}, - {nullptr} -}; + {nullptr}}; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( @@ -413,8 +410,7 @@ PyType_Slot PyLogEvent_slots[]{ {Py_tp_repr, reinterpret_cast(PyLogEvent_repr)}, {Py_tp_methods, static_cast(PyLogEvent_method_table)}, {Py_tp_doc, const_cast(static_cast(cPyLogEventDoc))}, - {0, nullptr} -}; + {0, nullptr}}; // NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-*-cast) /** @@ -425,8 +421,7 @@ PyType_Spec PyLogEvent_type_spec{ sizeof(PyLogEvent), 0, Py_TPFLAGS_DEFAULT, - static_cast(PyLogEvent_slots) -}; + static_cast(PyLogEvent_slots)}; } // namespace auto PyLogEvent::get_formatted_message(PyObject* timezone) -> PyObject* { @@ -448,8 +443,7 @@ auto PyLogEvent::get_formatted_message(PyObject* timezone) -> PyObject* { } PyObjectPtr const formatted_timestamp_object{ - py_utils_get_formatted_timestamp(m_log_event->get_timestamp(), timezone) - }; + py_utils_get_formatted_timestamp(m_log_event->get_timestamp(), timezone)}; auto* formatted_timestamp_ptr{formatted_timestamp_object.get()}; if (nullptr == formatted_timestamp_ptr) { return nullptr; diff --git a/src/clp_ffi_py/ir/native/PyMetadata.cpp b/src/clp_ffi_py/ir/native/PyMetadata.cpp index e5607adc..2b3a1e69 100644 --- a/src/clp_ffi_py/ir/native/PyMetadata.cpp +++ b/src/clp_ffi_py/ir/native/PyMetadata.cpp @@ -32,8 +32,7 @@ auto PyMetadata_init(PyMetadata* self, PyObject* args, PyObject* keywords) -> in static_cast(keyword_ref_timestamp), static_cast(keyword_timestamp_format), static_cast(keyword_timezone_id), - nullptr - }; + nullptr}; ffi::epoch_time_ms_t ref_timestamp{0}; char const* input_timestamp_format{nullptr}; @@ -175,8 +174,7 @@ PyMethodDef PyMetadata_method_table[]{ METH_NOARGS, static_cast(cPyMetadataGetTimezoneDoc)}, - {nullptr} -}; + {nullptr}}; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( @@ -206,8 +204,7 @@ PyType_Slot PyMetadata_slots[]{ {Py_tp_new, reinterpret_cast(PyType_GenericNew)}, {Py_tp_methods, static_cast(PyMetadata_method_table)}, {Py_tp_doc, const_cast(static_cast(cPyMetadataDoc))}, - {0, nullptr} -}; + {0, nullptr}}; // NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-*-cast) PyType_Spec PyMetadata_type_spec{ @@ -215,8 +212,7 @@ PyType_Spec PyMetadata_type_spec{ sizeof(PyMetadata), 0, Py_TPFLAGS_DEFAULT, - static_cast(PyMetadata_slots) -}; + static_cast(PyMetadata_slots)}; } // namespace auto PyMetadata::init( diff --git a/src/clp_ffi_py/ir/native/PyQuery.cpp b/src/clp_ffi_py/ir/native/PyQuery.cpp index 4629dfcb..ed759121 100644 --- a/src/clp_ffi_py/ir/native/PyQuery.cpp +++ b/src/clp_ffi_py/ir/native/PyQuery.cpp @@ -57,10 +57,6 @@ auto deserialize_wildcard_queries( if (nullptr == case_sensitive_py_bool) { return false; } - auto* partial_match_py_bool{PyObject_GetAttrString(wildcard_query, "partial_match")}; - if (nullptr == partial_match_py_bool) { - return false; - } std::string_view wildcard_query_view; if (false == parse_py_string_as_string_view(wildcard_query_py_str, wildcard_query_view)) { @@ -70,15 +66,10 @@ auto deserialize_wildcard_queries( if (-1 == is_case_sensitive && nullptr != PyErr_Occurred()) { return false; } - int const is_partial_match{PyObject_IsTrue(partial_match_py_bool)}; - if (-1 == is_partial_match && nullptr != PyErr_Occurred()) { - return false; - } wildcard_queries.emplace_back( - std::string{wildcard_query_view}, - static_cast(is_case_sensitive), - static_cast(is_partial_match) + clean_up_wildcard_search_string(wildcard_query_view), + static_cast(is_case_sensitive) ); } return true; @@ -109,23 +100,20 @@ auto serialize_wildcard_queries(std::vector const& wildcard_queri Py_ssize_t idx{0}; for (auto const& wildcard_query : wildcard_queries) { PyObjectPtr const wildcard_py_str_ptr{ - PyUnicode_FromString(wildcard_query.get_original_query_string().c_str()) - }; + PyUnicode_FromString(wildcard_query.get_wildcard_query().c_str())}; auto* wildcard_py_str{wildcard_py_str_ptr.get()}; if (nullptr == wildcard_py_str) { Py_DECREF(py_wildcard_queries); return nullptr; } - PyObjectPtr const is_case_sensitive{get_py_bool(wildcard_query.is_case_sensitive() - )}; - PyObjectPtr const is_partial_match{get_py_bool(wildcard_query.is_partial_match()) - }; + PyObjectPtr const is_case_sensitive{ + get_py_bool(wildcard_query.is_case_sensitive())}; PyObject* py_wildcard_query{PyObject_CallFunction( PyQuery::get_py_wildcard_query_type(), "OOO", wildcard_py_str, is_case_sensitive.get(), - is_partial_match.get() + get_py_bool(false) )}; if (nullptr == py_wildcard_query) { Py_DECREF(py_wildcard_queries); @@ -168,8 +156,7 @@ auto PyQuery_init(PyQuery* self, PyObject* args, PyObject* keywords) -> int { static_cast(keyword_search_time_upper_bound), static_cast(keyword_wildcard_queries), static_cast(keyword_search_time_termination_margin), - nullptr - }; + nullptr}; // If the argument parsing fails, `self` will be deallocated. We must reset // all pointers to nullptr in advance, otherwise the deallocator might @@ -341,8 +328,7 @@ auto PyQuery_setstate(PyQuery* self, PyObject* state) -> PyObject* { } auto* search_time_termination_margin_obj{ - PyDict_GetItemString(state, cStateSearchTimeTerminationMargin) - }; + PyDict_GetItemString(state, cStateSearchTimeTerminationMargin)}; if (nullptr == search_time_termination_margin_obj) { PyErr_Format( PyExc_KeyError, @@ -554,8 +540,7 @@ PyMethodDef PyQuery_method_table[]{ METH_NOARGS | METH_STATIC, static_cast(cPyQueryDefaultSearchTimeTerminationMargin)}, - {nullptr} -}; + {nullptr}}; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) PyDoc_STRVAR( @@ -603,8 +588,7 @@ PyType_Slot PyQuery_slots[]{ {Py_tp_repr, reinterpret_cast(PyQuery_repr)}, {Py_tp_methods, static_cast(PyQuery_method_table)}, {Py_tp_doc, const_cast(static_cast(cPyQueryDoc))}, - {0, nullptr} -}; + {0, nullptr}}; // NOLINTEND(cppcoreguidelines-avoid-c-arrays, cppcoreguidelines-pro-type-*-cast) /** @@ -615,8 +599,7 @@ PyType_Spec PyQuery_type_spec{ sizeof(Query), 0, Py_TPFLAGS_DEFAULT, - static_cast(PyQuery_slots) -}; + static_cast(PyQuery_slots)}; } // namespace auto PyQuery::init( diff --git a/src/clp_ffi_py/ir/native/Query.hpp b/src/clp_ffi_py/ir/native/Query.hpp index ac2f1097..e47c444b 100644 --- a/src/clp_ffi_py/ir/native/Query.hpp +++ b/src/clp_ffi_py/ir/native/Query.hpp @@ -25,37 +25,18 @@ class WildcardQuery { * Initializes the wildcard query by cleaning the wildcard string. * @param wildcard_query Wildcard query. * @param case_sensitive Case sensitive indicator. - * @param partial_match Partial match indicator. */ - WildcardQuery(std::string wildcard_query, bool case_sensitive, bool partial_match) - : m_original_query_string(std::move(wildcard_query)), - m_case_sensitive(case_sensitive), - m_partial_match(partial_match) { - if (partial_match) { - m_wildcard_query = "*"; - m_wildcard_query += m_original_query_string; - m_wildcard_query += "*"; - m_wildcard_query = clean_up_wildcard_search_string(m_wildcard_query); - } else { - m_wildcard_query = clean_up_wildcard_search_string(m_original_query_string); - } - } - - [[nodiscard]] auto get_original_query_string() const -> std::string const& { - return m_original_query_string; - } + WildcardQuery(std::string wildcard_query, bool case_sensitive) + : m_wildcard_query(std::move(wildcard_query)), + m_case_sensitive(case_sensitive) {} [[nodiscard]] auto get_wildcard_query() const -> std::string const& { return m_wildcard_query; } [[nodiscard]] auto is_case_sensitive() const -> bool { return m_case_sensitive; } - [[nodiscard]] auto is_partial_match() const -> bool { return m_partial_match; } - private: - std::string m_original_query_string; std::string m_wildcard_query; bool m_case_sensitive; - bool m_partial_match; }; /** @@ -79,11 +60,9 @@ class Query { public: static constexpr ffi::epoch_time_ms_t const cTimestampMin{0}; static constexpr ffi::epoch_time_ms_t const cTimestampMax{ - std::numeric_limits::max() - }; + std::numeric_limits::max()}; static constexpr ffi::epoch_time_ms_t const cDefaultSearchTimeTerminationMargin{ - static_cast(60 * 1000) - }; + static_cast(60 * 1000)}; /** * Constructs an empty query object that will match all logs. The wildcard @@ -114,8 +93,7 @@ class Query { m_search_termination_ts{ (cTimestampMax - search_time_termination_margin > search_time_upper_bound) ? search_time_upper_bound + search_time_termination_margin - : cTimestampMax - } { + : cTimestampMax} { throw_if_ts_range_invalid(); } @@ -138,8 +116,7 @@ class Query { m_search_termination_ts{ (cTimestampMax - search_time_termination_margin > search_time_upper_bound) ? search_time_upper_bound + search_time_termination_margin - : cTimestampMax - }, + : cTimestampMax}, m_wildcard_queries{std::move(wildcard_queries)} { throw_if_ts_range_invalid(); } diff --git a/src/clp_ffi_py/ir/native/decoding_methods.cpp b/src/clp_ffi_py/ir/native/decoding_methods.cpp index bb453a09..ff663086 100644 --- a/src/clp_ffi_py/ir/native/decoding_methods.cpp +++ b/src/clp_ffi_py/ir/native/decoding_methods.cpp @@ -171,8 +171,7 @@ auto decode_preamble(PyObject* Py_UNUSED(self), PyObject* py_decoder_buffer) -> auto const unconsumed_bytes = decoder_buffer->get_unconsumed_bytes(); auto const metadata_buffer{ - unconsumed_bytes.subspan(metadata_pos, static_cast(metadata_size)) - }; + unconsumed_bytes.subspan(metadata_pos, static_cast(metadata_size))}; decoder_buffer->commit_read_buffer_consumption(static_cast(ir_buffer_cursor_pos)); PyMetadata* metadata{nullptr}; try { @@ -201,8 +200,7 @@ auto decode_next_log_event(PyObject* Py_UNUSED(self), PyObject* args, PyObject* static_cast(keyword_decoder_buffer), static_cast(keyword_query), static_cast(keyword_allow_incomplete_stream), - nullptr - }; + nullptr}; PyDecoderBuffer* decoder_buffer{nullptr}; PyObject* query{Py_None}; diff --git a/src/clp_ffi_py/ir/native/encoding_methods.cpp b/src/clp_ffi_py/ir/native/encoding_methods.cpp index d3b816f5..c5128d90 100644 --- a/src/clp_ffi_py/ir/native/encoding_methods.cpp +++ b/src/clp_ffi_py/ir/native/encoding_methods.cpp @@ -33,8 +33,7 @@ auto encode_four_byte_preamble(PyObject* Py_UNUSED(self), PyObject* args) -> PyO std::string_view const timestamp_format{ input_timestamp_format, - static_cast(input_timestamp_format_size) - }; + static_cast(input_timestamp_format_size)}; std::string_view const timezone{input_timezone, static_cast(input_timezone_size)}; std::vector ir_buf; diff --git a/tests/test_ir/test_query.py b/tests/test_ir/test_query.py index fd8c24ba..c5f5eba6 100644 --- a/tests/test_ir/test_query.py +++ b/tests/test_ir/test_query.py @@ -20,23 +20,25 @@ def test_init(self) -> None: Test the initialization of WildcardQuery object. """ wildcard_string: str + processed_wildcard_string: str wildcard_query: WildcardQuery wildcard_string = "Are you the lord of *Pleiades*?" + processed_wildcard_string = "*" + wildcard_string + "*" wildcard_query = WildcardQuery(wildcard_string) - self._check_wildcard_query(wildcard_query, wildcard_string, False, True) + self._check_wildcard_query(wildcard_query, processed_wildcard_string, False) wildcard_query = WildcardQuery(wildcard_string, True) - self._check_wildcard_query(wildcard_query, wildcard_string, True, True) + self._check_wildcard_query(wildcard_query, processed_wildcard_string, True) wildcard_query = WildcardQuery(wildcard_string, case_sensitive=True) - self._check_wildcard_query(wildcard_query, wildcard_string, True, True) + self._check_wildcard_query(wildcard_query, processed_wildcard_string, True) wildcard_query = WildcardQuery(case_sensitive=True, wildcard_query=wildcard_string) - self._check_wildcard_query(wildcard_query, wildcard_string, True, True) + self._check_wildcard_query(wildcard_query, processed_wildcard_string, True) wildcard_query = WildcardQuery(partial_match=False, wildcard_query=wildcard_string) - self._check_wildcard_query(wildcard_query, wildcard_string, False, False) + self._check_wildcard_query(wildcard_query, wildcard_string, False) class TestCaseQuery(TestCLPBase): @@ -175,9 +177,9 @@ def test_init_wildcard_queries(self) -> None: ) wildcard_queries = [ - WildcardQuery("who is \*** pleiades??\\"), - WildcardQuery("a\?m********I?\\", case_sensitive=True), - WildcardQuery("\g\%\*\??***", partial_match=False), + WildcardQuery("who is \** pleiades??\\"), + WildcardQuery("a\?m*I?\\", case_sensitive=True), + WildcardQuery("g%\*\??*", partial_match=False), ] query = Query(wildcard_queries=wildcard_queries) self._check_query( diff --git a/tests/test_ir/test_query_builder.py b/tests/test_ir/test_query_builder.py index 55eba926..5bbf7d80 100644 --- a/tests/test_ir/test_query_builder.py +++ b/tests/test_ir/test_query_builder.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List, Optional, Tuple from test_ir.test_utils import TestCLPBase @@ -112,17 +112,15 @@ def test_set_value(self) -> None: search_time_termination_margin, ) - wildcard_queries = [ - WildcardQuery("aaa*aaa"), - WildcardQuery("bbb*bbb", True), - WildcardQuery("full match", True, False), + wildcard_queries = [] + wildcard_query_tuples: List[Tuple[str, bool, bool]] = [ + ("aaa*aaa", False, True), + ("bbb*bbb", True, True), + ("full match", True, False), ] - for wildcard_query in wildcard_queries: - query_builder.add_wildcard_query( - wildcard_query.wildcard_query, - wildcard_query.case_sensitive, - wildcard_query.partial_match, - ) + for wildcard_str, case_sensitive, partial_match in wildcard_query_tuples: + query_builder.add_wildcard_query(wildcard_str, case_sensitive, partial_match) + wildcard_queries.append(WildcardQuery(wildcard_str, case_sensitive, partial_match)) extra_wildcard_queries = [WildcardQuery("ccc?ccc", True), WildcardQuery("ddd?ddd")] query_builder.add_wildcard_queries(extra_wildcard_queries) wildcard_queries.extend(extra_wildcard_queries) diff --git a/tests/test_ir/test_utils.py b/tests/test_ir/test_utils.py index c2103c8c..e4c236d8 100644 --- a/tests/test_ir/test_utils.py +++ b/tests/test_ir/test_utils.py @@ -146,7 +146,6 @@ def _check_wildcard_query( wildcard_query: WildcardQuery, ref_wildcard_string: str, ref_is_case_sensitive: bool, - ref_is_partial_match: bool, ) -> None: """ Given a WildcardQuery object, check if the stored data matches the input @@ -155,11 +154,9 @@ def _check_wildcard_query( :param wildcard_query: Input WildcardQuery object. :param ref_wildcard_string: Reference wildcard string. :param ref_is_case_sensitive: Reference case-sensitive indicator. - :param ref_is_partial_match: Reference partial-match indicator. """ wildcard_string: str = wildcard_query.wildcard_query is_case_sensitive: bool = wildcard_query.case_sensitive - is_partial_match: bool = wildcard_query.partial_match self.assertEqual( wildcard_string, ref_wildcard_string, @@ -170,11 +167,6 @@ def _check_wildcard_query( ref_is_case_sensitive, f"Expected case-sensitive indicator: {ref_is_case_sensitive}", ) - self.assertEqual( - is_partial_match, - ref_is_partial_match, - f"Expected partial-match indicator: {ref_is_partial_match}", - ) def _check_query( self, @@ -235,7 +227,6 @@ def _check_query( wildcard_queries[i], ref_wildcard_queries[i].wildcard_query, ref_wildcard_queries[i].case_sensitive, - ref_wildcard_queries[i].partial_match, )