Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for partial matching in the wildcard query #27

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ from clp_ffi_py.ir import ClpIrFileReader, Query, QueryBuilder
query_builder: QueryBuilder = QueryBuilder()

# Add wildcard patterns to filter log messages:
query_builder.add_wildcard_query("*uid=*,status=failed*")
query_builder.add_wildcard_query("*UID=*,Status=KILLED*", case_sensitive=True)
query_builder.add_wildcard_query("uid=*,status=failed")
query_builder.add_wildcard_query("UID=*,Status=KILLED", case_sensitive=True)

# Initialize a Query object using the builder:
wildcard_search_query: Query = query_builder.build()
Expand Down
7 changes: 5 additions & 2 deletions clp_ffi_py/ir/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,19 @@ def set_search_time_termination_margin(self, ts: int) -> QueryBuilder:
self._search_time_termination_margin = ts
return self

def add_wildcard_query(self, wildcard_query: str, case_sensitive: bool = False) -> QueryBuilder:
def add_wildcard_query(
self, wildcard_query: str, case_sensitive: bool = False, partial_match: bool = True
) -> QueryBuilder:
"""
Constructs and adds a :class:`~clp_ffi_py.wildcard_query.WildcardQuery`
to the wildcard query list.

:param wildcard_query: The wildcard query string to add.
:param case_sensitive: Whether to perform case-sensitive matching.
:param partial_match: Whether to perform partial matching.
:return: self.
"""
self._wildcard_queries.append(WildcardQuery(wildcard_query, case_sensitive))
self._wildcard_queries.append(WildcardQuery(wildcard_query, case_sensitive, partial_match))
return self

def add_wildcard_queries(self, wildcard_queries: List[WildcardQuery]) -> QueryBuilder:
Expand Down
26 changes: 22 additions & 4 deletions clp_ffi_py/wildcard_query.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
class WildcardQuery:
"""
This class defines a wildcard query, which includes a wildcard string and a
boolean value to indicate if the match is case-sensitive.
This class defines a wildcard query, which includes a wildcard string, a
boolean value to indicate if the match is case-sensitive, and a boolean
value to indicate if the match is a partial match.

A wildcard string may contain the following types of supported wildcards:

Expand All @@ -10,25 +11,38 @@ class WildcardQuery:

Each wildcard can be escaped using a preceding '\\\\' (a single backslash).
Other characters which are escaped are treated as normal characters.

By default, the wildcard query is set to be a partial match. This means any
log message that contains the given wildcard string will be a match. If the
partial match is set to false, the wildcard query matches only if the
wildcard string matches the entire log message.

A partial match wildcard query `"${WILDCARD_STRING}"` is equivalent to the
full match wildcard query `"*${WILDCARD_STRING}*"`.
"""

def __init__(self, wildcard_query: str, case_sensitive: bool = False):
def __init__(
self, wildcard_query: str, case_sensitive: bool = False, partial_match: bool = True
):
"""
Initializes a wildcard query using the given parameters.

:param wildcard_query: Wildcard query string.
:param case_sensitive: Case sensitive indicator.
:param partial_match: Partial match indicator.
"""
self._wildcard_query: str = wildcard_query
self._case_sensitive: bool = case_sensitive
self._partial_match: bool = partial_match

def __str__(self) -> str:
"""
:return: The string representation of the WildcardQuery object.
"""
return (
f'WildcardQuery(wildcard_query="{self._wildcard_query}",'
f" case_sensitive={self._case_sensitive})"
f" case_sensitive={self._case_sensitive},"
f" partial_match={self._partial_match})"
)

def __repr__(self) -> str:
Expand All @@ -44,3 +58,7 @@ def wildcard_query(self) -> str:
@property
def case_sensitive(self) -> bool:
return self._case_sensitive

@property
def partial_match(self) -> bool:
return self._partial_match
27 changes: 20 additions & 7 deletions src/clp_ffi_py/ir/native/PyQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

#include "PyQuery.hpp"

#include <clp/components/core/src/string_utils.hpp>

#include <clp_ffi_py/error_messages.hpp>
#include <clp_ffi_py/ir/native/LogEvent.hpp>
#include <clp_ffi_py/ir/native/PyLogEvent.hpp>
Expand Down Expand Up @@ -50,6 +48,7 @@ auto deserialize_wildcard_queries(
PyErr_SetString(PyExc_TypeError, clp_ffi_py::cPyTypeError);
return false;
}

auto* wildcard_query_py_str{PyObject_GetAttrString(wildcard_query, "wildcard_query")};
if (nullptr == wildcard_query_py_str) {
return false;
Expand All @@ -58,6 +57,11 @@ auto deserialize_wildcard_queries(
if (nullptr == case_sensitive_py_bool) {
return false;
}
auto* partial_match_py_bool{PyObject_GetAttrString(wildcard_query, "partial_match")};
if (nullptr == partial_match_py_bool) {
return false;
}

std::string_view wildcard_query_view;
if (false == parse_py_string_as_string_view(wildcard_query_py_str, wildcard_query_view)) {
return false;
Expand All @@ -66,9 +70,15 @@ auto deserialize_wildcard_queries(
if (-1 == is_case_sensitive && nullptr != PyErr_Occurred()) {
return false;
}
int const is_partial_match{PyObject_IsTrue(partial_match_py_bool)};
if (-1 == is_partial_match && nullptr != PyErr_Occurred()) {
return false;
}

wildcard_queries.emplace_back(
clean_up_wildcard_search_string(wildcard_query_view),
static_cast<bool>(is_case_sensitive)
std::string{wildcard_query_view},
static_cast<bool>(is_case_sensitive),
static_cast<bool>(is_partial_match)
);
}
return true;
Expand Down Expand Up @@ -99,7 +109,7 @@ auto serialize_wildcard_queries(std::vector<WildcardQuery> const& wildcard_queri
Py_ssize_t idx{0};
for (auto const& wildcard_query : wildcard_queries) {
PyObjectPtr<PyObject> const wildcard_py_str_ptr{
PyUnicode_FromString(wildcard_query.get_wildcard_query().c_str())
PyUnicode_FromString(wildcard_query.get_uncleaned_wildcard_query().c_str())
};
auto* wildcard_py_str{wildcard_py_str_ptr.get()};
if (nullptr == wildcard_py_str) {
Expand All @@ -108,11 +118,14 @@ auto serialize_wildcard_queries(std::vector<WildcardQuery> const& wildcard_queri
}
PyObjectPtr<PyObject> const is_case_sensitive{get_py_bool(wildcard_query.is_case_sensitive()
)};
PyObjectPtr<PyObject> const is_partial_match{get_py_bool(wildcard_query.is_partial_match())
};
PyObject* py_wildcard_query{PyObject_CallFunction(
PyQuery::get_py_wildcard_query_type(),
"OO",
"OOO",
wildcard_py_str,
is_case_sensitive.get()
is_case_sensitive.get(),
is_partial_match.get()
)};
if (nullptr == py_wildcard_query) {
Py_DECREF(py_wildcard_queries);
Expand Down
33 changes: 27 additions & 6 deletions src/clp_ffi_py/ir/native/Query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,54 @@

#include <clp/components/core/src/ErrorCode.hpp>
#include <clp/components/core/src/ffi/encoding_methods.hpp>
#include <clp/components/core/src/string_utils.hpp>

#include <clp_ffi_py/ExceptionFFI.hpp>
#include <clp_ffi_py/ir/native/LogEvent.hpp>

namespace clp_ffi_py::ir::native {
/**
* This class defines a wildcard query, which includes a wildcard string and a
* boolean value to indicate if the match is case-sensitive.
* This class defines a wildcard query, which includes a wildcard string, a
* boolean value to indicate if the match is case-sensitive, and a boolean value
* to indicate if the query is a partial match.
*/
class WildcardQuery {
public:
/**
* Initializes the wildcard query.
* Initializes the wildcard query by cleaning the wildcard string.
* @param wildcard_query Wildcard query.
* @param case_sensitive Case sensitive indicator.
* @param partial_match Partial match indicator.
*/
WildcardQuery(std::string wildcard_query, bool case_sensitive)
: m_wildcard_query(std::move(wildcard_query)),
m_case_sensitive(case_sensitive){};
WildcardQuery(std::string wildcard_query, bool case_sensitive, bool partial_match)
: m_uncleaned_wildcard_query(std::move(wildcard_query)),
m_case_sensitive(case_sensitive),
m_partial_match(partial_match) {
if (partial_match) {
m_wildcard_query = "*";
m_wildcard_query += m_uncleaned_wildcard_query;
m_wildcard_query += "*";
m_wildcard_query = clean_up_wildcard_search_string(m_wildcard_query);
} else {
m_wildcard_query = clean_up_wildcard_search_string(m_uncleaned_wildcard_query);
}
}

[[nodiscard]] auto get_uncleaned_wildcard_query() const -> std::string const& {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's rename get_uncleaned_wildcard_query. Maybe something like get_original_query_string or get_user_query_string. I don't think we need to explicitly call it a wildcard query, considering this is a method of the WildcardQuery class (so it's pretty redundant). I'm a bit indifferent about adding _string as a suffix, but it would probably be necessary if there is ever a Query class in the future.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree. I will keep the suffix _string to differentiate it from the actual query and the input.

return m_uncleaned_wildcard_query;
}

[[nodiscard]] auto get_wildcard_query() const -> std::string const& { return m_wildcard_query; }

[[nodiscard]] auto is_case_sensitive() const -> bool { return m_case_sensitive; }

[[nodiscard]] auto is_partial_match() const -> bool { return m_partial_match; }

private:
std::string m_uncleaned_wildcard_query;
std::string m_wildcard_query;
bool m_case_sensitive;
bool m_partial_match;
};

/**
Expand Down
61 changes: 42 additions & 19 deletions tests/test_ir/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,19 @@ def test_init(self) -> None:

wildcard_string = "Are you the lord of *Pleiades*?"
wildcard_query = WildcardQuery(wildcard_string)
self._check_wildcard_query(wildcard_query, wildcard_string, False)
self._check_wildcard_query(wildcard_query, wildcard_string, False, True)

wildcard_query = WildcardQuery(wildcard_string, True)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
self._check_wildcard_query(wildcard_query, wildcard_string, True, True)

wildcard_query = WildcardQuery(wildcard_string, case_sensitive=True)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
self._check_wildcard_query(wildcard_query, wildcard_string, True, True)

wildcard_query = WildcardQuery(case_sensitive=True, wildcard_query=wildcard_string)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
self._check_wildcard_query(wildcard_query, wildcard_string, True, True)

wildcard_query = WildcardQuery(partial_match=False, wildcard_query=wildcard_string)
self._check_wildcard_query(wildcard_query, wildcard_string, False, False)


class TestCaseQuery(TestCLPBase):
Expand Down Expand Up @@ -173,20 +176,15 @@ def test_init_wildcard_queries(self) -> None:

wildcard_queries = [
WildcardQuery("who is \*** pleiades??\\"),
WildcardQuery("a\?m********I?\\"),
WildcardQuery("\g\%\*\??***"),
]
ref_wildcard_queries = [
WildcardQuery("who is \** pleiades??"),
WildcardQuery("a\?m*I?"),
WildcardQuery("g%\*\??*"),
WildcardQuery("a\?m********I?\\", case_sensitive=True),
WildcardQuery("\g\%\*\??***", partial_match=False),
]
query = Query(wildcard_queries=wildcard_queries)
self._check_query(
query,
Query.default_search_time_lower_bound(),
Query.default_search_time_upper_bound(),
ref_wildcard_queries,
wildcard_queries,
0,
)

Expand All @@ -203,7 +201,7 @@ def test_init_wildcard_queries(self) -> None:
query,
search_time_lower_bound,
search_time_upper_bound,
ref_wildcard_queries,
wildcard_queries,
search_time_termination_margin,
)

Expand Down Expand Up @@ -287,15 +285,15 @@ def test_log_event_match(self) -> None:
"Only log events whose message matches the wildcard query should match the query."
)
log_event = LogEvent("fhakjhLFISHfashfShfiuSLSZkfSUSFS", 0)
wildcard_query_string = "*JHlfish*SH?IU*s"
wildcard_query_string = "JHlfish*SH?IU*s"
query = Query(wildcard_queries=[WildcardQuery(wildcard_query_string)])
self.assertEqual(query.match_log_event(log_event), True, description)
self.assertEqual(log_event.match_query(query), True, description)
query = Query(wildcard_queries=[WildcardQuery(wildcard_query_string, True)])
self.assertEqual(query.match_log_event(log_event), False, description)
self.assertEqual(log_event.match_query(query), False, description)
log_event = LogEvent("j:flJo;jsf:LSJDFoiASFoasjzFZA", 0)
wildcard_query_string = "*flJo*s?*AS*A"
wildcard_query_string = "flJo*s?*AS"
query = Query(wildcard_queries=[WildcardQuery(wildcard_query_string)])
self.assertEqual(query.match_log_event(log_event), True, description)
self.assertEqual(log_event.match_query(query), True, description)
Expand All @@ -307,12 +305,12 @@ def test_log_event_match(self) -> None:
"Log event whose messages matches any one of the wildcard queries should be considered"
" as a match of the query."
)
wildcard_queries: List[WildcardQuery] = [WildcardQuery("*b&A*"), WildcardQuery("*A|a*")]
wildcard_queries: List[WildcardQuery] = [WildcardQuery("b&A"), WildcardQuery("A|a")]
log_event = LogEvent("-----a-A-----", 0)
query = Query(wildcard_queries=wildcard_queries)
self.assertEqual(query.match_log_event(log_event), False, description)
self.assertEqual(log_event.match_query(query), False, description)
wildcard_queries.append(WildcardQuery("*a?a*"))
wildcard_queries.append(WildcardQuery("a?a"))
query = Query(wildcard_queries=wildcard_queries)
self.assertEqual(query.match_log_event(log_event), True, description)
self.assertEqual(log_event.match_query(query), True, description)
Expand All @@ -323,12 +321,37 @@ def test_log_event_match(self) -> None:

description = (
"The match of query requires both timestamp in range and log message matching any one"
" of the wildcard queries."
" of the wildcard queries. (Partial Match)"
)
query = Query(
search_time_lower_bound=3190,
search_time_upper_bound=3270,
wildcard_queries=[WildcardQuery("q?Q"), WildcardQuery("t?t", True)],
)
log_event = LogEvent("I'm not matching anything...", 3213)
self.assertEqual(query.match_log_event(log_event), False, description)
self.assertEqual(log_event.match_query(query), False, description)
log_event = LogEvent("I'm not matching anything... T.T", 3213)
self.assertEqual(query.match_log_event(log_event), False, description)
self.assertEqual(log_event.match_query(query), False, description)
log_event = LogEvent("I'm not matching anything... QAQ", 2887)
self.assertEqual(query.match_log_event(log_event), False, description)
self.assertEqual(log_event.match_query(query), False, description)
log_event = LogEvent("I'm finally matching something... QAQ", 3213)
self.assertEqual(query.match_log_event(log_event), True, description)
self.assertEqual(log_event.match_query(query), True, description)

description = (
"The match of query requires both timestamp in range and log message matching any one"
" of the wildcard queries. (Full Match)"
)
query = Query(
search_time_lower_bound=3190,
search_time_upper_bound=3270,
wildcard_queries=[WildcardQuery("*q?Q*"), WildcardQuery("*t?t*", True)],
wildcard_queries=[
WildcardQuery("*q?Q*", partial_match=False),
WildcardQuery("*t?t*", case_sensitive=True, partial_match=False),
],
)
log_event = LogEvent("I'm not matching anything...", 3213)
self.assertEqual(query.match_log_event(log_event), False, description)
Expand Down
10 changes: 8 additions & 2 deletions tests/test_ir/test_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,16 @@ def test_set_value(self) -> None:
search_time_termination_margin,
)

wildcard_queries = [WildcardQuery("aaa*aaa"), WildcardQuery("bbb*bbb", True)]
wildcard_queries = [
WildcardQuery("aaa*aaa"),
WildcardQuery("bbb*bbb", True),
WildcardQuery("full match", True, False),
]
for wildcard_query in wildcard_queries:
query_builder.add_wildcard_query(
wildcard_query.wildcard_query, wildcard_query.case_sensitive
wildcard_query.wildcard_query,
wildcard_query.case_sensitive,
wildcard_query.partial_match,
)
extra_wildcard_queries = [WildcardQuery("ccc?ccc", True), WildcardQuery("ddd?ddd")]
query_builder.add_wildcard_queries(extra_wildcard_queries)
Expand Down
Loading