Skip to content

Commit

Permalink
Add more explicit interface to differentiate substring and full-strin…
Browse files Browse the repository at this point in the history
…g wildcard queries. (#62)
  • Loading branch information
LinZhihao-723 authored Jun 11, 2024
1 parent 37b2ae9 commit dba3db2
Show file tree
Hide file tree
Showing 9 changed files with 240 additions and 32 deletions.
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,16 @@ from pathlib import Path
from typing import List, Tuple

from clp_ffi_py.ir import ClpIrFileReader, Query, QueryBuilder
from clp_ffi_py.wildcard_query import FullStringWildcardQuery, SubstringWildcardQuery

# Create a QueryBuilder object to build the search query.
query_builder: QueryBuilder = QueryBuilder()

# Add wildcard patterns to filter log messages:
query_builder.add_wildcard_query("*uid=*,status=failed*")
query_builder.add_wildcard_query("*UID=*,Status=KILLED*", case_sensitive=True)
query_builder.add_wildcard_query(SubstringWildcardQuery("uid=*,status=failed"))
query_builder.add_wildcard_query(
FullStringWildcardQuery("*UID=*,Status=KILLED*", case_sensitive=True)
)

# Initialize a Query object using the builder:
wildcard_search_query: Query = query_builder.build()
Expand All @@ -169,10 +172,12 @@ details, use the following code to access the related docstring.

```python
from clp_ffi_py.ir import Query, QueryBuilder
from clp_ffi_py import WildcardQuery
from clp_ffi_py import FullStringWildcardQuery, SubstringWildcardQuery, WildcardQuery
help(Query)
help(QueryBuilder)
help(WildcardQuery)
help(FullStringWildcardQuery)
help(SubstringWildcardQuery)
```

### Streaming Decode/Search Directly from S3 Remote Storage
Expand Down
49 changes: 45 additions & 4 deletions clp_ffi_py/ir/query_builder.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from __future__ import annotations

import warnings
from copy import deepcopy
from typing import List, Optional
from typing import List, Optional, overload, Union

from deprecated.sphinx import deprecated

from clp_ffi_py.ir.native import Query
from clp_ffi_py.wildcard_query import WildcardQuery
from clp_ffi_py.wildcard_query import FullStringWildcardQuery, WildcardQuery

_add_wildcard_query_deprecation_warning_message: str = "The wildcard query must be explicitly "
"created and passed as a parameter to this function. QueryBuilder should only accept instances of "
"`clp_ffi_py.wildcard_query.WildcardQuery`."


class QueryBuilderException(Exception):
Expand Down Expand Up @@ -78,6 +85,11 @@ def set_search_time_termination_margin(self, ts: int) -> QueryBuilder:
self._search_time_termination_margin = ts
return self

@overload
@deprecated(
version="0.0.12",
reason=_add_wildcard_query_deprecation_warning_message,
)
def add_wildcard_query(self, wildcard_query: str, case_sensitive: bool = False) -> QueryBuilder:
"""
Constructs and adds a :class:`~clp_ffi_py.wildcard_query.WildcardQuery`
Expand All @@ -87,8 +99,37 @@ def add_wildcard_query(self, wildcard_query: str, case_sensitive: bool = False)
:param case_sensitive: Whether to perform case-sensitive matching.
:return: self.
"""
self._wildcard_queries.append(WildcardQuery(wildcard_query, case_sensitive))
return self
...

@overload
def add_wildcard_query(self, wildcard_query: WildcardQuery) -> QueryBuilder:
"""
Adds the given wildcard query to the wildcard query list.
:param wildcard_query: The wildcard query to add. It can be any derived
class of :class:`~clp_ffi_py.wildcard_query.WildcardQuery`.
:return: self.
"""
...

def add_wildcard_query(
self, wildcard_query: Union[str, WildcardQuery], case_sensitive: bool = False
) -> QueryBuilder:
"""
This method is the implementation of `add_wildcard_query`.
"""
if isinstance(wildcard_query, WildcardQuery):
self._wildcard_queries.append(wildcard_query)
return self
elif isinstance(wildcard_query, str):
warnings.warn(
_add_wildcard_query_deprecation_warning_message,
DeprecationWarning,
)
self._wildcard_queries.append(FullStringWildcardQuery(wildcard_query, case_sensitive))
return self

raise NotImplementedError

def add_wildcard_queries(self, wildcard_queries: List[WildcardQuery]) -> QueryBuilder:
"""
Expand Down
57 changes: 54 additions & 3 deletions clp_ffi_py/wildcard_query.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import warnings

from deprecated.sphinx import deprecated


class WildcardQuery:
"""
This class defines a wildcard query, which includes a wildcard string and a
boolean value to indicate if the match is case-sensitive.
This class defines an abstract wildcard query. It includes a wildcard string
and a boolean value to indicate if the match is case-sensitive.
A wildcard string may contain the following types of supported wildcards:
Expand All @@ -12,6 +17,13 @@ class WildcardQuery:
Other characters which are escaped are treated as normal characters.
"""

@deprecated(
version="0.0.12",
reason="`clp_ffi_py.wildcard_query.WildcardQuery` is supposed to be an abstract class and"
" should not be used directly. To create a wildcard query, please explicit instantiate"
" `clp_ffi_py.wildcard_query.SubstringWildcardQuery` or"
" `clp_ffi_py.wildcard_query.FullStringWildcardQuery`.",
)
def __init__(self, wildcard_query: str, case_sensitive: bool = False):
"""
Initializes a wildcard query using the given parameters.
Expand All @@ -27,7 +39,7 @@ def __str__(self) -> str:
:return: The string representation of the WildcardQuery object.
"""
return (
f'WildcardQuery(wildcard_query="{self._wildcard_query}",'
f'{self.__class__.__name__}(wildcard_query="{self._wildcard_query}",'
f" case_sensitive={self._case_sensitive})"
)

Expand All @@ -44,3 +56,42 @@ def wildcard_query(self) -> str:
@property
def case_sensitive(self) -> bool:
return self._case_sensitive


class SubstringWildcardQuery(WildcardQuery):
"""
This class defines a substring wildcard query.
It is derived from
:class:`~clp_ffi_py.WildcardQuery`, adding both a prefix and a postfix
wildcard ("*") to the input wildcard string. This allows the query to match
any substring within a log message.
"""

def __init__(self, substring_wildcard_query: str, case_sensitive: bool = False):
"""
Initializes a substring wildcard query using the given parameters.
:param substring_wildcard_query: Wildcard query string.
:param case_sensitive: Case sensitive indicator.
"""
substring_wildcard_query = "*" + substring_wildcard_query + "*"
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
super().__init__(substring_wildcard_query, case_sensitive)


class FullStringWildcardQuery(WildcardQuery):
"""
This class defines a full string wildcard query.
It is derived from
:class:`~clp_ffi_py.WildcardQuery`, and uses the input wildcard string
directly to create the query. This ensures that the query matches only the
entire log message.
"""

def __init__(self, full_string_wildcard_query: str, case_sensitive: bool = False):
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
super().__init__(full_string_wildcard_query, case_sensitive)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ description = "Python interface to the CLP Core Features through CLP's FFI"
readme = "README.md"
requires-python = ">=3.7"
dependencies = [
"Deprecated >= 1.2.14",
"python-dateutil >= 2.7.0",
"typing-extensions >= 4.1.1",
"zstandard >= 0.18.0",
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ mypy-extensions>=1.0.0
packaging>=21.3
ruff>=0.4.6
smart_open==6.4.0
types-Deprecated>=1.2.9
types-python-dateutil>=2.8
zstandard>=0.18.0
16 changes: 14 additions & 2 deletions src/clp_ffi_py/ir/native/PyQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ auto serialize_wildcard_queries(std::vector<WildcardQuery> const& wildcard_queri
PyObjectPtr<PyObject> const is_case_sensitive{get_py_bool(wildcard_query.is_case_sensitive()
)};
PyObject* py_wildcard_query{PyObject_CallFunction(
PyQuery::get_py_wildcard_query_type(),
PyQuery::get_py_full_string_wildcard_query_type(),
"OO",
wildcard_py_str,
is_case_sensitive.get()
Expand Down Expand Up @@ -644,6 +644,7 @@ auto PyQuery::init(

PyObjectStaticPtr<PyTypeObject> PyQuery::m_py_type{nullptr};
PyObjectStaticPtr<PyObject> PyQuery::m_py_wildcard_query_type{nullptr};
PyObjectStaticPtr<PyObject> PyQuery::m_py_full_string_wildcard_query_type{nullptr};

auto PyQuery::get_py_type() -> PyTypeObject* {
return m_py_type.get();
Expand All @@ -653,6 +654,10 @@ auto PyQuery::get_py_wildcard_query_type() -> PyObject* {
return m_py_wildcard_query_type.get();
}

auto PyQuery::get_py_full_string_wildcard_query_type() -> PyObject* {
return m_py_full_string_wildcard_query_type.get();
}

auto PyQuery::module_level_init(PyObject* py_module) -> bool {
static_assert(std::is_trivially_destructible<PyQuery>());
auto* type{py_reinterpret_cast<PyTypeObject>(PyType_FromSpec(&PyQuery_type_spec))};
Expand All @@ -669,11 +674,18 @@ auto PyQuery::module_level_init(PyObject* py_module) -> bool {
if (nullptr == py_query) {
return false;
}
auto* py_wildcard_query_type = PyObject_GetAttrString(py_query, "WildcardQuery");
auto* py_wildcard_query_type{PyObject_GetAttrString(py_query, "WildcardQuery")};
if (nullptr == py_wildcard_query_type) {
return false;
}
m_py_wildcard_query_type.reset(py_wildcard_query_type);
auto* py_full_string_wildcard_query_type{
PyObject_GetAttrString(py_query, "FullStringWildcardQuery")
};
if (nullptr == py_full_string_wildcard_query_type) {
return false;
}
m_py_full_string_wildcard_query_type.reset(py_full_string_wildcard_query_type);
return true;
}
} // namespace clp_ffi_py::ir::native
7 changes: 7 additions & 0 deletions src/clp_ffi_py/ir/native/PyQuery.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,19 @@ class PyQuery {
*/
[[nodiscard]] static auto get_py_wildcard_query_type() -> PyObject*;

/**
* @return PyObject that represents the Python level class
* `FullStringWildcardQuery`.
*/
[[nodiscard]] static auto get_py_full_string_wildcard_query_type() -> PyObject*;

private:
PyObject_HEAD;
Query* m_query;

static PyObjectStaticPtr<PyTypeObject> m_py_type;
static PyObjectStaticPtr<PyObject> m_py_wildcard_query_type;
static PyObjectStaticPtr<PyObject> m_py_full_string_wildcard_query_type;
};
} // namespace clp_ffi_py::ir::native
#endif
39 changes: 30 additions & 9 deletions tests/test_ir/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
LogEvent,
Query,
)
from clp_ffi_py.wildcard_query import WildcardQuery
from clp_ffi_py.wildcard_query import FullStringWildcardQuery, SubstringWildcardQuery, WildcardQuery


class TestCaseWildcardQuery(TestCLPBase):
Expand All @@ -20,20 +20,41 @@ def test_init(self) -> None:
Test the initialization of WildcardQuery object.
"""
wildcard_string: str
expected_wildcard_string: str
wildcard_query: WildcardQuery

wildcard_string = "Are you the lord of *Pleiades*?"
wildcard_query = WildcardQuery(wildcard_string)
self._check_wildcard_query(wildcard_query, wildcard_string, False)
expected_wildcard_string = wildcard_string

wildcard_query = WildcardQuery(wildcard_string, True)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
wildcard_query = FullStringWildcardQuery(wildcard_string)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, False)

wildcard_query = WildcardQuery(wildcard_string, case_sensitive=True)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
wildcard_query = FullStringWildcardQuery(wildcard_string, True)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

wildcard_query = WildcardQuery(case_sensitive=True, wildcard_query=wildcard_string)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
wildcard_query = FullStringWildcardQuery(wildcard_string, case_sensitive=True)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

wildcard_query = FullStringWildcardQuery(
case_sensitive=True, full_string_wildcard_query=wildcard_string
)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

expected_wildcard_string = "*" + wildcard_string + "*"

wildcard_query = SubstringWildcardQuery(wildcard_string)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, False)

wildcard_query = SubstringWildcardQuery(wildcard_string, True)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

wildcard_query = SubstringWildcardQuery(wildcard_string, case_sensitive=True)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

wildcard_query = SubstringWildcardQuery(
case_sensitive=True, substring_wildcard_query=wildcard_string
)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)


class TestCaseQuery(TestCLPBase):
Expand Down
Loading

0 comments on commit dba3db2

Please sign in to comment.