Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more explicit interface to differentiate substring and full-string wildcard queries. #62

Merged
merged 7 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,16 @@ from pathlib import Path
from typing import List, Tuple

from clp_ffi_py.ir import ClpIrFileReader, Query, QueryBuilder
from clp_ffi_py.wildcard_query import FullStringWildcardQuery, SubstringWildcardQuery

# Create a QueryBuilder object to build the search query.
query_builder: QueryBuilder = QueryBuilder()

# Add wildcard patterns to filter log messages:
query_builder.add_wildcard_query("*uid=*,status=failed*")
query_builder.add_wildcard_query("*UID=*,Status=KILLED*", case_sensitive=True)
query_builder.add_wildcard_query(SubstringWildcardQuery("uid=*,status=failed"))
query_builder.add_wildcard_query(
FullStringWildcardQuery("*UID=*,Status=KILLED*", case_sensitive=True)
)

# Initialize a Query object using the builder:
wildcard_search_query: Query = query_builder.build()
Expand All @@ -169,10 +172,12 @@ details, use the following code to access the related docstring.

```python
from clp_ffi_py.ir import Query, QueryBuilder
from clp_ffi_py import WildcardQuery
from clp_ffi_py import FullStringWildcardQuery, SubstringWildcardQuery, WildcardQuery
help(Query)
help(QueryBuilder)
help(WildcardQuery)
help(FullStringWildcardQuery)
help(SubstringWildcardQuery)
```

### Streaming Decode/Search Directly from S3 Remote Storage
Expand Down
73 changes: 69 additions & 4 deletions clp_ffi_py/ir/query_builder.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from __future__ import annotations

import warnings
from copy import deepcopy
from typing import List, Optional
from typing import Any, Dict, List, no_type_check, Optional, overload, Tuple, Union

from deprecated.sphinx import deprecated

from clp_ffi_py.ir.native import Query
from clp_ffi_py.wildcard_query import WildcardQuery
from clp_ffi_py.wildcard_query import FullStringWildcardQuery, WildcardQuery

# Deprecation message shared by the `@deprecated` decorator and the runtime `warnings.warn` calls
# of `add_wildcard_query`. The literals are wrapped in parentheses so that they are implicitly
# concatenated into one string; without the parentheses only the first literal is assigned and
# the remaining ones are evaluated as no-op expression statements.
_add_wildcard_query_deprecation_warning_message: str = (
    "The wildcard query must be explicitly created and passed as a parameter to this function."
    " QueryBuilder should only accept instances of `clp_ffi_py.wildcard_query.WildcardQuery`."
)


class QueryBuilderException(Exception):
Expand Down Expand Up @@ -78,6 +85,11 @@ def set_search_time_termination_margin(self, ts: int) -> QueryBuilder:
self._search_time_termination_margin = ts
return self

@overload
@deprecated(
version="0.0.12",
reason=_add_wildcard_query_deprecation_warning_message,
)
def add_wildcard_query(self, wildcard_query: str, case_sensitive: bool = False) -> QueryBuilder:
"""
Constructs and adds a :class:`~clp_ffi_py.wildcard_query.WildcardQuery`
Expand All @@ -87,8 +99,61 @@ def add_wildcard_query(self, wildcard_query: str, case_sensitive: bool = False)
:param case_sensitive: Whether to perform case-sensitive matching.
:return: self.
"""
self._wildcard_queries.append(WildcardQuery(wildcard_query, case_sensitive))
return self
...

@overload
def add_wildcard_query(self, wildcard_query: WildcardQuery) -> QueryBuilder:
    """
    Adds the given wildcard query object to the wildcard query list.

    This is a typing-only overload declaration (its body is ``...``); the
    runtime behavior is provided by the non-overloaded implementation of
    `add_wildcard_query`.

    :param wildcard_query: The wildcard query to add. It can be an instance
        of any class derived from
        :class:`~clp_ffi_py.wildcard_query.WildcardQuery`.
    :return: self.
    """
    ...

@no_type_check
def add_wildcard_query(self, *args: Tuple[Any, ...], **kwargs: Dict[str, Any]) -> QueryBuilder:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed, let's drop `no_type_check` and try a different approach.

"""
This method is the implementation of `add_wildcard_query`.

Type check is disabled since it executes runtime checks to ensure
passed-in arguments match the defined signatures.
"""
num_param: int = len(args) + len(kwargs)
if 1 == num_param:
wildcard_query: Union[WildcardQuery, str] = (
args[0] if 1 == len(args) else kwargs["wildcard_query"]
)
if isinstance(wildcard_query, WildcardQuery):
self._wildcard_queries.append(wildcard_query)
elif isinstance(wildcard_query, str):
warnings.warn(
_add_wildcard_query_deprecation_warning_message,
DeprecationWarning,
)
self._wildcard_queries.append(FullStringWildcardQuery(wildcard_query, False))
else:
raise TypeError
return self
if 2 == num_param:
wildcard_query: str
case_sensitive: bool
if 2 == len(args):
wildcard_query = args[0]
case_sensitive = args[1]
else:
wildcard_query = args[0] if 1 == len(args) else kwargs["wildcard_query"]
case_sensitive = kwargs["case_sensitive"]
if not (isinstance(wildcard_query, str) and isinstance(case_sensitive, bool)):
raise TypeError
warnings.warn(
_add_wildcard_query_deprecation_warning_message,
DeprecationWarning,
)
self._wildcard_queries.append(FullStringWildcardQuery(wildcard_query, case_sensitive))
return self
raise NotImplementedError

def add_wildcard_queries(self, wildcard_queries: List[WildcardQuery]) -> QueryBuilder:
"""
Expand Down
57 changes: 54 additions & 3 deletions clp_ffi_py/wildcard_query.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import warnings

from deprecated.sphinx import deprecated


class WildcardQuery:
"""
This class defines a wildcard query, which includes a wildcard string and a
boolean value to indicate if the match is case-sensitive.
This class defines an abstract wildcard query. It includes a wildcard string
and a boolean value to indicate if the match is case-sensitive.

A wildcard string may contain the following types of supported wildcards:

Expand All @@ -12,6 +17,13 @@ class WildcardQuery:
Other characters which are escaped are treated as normal characters.
"""

@deprecated(
version="0.0.12",
reason="`clp_ffi_py.wildcard_query.WildcardQuery` is supposed to be an abstract class and"
" should not be used directly. To create a wildcard query, please explicit instantiate"
" `clp_ffi_py.wildcard_query.SubstringWildcardQuery` or"
" `clp_ffi_py.wildcard_query.FullStringWildcardQuery`.",
)
def __init__(self, wildcard_query: str, case_sensitive: bool = False):
"""
Initializes a wildcard query using the given parameters.
Expand All @@ -27,7 +39,7 @@ def __str__(self) -> str:
:return: The string representation of the WildcardQuery object.
"""
return (
f'WildcardQuery(wildcard_query="{self._wildcard_query}",'
f'{self.__class__.__name__}(wildcard_query="{self._wildcard_query}",'
f" case_sensitive={self._case_sensitive})"
)

Expand All @@ -44,3 +56,42 @@ def wildcard_query(self) -> str:
@property
def case_sensitive(self) -> bool:
return self._case_sensitive


class SubstringWildcardQuery(WildcardQuery):
    """
    A wildcard query that may match anywhere inside a log message.

    This subclass of :class:`~clp_ffi_py.WildcardQuery` surrounds the given
    wildcard string with a leading and a trailing wildcard ("*") before
    storing it, which allows the query to match any substring within a log
    message.
    """

    def __init__(self, substring_wildcard_query: str, case_sensitive: bool = False):
        """
        Wraps the given wildcard string in "*" wildcards and initializes the
        underlying wildcard query with the result.

        :param substring_wildcard_query: Wildcard query string to match as a
            substring.
        :param case_sensitive: Case sensitive indicator.
        """
        wrapped_pattern: str = "*" + substring_wildcard_query + "*"
        # The base-class initializer is marked as deprecated for direct use; suppress that
        # warning here since this subclass is calling it on purpose.
        with warnings.catch_warnings():
            warnings.simplefilter(action="ignore", category=DeprecationWarning)
            super().__init__(wrapped_pattern, case_sensitive)


class FullStringWildcardQuery(WildcardQuery):
    """
    A wildcard query that must match a log message in its entirety.

    This subclass of :class:`~clp_ffi_py.WildcardQuery` passes the given
    wildcard string through unchanged, so the query matches only the entire
    log message.
    """

    def __init__(self, full_string_wildcard_query: str, case_sensitive: bool = False):
        """
        Initializes a full-string wildcard query using the given parameters.

        :param full_string_wildcard_query: Wildcard query string, used as-is.
        :param case_sensitive: Case sensitive indicator.
        """
        # The base-class initializer is marked as deprecated for direct use; suppress that
        # warning here since this subclass is calling it on purpose.
        with warnings.catch_warnings():
            warnings.simplefilter(action="ignore", category=DeprecationWarning)
            super().__init__(full_string_wildcard_query, case_sensitive)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ description = "Python interface to the CLP Core Features through CLP's FFI"
readme = "README.md"
requires-python = ">=3.7"
dependencies = [
"Deprecated >= 1.2.14",
"python-dateutil >= 2.7.0",
"typing-extensions >= 4.1.1",
"zstandard >= 0.18.0",
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ mypy-extensions>=1.0.0
packaging>=21.3
ruff>=0.1.6
smart_open==6.4.0
types-Deprecated>=1.2.9
types-python-dateutil>=2.8
zstandard>=0.18.0
16 changes: 14 additions & 2 deletions src/clp_ffi_py/ir/native/PyQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ auto serialize_wildcard_queries(std::vector<WildcardQuery> const& wildcard_queri
PyObjectPtr<PyObject> const is_case_sensitive{get_py_bool(wildcard_query.is_case_sensitive()
)};
PyObject* py_wildcard_query{PyObject_CallFunction(
PyQuery::get_py_wildcard_query_type(),
PyQuery::get_py_full_string_wildcard_query_type(),
"OO",
wildcard_py_str,
is_case_sensitive.get()
Expand Down Expand Up @@ -644,6 +644,7 @@ auto PyQuery::init(

PyObjectStaticPtr<PyTypeObject> PyQuery::m_py_type{nullptr};
PyObjectStaticPtr<PyObject> PyQuery::m_py_wildcard_query_type{nullptr};
PyObjectStaticPtr<PyObject> PyQuery::m_py_full_string_wildcard_query_type{nullptr};

auto PyQuery::get_py_type() -> PyTypeObject* {
return m_py_type.get();
Expand All @@ -653,6 +654,10 @@ auto PyQuery::get_py_wildcard_query_type() -> PyObject* {
return m_py_wildcard_query_type.get();
}

// Returns the object stored in `m_py_full_string_wildcard_query_type`, i.e. the Python-level
// `FullStringWildcardQuery` class looked up by `module_level_init`. The static member is
// initialized to nullptr, so this returns nullptr until `module_level_init` has stored the class.
auto PyQuery::get_py_full_string_wildcard_query_type() -> PyObject* {
    return m_py_full_string_wildcard_query_type.get();
}

auto PyQuery::module_level_init(PyObject* py_module) -> bool {
static_assert(std::is_trivially_destructible<PyQuery>());
auto* type{py_reinterpret_cast<PyTypeObject>(PyType_FromSpec(&PyQuery_type_spec))};
Expand All @@ -669,11 +674,18 @@ auto PyQuery::module_level_init(PyObject* py_module) -> bool {
if (nullptr == py_query) {
return false;
}
auto* py_wildcard_query_type = PyObject_GetAttrString(py_query, "WildcardQuery");
auto* py_wildcard_query_type{PyObject_GetAttrString(py_query, "WildcardQuery")};
if (nullptr == py_wildcard_query_type) {
return false;
}
m_py_wildcard_query_type.reset(py_wildcard_query_type);
auto* py_full_string_wildcard_query_type{
PyObject_GetAttrString(py_query, "FullStringWildcardQuery")
};
if (nullptr == py_full_string_wildcard_query_type) {
return false;
}
m_py_full_string_wildcard_query_type.reset(py_full_string_wildcard_query_type);
return true;
}
} // namespace clp_ffi_py::ir::native
7 changes: 7 additions & 0 deletions src/clp_ffi_py/ir/native/PyQuery.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,19 @@ class PyQuery {
*/
[[nodiscard]] static auto get_py_wildcard_query_type() -> PyObject*;

/**
* @return PyObject that represents the Python level class
* `FullStringWildcardQuery`.
*/
[[nodiscard]] static auto get_py_full_string_wildcard_query_type() -> PyObject*;

private:
PyObject_HEAD;
Query* m_query;

static PyObjectStaticPtr<PyTypeObject> m_py_type;
static PyObjectStaticPtr<PyObject> m_py_wildcard_query_type;
static PyObjectStaticPtr<PyObject> m_py_full_string_wildcard_query_type;
};
} // namespace clp_ffi_py::ir::native
#endif
2 changes: 1 addition & 1 deletion src/clp_ffi_py/ir/native/Query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class WildcardQuery {
*/
WildcardQuery(std::string wildcard_query, bool case_sensitive)
: m_wildcard_query(std::move(wildcard_query)),
m_case_sensitive(case_sensitive){};
m_case_sensitive(case_sensitive) {};

[[nodiscard]] auto get_wildcard_query() const -> std::string const& { return m_wildcard_query; }

Expand Down
39 changes: 30 additions & 9 deletions tests/test_ir/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
LogEvent,
Query,
)
from clp_ffi_py.wildcard_query import WildcardQuery
from clp_ffi_py.wildcard_query import FullStringWildcardQuery, SubstringWildcardQuery, WildcardQuery


class TestCaseWildcardQuery(TestCLPBase):
Expand All @@ -20,20 +20,41 @@ def test_init(self) -> None:
Test the initialization of WildcardQuery object.
"""
wildcard_string: str
expected_wildcard_string: str
wildcard_query: WildcardQuery

wildcard_string = "Are you the lord of *Pleiades*?"
wildcard_query = WildcardQuery(wildcard_string)
self._check_wildcard_query(wildcard_query, wildcard_string, False)
expected_wildcard_string = wildcard_string

wildcard_query = WildcardQuery(wildcard_string, True)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
wildcard_query = FullStringWildcardQuery(wildcard_string)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, False)

wildcard_query = WildcardQuery(wildcard_string, case_sensitive=True)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
wildcard_query = FullStringWildcardQuery(wildcard_string, True)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

wildcard_query = WildcardQuery(case_sensitive=True, wildcard_query=wildcard_string)
self._check_wildcard_query(wildcard_query, wildcard_string, True)
wildcard_query = FullStringWildcardQuery(wildcard_string, case_sensitive=True)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

wildcard_query = FullStringWildcardQuery(
case_sensitive=True, full_string_wildcard_query=wildcard_string
)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

expected_wildcard_string = "*" + wildcard_string + "*"

wildcard_query = SubstringWildcardQuery(wildcard_string)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, False)

wildcard_query = SubstringWildcardQuery(wildcard_string, True)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

wildcard_query = SubstringWildcardQuery(wildcard_string, case_sensitive=True)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)

wildcard_query = SubstringWildcardQuery(
case_sensitive=True, substring_wildcard_query=wildcard_string
)
self._check_wildcard_query(wildcard_query, expected_wildcard_string, True)


class TestCaseQuery(TestCLPBase):
Expand Down
Loading
Loading