Skip to content

Commit

Permalink
Add support for boolean expressions and quoted columns (#1286)
Browse files (browse the repository at this point in the history)
* Add support for boolean expressions and quoted columns

* Add AlwaysTrue & AlwaysFalse support plus tests

* Add test for quoted column

* Remove commented code

---------

Co-authored-by: Mohammad Sheikh <email address redacted>
  • Loading branch information
MoSheikh and MoIMC authored Nov 6, 2024
1 parent 2778ec2 commit e9c3170
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 16 deletions.
49 changes: 35 additions & 14 deletions pyiceberg/expressions/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
CaselessKeyword,
DelimitedList,
Group,
MatchFirst,
ParserElement,
ParseResults,
Suppress,
Expand Down Expand Up @@ -57,6 +58,7 @@
StartsWith,
)
from pyiceberg.expressions.literals import (
BooleanLiteral,
DecimalLiteral,
Literal,
LongLiteral,
Expand All @@ -77,7 +79,9 @@
NAN = CaselessKeyword("nan")
LIKE = CaselessKeyword("like")

identifier = Word(alphas, alphanums + "_$").set_results_name("identifier")
unquoted_identifier = Word(alphas, alphanums + "_$")
quoted_identifier = Suppress('"') + unquoted_identifier + Suppress('"')
identifier = MatchFirst([unquoted_identifier, quoted_identifier]).set_results_name("identifier")
column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column")

like_regex = r"(?P<valid_wildcard>(?<!\\)%$)|(?P<invalid_wildcard>(?<!\\)%)"
Expand All @@ -100,16 +104,18 @@ def _(result: ParseResults) -> Reference:
string = sgl_quoted_string.set_results_name("raw_quoted_string")
decimal = common.real().set_results_name("decimal")
integer = common.signed_integer().set_results_name("integer")
literal = Group(string | decimal | integer).set_results_name("literal")
literal_set = Group(DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer)).set_results_name("literal_set")
literal = Group(string | decimal | integer | boolean).set_results_name("literal")
literal_set = Group(
DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | DelimitedList(boolean)
).set_results_name("literal_set")


@boolean.set_parse_action
def _(result: ParseResults) -> BooleanExpression:
def _(result: ParseResults) -> Literal[bool]:
if strtobool(result.boolean):
return AlwaysTrue()
return BooleanLiteral(True)
else:
return AlwaysFalse()
return BooleanLiteral(False)


@string.set_parse_action
Expand Down Expand Up @@ -265,14 +271,29 @@ def handle_or(result: ParseResults) -> Or:
return Or(*result[0])


boolean_expression = infix_notation(
predicate,
[
(Suppress(NOT), 1, opAssoc.RIGHT, handle_not),
(Suppress(AND), 2, opAssoc.LEFT, handle_and),
(Suppress(OR), 2, opAssoc.LEFT, handle_or),
],
).set_name("expr")
def handle_always_expression(result: ParseResults) -> BooleanExpression:
# If the entire result is "true" or "false", return AlwaysTrue or AlwaysFalse
expr = result[0]
if isinstance(expr, BooleanLiteral):
if expr.value:
return AlwaysTrue()
else:
return AlwaysFalse()
return result[0]


boolean_expression = (
infix_notation(
predicate,
[
(Suppress(NOT), 1, opAssoc.RIGHT, handle_not),
(Suppress(AND), 2, opAssoc.LEFT, handle_and),
(Suppress(OR), 2, opAssoc.LEFT, handle_or),
],
)
.set_name("expr")
.add_parse_action(handle_always_expression)
)


def parse(expr: str) -> BooleanExpression:
Expand Down
18 changes: 16 additions & 2 deletions tests/expressions/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,28 @@
)


def test_true() -> None:
def test_always_true() -> None:
assert AlwaysTrue() == parser.parse("true")


def test_false() -> None:
def test_always_false() -> None:
assert AlwaysFalse() == parser.parse("false")


def test_quoted_column() -> None:
assert EqualTo("foo", True) == parser.parse('"foo" = TRUE')


def test_equals_true() -> None:
assert EqualTo("foo", True) == parser.parse("foo = true")
assert EqualTo("foo", True) == parser.parse("foo == TRUE")


def test_equals_false() -> None:
assert EqualTo("foo", False) == parser.parse("foo = false")
assert EqualTo("foo", False) == parser.parse("foo == FALSE")


def test_is_null() -> None:
assert IsNull("foo") == parser.parse("foo is null")
assert IsNull("foo") == parser.parse("foo IS NULL")
Expand Down

0 comments on commit e9c3170

Please sign in to comment.