Skip to content

Commit

Permalink
Update like statements to reflect sql behaviors (#91)
Browse files Browse the repository at this point in the history
* Update like statements to reflect sql behaviors

* Codestyle

* Codestyle

* Handle NotStartsWith

* Update pyiceberg/expressions/parser.py

Co-authored-by: Fokko Driesprong <[email protected]>

* Update tests/expressions/test_parser.py

Co-authored-by: Fokko Driesprong <[email protected]>

---------

Co-authored-by: Fokko Driesprong <[email protected]>
  • Loading branch information
danielcweeks and Fokko committed Oct 24, 2023
1 parent 1b186d6 commit a09de69
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 5 deletions.
21 changes: 18 additions & 3 deletions pyiceberg/expressions/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import re
from decimal import Decimal

from pyparsing import (
Expand Down Expand Up @@ -51,7 +52,6 @@
NotIn,
NotNaN,
NotNull,
NotStartsWith,
Or,
Reference,
StartsWith,
Expand All @@ -78,6 +78,8 @@
identifier = Word(alphas, alphanums + "_$").set_results_name("identifier")
column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column")

# Classifies the first unescaped '%' in a LIKE pattern via two named groups:
#   valid_wildcard   - a '%' not preceded by a backslash, sitting at the end of the string
#   invalid_wildcard - a '%' not preceded by a backslash, anywhere else
# A '%' preceded by a backslash is skipped by the negative lookbehind in both alternatives.
like_regex = r'(?P<valid_wildcard>(?<!\\)%$)|(?P<invalid_wildcard>(?<!\\)%)'


@column.set_parse_action
def _(result: ParseResults) -> Reference:
Expand Down Expand Up @@ -217,12 +219,25 @@ def _(result: ParseResults) -> BooleanExpression:

@starts_with.set_parse_action
def _(result: ParseResults) -> BooleanExpression:
    """Parse action for the ``starts_with`` grammar element.

    Delegates to ``_evaluate_like_statement`` so the wildcard rules are applied
    to the quoted pattern. The previous unconditional ``StartsWith`` return is
    removed: it ran first and made the delegation unreachable.

    Args:
        result: Parse results exposing ``column`` and ``raw_quoted_string``.

    Returns:
        The expression produced by ``_evaluate_like_statement``.

    Raises:
        ValueError: Propagated from ``_evaluate_like_statement`` for a
            wildcard that is not at the end of the pattern.
    """
    return _evaluate_like_statement(result)


@not_starts_with.set_parse_action
def _(result: ParseResults) -> BooleanExpression:
    """Parse action for the ``not_starts_with`` grammar element.

    Evaluates the pattern with ``_evaluate_like_statement`` and inverts the
    resulting expression with ``~``. The previous unconditional
    ``NotStartsWith`` return is removed: it ran first and made the negated
    delegation unreachable.

    Args:
        result: Parse results exposing ``column`` and ``raw_quoted_string``.

    Returns:
        The negation of the expression produced by ``_evaluate_like_statement``.

    Raises:
        ValueError: Propagated from ``_evaluate_like_statement`` for a
            wildcard that is not at the end of the pattern.
    """
    return ~_evaluate_like_statement(result)


def _evaluate_like_statement(result: ParseResults) -> BooleanExpression:
    """Convert a parsed LIKE pattern into a ``StartsWith`` or ``EqualTo`` expression.

    The pattern text is searched with ``like_regex``:

    * an unescaped ``%`` anywhere but the end of the string is rejected;
    * an unescaped ``%`` at the end yields ``StartsWith`` on the text with its
      last character dropped;
    * otherwise the whole text is compared with ``EqualTo``.

    In both successful cases every ``\\%`` sequence is replaced by ``%``.

    Args:
        result: Parse results exposing ``column`` and ``raw_quoted_string``.

    Returns:
        A ``StartsWith`` or ``EqualTo`` expression on ``result.column``.

    Raises:
        ValueError: If the first unescaped ``%`` is not at the end of the pattern.
    """
    pattern: StringLiteral = result.raw_quoted_string
    text = pattern.value

    found = re.search(like_regex, text)
    groups = found.groupdict() if found else {}

    if groups.get('invalid_wildcard'):
        raise ValueError("LIKE expressions only supports wildcard, '%', at the end of a string")

    if groups.get('valid_wildcard'):
        # Drop the trailing wildcard, then unescape any literal percent signs.
        prefix = text[:-1].replace('\\%', '%')
        return StartsWith(result.column, StringLiteral(prefix))

    # No unescaped wildcard at all: LIKE degenerates to plain equality.
    return EqualTo(result.column, StringLiteral(text.replace('\\%', '%')))


predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate")
Expand Down
22 changes: 20 additions & 2 deletions tests/expressions/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,30 @@ def test_multiple_and_or() -> None:
) == parser.parse("foo is not null and foo < 5 or (foo > 10 and foo < 100 and bar is null)")


def test_like_equality() -> None:
    """A LIKE pattern with no unescaped wildcard parses to an equality check."""
    without_wildcard = parser.parse("foo LIKE 'data'")
    assert EqualTo("foo", "data") == without_wildcard

    # An escaped percent sign is a literal character, not a wildcard.
    escaped_percent = parser.parse("foo LIKE 'data\\%'")
    assert EqualTo("foo", "data%") == escaped_percent


def test_starts_with() -> None:
    """A trailing unescaped ``%`` turns LIKE into a prefix match.

    The former assertion that ``LIKE 'data'`` (no wildcard) yields ``StartsWith``
    is dropped: it contradicts the equality semantics covered by
    ``test_like_equality``.
    """
    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data%'")
    # Escaped percent signs inside the prefix are kept as literal characters.
    assert StartsWith("foo", "some % data") == parser.parse("foo LIKE 'some \\% data%'")
    assert StartsWith("foo", "some data%") == parser.parse("foo LIKE 'some data\\%%'")


def test_invalid_likes() -> None:
    """Wildcards anywhere but the end of a LIKE pattern are rejected."""
    invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'", "foo LIKE '%data'"]

    for statement in invalid_statements:
        with pytest.raises(ValueError) as exc_info:
            parser.parse(statement)

        # Inspect the raised exception itself (exc_info.value); str(exc_info) is the
        # pytest ExceptionInfo wrapper, whose text only matches incidentally.
        # The check runs inside the loop so every statement is verified.
        assert "LIKE expressions only supports wildcard, '%', at the end of a string" in str(exc_info.value)


def test_not_starts_with() -> None:
    """NOT LIKE negates whichever expression the LIKE pattern produces.

    The former assertion that ``NOT LIKE 'data'`` (no wildcard) yields
    ``NotStartsWith`` is dropped: it contradicts the ``NotEqualTo`` expectation
    for the same input.
    """
    # Without a wildcard the pattern is an equality check, so its negation is NotEqualTo.
    assert NotEqualTo("foo", "data") == parser.parse("foo NOT LIKE 'data'")
    assert NotStartsWith("foo", "data") == parser.parse("foo NOT LIKE 'data%'")


def test_with_function() -> None:
Expand Down

0 comments on commit a09de69

Please sign in to comment.