From fca62006a952e93ebb58c791dc4d022006f19c48 Mon Sep 17 00:00:00 2001
From: Jonas Haag
Date: Mon, 9 Dec 2019 14:57:56 +0100
Subject: [PATCH] Improve predicate "syntax" error messages

check_predicates() now names the offending conjunction or clause instead
of raising a generic "Malformed predicates" error. A clause is rejected
unless it is a tuple of exactly three elements.
---
Review notes (this section is ignored when the patch is applied):
- The clause check reads `not (isinstance(clause, tuple) and len(clause) == 3)`.
  Without the parentheses `not` bound only to isinstance(), so tuples of the
  wrong length fell through to the unpacking line and clauses without a
  length raised TypeError from len().
- Line breaks and indentation of this patch were reconstructed; apply with
  --ignore-whitespace if a context line fails to match. The blob ids on the
  "index" lines predate the edit.

 kartothek/serialization/_generic.py   | 44 +++++++++++++++++++--------
 tests/io_components/test_read.py      | 11 +++++--
 tests/serialization/test_dataframe.py | 21 +++++++++++--
 3 files changed, 57 insertions(+), 19 deletions(-)

diff --git a/kartothek/serialization/_generic.py b/kartothek/serialization/_generic.py
index 66aa8193..d749b389 100644
--- a/kartothek/serialization/_generic.py
+++ b/kartothek/serialization/_generic.py
@@ -167,19 +167,37 @@ def check_predicates(predicates):
     """
     Check if predicates are well-formed.
     """
-    if predicates is not None:
-        if len(predicates) == 0 or any(len(p) == 0 for p in predicates):
-            raise ValueError("Malformed predicates")
-        for conjunction in predicates:
-            for col, op, val in conjunction:
-                if (
-                    isinstance(val, list)
-                    and any(_check_contains_null(v) for v in val)
-                    or _check_contains_null(val)
-                ):
-                    raise NotImplementedError(
-                        "Null-terminated binary strings are not supported as predicate values."
-                    )
+    if predicates is None:
+        return
+
+    if len(predicates) == 0:
+        raise ValueError("Empty predicates")
+
+    for conjunction_idx, conjunction in enumerate(predicates):
+        if not isinstance(conjunction, list):
+            raise ValueError(
+                f"Invalid predicates: Conjunction {conjunction_idx} should be a "
+                f"list of 3-tuples, got object of type {type(conjunction)} instead."
+            )
+        if len(conjunction) == 0:
+            raise ValueError(
+                f"Invalid predicates: Conjunction {conjunction_idx} is empty"
+            )
+        for clause_idx, clause in enumerate(conjunction):
+            if not (isinstance(clause, tuple) and len(clause) == 3):
+                raise ValueError(
+                    f"Invalid predicates: Clause {clause_idx} in conjunction {conjunction_idx} "
+                    f"should be a 3-tuple, got object of type {type(clause)} instead"
+                )
+            _, _, val = clause
+            if (
+                isinstance(val, list)
+                and any(_check_contains_null(v) for v in val)
+                or _check_contains_null(val)
+            ):
+                raise NotImplementedError(
+                    "Null-terminated binary strings are not supported as predicate values."
+                )
 
 
 def filter_predicates_by_column(
diff --git a/tests/io_components/test_read.py b/tests/io_components/test_read.py
index 948d030c..a3ab8f26 100644
--- a/tests/io_components/test_read.py
+++ b/tests/io_components/test_read.py
@@ -61,9 +61,14 @@ def test_dispatch_metapartitions_without_dataset_metadata(dataset, store_session
         assert mp.dataset_metadata == {}
 
 
-@pytest.mark.parametrize("predicates", [[], [[]]])
-def test_dispatch_metapartition_undefined_behaviour(dataset, store_session, predicates):
-    with pytest.raises(ValueError, match="Malformed predicates"):
+@pytest.mark.parametrize(
+    "predicates,error_msg",
+    [([], "Empty predicates"), ([[]], "Invalid predicates: Conjunction 0 is empty")],
+)
+def test_dispatch_metapartition_undefined_behaviour(
+    dataset, store_session, predicates, error_msg
+):
+    with pytest.raises(ValueError, match=error_msg):
         list(
             dispatch_metapartitions(dataset.uuid, store_session, predicates=predicates)
         )
diff --git a/tests/serialization/test_dataframe.py b/tests/serialization/test_dataframe.py
index c36cec25..10636131 100644
--- a/tests/serialization/test_dataframe.py
+++ b/tests/serialization/test_dataframe.py
@@ -357,7 +357,7 @@ def test_predicate_pushdown(
             predicates=predicates,
             **read_kwargs,
         )
-    assert str(exc.value) == "Malformed predicates"
+    assert str(exc.value) == "Empty predicates"
 
     # Test malformed predicates 2
     predicates = [[]]
@@ -369,7 +369,7 @@ def test_predicate_pushdown(
             predicates=predicates,
             **read_kwargs,
         )
-    assert str(exc.value) == "Malformed predicates"
+    assert str(exc.value) == "Invalid predicates: Conjunction 0 is empty"
 
     # Test malformed predicates 3
     predicates = [[(df.columns[0], "<", df.iloc[0, 0])], []]
@@ -381,7 +381,22 @@ def test_predicate_pushdown(
             predicates=predicates,
             **read_kwargs,
         )
-    assert str(exc.value) == "Malformed predicates"
+    assert str(exc.value) == "Invalid predicates: Conjunction 1 is empty"
+
+    # Test malformed predicates 4
+    predicates = [[(df.columns[0], "<", df.iloc[0, 0])], ["foo"]]
+    with pytest.raises(ValueError) as exc:
+        serialiser.restore_dataframe(
+            store,
+            key,
+            predicate_pushdown_to_io=predicate_pushdown_to_io,
+            predicates=predicates,
+            **read_kwargs,
+        )
+    assert (
+        str(exc.value)
+        == "Invalid predicates: Clause 0 in conjunction 1 should be a 3-tuple, got object of type <class 'str'> instead"
+    )
 
 
 @predicate_serialisers