Skip to content

Commit

Permalink
Passing Empty List to multirange_index and df should return Empty…
Browse files Browse the repository at this point in the history
… Results (#1412)

* `.df` and `.multi_index` Empty Sequence Returns Empty Results
* Use TypeVar for EmptyRange

---------

Co-authored-by: Agisilaos Kounelis <[email protected]>
  • Loading branch information
nguyenv and kounelisagis authored Aug 13, 2024
1 parent 6ff4e62 commit 2ef0bb9
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 5 deletions.
16 changes: 13 additions & 3 deletions tiledb/multirange_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
Optional,
Sequence,
Tuple,
TypeVar,
Union,
cast,
)
Expand Down Expand Up @@ -44,8 +45,17 @@
current_timer: ContextVar[str] = ContextVar("timer_scope")


# sentinel value to denote selecting an empty range
EmptyRange = object()
# sentinel type to denote selecting an empty range
EmptyRange = TypeVar("EmptyRange")


def is_empty_range(idx: Union[EmptyRange, List, Tuple]) -> bool:
    """Return whether *idx* denotes an empty selection.

    An index is considered empty when it is the ``EmptyRange`` sentinel
    itself, or when it is any zero-length sized object other than a ``str``
    (for example ``[]``, ``()`` or an empty array). The empty string is
    deliberately never treated as an empty selection.

    :param idx: the indexer to classify
    :return: ``True`` if *idx* selects nothing, ``False`` otherwise
    """
    if idx is EmptyRange:
        return True

    # Decide by type instead of evaluating ``idx != ""``: for array-likes that
    # comparison is elementwise and its truth value is ambiguous.
    if isinstance(idx, str):
        return False

    try:
        return len(idx) == 0
    except TypeError:
        # Unsized indexers (scalars, slices, 0-d arrays) are never empty.
        return False


# TODO: expand with more accepted scalar types
Scalar = Real
Expand Down Expand Up @@ -248,7 +258,7 @@ def return_incomplete(self) -> bool:

def __getitem__(self, idx):
with timing("getitem_time"):
if idx is EmptyRange:
if is_empty_range(idx):
self.pyquery = None
self.subarray = None
else:
Expand Down
8 changes: 6 additions & 2 deletions tiledb/tests/test_multi_index-hp.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ def _direct_query_ranges(array: SparseArray, ranges, order):
q.set_subarray(subarray)

q.submit()
return {k: v[0].view(array.attr(0).dtype) for k, v in q.results().items()}

if ranges == [[]]:
# empty range should give empty result
return {k: [] for k in q.results()}
else:
return {k: v[0].view(array.attr(0).dtype) for k, v in q.results().items()}


# Compound strategies to build valid inputs for multi_index
Expand Down Expand Up @@ -90,7 +95,6 @@ def create_array(uri):
def test_multi_index_two_way_query(self, order, ranges, sparse_array_1d):
"""This test checks the result of "direct" range queries using PyQuery
against the result of `multi_index` on the same ranges."""

uri = sparse_array_1d

assert isinstance(uri, str)
Expand Down
24 changes: 24 additions & 0 deletions tiledb/tests/test_multi_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,30 @@ def test_fixed_width_char(self):
with tiledb.open(uri, mode="r") as A:
assert all(A.query(use_arrow=True).df[:][""] == data)

@pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed")
def test_empty_idx(self):
    """Check that empty indexers give empty results.

    Writes ten values to a 1-D sparse array and asserts that indexing
    ``df`` and ``multi_index`` with the ``EmptyRange`` sentinel, an empty
    list, or an empty tuple returns an empty ``"a"`` column.
    """
    uri = self.path("test_empty_idx")

    # Build the schema from named parts: one uint8 dimension, one float64 attribute.
    dim = tiledb.Dim(name="dim", domain=(0, 9), dtype=np.uint8)
    attr = tiledb.Attr(name="a", dtype=np.float64)
    schema = tiledb.ArraySchema(
        domain=tiledb.Domain(dim),
        sparse=True,
        attrs=[attr],
    )
    tiledb.Array.create(uri, schema)

    data = np.array(np.random.randint(10, size=10), dtype=np.float64)
    with tiledb.open(uri, mode="w") as A:
        A[np.arange(10)] = data

    with tiledb.open(uri, mode="r") as A:
        # Each indexer is checked against both accessors, df first.
        for empty_idx in (tiledb.EmptyRange, [], ()):
            assert_array_equal(A.df[empty_idx]["a"], [])
            assert_array_equal(A.multi_index[empty_idx]["a"], [])


# parametrize dtype and sparse
@pytest.mark.parametrize(
Expand Down

0 comments on commit 2ef0bb9

Please sign in to comment.