Skip to content

Commit

Permalink
Improve multi_index query/coords behavior
Browse files (browse the repository at this point in the history)
- Don't return coords for dense multi_index by default (#347)
- Fix and test coords exclusion for sparse array queries
  • Loading branch information
ihnorton committed Jun 26, 2020
1 parent 0cf81db commit 58c96a9
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 15 deletions.
15 changes: 7 additions & 8 deletions tiledb/core.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,21 +214,22 @@ class PyQuery {
array_ = std::shared_ptr<tiledb::Array>(new Array(ctx_, c_array_, false),
[](Array *p) {} /* no deleter*/);

bool issparse = array_->schema().array_type() == TILEDB_SPARSE;

query_ = std::shared_ptr<tiledb::Query>(
new Query(ctx_, *array_, TILEDB_READ));
// [](Query* p){} /* note: no deleter*/);

tiledb_layout_t layout = (tiledb_layout_t)py_layout.cast<int32_t>();
if (array_->schema().array_type() == TILEDB_DENSE &&
layout == TILEDB_UNORDERED) {
if (issparse && layout == TILEDB_UNORDERED) {
TPY_ERROR_LOC("TILEDB_UNORDERED read is not supported for dense arrays")
}
query_->set_layout(layout);

if (coords.is(py::none())) {
include_coords_ = true;
} else {
if (!coords.is(py::none())) {
include_coords_ = coords.cast<bool>();
} else {
include_coords_ = issparse;
}

for (auto a : attrs) {
Expand Down Expand Up @@ -553,10 +554,8 @@ class PyQuery {

void submit_read() {
auto schema = array_->schema();
auto issparse = schema.array_type() == TILEDB_SPARSE;
auto need_dim_buffers = include_coords_ || issparse;

if (need_dim_buffers) {
if (include_coords_) {
auto domain = schema.domain();
for (auto dim : domain.dimensions()) {
alloc_buffer(dim.name());
Expand Down
4 changes: 1 addition & 3 deletions tiledb/multirange_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ def sel_to_subranges(dim_sel):

class MultiRangeIndexer(object):
"""
Implements multi-range / outer / orthogonal indexing.
Implements multi-range indexing.
"""

def __init__(self, array, query = None):
Expand Down Expand Up @@ -111,7 +110,6 @@ def __getitem__(self, idx):
schema = self.schema
dom = self.schema.domain
attr_names = tuple(self.schema.attr(i)._internal_name for i in range(self.schema.nattr))

coords = None
order = 'C' # TILEDB_ROW_MAJOR
if self.query is not None:
Expand Down
4 changes: 3 additions & 1 deletion tiledb/tests/test_libtiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2853,7 +2853,9 @@ def test_tiledb_py_0_6_anon_attr(self):
self.assertEqual(A[0], 1)
mres = A.multi_index[0]
self.assertEqual(mres[''], 1)
self.assertEqual(mres['d'], 0)

qres = A.query(coords=True).multi_index[0]
self.assertEqual(qres['d'], 0)

class MemoryTest(DiskTestCase):
# sanity check that memory usage doesn't increase more than 2x when reading 40MB 100x
Expand Down
12 changes: 10 additions & 2 deletions tiledb/tests/test_multi_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def test_multirange_1d_dense_int64(self):
with tiledb.open(path) as A:
# stepped ranges are not supported
with self.assertRaises(ValueError):
A.multi_index[ 1::2 ]
A.query(coords=True).multi_index[ 1::2 ]

assert_array_equal(
orig_array[ [0,-1] ],
Expand All @@ -311,7 +311,7 @@ def test_multirange_1d_dense_int64(self):
)
self.assertEqual(
-10,
A.multi_index[-10]['coords'].view('i8')
A.query(coords=True).multi_index[-10]['coords'].view('i8')
)
assert_array_equal(
orig_array[0:],
Expand Down Expand Up @@ -576,6 +576,14 @@ def test_multirange_1d_sparse_query(self):
res[k]
)

with tiledb.open(path) as A:
Q = A.query(coords=False, attrs=["U"])
res = Q.multi_index[:]
self.assertTrue("U" in res)
self.assertTrue("V" not in res)
self.assertTrue("coords" not in res)
assert_array_equal(res["U"], data["U"])

def test_multirange_1d_dense_vectorized(self):
ctx = tiledb.Ctx()
path = self.path('mr_1d_dense_vectorized')
Expand Down
1 change: 0 additions & 1 deletion tiledb/tests/test_pandas_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,6 @@ def test_csv_chunked(self):
ned = A.nonempty_domain()[0]
# TODO should support numpy scalar here
res = A.multi_index[int(ned[0]):int(ned[1])]
res.pop('rows')
df_bk = pd.DataFrame(res)

tm.assert_frame_equal(df_bk, df)

0 comments on commit 58c96a9

Please sign in to comment.