From 6e882478cf9c36c283046d168b5bd51a2bc497d1 Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 25 Jul 2024 06:15:19 -0500 Subject: [PATCH] wip for fancy indexing with multiple coord list --- hsds/dset_lib.py | 2 +- hsds/dset_sn.py | 17 ++++++++--- hsds/util/chunkUtil.py | 2 ++ tests/integ/pointsel_test.py | 26 ++++++++-------- tests/integ/value_test.py | 2 +- tests/unit/chunk_util_test.py | 57 +++++++++-------------------------- 6 files changed, 44 insertions(+), 62 deletions(-) diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index cc9ce1aa..ece44e72 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -294,7 +294,7 @@ def getChunkItem(chunkid): kwargs = {"chunk_index": chunk_index, "factors": table_factors} _get_arr_pts(arr_points, arr_index, pt, **kwargs) - msg = f"got chunktable - {len(arr_points)} entries, calling getSelectionData" + msg = f"got chunktable - {len(arr_points)} entries, calling getChunkLocations" log.debug(msg) # this call won't lead to a circular loop of calls since we've checked # that the chunktable layout is not H5D_CHUNKED_REF_INDIRECT diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 13dbbb9e..9e9534d3 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -745,6 +745,7 @@ async def POST_Dataset(request): dims = None shape_json = {} rank = 0 + chunk_size = None if "shape" not in body: shape_json["class"] = "H5S_SCALAR" @@ -858,19 +859,21 @@ async def POST_Dataset(request): else: creationProperties = {} + # TBD: check for invalid layout class... 
if layout_props: - if layout_props["class"] in ("H5D_COMPACT", "H5D_CONTIGUOUS"): - # treat compact and contiguous as chunked + if layout_props["class"] == "H5D_CONTIGUOUS": + # treat contiguous as chunked layout_class = "H5D_CHUNKED" else: layout_class = layout_props["class"] - elif shape_json["class"] != "H5S_NULL": layout_class = "H5D_CHUNKED" else: layout_class = None - if layout_class: + if layout_class == "H5D_COMPACT": + layout = {"class": "H5D_COMPACT"} + elif layout_class: # initialize to H5D_CHUNKED layout = {"class": "H5D_CHUNKED"} else: @@ -1034,6 +1037,12 @@ async def POST_Dataset(request): msg = "Expected filters in creationProperties to be a list" log.warn(msg) raise HTTPBadRequest(reason=msg) + + if f_in and chunk_size is None: + # no chunk size was determined, so the dataset is not chunked and cannot take filters + msg = "Filters can only be used with chunked datasets" + log.warning(msg) + raise HTTPBadRequest(reason=msg) f_out = [] for filter in f_in: diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index e399a83a..97be97ea 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -305,6 +305,7 @@ def getNumChunks(selection, layout): # coordinate list last_chunk = None count = 0 + s = sorted(s) # coordinates may not be sorted; use a sorted copy so the caller's selection keeps its order for x in s: this_chunk = x // c if this_chunk != last_chunk: @@ -483,6 +484,7 @@ def getChunkIds(dset_id, selection, layout, dim=0, prefix=None, chunk_ids=None): else: # coordinate list last_chunk_index = None + s = sorted(s) # coordinates may not be in order; use a sorted copy so the caller's selection keeps its order for coord in s: chunk_index = coord // c if chunk_index != last_chunk_index: diff --git a/tests/integ/pointsel_test.py b/tests/integ/pointsel_test.py index 7a2473b6..e1b9bcb2 100755 --- a/tests/integ/pointsel_test.py +++ b/tests/integ/pointsel_test.py @@ -220,7 +220,7 @@ def testPost2DDataset(self): points = [] for i in range(3): for j in range(5): - pt = [i * 5 + 5, j * 5 + 5] + pt = [i * 5 + 5, 25 - j * 5] points.append(pt) body = {"points": points} # read a selected points @@ -228,21 +228,21 @@ def 
testPost2DDataset(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) expected_result = [ - 50005, - 50010, - 50015, - 50020, 50025, - 100005, - 100010, - 100015, - 100020, + 50020, + 50015, + 50010, + 50005, 100025, - 150005, - 150010, - 150015, - 150020, + 100020, + 100015, + 100010, + 100005, 150025, + 150020, + 150015, + 150010, + 150005, ] self.assertTrue("value" in rspJson) values = rspJson["value"] diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 2d94cfa0..42c1f6e9 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -1677,7 +1677,7 @@ def testGet(self): dset1_uuid = self.getUUIDByPath(domain, "/g1/g1.1/dset1.1.1") # read fancy selection - params = {"select": "[0:4, [2,4,7]]"} + params = {"select": "[0:4, [7,4,2]]"} req = helper.getEndpoint() + "/datasets/" + dset1_uuid + "/value" rsp = self.session.get(req, params=params, headers=headers) self.assertEqual(rsp.status_code, 200) diff --git a/tests/unit/chunk_util_test.py b/tests/unit/chunk_util_test.py index 28e7bf32..b1c69941 100755 --- a/tests/unit/chunk_util_test.py +++ b/tests/unit/chunk_util_test.py @@ -405,9 +405,7 @@ def testGetChunkIds(self): # getChunkIds(dset_id, selection, layout, dim=0, prefix=None, chunk_ids=None): dset_id = "d-12345678-1234-1234-1234-1234567890ab" - datashape = [ - 1, - ] + datashape = [1,] layout = (1,) selection = getHyperslabSelection(datashape) chunk_ids = getChunkIds(dset_id, selection, layout) @@ -419,9 +417,7 @@ def testGetChunkIds(self): self.assertEqual(len(chunk_id), 2 + 36 + 2) self.assertEqual(getDatasetId(chunk_id), dset_id) - datashape = [ - 100, - ] + datashape = [100,] layout = (10,) selection = getHyperslabSelection(datashape) chunk_ids = getChunkIds(dset_id, selection, layout) @@ -493,9 +489,7 @@ def testGetChunkIds(self): self.assertEqual(chunk_id[2:-2], dset_id[2:]) self.assertEqual(len(chunk_id), 2 + 36 + 2) - datashape = [ - 3207353, - ] + datashape = [3207353,] layout = (60000,) 
selection = getHyperslabSelection(datashape, 1234567, 1234568) chunk_ids = getChunkIds(dset_id, selection, layout) @@ -618,9 +612,7 @@ def testGetChunkIndex(self): index = getChunkIndex(chunk_id) self.assertEqual( index, - [ - 64, - ], + [64,], ) def testGetChunkSelection(self): @@ -726,9 +718,7 @@ def testGetChunkSelection(self): sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual( sel[0], - [ - 33, - ], + [33,], ) # 2-d test @@ -790,9 +780,7 @@ def testGetChunkSelection(self): self.assertEqual(sel[0], slice(35, 40, 1)) self.assertEqual( sel[1], - [ - 33, - ], + [33,], ) chunk_id = chunk_ids[2] sel = getChunkSelection(chunk_id, selection, layout) @@ -803,15 +791,11 @@ def testGetChunkSelection(self): self.assertEqual(sel[0], slice(40, 45, 1)) self.assertEqual( sel[1], - [ - 33, - ], + [33,], ) # 1-d test with fractional chunks - datashape = [ - 104, - ] + datashape = [104,] layout = (10,) selection = getHyperslabSelection(datashape, 92, 102) chunk_ids = getChunkIds(dset_id, selection, layout) @@ -864,9 +848,7 @@ def testGetChunkSelection(self): def testGetChunkCoverage(self): # 1-d test dset_id = "d-12345678-1234-1234-1234-1234567890ab" - datashape = [ - 100, - ] + datashape = [100,] layout = (10,) selection = getHyperslabSelection(datashape, 42, 62) chunk_ids = getChunkIds(dset_id, selection, layout) @@ -1014,9 +996,7 @@ def testGetChunkCoverage(self): self.assertEqual(sel[1], (2, 9)) # 1-d test with fractional chunks - datashape = [ - 104, - ] + datashape = [104,] layout = (10,) selection = getHyperslabSelection(datashape, 92, 102) chunk_ids = getChunkIds(dset_id, selection, layout) @@ -1039,9 +1019,7 @@ def testGetChunkCoverage(self): def testGetDataCoverage(self): # 1-d test dset_id = "d-12345678-1234-1234-1234-1234567890ab" - datashape = [ - 100, - ] + datashape = [100,] layout = (10,) selection = getHyperslabSelection(datashape, 42, 62) chunk_ids = getChunkIds(dset_id, selection, layout) @@ -1205,9 +1183,7 @@ def testGetDataCoverage(self): 
self.assertEqual(sel[1].step, 1) # 1-d test with fractional chunks - datashape = [ - 104, - ] + datashape = [104,] layout = (10,) selection = getHyperslabSelection(datashape, 92, 102) chunk_ids = getChunkIds(dset_id, selection, layout) @@ -1294,9 +1270,7 @@ def testDimQuery(self): def testChunkIterator1d(self): dset_id = "d-12345678-1234-1234-1234-1234567890ab" dims = [100] - layout = [ - 10, - ] + layout = [10,] selection = getHyperslabSelection(dims) it = ChunkIterator(dset_id, selection, layout) @@ -1315,10 +1289,7 @@ def testChunkIterator1d(self): def testChunkIterator2d(self): dset_id = "d-12345678-1234-1234-1234-1234567890ab" - dims = [ - 100, - 100, - ] + dims = [100, 100,] layout = [50, 50] selection = getHyperslabSelection(dims) it = ChunkIterator(dset_id, selection, layout)