From a2aa1ea6ddc6756c0c9b406f4e2db29d4adf478a Mon Sep 17 00:00:00 2001 From: John Readey Date: Mon, 12 Aug 2024 13:42:13 -0500 Subject: [PATCH] support fancy selection with multiple coords --- hsds/chunk_sn.py | 1 + hsds/util/chunkUtil.py | 319 ++++++++++++++++++++++------------ hsds/util/dsetUtil.py | 14 +- tests/integ/value_test.py | 22 +++ tests/unit/chunk_util_test.py | 173 ++++++++++++------ tests/unit/dset_util_test.py | 152 +++++++--------- 6 files changed, 428 insertions(+), 253 deletions(-) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 01809281..2d9a295b 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -17,6 +17,7 @@ import base64 import math import numpy as np + from json import JSONDecodeError from asyncio import IncompleteReadError from aiohttp.web_exceptions import HTTPException, HTTPBadRequest diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 651ed29e..581a9eda 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -259,7 +259,10 @@ def getNumChunks(selection, layout): If selection is provided (a list of slices), return the number of chunks that intersect with the selection. 
""" - + rank = len(layout) + if len(selection) != rank: + msg = f"selection list has {len(selection)} items, but rank is {rank}" + raise ValueError(msg) # do a quick check that we don't have a null selection space' # TBD: this needs to be revise to do the right think with stride > 1 for s in selection: @@ -271,48 +274,66 @@ def getNumChunks(selection, layout): # coordinate list if len(s) == 0: return 0 - num_chunks = 1 + # first, get the number of chunks needed for any coordinate selection + chunk_indices = [] for i in range(len(selection)): s = selection[i] - c = layout[i] # chunk size - + c = layout[i] if isinstance(s, slice): - if s.step is None: - s = slice(s.start, s.stop, 1) - if s.step > 1: - num_points = frac((s.stop - s.start), s.step) - w = num_points * s.step - (s.step - 1) - else: - w = s.stop - s.start # selection width (>0) + continue + + # coordinate list + if chunk_indices: + if len(s) != len(chunk_indices): + msg = "shape mismatch: indexing arrays could not be broadcast together " + msg += f"with shapes ({len(chunk_indices)},) ({len(s)},)" + raise ValueError(msg) + else: + chunk_indices = ["",] * len(s) - lc = frac(s.start, c) * c + for j in range(len(s)): + if chunk_indices[j]: + chunk_indices[j] += "_" + chunk_indices[j] += str(s[j] // layout[i]) - if s.start + w <= lc: - # looks like we just cross one chunk along this dimension - continue + if chunk_indices: + # number of chunks is the number of unique strings in the point list + num_chunks = len(set(chunk_indices)) + else: + num_chunks = 1 - rc = ((s.start + w) // c) * c - m = rc - lc - if c > s.step: - count = m // c - else: - count = m // s.step - if s.start < lc: - count += 1 # hit one chunk on the left - if s.start + w > rc: - count += 1 # hit one chunk on the right + # now deal with any slices in the selection + for i in range(len(selection)): + s = selection[i] + c = layout[i] # chunk size + if not isinstance(s, slice): + # ignore coordinate lists since we dealt with them above + continue 
+ + if s.step is None: + s = slice(s.start, s.stop, 1) + if s.step > 1: + num_points = frac((s.stop - s.start), s.step) + w = num_points * s.step - (s.step - 1) else: - # coordinate list - last_chunk = None - count = 0 - s = list(s) - s.sort() # coordinates may not be sorted - for x in s: - this_chunk = x // c - if this_chunk != last_chunk: - count += 1 - last_chunk = this_chunk + w = s.stop - s.start # selection width (>0) + + lc = frac(s.start, c) * c + + if s.start + w <= lc: + # looks like we just cross one chunk along this dimension + continue + rc = ((s.start + w) // c) * c + m = rc - lc + if c > s.step: + count = m // c + else: + count = m // s.step + if s.start < lc: + count += 1 # hit one chunk on the left + if s.start + w > rc: + count += 1 # hit one chunk on the right num_chunks *= count return num_chunks @@ -422,11 +443,36 @@ def getChunkIdForPartition(chunk_id, dset_json): return chunk_id -def getChunkIds(dset_id, selection, layout, dim=0, prefix=None, chunk_ids=None): +def getChunkIds(dset_id, selection, layout, prefix=None): """Get the all the chunk ids for chunks that lie in the selection of the given dataset. """ + + def chunk_index_to_id(indices): + """ Convert chunk index list to string with '_' as separator. + None values will be replaced with '*' """ + items = [] + for x in indices: + if x is None: + items.append("*") + else: + items.append(str(x)) + return "_".join(items) + + def chunk_id_to_index(chunk_id): + """ convert chunk_id to list of indices. 
+ Any '*' values will be replaced with None """ + indices = [] + items = chunk_id.split("_") + for item in items: + if item == "*": + x = None + else: + x = int(item) + indices.append(x) + return indices + num_chunks = getNumChunks(selection, layout) if num_chunks == 0: return [] # empty list @@ -438,66 +484,87 @@ def getChunkIds(dset_id, selection, layout, dim=0, prefix=None, chunk_ids=None): raise ValueError(msg) prefix = "c-" + dset_id[2:] + "_" rank = len(selection) - if chunk_ids is None: - chunk_ids = [] - # log.debug(f"getChunkIds - selection: {selection}") - s = selection[dim] - c = layout[dim] - # log.debug(f"getChunkIds - layout: {layout}") - if isinstance(s, slice) and s.step is None: - s = slice(s.start, s.stop, 1) - if isinstance(s, slice) and s.step > c: - # chunks may not be contiguous, skip along the selection and add - # whatever chunks we land in - for i in range(s.start, s.stop, s.step): - chunk_index = i // c - chunk_id = prefix + str(chunk_index) - if dim + 1 == rank: - # we've gone through all the dimensions, add this id - # to the list - chunk_ids.append(chunk_id) - else: - chunk_id += "_" # seperator between dimensions - # recursive call - getChunkIds(dset_id, selection, layout, dim + 1, chunk_id, chunk_ids) - elif isinstance(s, slice): - # get a contiguous set of chunks along the selection - if s.step > 1: - num_points = frac((s.stop - s.start), s.step) - w = num_points * s.step - (s.step - 1) + # initialize chunk_ids based on coordinate index, if any + num_coordinates = None + chunk_items = set() + for s in selection: + if isinstance(s, slice): + continue + elif num_coordinates is None: + num_coordinates = len(s) else: - w = s.stop - s.start # selection width (>0) + if len(s) != num_coordinates: + raise ValueError("coordinate length mismatch") - chunk_index_start = s.start // c - chunk_index_end = frac((s.start + w), c) + if num_coordinates is None: + # no coordinates, all slices + num_coordinates = 1 # this will initialize the list with 
one wildcard chunk index - for i in range(chunk_index_start, chunk_index_end): - chunk_id = prefix + str(i) - if dim + 1 == rank: - # we've gone through all the dimensions, - # add this id to the list - chunk_ids.append(chunk_id) + for i in range(num_coordinates): + chunk_idx = [] + for dim in range(rank): + s = selection[dim] + c = layout[dim] + if isinstance(s, slice): + chunk_index = None else: - chunk_id += "_" # seperator between dimensions - # recursive call - getChunkIds(dset_id, selection, layout, dim + 1, chunk_id, chunk_ids) - else: - # coordinate list - last_chunk_index = None - s = list(s) - s.sort() # coordinates may not be in order - for coord in s: - chunk_index = coord // c - if chunk_index != last_chunk_index: - chunk_id = prefix + str(chunk_index) - if dim + 1 == rank: - # add the chunk id - chunk_ids.append(chunk_id) - else: - chunk_id += "_" # dimension seperator - getChunkIds(dset_id, selection, layout, dim + 1, chunk_id, chunk_ids) - last_chunk_index = chunk_index + chunk_index = s[i] // c + chunk_idx.append(chunk_index) + chunk_id = chunk_index_to_id(chunk_idx) + chunk_items.add(chunk_id) + chunk_ids = list(chunk_items) # convert to a list, remove any dups + # convert str ids back to indices + chunk_items = [] + for chunk_id in chunk_ids: + chunk_index = chunk_id_to_index(chunk_id) + chunk_items.append(chunk_index) + + # log.debug(f"getChunkIds - selection: {selection}") + for dim in range(rank): + s = selection[dim] + c = layout[dim] + + if not isinstance(s, slice): + continue # chunk indices for coordinate list already factored in + + # log.debug(f"getChunkIds - layout: {layout}") + if s.step is None: + s = slice(s.start, s.stop, 1) + + chunk_indices = [] + if s.step > c: + # chunks may not be contiguous, skip along the selection and add + # whatever chunks we land in + for i in range(s.start, s.stop, s.step): + chunk_index = i // c + chunk_indices.append(chunk_index) + else: + # get a contiguous set of chunks along the selection + if 
s.step > 1: + num_points = frac((s.stop - s.start), s.step) + w = num_points * s.step - (s.step - 1) + else: + w = s.stop - s.start # selection width (>0) + + chunk_index_start = s.start // c + chunk_index_end = frac((s.start + w), c) + chunk_indices = list(range(chunk_index_start, chunk_index_end)) + + # append the set of chunk_indices to our set of chunk_ids + chunk_items_next = [] + for chunk_idx in chunk_items: + for chunk_index in chunk_indices: + chunk_idx_next = chunk_idx.copy() + chunk_idx_next[dim] = chunk_index + chunk_items_next.append(chunk_idx_next) + chunk_items = chunk_items_next + + # convert chunk indices to chunk ids + chunk_ids = [] + for chunk_idx in chunk_items: + chunk_id = prefix + chunk_index_to_id(chunk_idx) + chunk_ids.append(chunk_id) # got the complete list, return it! return chunk_ids @@ -622,10 +689,39 @@ def getDataCoverage(chunk_id, slices, layout): """ Get data-relative selection of the given chunk and selection. """ + chunk_sel = getChunkSelection(chunk_id, slices, layout) rank = len(layout) sel = [] - # print(f'getDataCoverage - chunk_id: {chunk_id} slices: {slices} layout: {layout}') + + points = None + coordinate_extent = None + for dim in range(rank): + c = chunk_sel[dim] + s = slices[dim] + if isinstance(s, slice): + continue + if isinstance(c, slice): + msg = "expecting coordinate chunk selection for data " + msg += "coord selection" + raise ValueError(msg) + if len(c) < 1: + msg = "expected at least one chunk coordinate" + raise ValueError(msg) + if coordinate_extent is None: + coordinate_extent = len(s) + elif coordinate_extent != len(s): + msg = "shape mismatch: indexing arrays could not be broadcast together " + msg += f"with shapes ({coordinate_extent},) ({len(s)},)" + raise ValueError(msg) + else: + pass + + if coordinate_extent is not None: + points = np.zeros((coordinate_extent, rank), dtype=np.int64) + points[:, :] = -1 + + data_pts = None for dim in range(rank): c = chunk_sel[dim] s = slices[dim] @@ -633,8 +729,7 @@ 
def getDataCoverage(chunk_id, slices, layout): if s.step is None: s = slice(s.start, s.stop, 1) if c.step != s.step: - msg = "expecting step for chunk selection to be the " - msg += "same as data selection" + msg = "expecting step for chunk selection to be the same as data selection" raise ValueError(msg) start = (c.start - s.start) // s.step stop = frac((c.stop - s.start), s.step) @@ -642,21 +737,31 @@ def getDataCoverage(chunk_id, slices, layout): sel.append(slice(start, stop, step)) else: # coordinate selection - if isinstance(c, slice): - msg = "expecting coordinate chunk selection for data " - msg += "coord selection" - raise ValueError(msg) - if len(c) < 1: - msg = "expected at least one chunk coordinate" - raise ValueError(msg) - start = 0 for i in range(len(s)): - if s[i] >= c[0]: + points[i, dim] = s[i] + + if data_pts is None: + data_pts = [] + sel.append(data_pts) + + # now fill in the coordinate selection + if data_pts is not None: + chunk_coord = getChunkCoordinate(chunk_id, layout) + for i in range(coordinate_extent): + include_pt = True + point = points[i] + for dim in range(rank): + point[dim] + if point[dim] < 0: + continue # this dim is a slice selection + if point[dim] < chunk_coord[dim]: + include_pt = False break - start += 1 - stop = start + len(c) - step = 1 - sel.append(slice(start, stop, step)) + if point[dim] >= chunk_coord[dim] + layout[dim]: + include_pt = False + break + if include_pt: + data_pts.append(i) return tuple(sel) diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 1fe70b98..bfe24731 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -340,6 +340,7 @@ def getSelectionShape(selection): """ shape = [] rank = len(selection) + coordinate_extent = None for i in range(rank): s = selection[i] if isinstance(s, slice): @@ -354,10 +355,21 @@ def getSelectionShape(selection): extent = extent // step if (s.stop - s.start) % step != 0: extent += 1 + shape.append(extent) else: # coordinate list extent = len(s) - 
shape.append(extent) + if coordinate_extent is None: + coordinate_extent = extent + shape.append(extent) + elif coordinate_extent != extent: + msg = "shape mismatch: indexing arrays could not be broadcast together " + msg += f"with shapes ({coordinate_extent},) ({extent},)" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + pass + return shape diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 42c1f6e9..c57376c8 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -1689,6 +1689,17 @@ def testGet(self): self.assertTrue(data[2], [4, 8, 14]) self.assertTrue(data[3], [6, 12, 21]) + # read fancy selection with two index lists + params = {"select": "[[0,1,3], [7,4,2]]"} + req = helper.getEndpoint() + "/datasets/" + dset1_uuid + "/value" + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + data = rspJson["value"] # should be 3 element array + self.assertEqual(len(data), 3) + self.assertTrue(data, [0, 4, 6]) + # read all the dataset values req = helper.getEndpoint() + "/datasets/" + dset1_uuid + "/value" rsp = self.session.get(req, headers=headers) @@ -1840,6 +1851,17 @@ def testFancyIndexing(self): self.assertTrue(data[0], [2, 4, 7]) self.assertTrue(data[1], [4, 8, 14]) + # read fancy selection with two coordinates + params = {"select": "[[0,3,7], [6,4,2]]"} + req = helper.getEndpoint() + "/datasets/" + dset1_uuid + "/value" + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + data = rspJson["value"] # should be 3 element array + self.assertTrue(len(data), 3) + self.assertEqual(data, [0, 12, 14]) + def testResizable1DValue(self): # test read/write to resizable dataset print("testResizable1DValue", self.base_domain) diff --git a/tests/unit/chunk_util_test.py 
b/tests/unit/chunk_util_test.py index b1c69941..75ebfb5a 100755 --- a/tests/unit/chunk_util_test.py +++ b/tests/unit/chunk_util_test.py @@ -710,16 +710,18 @@ def testGetChunkSelection(self): # test with coordinate selection = ((12, 13, 33),) chunk_ids = getChunkIds(dset_id, selection, layout) + self.assertEqual(len(chunk_ids), 2) - chunk_id = chunk_ids[0] + print("x:", chunk_ids) + chunk_id = f"c-{dset_id[2:]}_1" + self.assertTrue(chunk_id in chunk_ids) + sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0], [12, 13]) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_3" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) - self.assertEqual( - sel[0], - [33,], - ) + self.assertEqual(sel[0], [33,],) # 2-d test datashape = [100, 100] @@ -728,7 +730,8 @@ def testGetChunkSelection(self): chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 4) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_4_4" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0].start, 42) self.assertEqual(sel[0].stop, 50) @@ -737,7 +740,8 @@ def testGetChunkSelection(self): self.assertEqual(sel[1].stop, 50) self.assertEqual(sel[1].step, 1) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_4_5" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0].start, 42) self.assertEqual(sel[0].stop, 50) @@ -746,7 +750,8 @@ def testGetChunkSelection(self): self.assertEqual(sel[1].stop, 58) self.assertEqual(sel[1].step, 1) - chunk_id = chunk_ids[2] + chunk_id = f"c-{dset_id[2:]}_5_4" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0].start, 50) self.assertEqual(sel[0].stop, 52) @@ -755,7 +760,8 @@ def testGetChunkSelection(self): self.assertEqual(sel[1].stop, 50) self.assertEqual(sel[1].step, 1) - 
chunk_id = chunk_ids[3] + chunk_id = f"c-{dset_id[2:]}_5_5" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0].start, 50) self.assertEqual(sel[0].stop, 52) @@ -771,28 +777,27 @@ def testGetChunkSelection(self): ) chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 4) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_3_1" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0], slice(35, 40, 1)) self.assertEqual(sel[1], [12, 13]) - chunk_id = chunk_ids[1] + + chunk_id = f"c-{dset_id[2:]}_3_3" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0], slice(35, 40, 1)) - self.assertEqual( - sel[1], - [33,], - ) - chunk_id = chunk_ids[2] + self.assertEqual(sel[1], [33,]) + chunk_id = f"c-{dset_id[2:]}_4_1" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0], slice(40, 45, 1)) self.assertEqual(sel[1], [12, 13]) - chunk_id = chunk_ids[3] + chunk_id = f"c-{dset_id[2:]}_4_3" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkSelection(chunk_id, selection, layout) self.assertEqual(sel[0], slice(40, 45, 1)) - self.assertEqual( - sel[1], - [33,], - ) + self.assertEqual(sel[1], [33,],) # 1-d test with fractional chunks datashape = [104,] @@ -853,20 +858,22 @@ def testGetChunkCoverage(self): selection = getHyperslabSelection(datashape, 42, 62) chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 3) - - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_4" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 2) self.assertEqual(sel[0].stop, 10) self.assertEqual(sel[0].step, 1) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_5" + self.assertTrue(chunk_id in chunk_ids) sel 
= getChunkCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 10) self.assertEqual(sel[0].step, 1) - chunk_id = chunk_ids[2] + chunk_id = f"c-{dset_id[2:]}_6" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 2) @@ -876,8 +883,10 @@ def testGetChunkCoverage(self): selection = ((32, 39, 61),) chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 2) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_3" + self.assertTrue(chunk_id in chunk_ids) sel = getChunkCoverage(chunk_id, selection, layout) + print("sel:", sel) self.assertEqual(sel[0], (2, 9)) # 1-d with step @@ -1025,43 +1034,56 @@ def testGetDataCoverage(self): chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 3) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_4" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) + self.assertEqual(len(sel), 1) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 8) self.assertEqual(sel[0].step, 1) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_5" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) + self.assertEqual(len(sel), 1) self.assertEqual(sel[0].start, 8) self.assertEqual(sel[0].stop, 18) self.assertEqual(sel[0].step, 1) - chunk_id = chunk_ids[2] + chunk_id = f"c-{dset_id[2:]}_6" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) + self.assertEqual(len(sel), 1) self.assertEqual(sel[0].start, 18) self.assertEqual(sel[0].stop, 20) self.assertEqual(sel[0].step, 1) # test with step selection = getHyperslabSelection(datashape, 42, 68, 4) + self.assertEqual(len(sel), 1) chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 3) - chunk_id = chunk_ids[0] + chunk_id = 
f"c-{dset_id[2:]}_4" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) + self.assertEqual(len(sel), 1) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 2) self.assertEqual(sel[0].step, 1) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_5" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) + self.assertEqual(len(sel), 1) self.assertEqual(sel[0].start, 2) self.assertEqual(sel[0].stop, 5) self.assertEqual(sel[0].step, 1) - chunk_id = chunk_ids[2] + chunk_id = f"c-{dset_id[2:]}_6" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) + self.assertEqual(len(sel), 1) self.assertEqual(sel[0].start, 5) self.assertEqual(sel[0].stop, 7) self.assertEqual(sel[0].step, 1) @@ -1071,11 +1093,11 @@ def testGetDataCoverage(self): chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 1) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_2" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) - self.assertEqual(sel[0].start, 0) - self.assertEqual(sel[0].stop, 2) - self.assertEqual(sel[0].step, 1) + self.assertEqual(len(sel), 1) + self.assertEqual(sel[0], [0, 1]) # 2-d test dset_id = "d-12345678-1234-1234-1234-1234567890ab" @@ -1085,7 +1107,8 @@ def testGetDataCoverage(self): chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 4) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_4_4" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 8) @@ -1094,7 +1117,8 @@ def testGetDataCoverage(self): self.assertEqual(sel[1].stop, 4) self.assertEqual(sel[1].step, 1) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_4_5" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) 
self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 8) @@ -1103,7 +1127,8 @@ def testGetDataCoverage(self): self.assertEqual(sel[1].stop, 12) self.assertEqual(sel[1].step, 1) - chunk_id = chunk_ids[2] + chunk_id = f"c-{dset_id[2:]}_5_4" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 8) self.assertEqual(sel[0].stop, 10) @@ -1112,7 +1137,8 @@ def testGetDataCoverage(self): self.assertEqual(sel[1].stop, 4) self.assertEqual(sel[1].step, 1) - chunk_id = chunk_ids[3] + chunk_id = f"c-{dset_id[2:]}_5_5" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 8) self.assertEqual(sel[0].stop, 10) @@ -1129,23 +1155,41 @@ def testGetDataCoverage(self): chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 2) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_4_2" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 5) self.assertEqual(sel[0].step, 1) - self.assertEqual(sel[1].start, 0) - self.assertEqual(sel[1].stop, 2) - self.assertEqual(sel[1].step, 1) + self.assertEqual(sel[1], [0, 1]) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_5_2" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 5) self.assertEqual(sel[0].stop, 10) self.assertEqual(sel[0].step, 1) - self.assertEqual(sel[1].start, 0) - self.assertEqual(sel[1].stop, 2) - self.assertEqual(sel[1].step, 1) + self.assertEqual(sel[1], [0, 1]) + + # test with two coordinates + selection = ( + (1, 5, 55), + (23, 28, 57), + ) + chunk_ids = getChunkIds(dset_id, selection, layout) + self.assertEqual(len(chunk_ids), 2) + + chunk_id = f"c-{dset_id[2:]}_5_5" + self.assertTrue(chunk_id in chunk_ids) + self.assertTrue(chunk_id in chunk_ids) + sel = 
getDataCoverage(chunk_id, selection, layout) + self.assertEqual(sel[0], [2,]) + + chunk_id = f"c-{dset_id[2:]}_0_2" + self.assertTrue(chunk_id in chunk_ids) + sel = getDataCoverage(chunk_id, selection, layout) + self.assertEqual(len(sel), 1) + self.assertEqual(sel[0], [0, 1]) # 2-d test, non-regular chunks dset_id = "d-12345678-1234-1234-1234-1234567890ab" @@ -1155,7 +1199,9 @@ def testGetDataCoverage(self): chunk_ids = getChunkIds(dset_id, selection, layout) self.assertEqual(len(chunk_ids), 6) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_2_0" + self.assertTrue(chunk_id in chunk_ids) + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 1) @@ -1164,7 +1210,9 @@ def testGetDataCoverage(self): self.assertEqual(sel[1].stop, 8) self.assertEqual(sel[1].step, 1) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_2_1" + self.assertTrue(chunk_id in chunk_ids) + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 1) @@ -1173,7 +1221,9 @@ def testGetDataCoverage(self): self.assertEqual(sel[1].stop, 18) self.assertEqual(sel[1].step, 1) - chunk_id = chunk_ids[5] + chunk_id = f"c-{dset_id[2:]}_2_5" + self.assertTrue(chunk_id in chunk_ids) + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0].start, 0) self.assertEqual(sel[0].stop, 1) @@ -1186,17 +1236,22 @@ def testGetDataCoverage(self): datashape = [104,] layout = (10,) selection = getHyperslabSelection(datashape, 92, 102) + print("selection:", selection) chunk_ids = getChunkIds(dset_id, selection, layout) + self.assertEqual(len(chunk_ids), 2) - chunk_id = chunk_ids[0] + chunk_id = f"c-{dset_id[2:]}_9" + self.assertTrue(chunk_id in chunk_ids) + sel = getDataCoverage(chunk_id, selection, layout) sel = sel[0] self.assertEqual(sel.start, 0) 
self.assertEqual(sel.stop, 8) self.assertEqual(sel.step, 1) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_10" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) sel = sel[0] self.assertEqual(sel.start, 8) @@ -1208,7 +1263,9 @@ def testGetDataCoverage(self): layout = (66, 89, 93) selection = (slice(0, 792, 1), slice(520, 521, 1), slice(1401, 1540, 1)) chunk_ids = getChunkIds(dset_id, selection, layout) - chunk_id = chunk_ids[1] + chunk_id = f"c-{dset_id[2:]}_0_5_16" + self.assertTrue(chunk_id in chunk_ids) + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0], slice(0, 66, 1)) self.assertEqual(sel[1], slice(0, 1, 1)) @@ -1216,11 +1273,13 @@ def testGetDataCoverage(self): selection = (slice(0, 792, 1), slice(520, 521, 1), [1401, 1501, 1540]) chunk_ids = getChunkIds(dset_id, selection, layout) - chunk_id = chunk_ids[1] + self.assertEqual(len(chunk_ids), 24) + chunk_id = f"c-{dset_id[2:]}_0_5_16" + self.assertTrue(chunk_id in chunk_ids) sel = getDataCoverage(chunk_id, selection, layout) self.assertEqual(sel[0], slice(0, 66, 1)) self.assertEqual(sel[1], slice(0, 1, 1)) - self.assertEqual(sel[2], slice(1, 3, 1)) + self.assertEqual(sel[2], [1, 2]) def testGetChunkId(self): # getChunkIds(dset_id, selection, layout, dim=0, prefix=None, chunk_ids=None): diff --git a/tests/unit/dset_util_test.py b/tests/unit/dset_util_test.py index b023bc20..0e77ab1b 100755 --- a/tests/unit/dset_util_test.py +++ b/tests/unit/dset_util_test.py @@ -28,9 +28,7 @@ def __init__(self, *args, **kwargs): def testGetHyperslabSelection(self): # getHyperslabSelection(dsetshape, start, stop, step) # 1-D case - datashape = [ - 100, - ] + datashape = [100,] slices = getHyperslabSelection(datashape) self.assertEqual(len(slices), 1) self.assertEqual(slices[0], slice(0, 100, 1)) @@ -69,79 +67,61 @@ def testGetHyperslabSelection(self): self.assertEqual(slices[1], slice(20, 30, 2)) def 
testGetSelectionShape(self): - sel = [ - slice(3, 7, 1), - ] + sel = [slice(3, 7, 1),] shape = getSelectionShape(sel) - self.assertEqual( - shape, - [ - 4, - ], - ) - - sel = [ - slice(3, 7, 3), - ] # select points 3, 6 + self.assertEqual(shape, [4,],) + + sel = [slice(3, 7, 3),] # select points 3, 6 shape = getSelectionShape(sel) - self.assertEqual( - shape, - [ - 2, - ], - ) + self.assertEqual(shape, [2,],) sel = [slice(44, 52, 1), slice(48, 52, 1)] shape = getSelectionShape(sel) self.assertEqual(shape, [8, 4]) - sel = [ - slice(0, 4, 2), - ] # select points 0, 2 + sel = [slice(0, 4, 2),] # select points 0, 2 shape = getSelectionShape(sel) - self.assertEqual( - shape, - [ - 2, - ], - ) - - sel = [ - slice(0, 5, 2), - ] # select points 0, 2, 4 + self.assertEqual(shape, [2,],) + + sel = [slice(0, 5, 2),] # select points 0, 2, 4 shape = getSelectionShape(sel) - self.assertEqual( - shape, - [ - 3, - ], - ) + self.assertEqual(shape, [3,],) sel = [[2, 3, 5, 7, 11]] # coordinate list shape = getSelectionShape(sel) - self.assertEqual( - shape, - [ - 5, - ], - ) + self.assertEqual(shape, [5,],) sel = [slice(0, 100, 1), slice(50, 51, 1), [23, 35, 56]] shape = getSelectionShape(sel) self.assertEqual(shape, [100, 1, 3]) + sel = [slice(0, 100, 1), [2, 3, 5, 7, 11]] + shape = getSelectionShape(sel) + self.assertEqual(shape, [100, 5]) + + sel = [[1, 2, 5, 9], [11, 3, 5, 7]] + shape = getSelectionShape(sel) + self.assertEqual(shape, [4,]) + + sel = [(0, 1, 3), (7, 4, 2)] + shape = getSelectionShape(sel) + self.assertEqual(shape, [3,]) + + try: + sel = [(0, 1, 3), (7, 4,)] + shape = getSelectionShape(sel) + self.assertTrue(False) + except Exception: + pass # expected + def testGetSelectionPagination(self): itemsize = 4 # will use 4 for most tests # 1D case - datashape = [ - 200, - ] + datashape = [200,] max_request_size = 120 - select = [ - (slice(20, 40)), - ] # 80 byte selection + select = [(slice(20, 40)),] # 80 byte selection # should return one page equivalent to original 
selection pages = getSelectionPagination(select, datashape, itemsize, max_request_size) self.assertEqual(len(pages), 1) @@ -151,9 +131,7 @@ def testGetSelectionPagination(self): self.assertEqual(s.start, 20) self.assertEqual(s.stop, 40) - select = [ - (slice(0, 200)), - ] # 800 byte selection + select = [(slice(0, 200)),] # 800 byte selection # should create 7 pages pages = getSelectionPagination(select, datashape, itemsize, max_request_size) self.assertEqual(len(pages), 8) @@ -309,9 +287,7 @@ def testGetSelectionPagination(self): def testItemIterator(self): # 1-D case - datashape = [ - 10, - ] + datashape = [10,] slices = getHyperslabSelection(datashape) it = ItemIterator(slices) @@ -350,9 +326,7 @@ def testItemIterator(self): self.assertEqual(count, 20) def testSelectionList1D(self): - dims = [ - 100, - ] + dims = [100,] for select in ("", []): selection = getSelectionList(select, dims) @@ -363,9 +337,7 @@ def testSelectionList1D(self): for select in ( "[5]", - [ - 5, - ], + [5,], ): selection = getSelectionList(select, dims) self.assertEqual(len(selection), 1) @@ -375,9 +347,7 @@ def testSelectionList1D(self): for select in ( "[:]", - [ - ":", - ], + [":",], ): selection = getSelectionList(select, dims) self.assertEqual(len(selection), 1) @@ -387,9 +357,7 @@ def testSelectionList1D(self): for select in ( "[3:7]", - [ - "3:7", - ], + ["3:7",], ): selection = getSelectionList(select, dims) self.assertEqual(len(selection), 1) @@ -399,9 +367,7 @@ def testSelectionList1D(self): for select in ( "[:4]", - [ - ":4", - ], + [":4",], ): selection = getSelectionList(select, dims) self.assertEqual(len(selection), 1) @@ -411,9 +377,7 @@ def testSelectionList1D(self): for select in ( "[0:100]", - [ - "0:100", - ], + ["0:100",], ): selection = getSelectionList(select, dims) self.assertEqual(len(selection), 1) @@ -430,9 +394,7 @@ def testSelectionList1D(self): for select in ( "[30:70:5]", - [ - "30:70:5", - ], + ["30:70:5",], ): selection = getSelectionList(select, dims) 
self.assertEqual(len(selection), 1) @@ -455,10 +417,7 @@ def testSelectionList1D(self): self.assertEqual(s1, slice(30, 70, 5)) def testSelectionList2D(self): - dims = [ - 50, - 100, - ] + dims = [50, 100, ] for select in ("", []): selection = getSelectionList(select, dims) @@ -500,6 +459,26 @@ def testSelectionList2D(self): self.assertTrue(isinstance(s2, list)) self.assertEqual(s2, [3, 4, 7]) + for select in ("[[2, 5, 8],[3,4,7]]", ["[2, 5, 8]", "[3,4,7]"], [[2, 5, 8], [3, 4, 7]]): + selection = getSelectionList(select, dims) + self.assertEqual(len(selection), 2) + s1 = selection[0] + self.assertTrue(isinstance(s1, list)) + self.assertEqual(s1, [2, 5, 8]) + s2 = selection[1] + self.assertTrue(isinstance(s2, list)) + self.assertEqual(s2, [3, 4, 7]) + + for select in ("[[2,5,8],[7,4,3]]", ["[2, 5, 8]", "[7,4,3]"], [[2, 5, 8], [7, 4, 3]]): + selection = getSelectionList(select, dims) + self.assertEqual(len(selection), 2) + s1 = selection[0] + self.assertTrue(isinstance(s1, list)) + self.assertEqual(s1, [2, 5, 8]) + s2 = selection[1] + self.assertTrue(isinstance(s2, list)) + self.assertEqual(s2, [7, 4, 3]) + for select in ("[1:20, 30:70:5]", ["1:20", "30:70:5"]): selection = getSelectionList(select, dims) self.assertEqual(len(selection), 2) @@ -541,10 +520,7 @@ def testSelectionList2D(self): self.assertEqual(s2, slice(30, 70, 5)) def testInvalidSelectionList(self): - dims = [ - 50, - 100, - ] + dims = [50, 100,] try: # no bracket