From 2f33dca59211d6b9348f4bfcc64e517459b509a2 Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 5 Oct 2023 18:18:19 -0700 Subject: [PATCH 01/17] fix async errors in getting dset layout --- hsds/async_lib.py | 20 +++++++++---- hsds/chunk_sn.py | 9 +++--- hsds/dset_dn.py | 8 +---- hsds/dset_sn.py | 15 +++++++--- hsds/util/dsetUtil.py | 60 ++++++++++++++++++------------------- tests/integ/dataset_test.py | 22 ++++++++++---- 6 files changed, 76 insertions(+), 58 deletions(-) diff --git a/hsds/async_lib.py b/hsds/async_lib.py index 92e788f5..9ebfa099 100755 --- a/hsds/async_lib.py +++ b/hsds/async_lib.py @@ -22,7 +22,7 @@ from .util.hdf5dtype import getItemSize, createDataType from .util.arrayUtil import getShapeDims, getNumElements, bytesToArray from .util.dsetUtil import getHyperslabSelection, getFilterOps, getChunkDims -from .util.dsetUtil import getDatasetLayoutClass, getDatasetCreationPropertyLayout +from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout from .util.storUtil import getStorKeys, putStorJSONObj, getStorJSONObj from .util.storUtil import deleteStorObj, getStorBytes, isStorObj @@ -79,9 +79,8 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): msg += f"for {dset_id}" log.warn(msg) return - layout = getDatasetCreationPropertyLayout(dset_json) msg = f"updateDatasetInfo - shape: {shape_json} type: {type_json} " - msg += f"item size: {item_size} layout: {layout}" + msg += f"item size: {item_size}" log.info(msg) dims = getShapeDims(shape_json) # returns None for HS_NULL dsets @@ -120,6 +119,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): linked_bytes = chunk_size * num_chunks num_linked_chunks = num_chunks elif layout_class == "H5D_CHUNKED_REF": + layout = getDatasetLayout(dset_json) if "chunks" not in layout: log.error("Expected to find 'chunks' key in H5D_CHUNKED_REF layout") return @@ -130,7 +130,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): linked_bytes += chunk_info[1] num_linked_chunks = len(chunks) elif layout_class == "H5D_CHUNKED_REF_INDIRECT": - log.debug("chunk ref indirect") + layout = getDatasetLayout(dset_json) if "chunk_table" not in layout: msg = "Expected to find chunk_table in dataset layout for " msg += f"{dset_id}" @@ -147,7 +147,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): msg += f"for {dset_id}" log.warn(msg) return - chunktable_layout = getDatasetCreationPropertyLayout(chunktable_json) + chunktable_layout = getDatasetLayout(chunktable_json) log.debug(f"chunktable_layout: {chunktable_layout}") if not isinstance(chunktable_layout, dict): log.warn(f"unexpected chunktable_layout: {chunktable_id}") @@ -234,7 +234,15 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): elif layout_class == "H5D_CHUNKED": msg = "updateDatasetInfo - no linked bytes/chunks for " msg += "H5D_CHUNKED layout" - log.debug(msg) + log.info(msg) + elif layout_class == "H5D_CONTIGUOUS": + msg = "updateDatasetInfo - no linked bytes/chunks for " + msg += "H5D_CONTIGUOUS layout" + log.info(msg) + elif layout_class == "H5D_COMPACT": + msg = "updateDatasetInfo - no linked bytes/chunks for " + msg += "H5D_COMPACT layout" + log.info(msg) else: log.error(f"unexpected chunk layout: {layout_class}") diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 73f0e5ed..df4d7476 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -30,10 +30,9 @@ from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain from 
.util.hdf5dtype import getItemSize, createDataType -from .util.dsetUtil import getSelectionList, isNullSpace, getDatasetLayoutClass +from .util.dsetUtil import getSelectionList, isNullSpace, getDatasetLayout, getDatasetLayoutClass from .util.dsetUtil import isExtensible, getSelectionPagination from .util.dsetUtil import getSelectionShape, getDsetMaxDims, getChunkLayout -from .util.dsetUtil import getDatasetCreationPropertyLayout from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId from .util.chunkUtil import getChunkIndex, getChunkSuffix from .util.chunkUtil import getChunkCoverage, getDataCoverage @@ -177,7 +176,7 @@ def getChunkItem(chunkid): return chunk_item if layout_class == "H5D_CONTIGUOUS_REF": - layout = getDatasetCreationPropertyLayout(dset_json) + layout = getDatasetLayout(dset_json) log.debug(f"cpl layout: {layout}") s3path = layout["file_uri"] s3size = layout["size"] @@ -229,7 +228,7 @@ def getChunkItem(chunkid): chunk_item["s3offset"] = s3offset chunk_item["s3size"] = chunk_size elif layout_class == "H5D_CHUNKED_REF": - layout = getDatasetCreationPropertyLayout(dset_json) + layout = getDatasetLayout(dset_json) log.debug(f"cpl layout: {layout}") s3path = layout["file_uri"] chunks = layout["chunks"] @@ -248,7 +247,7 @@ def getChunkItem(chunkid): chunk_item["s3size"] = s3size elif layout_class == "H5D_CHUNKED_REF_INDIRECT": - layout = getDatasetCreationPropertyLayout(dset_json) + layout = getDatasetLayout(dset_json) log.debug(f"cpl layout: {layout}") if "chunk_table" not in layout: log.error("Expected to find chunk_table in dataset layout") diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index beac5a1b..e250bde9 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -273,13 +273,7 @@ async def PUT_DatasetShape(request): # e.g. another client has already extended the shape since the SN # verified it shape_update = body["shape"] - log.debug("shape_update: {}".format(shape_update)) - - for i in range(len(dims)): - if shape_update[i] < dims[i]: - msg = "Dataspace can not be made smaller" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + log.debug(f"shape_update: {shape_update}") # Update the shape! 
for i in range(len(dims)): diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index e9e8729c..8f69a9fc 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -621,15 +621,22 @@ async def PUT_DatasetShape(request): msg = "Extent of update shape request does not match dataset sahpe" log.warn(msg) raise HTTPBadRequest(reason=msg) + shape_reduction = False for i in range(rank): if shape_update and shape_update[i] < dims[i]: - msg = "Dataspace can not be made smaller" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + shape_reduction = True + if shape_update[i] < 0: + msg = "Extension dimension can not be made less than zero" + log.warn(msg) + raise HTTPBadRequest(reason=msg) if shape_update and maxdims[i] != 0 and shape_update[i] > maxdims[i]: - msg = "Database can not be extended past max extent" + msg = "Extension dimension can not be extended past max extent" log.warn(msg) raise HTTPConflict() + if shape_reduction: + log.info("Shape extent reduced for dataset") + # TBD - ensure any chunks that are outside the new shape region are + # deleted if extend_dim < 0 or extend_dim >= rank: msg = "Extension dimension must be less than rank and non-negative" log.warn(msg) diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 9df8c0fe..da8dbdff 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -855,48 +855,46 @@ def isExtensible(dims, maxdims): return False -def getDatasetCreationPropertyLayout(dset_json): - """ return layout json from creation property list """ - cpl = None +def getDatasetLayout(dset_json): + """ Return layout json from creation property list or layout json """ + layout = None + if "creationProperties" in dset_json: cp = dset_json["creationProperties"] if "layout" in cp: - cpl = cp["layout"] - if not cpl and "layout" in dset_json: - # fallback to dset_json layout - cpl = dset_json["layout"] - if cpl is None: - log.warn(f"no layout found for {dset_json}") - return cpl + layout = cp["layout"] + if not layout and "layout" in dset_json: + layout = dset_json["layout"] + if not layout: + log.warn(f"no layout for {dset_json}") + return layout def getDatasetLayoutClass(dset_json): """ return layout class """ - chunk_layout = None - cp_layout = getDatasetCreationPropertyLayout(dset_json) - # check creation properties first - if cp_layout: - if "class" in cp_layout: - chunk_layout = cp_layout["class"] - # otherwise, get class prop from layout - if chunk_layout is None and "layout" in dset_json: - layout = dset_json["layout"] - if "class" in layout: - chunk_layout = layout["class"] - return chunk_layout + layout = getDatasetLayout(dset_json) + if layout and "class" in layout: + layout_class = layout["class"] + else: + layout_class = None + return layout_class def getChunkDims(dset_json): """ get chunk shape for given dset_json """ - cpl = getDatasetCreationPropertyLayout(dset_json) - if cpl and "dims" in cpl: - return cpl["dims"] - # otherwise, check the 'layout' key - if 'layout' in dset_json: - layout = dset_json["layout"] - if "dims" in layout: - return layout["dims"] - return None # not found + + layout = getDatasetLayout(dset_json) + if layout and "dims" in layout: + return layout["dims"] + else: + # H5D_COMPACT and H5D_CONTIGUOUS will not have a dims key + # Check the layout dict in dset_json to see if it's + # defined there + if "layout" in dset_json: + layout = dset_json["layout"] + if "dims" in layout: + return layout["dims"] + return None class ItemIterator: diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index 9a729e55..f42dcdb3 100755 
--- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -681,11 +681,23 @@ def testResizableDataset(self): self.assertEqual(rsp.status_code, 201) rspJson = json.loads(rsp.text) + # verify updated-shape using the GET shape request + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("shape" in rspJson) + shape = rspJson["shape"] + self.assertEqual(shape["class"], "H5S_SIMPLE") + self.assertEqual(len(shape["dims"]), 1) + self.assertEqual(shape["dims"][0], 15) # increased to 15 + self.assertTrue("maxdims" in shape) + self.assertEqual(shape["maxdims"][0], 20) + # reduce the size to 5 elements - # payload = {"shape": 5} - # rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - # self.assertEqual(rsp.status_code, 201) - # rspJson = json.loads(rsp.text) + payload = {"shape": 5} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) # verify updated-shape using the GET shape request rsp = self.session.get(req, headers=headers) @@ -695,7 +707,7 @@ def testResizableDataset(self): shape = rspJson["shape"] self.assertEqual(shape["class"], "H5S_SIMPLE") self.assertEqual(len(shape["dims"]), 1) - self.assertEqual(shape["dims"][0], 15) # increased to 15 + self.assertEqual(shape["dims"][0], 5) # decreased to 5 self.assertTrue("maxdims" in shape) self.assertEqual(shape["maxdims"][0], 20) From 65609d19027026fc34f129884fdbfd9e820b5299 Mon Sep 17 00:00:00 2001 From: jreadey Date: Fri, 13 Oct 2023 09:25:43 -0700 Subject: [PATCH 02/17] broadcast support for SN PUT value --- hsds/chunk_crawl.py | 11 ++-- hsds/chunk_dn.py | 17 ++++- hsds/chunk_sn.py | 112 +++++++++++++++++++++++-------- hsds/dset_sn.py | 36 ++++++++-- hsds/servicenode_lib.py | 4 +- hsds/util/arrayUtil.py | 36 ++++++---- hsds/util/dsetUtil.py | 1 - tests/integ/value_test.py | 82 ++++++++++++++++++++++- tests/unit/array_util_test.py | 121 +++++++++++++++++++++++++++------- 9 files changed, 335 insertions(+), 85 deletions(-) diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 96497e1c..b7d2ce22 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -309,16 +309,13 @@ async def read_chunk_hyperslab( # TBD: this needs to be fixed up for variable length dtypes nrows = len(array_data) // query_dtype.itemsize try: - chunk_arr = bytesToArray( - array_data, - query_dtype, - [ - nrows, - ], - ) + chunk_arr = bytesToArray(array_data, query_dtype, (nrows,)) except ValueError as ve: log.warn(f"bytesToArray ValueError: {ve}") raise HTTPBadRequest() + if chunk_arr.shape[0] != nrows: + log.error(f"expected chunk shape to be ({nrows},), but got {chunk_arr.shape[0]}") + raise HTTPInternalServerError() # save result to chunk_info # chunk results will be merged later chunk_info["query_rsp"] = chunk_arr diff --git a/hsds/chunk_dn.py b/hsds/chunk_dn.py index 3bb3fc7f..3fafd940 100644 --- a/hsds/chunk_dn.py +++ b/hsds/chunk_dn.py @@ -20,7 +20,7 @@ from aiohttp.web import json_response, StreamResponse from .util.httpUtil import request_read, getContentType -from .util.arrayUtil import bytesToArray, arrayToBytes +from .util.arrayUtil import bytesToArray, arrayToBytes, getShapeDims from .util.idUtil import getS3Key, validateInPartition, isValidUuid from .util.storUtil import isStorObj, deleteStorObj from .util.hdf5dtype import createDataType @@ -137,7 +137,7 @@ async def PUT_Chunk(request): if getChunkInitializer(dset_json): chunk_init = True elif 
query: - chunk_init = False # don't initalize new chunks on query update + chunk_init = False # don't initialize new chunks on query update else: chunk_init = True @@ -221,6 +221,8 @@ async def PUT_Chunk(request): else: # regular chunk update + broadcast = 0 # broadcast update + # check that the content_length is what we expect if itemsize != "H5T_VARIABLE": log.debug(f"expect content_length: {num_elements*itemsize}") @@ -229,10 +231,14 @@ async def PUT_Chunk(request): actual = request.content_length if itemsize != "H5T_VARIABLE": expected = num_elements * itemsize - if expected != actual: + if expected % actual != 0: msg = f"Expected content_length of: {expected}, but got: {actual}" log.error(msg) raise HTTPBadRequest(reason=msg) + else: + broadcast = expected // actual + if broadcast != 1: + log.info(f"broadcast chunk write: {broadcast}") # create a numpy array for incoming data input_bytes = await request_read(request) @@ -375,6 +381,8 @@ async def GET_Chunk(request): dset_id = getDatasetId(chunk_id) dset_json = await get_metadata_obj(app, dset_id, bucket=bucket) + shape_dims = getShapeDims(dset_json["shape"]) + log.debug(f"shape_dims: {shape_dims}") dims = getChunkLayout(dset_json) log.debug(f"GET_Chunk - got dims: {dims}") @@ -385,6 +393,9 @@ async def GET_Chunk(request): select = None # get slices for entire datashape if select is not None: log.debug(f"GET_Chunk - using select string: {select}") + else: + log.debug("GET_Chunk - no selection string") + try: selection = getSelectionList(select, dims) except ValueError as ve: diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index df4d7476..f5aa03e9 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -494,6 +494,7 @@ async def PUT_Value(request): log.warn(msg) raise HTTPBadRequest(reason=msg) log.info(f"append_rows: {append_rows}") + if append_rows: for key in ("start", "stop", "step"): if key in body: @@ -509,6 +510,7 @@ async def PUT_Value(request): log.warn(msg) raise HTTPBadRequest(reason=msg) log.info(f"append_dim: {append_dim}") + # get state for dataset from DN. 
dset_json = await getObjectJson(app, dset_id, bucket=bucket, refresh=False) @@ -624,6 +626,8 @@ async def PUT_Value(request): else: http_streaming = True + http_streaming = False # test + # body could also contain a point selection specifier if body and "points" in body: if append_rows: @@ -709,11 +713,13 @@ async def PUT_Value(request): log.warn(msg) raise # re-throw + """ if len(binary_data) != request.content_length: msg = f"Read {len(binary_data)} bytes, expecting: " msg += f"{request.content_length}" log.error(msg) raise HTTPBadRequest(reason=msg) + """ if append_rows: for i in range(rank): @@ -753,38 +759,87 @@ async def PUT_Value(request): raise HTTPBadRequest(reason=msg) arr = None # np array to hold request data - if binary_data and isinstance(item_size, int): - # binary, fixed item_size - if num_elements * item_size != len(binary_data): - msg = f"Expected: {num_elements*item_size} bytes, " - msg += f"but got: {len(binary_data)}, " - msg += f"num_elements: {num_elements}, item_size: {item_size}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if num_elements * item_size > max_request_size: - msg = f"read {num_elements*item_size} bytes, greater than {max_request_size}" - log.warn(msg) - arr = np.fromstring(binary_data, dtype=dset_dtype) - try: - arr = arr.reshape(np_shape) # conform to selection shape - except ValueError: - msg = "Bad Request: binary input data doesn't match selection" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + if binary_data: + if item_size == "H5T_VARIABLE": + + # binary variable length data + try: + arr = bytesToArray(binary_data, dset_dtype, np_shape) + except ValueError as ve: + log.warn(f"bytesToArray value error: {ve}") + raise HTTPBadRequest() + + num_req_elements = getNumElements(arr.shape) + log.debug(f"binary variable data element count: {num_req_elements}") + else: + # fixed item size + if len(binary_data) % item_size != 0: + msg = f"Expected request size to be a multiple of {item_size}, " + msg += f"but {len(binary_data)} bytes received" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # check against max request size + if num_elements * item_size > max_request_size: + msg = f"read {num_elements*item_size} bytes, greater than {max_request_size}" + log.warn(msg) + + num_req_elements = len(binary_data) // item_size + + # if the req item count is less than expected, + # check to see if it is a broadcast request + broadcast_shape = None + if num_req_elements != num_elements and not append_rows: + broadcast_shape = [1,] + for ndim in range(rank): + if num_req_elements == np.prod(broadcast_shape): + break + np_shape_extent = np_shape[rank - 1 - ndim] + if ndim == 0: + broadcast_shape = [np_shape_extent,] + else: + broadcast_shape = [np_shape_extent].extend(broadcast_shape) + log.debug(f"trying broadcast_shape: {broadcast_shape}") + if len(broadcast_shape) == rank: + msg = f"Unexpected request size: {len(binary_data)}, " + msg += f"for num_elements: {num_elements} with item_size: {item_size}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # read bytes into a one-dimensional numpy array + if item_size != "H5T_VARIABLE": + """ + # binary variable length data + try: + arr = bytesToArray(binary_data, dset_dtype, (num_elements,)) + except ValueError as ve: + log.warn(f"Unable to parse variable length data: {ve}") + raise HTTPBadRequest() + """ + arr = np.fromstring(binary_data, dtype=dset_dtype) + + if broadcast_shape: + log.info(f"broadcasting from {broadcast_shape} to {np_shape}") + arr = arr.reshape(broadcast_shape) + tmp_arr = 
np.zeros(np_shape, dtype=dset_dtype) + tmp_arr[...] = arr + arr = tmp_arr + else: + try: + arr = arr.reshape(np_shape) # conform to selection shape + except ValueError: + msg = "Bad Request: binary input data doesn't match selection" + log.warn(msg) + raise HTTPBadRequest(reason=msg) msg = f"PUT value - numpy array shape: {arr.shape} dtype: {arr.dtype}" log.debug(msg) - elif binary_data and item_size == "H5T_VARIABLE": - # binary variable length data - try: - arr = bytesToArray(binary_data, dset_dtype, np_shape) - except ValueError as ve: - log.warn(f"bytesToArray value error: {ve}") - raise HTTPBadRequest() + elif request_type == "json": # get array from json input try: msg = "input data doesn't match selection" - arr = jsonToArray(np_shape, dset_dtype, json_data) + # only enable broadcast if not appending + arr = jsonToArray(np_shape, dset_dtype, json_data, broadcast=(False if append_rows else True)) except ValueError: log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -1051,7 +1106,10 @@ async def GET_Value(request): bucket = getBucketForDomain(domain) # get state for dataset from DN. - dset_json = await getObjectJson(app, dset_id, bucket=bucket) + # Note - refreshShape will do a refresh if the dataset is extensible + # i.e. we need to make sure we have the correct shape dimensions + # + dset_json = await getObjectJson(app, dset_id, bucket=bucket, refresh=True) type_json = dset_json["type"] dset_dtype = createDataType(type_json) diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 8f69a9fc..9e82a690 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -21,10 +21,10 @@ from .util.httpUtil import http_post, http_put, http_delete, getHref, respJsonAssemble from .util.httpUtil import jsonResponse from .util.idUtil import isValidUuid, getDataNodeUrl, createObjId, isSchema2Id -from .util.dsetUtil import getPreviewQuery, getFilterItem +from .util.dsetUtil import getPreviewQuery, getFilterItem, getChunkLayout from .util.arrayUtil import getNumElements, getShapeDims, getNumpyValue from .util.chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk -from .util.chunkUtil import getContiguousLayout +from .util.chunkUtil import getContiguousLayout, getChunkIds from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain @@ -621,6 +621,7 @@ async def PUT_DatasetShape(request): msg = "Extent of update shape request does not match dataset sahpe" log.warn(msg) raise HTTPBadRequest(reason=msg) + shape_reduction = False for i in range(rank): if shape_update and shape_update[i] < dims[i]: @@ -633,14 +634,34 @@ async def PUT_DatasetShape(request): msg = "Extension dimension can not be extended past max extent" log.warn(msg) raise HTTPConflict() - if shape_reduction: - log.info("Shape extent reduced for dataset") - # TBD - ensure any chunks that are outside the new shape region are - # deleted + if extend_dim < 0 or extend_dim >= rank: msg = "Extension dimension must be less than rank and non-negative" log.warn(msg) raise HTTPBadRequest(reason=msg) + + if shape_reduction: + log.info(f"Shape extent reduced for dataset (rank: {rank})") + + # need to re-initialize any values that are now outside the shape + layout = getChunkLayout(dset_json) + log.debug(f"got layout: {layout}") + for n in range(rank): + if dims[n] <= shape_update[i]: + log.debug(f"skip dimension {n}") + continue + log.debug(f"reinitialize for dimension: {n}") + slices = [] + for m in range(rank): + if 
m == n: + s = slice(shape_update[m], dims[m], 1) + else: + # just select the entire extent + s = slice(0, dims[m]) + slices.append(s) + log.debug(f"shape_reinitialize - got slices: {slices} for dimension: {n}") + chunk_ids = getChunkIds(dset_id, slices, layout) + log.debug(f"got chunkIds: {chunk_ids}") # send request onto DN req = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id + "/shape" @@ -664,6 +685,9 @@ async def PUT_DatasetShape(request): log.warn("got 409 extending dataspace") raise + + + resp = await jsonResponse(request, json_resp, status=201) log.response(request, resp=resp) return resp diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 3e1e2946..2f75284c 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -203,7 +203,7 @@ async def getObjectJson( if bucket: params["bucket"] = bucket req += "/" + collection + "/" + obj_id - + log.debug(f"getObjectJson - fetching {obj_id} from {req}") # throws 404 if doesn't exist obj_json = await http_get(app, req, params=params) meta_cache[obj_id] = obj_json @@ -211,7 +211,7 @@ async def getObjectJson( msg = f"Object: {obj_id} not found, req: {req}, params: {params}" log.warn(msg) raise HTTPNotFound() - + return obj_json diff --git a/hsds/util/arrayUtil.py b/hsds/util/arrayUtil.py index 0fe8a8f4..0f6b7099 100644 --- a/hsds/util/arrayUtil.py +++ b/hsds/util/arrayUtil.py @@ -93,6 +93,8 @@ def getNumElements(dims): return num_elements + + def getShapeDims(shape): """ Get dims from a given shape json. Return [1,] for Scalar datasets, @@ -131,7 +133,7 @@ def getShapeDims(shape): return dims -def jsonToArray(data_shape, data_dtype, data_json): +def jsonToArray(data_shape, data_dtype, data_json, broadcast=False): """ Return numpy array from the given json array. """ @@ -143,7 +145,7 @@ def fillVlenArray(rank, data, arr, index): arr[index] = data[i] index += 1 return index - + # need some special conversion for compound types -- # each element must be a tuple, but the JSON decoder # gives us a list instead. @@ -160,9 +162,7 @@ def fillVlenArray(rank, data, arr, index): converted_data = toTuple(np_shape_rank, data_json) data_json = converted_data else: - data_json = [ - data_json, - ] # listify + data_json = [data_json,] # listify if not (None in data_json): if isVlen(data_dtype): @@ -178,9 +178,17 @@ def fillVlenArray(rank, data, arr, index): # allow if the array is a scalar and the selection shape is one element, # numpy is ok with this if arr.size != npoints: - msg = "Input data doesn't match selection number of elements" - msg += f" Expected {npoints}, but received: {arr.size}" - raise ValueError(msg) + if broadcast: + # try to broadcast to the target shape + # if it fails, a ValueError exception will be raised + arr_tgt = np.zeros(data_shape, dtype=data_dtype) + arr_tgt[...] = arr + # worked! 
use arr_tgt as arr + arr = arr_tgt + else: + msg = "Input data doesn't match selection number of elements" + msg += f" Expected {npoints}, but received: {arr.size}" + raise ValueError(msg) if arr.shape != data_shape: arr = arr.reshape(data_shape) # reshape to match selection else: @@ -368,10 +376,11 @@ def copyElement(e, dt, buffer, offset): return offset -def getElementCount(buffer, offset): +def getElementCount(buffer, offset=0): """ Get the count value from persisted vlen array """ + n = offset m = offset + 4 count_bytes = bytes(buffer[n:m]) @@ -425,7 +434,7 @@ def readElement(buffer, offset, arr, index, dt): offset = readElement(buffer, offset, e, i, dt) e.reshape(dt.shape) else: - count = getElementCount(buffer, offset) + count = getElementCount(buffer, offset=offset) offset += 4 n = offset m = offset + count @@ -472,17 +481,18 @@ def bytesToArray(data, dt, shape): """ Create numpy array based on byte representation """ - # print(f"bytesToArray({len(data)}, {dt}, {shape}") - nelements = getNumElements(shape) if not isVlen(dt): # regular numpy from string arr = np.frombuffer(data, dtype=dt) else: + nelements = getNumElements(shape) + arr = np.zeros((nelements,), dtype=dt) offset = 0 for index in range(nelements): offset = readElement(data, offset, arr, index, dt) - arr = arr.reshape(shape) + if shape is not None: + arr = arr.reshape(shape) # check that we can update the array if needed # Note: this seems to have been required starting with numpuy v 1.17 # Setting the flag directly is not recommended. diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index da8dbdff..79bfa5e7 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -843,7 +843,6 @@ def isExtensible(dims, maxdims): """ if maxdims is None or len(dims) == 0: return False - log.debug(f"isExtensible - dims: {dims} maxdims: {maxdims}") rank = len(dims) if len(maxdims) != rank: raise ValueError("rank of maxdims does not match dataset") diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 9588a66e..c27600b7 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -3063,9 +3063,7 @@ def testIntelligentRangeGet(self): req = self.endpoint + "/datasets/" + dset_id + "/value" start = 1234567 stop = start + 10 - params = { - "select": f"[{start}:{stop}]" - } # read 10 element, starting at index 1234567 + params = {"select": f"[{start}:{stop}]"} # read 10 element, starting at index 1234567 params["nonstrict"] = 1 # enable SN to invoke lambda func # read the selection @@ -3078,6 +3076,7 @@ def testIntelligentRangeGet(self): # should get one element back self.assertEqual(len(value), 10) self.assertEqual(value, list(range(start, start + 10))) + def testLargeCreationProperties(self): # test Dataset with artifically large creation_properties data @@ -3141,6 +3140,83 @@ def testLargeCreationProperties(self): self.assertEqual(ret_values[i], 24) self.assertEqual(ret_values[i + 5], 42) + def testValueReinitialization(self): + # Test the dataset values get reset after a reduction and resize + + print("testValueReinitialization", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + + # get domain + req = f"{self.endpoint}/" + rsp = self.session.get(req, headers=headers) + rspJson = json.loads(rsp.text) + self.assertTrue("root" in rspJson) + root_uuid = rspJson["root"] + + # create the dataset + req = f"{self.endpoint}/datasets" + payload = {"type": "H5T_STD_I32LE", "shape": 10, "maxdims": 10} + payload["creationProperties"] = {"fillValue": 42} + req = 
self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # create dataset + rspJson = json.loads(rsp.text) + dset_uuid = rspJson["id"] + self.assertTrue(helper.validateId(dset_uuid)) + + # link new dataset as 'dset' + name = "dset" + req = f"{self.endpoint}/groups/{root_uuid}/links/{name}" + payload = {"id": dset_uuid} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # write to the dset + req = f"{self.endpoint}/datasets/{dset_uuid}/value" + data = list(range(10)) # write 0-9 + payload = {"value": data[0:10]} + params = {"select": "[0:10]"} + + rsp = self.session.put(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + self.assertEqual(rspJson["value"], data) + + # resize the dataset to 5 elements + req =f"{self.endpoint}/datasets/{dset_uuid}/shape" + payload = {"shape": 5} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + + # read back the remaining elements + req = f"{self.endpoint}/datasets/{dset_uuid}/value" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + self.assertEqual(rspJson["value"], data[:5]) + + # resize back to 10 + req =f"{self.endpoint}/datasets/{dset_uuid}/shape" + payload = {"shape": 10} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + + # read all 10 data values + req = f"{self.endpoint}/datasets/{dset_uuid}/value" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + ret_value = rspJson["value"] if __name__ == "__main__": # setup test files diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py index da7b8eef..d11979df 100644 --- a/tests/unit/array_util_test.py +++ b/tests/unit/array_util_test.py @@ -205,9 +205,7 @@ def testJsonToArray(self): 4, ] data = [ - [ - 1, - ], + [1,], [1, 2], [1, 2, 3], [1, 2, 3, 4], @@ -292,6 +290,95 @@ def testJsonToArray(self): self.assertTrue(isinstance(e, tuple)) self.assertEqual(e, (id0, id1, id2)) + + def testJsonToArrayBroadcast(self): + dt = np.dtype("i4") + shape = [10,] + data = [42,] + out = jsonToArray(shape, dt, data, broadcast=True) + + self.assertTrue(isinstance(out, np.ndarray)) + self.assertEqual(out.shape, (10,)) + for i in range(10): + self.assertEqual(out[i], 42) + + # compound type + dt = np.dtype([("a", "i4"), ("b", "S5")]) + shape = [10,] + data = [[6, "six"],] + out = jsonToArray(shape, dt, data, broadcast=True) + + self.assertTrue(isinstance(out, np.ndarray)) + self.assertEqual(out.shape, (10,)) + for i in range(10): + e = out[i] + self.assertEqual(e[0], 6) + self.assertEqual(e[1], b'six') + + # VLEN str + dt = special_dtype(vlen=str) + data = ["hello",] + + shape = [10,] + + out = jsonToArray(shape, dt, data, broadcast=True) + self.assertTrue("vlen" in out.dtype.metadata) + self.assertEqual(out.dtype.metadata["vlen"], str) + self.assertEqual(out.dtype.kind, "O") + self.assertEqual(out.shape, (10,)) + 
for i in range(10): + e = out[i] + self.assertEqual(out[0], data[0]) + + # two dimensional target + dt = np.dtype("i4") + shape = [10,2] + data = [42,] + out = jsonToArray(shape, dt, data, broadcast=True) + + self.assertTrue(isinstance(out, np.ndarray)) + self.assertEqual(out.shape, (10,2)) + for i in range(10): + for j in range(2): + self.assertEqual(out[i,j], 42) + + dt = np.dtype("i4") + shape = [10,2] + data = [69,96] + out = jsonToArray(shape, dt, data, broadcast=True) + + self.assertTrue(isinstance(out, np.ndarray)) + self.assertEqual(out.shape, (10,2)) + for i in range(10): + self.assertEqual(out[i,0], 69) + self.assertEqual(out[i,1], 96) + + # three dimensional target + dt = np.dtype("i4") + shape = [10, 3, 2] + data = [[0,1],[2,3],[4,5]] + out = jsonToArray(shape, dt, data, broadcast=True) + + self.assertTrue(isinstance(out, np.ndarray)) + self.assertEqual(out.shape, (10, 3, 2)) + for i in range(10): + for j in range(3): + for k in range(2): + self.assertEqual(out[i,j,k], j * 2 + k) + + + # verify ValueError returning if broadcast rules don't apply + dt = np.dtype("i4") + shape = [5,] + data = [1, 2] + + try: + jsonToArray(shape, dt, data, broadcast=True) + self.assertTrue(False) + except ValueError: + pass # expected + + def testToBytes(self): # Simple array dt = np.dtype(" Date: Mon, 16 Oct 2023 10:46:02 -0700 Subject: [PATCH 03/17] broadcast support --- hsds/chunk_sn.py | 133 ++++++++++++++++++-------------------- hsds/dset_sn.py | 9 +-- hsds/servicenode_lib.py | 2 +- hsds/util/arrayUtil.py | 6 +- tests/integ/value_test.py | 19 +++--- 5 files changed, 80 insertions(+), 89 deletions(-) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index f5aa03e9..e58baa17 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -422,6 +422,8 @@ async def PUT_Value(request): params = request.rel_url.query append_rows = None # this is a append update or not append_dim = 0 + num_elements = None + element_count = None if "append" in params and params["append"]: try: append_rows = int(params["append"]) @@ -450,6 +452,15 @@ async def PUT_Value(request): raise HTTPBadRequest(reason=msg) query = params["query"] + if "element_count" in params: + try: + element_count = int(params["element_count"]) + except ValueError: + msg = "invalid element_count" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + log.debug(f"element_count param: {element_count}") + dset_id = request.match_info.get("id") if not dset_id: msg = "Missing dataset id" @@ -494,7 +505,7 @@ async def PUT_Value(request): log.warn(msg) raise HTTPBadRequest(reason=msg) log.info(f"append_rows: {append_rows}") - + if append_rows: for key in ("start", "stop", "step"): if key in body: @@ -510,7 +521,6 @@ async def PUT_Value(request): log.warn(msg) raise HTTPBadRequest(reason=msg) log.info(f"append_dim: {append_dim}") - # get state for dataset from DN. 
dset_json = await getObjectJson(app, dset_id, bucket=bucket, refresh=False) @@ -713,14 +723,6 @@ async def PUT_Value(request): log.warn(msg) raise # re-throw - """ - if len(binary_data) != request.content_length: - msg = f"Read {len(binary_data)} bytes, expecting: " - msg += f"{request.content_length}" - log.error(msg) - raise HTTPBadRequest(reason=msg) - """ - if append_rows: for i in range(rank): if i == append_dim: @@ -748,29 +750,44 @@ async def PUT_Value(request): np_shape = getSelectionShape(slices) else: # point update - np_shape = (num_points,) + np_shape = [num_points,] log.debug(f"selection shape: {np_shape}") - num_elements = getNumElements(np_shape) - log.debug(f"selection num elements: {num_elements}") - if num_elements <= 0: + if np.prod(np_shape) == 0: msg = "Selection is empty" log.warn(msg) raise HTTPBadRequest(reason=msg) + if element_count is not None: + # if this is set to something other than the number of + # elements in np_shape, should be a value that can + # be used for broadcasting + for n in range(rank): + msg = f"{element_count} vs np.prod({np_shape[:n+1]}): {np.prod(np_shape[:(n+1)])}" + log.debug(msg) + if element_count == np.prod(np_shape) // np.prod(np_shape[:(n + 1)]): + num_elements = element_count + log.debug(f"broadcast with: {element_count} elements is valid ") + break + if num_elements is None: + # this never got set, so element count must be invalid for this shape + msg = f"element_count {element_count} not compatible with selection shape: {np_shape}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + # set num_elements based on selection shape + num_elements = getNumElements(np_shape) + log.debug(f"selection num elements: {num_elements}") + arr = None # np array to hold request data if binary_data: if item_size == "H5T_VARIABLE": - # binary variable length data try: - arr = bytesToArray(binary_data, dset_dtype, np_shape) + arr = bytesToArray(binary_data, dset_dtype, [num_elements,]) except ValueError as ve: log.warn(f"bytesToArray value error: {ve}") raise HTTPBadRequest() - - num_req_elements = getNumElements(arr.shape) - log.debug(f"binary variable data element count: {num_req_elements}") else: # fixed item size if len(binary_data) % item_size != 0: @@ -778,68 +795,46 @@ async def PUT_Value(request): msg += f"but {len(binary_data)} bytes received" log.warn(msg) raise HTTPBadRequest(reason=msg) - + + if len(binary_data) // item_size != num_elements: + msg = f"expected {item_size * num_elements} bytes but got {len(binary_data)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + # check against max request size if num_elements * item_size > max_request_size: msg = f"read {num_elements*item_size} bytes, greater than {max_request_size}" log.warn(msg) - num_req_elements = len(binary_data) // item_size - - # if the req item count is less than expected, - # check to see if it is a broadcast request - broadcast_shape = None - if num_req_elements != num_elements and not append_rows: - broadcast_shape = [1,] - for ndim in range(rank): - if num_req_elements == np.prod(broadcast_shape): - break - np_shape_extent = np_shape[rank - 1 - ndim] - if ndim == 0: - broadcast_shape = [np_shape_extent,] - else: - broadcast_shape = [np_shape_extent].extend(broadcast_shape) - log.debug(f"trying broadcast_shape: {broadcast_shape}") - if len(broadcast_shape) == rank: - msg = f"Unexpected request size: {len(binary_data)}, " - msg += f"for num_elements: {num_elements} with item_size: {item_size}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - # read bytes 
into a one-dimensional numpy array - if item_size != "H5T_VARIABLE": - """ - # binary variable length data - try: - arr = bytesToArray(binary_data, dset_dtype, (num_elements,)) - except ValueError as ve: - log.warn(f"Unable to parse variable length data: {ve}") - raise HTTPBadRequest() - """ arr = np.fromstring(binary_data, dtype=dset_dtype) - - if broadcast_shape: - log.info(f"broadcasting from {broadcast_shape} to {np_shape}") - arr = arr.reshape(broadcast_shape) - tmp_arr = np.zeros(np_shape, dtype=dset_dtype) - tmp_arr[...] = arr - arr = tmp_arr - else: - try: - arr = arr.reshape(np_shape) # conform to selection shape - except ValueError: - msg = "Bad Request: binary input data doesn't match selection" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + log.debug(f"read fixed type array: {arr}") + + if element_count is not None: + # broad cast data into numpy array + arr_tmp = np.zeros(np_shape, dtype=dset_dtype) + arr_tmp[...] = arr + arr = arr_tmp + try: + arr = arr.reshape(np_shape) # conform to selection shape + except ValueError: + msg = "Bad Request: binary input data doesn't match selection" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + msg = f"PUT value - numpy array shape: {arr.shape} dtype: {arr.dtype}" log.debug(msg) - + elif request_type == "json": # get array from json input try: msg = "input data doesn't match selection" # only enable broadcast if not appending - arr = jsonToArray(np_shape, dset_dtype, json_data, broadcast=(False if append_rows else True)) + if num_elements < np.prod(np_shape): + broadcast = True + else: + broadcast = False + log.debug(f"np_shape: {np_shape}, broadcast: {broadcast}") + arr = jsonToArray(np_shape, dset_dtype, json_data, broadcast=broadcast) except ValueError: log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -1108,7 +1103,7 @@ async def GET_Value(request): # get state for dataset from DN. # Note - refreshShape will do a refresh if the dataset is extensible # i.e. 
we need to make sure we have the correct shape dimensions - # + dset_json = await getObjectJson(app, dset_id, bucket=bucket, refresh=True) type_json = dset_json["type"] dset_dtype = createDataType(type_json) diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 9e82a690..e09e5fba 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -621,7 +621,7 @@ async def PUT_DatasetShape(request): msg = "Extent of update shape request does not match dataset sahpe" log.warn(msg) raise HTTPBadRequest(reason=msg) - + shape_reduction = False for i in range(rank): if shape_update and shape_update[i] < dims[i]: @@ -634,12 +634,12 @@ async def PUT_DatasetShape(request): msg = "Extension dimension can not be extended past max extent" log.warn(msg) raise HTTPConflict() - + if extend_dim < 0 or extend_dim >= rank: msg = "Extension dimension must be less than rank and non-negative" log.warn(msg) raise HTTPBadRequest(reason=msg) - + if shape_reduction: log.info(f"Shape extent reduced for dataset (rank: {rank})") @@ -685,9 +685,6 @@ async def PUT_DatasetShape(request): log.warn("got 409 extending dataspace") raise - - - resp = await jsonResponse(request, json_resp, status=201) log.response(request, resp=resp) return resp diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 2f75284c..416f1fc5 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -211,7 +211,7 @@ async def getObjectJson( msg = f"Object: {obj_id} not found, req: {req}, params: {params}" log.warn(msg) raise HTTPNotFound() - + return obj_json diff --git a/hsds/util/arrayUtil.py b/hsds/util/arrayUtil.py index 0f6b7099..ff586fee 100644 --- a/hsds/util/arrayUtil.py +++ b/hsds/util/arrayUtil.py @@ -93,8 +93,6 @@ def getNumElements(dims): return num_elements - - def getShapeDims(shape): """ Get dims from a given shape json. Return [1,] for Scalar datasets, @@ -145,7 +143,7 @@ def fillVlenArray(rank, data, arr, index): arr[index] = data[i] index += 1 return index - + # need some special conversion for compound types -- # each element must be a tuple, but the JSON decoder # gives us a list instead. 
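The broadcast support introduced in this series (the broadcast flag on jsonToArray and the element_count path in chunk_sn.py) ultimately rests on plain NumPy assignment broadcasting. A minimal sketch of that pattern follows, assuming a fixed-size dtype; the helper name is illustrative and not part of the patch:

import numpy as np

def broadcast_fill(data_shape, dtype, values):
    # illustrative only: mirrors the zeros-then-ellipsis-assign pattern used above
    src = np.asarray(values, dtype=dtype)
    tgt = np.zeros(data_shape, dtype=dtype)
    # ellipsis assignment broadcasts src across the leading dimensions of
    # data_shape; NumPy raises ValueError when the trailing dimensions don't match
    tgt[...] = src
    return tgt

# e.g. one row [1, 2, 3, 4, 5] is repeated across every row of a (4, 5) selection
print(broadcast_fill((4, 5), "i4", [1, 2, 3, 4, 5]))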
@@ -486,7 +484,7 @@ def bytesToArray(data, dt, shape): arr = np.frombuffer(data, dtype=dt) else: nelements = getNumElements(shape) - + arr = np.zeros((nelements,), dtype=dt) offset = 0 for index in range(nelements): diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index c27600b7..788d53f8 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -3076,7 +3076,6 @@ def testIntelligentRangeGet(self): # should get one element back self.assertEqual(len(value), 10) self.assertEqual(value, list(range(start, start + 10))) - def testLargeCreationProperties(self): # test Dataset with artifically large creation_properties data @@ -3142,11 +3141,10 @@ def testLargeCreationProperties(self): def testValueReinitialization(self): # Test the dataset values get reset after a reduction and resize - + print("testValueReinitialization", self.base_domain) headers = helper.getRequestHeaders(domain=self.base_domain) - - + # get domain req = f"{self.endpoint}/" rsp = self.session.get(req, headers=headers) @@ -3187,14 +3185,14 @@ def testValueReinitialization(self): rspJson = json.loads(rsp.text) self.assertTrue("value" in rspJson) self.assertEqual(rspJson["value"], data) - + # resize the dataset to 5 elements - req =f"{self.endpoint}/datasets/{dset_uuid}/shape" + req = f"{self.endpoint}/datasets/{dset_uuid}/shape" payload = {"shape": 5} rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) rspJson = json.loads(rsp.text) - + # read back the remaining elements req = f"{self.endpoint}/datasets/{dset_uuid}/value" rsp = self.session.get(req, headers=headers) @@ -3204,7 +3202,7 @@ def testValueReinitialization(self): self.assertEqual(rspJson["value"], data[:5]) # resize back to 10 - req =f"{self.endpoint}/datasets/{dset_uuid}/shape" + req = f"{self.endpoint}/datasets/{dset_uuid}/shape" payload = {"shape": 10} rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) @@ -3216,7 +3214,10 @@ def testValueReinitialization(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) self.assertTrue("value" in rspJson) - ret_value = rspJson["value"] + value = rspJson["value"] + print("value:", value) + # TBD: verify values are getting reinitialized + if __name__ == "__main__": # setup test files From 84c055f46fc1871826badf72f4e811fc37a44e81 Mon Sep 17 00:00:00 2001 From: jreadey Date: Tue, 17 Oct 2023 15:43:32 -0700 Subject: [PATCH 04/17] fix for higher dimensional broadcast --- hsds/chunk_sn.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index e58baa17..69415bad 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -629,6 +629,7 @@ async def PUT_Value(request): binary_data = None points = None # used for point selection writes np_shape = [] # shape of incoming data + bc_shape = [] # shape of broadcast array (if element_count is set) slices = [] # selection area to write to if item_size == 'H5T_VARIABLE' or not use_http_streaming(request, rank): @@ -762,13 +763,18 @@ async def PUT_Value(request): # if this is set to something other than the number of # elements in np_shape, should be a value that can # be used for broadcasting - for n in range(rank): - msg = f"{element_count} vs np.prod({np_shape[:n+1]}): {np.prod(np_shape[:(n+1)])}" - log.debug(msg) - if element_count == np.prod(np_shape) // np.prod(np_shape[:(n + 1)]): - num_elements = element_count - log.debug(f"broadcast with: 
{element_count} elements is valid ") - break + if element_count == 1: + num_elements = 1 + bc_shape = [1,] + log.debug(f"broadcasting one element to {np_shape}") + else: + + for n in range(rank-1): + bc_shape = np_shape[rank - n - 1] + if element_count == np.prod(bc_shape): + num_elements = element_count + log.debug(f"broadcast with: {element_count} elements is valid with shape: {bc_shape} ") + break if num_elements is None: # this never got set, so element count must be invalid for this shape msg = f"element_count {element_count} not compatible with selection shape: {np_shape}" @@ -809,8 +815,9 @@ async def PUT_Value(request): arr = np.fromstring(binary_data, dtype=dset_dtype) log.debug(f"read fixed type array: {arr}") - if element_count is not None: - # broad cast data into numpy array + if bc_shape: + # broadcast received data into numpy array + arr = arr.reshape(bc_shape) arr_tmp = np.zeros(np_shape, dtype=dset_dtype) arr_tmp[...] = arr arr = arr_tmp From 557b7bcac6ca0f2cacb096f22313b04562f6a30a Mon Sep 17 00:00:00 2001 From: jreadey Date: Tue, 17 Oct 2023 23:00:14 -0700 Subject: [PATCH 05/17] added broadcast test --- hsds/chunk_sn.py | 43 +-- tests/integ/broadcast_test.py | 518 ++++++++++++++++++++++++++++++++++ 2 files changed, 542 insertions(+), 19 deletions(-) create mode 100755 tests/integ/broadcast_test.py diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 69415bad..903f82fe 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -591,7 +591,6 @@ async def PUT_Value(request): method=request.method, ) - log.debug(f"arr shape: {arr_rsp.shape}") response_type = getAcceptType(request) if response_type == "binary": @@ -632,13 +631,11 @@ async def PUT_Value(request): bc_shape = [] # shape of broadcast array (if element_count is set) slices = [] # selection area to write to - if item_size == 'H5T_VARIABLE' or not use_http_streaming(request, rank): + if item_size == 'H5T_VARIABLE' or element_count or not use_http_streaming(request, rank): http_streaming = False else: http_streaming = True - http_streaming = False # test - # body could also contain a point selection specifier if body and "points" in body: if append_rows: @@ -766,14 +763,15 @@ async def PUT_Value(request): if element_count == 1: num_elements = 1 bc_shape = [1,] - log.debug(f"broadcasting one element to {np_shape}") + log.debug(f"broadcasting one element to shape: {np_shape}") else: - - for n in range(rank-1): - bc_shape = np_shape[rank - n - 1] + bc_shape = [] + for n in range(rank - 1): + bc_shape.insert(0, np_shape[rank - n - 1]) if element_count == np.prod(bc_shape): num_elements = element_count - log.debug(f"broadcast with: {element_count} elements is valid with shape: {bc_shape} ") + msg = f"broadcast with: {element_count} elements valid for shape: {bc_shape}" + log.debug(msg) break if num_elements is None: # this never got set, so element count must be invalid for this shape @@ -836,12 +834,22 @@ async def PUT_Value(request): try: msg = "input data doesn't match selection" # only enable broadcast if not appending - if num_elements < np.prod(np_shape): - broadcast = True + + if bc_shape: + arr = jsonToArray(bc_shape, dset_dtype, json_data) else: - broadcast = False - log.debug(f"np_shape: {np_shape}, broadcast: {broadcast}") - arr = jsonToArray(np_shape, dset_dtype, json_data, broadcast=broadcast) + arr = jsonToArray(np_shape, dset_dtype, json_data) + + log.debug(f"jsonToArray returned: {arr}") + if num_elements != np.prod(arr.shape): + msg = f"expected {num_elements} elements, but got {np.prod(arr.shape)}" + 
raise HTTPBadRequest(reason=msg) + + if bc_shape: + # broadcast to target + arr_tmp = np.zeros(np_shape, dtype=dset_dtype) + arr_tmp[...] = arr + arr = arr_tmp except ValueError: log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -953,8 +961,6 @@ async def PUT_Value(request): msg = f"bytesToArray value error for page: {page_number+1}: {ve}" log.warn(msg) raise HTTPBadRequest(reason=msg) - if len(select_shape) == 2: - log.debug(f"arr test value[0,0]: {arr[0,0]}") try: chunk_ids = getChunkIds(dset_id, page, layout) @@ -963,9 +969,8 @@ async def PUT_Value(request): raise HTTPInternalServerError() log.debug(f"chunk_ids: {chunk_ids}") if len(chunk_ids) > max_chunks: - log.warn( - f"got {len(chunk_ids)} for page: {page_number+1}. max_chunks: {max_chunks} " - ) + msg = f"got {len(chunk_ids)} for page: {page_number+1}. max_chunks: {max_chunks}" + log.warn(msg) crawler = ChunkCrawler( app, diff --git a/tests/integ/broadcast_test.py b/tests/integ/broadcast_test.py new file mode 100755 index 00000000..5d0187a5 --- /dev/null +++ b/tests/integ/broadcast_test.py @@ -0,0 +1,518 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # +# Utilities. The full HSDS copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## +import unittest +import json +import helper + + +class BroadcastTest(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(BroadcastTest, self).__init__(*args, **kwargs) + self.base_domain = helper.getTestDomainName(self.__class__.__name__) + helper.setupDomain(self.base_domain) + self.endpoint = helper.getEndpoint() + + def setUp(self): + self.session = helper.getSession() + + def tearDown(self): + if self.session: + self.session.close() + + def getUUIDByPath(self, domain, h5path): + return helper.getUUIDByPath(domain, h5path, session=self.session) + + def getRootUUID(self, domain, username=None, password=None): + return helper.getRootUUID( + domain, username=username, password=password, session=self.session + ) + + def checkVerbose(self, dset_id, headers=None, expected=None): + # do a flush with rescan, then check the expected return values are correct + req = f"{self.endpoint}/" + params = {"flush": 1, "rescan": 1} + rsp = self.session.put(req, params=params, headers=headers) + # should get a NO_CONTENT code, + self.assertEqual(rsp.status_code, 204) + + # do a get and verify the additional keys are + req = f"{self.endpoint}/datasets/{dset_id}" + params = {"verbose": 1} + + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + + for k in expected: + self.assertTrue(k in rspJson) + self.assertEqual(rspJson[k], expected[k]) + + # main + + def testPut1DDataset(self): + # Test PUT value with broadcast for 1d dataset + print("testPut1DDataset", self.base_domain) + + headers = helper.getRequestHeaders(domain=self.base_domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + 
helper.validateId(root_uuid) + + # create dataset + data = {"type": "H5T_STD_I32LE", "shape": 10} + + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # link new dataset as 'dset1d' + name = "dset1d" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": dset_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # write to the dset + req = self.endpoint + "/datasets/" + dset_id + "/value" + data = [42,] # broadcast to [42, ..., 42] + + payload = {"value": data} + params = {"element_count": 1} + + rsp = self.session.put(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + self.assertEqual(rspJson["value"], data * 10) + + def testPut1DDatasetBinary(self): + # Test PUT value with broadcast for 1d dataset using binary data + print("testPut1DDatasetBinary", self.base_domain) + NUM_ELEMENTS = 10 # 1000000 - this value is hitting nginx request size limit + + headers = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_req = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_req["Content-Type"] = "application/octet-stream" + headers_bin_rsp = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_rsp["accept"] = "application/octet-stream" + + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create dataset + data = {"type": "H5T_STD_I32LE", "shape": NUM_ELEMENTS} + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # link new dataset as 'dset1d' + name = "dset1d" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": dset_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # write 42 as four-byte little endian integer + # broadcast across the entire dataset + req = self.endpoint + "/datasets/" + dset_id + "/value" + data = bytearray(4) + data[0] = 0x2a + params = {"element_count": 1} + rsp = self.session.put(req, data=data, params=params, headers=headers_bin_req) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers_bin_rsp) + self.assertEqual(rsp.status_code, 200) + data = rsp.content + self.assertEqual(len(data), NUM_ELEMENTS * 4) + for i in range(NUM_ELEMENTS): + offset = i * 4 + self.assertEqual(data[offset + 0], 0x2a) + self.assertEqual(data[offset + 1], 0) + self.assertEqual(data[offset + 2], 0) + self.assertEqual(data[offset + 3], 0) + + # write a selection + params = {"select": "[4:6]"} # 4th and 5th elements + params["element_count"] = 1 # broadcast + data = bytearray(4) + data[0] = 0x40 # 64 + rsp = self.session.put(req, data=data, params=params, headers=headers_bin_req) + 
self.assertEqual(rsp.status_code, 200) + + # read a selection + params = {"select": "[0:6]"} # read first 6 elements + rsp = self.session.get(req, params=params, headers=headers_bin_rsp) + self.assertEqual(rsp.status_code, 200) + data = rsp.content + self.assertEqual(len(data), 24) + for i in range(6): + offset = i * 4 + if i >= 4: + # these were updated by the previous selection + self.assertEqual(data[offset + 0], 0x40) + else: + self.assertEqual(data[offset + 0], 0x2a) + self.assertEqual(data[offset + 1], 0) + self.assertEqual(data[offset + 2], 0) + self.assertEqual(data[offset + 3], 0) + + def testPut2DDataset(self): + """Test PUT value with broadcast for 2d dataset""" + print("testPut2DDataset", self.base_domain) + + headers = helper.getRequestHeaders(domain=self.base_domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create dataset + num_col = 5 + num_row = 4 + data = {"type": "H5T_STD_I32LE", "shape": [num_row, num_col]} + + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # link new dataset as 'dset2d' + name = "dset2d" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": dset_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # broadcast one element to the dataset + req = self.endpoint + "/datasets/" + dset_id + "/value" + json_data = [42,] + payload = {"value": json_data} + params = {"element_count": 1} + rsp = self.session.put(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_value = rspJson["value"] + for row in json_value: + for item in row: + self.assertEqual(item, 42) + + # broadcast row to the dataset + req = self.endpoint + "/datasets/" + dset_id + "/value" + json_data = [1, 2, 3, 4, 5] + payload = {"value": json_data} + params = {"element_count": 5} + rsp = self.session.put(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_value = rspJson["value"] + for row in json_value: + self.assertEqual(row, [1, 2, 3, 4, 5]) + + def testPut2DDatasetBinary(self): + # Test PUT value with broadcast for a 2d dataset + print("testPut2DDatasetBinary", self.base_domain) + + headers = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_req = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_req["Content-Type"] = "application/octet-stream" + headers_bin_rsp = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_rsp["accept"] = "application/octet-stream" + + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + 
# create dataset + num_col = 5 + num_row = 4 + data = {"type": "H5T_STD_I32LE", "shape": [num_row, num_col]} + + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # link new dataset as 'dset2d' + name = "dset2d" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": dset_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # broadcast one value to entire datsaet + bin_data = bytearray(4) + bin_data[0] = 0x2a + req = self.endpoint + "/datasets/" + dset_id + "/value" + params = {"element_count": 1} + rsp = self.session.put(req, data=bin_data, params=params, headers=headers_bin_req) + self.assertEqual(rsp.status_code, 200) + + # read back the data as json + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_data = rspJson["value"] + for row in json_data: + self.assertEqual(row, [42, 42, 42, 42, 42]) + + # broadcast a row to the entire dataset + bin_data = bytearray(4 * 5) + for i in range(5): + bin_data[i * 4] = i + + params = {"element_count": 5} + rsp = self.session.put(req, data=bin_data, params=params, headers=headers_bin_req) + self.assertEqual(rsp.status_code, 200) + + # read back the data as json + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_data = rspJson["value"] + for row in json_data: + self.assertEqual(row, [0, 1, 2, 3, 4]) + + def testPut3DDataset(self): + """Test PUT value with broadcast for 3d dataset""" + print("testPut3DDataset", self.base_domain) + + headers = helper.getRequestHeaders(domain=self.base_domain) + + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create dataset + data = {"type": "H5T_STD_I32LE", "shape": [2, 3, 5]} + + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # link new dataset as 'dset3d' + name = "dset3d" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": dset_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # broadcast one element to the dataset + req = self.endpoint + "/datasets/" + dset_id + "/value" + json_data = [42,] + payload = {"value": json_data} + params = {"element_count": 1} + rsp = self.session.put(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_value = rspJson["value"] + for level in json_value: + for row in level: + self.assertEqual(row, [42, 42, 42, 42, 42]) + + # broadcast row to the dataset + req = self.endpoint + "/datasets/" + dset_id + "/value" + json_data = [1, 2, 3, 4, 
5] + payload = {"value": json_data} + params = {"element_count": 5} + rsp = self.session.put(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_value = rspJson["value"] + for level in json_value: + for row in level: + self.assertEqual(row, [1, 2, 3, 4, 5]) + + # broadcast level (3x5 block) to the dataset + req = self.endpoint + "/datasets/" + dset_id + "/value" + test_data = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]] + payload = {"value": test_data} + params = {"element_count": 15} + rsp = self.session.put(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_value = rspJson["value"] + # test data should be repeated twice + self.assertEqual(json_value[0], test_data) + self.assertEqual(json_value[1], test_data) + + def testPut3DDatasetBinary(self): + """Test PUT value with broadcast for 3d dataset""" + print("testPut3DDatasetBinary", self.base_domain) + + headers = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_req = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_req["Content-Type"] = "application/octet-stream" + headers_bin_rsp = helper.getRequestHeaders(domain=self.base_domain) + headers_bin_rsp["accept"] = "application/octet-stream" + + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create dataset + data = {"type": "H5T_STD_I32LE", "shape": [2, 3, 5]} + + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # link new dataset as 'dset3d' + name = "dset3d" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": dset_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # broadcast one value to entire datsaet + bin_data = bytearray(4) + bin_data[0] = 0x2a + req = self.endpoint + "/datasets/" + dset_id + "/value" + params = {"element_count": 1} + rsp = self.session.put(req, data=bin_data, params=params, headers=headers_bin_req) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_value = rspJson["value"] + for level in json_value: + for row in level: + self.assertEqual(row, [42, 42, 42, 42, 42]) + + # broadcast row to the dataset + req = self.endpoint + "/datasets/" + dset_id + "/value" + bin_data = bytearray(5 * 4) + for i in range(5): + bin_data[i * 4] = i + 1 + + params = {"element_count": 5} + rsp = self.session.put(req, data=bin_data, params=params, headers=headers_bin_req) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + 
self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_value = rspJson["value"] + for level in json_value: + for row in level: + self.assertEqual(row, [1, 2, 3, 4, 5]) + + # broadcast level (3x5 block) to the dataset + req = self.endpoint + "/datasets/" + dset_id + "/value" + bin_data = bytearray(5 * 3 * 4) + for i in range(5 * 3): + bin_data[i * 4] = i + 1 + params = {"element_count": 15} + rsp = self.session.put(req, data=bin_data, params=params, headers=headers_bin_req) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + json_value = rspJson["value"] + + for level in json_value: + expected = 1 + for row in level: + for item in row: + self.assertEqual(item, expected) + expected += 1 + + +if __name__ == "__main__": + # setup test files + unittest.main() From 7c9f7191edea8eec5672f90763fb192d45ba2281 Mon Sep 17 00:00:00 2001 From: jreadey Date: Wed, 18 Oct 2023 11:06:30 -0700 Subject: [PATCH 06/17] remove broadcast from arrayUtil.py --- hsds/util/arrayUtil.py | 16 ++----- tests/unit/array_util_test.py | 90 +---------------------------------- 2 files changed, 5 insertions(+), 101 deletions(-) diff --git a/hsds/util/arrayUtil.py b/hsds/util/arrayUtil.py index ff586fee..a0358fc7 100644 --- a/hsds/util/arrayUtil.py +++ b/hsds/util/arrayUtil.py @@ -131,7 +131,7 @@ def getShapeDims(shape): return dims -def jsonToArray(data_shape, data_dtype, data_json, broadcast=False): +def jsonToArray(data_shape, data_dtype, data_json): """ Return numpy array from the given json array. """ @@ -176,17 +176,9 @@ def fillVlenArray(rank, data, arr, index): # allow if the array is a scalar and the selection shape is one element, # numpy is ok with this if arr.size != npoints: - if broadcast: - # try to broadcast to the target shape - # if it fails, a ValueError exception will be raised - arr_tgt = np.zeros(data_shape, dtype=data_dtype) - arr_tgt[...] = arr - # worked! 
use arr_tgt as arr - arr = arr_tgt - else: - msg = "Input data doesn't match selection number of elements" - msg += f" Expected {npoints}, but received: {arr.size}" - raise ValueError(msg) + msg = "Input data doesn't match selection number of elements" + msg += f" Expected {npoints}, but received: {arr.size}" + raise ValueError(msg) if arr.shape != data_shape: arr = arr.reshape(data_shape) # reshape to match selection else: diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py index d11979df..6df2e0aa 100644 --- a/tests/unit/array_util_test.py +++ b/tests/unit/array_util_test.py @@ -290,95 +290,7 @@ def testJsonToArray(self): self.assertTrue(isinstance(e, tuple)) self.assertEqual(e, (id0, id1, id2)) - - def testJsonToArrayBroadcast(self): - dt = np.dtype("i4") - shape = [10,] - data = [42,] - out = jsonToArray(shape, dt, data, broadcast=True) - - self.assertTrue(isinstance(out, np.ndarray)) - self.assertEqual(out.shape, (10,)) - for i in range(10): - self.assertEqual(out[i], 42) - - # compound type - dt = np.dtype([("a", "i4"), ("b", "S5")]) - shape = [10,] - data = [[6, "six"],] - out = jsonToArray(shape, dt, data, broadcast=True) - - self.assertTrue(isinstance(out, np.ndarray)) - self.assertEqual(out.shape, (10,)) - for i in range(10): - e = out[i] - self.assertEqual(e[0], 6) - self.assertEqual(e[1], b'six') - - # VLEN str - dt = special_dtype(vlen=str) - data = ["hello",] - - shape = [10,] - - out = jsonToArray(shape, dt, data, broadcast=True) - self.assertTrue("vlen" in out.dtype.metadata) - self.assertEqual(out.dtype.metadata["vlen"], str) - self.assertEqual(out.dtype.kind, "O") - self.assertEqual(out.shape, (10,)) - for i in range(10): - e = out[i] - self.assertEqual(out[0], data[0]) - - # two dimensional target - dt = np.dtype("i4") - shape = [10,2] - data = [42,] - out = jsonToArray(shape, dt, data, broadcast=True) - - self.assertTrue(isinstance(out, np.ndarray)) - self.assertEqual(out.shape, (10,2)) - for i in range(10): - for j in range(2): - self.assertEqual(out[i,j], 42) - - dt = np.dtype("i4") - shape = [10,2] - data = [69,96] - out = jsonToArray(shape, dt, data, broadcast=True) - - self.assertTrue(isinstance(out, np.ndarray)) - self.assertEqual(out.shape, (10,2)) - for i in range(10): - self.assertEqual(out[i,0], 69) - self.assertEqual(out[i,1], 96) - - # three dimensional target - dt = np.dtype("i4") - shape = [10, 3, 2] - data = [[0,1],[2,3],[4,5]] - out = jsonToArray(shape, dt, data, broadcast=True) - - self.assertTrue(isinstance(out, np.ndarray)) - self.assertEqual(out.shape, (10, 3, 2)) - for i in range(10): - for j in range(3): - for k in range(2): - self.assertEqual(out[i,j,k], j * 2 + k) - - - # verify ValueError returning if broadcast rules don't apply - dt = np.dtype("i4") - shape = [5,] - data = [1, 2] - - try: - jsonToArray(shape, dt, data, broadcast=True) - self.assertTrue(False) - except ValueError: - pass # expected - - + def testToBytes(self): # Simple array dt = np.dtype(" Date: Wed, 18 Oct 2023 11:20:06 -0700 Subject: [PATCH 07/17] fix flake8 error --- tests/unit/array_util_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py index 6df2e0aa..dd10c1c0 100644 --- a/tests/unit/array_util_test.py +++ b/tests/unit/array_util_test.py @@ -290,7 +290,6 @@ def testJsonToArray(self): self.assertTrue(isinstance(e, tuple)) self.assertEqual(e, (id0, id1, id2)) - def testToBytes(self): # Simple array dt = np.dtype(" Date: Wed, 18 Oct 2023 13:40:18 -0700 Subject: [PATCH 08/17] broadcast 
PUT Value to DNs when data is one element --- hsds/chunk_crawl.py | 27 ++++++++++++++++--- hsds/chunk_dn.py | 43 +++++++++++++++++++++--------- hsds/chunk_sn.py | 49 +++++++++++++++++------------------ hsds/util/arrayUtil.py | 23 ++++++++++++++++ tests/unit/array_util_test.py | 23 +++++++++++++++- 5 files changed, 124 insertions(+), 41 deletions(-) diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index b7d2ce22..dade9fa5 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -70,20 +70,41 @@ async def write_chunk_hyperslab( log.error(f"No type found in dset_json: {dset_json}") raise HTTPInternalServerError() + params = {} layout = getChunkLayout(dset_json) chunk_sel = getChunkCoverage(chunk_id, slices, layout) log.debug(f"chunk_sel: {chunk_sel}") data_sel = getDataCoverage(chunk_id, slices, layout) log.debug(f"data_sel: {data_sel}") log.debug(f"arr.shape: {arr.shape}") - arr_chunk = arr[data_sel] + + # broadcast data if arr has one element and no stride is set + do_broadcast = True + if np.prod(arr.shape) != 1: + do_broadcast = False + else: + for s in slices: + if s.step is None: + continue + if s.step > 1: + do_broadcast = False + + if do_broadcast: + log.debug(f"broadcasting {arr}") + # just broadcast data value across selection + params["element_count"] = 1 + arr_chunk = arr + else: + arr_chunk = arr[data_sel] + req = getDataNodeUrl(app, chunk_id) req += "/chunks/" + chunk_id - log.debug(f"PUT chunk req: {req}") data = arrayToBytes(arr_chunk) + + log.debug(f"PUT chunk req: {req}, {len(data)} bytes") + # pass itemsize, type, dimensions, and selection as query params - params = {} select = getSliceQueryParam(chunk_sel) params["select"] = select if bucket: diff --git a/hsds/chunk_dn.py b/hsds/chunk_dn.py index 3fafd940..329f772a 100644 --- a/hsds/chunk_dn.py +++ b/hsds/chunk_dn.py @@ -20,7 +20,7 @@ from aiohttp.web import json_response, StreamResponse from .util.httpUtil import request_read, getContentType -from .util.arrayUtil import bytesToArray, arrayToBytes, getShapeDims +from .util.arrayUtil import bytesToArray, arrayToBytes, getShapeDims, getBroadcastShape from .util.idUtil import getS3Key, validateInPartition, isValidUuid from .util.storUtil import isStorObj, deleteStorObj from .util.hdf5dtype import createDataType @@ -48,6 +48,7 @@ async def PUT_Chunk(request): limit = 0 bucket = None input_arr = None + element_count = None if "query" in params: query = params["query"] @@ -77,6 +78,15 @@ async def PUT_Chunk(request): log.warn(msg) raise HTTPInternalServerError(reason=msg) + if "element_count" in params: + try: + element_count = int(params["element_count"]) + except ValueError: + msg = "invalid element_count" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + log.debug(f"element_count param: {element_count}") + try: validateInPartition(app, chunk_id) except KeyError: @@ -130,9 +140,16 @@ async def PUT_Chunk(request): log.debug(f"PUT_Chunk slices: {selection}") mshape = getSelectionShape(selection) - num_elements = 1 - for extent in mshape: - num_elements *= extent + if element_count is not None: + bcshape = getBroadcastShape(mshape, element_count) + log.debug(f"ussing bcshape: {bcshape}") + else: + bcshape = None + + if bcshape: + num_elements = np.prod(bcshape) + else: + num_elements = np.prod(mshape) if getChunkInitializer(dset_json): chunk_init = True @@ -220,9 +237,6 @@ async def PUT_Chunk(request): return else: # regular chunk update - - broadcast = 0 # broadcast update - # check that the content_length is what we expect if itemsize != "H5T_VARIABLE": 
log.debug(f"expect content_length: {num_elements*itemsize}") @@ -235,10 +249,6 @@ async def PUT_Chunk(request): msg = f"Expected content_length of: {expected}, but got: {actual}" log.error(msg) raise HTTPBadRequest(reason=msg) - else: - broadcast = expected // actual - if broadcast != 1: - log.info(f"broadcast chunk write: {broadcast}") # create a numpy array for incoming data input_bytes = await request_read(request) @@ -249,7 +259,16 @@ async def PUT_Chunk(request): log.error(msg) raise HTTPInternalServerError() - input_arr = bytesToArray(input_bytes, dt, mshape) + input_arr = bytesToArray(input_bytes, dt, [num_elements, ]) + if bcshape: + input_arr = input_arr.reshape(bcshape) + log.debug(f"broadcasting {bcshape} to mshape {mshape}") + arr_tmp = np.zeros(mshape, dtype=dt) + arr_tmp[...] = input_arr + input_arr = arr_tmp + else: + input_arr = input_arr.reshape(mshape) + kwargs = {"chunk_arr": chunk_arr, "slices": selection, "data": input_arr} is_dirty = chunkWriteSelection(**kwargs) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 903f82fe..0c094314 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -39,7 +39,7 @@ from .util.chunkUtil import getQueryDtype, get_chunktable_dims from .util.arrayUtil import bytesArrayToList, jsonToArray, getShapeDims from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray -from .util.arrayUtil import squeezeArray, getNumpyValue +from .util.arrayUtil import squeezeArray, getNumpyValue, getBroadcastShape from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.boolparser import BooleanParser from .servicenode_lib import getObjectJson, validateAction @@ -760,24 +760,15 @@ async def PUT_Value(request): # if this is set to something other than the number of # elements in np_shape, should be a value that can # be used for broadcasting - if element_count == 1: - num_elements = 1 - bc_shape = [1,] - log.debug(f"broadcasting one element to shape: {np_shape}") - else: - bc_shape = [] - for n in range(rank - 1): - bc_shape.insert(0, np_shape[rank - n - 1]) - if element_count == np.prod(bc_shape): - num_elements = element_count - msg = f"broadcast with: {element_count} elements valid for shape: {bc_shape}" - log.debug(msg) - break - if num_elements is None: + bc_shape = getBroadcastShape(np_shape, element_count) + + if bc_shape is None: # this never got set, so element count must be invalid for this shape msg = f"element_count {element_count} not compatible with selection shape: {np_shape}" log.warn(msg) raise HTTPBadRequest(reason=msg) + # element_count will be what we expected to see + num_elements = element_count else: # set num_elements based on selection shape num_elements = getNumElements(np_shape) @@ -816,15 +807,23 @@ async def PUT_Value(request): if bc_shape: # broadcast received data into numpy array arr = arr.reshape(bc_shape) - arr_tmp = np.zeros(np_shape, dtype=dset_dtype) - arr_tmp[...] = arr - arr = arr_tmp - try: - arr = arr.reshape(np_shape) # conform to selection shape - except ValueError: - msg = "Bad Request: binary input data doesn't match selection" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + if element_count == 1: + log.debug("will send broadcast set to DN nodes") + else: + # need to instantiate the full np_shape since chunk boundries + # will effect how individual chunks get set + arr_tmp = np.zeros(np_shape, dtype=dset_dtype) + arr_tmp[...] 
= arr + arr = arr_tmp + + if element_count != 1: + try: + arr = arr.reshape(np_shape) # conform to selection shape + except ValueError: + msg = "Bad Request: binary input data doesn't match selection " + msg += f"reshaping {arr.shape} to {np_shape}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) msg = f"PUT value - numpy array shape: {arr.shape} dtype: {arr.dtype}" log.debug(msg) @@ -845,7 +844,7 @@ async def PUT_Value(request): msg = f"expected {num_elements} elements, but got {np.prod(arr.shape)}" raise HTTPBadRequest(reason=msg) - if bc_shape: + if bc_shape and element_count != 1: # broadcast to target arr_tmp = np.zeros(np_shape, dtype=dset_dtype) arr_tmp[...] = arr diff --git a/hsds/util/arrayUtil.py b/hsds/util/arrayUtil.py index a0358fc7..31ee3bf1 100644 --- a/hsds/util/arrayUtil.py +++ b/hsds/util/arrayUtil.py @@ -677,3 +677,26 @@ def ndarray_compare(arr1, arr2): else: # can just us np array_compare return np.array_equal(arr1, arr2) + + +def getBroadcastShape(mshape, element_count): + # if element_count is less than the number of elements + # defined by mshape, return a numpy compatible broadcast + # shape that contains element_count elements. + # If non exists return None + + if np.prod(mshape) == element_count: + return None + + if element_count == 1: + # this always works + return [1,] + + bcshape = [] + rank = len(mshape) + for n in range(rank - 1): + bcshape.insert(0, mshape[rank - n - 1]) + if element_count == np.prod(bcshape): + return bcshape # have a match + + return None # no broadcast found diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py index dd10c1c0..c734e045 100644 --- a/tests/unit/array_util_test.py +++ b/tests/unit/array_util_test.py @@ -27,7 +27,8 @@ getByteArraySize, IndexIterator, ndarray_compare, - getNumpyValue + getNumpyValue, + getBroadcastShape ) from hsds.util.hdf5dtype import special_dtype from hsds.util.hdf5dtype import check_dtype @@ -795,6 +796,26 @@ def testJsonToArrayOnNoneArray(self): self.assertTrue(len(arr) == 0) self.assertTrue(arr.dtype == data_dtype) + def testGetBroadcastShape(self): + bcshape = getBroadcastShape([1, ], 1) + self.assertEqual(bcshape, None) + bcshape = getBroadcastShape([2, 3], 6) + self.assertEqual(bcshape, None) + bcshape = getBroadcastShape([2, 3], 5) + self.assertEqual(bcshape, None) + + bcshape = getBroadcastShape([4, 5], 1) + self.assertEqual(bcshape, [1, ]) + bcshape = getBroadcastShape([4, 5], 5) + self.assertEqual(bcshape, [5, ]) + + bcshape = getBroadcastShape([2, 3, 5], 1) + self.assertEqual(bcshape, [1, ]) + bcshape = getBroadcastShape([2, 3, 5], 5) + self.assertEqual(bcshape, [5, ]) + bcshape = getBroadcastShape([2, 3, 5], 15) + self.assertEqual(bcshape, [3, 5]) + if __name__ == "__main__": # setup test files From 241f4a32f88b5e5c3007f7743d361630811f1e0c Mon Sep 17 00:00:00 2001 From: jreadey Date: Wed, 18 Oct 2023 15:30:42 -0700 Subject: [PATCH 09/17] re-init values when dset shape is reduced then expanded --- hsds/chunk_sn.py | 5 +- hsds/dset_sn.py | 42 +++++++++++++++ hsds/util/chunkUtil.py | 11 ++++ tests/integ/value_test.py | 106 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 157 insertions(+), 7 deletions(-) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 0c094314..5aa4fd91 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -985,9 +985,8 @@ async def PUT_Value(request): crawler_status = crawler.get_status() if crawler_status not in (200, 201): - log.warn( - f"crawler failed for page: {page_number+1} with status: {crawler_status}" - ) + msg = f"crawler failed 
for page: {page_number+1} with status: {crawler_status}" + log.warn(msg) else: log.info("crawler write_chunk_hyperslab successful") diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index e09e5fba..94241541 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -15,6 +15,7 @@ # import math +import numpy as np from json import JSONDecodeError from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound, HTTPConflict @@ -34,6 +35,7 @@ from .util.hdf5dtype import getItemSize from .servicenode_lib import getDomainJson, getObjectJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo +from .chunk_crawl import ChunkCrawler from . import config from . import hsds_logger as log @@ -644,6 +646,24 @@ async def PUT_DatasetShape(request): log.info(f"Shape extent reduced for dataset (rank: {rank})") # need to re-initialize any values that are now outside the shape + # first get the fill value + fill_value = None + type_json = dset_json["type"] + dt = createDataType(type_json) + + if "creationProperties" in dset_json: + fill_value = None + cprops = dset_json["creationProperties"] + if "fillValue" in cprops: + fill_value_prop = cprops["fillValue"] + encoding = cprops.get("fillValue_encoding") + fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding) + if fill_value: + arr = np.empty((1,), dtype=dt, order="C") + arr[...] = fill_value + else: + arr = np.zeros([1,], dtype=dt, order="C") + layout = getChunkLayout(dset_json) log.debug(f"got layout: {layout}") for n in range(rank): @@ -663,6 +683,28 @@ async def PUT_DatasetShape(request): chunk_ids = getChunkIds(dset_id, slices, layout) log.debug(f"got chunkIds: {chunk_ids}") + chunk_ids.sort() + + crawler = ChunkCrawler( + app, + chunk_ids, + dset_json=dset_json, + bucket=bucket, + slices=slices, + arr=arr, + action="write_chunk_hyperslab", + ) + await crawler.crawl() + + crawler_status = crawler.get_status() + + if crawler_status not in (200, 201): + msg = f"crawler failed for shape reinitialize with status: {crawler_status}" + log.warn(msg) + else: + msg = f"crawler success for reinitialization with slices: {slices}" + log.info(msg) + # send request onto DN req = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id + "/shape" diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 2031eaa0..87bdb40c 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -311,7 +311,10 @@ def getNumChunks(selection, layout): for i in range(len(selection)): s = selection[i] c = layout[i] # chunk size + if isinstance(s, slice): + if s.step is None: + s = slice(s.start, s.stop, 1) if s.step > 1: num_points = frac((s.stop - s.start), s.step) w = num_points * s.step - (s.step - 1) @@ -475,6 +478,8 @@ def getChunkIds(dset_id, selection, layout, dim=0, prefix=None, chunk_ids=None): s = selection[dim] c = layout[dim] # log.debug(f"getChunkIds - layout: {layout}") + if isinstance(s, slice) and s.step is None: + s = slice(s.start, s.stop, 1) if isinstance(s, slice) and s.step > c: # chunks may not be contiguous, skip along the selection and add @@ -570,6 +575,8 @@ def getChunkSelection(chunk_id, slices, layout): c = layout[dim] n = chunk_index[dim] * c if isinstance(s, slice): + if s.step is None: + s = slice(s.start, s.stop, 1) if s.start >= n + c: return None # null intersection if s.stop < n: @@ -653,6 +660,8 @@ def getDataCoverage(chunk_id, slices, layout): c = chunk_sel[dim] s = slices[dim] if isinstance(s, slice): + if s.step is None: + s = slice(s.start, s.stop, 1) if c.step != s.step: msg = 
"expecting step for chunk selection to be the " msg += "same as data selection" @@ -1163,6 +1172,8 @@ def chunkQuery( # adjust the index to correspond with the dataset s = slices[0] + if s.step is None: + s = slice(s.start, s.stop, 1) start = s.start + chunk_coord[0] if start > 0: # can just increment every value by same amount diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 788d53f8..18aac784 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -3139,10 +3139,10 @@ def testLargeCreationProperties(self): self.assertEqual(ret_values[i], 24) self.assertEqual(ret_values[i + 5], 42) - def testValueReinitialization(self): + def testValueReinitialization1D(self): # Test the dataset values get reset after a reduction and resize - print("testValueReinitialization", self.base_domain) + print("testValueReinitialization1D", self.base_domain) headers = helper.getRequestHeaders(domain=self.base_domain) # get domain @@ -3215,8 +3215,106 @@ def testValueReinitialization(self): rspJson = json.loads(rsp.text) self.assertTrue("value" in rspJson) value = rspJson["value"] - print("value:", value) - # TBD: verify values are getting reinitialized + self.assertEqual(value[0:5], data[0:5]) + self.assertEqual(value[5:10], [42,] * 5) + + def testValueReinitialization2D(self): + # Test the dataset values get reset after a reduction and resize + + print("testValueReinitialization1D", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get domain + req = f"{self.endpoint}/" + rsp = self.session.get(req, headers=headers) + rspJson = json.loads(rsp.text) + self.assertTrue("root" in rspJson) + root_uuid = rspJson["root"] + + # create the dataset + req = f"{self.endpoint}/datasets" + payload = {"type": "H5T_STD_I32LE", "shape": [12, 15], "maxdims": [12, 15]} + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # create dataset + rspJson = json.loads(rsp.text) + dset_uuid = rspJson["id"] + self.assertTrue(helper.validateId(dset_uuid)) + + # link new dataset as 'dset' + name = "dset" + req = f"{self.endpoint}/groups/{root_uuid}/links/{name}" + payload = {"id": dset_uuid} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # write to the dset + req = f"{self.endpoint}/datasets/{dset_uuid}/value" + data = [] + for i in range(12): + row = [] + for j in range(15): + row.append(i * j) + data.append(row) + payload = {"value": data} + params = {"select": "[0:12, 0:15]"} + + rsp = self.session.put(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # read back the data + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + self.assertEqual(rspJson["value"], data) + + # resize the dataset to 10 x 10 array + req = f"{self.endpoint}/datasets/{dset_uuid}/shape" + payload = {"shape": [10, 10]} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + + # read back the remaining elements + req = f"{self.endpoint}/datasets/{dset_uuid}/value" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + value = rspJson["value"] + 
self.assertEqual(len(value), 10) + for i in range(10): + row = value[i] + self.assertEqual(len(row), 10) + for j in range(10): + self.assertEqual(row[j], i * j) + + # resize back to 12, 15 + req = f"{self.endpoint}/datasets/{dset_uuid}/shape" + payload = {"shape": [12, 15]} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + + # read all the data values + req = f"{self.endpoint}/datasets/{dset_uuid}/value" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + value = rspJson["value"] + + # check that the re-extended area is zero's + self.assertEqual(len(value), 12) + for i in range(12): + row = value[i] + self.assertEqual(len(row), 15) + for j in range(15): + if j < 10 and i < 10: + self.assertEqual(row[j], i * j) + else: + self.assertEqual(row[j], 0) if __name__ == "__main__": From 0ee395fbc5a66bcdf6b18b5019ac6bdd6079950c Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 19 Oct 2023 15:10:54 -0700 Subject: [PATCH 10/17] test for fetching points after reshape --- tests/integ/pointsel_test.py | 80 ++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tests/integ/pointsel_test.py b/tests/integ/pointsel_test.py index 0fc5516b..be2335fc 100755 --- a/tests/integ/pointsel_test.py +++ b/tests/integ/pointsel_test.py @@ -1583,6 +1583,86 @@ def testSelect2DDataset(self): self.assertEqual(len(data), 3 * 4) self.assertEqual(data, b"\x1e\x00\x00\x00 \x00\x00\x00#\x00\x00\x00") + def testShapeUpdate(self): + + # Test selecting points after shape has been updated + print("testShapeUpdate", self.base_domain) + + points = [75,] + + headers = helper.getRequestHeaders(domain=self.base_domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create dataset + data = {"type": "H5T_STD_I32LE", "shape": (100,), "maxdims": (100,)} + + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # link new dataset as 'dset1d' + name = "dset" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": dset_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # write to the dset + data = list(range(100)) + data.reverse() # 99, 98, ..., 0 + + payload = {"value": data} + req = self.endpoint + "/datasets/" + dset_id + "/value" + + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 200) + + body = {"points": points} + # read selected points + rsp = self.session.post(req, data=json.dumps(body), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + ret_value = rspJson["value"] + self.assertEqual(len(ret_value), len(points)) + expected_result = [24, ] + + self.assertEqual(ret_value, expected_result) + + # resize the dataset to the small shape + req = self.endpoint + "/datasets/" + dset_id + "/shape" + payload = {"shape": 50} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + 
self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + + # should get a 400 now + req = self.endpoint + "/datasets/" + dset_id + "/value" + rsp = self.session.post(req, data=json.dumps(body), headers=headers) + self.assertEqual(rsp.status_code, 400) + + # resize back to large shape + payload = {"shape": 100} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + + # read point again + req = self.endpoint + "/datasets/" + dset_id + "/value" + rsp = self.session.post(req, data=json.dumps(body), headers=headers) + self.assertEqual(rsp.status_code, 200) + + if __name__ == "__main__": # setup test files From 51df3a2773fe163e66ec8a4218e5ecc0edfd69ec Mon Sep 17 00:00:00 2001 From: jreadey Date: Fri, 20 Oct 2023 10:34:21 -0700 Subject: [PATCH 11/17] fix for pt selection fail after shape update - #276 --- hsds/chunk_sn.py | 70 ++++++++------------------ hsds/dset_sn.py | 88 ++++++++++++++++++++------------- hsds/servicenode_lib.py | 25 ++++++++++ tests/integ/dataset_test.py | 18 ++++++- tests/integ/pointsel_test.py | 87 +-------------------------------- tests/integ/value_test.py | 95 +++++++++++++++++++++++++++++++++++- 6 files changed, 211 insertions(+), 172 deletions(-) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 5aa4fd91..f24cbfbe 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -42,7 +42,7 @@ from .util.arrayUtil import squeezeArray, getNumpyValue, getBroadcastShape from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.boolparser import BooleanParser -from .servicenode_lib import getObjectJson, validateAction +from .servicenode_lib import getDsetJson, validateAction from .chunk_crawl import ChunkCrawler from . import config from . import hsds_logger as log @@ -72,7 +72,7 @@ def get_hrefs(request, dset_json): return hrefs -async def get_slices(app, select, dset_json, bucket=None): +def get_slices(app, select, dset_json): """Get desired slices from selection query param string or json value. If select is none or empty, slices for entire datashape will be returned. 
@@ -87,35 +87,14 @@ async def get_slices(app, select, dset_json, bucket=None): raise HTTPBadRequest(reason=msg) dims = getShapeDims(datashape) # throws 400 for HS_NULL dsets - maxdims = getDsetMaxDims(dset_json) - - # refetch the dims if the dataset is extensible and request or hasn't - # provided an explicit region - if isExtensible(dims, maxdims) and (select is None or not select): - kwargs = {"bucket": bucket, "refresh": True} - dset_json = await getObjectJson(app, dset_id, **kwargs) - dims = getShapeDims(dset_json["shape"]) - slices = None # selection for read - if isExtensible and select: - try: - slices = getSelectionList(select, dims) - except ValueError: - # exception might be due to us having stale version of dims, - # so use refresh - kwargs = {"bucket": bucket, "refresh": True} - dset_json = await getObjectJson(app, dset_id, **kwargs) - dims = getShapeDims(dset_json["shape"]) - slices = None # retry below - - if slices is None: - try: - slices = getSelectionList(select, dims) - except ValueError: - msg = f"Invalid selection: {select} on dims: {dims} " - msg += f"for dataset: {dset_id}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + try: + slices = getSelectionList(select, dims) + except ValueError: + msg = f"Invalid selection: {select} on dims: {dims} " + msg += f"for dataset: {dset_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) return slices @@ -254,8 +233,7 @@ def getChunkItem(chunkid): raise HTTPInternalServerError() chunktable_id = layout["chunk_table"] # get state for dataset from DN. - kwargs = {"bucket": bucket, "refresh": False} - chunktable_json = await getObjectJson(app, chunktable_id, **kwargs) + chunktable_json = await getDsetJson(app, chunktable_id, bucket=bucket) # log.debug(f"chunktable_json: {chunktable_json}") chunktable_dims = getShapeDims(chunktable_json["shape"]) chunktable_layout = chunktable_json["layout"] @@ -523,7 +501,7 @@ async def PUT_Value(request): log.info(f"append_dim: {append_dim}") # get state for dataset from DN. 
- dset_json = await getObjectJson(app, dset_id, bucket=bucket, refresh=False) + dset_json = await getDsetJson(app, dset_id, bucket=bucket) layout = None datashape = dset_json["shape"] @@ -568,7 +546,7 @@ async def PUT_Value(request): raise HTTPBadRequest(reason=msg) select = params.get("select") - slices = await get_slices(app, select, dset_json, bucket=bucket) + slices = get_slices(app, select, dset_json) if "Limit" in params: try: limit = int(params["Limit"]) @@ -676,12 +654,6 @@ async def PUT_Value(request): log.warn("unable to append to dataspace") raise HTTPConflict() - # refetch the dims if the dataset is extensible - if isExtensible(dims, maxdims): - kwargs = {"bucket": bucket, "refresh": True} - dset_json = await getObjectJson(app, dset_id, **kwargs) - dims = getShapeDims(dset_json["shape"]) - if request_type == "json": if "value" in body: json_data = body["value"] @@ -737,10 +709,10 @@ async def PUT_Value(request): elif points is None: if body and "start" in body and "stop" in body: - slices = await get_slices(app, body, dset_json, bucket=bucket) + slices = get_slices(app, body, dset_json) else: select = params.get("select") - slices = await get_slices(app, select, dset_json, bucket=bucket) + slices = get_slices(app, select, dset_json) # The selection parameters will determine expected put value shape log.debug(f"PUT Value selection: {slices}") @@ -992,7 +964,7 @@ async def PUT_Value(request): else: # - # Do point PUT + # Do point post # log.debug(f"num_points: {num_points}") @@ -1111,10 +1083,10 @@ async def GET_Value(request): bucket = getBucketForDomain(domain) # get state for dataset from DN. - # Note - refreshShape will do a refresh if the dataset is extensible + # Note - this will do a refresh if the dataset is extensible # i.e. we need to make sure we have the correct shape dimensions - dset_json = await getObjectJson(app, dset_id, bucket=bucket, refresh=True) + dset_json = await getDsetJson(app, dset_id, bucket=bucket) type_json = dset_json["type"] dset_dtype = createDataType(type_json) @@ -1137,7 +1109,7 @@ async def GET_Value(request): select = params.get("select") if select: log.debug(f"select query param: {select}") - slices = await get_slices(app, select, dset_json, bucket=bucket) + slices = get_slices(app, select, dset_json) log.debug(f"GET Value selection: {slices}") limit = 0 @@ -1569,7 +1541,7 @@ async def getSelectionData( await getChunkLocations(app, dset_id, dset_json, chunkinfo, chunk_ids, bucket=bucket) if slices is None: - slices = await get_slices(app, None, dset_json, bucket=bucket) + slices = get_slices(app, None, dset_json) if points is None: # get chunk selections for hyperslab select @@ -1649,7 +1621,7 @@ async def POST_Value(request): raise HTTPBadRequest(reason=msg) # get state for dataset from DN. 
- dset_json = await getObjectJson(app, dset_id, bucket=bucket) + dset_json = await getDsetJson(app, dset_id, bucket=bucket) datashape = dset_json["shape"] if datashape["class"] == "H5S_NULL": @@ -1691,7 +1663,7 @@ async def POST_Value(request): elif "select" in body: select = body["select"] log.debug(f"select: {select}") - slices = await get_slices(app, select, dset_json, bucket=bucket) + slices = get_slices(app, select, dset_json) log.debug(f"got slices: {slices}") else: msg = "Expected points or select key in request body" diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 94241541..d4fb0a96 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -25,7 +25,7 @@ from .util.dsetUtil import getPreviewQuery, getFilterItem, getChunkLayout from .util.arrayUtil import getNumElements, getShapeDims, getNumpyValue from .util.chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk -from .util.chunkUtil import getContiguousLayout, getChunkIds +from .util.chunkUtil import getContiguousLayout, getChunkIds, getChunkSelection from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain @@ -33,7 +33,7 @@ from .util.storUtil import getFilters from .util.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson from .util.hdf5dtype import getItemSize -from .servicenode_lib import getDomainJson, getObjectJson, getPathForObjectId +from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo from .chunk_crawl import ChunkCrawler from . import config @@ -189,9 +189,7 @@ async def validateChunkLayout(app, shape_json, item_size, layout, bucket=None): raise HTTPBadRequest(reason=msg) # verify the chunk table exists and is of reasonable shape try: - chunktable_json = await getObjectJson( - app, chunktable_id, bucket=bucket, refresh=False - ) + chunktable_json = await getDsetJson(app, chunktable_id, bucket=bucket) except HTTPNotFound: msg = f"chunk table id: {chunktable_id} not found" log.warn(msg) @@ -343,9 +341,8 @@ async def GET_Dataset(request): # get authoritative state for dataset from DN (even if it's # in the meta_cache). - dset_json = await getObjectJson( - app, dset_id, refresh=True, include_attrs=include_attrs, bucket=bucket - ) + kwargs = {"refresh": True, "include_attrs": include_attrs, "bucket": bucket} + dset_json = await getDsetJson(app, dset_id, **kwargs) # check that we have permissions to read the object await validateAction(app, domain, dset_id, username, "read") @@ -444,7 +441,7 @@ async def GET_DatasetType(request): # get authoritative state for group from DN (even if it's in # the meta_cache). - dset_json = await getObjectJson(app, dset_id, refresh=True, bucket=bucket) + dset_json = await getDsetJson(app, dset_id, refresh=True, bucket=bucket) await validateAction(app, domain, dset_id, username, "read") @@ -496,7 +493,7 @@ async def GET_DatasetShape(request): # get authoritative state for dataset from DN (even if it's in # the meta_cache). 
- dset_json = await getObjectJson(app, dset_id, refresh=True, bucket=bucket) + dset_json = await getDsetJson(app, dset_id, refresh=True, bucket=bucket) await validateAction(app, domain, dset_id, username, "read") @@ -601,9 +598,7 @@ async def PUT_DatasetShape(request): # verify the user has permission to update shape await validateAction(app, domain, dset_id, username, "update") - # get authoritative state for dataset from DN (even if it's in the - # meta_cache). - dset_json = await getObjectJson(app, dset_id, refresh=True, bucket=bucket) + dset_json = await getDsetJson(app, dset_id, bucket=bucket) shape_orig = dset_json["shape"] log.debug(f"shape_orig: {shape_orig}") @@ -666,44 +661,69 @@ async def PUT_DatasetShape(request): layout = getChunkLayout(dset_json) log.debug(f"got layout: {layout}") + delete_ids = set() # chunk ids that will need to be deleted for n in range(rank): if dims[n] <= shape_update[i]: log.debug(f"skip dimension {n}") continue log.debug(f"reinitialize for dimension: {n}") slices = [] + update_ids = set() # chunk ids that will need to be updated + for m in range(rank): if m == n: s = slice(shape_update[m], dims[m], 1) else: # just select the entire extent - s = slice(0, dims[m]) + s = slice(0, dims[m], 1) slices.append(s) log.debug(f"shape_reinitialize - got slices: {slices} for dimension: {n}") chunk_ids = getChunkIds(dset_id, slices, layout) log.debug(f"got chunkIds: {chunk_ids}") - chunk_ids.sort() - - crawler = ChunkCrawler( - app, - chunk_ids, - dset_json=dset_json, - bucket=bucket, - slices=slices, - arr=arr, - action="write_chunk_hyperslab", - ) - await crawler.crawl() - - crawler_status = crawler.get_status() - - if crawler_status not in (200, 201): - msg = f"crawler failed for shape reinitialize with status: {crawler_status}" - log.warn(msg) + # separate ids into those that overlap the new shape + # vs. those that follow entirely outside the new shape. 
+ # The former will need to be partiaally reset, the latter + # will need to be deleted + for chunk_id in chunk_ids: + if getChunkSelection(chunk_id, slices, layout) is None: + delete_ids.add(chunk_id) + else: + update_ids.add(chunk_id) + + if update_ids: + update_ids = list(update_ids) + update_ids.sort() + log.debug(f"these ids will need to be updated: {update_ids}") + + crawler = ChunkCrawler( + app, + update_ids, + dset_json=dset_json, + bucket=bucket, + slices=slices, + arr=arr, + action="write_chunk_hyperslab", + ) + await crawler.crawl() + + crawler_status = crawler.get_status() + + if crawler_status not in (200, 201): + msg = f"crawler failed for shape reinitialize with status: {crawler_status}" + log.warn(msg) + else: + msg = f"crawler success for reinitialization with slices: {slices}" + log.info(msg) else: - msg = f"crawler success for reinitialization with slices: {slices}" - log.info(msg) + log.info(f"no chunks need updating for shape reduction over dim {m}") + + if delete_ids: + delete_ids = list(delete_ids) + delete_ids.sort() + log.debug(f"these ids will need to be deleted: {delete_ids}") + else: + log.info("no chunks need deletion for shape reduction") # send request onto DN req = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id + "/shape" diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 416f1fc5..57711067 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -215,6 +215,31 @@ async def getObjectJson( return obj_json +async def getDsetJson(app, dset_id, + bucket=None, + refresh=False, + include_links=False, + include_attrs=False): + kwargs = {} + kwargs["bucket"] = bucket + kwargs["refresh"] = refresh + kwargs["include_links"] = include_links + kwargs["include_attrs"] = include_attrs + dset_json = await getObjectJson(app, dset_id, **kwargs) + if refresh: + # can just return the json + return dset_json + + # check to see if the dataspace is mutable + # if so, refresh if necessary + datashape = dset_json["shape"] + if "maxdims" in datashape: + log.debug("getDsetJson - refreshing json for mutable shape") + kwargs["refresh"] = True + dset_json = await getObjectJson(app, dset_id, **kwargs) + return dset_json + + async def getObjectIdByPath(app, obj_id, h5path, bucket=None, refresh=False, domain=None, follow_soft_links=False, follow_external_links=False): """Find the object at the provided h5path location. 
diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index ab23eb06..e7f5cf73 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -830,7 +830,23 @@ def testExtendDataset(self): self.assertTrue("root" in rspJson) root_uuid = rspJson["root"] - # create the dataset + # create non-extendable dataset + req = self.endpoint + "/datasets" + payload = {"type": "H5T_STD_I32LE", "shape": 10} + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # create dataset + rspJson = json.loads(rsp.text) + dset_uuid = rspJson["id"] + self.assertTrue(helper.validateId(dset_uuid)) + + # try extending it (should fail) + req = self.endpoint + "/datasets/" + dset_uuid + "/shape" + payload = {"extend": 5} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) + + # create extendable dataset req = self.endpoint + "/datasets" payload = {"type": "H5T_STD_I32LE", "shape": 10, "maxdims": 20} req = self.endpoint + "/datasets" diff --git a/tests/integ/pointsel_test.py b/tests/integ/pointsel_test.py index be2335fc..f14ad053 100755 --- a/tests/integ/pointsel_test.py +++ b/tests/integ/pointsel_test.py @@ -1569,12 +1569,7 @@ def testSelect2DDataset(self): rspJson = json.loads(rsp.text) self.assertTrue("hrefs" in rspJson) self.assertTrue("value" in rspJson) - self.assertEqual( - rspJson["value"], - [ - [30, 32, 35], - ], - ) + self.assertEqual(rspJson["value"], [[30, 32, 35],], ) # read a coordinate selection with binary response rsp = self.session.post(req, data=json.dumps(body), headers=headers_bin_rsp) @@ -1583,86 +1578,6 @@ def testSelect2DDataset(self): self.assertEqual(len(data), 3 * 4) self.assertEqual(data, b"\x1e\x00\x00\x00 \x00\x00\x00#\x00\x00\x00") - def testShapeUpdate(self): - - # Test selecting points after shape has been updated - print("testShapeUpdate", self.base_domain) - - points = [75,] - - headers = helper.getRequestHeaders(domain=self.base_domain) - req = self.endpoint + "/" - - # Get root uuid - rsp = self.session.get(req, headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - root_uuid = rspJson["root"] - helper.validateId(root_uuid) - - # create dataset - data = {"type": "H5T_STD_I32LE", "shape": (100,), "maxdims": (100,)} - - req = self.endpoint + "/datasets" - rsp = self.session.post(req, data=json.dumps(data), headers=headers) - self.assertEqual(rsp.status_code, 201) - rspJson = json.loads(rsp.text) - dset_id = rspJson["id"] - self.assertTrue(helper.validateId(dset_id)) - - # link new dataset as 'dset1d' - name = "dset" - req = self.endpoint + "/groups/" + root_uuid + "/links/" + name - payload = {"id": dset_id} - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) - - # write to the dset - data = list(range(100)) - data.reverse() # 99, 98, ..., 0 - - payload = {"value": data} - req = self.endpoint + "/datasets/" + dset_id + "/value" - - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 200) - - body = {"points": points} - # read selected points - rsp = self.session.post(req, data=json.dumps(body), headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - self.assertTrue("value" in rspJson) - ret_value = rspJson["value"] - self.assertEqual(len(ret_value), len(points)) - expected_result = [24, ] - - 
self.assertEqual(ret_value, expected_result) - - # resize the dataset to the small shape - req = self.endpoint + "/datasets/" + dset_id + "/shape" - payload = {"shape": 50} - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) - rspJson = json.loads(rsp.text) - - # should get a 400 now - req = self.endpoint + "/datasets/" + dset_id + "/value" - rsp = self.session.post(req, data=json.dumps(body), headers=headers) - self.assertEqual(rsp.status_code, 400) - - # resize back to large shape - payload = {"shape": 100} - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) - rspJson = json.loads(rsp.text) - - # read point again - req = self.endpoint + "/datasets/" + dset_id + "/value" - rsp = self.session.post(req, data=json.dumps(body), headers=headers) - self.assertEqual(rsp.status_code, 200) - - if __name__ == "__main__": # setup test files diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 18aac784..face21a7 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -3218,10 +3218,10 @@ def testValueReinitialization1D(self): self.assertEqual(value[0:5], data[0:5]) self.assertEqual(value[5:10], [42,] * 5) - def testValueReinitialization2D(self): + def testShapeReinitialization2D(self): # Test the dataset values get reset after a reduction and resize - print("testValueReinitialization1D", self.base_domain) + print("testShapeReinitialization2D", self.base_domain) headers = helper.getRequestHeaders(domain=self.base_domain) # get domain @@ -3316,6 +3316,97 @@ def testValueReinitialization2D(self): else: self.assertEqual(row[j], 0) + def testShapeReinitialization3D(self): + # Test the dataset values get reset after a reduction and resize + + print("testPointReinitialization3D", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get domain + req = f"{self.endpoint}/" + rsp = self.session.get(req, headers=headers) + rspJson = json.loads(rsp.text) + self.assertTrue("root" in rspJson) + root_uuid = rspJson["root"] + + # define two different shapes that we'll switch between + # min extent in each dimension is 20 for the point setup to work + large_shape = (110, 120, 130) + small_shape = (55, 60, 70) + + # setup some points on the diagonal + # space some points apart equally + delta = (large_shape[0] // 10, large_shape[1] // 10, large_shape[2] // 10) + offset = (5, 5, 5) + points = [] + for i in range(10): + if i == 0: + pt = offset + else: + last_pt = points[i - 1] + pt = (last_pt[0] + delta[0], last_pt[1] + delta[1], last_pt[2] + delta[2]) + for n in range(3): + if pt[n] >= large_shape[n]: + raise ValueError("pt outside extent") + points.append(pt) + + # create the dataset + req = f"{self.endpoint}/datasets" + payload = {"type": "H5T_STD_I32LE", "shape": large_shape, "maxdims": large_shape} + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # create dataset + rspJson = json.loads(rsp.text) + dset_uuid = rspJson["id"] + self.assertTrue(helper.validateId(dset_uuid)) + + # link new dataset as 'dset' + name = "dset" + req = f"{self.endpoint}/groups/{root_uuid}/links/{name}" + payload = {"id": dset_uuid} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + value = [1, ] * 10 # set value of each pt to one + + # write 1's to all the point locations + payload = 
{"points": points, "value": value} + req = f"{self.endpoint}/datasets/{dset_uuid}/value" + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 200) + + # resize the dataset to the small shape + req = f"{self.endpoint}/datasets/{dset_uuid}/shape" + payload = {"shape": small_shape} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + + # resize back to large shape + req = f"{self.endpoint}/datasets/{dset_uuid}/shape" + payload = {"shape": large_shape} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + + # read all the data values + req = f"{self.endpoint}/datasets/{dset_uuid}/value" + body = {"points": points} + # read selected points + rsp = self.session.post(req, data=json.dumps(body), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("value" in rspJson) + ret_value = rspJson["value"] + + for i in range(10): + pt = points[i] + n = ret_value[i] + if pt[0] >= small_shape[0] and pt[1] >= small_shape[1] and pt[2] >= small_shape[2]: + self.assertEqual(n, 0) + else: + self.assertEqual(n, 1) + if __name__ == "__main__": # setup test files From 21f08c0f7118c7bb3f9e1535d8b6e0401c4d88ef Mon Sep 17 00:00:00 2001 From: jreadey Date: Fri, 20 Oct 2023 18:15:45 -0700 Subject: [PATCH 12/17] delete chunk outside reduced shape region --- hsds/dset_sn.py | 38 +++++++++++++++++++------- hsds/servicenode_lib.py | 57 +++++++++++++++++++++++++++++++++++++-- tests/integ/value_test.py | 6 +++-- 3 files changed, 87 insertions(+), 14 deletions(-) diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index d4fb0a96..338a4cbc 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -34,7 +34,7 @@ from .util.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson from .util.hdf5dtype import getItemSize from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId -from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo +from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo, removeChunks from .chunk_crawl import ChunkCrawler from . import config from . 
import hsds_logger as log @@ -524,6 +524,7 @@ async def PUT_DatasetShape(request): shape_update = None extend = 0 extend_dim = 0 + hrefs = [] # TBD - define HATEOAS refs to return dset_id = request.match_info.get("id") if not dset_id: @@ -556,13 +557,16 @@ log.warn(msg) raise HTTPBadRequest(reason=msg) + if "shape" in data and "extend" in data: + msg = "PUT shape must have shape or extend key in body but not both" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + if "shape" in data: shape_update = data["shape"] if isinstance(shape_update, int): # convert to a list - shape_update = [ - shape_update, - ] + shape_update = [shape_update, ] log.debug(f"shape_update: {shape_update}") if "extend" in data: @@ -619,6 +623,23 @@ log.warn(msg) raise HTTPBadRequest(reason=msg) + if extend_dim < 0 or extend_dim >= rank: + msg = "Extension dimension must be less than rank and non-negative" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if shape_update is None: + # construct a shape update using original dims and extend dim and value + shape_update = dims.copy() + shape_update[extend_dim] = extend + + if shape_update == dims: + log.info("shape update is same as current dims, no action needed") + json_resp = {"hrefs": hrefs} + resp = await jsonResponse(request, json_resp, status=200) + log.response(request, resp=resp) + return resp + shape_reduction = False for i in range(rank): if shape_update and shape_update[i] < dims[i]: @@ -632,11 +653,6 @@ log.warn(msg) raise HTTPConflict() - if extend_dim < 0 or extend_dim >= rank: - msg = "Extension dimension must be less than rank and non-negative" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if shape_reduction: log.info(f"Shape extent reduced for dataset (rank: {rank})") @@ -718,17 +734,19 @@ else: log.info(f"no chunks need updating for shape reduction over dim {m}") + log.debug("chunk reinitialization complete") if delete_ids: delete_ids = list(delete_ids) delete_ids.sort() log.debug(f"these ids will need to be deleted: {delete_ids}") + await removeChunks(app, delete_ids, bucket=bucket) else: log.info("no chunks need deletion for shape reduction") # send request onto DN req = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id + "/shape" - json_resp = {"hrefs": []} + json_resp = {"hrefs": hrefs} params = {} if bucket: params["bucket"] = bucket diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 57711067..5c0047ab 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -13,9 +13,11 @@ # utility methods for service node handlers # +import asyncio + from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden from aiohttp.web_exceptions import HTTPNotFound, HTTPInternalServerError -from aiohttp.client_exceptions import ClientOSError +from aiohttp.client_exceptions import ClientOSError, ClientError from .util.authUtil import getAclKeys from .util.idUtil import getDataNodeUrl, getCollectionForId, isSchema2Id from .util.idUtil import getS3Key from .util.linkUtil import h5Join from .util.storUtil import getStorJSONObj, isStorObj from .util.authUtil import aclCheck -from .util.httpUtil import http_get +from .util.httpUtil import http_get, http_delete from .util.domainUtil import getBucketForDomain, verifyRoot from . 
import hsds_logger as log @@ -485,3 +487,54 @@ async def getRootInfo(app, root_id, bucket=None): return None return info_json + + +async def removeChunks(app, chunk_ids, bucket=None): + """ Remove chunks with the given ids """ + + log.info(f"removeChunks, {len(chunk_ids)} chunks") + log.debug(f"removeChunks for: {chunk_ids}") + + dn_urls = app["dn_urls"] + if not dn_urls: + log.error("removeChunks request, but no dn_urls") + raise HTTPInternalServerError() + + log.debug(f"doFlush - dn_urls: {dn_urls}") + params = {} + if bucket: + params["bucket"] = bucket + failed_count = 0 + + try: + tasks = [] + for chunk_id in chunk_ids: + dn_url = getDataNodeUrl(app, chunk_id) + req = dn_url + "/chunks/" + chunk_id + task = asyncio.ensure_future(http_delete(app, req, params=params)) + tasks.append(task) + done, pending = await asyncio.wait(tasks) + if pending: + # should be empty since we didn't use return_when parameter + log.error("removeChunks - got pending tasks") + raise HTTPInternalServerError() + for task in done: + if task.exception(): + exception_type = type(task.exception()) + msg = f"removeChunks - task had exception: {exception_type}" + log.warn(msg) + failed_count += 1 + + except ClientError as ce: + msg = f"removeChunks - ClientError: {ce}" + log.error(msg) + raise HTTPInternalServerError() + except asyncio.CancelledError as cle: + log.error(f"removeChunks - CancelledError: {cle}") + raise HTTPInternalServerError() + + if failed_count: + msg = f"removeChunks, failed count: {failed_count}" + log.error(msg) + else: + log.info(f"removeChunks complete for {len(chunk_ids)} chunks - no errors") diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index face21a7..fd9e2e29 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -3331,7 +3331,7 @@ def testShapeReinitialization3D(self): # define two different shapes that we'll switch between # min extent in each dimension is 20 for the point setup to work - large_shape = (110, 120, 130) + large_shape = (220, 120, 130) small_shape = (55, 60, 70) # setup some points on the diagonal @@ -3398,11 +3398,13 @@ def testShapeReinitialization3D(self): rspJson = json.loads(rsp.text) self.assertTrue("value" in rspJson) ret_value = rspJson["value"] + print(ret_value) for i in range(10): pt = points[i] n = ret_value[i] - if pt[0] >= small_shape[0] and pt[1] >= small_shape[1] and pt[2] >= small_shape[2]: + print(f"{pt}: {n}") + if pt[0] >= small_shape[0] or pt[1] >= small_shape[1] or pt[2] >= small_shape[2]: self.assertEqual(n, 0) else: self.assertEqual(n, 1) From dc6571f8d0cf3d85b2bf7a32fa3bbe531ba83ed4 Mon Sep 17 00:00:00 2001 From: jreadey Date: Sat, 21 Oct 2023 13:54:10 -0700 Subject: [PATCH 13/17] added dset_lib.py --- hsds/chunk_crawl.py | 12 ++++-------- hsds/chunk_sn.py | 12 ++++-------- hsds/datanode_lib.py | 12 ++++-------- hsds/dset_lib.py | 41 +++++++++++++++++++++++++++++++++++++++ hsds/dset_sn.py | 19 ++---------------- tests/integ/value_test.py | 2 -- 6 files changed, 55 insertions(+), 43 deletions(-) create mode 100755 hsds/dset_lib.py diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index dade9fa5..8660ad56 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -31,8 +31,10 @@ from .util.dsetUtil import getSelectionShape, getChunkLayout from .util.chunkUtil import getChunkCoverage, getDataCoverage from .util.chunkUtil import getChunkIdForPartition, getQueryDtype -from .util.arrayUtil import jsonToArray, getShapeDims, getNumpyValue +from .util.arrayUtil import jsonToArray, getShapeDims from 
.util.arrayUtil import getNumElements, arrayToBytes, bytesToArray +from .dset_lib import getFillValue + from . import config from . import hsds_logger as log @@ -422,14 +424,8 @@ async def read_point_sel( np_arr_rsp = None dt = np_arr.dtype - fill_value = None # initialize to fill_value if specified - if "creationProperties" in dset_json: - cprops = dset_json["creationProperties"] - if "fillValue" in cprops: - fill_value_prop = cprops["fillValue"] - encoding = cprops.get("fillValue_encoding") - fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding) + fill_value = getFillValue(dset_json) def defaultArray(): # no data, return zero array diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index f24cbfbe..b0bf8dff 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -39,10 +39,11 @@ from .util.chunkUtil import getQueryDtype, get_chunktable_dims from .util.arrayUtil import bytesArrayToList, jsonToArray, getShapeDims from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray -from .util.arrayUtil import squeezeArray, getNumpyValue, getBroadcastShape +from .util.arrayUtil import squeezeArray, getBroadcastShape from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.boolparser import BooleanParser from .servicenode_lib import getDsetJson, validateAction +from .dset_lib import getFillValue from .chunk_crawl import ChunkCrawler from . import config from . import hsds_logger as log @@ -1407,13 +1408,8 @@ async def doReadSelection( raise HTTPBadRequest(reason=msg) # initialize to fill_value if specified - fill_value = None - if "creationProperties" in dset_json: - cprops = dset_json["creationProperties"] - if "fillValue" in cprops: - fill_value_prop = cprops["fillValue"] - encoding = cprops.get("fillValue_encoding") - fill_value = getNumpyValue(fill_value_prop, dt=dset_dtype, encoding=encoding) + fill_value = getFillValue(dset_json) + if fill_value: arr = np.empty(np_shape, dtype=dset_dtype, order="C") arr[...] = fill_value diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index e87b063e..36d29ae3 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -32,12 +32,13 @@ from .util.dsetUtil import getChunkLayout, getFilterOps from .util.dsetUtil import getChunkInitializer, getSliceQueryParam from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex -from .util.arrayUtil import arrayToBytes, bytesToArray, getShapeDims, jsonToArray, getNumpyValue +from .util.arrayUtil import arrayToBytes, bytesToArray, getShapeDims, jsonToArray from .util.hdf5dtype import createDataType, getItemSize from .util.rangegetUtil import ChunkLocation, chunkMunge from . import config from . import hsds_logger as log +from .dset_lib import getFillValue # supported initializer commands INITIALIZER_CMDS = ["chunklocator", "arange"] @@ -1119,13 +1120,8 @@ async def get_chunk( if chunk_arr is None: # normal fill value based init or initializer failed - fill_value = None - if "creationProperties" in dset_json: - cprops = dset_json["creationProperties"] - if "fillValue" in cprops: - fill_value_prop = cprops["fillValue"] - encoding = cprops.get("fillValue_encoding") - fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding) + fill_value = getFillValue(dset_json) + if fill_value: chunk_arr = np.empty(dims, dtype=dt, order="C") chunk_arr[...] 
= fill_value diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py new file mode 100755 index 00000000..d23be6c0 --- /dev/null +++ b/hsds/dset_lib.py @@ -0,0 +1,41 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # +# Utilities. The full HSDS copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## + +import numpy as np + +from .util.hdf5dtype import createDataType +from .util.arrayUtil import getNumpyValue +from . import hsds_logger as log + + +def getFillValue(dset_json): + """ Return the fill value of the given dataset as a numpy array. + If no fill value is defined, return an zero array of given type """ + + fill_value = None + type_json = dset_json["type"] + dt = createDataType(type_json) + + if "creationProperties" in dset_json: + cprops = dset_json["creationProperties"] + if "fillValue" in cprops: + fill_value_prop = cprops["fillValue"] + log.debug(f"got fo;;+value_prop: {fill_value_prop}") + encoding = cprops.get("fillValue_encoding") + fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding) + if fill_value: + arr = np.empty((1,), dtype=dt, order="C") + arr[...] = fill_value + else: + arr = np.zeros([1,], dtype=dt, order="C") + + return arr diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 338a4cbc..49bfd514 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -15,7 +15,6 @@ # import math -import numpy as np from json import JSONDecodeError from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound, HTTPConflict @@ -35,6 +34,7 @@ from .util.hdf5dtype import getItemSize from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo, removeChunks +from .dset_lib import getFillValue from .chunk_crawl import ChunkCrawler from . import config from . import hsds_logger as log @@ -658,22 +658,7 @@ async def PUT_DatasetShape(request): # need to re-initialize any values that are now outside the shape # first get the fill value - fill_value = None - type_json = dset_json["type"] - dt = createDataType(type_json) - - if "creationProperties" in dset_json: - fill_value = None - cprops = dset_json["creationProperties"] - if "fillValue" in cprops: - fill_value_prop = cprops["fillValue"] - encoding = cprops.get("fillValue_encoding") - fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding) - if fill_value: - arr = np.empty((1,), dtype=dt, order="C") - arr[...] 
= fill_value - else: - arr = np.zeros([1,], dtype=dt, order="C") + arr = getFillValue(dset_json) layout = getChunkLayout(dset_json) log.debug(f"got layout: {layout}") diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index fd9e2e29..b3b15d31 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -3398,12 +3398,10 @@ def testShapeReinitialization3D(self): rspJson = json.loads(rsp.text) self.assertTrue("value" in rspJson) ret_value = rspJson["value"] - print(ret_value) for i in range(10): pt = points[i] n = ret_value[i] - print(f"{pt}: {n}") if pt[0] >= small_shape[0] or pt[1] >= small_shape[1] or pt[2] >= small_shape[2]: self.assertEqual(n, 0) else: From 8b2bdd1d5c3c935f599544402954ac99dcaaedd4 Mon Sep 17 00:00:00 2001 From: jreadey Date: Sat, 21 Oct 2023 14:58:17 -0700 Subject: [PATCH 14/17] refactored reduce shape to dset_lib --- hsds/chunk_crawl.py | 30 +++++++- hsds/dset_lib.py | 150 ++++++++++++++++++++++++++++++++++++++++ hsds/dset_sn.py | 89 +++--------------------- hsds/servicenode_lib.py | 60 +--------------- 4 files changed, 192 insertions(+), 137 deletions(-) diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 8660ad56..3e655fd2 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -31,9 +31,8 @@ from .util.dsetUtil import getSelectionShape, getChunkLayout from .util.chunkUtil import getChunkCoverage, getDataCoverage from .util.chunkUtil import getChunkIdForPartition, getQueryDtype -from .util.arrayUtil import jsonToArray, getShapeDims +from .util.arrayUtil import jsonToArray, getShapeDims, getNumpyValue from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray -from .dset_lib import getFillValue from . import config from . import hsds_logger as log @@ -45,6 +44,33 @@ ) +def getFillValue(dset_json): + """ Return the fill value of the given dataset as a numpy array. + If no fill value is defined, return an zero array of given type """ + + # NOTE - this is copy of the function in dset_lib, but needed to put + # here to avoid circular dependency + + fill_value = None + type_json = dset_json["type"] + dt = createDataType(type_json) + + if "creationProperties" in dset_json: + cprops = dset_json["creationProperties"] + if "fillValue" in cprops: + fill_value_prop = cprops["fillValue"] + log.debug(f"got fo;;+value_prop: {fill_value_prop}") + encoding = cprops.get("fillValue_encoding") + fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding) + if fill_value: + arr = np.empty((1,), dtype=dt, order="C") + arr[...] = fill_value + else: + arr = np.zeros([1,], dtype=dt, order="C") + + return arr + + async def write_chunk_hyperslab( app, chunk_id, dset_json, slices, arr, bucket=None, client=None ): diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index d23be6c0..36a8e34f 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -10,11 +10,20 @@ # request a copy from help@hdfgroup.org. # ############################################################################## +import asyncio import numpy as np +from aiohttp.client_exceptions import ClientError + from .util.hdf5dtype import createDataType from .util.arrayUtil import getNumpyValue +from .util.dsetUtil import getChunkLayout +from .util.chunkUtil import getChunkIds, getChunkSelection +from .util.idUtil import getDataNodeUrl +from .util.httpUtil import http_delete + from . 
import hsds_logger as log +from .chunk_crawl import ChunkCrawler def getFillValue(dset_json): @@ -39,3 +48,144 @@ def getFillValue(dset_json): arr = np.zeros([1,], dtype=dt, order="C") return arr + + +async def removeChunks(app, chunk_ids, bucket=None): + """ Remove chunks with the given ids """ + + # this should only be called from a SN + + log.info(f"removeChunks, {len(chunk_ids)} chunks") + log.debug(f"removeChunks for: {chunk_ids}") + + dn_urls = app["dn_urls"] + if not dn_urls: + log.error("removeChunks request, but no dn_urls") + raise ValueError() + + log.debug(f"doFlush - dn_urls: {dn_urls}") + params = {} + if bucket: + params["bucket"] = bucket + failed_count = 0 + + try: + tasks = [] + for chunk_id in chunk_ids: + dn_url = getDataNodeUrl(app, chunk_id) + req = dn_url + "/chunks/" + chunk_id + task = asyncio.ensure_future(http_delete(app, req, params=params)) + tasks.append(task) + done, pending = await asyncio.wait(tasks) + if pending: + # should be empty since we didn't use return_when parameter + log.error("removeChunks - got pending tasks") + raise ValueError() + for task in done: + if task.exception(): + exception_type = type(task.exception()) + msg = f"removeChunks - task had exception: {exception_type}" + log.warn(msg) + failed_count += 1 + + except ClientError as ce: + msg = f"removeChunks - ClientError: {ce}" + log.error(msg) + raise ValueError() + except asyncio.CancelledError as cle: + log.error(f"removeChunks - CancelledError: {cle}") + raise ValueError() + + if failed_count: + msg = f"removeChunks, failed count: {failed_count}" + log.error(msg) + else: + log.info(f"removeChunks complete for {len(chunk_ids)} chunks - no errors") + + +async def reduceShape(app, dset_json, shape_update, bucket=None): + """ Given an existing dataset and a new shape, + Reinitialize and edge chunks and delete any chunks + that fall entirely out of the new shape region """ + + dset_id = dset_json["id"] + log.info(f"reduceShape for {dset_id} to {shape_update}") + + # get the current shape dims + shape_orig = dset_json["shape"] + if shape_orig["class"] != "H5S_SIMPLE": + raise ValueError("reduceShape can only be called on simple datasets") + dims = shape_orig["dims"] + rank = len(dims) + + # get the fill value + arr = getFillValue(dset_json) + + # and the chunk layout + layout = getChunkLayout(dset_json) + log.debug(f"got layout: {layout}") + delete_ids = set() # chunk ids that will need to be deleted + for n in range(rank): + if dims[n] <= shape_update[n]: + log.debug(f"skip dimension {n}") + continue + log.debug(f"reinitialize for dimension: {n}") + slices = [] + update_ids = set() # chunk ids that will need to be updated + + for m in range(rank): + if m == n: + s = slice(shape_update[m], dims[m], 1) + else: + # just select the entire extent + s = slice(0, dims[m], 1) + slices.append(s) + log.debug(f"shape_reinitialize - got slices: {slices} for dimension: {n}") + chunk_ids = getChunkIds(dset_id, slices, layout) + log.debug(f"got chunkIds: {chunk_ids}") + + # separate ids into those that overlap the new shape + # vs. those that follow entirely outside the new shape. 
+ # The former will need to be partiaally reset, the latter + # will need to be deleted + for chunk_id in chunk_ids: + if getChunkSelection(chunk_id, slices, layout) is None: + delete_ids.add(chunk_id) + else: + update_ids.add(chunk_id) + + if update_ids: + update_ids = list(update_ids) + update_ids.sort() + log.debug(f"these ids will need to be updated: {update_ids}") + + crawler = ChunkCrawler( + app, + update_ids, + dset_json=dset_json, + bucket=bucket, + slices=slices, + arr=arr, + action="write_chunk_hyperslab", + ) + await crawler.crawl() + + crawler_status = crawler.get_status() + + if crawler_status not in (200, 201): + msg = f"crawler failed for shape reinitialize with status: {crawler_status}" + log.warn(msg) + else: + msg = f"crawler success for reinitialization with slices: {slices}" + log.info(msg) + else: + log.info(f"no chunks need updating for shape reduction over dim {m}") + + log.debug("chunk reinitialization complete") + if delete_ids: + delete_ids = list(delete_ids) + delete_ids.sort() + log.debug(f"these ids will need to be deleted: {delete_ids}") + await removeChunks(app, delete_ids, bucket=bucket) + else: + log.info("no chunks need deletion for shape reduction") diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 49bfd514..e3f537d3 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -16,15 +16,16 @@ import math from json import JSONDecodeError -from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound, HTTPConflict +from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound +from aiohttp.web_exceptions import HTTPConflict, HTTPInternalServerError from .util.httpUtil import http_post, http_put, http_delete, getHref, respJsonAssemble from .util.httpUtil import jsonResponse from .util.idUtil import isValidUuid, getDataNodeUrl, createObjId, isSchema2Id -from .util.dsetUtil import getPreviewQuery, getFilterItem, getChunkLayout +from .util.dsetUtil import getPreviewQuery, getFilterItem from .util.arrayUtil import getNumElements, getShapeDims, getNumpyValue from .util.chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk -from .util.chunkUtil import getContiguousLayout, getChunkIds, getChunkSelection +from .util.chunkUtil import getContiguousLayout from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain @@ -33,9 +34,8 @@ from .util.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson from .util.hdf5dtype import getItemSize from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId -from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo, removeChunks -from .dset_lib import getFillValue -from .chunk_crawl import ChunkCrawler +from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo +from .dset_lib import reduceShape from . import config from . 
import hsds_logger as log @@ -655,78 +655,11 @@ async def PUT_DatasetShape(request): if shape_reduction: log.info(f"Shape extent reduced for dataset (rank: {rank})") - - # need to re-initialize any values that are now outside the shape - # first get the fill value - arr = getFillValue(dset_json) - - layout = getChunkLayout(dset_json) - log.debug(f"got layout: {layout}") - delete_ids = set() # chunk ids that will need to be deleted - for n in range(rank): - if dims[n] <= shape_update[i]: - log.debug(f"skip dimension {n}") - continue - log.debug(f"reinitialize for dimension: {n}") - slices = [] - update_ids = set() # chunk ids that will need to be updated - - for m in range(rank): - if m == n: - s = slice(shape_update[m], dims[m], 1) - else: - # just select the entire extent - s = slice(0, dims[m], 1) - slices.append(s) - log.debug(f"shape_reinitialize - got slices: {slices} for dimension: {n}") - chunk_ids = getChunkIds(dset_id, slices, layout) - log.debug(f"got chunkIds: {chunk_ids}") - - # separate ids into those that overlap the new shape - # vs. those that follow entirely outside the new shape. - # The former will need to be partiaally reset, the latter - # will need to be deleted - for chunk_id in chunk_ids: - if getChunkSelection(chunk_id, slices, layout) is None: - delete_ids.add(chunk_id) - else: - update_ids.add(chunk_id) - - if update_ids: - update_ids = list(update_ids) - update_ids.sort() - log.debug(f"these ids will need to be updated: {update_ids}") - - crawler = ChunkCrawler( - app, - update_ids, - dset_json=dset_json, - bucket=bucket, - slices=slices, - arr=arr, - action="write_chunk_hyperslab", - ) - await crawler.crawl() - - crawler_status = crawler.get_status() - - if crawler_status not in (200, 201): - msg = f"crawler failed for shape reinitialize with status: {crawler_status}" - log.warn(msg) - else: - msg = f"crawler success for reinitialization with slices: {slices}" - log.info(msg) - else: - log.info(f"no chunks need updating for shape reduction over dim {m}") - - log.debug("chunk reinitialization complete") - if delete_ids: - delete_ids = list(delete_ids) - delete_ids.sort() - log.debug(f"these ids will need to be deleted: {delete_ids}") - await removeChunks(app, delete_ids, bucket=bucket) - else: - log.info("no chunks need deletion for shape reduction") + try: + await reduceShape(app, dset_json, shape_update, bucket=bucket) + except ValueError as ve: + msg = f"reduceShape for {dset_id} to {shape_update} resulted in exception: {ve}" + raise HTTPInternalServerError() # send request onto DN req = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id + "/shape" diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 5c0047ab..abb312ae 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -13,19 +13,16 @@ # utility methods for service node handlers # -import asyncio - from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden from aiohttp.web_exceptions import HTTPNotFound, HTTPInternalServerError -from aiohttp.client_exceptions import ClientOSError, ClientError +from aiohttp.client_exceptions import ClientOSError from .util.authUtil import getAclKeys -from .util.idUtil import getDataNodeUrl, getCollectionForId, isSchema2Id -from .util.idUtil import getS3Key +from .util.idUtil import getDataNodeUrl, getCollectionForId, isSchema2Id, getS3Key from .util.linkUtil import h5Join from .util.storUtil import getStorJSONObj, isStorObj from .util.authUtil import aclCheck -from .util.httpUtil import http_get, http_delete +from .util.httpUtil 
import http_get from .util.domainUtil import getBucketForDomain, verifyRoot from . import hsds_logger as log @@ -487,54 +484,3 @@ async def getRootInfo(app, root_id, bucket=None): return None return info_json - - -async def removeChunks(app, chunk_ids, bucket=None): - """ Remove chunks with the given ids """ - - log.info(f"removeChunks, {len(chunk_ids)} chunks") - log.debug(f"removeChunks for: {chunk_ids}") - - dn_urls = app["dn_urls"] - if not dn_urls: - log.error("removeChunks request, but no dn_urls") - raise HTTPInternalServerError() - - log.debug(f"doFlush - dn_urls: {dn_urls}") - params = {} - if bucket: - params["bucket"] = bucket - failed_count = 0 - - try: - tasks = [] - for chunk_id in chunk_ids: - dn_url = getDataNodeUrl(app, chunk_id) - req = dn_url + "/chunks/" + chunk_id - task = asyncio.ensure_future(http_delete(app, req, params=params)) - tasks.append(task) - done, pending = await asyncio.wait(tasks) - if pending: - # should be empty since we didn't use return_when parameter - log.error("removeChunks - got pending tasks") - raise HTTPInternalServerError() - for task in done: - if task.exception(): - exception_type = type(task.exception()) - msg = f"removeChunks - task had exception: {exception_type}" - log.warn(msg) - failed_count += 1 - - except ClientError as ce: - msg = f"removeChunks - ClientError: {ce}" - log.error(msg) - raise HTTPInternalServerError() - except asyncio.CancelledError as cle: - log.error(f"removeChunks - CancelledError: {cle}") - raise HTTPInternalServerError() - - if failed_count: - msg = f"removeChunks, failed count: {failed_count}" - log.error(msg) - else: - log.info(f"removeChunks complete for {len(chunk_ids)} chunks - no errors") From 4dc6a06f8aea58460a898fe4d77eb567a1499608 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 23 Oct 2023 11:04:12 -0700 Subject: [PATCH 15/17] determine allocated chunks for shape reduction --- hsds/attr_sn.py | 4 +- hsds/chunk_crawl.py | 26 ++++--- hsds/ctype_sn.py | 5 +- hsds/domain_sn.py | 57 +------------- hsds/dset_lib.py | 153 +++++++++++++++++++++++++++++--------- hsds/dset_sn.py | 11 ++- hsds/group_sn.py | 5 +- hsds/servicenode_lib.py | 59 ++++++++++++++- hsds/util/chunkUtil.py | 6 +- tests/integ/value_test.py | 2 +- 10 files changed, 214 insertions(+), 114 deletions(-) diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index 4b9db23a..d3dd648a 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -377,7 +377,7 @@ async def PUT_Attribute(request): msg = "Bad Request: input data doesn't match selection" log.warn(msg) raise HTTPBadRequest(reason=msg) - log.info(f"Got: {arr.size} array elements") + log.debug(f"Got: {arr.size} array elements") else: value = None @@ -717,7 +717,7 @@ async def PUT_AttributeValue(request): msg = "Bad Request: input data doesn't match selection" log.warn(msg) raise HTTPBadRequest(reason=msg) - log.info(f"Got: {arr.size} array elements") + log.debug(f"Got: {arr.size} array elements") # ready to add attribute now attr_json = {} diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 3e655fd2..02930993 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -16,6 +16,7 @@ import asyncio import time +import traceback import random from asyncio import CancelledError import numpy as np @@ -100,7 +101,11 @@ async def write_chunk_hyperslab( params = {} layout = getChunkLayout(dset_json) + log.debug(f"getChunkCoverage({chunk_id}, {slices}, {layout})") chunk_sel = getChunkCoverage(chunk_id, slices, layout) + if chunk_sel is None: + log.warn(f"getChunkCoverage returned None for: 
{chunk_id}, {slices}, {layout}") + return log.debug(f"chunk_sel: {chunk_sel}") data_sel = getDataCoverage(chunk_id, slices, layout) log.debug(f"data_sel: {data_sel}") @@ -857,26 +862,23 @@ async def do_work(self, chunk_id, client=None): ) except HTTPServiceUnavailable as sue: status_code = 503 - log.warn( - f"HTTPServiceUnavailable for {self._action}({chunk_id}): {sue}" - ) + msg = f"HTTPServiceUnavailable for {self._action}({chunk_id}): {sue}" + log.warn(msg) except Exception as e: status_code = 500 - log.error( - f"Unexpected exception {type(e)} for {self._action}({chunk_id}): {e} " - ) + msg = f"Unexpected exception {type(e)} for {self._action}({chunk_id}): {e} " + log.error(msg) + tb = traceback.format_exc() + print("traceback:", tb) retry += 1 if status_code == 200: break if retry == max_retries: - log.error( - f"ChunkCrawler action: {self._action} failed after: {retry} retries" - ) + msg = f"ChunkCrawler action: {self._action} failed after: {retry} retries" + log.error(msg) else: sleep_time = retry_exp * 2 ** retry + random.uniform(0, 0.1) - log.warn( - f"ChunkCrawler.doWork - retry: {retry}, sleeping for {sleep_time:.2f}" - ) + msg = f"ChunkCrawler.doWork - retry: {retry}, sleeping for {sleep_time:.2f}" await asyncio.sleep(sleep_time) # save status_code diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index 01be4bac..f3d0236e 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -72,7 +72,10 @@ async def GET_Datatype(request): msg = "h5paths must be absolute" log.warn(msg) raise HTTPBadRequest(reason=msg) - log.info(f"GET_Datatype, h5path: {h5path}") + msg = f"GET_Datatype, h5path: {h5path}" + if group_id: + msg += f" group_id: {group_id}" + log.info(msg) username, pswd = getUserPasswordFromRequest(request) if username is None and app["allow_noauth"]: diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index e3724cad..8ca13f72 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -13,7 +13,6 @@ # service node of hsds cluster # -from asyncio import CancelledError import asyncio import json import os.path as op @@ -23,7 +22,6 @@ from aiohttp.web_exceptions import HTTPGone, HTTPInternalServerError from aiohttp.web_exceptions import HTTPConflict, HTTPServiceUnavailable from aiohttp import ClientResponseError -from aiohttp.client_exceptions import ClientError from aiohttp.web import json_response from requests.sessions import merge_setting @@ -41,7 +39,7 @@ from .util.boolparser import BooleanParser from .util.globparser import globmatch from .servicenode_lib import getDomainJson, getObjectJson, getObjectIdByPath -from .servicenode_lib import getRootInfo, checkBucketAccess +from .servicenode_lib import getRootInfo, checkBucketAccess, doFlush from .basenode import getVersion from . import hsds_logger as log from . 
import config @@ -893,59 +891,6 @@ async def GET_Domain(request): return resp -async def doFlush(app, root_id, bucket=None): - """return wnen all DN nodes have wrote any pending changes to S3""" - log.info(f"doFlush {root_id}") - params = {"flush": 1} - if bucket: - params["bucket"] = bucket - dn_urls = app["dn_urls"] - dn_ids = [] - log.debug(f"doFlush - dn_urls: {dn_urls}") - failed_count = 0 - - try: - tasks = [] - for dn_url in dn_urls: - req = dn_url + "/groups/" + root_id - task = asyncio.ensure_future(http_put(app, req, params=params)) - tasks.append(task) - done, pending = await asyncio.wait(tasks) - if pending: - # should be empty since we didn't use return_when parameter - log.error("doFlush - got pending tasks") - raise HTTPInternalServerError() - for task in done: - if task.exception(): - exception_type = type(task.exception()) - msg = f"doFlush - task had exception: {exception_type}" - log.warn(msg) - failed_count += 1 - else: - json_rsp = task.result() - log.debug(f"PUT /groups rsp: {json_rsp}") - if json_rsp and "id" in json_rsp: - dn_ids.append(json_rsp["id"]) - else: - log.error("expected dn_id in flush response from DN") - except ClientError as ce: - msg = f"doFlush - ClientError for http_put('/groups/{root_id}'): {ce}" - log.error(msg) - raise HTTPInternalServerError() - except CancelledError as cle: - log.error(f"doFlush - CancelledError '/groups/{root_id}'): {cle}") - raise HTTPInternalServerError() - msg = f"doFlush for {root_id} complete, failed: {failed_count} " - msg += f"out of {len(dn_urls)}" - log.info(msg) - if failed_count > 0: - log.error(f"doFlush fail count: {failed_count} returning 500") - raise HTTPInternalServerError() - else: - log.info("doFlush no fails, returning dn ids") - return dn_ids - - async def getScanTime(app, root_id, bucket=None): """ Return timestamp for the last scan of the given root id """ root_scan = 0 diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 36a8e34f..7666a94a 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -18,11 +18,13 @@ from .util.hdf5dtype import createDataType from .util.arrayUtil import getNumpyValue from .util.dsetUtil import getChunkLayout -from .util.chunkUtil import getChunkIds, getChunkSelection -from .util.idUtil import getDataNodeUrl +from .util.chunkUtil import getChunkCoordinate +from .util.idUtil import getDataNodeUrl, isSchema2Id, getS3Key, getObjId +from .util.storUtil import getStorKeys from .util.httpUtil import http_delete from . import hsds_logger as log +from . import config from .chunk_crawl import ChunkCrawler @@ -103,6 +105,58 @@ async def removeChunks(app, chunk_ids, bucket=None): log.info(f"removeChunks complete for {len(chunk_ids)} chunks - no errors") +async def getAllocatedChunkIds(app, dset_id, bucket=None): + """ Return the set of allocated chunk ids for the give dataset. 
+ If slices is given, just return chunks that interesect with the slice region """ + + log.info(f"getAllocatedChunkIds for {dset_id}") + + if not isSchema2Id(dset_id): + msg = f"no tabulation for schema v1 id: {dset_id} returning " + msg += "null results" + log.warn(msg) + return {} + + if not bucket: + bucket = config.get("bucket_name") + if not bucket: + raise ValueError(f"no bucket defined for getAllocatedChunkIds for {dset_id}") + + root_key = getS3Key(dset_id) + log.debug(f"got root_key: {root_key}") + + if not root_key.endswith("/.dataset.json"): + raise ValueError("unexpected root key") + + root_prefix = root_key[: -(len(".dataset.json"))] + + log.debug(f"scanRoot - using prefix: {root_prefix}") + + kwargs = { + "prefix": root_prefix, + "include_stats": False, + "bucket": bucket, + } + s3keys = await getStorKeys(app, **kwargs) + + # getStoreKeys will pick up the dataset.json as well, + # so go through and discard + chunk_ids = [] + for s3key in s3keys: + if s3key.endswith("json"): + # ignore metadata items + continue + try: + chunk_id = getObjId(s3key) + except ValueError: + log.warn(f"ignoring s3key: {s3key}") + continue + chunk_ids.append(chunk_id) + + log.debug(f"getAllocattedChunkIds - got {len(chunk_ids)} ids") + return chunk_ids + + async def reduceShape(app, dset_json, shape_update, bucket=None): """ Given an existing dataset and a new shape, Reinitialize and edge chunks and delete any chunks @@ -122,42 +176,68 @@ async def reduceShape(app, dset_json, shape_update, bucket=None): arr = getFillValue(dset_json) # and the chunk layout - layout = getChunkLayout(dset_json) + layout = tuple(getChunkLayout(dset_json)) log.debug(f"got layout: {layout}") - delete_ids = set() # chunk ids that will need to be deleted - for n in range(rank): - if dims[n] <= shape_update[n]: - log.debug(f"skip dimension {n}") + + # get all chunk ids for chunks that have been allocated + chunk_ids = await getAllocatedChunkIds(app, dset_id, bucket=bucket) + chunk_ids.sort() + + log.debug(f"got chunkIds: {chunk_ids}") + + # separate ids into three groups: + # A: those are entirely inside the new shape region - no action needed + # B: those that overlap the new shape - will need the edge portion reinitialized + # C: those that are entirely outside the new shape - will need to be deleted + + delete_ids = [] # chunk ids for chunk that that will need to be deleted + update_ids = [] # chunk ids for chunks that will need to be reinitialized + + for chunk_id in chunk_ids: + log.debug(f"chunk_id: {chunk_id}") + chunk_coord = getChunkCoordinate(chunk_id, layout) + log.debug(f"chunk_coord: {chunk_coord}") + skip = True + for i in range(rank): + if chunk_coord[i] + layout[i] > shape_update[i]: + skip = False + break + if skip: + log.debug(f"chunk_id {chunk_id} no action needed") continue - log.debug(f"reinitialize for dimension: {n}") - slices = [] - update_ids = set() # chunk ids that will need to be updated - for m in range(rank): - if m == n: - s = slice(shape_update[m], dims[m], 1) - else: - # just select the entire extent - s = slice(0, dims[m], 1) - slices.append(s) - log.debug(f"shape_reinitialize - got slices: {slices} for dimension: {n}") - chunk_ids = getChunkIds(dset_id, slices, layout) - log.debug(f"got chunkIds: {chunk_ids}") - - # separate ids into those that overlap the new shape - # vs. those that follow entirely outside the new shape. 
- # The former will need to be partiaally reset, the latter - # will need to be deleted - for chunk_id in chunk_ids: - if getChunkSelection(chunk_id, slices, layout) is None: - delete_ids.add(chunk_id) - else: - update_ids.add(chunk_id) + reinit = False + for n in range(rank): + if chunk_coord[n] < shape_update[n]: + reinit = True + break + if reinit: + log.debug("chunk reinit") + update_ids.append(chunk_id) + else: + log.debug("chunk delete") + delete_ids.append(chunk_id) - if update_ids: - update_ids = list(update_ids) - update_ids.sort() - log.debug(f"these ids will need to be updated: {update_ids}") + msg = f"reduceShape - from {len(chunk_ids)} chunks, {len(update_ids)} will need to be " + msg += f"updated and {len(delete_ids)} will need to deleted" + log.info(msg) + + if update_ids: + log.debug(f"these ids will need to be updated: {update_ids}") + + # For multidimensional datasets, may need multiple hyperslab writes + # go through each dimension and calculate region to update + + for n in range(rank): + slices = [] + + for m in range(rank): + if m == n: + s = slice(shape_update[m], dims[m], 1) + else: + # just select the entire extent + s = slice(0, dims[m], 1) + slices.append(s) crawler = ChunkCrawler( app, @@ -178,10 +258,11 @@ async def reduceShape(app, dset_json, shape_update, bucket=None): else: msg = f"crawler success for reinitialization with slices: {slices}" log.info(msg) - else: - log.info(f"no chunks need updating for shape reduction over dim {m}") + else: + log.info("no chunks need updating for shape reduction") log.debug("chunk reinitialization complete") + if delete_ids: delete_ids = list(delete_ids) delete_ids.sort() diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index e3f537d3..00bfcd7f 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -34,7 +34,7 @@ from .util.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson from .util.hdf5dtype import getItemSize from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId -from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo +from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo, doFlush from .dset_lib import reduceShape from . import config from . 
import hsds_logger as log @@ -306,7 +306,10 @@ async def GET_Dataset(request): msg = "h5paths must be absolute" log.warn(msg) raise HTTPBadRequest(reason=msg) - log.info(f"GET_Dataset, h5path: {h5path}") + msg = f"GET_Dataset, h5path: {h5path}" + if group_id: + msg += f" group_id: {group_id}" + log.info(msg) username, pswd = getUserPasswordFromRequest(request) if username is None and app["allow_noauth"]: @@ -655,10 +658,14 @@ async def PUT_DatasetShape(request): if shape_reduction: log.info(f"Shape extent reduced for dataset (rank: {rank})") + root_id = dset_json["root"] + # need to do a flush to know which chunks to update or delete + await doFlush(app, root_id, bucket=bucket) try: await reduceShape(app, dset_json, shape_update, bucket=bucket) except ValueError as ve: msg = f"reduceShape for {dset_id} to {shape_update} resulted in exception: {ve}" + log.error(msg) raise HTTPInternalServerError() # send request onto DN diff --git a/hsds/group_sn.py b/hsds/group_sn.py index dcdf04d9..98d58ed1 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -62,7 +62,10 @@ async def GET_Group(request): msg = "h5paths must be absolute if no parent id is provided" log.warn(msg) raise HTTPBadRequest(reason=msg) - log.info(f"GET_Group, h5path: {h5path}") + msg = f"GET_Group, h5path: {h5path}" + if group_id: + msg += f" group_id: {group_id}" + log.info(msg) if "include_links" in params and params["include_links"]: include_links = True if "include_attrs" in params and params["include_attrs"]: diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index abb312ae..2a7b45c1 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -13,16 +13,18 @@ # utility methods for service node handlers # +import asyncio + from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden from aiohttp.web_exceptions import HTTPNotFound, HTTPInternalServerError -from aiohttp.client_exceptions import ClientOSError +from aiohttp.client_exceptions import ClientOSError, ClientError from .util.authUtil import getAclKeys from .util.idUtil import getDataNodeUrl, getCollectionForId, isSchema2Id, getS3Key from .util.linkUtil import h5Join from .util.storUtil import getStorJSONObj, isStorObj from .util.authUtil import aclCheck -from .util.httpUtil import http_get +from .util.httpUtil import http_get, http_put from .util.domainUtil import getBucketForDomain, verifyRoot from . 
import hsds_logger as log @@ -484,3 +486,56 @@ async def getRootInfo(app, root_id, bucket=None): return None return info_json + + +async def doFlush(app, root_id, bucket=None): + """return wnen all DN nodes have wrote any pending changes to S3""" + log.info(f"doFlush {root_id}") + params = {"flush": 1} + if bucket: + params["bucket"] = bucket + dn_urls = app["dn_urls"] + dn_ids = [] + log.debug(f"doFlush - dn_urls: {dn_urls}") + failed_count = 0 + + try: + tasks = [] + for dn_url in dn_urls: + req = dn_url + "/groups/" + root_id + task = asyncio.ensure_future(http_put(app, req, params=params)) + tasks.append(task) + done, pending = await asyncio.wait(tasks) + if pending: + # should be empty since we didn't use return_when parameter + log.error("doFlush - got pending tasks") + raise HTTPInternalServerError() + for task in done: + if task.exception(): + exception_type = type(task.exception()) + msg = f"doFlush - task had exception: {exception_type}" + log.warn(msg) + failed_count += 1 + else: + json_rsp = task.result() + log.debug(f"PUT /groups rsp: {json_rsp}") + if json_rsp and "id" in json_rsp: + dn_ids.append(json_rsp["id"]) + else: + log.error("expected dn_id in flush response from DN") + except ClientError as ce: + msg = f"doFlush - ClientError for http_put('/groups/{root_id}'): {ce}" + log.error(msg) + raise HTTPInternalServerError() + except asyncio.CancelledError as cle: + log.error(f"doFlush - CancelledError '/groups/{root_id}'): {cle}") + raise HTTPInternalServerError() + msg = f"doFlush for {root_id} complete, failed: {failed_count} " + msg += f"out of {len(dn_urls)}" + log.info(msg) + if failed_count > 0: + log.error(f"doFlush fail count: {failed_count} returning 500") + raise HTTPInternalServerError() + else: + log.info("doFlush no fails, returning dn ids") + return dn_ids diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 87bdb40c..88059a57 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -557,7 +557,6 @@ def getChunkCoordinate(chunk_id, layout): coord = getChunkIndex(chunk_id) for i in range(len(layout)): coord[i] *= layout[i] - return coord @@ -611,7 +610,12 @@ def getChunkCoverage(chunk_id, slices, layout): """ chunk_index = getChunkIndex(chunk_id) chunk_sel = getChunkSelection(chunk_id, slices, layout) + if not chunk_sel: + log.warn(f"slices: {slices} does intersect chunk: {chunk_id}") + return None rank = len(layout) + if len(slices) != rank: + raise ValueError(f"invalid slices value for dataset of rank: {rank}") sel = [] for dim in range(rank): s = chunk_sel[dim] diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index b3b15d31..70f62efe 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -3331,7 +3331,7 @@ def testShapeReinitialization3D(self): # define two different shapes that we'll switch between # min extent in each dimension is 20 for the point setup to work - large_shape = (220, 120, 130) + large_shape = (2200, 120, 130) small_shape = (55, 60, 70) # setup some points on the diagonal From 681c75e19a4e42098feefcc5311d96ba362fa15b Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 26 Oct 2023 08:39:23 -0700 Subject: [PATCH 16/17] updates for review comments --- hsds/async_lib.py | 4 +- hsds/attr_sn.py | 3 +- hsds/chunk_crawl.py | 20 +- hsds/chunk_dn.py | 4 +- hsds/chunk_sn.py | 568 ++------------------------------ hsds/datanode_lib.py | 4 +- hsds/dset_lib.py | 588 ++++++++++++++++++++++++++++++++-- hsds/dset_sn.py | 4 +- hsds/util/arrayUtil.py | 38 --- hsds/util/dsetUtil.py | 66 ++++ 
tests/integ/broadcast_test.py | 3 +- 11 files changed, 673 insertions(+), 629 deletions(-) diff --git a/hsds/async_lib.py b/hsds/async_lib.py index 9ebfa099..2308cedb 100755 --- a/hsds/async_lib.py +++ b/hsds/async_lib.py @@ -20,9 +20,9 @@ from .util.idUtil import getObjId, isValidChunkId, getCollectionForId from .util.chunkUtil import getDatasetId, getNumChunks, ChunkIterator from .util.hdf5dtype import getItemSize, createDataType -from .util.arrayUtil import getShapeDims, getNumElements, bytesToArray +from .util.arrayUtil import getNumElements, bytesToArray from .util.dsetUtil import getHyperslabSelection, getFilterOps, getChunkDims -from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout +from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout, getShapeDims from .util.storUtil import getStorKeys, putStorJSONObj, getStorJSONObj from .util.storUtil import deleteStorObj, getStorBytes, isStorObj diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index d3dd648a..da78f4cb 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -27,8 +27,9 @@ from .util.attrUtil import validateAttributeName, getRequestCollectionName from .util.hdf5dtype import validateTypeItem, getBaseTypeJson from .util.hdf5dtype import createDataType, getItemSize -from .util.arrayUtil import jsonToArray, getShapeDims, getNumElements +from .util.arrayUtil import jsonToArray, getNumElements from .util.arrayUtil import bytesArrayToList +from .util.dsetUtil import getShapeDims from .servicenode_lib import getDomainJson, getObjectJson, validateAction from . import hsds_logger as log from . import config diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 02930993..eccbc4e2 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -28,11 +28,11 @@ from .util.httpUtil import isUnixDomainUrl from .util.idUtil import getDataNodeUrl, getNodeCount from .util.hdf5dtype import createDataType -from .util.dsetUtil import getSliceQueryParam +from .util.dsetUtil import getSliceQueryParam, getShapeDims from .util.dsetUtil import getSelectionShape, getChunkLayout from .util.chunkUtil import getChunkCoverage, getDataCoverage from .util.chunkUtil import getChunkIdForPartition, getQueryDtype -from .util.arrayUtil import jsonToArray, getShapeDims, getNumpyValue +from .util.arrayUtil import jsonToArray, getNumpyValue from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray from . 
import config @@ -50,7 +50,7 @@ def getFillValue(dset_json): If no fill value is defined, return an zero array of given type """ # NOTE - this is copy of the function in dset_lib, but needed to put - # here to avoid circular dependency + # here to avoid a circular dependency fill_value = None type_json = dset_json["type"] @@ -60,7 +60,7 @@ def getFillValue(dset_json): cprops = dset_json["creationProperties"] if "fillValue" in cprops: fill_value_prop = cprops["fillValue"] - log.debug(f"got fo;;+value_prop: {fill_value_prop}") + log.debug(f"got fill_value_prop: {fill_value_prop}") encoding = cprops.get("fillValue_encoding") fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding) if fill_value: @@ -179,18 +179,6 @@ async def read_chunk_hyperslab( return msg = f"read_chunk_hyperslab, chunk_id: {chunk_id}," - """ - msg += " slices: [" - for s in slices: - if isinstance(s, slice): - msg += f"{s}," - else: - if len(s) > 5: - # avoid large output lines - msg += f"[{s[0]}, {s[1]}, ..., {s[-2]}, {s[-1]}]," - else: - msg += f"{s}," - """ msg += f" bucket: {bucket}" if query is not None: msg += f" query: {query} limit: {limit}" diff --git a/hsds/chunk_dn.py b/hsds/chunk_dn.py index 329f772a..4f3da7f7 100644 --- a/hsds/chunk_dn.py +++ b/hsds/chunk_dn.py @@ -20,11 +20,11 @@ from aiohttp.web import json_response, StreamResponse from .util.httpUtil import request_read, getContentType -from .util.arrayUtil import bytesToArray, arrayToBytes, getShapeDims, getBroadcastShape +from .util.arrayUtil import bytesToArray, arrayToBytes, getBroadcastShape from .util.idUtil import getS3Key, validateInPartition, isValidUuid from .util.storUtil import isStorObj, deleteStorObj from .util.hdf5dtype import createDataType -from .util.dsetUtil import getSelectionList, getChunkLayout +from .util.dsetUtil import getSelectionList, getChunkLayout, getShapeDims from .util.dsetUtil import getSelectionShape, getChunkInitializer from .util.chunkUtil import getChunkIndex, getDatasetId, chunkQuery from .util.chunkUtil import chunkWriteSelection, chunkReadSelection diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index b0bf8dff..3a6eb4dd 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -30,29 +30,21 @@ from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain from .util.hdf5dtype import getItemSize, createDataType -from .util.dsetUtil import getSelectionList, isNullSpace, getDatasetLayout, getDatasetLayoutClass +from .util.dsetUtil import isNullSpace, get_slices, getShapeDims from .util.dsetUtil import isExtensible, getSelectionPagination from .util.dsetUtil import getSelectionShape, getDsetMaxDims, getChunkLayout from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId -from .util.chunkUtil import getChunkIndex, getChunkSuffix -from .util.chunkUtil import getChunkCoverage, getDataCoverage -from .util.chunkUtil import getQueryDtype, get_chunktable_dims -from .util.arrayUtil import bytesArrayToList, jsonToArray, getShapeDims +from .util.arrayUtil import bytesArrayToList, jsonToArray from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray from .util.arrayUtil import squeezeArray, getBroadcastShape from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.boolparser import BooleanParser from .servicenode_lib import getDsetJson, validateAction -from .dset_lib import getFillValue +from .dset_lib import getSelectionData from .chunk_crawl import ChunkCrawler from . import config from . 
import hsds_logger as log -CHUNK_REF_LAYOUTS = ( - "H5D_CONTIGUOUS_REF", - "H5D_CHUNKED_REF", - "H5D_CHUNKED_REF_INDIRECT", -) VARIABLE_AVG_ITEM_SIZE = 512 # guess at avg variable type length @@ -73,32 +65,6 @@ def get_hrefs(request, dset_json): return hrefs -def get_slices(app, select, dset_json): - """Get desired slices from selection query param string or json value. - If select is none or empty, slices for entire datashape will be - returned. - Refretch dims if the dataset is extensible - """ - - dset_id = dset_json["id"] - datashape = dset_json["shape"] - if datashape["class"] == "H5S_NULL": - msg = "Null space datasets can not be used as target for GET value" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - dims = getShapeDims(datashape) # throws 400 for HS_NULL dsets - - try: - slices = getSelectionList(select, dims) - except ValueError: - msg = f"Invalid selection: {select} on dims: {dims} " - msg += f"for dataset: {dset_id}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - return slices - - def use_http_streaming(request, rank): """ return boolean indicating whether http streaming should be used """ if rank == 0: @@ -110,283 +76,6 @@ def use_http_streaming(request, rank): return True -async def getChunkLocations(app, dset_id, dset_json, chunkinfo_map, chunk_ids, bucket=None): - """ - Get info for chunk locations (for reference layouts) - """ - layout_class = getDatasetLayoutClass(dset_json) - - if layout_class not in CHUNK_REF_LAYOUTS: - msg = f"skip getChunkLocations for layout class: {layout_class}" - log.debug(msg) - return - - chunk_dims = None - if "layout" in dset_json: - dset_layout = dset_json["layout"] - log.debug(f"dset_json layout: {dset_layout}") - if "dims" in dset_layout: - chunk_dims = dset_layout["dims"] - if chunk_dims is None: - msg = "no chunk dimensions set in dataset layout" - log.error(msg) - raise HTTPInternalServerError() - - datashape = dset_json["shape"] - datatype = dset_json["type"] - if isNullSpace(dset_json): - log.error("H5S_NULL shape class used with reference chunk layout") - raise HTTPInternalServerError() - dims = getShapeDims(datashape) - rank = len(dims) - # chunk_ids = list(chunkinfo_map.keys()) - # chunk_ids.sort() - num_chunks = len(chunk_ids) - msg = f"getChunkLocations for dset: {dset_id} bucket: {bucket} " - msg += f"rank: {rank} num chunk_ids: {num_chunks}" - log.info(msg) - log.debug(f"getChunkLocations layout: {layout_class}") - - def getChunkItem(chunkid): - if chunk_id in chunkinfo_map: - chunk_item = chunkinfo_map[chunk_id] - else: - chunk_item = {} - chunkinfo_map[chunk_id] = chunk_item - return chunk_item - - if layout_class == "H5D_CONTIGUOUS_REF": - layout = getDatasetLayout(dset_json) - log.debug(f"cpl layout: {layout}") - s3path = layout["file_uri"] - s3size = layout["size"] - if s3size == 0: - msg = "getChunkLocations - H5D_CONTIGUOUS_REF layout size 0, " - msg += "no allocation" - log.info(msg) - return - item_size = getItemSize(datatype) - chunk_size = item_size - for dim in chunk_dims: - chunk_size *= dim - log.debug(f"using chunk_size: {chunk_size} for H5D_CONTIGUOUS_REF") - - for chunk_id in chunk_ids: - log.debug(f"getChunkLocations - getting data for chunk: {chunk_id}") - chunk_item = getChunkItem(chunk_id) - chunk_index = getChunkIndex(chunk_id) - if len(chunk_index) != rank: - log.error("Unexpected chunk_index") - raise HTTPInternalServerError() - extent = item_size - if "offset" not in layout: - msg = "getChunkLocations - expected to find offset in chunk " - msg += "layout for H5D_CONTIGUOUS_REF" - 
log.error(msg) - continue - s3offset = layout["offset"] - if not isinstance(s3offset, int): - msg = "getChunkLocations - expected offset to be an int but " - msg += f"got: {s3offset}" - log.error(msg) - continue - log.debug(f"getChunkLocations s3offset: {s3offset}") - for i in range(rank): - dim = rank - i - 1 - index = chunk_index[dim] - s3offset += index * chunk_dims[dim] * extent - extent *= dims[dim] - msg = f"setting chunk_info_map to s3offset: {s3offset} " - msg == f"s3size: {s3size} for chunk_id: {chunk_id}" - log.debug(msg) - if s3offset > layout["offset"] + layout["size"]: - msg = f"range get of s3offset: {s3offset} s3size: {s3size} " - msg += "extends beyond end of contiguous dataset for " - msg += f"chunk_id: {chunk_id}" - log.warn(msg) - chunk_item["s3path"] = s3path - chunk_item["s3offset"] = s3offset - chunk_item["s3size"] = chunk_size - elif layout_class == "H5D_CHUNKED_REF": - layout = getDatasetLayout(dset_json) - log.debug(f"cpl layout: {layout}") - s3path = layout["file_uri"] - chunks = layout["chunks"] - - for chunk_id in chunk_ids: - chunk_item = getChunkItem(chunk_id) - s3offset = 0 - s3size = 0 - chunk_key = getChunkSuffix(chunk_id) - if chunk_key in chunks: - item = chunks[chunk_key] - s3offset = item[0] - s3size = item[1] - chunk_item["s3path"] = s3path - chunk_item["s3offset"] = s3offset - chunk_item["s3size"] = s3size - - elif layout_class == "H5D_CHUNKED_REF_INDIRECT": - layout = getDatasetLayout(dset_json) - log.debug(f"cpl layout: {layout}") - if "chunk_table" not in layout: - log.error("Expected to find chunk_table in dataset layout") - raise HTTPInternalServerError() - chunktable_id = layout["chunk_table"] - # get state for dataset from DN. - chunktable_json = await getDsetJson(app, chunktable_id, bucket=bucket) - # log.debug(f"chunktable_json: {chunktable_json}") - chunktable_dims = getShapeDims(chunktable_json["shape"]) - chunktable_layout = chunktable_json["layout"] - if chunktable_layout.get("class") == "H5D_CHUNKED_REF_INDIRECT": - # We don't support recursive chunked_ref_indirect classes - msg = "chunktable layout: H5D_CHUNKED_REF_INDIRECT is invalid" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if len(chunktable_dims) != rank: - msg = "Rank of chunktable should be same as the dataset" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - # convert the list of chunk_ids into a set of points to query in - # the chunk table - log.debug(f"datashape: {dims}") - log.debug(f"chunk_dims: {chunk_dims}") - log.debug(f"chunktable_dims: {chunktable_dims}") - default_chunktable_dims = get_chunktable_dims(dims, chunk_dims) - log.debug(f"default_chunktable_dims: {default_chunktable_dims}") - table_factors = [] - if "hyper_dims" in layout: - hyper_dims = layout["hyper_dims"] - else: - # assume 1 to 1 matching - hyper_dims = chunk_dims - ref_num_chunks = num_chunks - for dim in range(rank): - if chunk_dims[dim] % hyper_dims[dim] != 0: - msg = f"expected hyper_dims [{hyper_dims[dim]}] to be a factor" - msg += f" of {chunk_dims[dim]}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - factor = chunk_dims[dim] // hyper_dims[dim] - table_factors.append(factor) - ref_num_chunks *= factor - log.debug(f"table_factors: {table_factors}") - log.debug(f"ref_num_chunks: {ref_num_chunks}") - log.debug(f"hyper_dims: {hyper_dims}") - - if rank == 1: - arr_points = np.zeros((ref_num_chunks,), dtype=np.dtype("u8")) - table_factor = table_factors[0] - for i in range(num_chunks): - chunk_id = chunk_ids[i] - log.debug(f"chunk_id: {chunk_id}") - chunk_index = 
getChunkIndex(chunk_id) - chunk_index = chunk_index[0] - log.debug(f"chunk_index: {chunk_index}") - for j in range(table_factor): - index = chunk_index * table_factor + j - arr_index = i * table_factor + j - arr_points[arr_index] = index - else: - if ref_num_chunks != num_chunks: - msg = "hyperchunks not supported for multidimensional datasets" - log.warn(msg) - raise HTTPBadRequest(msg=msg) - arr_points = np.zeros((num_chunks, rank), dtype=np.dtype("u8")) - for i in range(num_chunks): - chunk_id = chunk_ids[i] - log.debug(f"chunk_id for chunktable: {chunk_id}") - indx = getChunkIndex(chunk_id) - log.debug(f"get chunk indx: {indx}") - arr_points[i] = indx - - msg = f"got chunktable points: {arr_points}, calling getSelectionData" - log.debug(msg) - # this call won't lead to a circular loop of calls since we've checked - # that the chunktable layout is not H5D_CHUNKED_REF_INDIRECT - kwargs = {"points": arr_points, "bucket": bucket} - point_data = await getSelectionData(app, chunktable_id, chunktable_json, **kwargs) - - log.debug(f"got chunktable data: {point_data}") - if "file_uri" in layout: - s3_layout_path = layout["file_uri"] - log.debug(f"got s3_layout_path: {s3_layout_path}") - else: - s3_layout_path = None - - for i in range(num_chunks): - chunk_id = chunk_ids[i] - chunk_item = getChunkItem(chunk_id) - item = point_data[i] - if s3_layout_path is None: - if len(item) < 3: - msg = "expected chunk table to have three fields" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - e = item[2] - if e: - s3path = e.decode("utf-8") - log.debug(f"got s3path: {s3path}") - else: - s3path = s3_layout_path - chunk_item["s3path"] = s3path - - if ref_num_chunks == num_chunks: - item = point_data[i] - s3offset = int(item[0]) - s3size = int(item[1]) - chunk_item["s3offset"] = s3offset - chunk_item["s3size"] = s3size - else: - factor = ref_num_chunks // num_chunks - s3offsets = [] - s3sizes = [] - for j in range(factor): - item = point_data[i * factor + j] - s3offset = int(item[0]) - s3offsets.append(s3offset) - s3size = int(item[1]) - s3sizes.append(s3size) - chunk_item["s3offset"] = s3offsets - chunk_item["s3size"] = s3sizes - chunk_item["hyper_dims"] = hyper_dims - - else: - log.error(f"Unexpected chunk layout: {layout['class']}") - raise HTTPInternalServerError() - - log.debug(f"returning chunkinfo_map: {chunkinfo_map}") - return chunkinfo_map - - -def get_chunk_selections(chunk_map, chunk_ids, slices, dset_json): - """Update chunk_map with chunk and data selections for the - given set of slices - """ - log.debug(f"get_chunk_selections - chunk_ids: {chunk_ids}") - if not slices: - log.debug("no slices set, returning") - return # nothing to do - log.debug(f"slices: {slices}") - layout = getChunkLayout(dset_json) - for chunk_id in chunk_ids: - if chunk_id in chunk_map: - item = chunk_map[chunk_id] - else: - item = {} - chunk_map[chunk_id] = item - - chunk_sel = getChunkCoverage(chunk_id, slices, layout) - log.debug( - f"get_chunk_selections - chunk_id: {chunk_id}, chunk_sel: {chunk_sel}" - ) - item["chunk_sel"] = chunk_sel - data_sel = getDataCoverage(chunk_id, slices, layout) - log.debug(f"get_chunk_selections - data_sel: {data_sel}") - item["data_sel"] = data_sel - async def PUT_Value(request): """ @@ -547,7 +236,12 @@ async def PUT_Value(request): raise HTTPBadRequest(reason=msg) select = params.get("select") - slices = get_slices(app, select, dset_json) + try: + slices = get_slices(select, dset_json) + except ValueError as ve: + log.warn(f"Invalid selection: {ve}") + raise 
HTTPBadRequest(reason="Invalid selection") + if "Limit" in params: try: limit = int(params["Limit"]) @@ -709,11 +403,15 @@ async def PUT_Value(request): np_shape = tuple(np_shape) elif points is None: - if body and "start" in body and "stop" in body: - slices = get_slices(app, body, dset_json) - else: - select = params.get("select") - slices = get_slices(app, select, dset_json) + try: + if body and "start" in body and "stop" in body: + slices = get_slices(body, dset_json) + else: + select = params.get("select") + slices = get_slices(select, dset_json) + except ValueError as ve: + log.warn(f"Invalid Selection: {ve}") + raise HTTPBadRequest(reason="Invalid Selection") # The selection parameters will determine expected put value shape log.debug(f"PUT Value selection: {slices}") @@ -1110,7 +808,12 @@ async def GET_Value(request): select = params.get("select") if select: log.debug(f"select query param: {select}") - slices = get_slices(app, select, dset_json) + try: + slices = get_slices(select, dset_json) + except ValueError as ve: + log.warn(f"Invalid selection: {ve}") + raise HTTPBadRequest(reason="Invalid selection") + log.debug(f"GET Value selection: {slices}") limit = 0 @@ -1347,223 +1050,6 @@ async def GET_Value(request): return resp -async def doReadSelection( - app, - chunk_ids, - dset_json, - slices=None, - points=None, - query=None, - query_update=None, - chunk_map=None, - bucket=None, - limit=0, -): - """read selection utility function""" - log.info(f"doReadSelection - number of chunk_ids: {len(chunk_ids)}") - log.debug(f"doReadSelection - chunk_ids: {chunk_ids}") - - type_json = dset_json["type"] - item_size = getItemSize(type_json) - log.debug(f"item size: {item_size}") - dset_dtype = createDataType(type_json) # np datatype - if query is None: - query_dtype = None - else: - log.debug(f"query: {query} limit: {limit}") - query_dtype = getQueryDtype(dset_dtype) - - # create array to hold response data - arr = None - - if points is not None: - # point selection - np_shape = [ - len(points), - ] - elif query is not None: - # return shape will be determined by number of matches - np_shape = None - elif slices is not None: - log.debug(f"get np_shape for slices: {slices}") - np_shape = getSelectionShape(slices) - else: - log.error("doReadSelection - expected points or slices to be set") - raise HTTPInternalServerError() - log.debug(f"selection shape: {np_shape}") - - if np_shape is not None: - # check that the array size is reasonable - request_size = math.prod(np_shape) - if item_size == "H5T_VARIABLE": - request_size *= 512 # random guess of avg item_size - else: - request_size *= item_size - log.debug(f"request_size: {request_size}") - max_request_size = int(config.get("max_request_size")) - if request_size >= max_request_size: - msg = f"Attempting to fetch {request_size} bytes (greater than " - msg += f"{max_request_size} limit" - log.error(msg) - raise HTTPBadRequest(reason=msg) - - # initialize to fill_value if specified - fill_value = getFillValue(dset_json) - - if fill_value: - arr = np.empty(np_shape, dtype=dset_dtype, order="C") - arr[...] 
= fill_value - else: - arr = np.zeros(np_shape, dtype=dset_dtype, order="C") - - crawler = ChunkCrawler( - app, - chunk_ids, - dset_json=dset_json, - chunk_map=chunk_map, - bucket=bucket, - slices=slices, - query=query, - query_update=query_update, - limit=limit, - arr=arr, - action="read_chunk_hyperslab", - ) - await crawler.crawl() - - crawler_status = crawler.get_status() - - log.info(f"doReadSelection complete - status: {crawler_status}") - if crawler_status == 400: - log.info(f"doReadSelection raising BadRequest error: {crawler_status}") - raise HTTPBadRequest() - if crawler_status not in (200, 201): - log.info( - f"doReadSelection raising HTTPInternalServerError for status: {crawler_status}" - ) - raise HTTPInternalServerError() - - if query is not None: - # combine chunk responses and return - if limit > 0 and crawler._hits > limit: - nrows = limit - else: - nrows = crawler._hits - arr = np.empty((nrows,), dtype=query_dtype) - start = 0 - for chunkid in chunk_ids: - if chunkid not in chunk_map: - continue - chunk_item = chunk_map[chunkid] - if "query_rsp" not in chunk_item: - continue - query_rsp = chunk_item["query_rsp"] - if len(query_rsp) == 0: - continue - stop = start + len(query_rsp) - if stop > nrows: - rsp_stop = len(query_rsp) - (stop - nrows) - arr[start:] = query_rsp[0:rsp_stop] - else: - arr[start:stop] = query_rsp[:] - start = stop - if start >= nrows: - log.debug(f"got {nrows} rows for query, quitting") - break - return arr - - -async def getSelectionData( - app, - dset_id, - dset_json, - slices=None, - points=None, - query=None, - query_update=None, - bucket=None, - limit=0, - method="GET", -): - """Read selected slices and return numpy array""" - log.debug("getSelectionData") - if slices is None and points is None: - log.error("getSelectionData - expected either slices or points to be set") - raise HTTPInternalServerError() - - layout = getChunkLayout(dset_json) - - chunkinfo = {} - - if slices is not None: - num_chunks = getNumChunks(slices, layout) - log.debug(f"num_chunks: {num_chunks}") - - max_chunks = int(config.get("max_chunks_per_request", default=1000)) - if num_chunks > max_chunks: - msg = f"num_chunks over {max_chunks} limit, but will attempt to fetch with crawler" - log.warn(msg) - - chunk_ids = getChunkIds(dset_id, slices, layout) - else: - # points - already checked it is not None - num_points = len(points) - chunk_ids = [] - for pt_indx in range(num_points): - point = points[pt_indx] - chunk_id = getChunkId(dset_id, point, layout) - if chunk_id in chunkinfo: - chunk_entry = chunkinfo[chunk_id] - else: - chunk_entry = {} - chunkinfo[chunk_id] = chunk_entry - chunk_ids.append(chunk_id) - if "points" in chunk_entry: - point_list = chunk_entry["points"] - else: - point_list = [] - chunk_entry["points"] = point_list - if "indices" in chunk_entry: - point_index = chunk_entry["indices"] - else: - point_index = [] - chunk_entry["indices"] = point_index - - point_list.append(point) - point_index.append(pt_indx) - - # Get information about where chunks are located - # Will be None except for H5D_CHUNKED_REF_INDIRECT type - await getChunkLocations(app, dset_id, dset_json, chunkinfo, chunk_ids, bucket=bucket) - - if slices is None: - slices = get_slices(app, None, dset_json) - - if points is None: - # get chunk selections for hyperslab select - get_chunk_selections(chunkinfo, chunk_ids, slices, dset_json) - - log.debug(f"chunkinfo_map: {chunkinfo}") - - if method == "OPTIONS": - # skip doing any big data load for options request - return None - - arr = await 
doReadSelection( - app, - chunk_ids, - dset_json, - slices=slices, - points=points, - query=query, - query_update=query_update, - limit=limit, - chunk_map=chunkinfo, - bucket=bucket, - ) - - return arr - async def POST_Value(request): """ @@ -1659,7 +1145,11 @@ async def POST_Value(request): elif "select" in body: select = body["select"] log.debug(f"select: {select}") - slices = get_slices(app, select, dset_json) + try: + slices = get_slices(select, dset_json) + except ValueError as ve: + log.warn(f"Invalid selection: {ve}") + raise HTTPBadRequest(reason="Invalid selection") log.debug(f"got slices: {slices}") else: msg = "Expected points or select key in request body" diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index 36d29ae3..d365ce2a 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -29,10 +29,10 @@ from .util.domainUtil import isValidDomain, getBucketForDomain from .util.attrUtil import getRequestCollectionName from .util.httpUtil import http_post -from .util.dsetUtil import getChunkLayout, getFilterOps +from .util.dsetUtil import getChunkLayout, getFilterOps, getShapeDims from .util.dsetUtil import getChunkInitializer, getSliceQueryParam from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex -from .util.arrayUtil import arrayToBytes, bytesToArray, getShapeDims, jsonToArray +from .util.arrayUtil import arrayToBytes, bytesToArray, jsonToArray from .util.hdf5dtype import createDataType, getItemSize from .util.rangegetUtil import ChunkLocation, chunkMunge diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 7666a94a..36e03989 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -11,23 +11,36 @@ ############################################################################## import asyncio +import math import numpy as np from aiohttp.client_exceptions import ClientError - -from .util.hdf5dtype import createDataType +from aiohttp.web_exceptions import HTTPBadRequest, HTTPInternalServerError +from .util.hdf5dtype import createDataType, getItemSize from .util.arrayUtil import getNumpyValue -from .util.dsetUtil import getChunkLayout -from .util.chunkUtil import getChunkCoordinate +from .util.dsetUtil import isNullSpace, getDatasetLayout, getDatasetLayoutClass +from .util.dsetUtil import getChunkLayout, getSelectionShape, getShapeDims, get_slices +from .util.chunkUtil import getChunkCoordinate, getChunkIndex, getChunkSuffix +from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId +from .util.chunkUtil import getChunkCoverage, getDataCoverage +from .util.chunkUtil import getQueryDtype, get_chunktable_dims + from .util.idUtil import getDataNodeUrl, isSchema2Id, getS3Key, getObjId from .util.storUtil import getStorKeys from .util.httpUtil import http_delete -from . import hsds_logger as log -from . import config +from .servicenode_lib import getDsetJson from .chunk_crawl import ChunkCrawler +from . import config +from . import hsds_logger as log +CHUNK_REF_LAYOUTS = ( + "H5D_CONTIGUOUS_REF", + "H5D_CHUNKED_REF", + "H5D_CHUNKED_REF_INDIRECT", +) + def getFillValue(dset_json): """ Return the fill value of the given dataset as a numpy array. 
If no fill value is defined, return an zero array of given type """ @@ -40,7 +53,7 @@ def getFillValue(dset_json): cprops = dset_json["creationProperties"] if "fillValue" in cprops: fill_value_prop = cprops["fillValue"] - log.debug(f"got fo;;+value_prop: {fill_value_prop}") + log.debug(f"got fill_value_prop: {fill_value_prop}") encoding = cprops.get("fillValue_encoding") fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding) if fill_value: @@ -52,6 +65,530 @@ def getFillValue(dset_json): return arr +async def getChunkLocations(app, dset_id, dset_json, chunkinfo_map, chunk_ids, bucket=None): + """ + Get info for chunk locations (for reference layouts) + """ + layout_class = getDatasetLayoutClass(dset_json) + + if layout_class not in CHUNK_REF_LAYOUTS: + msg = f"skip getChunkLocations for layout class: {layout_class}" + log.debug(msg) + return + + chunk_dims = None + if "layout" in dset_json: + dset_layout = dset_json["layout"] + log.debug(f"dset_json layout: {dset_layout}") + if "dims" in dset_layout: + chunk_dims = dset_layout["dims"] + if chunk_dims is None: + msg = "no chunk dimensions set in dataset layout" + log.error(msg) + raise HTTPInternalServerError() + + datashape = dset_json["shape"] + datatype = dset_json["type"] + if isNullSpace(dset_json): + log.error("H5S_NULL shape class used with reference chunk layout") + raise HTTPInternalServerError() + dims = getShapeDims(datashape) + rank = len(dims) + # chunk_ids = list(chunkinfo_map.keys()) + # chunk_ids.sort() + num_chunks = len(chunk_ids) + msg = f"getChunkLocations for dset: {dset_id} bucket: {bucket} " + msg += f"rank: {rank} num chunk_ids: {num_chunks}" + log.info(msg) + log.debug(f"getChunkLocations layout: {layout_class}") + + def getChunkItem(chunkid): + if chunk_id in chunkinfo_map: + chunk_item = chunkinfo_map[chunk_id] + else: + chunk_item = {} + chunkinfo_map[chunk_id] = chunk_item + return chunk_item + + if layout_class == "H5D_CONTIGUOUS_REF": + layout = getDatasetLayout(dset_json) + log.debug(f"cpl layout: {layout}") + s3path = layout["file_uri"] + s3size = layout["size"] + if s3size == 0: + msg = "getChunkLocations - H5D_CONTIGUOUS_REF layout size 0, " + msg += "no allocation" + log.info(msg) + return + item_size = getItemSize(datatype) + chunk_size = item_size + for dim in chunk_dims: + chunk_size *= dim + log.debug(f"using chunk_size: {chunk_size} for H5D_CONTIGUOUS_REF") + + for chunk_id in chunk_ids: + log.debug(f"getChunkLocations - getting data for chunk: {chunk_id}") + chunk_item = getChunkItem(chunk_id) + chunk_index = getChunkIndex(chunk_id) + if len(chunk_index) != rank: + log.error("Unexpected chunk_index") + raise HTTPInternalServerError() + extent = item_size + if "offset" not in layout: + msg = "getChunkLocations - expected to find offset in chunk " + msg += "layout for H5D_CONTIGUOUS_REF" + log.error(msg) + continue + s3offset = layout["offset"] + if not isinstance(s3offset, int): + msg = "getChunkLocations - expected offset to be an int but " + msg += f"got: {s3offset}" + log.error(msg) + continue + log.debug(f"getChunkLocations s3offset: {s3offset}") + for i in range(rank): + dim = rank - i - 1 + index = chunk_index[dim] + s3offset += index * chunk_dims[dim] * extent + extent *= dims[dim] + msg = f"setting chunk_info_map to s3offset: {s3offset} " + msg == f"s3size: {s3size} for chunk_id: {chunk_id}" + log.debug(msg) + if s3offset > layout["offset"] + layout["size"]: + msg = f"range get of s3offset: {s3offset} s3size: {s3size} " + msg += "extends beyond end of contiguous dataset for " + 
msg += f"chunk_id: {chunk_id}" + log.warn(msg) + chunk_item["s3path"] = s3path + chunk_item["s3offset"] = s3offset + chunk_item["s3size"] = chunk_size + elif layout_class == "H5D_CHUNKED_REF": + layout = getDatasetLayout(dset_json) + log.debug(f"cpl layout: {layout}") + s3path = layout["file_uri"] + chunks = layout["chunks"] + + for chunk_id in chunk_ids: + chunk_item = getChunkItem(chunk_id) + s3offset = 0 + s3size = 0 + chunk_key = getChunkSuffix(chunk_id) + if chunk_key in chunks: + item = chunks[chunk_key] + s3offset = item[0] + s3size = item[1] + chunk_item["s3path"] = s3path + chunk_item["s3offset"] = s3offset + chunk_item["s3size"] = s3size + + elif layout_class == "H5D_CHUNKED_REF_INDIRECT": + layout = getDatasetLayout(dset_json) + log.debug(f"cpl layout: {layout}") + if "chunk_table" not in layout: + log.error("Expected to find chunk_table in dataset layout") + raise HTTPInternalServerError() + chunktable_id = layout["chunk_table"] + # get state for dataset from DN. + chunktable_json = await getDsetJson(app, chunktable_id, bucket=bucket) + # log.debug(f"chunktable_json: {chunktable_json}") + chunktable_dims = getShapeDims(chunktable_json["shape"]) + chunktable_layout = chunktable_json["layout"] + if chunktable_layout.get("class") == "H5D_CHUNKED_REF_INDIRECT": + # We don't support recursive chunked_ref_indirect classes + msg = "chunktable layout: H5D_CHUNKED_REF_INDIRECT is invalid" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if len(chunktable_dims) != rank: + msg = "Rank of chunktable should be same as the dataset" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # convert the list of chunk_ids into a set of points to query in + # the chunk table + log.debug(f"datashape: {dims}") + log.debug(f"chunk_dims: {chunk_dims}") + log.debug(f"chunktable_dims: {chunktable_dims}") + default_chunktable_dims = get_chunktable_dims(dims, chunk_dims) + log.debug(f"default_chunktable_dims: {default_chunktable_dims}") + table_factors = [] + if "hyper_dims" in layout: + hyper_dims = layout["hyper_dims"] + else: + # assume 1 to 1 matching + hyper_dims = chunk_dims + ref_num_chunks = num_chunks + for dim in range(rank): + if chunk_dims[dim] % hyper_dims[dim] != 0: + msg = f"expected hyper_dims [{hyper_dims[dim]}] to be a factor" + msg += f" of {chunk_dims[dim]}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + factor = chunk_dims[dim] // hyper_dims[dim] + table_factors.append(factor) + ref_num_chunks *= factor + log.debug(f"table_factors: {table_factors}") + log.debug(f"ref_num_chunks: {ref_num_chunks}") + log.debug(f"hyper_dims: {hyper_dims}") + + if rank == 1: + arr_points = np.zeros((ref_num_chunks,), dtype=np.dtype("u8")) + table_factor = table_factors[0] + for i in range(num_chunks): + chunk_id = chunk_ids[i] + log.debug(f"chunk_id: {chunk_id}") + chunk_index = getChunkIndex(chunk_id) + chunk_index = chunk_index[0] + log.debug(f"chunk_index: {chunk_index}") + for j in range(table_factor): + index = chunk_index * table_factor + j + arr_index = i * table_factor + j + arr_points[arr_index] = index + else: + if ref_num_chunks != num_chunks: + msg = "hyperchunks not supported for multidimensional datasets" + log.warn(msg) + raise HTTPBadRequest(msg=msg) + arr_points = np.zeros((num_chunks, rank), dtype=np.dtype("u8")) + for i in range(num_chunks): + chunk_id = chunk_ids[i] + log.debug(f"chunk_id for chunktable: {chunk_id}") + indx = getChunkIndex(chunk_id) + log.debug(f"get chunk indx: {indx}") + arr_points[i] = indx + + msg = f"got chunktable points: {arr_points}, calling 
getSelectionData" + log.debug(msg) + # this call won't lead to a circular loop of calls since we've checked + # that the chunktable layout is not H5D_CHUNKED_REF_INDIRECT + kwargs = {"points": arr_points, "bucket": bucket} + point_data = await getSelectionData(app, chunktable_id, chunktable_json, **kwargs) + + log.debug(f"got chunktable data: {point_data}") + if "file_uri" in layout: + s3_layout_path = layout["file_uri"] + log.debug(f"got s3_layout_path: {s3_layout_path}") + else: + s3_layout_path = None + + for i in range(num_chunks): + chunk_id = chunk_ids[i] + chunk_item = getChunkItem(chunk_id) + item = point_data[i] + if s3_layout_path is None: + if len(item) < 3: + msg = "expected chunk table to have three fields" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + e = item[2] + if e: + s3path = e.decode("utf-8") + log.debug(f"got s3path: {s3path}") + else: + s3path = s3_layout_path + chunk_item["s3path"] = s3path + + if ref_num_chunks == num_chunks: + item = point_data[i] + s3offset = int(item[0]) + s3size = int(item[1]) + chunk_item["s3offset"] = s3offset + chunk_item["s3size"] = s3size + else: + factor = ref_num_chunks // num_chunks + s3offsets = [] + s3sizes = [] + for j in range(factor): + item = point_data[i * factor + j] + s3offset = int(item[0]) + s3offsets.append(s3offset) + s3size = int(item[1]) + s3sizes.append(s3size) + chunk_item["s3offset"] = s3offsets + chunk_item["s3size"] = s3sizes + chunk_item["hyper_dims"] = hyper_dims + + else: + log.error(f"Unexpected chunk layout: {layout['class']}") + raise HTTPInternalServerError() + + log.debug(f"returning chunkinfo_map: {chunkinfo_map}") + return chunkinfo_map + + + +def get_chunkmap_selections(chunk_map, chunk_ids, slices, dset_json): + """Update chunk_map with chunk and data selections for the + given set of slices + """ + log.debug(f"get_chunkmap_selections - chunk_ids: {chunk_ids}") + if not slices: + log.debug("no slices set, returning") + return # nothing to do + log.debug(f"slices: {slices}") + layout = getChunkLayout(dset_json) + for chunk_id in chunk_ids: + if chunk_id in chunk_map: + item = chunk_map[chunk_id] + else: + item = {} + chunk_map[chunk_id] = item + + chunk_sel = getChunkCoverage(chunk_id, slices, layout) + log.debug( + f"get_chunk_selections - chunk_id: {chunk_id}, chunk_sel: {chunk_sel}" + ) + item["chunk_sel"] = chunk_sel + data_sel = getDataCoverage(chunk_id, slices, layout) + log.debug(f"get_chunk_selections - data_sel: {data_sel}") + item["data_sel"] = data_sel + + +def get_chunk_selections(chunk_map, chunk_ids, slices, dset_json): + """Update chunk_map with chunk and data selections for the + given set of slices + """ + log.debug(f"get_chunk_selections - chunk_ids: {chunk_ids}") + if not slices: + log.debug("no slices set, returning") + return # nothing to do + log.debug(f"slices: {slices}") + layout = getChunkLayout(dset_json) + for chunk_id in chunk_ids: + if chunk_id in chunk_map: + item = chunk_map[chunk_id] + else: + item = {} + chunk_map[chunk_id] = item + + chunk_sel = getChunkCoverage(chunk_id, slices, layout) + log.debug( + f"get_chunk_selections - chunk_id: {chunk_id}, chunk_sel: {chunk_sel}" + ) + item["chunk_sel"] = chunk_sel + data_sel = getDataCoverage(chunk_id, slices, layout) + log.debug(f"get_chunk_selections - data_sel: {data_sel}") + item["data_sel"] = data_sel + +async def getSelectionData( + app, + dset_id, + dset_json, + slices=None, + points=None, + query=None, + query_update=None, + bucket=None, + limit=0, + method="GET", +): + """Read selected slices and return numpy 
array""" + log.debug("getSelectionData") + if slices is None and points is None: + log.error("getSelectionData - expected either slices or points to be set") + raise HTTPInternalServerError() + + layout = getChunkLayout(dset_json) + + chunkinfo = {} + + if slices is not None: + num_chunks = getNumChunks(slices, layout) + log.debug(f"num_chunks: {num_chunks}") + + max_chunks = int(config.get("max_chunks_per_request", default=1000)) + if num_chunks > max_chunks: + msg = f"num_chunks over {max_chunks} limit, but will attempt to fetch with crawler" + log.warn(msg) + + chunk_ids = getChunkIds(dset_id, slices, layout) + else: + # points - already checked it is not None + num_points = len(points) + chunk_ids = [] + for pt_indx in range(num_points): + point = points[pt_indx] + chunk_id = getChunkId(dset_id, point, layout) + if chunk_id in chunkinfo: + chunk_entry = chunkinfo[chunk_id] + else: + chunk_entry = {} + chunkinfo[chunk_id] = chunk_entry + chunk_ids.append(chunk_id) + if "points" in chunk_entry: + point_list = chunk_entry["points"] + else: + point_list = [] + chunk_entry["points"] = point_list + if "indices" in chunk_entry: + point_index = chunk_entry["indices"] + else: + point_index = [] + chunk_entry["indices"] = point_index + + point_list.append(point) + point_index.append(pt_indx) + + # Get information about where chunks are located + # Will be None except for H5D_CHUNKED_REF_INDIRECT type + await getChunkLocations(app, dset_id, dset_json, chunkinfo, chunk_ids, bucket=bucket) + + if slices is None: + slices = get_slices(None, dset_json) + + if points is None: + # get chunk selections for hyperslab select + get_chunk_selections(chunkinfo, chunk_ids, slices, dset_json) + + log.debug(f"chunkinfo_map: {chunkinfo}") + + if method == "OPTIONS": + # skip doing any big data load for options request + return None + + arr = await doReadSelection( + app, + chunk_ids, + dset_json, + slices=slices, + points=points, + query=query, + query_update=query_update, + limit=limit, + chunk_map=chunkinfo, + bucket=bucket, + ) + + return arr + + +async def doReadSelection( + app, + chunk_ids, + dset_json, + slices=None, + points=None, + query=None, + query_update=None, + chunk_map=None, + bucket=None, + limit=0, +): + """read selection utility function""" + log.info(f"doReadSelection - number of chunk_ids: {len(chunk_ids)}") + log.debug(f"doReadSelection - chunk_ids: {chunk_ids}") + + type_json = dset_json["type"] + item_size = getItemSize(type_json) + log.debug(f"item size: {item_size}") + dset_dtype = createDataType(type_json) # np datatype + if query is None: + query_dtype = None + else: + log.debug(f"query: {query} limit: {limit}") + query_dtype = getQueryDtype(dset_dtype) + + # create array to hold response data + arr = None + + if points is not None: + # point selection + np_shape = [ + len(points), + ] + elif query is not None: + # return shape will be determined by number of matches + np_shape = None + elif slices is not None: + log.debug(f"get np_shape for slices: {slices}") + np_shape = getSelectionShape(slices) + else: + log.error("doReadSelection - expected points or slices to be set") + raise HTTPInternalServerError() + log.debug(f"selection shape: {np_shape}") + + if np_shape is not None: + # check that the array size is reasonable + request_size = math.prod(np_shape) + if item_size == "H5T_VARIABLE": + request_size *= 512 # random guess of avg item_size + else: + request_size *= item_size + log.debug(f"request_size: {request_size}") + max_request_size = int(config.get("max_request_size")) + if 
request_size >= max_request_size: + msg = f"Attempting to fetch {request_size} bytes (greater than " + msg += f"{max_request_size} limit" + log.error(msg) + raise HTTPBadRequest(reason=msg) + + # initialize to fill_value if specified + fill_value = getFillValue(dset_json) + + if fill_value: + arr = np.empty(np_shape, dtype=dset_dtype, order="C") + arr[...] = fill_value + else: + arr = np.zeros(np_shape, dtype=dset_dtype, order="C") + + crawler = ChunkCrawler( + app, + chunk_ids, + dset_json=dset_json, + chunk_map=chunk_map, + bucket=bucket, + slices=slices, + query=query, + query_update=query_update, + limit=limit, + arr=arr, + action="read_chunk_hyperslab", + ) + await crawler.crawl() + + crawler_status = crawler.get_status() + + log.info(f"doReadSelection complete - status: {crawler_status}") + if crawler_status == 400: + log.info(f"doReadSelection raising BadRequest error: {crawler_status}") + raise HTTPBadRequest() + if crawler_status not in (200, 201): + log.info( + f"doReadSelection raising HTTPInternalServerError for status: {crawler_status}" + ) + raise HTTPInternalServerError() + + if query is not None: + # combine chunk responses and return + if limit > 0 and crawler._hits > limit: + nrows = limit + else: + nrows = crawler._hits + arr = np.empty((nrows,), dtype=query_dtype) + start = 0 + for chunkid in chunk_ids: + if chunkid not in chunk_map: + continue + chunk_item = chunk_map[chunkid] + if "query_rsp" not in chunk_item: + continue + query_rsp = chunk_item["query_rsp"] + if len(query_rsp) == 0: + continue + stop = start + len(query_rsp) + if stop > nrows: + rsp_stop = len(query_rsp) - (stop - nrows) + arr[start:] = query_rsp[0:rsp_stop] + else: + arr[start:stop] = query_rsp[:] + start = stop + if start >= nrows: + log.debug(f"got {nrows} rows for query, quitting") + break + return arr + + + async def removeChunks(app, chunk_ids, bucket=None): """ Remove chunks with the given ids """ @@ -65,7 +602,6 @@ async def removeChunks(app, chunk_ids, bucket=None): log.error("removeChunks request, but no dn_urls") raise ValueError() - log.debug(f"doFlush - dn_urls: {dn_urls}") params = {} if bucket: params["bucket"] = bucket @@ -73,6 +609,8 @@ async def removeChunks(app, chunk_ids, bucket=None): try: tasks = [] + # TBD - this may be problematic if the number of chunks to + # be deleted is very large - may need to implement some sort of crawler for chunk_id in chunk_ids: dn_url = getDataNodeUrl(app, chunk_id) req = dn_url + "/chunks/" + chunk_id @@ -159,7 +697,7 @@ async def getAllocatedChunkIds(app, dset_id, bucket=None): async def reduceShape(app, dset_json, shape_update, bucket=None): """ Given an existing dataset and a new shape, - Reinitialize and edge chunks and delete any chunks + Reinitialize any edge chunks and delete any chunks that fall entirely out of the new shape region """ dset_id = dset_json["id"] @@ -197,25 +735,17 @@ async def reduceShape(app, dset_json, shape_update, bucket=None): log.debug(f"chunk_id: {chunk_id}") chunk_coord = getChunkCoordinate(chunk_id, layout) log.debug(f"chunk_coord: {chunk_coord}") - skip = True - for i in range(rank): - if chunk_coord[i] + layout[i] > shape_update[i]: - skip = False - break - if skip: + + + if np.all(np.add(chunk_coord, layout) <= shape_update): log.debug(f"chunk_id {chunk_id} no action needed") continue - - reinit = False - for n in range(rank): - if chunk_coord[n] < shape_update[n]: - reinit = True - break - if reinit: - log.debug("chunk reinit") + + if np.any(chunk_coord < shape_update): + log.debug(f"{chunk_id} reinit") 
update_ids.append(chunk_id) else: - log.debug("chunk delete") + log.debug(f"{chunk_id} delete") delete_ids.append(chunk_id) msg = f"reduceShape - from {len(chunk_ids)} chunks, {len(update_ids)} will need to be " @@ -230,15 +760,23 @@ async def reduceShape(app, dset_json, shape_update, bucket=None): for n in range(rank): slices = [] - + update_element_count = 1 for m in range(rank): if m == n: s = slice(shape_update[m], dims[m], 1) + update_element_count *= dims[m] - shape_update[m] else: # just select the entire extent s = slice(0, dims[m], 1) + update_element_count *= dims[m] slices.append(s) + if update_element_count == 0: + log.debug(f"empty hyperslab update for dim {n}") + continue + + log.debug(f"update {update_element_count} elements for dim {n}") + crawler = ChunkCrawler( app, update_ids, diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 00bfcd7f..26919178 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -22,8 +22,8 @@ from .util.httpUtil import http_post, http_put, http_delete, getHref, respJsonAssemble from .util.httpUtil import jsonResponse from .util.idUtil import isValidUuid, getDataNodeUrl, createObjId, isSchema2Id -from .util.dsetUtil import getPreviewQuery, getFilterItem -from .util.arrayUtil import getNumElements, getShapeDims, getNumpyValue +from .util.dsetUtil import getPreviewQuery, getFilterItem, getShapeDims +from .util.arrayUtil import getNumElements, getNumpyValue from .util.chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk from .util.chunkUtil import getContiguousLayout from .util.authUtil import getUserPasswordFromRequest, aclCheck diff --git a/hsds/util/arrayUtil.py b/hsds/util/arrayUtil.py index 31ee3bf1..5bd7e0ab 100644 --- a/hsds/util/arrayUtil.py +++ b/hsds/util/arrayUtil.py @@ -93,44 +93,6 @@ def getNumElements(dims): return num_elements -def getShapeDims(shape): - """ - Get dims from a given shape json. Return [1,] for Scalar datasets, - None for null dataspaces - """ - dims = None - if isinstance(shape, int): - dims = [ - shape, - ] - elif isinstance(shape, list) or isinstance(shape, tuple): - dims = shape # can use as is - elif isinstance(shape, str): - # only valid string value is H5S_NULL - if shape != "H5S_NULL": - raise ValueError("Invalid value for shape") - dims = None - elif isinstance(shape, dict): - if "class" not in shape: - raise ValueError("'class' key not found in shape") - if shape["class"] == "H5S_NULL": - dims = None - elif shape["class"] == "H5S_SCALAR": - dims = [ - 1, - ] - elif shape["class"] == "H5S_SIMPLE": - if "dims" not in shape: - raise ValueError("'dims' key expected for shape") - dims = shape["dims"] - else: - raise ValueError("Unknown shape class: {}".format(shape["class"])) - else: - raise ValueError("Unexpected shape class: {}".format(type(shape))) - - return dims - - def jsonToArray(data_shape, data_dtype, data_json): """ Return numpy array from the given json array. diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 79bfa5e7..3e8cd475 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -309,6 +309,45 @@ def getSelectionShape(selection): return shape +def getShapeDims(shape): + """ + Get dims from a given shape json. 
Return [1,] for Scalar datasets, + None for null dataspaces + """ + dims = None + if isinstance(shape, int): + dims = [ + shape, + ] + elif isinstance(shape, list) or isinstance(shape, tuple): + dims = shape # can use as is + elif isinstance(shape, str): + # only valid string value is H5S_NULL + if shape != "H5S_NULL": + raise ValueError("Invalid value for shape") + dims = None + elif isinstance(shape, dict): + if "class" not in shape: + raise ValueError("'class' key not found in shape") + if shape["class"] == "H5S_NULL": + dims = None + elif shape["class"] == "H5S_SCALAR": + dims = [ + 1, + ] + elif shape["class"] == "H5S_SIMPLE": + if "dims" not in shape: + raise ValueError("'dims' key expected for shape") + dims = shape["dims"] + else: + raise ValueError("Unknown shape class: {}".format(shape["class"])) + else: + raise ValueError("Unexpected shape class: {}".format(type(shape))) + + return dims + + + def getQueryParameter(request, query_name, body=None, default=None): """ Herlper function, get query parameter value from request. @@ -560,6 +599,33 @@ def getSelectionList(select, dims): return tuple(select_list) +def get_slices(select, dset_json): + """Get desired slices from selection query param string or json value. + If select is none or empty, slices for entire datashape will be + returned. + Refretch dims if the dataset is extensible + """ + + dset_id = dset_json["id"] + datashape = dset_json["shape"] + if datashape["class"] == "H5S_NULL": + msg = "Null space datasets can not be used as target for GET value" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + dims = getShapeDims(datashape) # throws 400 for HS_NULL dsets + + try: + slices = getSelectionList(select, dims) + except ValueError: + msg = f"Invalid selection: {select} on dims: {dims} " + msg += f"for dataset: {dset_id}" + log.warn(msg) + raise + return slices + + + def getSelectionPagination(select, dims, itemsize, max_request_size): """ Paginate a select tupe into multiple selects where each diff --git a/tests/integ/broadcast_test.py b/tests/integ/broadcast_test.py index 5d0187a5..1bf3c2ee 100755 --- a/tests/integ/broadcast_test.py +++ b/tests/integ/broadcast_test.py @@ -109,8 +109,7 @@ def testPut1DDataset(self): def testPut1DDatasetBinary(self): # Test PUT value with broadcast for 1d dataset using binary data print("testPut1DDatasetBinary", self.base_domain) - NUM_ELEMENTS = 10 # 1000000 - this value is hitting nginx request size limit - + NUM_ELEMENTS = 10 headers = helper.getRequestHeaders(domain=self.base_domain) headers_bin_req = helper.getRequestHeaders(domain=self.base_domain) headers_bin_req["Content-Type"] = "application/octet-stream" From 8eec4398de225ccc4803b0996772a2b366764203 Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 26 Oct 2023 08:47:47 -0700 Subject: [PATCH 17/17] flake8 updates --- hsds/chunk_sn.py | 8 +++----- hsds/dset_lib.py | 9 ++++----- hsds/util/dsetUtil.py | 2 -- tests/integ/broadcast_test.py | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 3a6eb4dd..650cc0fa 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -76,7 +76,6 @@ def use_http_streaming(request, rank): return True - async def PUT_Value(request): """ Handler for PUT //value request @@ -241,7 +240,7 @@ async def PUT_Value(request): except ValueError as ve: log.warn(f"Invalid selection: {ve}") raise HTTPBadRequest(reason="Invalid selection") - + if "Limit" in params: try: limit = int(params["Limit"]) @@ -663,7 +662,7 @@ async def PUT_Value(request): else: # - # Do 
point post + # Do point put # log.debug(f"num_points: {num_points}") @@ -813,7 +812,7 @@ async def GET_Value(request): except ValueError as ve: log.warn(f"Invalid selection: {ve}") raise HTTPBadRequest(reason="Invalid selection") - + log.debug(f"GET Value selection: {slices}") limit = 0 @@ -1050,7 +1049,6 @@ async def GET_Value(request): return resp - async def POST_Value(request): """ Handler for POST //value request - point selection or hyperslab read diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 36e03989..3e2fc56e 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -41,6 +41,7 @@ "H5D_CHUNKED_REF_INDIRECT", ) + def getFillValue(dset_json): """ Return the fill value of the given dataset as a numpy array. If no fill value is defined, return an zero array of given type """ @@ -316,7 +317,6 @@ def getChunkItem(chunkid): return chunkinfo_map - def get_chunkmap_selections(chunk_map, chunk_ids, slices, dset_json): """Update chunk_map with chunk and data selections for the given set of slices @@ -370,6 +370,7 @@ def get_chunk_selections(chunk_map, chunk_ids, slices, dset_json): log.debug(f"get_chunk_selections - data_sel: {data_sel}") item["data_sel"] = data_sel + async def getSelectionData( app, dset_id, @@ -588,7 +589,6 @@ async def doReadSelection( return arr - async def removeChunks(app, chunk_ids, bucket=None): """ Remove chunks with the given ids """ @@ -735,12 +735,11 @@ async def reduceShape(app, dset_json, shape_update, bucket=None): log.debug(f"chunk_id: {chunk_id}") chunk_coord = getChunkCoordinate(chunk_id, layout) log.debug(f"chunk_coord: {chunk_coord}") - - + if np.all(np.add(chunk_coord, layout) <= shape_update): log.debug(f"chunk_id {chunk_id} no action needed") continue - + if np.any(chunk_coord < shape_update): log.debug(f"{chunk_id} reinit") update_ids.append(chunk_id) diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 3e8cd475..5d9a2479 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -347,7 +347,6 @@ def getShapeDims(shape): return dims - def getQueryParameter(request, query_name, body=None, default=None): """ Herlper function, get query parameter value from request. @@ -625,7 +624,6 @@ def get_slices(select, dset_json): return slices - def getSelectionPagination(select, dims, itemsize, max_request_size): """ Paginate a select tupe into multiple selects where each diff --git a/tests/integ/broadcast_test.py b/tests/integ/broadcast_test.py index 1bf3c2ee..f480e637 100755 --- a/tests/integ/broadcast_test.py +++ b/tests/integ/broadcast_test.py @@ -109,7 +109,7 @@ def testPut1DDataset(self): def testPut1DDatasetBinary(self): # Test PUT value with broadcast for 1d dataset using binary data print("testPut1DDatasetBinary", self.base_domain) - NUM_ELEMENTS = 10 + NUM_ELEMENTS = 10 headers = helper.getRequestHeaders(domain=self.base_domain) headers_bin_req = helper.getRequestHeaders(domain=self.base_domain) headers_bin_req["Content-Type"] = "application/octet-stream"
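
As a companion to the repeated try/except blocks above: a minimal sketch of how a caller is expected to use the get_slices helper that now lives in hsds/util/dsetUtil.py. The dataset JSON below is a hypothetical stand-in for what the service node normally fetches from a data node, and the snippet assumes an environment where the hsds package and its config import cleanly.

    from aiohttp.web_exceptions import HTTPBadRequest
    from hsds.util.dsetUtil import getShapeDims, get_slices

    # hypothetical dataset JSON - real values come from the DN nodes
    dset_json = {
        "id": "d-00000000-0000-0000-0000-000000000000",
        "shape": {"class": "H5S_SIMPLE", "dims": [10, 4]},
    }

    dims = getShapeDims(dset_json["shape"])  # -> [10, 4]

    try:
        # select=None (or an empty string) selects the full dataspace; a select
        # string or JSON value is validated against dims and raises ValueError
        # if malformed
        slices = get_slices(None, dset_json)
    except ValueError:
        # the SN handlers translate this into a 400 response
        raise HTTPBadRequest(reason="Invalid selection")

    print(dims, slices)  # one slice per dimension covering the full extent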
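
Similarly, a small worked example of the vectorized chunk classification that reduceShape now uses in place of the explicit per-dimension loops. It is sketched with plain numpy arrays for a 1-d dataset being shrunk from 10 to 6 elements with a chunk layout of 4; in the patch, chunk_coord comes from getChunkCoordinate and layout from getChunkLayout.

    import numpy as np

    layout = np.array([4])        # chunk extent per dimension
    shape_update = np.array([6])  # new (reduced) dataset shape

    for chunk_coord in (np.array([0]), np.array([4]), np.array([8])):
        if np.all(np.add(chunk_coord, layout) <= shape_update):
            action = "no action"  # chunk lies entirely inside the new shape
        elif np.any(chunk_coord < shape_update):
            action = "reinit"     # edge chunk straddles the new boundary
        else:
            action = "delete"     # chunk falls entirely outside the new shape
        print(chunk_coord[0], action)  # 0: no action, 4: reinit, 8: delete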