From b0b71581eb4d6d012fb6c89c98d5521acf1e7f93 Mon Sep 17 00:00:00 2001 From: Matthew Larson Date: Wed, 1 Nov 2023 11:35:24 -0500 Subject: [PATCH] Fix jsonToArray on None array --- hsds/attr_sn.py | 40 ++++++++++++++++--------- hsds/chunk_sn.py | 16 +++++++--- hsds/chunklocator.py | 5 +++- hsds/util/arrayUtil.py | 55 +++++++++++++++++++++-------------- hsds/util/chunkUtil.py | 37 ----------------------- tests/unit/array_util_test.py | 11 +++++++ 6 files changed, 87 insertions(+), 77 deletions(-) diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index d7c865f9..e69f605e 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -374,10 +374,13 @@ async def PUT_Attribute(request): log.debug(f"attribute value: {value}") try: arr = jsonToArray(np_dims, arr_dtype, value) - except ValueError: - msg = "Bad Request: input data doesn't match selection" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + except ValueError as e: + if (value is None): + arr = np.array([]).astype(arr_dtype) + else: + msg = f"Bad Request: input data doesn't match selection: {e}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) log.debug(f"Got: {arr.size} array elements") else: value = None @@ -540,10 +543,13 @@ async def GET_AttributeValue(request): np_shape = getShapeDims(shape_json) try: arr = jsonToArray(np_shape, arr_dtype, dn_json["value"]) - except ValueError: - msg = "Bad Request: input data doesn't match selection" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + except ValueError as e: + if (dn_json["value"] is None): + arr = np.array([]).astype(arr_dtype) + else: + msg = f"Bad Request: input data doesn't match selection: {e}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) output_data = arr.tobytes() msg = f"GET AttributeValue - returning {len(output_data)} " msg += "bytes binary data" @@ -697,7 +703,12 @@ async def PUT_AttributeValue(request): arr = arr.reshape(np_shape) # conform to selection shape # convert to JSON for transmission to DN data = arr.tolist() - value = bytesArrayToList(data) + + try: + value = bytesArrayToList(data) + except ValueError as err: + raise HTTPBadRequest(f"Cannot decode bytes to list: {err}") + if attr_shape["class"] == "H5S_SCALAR": # just send the value, not a list value = value[0] @@ -719,10 +730,13 @@ async def PUT_AttributeValue(request): # validate that the value agrees with type/shape try: arr = jsonToArray(np_shape, np_dtype, value) - except ValueError: - msg = "Bad Request: input data doesn't match selection" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + except ValueError as e: + if (value is None): + arr = np.array([]).astype(np_dtype) + else: + msg = f"Bad Request: input data doesn't match selection: {e}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) log.debug(f"Got: {arr.size} array elements") # ready to add attribute now diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 39641b6e..0dc2521c 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -288,7 +288,10 @@ async def PUT_Value(request): rsp_json = {} data = arr_rsp.tolist() log.debug(f"got rsp data {len(data)} points") - json_query_data = bytesArrayToList(data) + try: + json_query_data = bytesArrayToList(data) + except ValueError as err: + raise HTTPBadRequest(f"Cannot decode provided bytes to list: {err}") rsp_json["value"] = json_query_data rsp_json["hrefs"] = get_hrefs(request, dset_json) resp = await jsonResponse(request, rsp_json) @@ -1020,8 +1023,10 @@ async def GET_Value(request): arr = squeezeArray(arr) data = arr.tolist() - json_data = bytesArrayToList(data) - + try: + json_data = bytesArrayToList(data) + except ValueError as err: + raise HTTPBadRequest(f"Cannot decode bytes to list: {err}") datashape = dset_json["shape"] if datashape["class"] == "H5S_SCALAR": @@ -1279,7 +1284,10 @@ async def POST_Value(request): resp_json = {} data = arr_rsp.tolist() log.debug(f"got rsp data {len(data)} points") - json_data = bytesArrayToList(data) + try: + json_data = bytesArrayToList(data) + except ValueError as err: + raise HTTPBadRequest(f"Cannot decode bytes to list: {err}") resp_json["value"] = json_data resp_json["hrefs"] = get_hrefs(request, dset_json) resp_body = await jsonResponse( diff --git a/hsds/chunklocator.py b/hsds/chunklocator.py index 0baa3d64..38a9b7d6 100644 --- a/hsds/chunklocator.py +++ b/hsds/chunklocator.py @@ -215,7 +215,10 @@ def main(): log.warn(msg) sys.exit(-1) log.info(f"got chunk array shape: {arr.shape}") - json_data = bytesArrayToList(arr) + try: + json_data = bytesArrayToList(arr) + except ValueError as err: + raise err # print list data to stdout print(json_data) log.info(f"got {len(json_data)} json elements") diff --git a/hsds/util/arrayUtil.py b/hsds/util/arrayUtil.py index 050c1f81..87d18461 100644 --- a/hsds/util/arrayUtil.py +++ b/hsds/util/arrayUtil.py @@ -44,9 +44,16 @@ def bytesArrayToList(data): if is_list: out = [] for item in data: - out.append(bytesArrayToList(item)) # recursive call + try: + rec_item = bytesArrayToList(item) # recursive call + out.append(rec_item) + except ValueError as err: + raise err elif type(data) is bytes: - out = data.decode("utf-8") + try: + out = data.decode("utf-8") + except UnicodeDecodeError as err: + raise ValueError(err) else: out = data @@ -125,6 +132,13 @@ def fillVlenArray(rank, data, arr, index): index += 1 return index + if (data_json is None): + return np.array([]).astype(data_dtype) + + if (isinstance(data_json, (list, tuple))): + if None in data_json: + return np.array([]).astype(data_dtype) + # need some special conversion for compound types -- # each element must be a tuple, but the JSON decoder # gives us a list instead. @@ -145,27 +159,24 @@ def fillVlenArray(rank, data, arr, index): data_json = data_json.encode("utf8") data_json = [data_json,] # listify - if not (None in data_json): - if isVlen(data_dtype): - arr = np.zeros((npoints,), dtype=data_dtype) - fillVlenArray(np_shape_rank, data_json, arr, 0) - else: - try: - arr = np.array(data_json, dtype=data_dtype) - except UnicodeEncodeError as ude: - msg = "Unable to encode data" - raise ValueError(msg) from ude - # raise an exception of the array shape doesn't match the selection shape - # allow if the array is a scalar and the selection shape is one element, - # numpy is ok with this - if arr.size != npoints: - msg = "Input data doesn't match selection number of elements" - msg += f" Expected {npoints}, but received: {arr.size}" - raise ValueError(msg) - if arr.shape != data_shape: - arr = arr.reshape(data_shape) # reshape to match selection + if isVlen(data_dtype): + arr = np.zeros((npoints,), dtype=data_dtype) + fillVlenArray(np_shape_rank, data_json, arr, 0) else: - arr = np.array([]).astype(data_dtype) + try: + arr = np.array(data_json, dtype=data_dtype) + except UnicodeEncodeError as ude: + msg = "Unable to encode data" + raise ValueError(msg) from ude + # raise an exception of the array shape doesn't match the selection shape + # allow if the array is a scalar and the selection shape is one element, + # numpy is ok with this + if arr.size != npoints: + msg = "Input data doesn't match selection number of elements" + msg += f" Expected {npoints}, but received: {arr.size}" + raise ValueError(msg) + if arr.shape != data_shape: + arr = arr.reshape(data_shape) # reshape to match selection return arr diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 88059a57..1e043fad 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -9,43 +9,6 @@ PRIMES = [29, 31, 37, 41, 43, 47, 53, 59, 61, 67] # for chunk partitioning -""" -Convert list that may contain bytes type elements to list of string elements - -TBD: code copy from arrayUtil.py -""" - - -def _bytesArrayToList(data): - if type(data) in (bytes, str): - is_list = False - elif isinstance(data, (np.ndarray, np.generic)): - if len(data.shape) == 0: - is_list = False - data = data.tolist() # tolist will return a scalar in this case - if type(data) in (list, tuple): - is_list = True - else: - is_list = False - else: - is_list = True - elif type(data) in (list, tuple): - is_list = True - else: - is_list = False - - if is_list: - out = [] - for item in data: - out.append(_bytesArrayToList(item)) # recursive call - elif type(data) is bytes: - out = data.decode("utf-8") - else: - out = data - - return out - - def getChunkSize(layout, type_size): """Return chunk size given layout. i.e. just the product of the values in the list. diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py index dac0e395..1695e82d 100644 --- a/tests/unit/array_util_test.py +++ b/tests/unit/array_util_test.py @@ -806,6 +806,17 @@ def testGetBroadcastShape(self): bcshape = getBroadcastShape([2, 3, 5], 15) self.assertEqual(bcshape, [3, 5]) + def testJsonToArrayOnNoneCompoundArray(self): + # compound type + dt = np.dtype([("a", "i4"), ("b", "S5")]) + shape = [1,] + data = None + + arr = jsonToArray(shape, dt, data) + + self.assertEqual(len(arr), 0) + self.assertEqual(arr.dtype, dt) + if __name__ == "__main__": # setup test files