From 365071027584163715831a71324699e37b991be5 Mon Sep 17 00:00:00 2001
From: Matthew Larson
Date: Tue, 28 Nov 2023 16:45:41 -0600
Subject: [PATCH] Support creation of array/nested array types

Reading and writing to array/nested array type objects is still buggy,
but this is a step in the right direction.

Previously, the check on the return value of getFillValue would throw
an error when the datatype was H5T_ARRAY.
---
 hsds/chunk_crawl.py       |  4 +-
 hsds/datanode_lib.py      |  2 +-
 hsds/dset_lib.py          | 10 ++++-
 hsds/util/hdf5dtype.py    |  4 +-
 tests/integ/value_test.py | 95 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py
index eccbc4e2..7c2c51d8 100755
--- a/hsds/chunk_crawl.py
+++ b/hsds/chunk_crawl.py
@@ -67,7 +67,7 @@ def getFillValue(dset_json):
         arr = np.empty((1,), dtype=dt, order="C")
         arr[...] = fill_value
     else:
-        arr = np.zeros([1,], dtype=dt, order="C")
+        arr = None  # no fill value; callers must check for None
 
     return arr
 
@@ -448,7 +448,7 @@ async def read_point_sel(
 
     def defaultArray():
         # no data, return zero array
-        if fill_value:
+        if fill_value is not None:
             arr = np.empty((num_points,), dtype=dt)
             arr[...] = fill_value
         else:
diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py
index 8cddd76c..bbaef431 100644
--- a/hsds/datanode_lib.py
+++ b/hsds/datanode_lib.py
@@ -1123,7 +1123,7 @@ async def get_chunk(
 
             # normal fill value based init or initializer failed
             fill_value = getFillValue(dset_json)
-            if fill_value:
+            if fill_value is not None:
                 chunk_arr = np.empty(dims, dtype=dt, order="C")
                 chunk_arr[...] = fill_value
             else:
diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py
index 3e2fc56e..f8326481 100755
--- a/hsds/dset_lib.py
+++ b/hsds/dset_lib.py
@@ -61,7 +61,7 @@ def getFillValue(dset_json):
         arr = np.empty((1,), dtype=dt, order="C")
         arr[...] = fill_value
     else:
-        arr = np.zeros([1,], dtype=dt, order="C")
+        arr = None  # no fill value; callers must check for None
 
     return arr
 
@@ -526,7 +526,7 @@ async def doReadSelection(
 
     # initialize to fill_value if specified
     fill_value = getFillValue(dset_json)
-    if fill_value:
+    if fill_value is not None:
         arr = np.empty(np_shape, dtype=dset_dtype, order="C")
         arr[...] = fill_value
     else:
@@ -713,6 +713,12 @@ async def reduceShape(app, dset_json, shape_update, bucket=None):
     # get the fill value
     arr = getFillValue(dset_json)
 
+    type_json = dset_json["type"]
+    dt = createDataType(type_json)
+
+    if arr is None:
+        arr = np.zeros([1], dtype=dt, order="C")
+
     # and the chunk layout
     layout = tuple(getChunkLayout(dset_json))
     log.debug(f"got layout: {layout}")
diff --git a/hsds/util/hdf5dtype.py b/hsds/util/hdf5dtype.py
index 7a40ec11..0bfc2628 100644
--- a/hsds/util/hdf5dtype.py
+++ b/hsds/util/hdf5dtype.py
@@ -662,9 +662,9 @@ def createBaseDataType(typeItem):
         if isinstance(arrayBaseType, dict):
             if "class" not in arrayBaseType:
                 raise KeyError("'class' not provided for array base type")
-            type_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING")
+            type_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_ARRAY")
             if arrayBaseType["class"] not in type_classes:
-                msg = "Array Type base type must be integer, float, or string"
+                msg = "Array Type base type must be integer, float, string, or array"
                 raise TypeError(msg)
         baseType = createDataType(arrayBaseType)
         metadata = None
diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py
index f68d1771..644b2e5d 100755
--- a/tests/integ/value_test.py
+++ b/tests/integ/value_test.py
@@ -3588,6 +3588,101 @@ def testPutFixedUTF8StringDatasetBinary(self):
         self.assertEqual(rsp.status_code, 200)
         self.assertEqual(rsp.text, text)
 
+    def testCreateArrayDataset(self):
+        headers = helper.getRequestHeaders(domain=self.base_domain)
+        req = self.endpoint + "/"
+
+        # Get root uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        root_uuid = rspJson["root"]
+        self.assertTrue(helper.validateId(root_uuid))
+
+        array_dims = [5]
+        num_arrays = 3
+
+        datatype = {
+            "class": "H5T_ARRAY",
+            "base": {
+                "class": "H5T_INTEGER",
+                "base": "H5T_STD_I64LE"
+            },
+            "dims": array_dims
+        }
+
+        payload = {
+            "type": datatype,
+            "shape": num_arrays,
+        }
+
+        req = self.endpoint + "/datasets"
+        rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)  # create dataset
+
+        rspJson = json.loads(rsp.text)
+        array_dset_uuid = rspJson["id"]
+        self.assertTrue(helper.validateId(array_dset_uuid))
+
+        # verify the shape of the dataset
+        req = self.endpoint + "/datasets/" + array_dset_uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)  # get dataset
+        rspJson = json.loads(rsp.text)
+        shape = rspJson["shape"]
+        self.assertEqual(shape["class"], "H5S_SIMPLE")
+        self.assertEqual(shape["dims"], [num_arrays])
+
+    def testCreateNestedArrayDataset(self):
+        headers = helper.getRequestHeaders(domain=self.base_domain)
+        req = self.endpoint + "/"
+
+        # Get root uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        root_uuid = rspJson["root"]
+        self.assertTrue(helper.validateId(root_uuid))
+
+        base_array_dims = [2]
+        nested_array_dims = [3]
+        num_nested_arrays = 4
+
+        nested_array_dtype = {
+            "class": "H5T_ARRAY",
+            "base": {
+                "class": "H5T_ARRAY",
+                "base": {
+                    "class": "H5T_INTEGER",
+                    "base": "H5T_STD_I64LE"
+                },
+                "dims": base_array_dims
+            },
+            "dims": nested_array_dims
+        }
+
+        payload = {
+            "type": nested_array_dtype,
+            "shape": num_nested_arrays,
+        }
+
+        req = self.endpoint + "/datasets"
+        rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)  # create dataset
+
+        rspJson = json.loads(rsp.text)
+        array_dset_uuid = rspJson["id"]
+        self.assertTrue(helper.validateId(array_dset_uuid))
+
+        # verify the shape of the dataset
+        req = self.endpoint + "/datasets/" + array_dset_uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)  # get dataset
+        rspJson = json.loads(rsp.text)
+        shape = rspJson["shape"]
+        self.assertEqual(shape["class"], "H5S_SIMPLE")
+        self.assertEqual(shape["dims"], [num_nested_arrays])
+
 
 if __name__ == "__main__":
     # setup test files