Skip to content

Commit

Permalink
Support creation of array/nested array types
Browse files Browse the repository at this point in the history
Reading and writing to array/nested array type objects is
still buggy, but this is a step in the right direction.

Previously, the truthiness check on the return value of getFillValue
(`if fill_value:`) would throw an error when the datatype was H5T_ARRAY;
callers now test `fill_value is not None` instead.
  • Loading branch information
mattjala committed Nov 28, 2023
1 parent c8c51c3 commit 3650710
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 7 deletions.
4 changes: 2 additions & 2 deletions hsds/chunk_crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def getFillValue(dset_json):
arr = np.empty((1,), dtype=dt, order="C")
arr[...] = fill_value
else:
arr = np.zeros([1,], dtype=dt, order="C")
arr = None # np.zeros([1,], dtype=dt, order="C")

return arr

Expand Down Expand Up @@ -448,7 +448,7 @@ async def read_point_sel(

def defaultArray():
# no data, return zero array
if fill_value:
if fill_value is not None:
arr = np.empty((num_points,), dtype=dt)
arr[...] = fill_value
else:
Expand Down
2 changes: 1 addition & 1 deletion hsds/datanode_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,7 @@ async def get_chunk(
# normal fill value based init or initializer failed
fill_value = getFillValue(dset_json)

if fill_value:
if fill_value is not None:
chunk_arr = np.empty(dims, dtype=dt, order="C")
chunk_arr[...] = fill_value
else:
Expand Down
10 changes: 8 additions & 2 deletions hsds/dset_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def getFillValue(dset_json):
arr = np.empty((1,), dtype=dt, order="C")
arr[...] = fill_value
else:
arr = np.zeros([1,], dtype=dt, order="C")
arr = None # np.zeros([1,], dtype=dt, order="C")

return arr

Expand Down Expand Up @@ -526,7 +526,7 @@ async def doReadSelection(
# initialize to fill_value if specified
fill_value = getFillValue(dset_json)

if fill_value:
if fill_value is not None:
arr = np.empty(np_shape, dtype=dset_dtype, order="C")
arr[...] = fill_value
else:
Expand Down Expand Up @@ -713,6 +713,12 @@ async def reduceShape(app, dset_json, shape_update, bucket=None):
# get the fill value
arr = getFillValue(dset_json)

type_json = dset_json["type"]
dt = createDataType(type_json)

if arr is None:
arr = np.zeros([1], dtype=dt, order="C")

# and the chunk layout
layout = tuple(getChunkLayout(dset_json))
log.debug(f"got layout: {layout}")
Expand Down
4 changes: 2 additions & 2 deletions hsds/util/hdf5dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,9 +662,9 @@ def createBaseDataType(typeItem):
if isinstance(arrayBaseType, dict):
if "class" not in arrayBaseType:
raise KeyError("'class' not provided for array base type")
type_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING")
type_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_ARRAY")
if arrayBaseType["class"] not in type_classes:
msg = "Array Type base type must be integer, float, or string"
msg = "Array Type base type must be integer, float, string, or array"
raise TypeError(msg)
baseType = createDataType(arrayBaseType)
metadata = None
Expand Down
95 changes: 95 additions & 0 deletions tests/integ/value_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3588,6 +3588,101 @@ def testPutFixedUTF8StringDatasetBinary(self):
self.assertEqual(rsp.status_code, 200)
self.assertEqual(rsp.text, text)

def testCreateArrayDataset(self):
    """Create a dataset with an H5T_ARRAY element type and verify its shape.

    Posts a dataset of ``num_arrays`` elements whose type is an array of
    ``array_dims`` H5T_STD_I64LE integers, then fetches the dataset back
    and checks the dataspace class and dims reported by the server.
    """
    headers = helper.getRequestHeaders(domain=self.base_domain)
    req = self.endpoint + "/"

    # Get root uuid
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    root_uuid = rspJson["root"]
    # Assert on the validation result (as done for the dataset id below)
    # so that a malformed root id actually fails the test.
    self.assertTrue(helper.validateId(root_uuid))

    array_dims = [5]
    num_arrays = 3

    # array type: each dataset element is itself a fixed-size integer array
    datatype = {
        "class": "H5T_ARRAY",
        "base": {
            "class": "H5T_INTEGER",
            "base": "H5T_STD_I64LE"
        },
        "dims": array_dims
    }

    payload = {
        "type": datatype,
        "shape": num_arrays,
    }

    req = self.endpoint + "/datasets"
    rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
    self.assertEqual(rsp.status_code, 201)  # create dataset

    rspJson = json.loads(rsp.text)
    array_dset_uuid = rspJson["id"]
    self.assertTrue(helper.validateId(array_dset_uuid))

    # verify the shape of the dataset
    req = self.endpoint + "/datasets/" + array_dset_uuid
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)  # get dataset
    rspJson = json.loads(rsp.text)
    shape = rspJson["shape"]
    self.assertEqual(shape["class"], "H5S_SIMPLE")
    # the integer extent sent in the payload is expected back as a dims list
    self.assertEqual(shape["dims"], [num_arrays])

def testCreateNestedArrayDataset(self):
    """Create a dataset whose element type is an array of arrays.

    The element type is an H5T_ARRAY of ``nested_array_dims`` whose base
    type is another H5T_ARRAY of ``base_array_dims`` H5T_STD_I64LE
    integers. After creation the dataset is fetched back and the
    dataspace class and dims reported by the server are checked.
    """
    headers = helper.getRequestHeaders(domain=self.base_domain)
    req = self.endpoint + "/"

    # Get root uuid
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    root_uuid = rspJson["root"]
    # Assert on the validation result (as done for the dataset id below)
    # so that a malformed root id actually fails the test.
    self.assertTrue(helper.validateId(root_uuid))

    base_array_dims = [2]
    nested_array_dims = [3]
    num_nested_arrays = 4

    # outer array type wraps an inner array type as its base
    nested_array_dtype = {
        "class": "H5T_ARRAY",
        "base": {
            "class": "H5T_ARRAY",
            "base": {
                "class": "H5T_INTEGER",
                "base": "H5T_STD_I64LE"
            },
            "dims": base_array_dims
        },
        "dims": nested_array_dims
    }

    payload = {
        "type": nested_array_dtype,
        "shape": num_nested_arrays,
    }

    req = self.endpoint + "/datasets"
    rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
    self.assertEqual(rsp.status_code, 201)  # create dataset

    rspJson = json.loads(rsp.text)
    array_dset_uuid = rspJson["id"]
    self.assertTrue(helper.validateId(array_dset_uuid))

    # verify the shape of the dataset
    req = self.endpoint + "/datasets/" + array_dset_uuid
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)  # get dataset
    rspJson = json.loads(rsp.text)
    shape = rspJson["shape"]
    self.assertEqual(shape["class"], "H5S_SIMPLE")
    # the integer extent sent in the payload is expected back as a dims list
    self.assertEqual(shape["dims"], [num_nested_arrays])


if __name__ == "__main__":
# setup test files
Expand Down

0 comments on commit 3650710

Please sign in to comment.