Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow PUT shape to reduce extent #266

Merged
merged 1 commit into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions hsds/async_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from .util.hdf5dtype import getItemSize, createDataType
from .util.arrayUtil import getShapeDims, getNumElements, bytesToArray
from .util.dsetUtil import getHyperslabSelection, getFilterOps, getChunkDims
from .util.dsetUtil import getDatasetLayoutClass, getDatasetCreationPropertyLayout
from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout

from .util.storUtil import getStorKeys, putStorJSONObj, getStorJSONObj
from .util.storUtil import deleteStorObj, getStorBytes, isStorObj
Expand Down Expand Up @@ -79,9 +79,8 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None):
msg += f"for {dset_id}"
log.warn(msg)
return
layout = getDatasetCreationPropertyLayout(dset_json)
msg = f"updateDatasetInfo - shape: {shape_json} type: {type_json} "
msg += f"item size: {item_size} layout: {layout}"
msg += f"item size: {item_size}"
log.info(msg)

dims = getShapeDims(shape_json) # returns None for HS_NULL dsets
Expand Down Expand Up @@ -120,6 +119,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None):
linked_bytes = chunk_size * num_chunks
num_linked_chunks = num_chunks
elif layout_class == "H5D_CHUNKED_REF":
layout = getDatasetLayout(dset_json)
if "chunks" not in layout:
log.error("Expected to find 'chunks' key in H5D_CHUNKED_REF layout")
return
Expand All @@ -130,7 +130,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None):
linked_bytes += chunk_info[1]
num_linked_chunks = len(chunks)
elif layout_class == "H5D_CHUNKED_REF_INDIRECT":
log.debug("chunk ref indirect")
layout = getDatasetLayout(dset_json)
if "chunk_table" not in layout:
msg = "Expected to find chunk_table in dataset layout for "
msg += f"{dset_id}"
Expand All @@ -147,7 +147,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None):
msg += f"for {dset_id}"
log.warn(msg)
return
chunktable_layout = getDatasetCreationPropertyLayout(chunktable_json)
chunktable_layout = getDatasetLayout(chunktable_json)
log.debug(f"chunktable_layout: {chunktable_layout}")
if not isinstance(chunktable_layout, dict):
log.warn(f"unexpected chunktable_layout: {chunktable_id}")
Expand Down Expand Up @@ -234,7 +234,15 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None):
elif layout_class == "H5D_CHUNKED":
msg = "updateDatasetInfo - no linked bytes/chunks for "
msg += "H5D_CHUNKED layout"
log.debug(msg)
log.info(msg)
elif layout_class == "H5D_CONTIGUOUS":
msg = "updateDatasetInfo - no linked bytes/chunks for "
msg += "H5D_CONTIGUOUS layout"
log.info(msg)
elif layout_class == "H5D_COMPACT":
msg = "updateDatasetInfo - no linked bytes/chunks for "
msg += "H5D_COMPACT layout"
log.info(msg)
else:
log.error(f"unexpected chunk layout: {layout_class}")

Expand Down
9 changes: 4 additions & 5 deletions hsds/chunk_sn.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@
from .util.domainUtil import getDomainFromRequest, isValidDomain
from .util.domainUtil import getBucketForDomain
from .util.hdf5dtype import getItemSize, createDataType
from .util.dsetUtil import getSelectionList, isNullSpace, getDatasetLayoutClass
from .util.dsetUtil import getSelectionList, isNullSpace, getDatasetLayout, getDatasetLayoutClass
from .util.dsetUtil import isExtensible, getSelectionPagination
from .util.dsetUtil import getSelectionShape, getDsetMaxDims, getChunkLayout
from .util.dsetUtil import getDatasetCreationPropertyLayout
from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId
from .util.chunkUtil import getChunkIndex, getChunkSuffix
from .util.chunkUtil import getChunkCoverage, getDataCoverage
Expand Down Expand Up @@ -177,7 +176,7 @@ def getChunkItem(chunkid):
return chunk_item

if layout_class == "H5D_CONTIGUOUS_REF":
layout = getDatasetCreationPropertyLayout(dset_json)
layout = getDatasetLayout(dset_json)
log.debug(f"cpl layout: {layout}")
s3path = layout["file_uri"]
s3size = layout["size"]
Expand Down Expand Up @@ -229,7 +228,7 @@ def getChunkItem(chunkid):
chunk_item["s3offset"] = s3offset
chunk_item["s3size"] = chunk_size
elif layout_class == "H5D_CHUNKED_REF":
layout = getDatasetCreationPropertyLayout(dset_json)
layout = getDatasetLayout(dset_json)
log.debug(f"cpl layout: {layout}")
s3path = layout["file_uri"]
chunks = layout["chunks"]
Expand All @@ -248,7 +247,7 @@ def getChunkItem(chunkid):
chunk_item["s3size"] = s3size

elif layout_class == "H5D_CHUNKED_REF_INDIRECT":
layout = getDatasetCreationPropertyLayout(dset_json)
layout = getDatasetLayout(dset_json)
log.debug(f"cpl layout: {layout}")
if "chunk_table" not in layout:
log.error("Expected to find chunk_table in dataset layout")
Expand Down
8 changes: 1 addition & 7 deletions hsds/dset_dn.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,13 +273,7 @@ async def PUT_DatasetShape(request):
# e.g. another client has already extended the shape since the SN
# verified it
shape_update = body["shape"]
log.debug("shape_update: {}".format(shape_update))

for i in range(len(dims)):
if shape_update[i] < dims[i]:
msg = "Dataspace can not be made smaller"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
log.debug(f"shape_update: {shape_update}")

# Update the shape!
for i in range(len(dims)):
Expand Down
15 changes: 11 additions & 4 deletions hsds/dset_sn.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,15 +621,22 @@ async def PUT_DatasetShape(request):
msg = "Extent of update shape request does not match dataset shape"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
shape_reduction = False
for i in range(rank):
if shape_update and shape_update[i] < dims[i]:
msg = "Dataspace can not be made smaller"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
shape_reduction = True
if shape_update[i] < 0:
msg = "Extension dimension can not be made less than zero"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
if shape_update and maxdims[i] != 0 and shape_update[i] > maxdims[i]:
msg = "Database can not be extended past max extent"
msg = "Extension dimension can not be extended past max extent"
log.warn(msg)
raise HTTPConflict()
if shape_reduction:
log.info("Shape extent reduced for dataset")
# TBD - ensure any chunks that are outside the new shape region are
# deleted
if extend_dim < 0 or extend_dim >= rank:
msg = "Extension dimension must be less than rank and non-negative"
log.warn(msg)
Expand Down
60 changes: 29 additions & 31 deletions hsds/util/dsetUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,48 +855,46 @@ def isExtensible(dims, maxdims):
return False


def getDatasetCreationPropertyLayout(dset_json):
""" return layout json from creation property list """
cpl = None
def getDatasetLayout(dset_json):
""" Return layout json from creation property list or layout json """
layout = None

if "creationProperties" in dset_json:
cp = dset_json["creationProperties"]
if "layout" in cp:
cpl = cp["layout"]
if not cpl and "layout" in dset_json:
# fallback to dset_json layout
cpl = dset_json["layout"]
if cpl is None:
log.warn(f"no layout found for {dset_json}")
return cpl
layout = cp["layout"]
if not layout and "layout" in dset_json:
layout = dset_json["layout"]
if not layout:
log.warn(f"no layout for {dset_json}")
return layout


def getDatasetLayoutClass(dset_json):
""" return layout class """
chunk_layout = None
cp_layout = getDatasetCreationPropertyLayout(dset_json)
# check creation properties first
if cp_layout:
if "class" in cp_layout:
chunk_layout = cp_layout["class"]
# otherwise, get class prop from layout
if chunk_layout is None and "layout" in dset_json:
layout = dset_json["layout"]
if "class" in layout:
chunk_layout = layout["class"]
return chunk_layout
layout = getDatasetLayout(dset_json)
if layout and "class" in layout:
layout_class = layout["class"]
else:
layout_class = None
return layout_class


def getChunkDims(dset_json):
""" get chunk shape for given dset_json """
cpl = getDatasetCreationPropertyLayout(dset_json)
if cpl and "dims" in cpl:
return cpl["dims"]
# otherwise, check the 'layout' key
if 'layout' in dset_json:
layout = dset_json["layout"]
if "dims" in layout:
return layout["dims"]
return None # not found

layout = getDatasetLayout(dset_json)
if layout and "dims" in layout:
return layout["dims"]
else:
# H5D_COMPACT and H5D_CONTIGUOUS will not have a dims key
# Check the layout dict in dset_json to see if it's
# defined there
if "layout" in dset_json:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this section redundant, since getDatasetLayout already checks for a layout directly in dset_json?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit hacky - the getDatasetLayout returns either the layout in CreationProperties or the layout key in dset_json. The idea is that the CreationProperties is just what the client passed in on POST dataset, whereas the layout in dset_json is the actual layout used (e.g. HSDS may decide to use a larger chunksize). In the case here, we don't want CONTIGUOUS or COMPACT, so get dset_json["layout"] if that's the case.

layout = dset_json["layout"]
if "dims" in layout:
return layout["dims"]
return None


class ItemIterator:
Expand Down
22 changes: 17 additions & 5 deletions tests/integ/dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,11 +681,23 @@ def testResizableDataset(self):
self.assertEqual(rsp.status_code, 201)
rspJson = json.loads(rsp.text)

# verify updated-shape using the GET shape request
rsp = self.session.get(req, headers=headers)
self.assertEqual(rsp.status_code, 200)
rspJson = json.loads(rsp.text)
self.assertTrue("shape" in rspJson)
shape = rspJson["shape"]
self.assertEqual(shape["class"], "H5S_SIMPLE")
self.assertEqual(len(shape["dims"]), 1)
self.assertEqual(shape["dims"][0], 15) # increased to 15
self.assertTrue("maxdims" in shape)
self.assertEqual(shape["maxdims"][0], 20)

# reduce the size to 5 elements
# payload = {"shape": 5}
# rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
# self.assertEqual(rsp.status_code, 201)
# rspJson = json.loads(rsp.text)
payload = {"shape": 5}
rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
self.assertEqual(rsp.status_code, 201)
rspJson = json.loads(rsp.text)

# verify updated-shape using the GET shape request
rsp = self.session.get(req, headers=headers)
Expand All @@ -695,7 +707,7 @@ def testResizableDataset(self):
shape = rspJson["shape"]
self.assertEqual(shape["class"], "H5S_SIMPLE")
self.assertEqual(len(shape["dims"]), 1)
self.assertEqual(shape["dims"][0], 15) # increased to 15
self.assertEqual(shape["dims"][0], 5) # decreased to 5
self.assertTrue("maxdims" in shape)
self.assertEqual(shape["maxdims"][0], 20)

Expand Down
Loading