diff --git a/README.md b/README.md index c11b2fc9..07b09f87 100755 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Make sure you have Python 3 and Pip installed, then: - Set user_name: `$ export USER_NAME=$USER` - Set user_password: `$ export USER_PASSWORD=$USER` - Set admin name: `$ export ADMIN_USERNAME=$USER` - - Set admin password: `$ $export ADMIN_PASSWORD=$USER` + - Set admin password: `$ export ADMIN_PASSWORD=$USER` - Run test suite: `$ python testall.py --skip_unit` 5. (Optional) Install the h5pyd package for an h5py compatible api and tool suite: https://github.com/HDFGroup/h5pyd 6. (Optional) Post install setup (test data, home folders, cli tools, etc): [docs/post_install.md](docs/post_install.md) diff --git a/admin/config/config.yml b/admin/config/config.yml index d690ba7e..92718250 100755 --- a/admin/config/config.yml +++ b/admin/config/config.yml @@ -76,7 +76,6 @@ http_streaming: true # enable HTTP streaming k8s_dn_label_selector: app=hsds # Selector for getting data node pods from a k8s deployment (https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors) k8s_namespace: null # Specifies if a the client should be limited to a specific namespace. Useful for some RBAC configurations. restart_policy: on-failure # Docker restart policy -domain_req_max_objects_limit: 500 # maximum number of objects to return in GET domain request with use_cache # the following two values with give backoff times of approx: 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8 dn_max_retries: 7 # number of time to retry DN requests dn_retry_backoff_exp: 0.1 # backoff factor for retries @@ -99,3 +98,4 @@ data_cache_max_req_size: 128k # max size for rangeget fetches data_cache_expire_time: 3600 # expire cache items after one hour data_cache_page_size: 4m # page size for range get cache, set to zero to disable proxy data_cache_max_concurrent_read: 16 # maximum number of inflight storage read requests +domain_req_max_objects_limit: 500 # maximum number of objects to return in GET domain request with use_cache diff --git a/hsds/attr_dn.py b/hsds/attr_dn.py index d80ca322..1b5693c8 100755 --- a/hsds/attr_dn.py +++ b/hsds/attr_dn.py @@ -15,15 +15,16 @@ import time from bisect import bisect_left -from aiohttp.web_exceptions import HTTPBadRequest, HTTPConflict, HTTPNotFound +from aiohttp.web_exceptions import HTTPBadRequest, HTTPConflict, HTTPNotFound, HTTPGone from aiohttp.web_exceptions import HTTPInternalServerError from aiohttp.web import json_response -from .util.attrUtil import validateAttributeName +from .util.attrUtil import validateAttributeName, isEqualAttr from .util.hdf5dtype import getItemSize, createDataType +from .util.globparser import globmatch from .util.dsetUtil import getShapeDims from .util.arrayUtil import arrayToBytes, jsonToArray, decodeData -from .util.arrayUtil import bytesToArray, bytesArrayToList +from .util.arrayUtil import bytesToArray, bytesArrayToList, getNumElements from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj from . 
import hsds_logger as log @@ -43,7 +44,7 @@ def _index(items, marker, create_order=False): return -1 -def _getAttribute(attr_name, obj_json, include_data=True, encoding=None): +def _getAttribute(attr_name, obj_json, include_data=True, max_data_size=0, encoding=None): """ copy relevant fields from src to target """ if not isinstance(obj_json, dict): @@ -89,6 +90,26 @@ def _getAttribute(attr_name, obj_json, include_data=True, encoding=None): encoding = None log.debug("base64 encoding requested") + if include_data and max_data_size > 0: + # check that the size of the data is not greater than the limit + item_size = getItemSize(type_json) + if item_size == "H5T_VARIABLE": + # could be anything, just guess as 512 bytes per element + # TBD: determine exact size + item_size = 512 + dims = getShapeDims(shape_json) + num_elements = getNumElements(dims) + attr_size = item_size * num_elements + if attr_size > max_data_size: + msg = f"{attr_name} size of {attr_size} is " + msg += "larger than max_data_size, excluding data" + log.debug(msg) + include_data = False + else: + msg = f"{attr_name} size of {attr_size} is " + msg += "not larger than max_data_size, including data" + log.debug(msg) + if include_data: value_json = src_attr["value"] if "encoding" in src_attr: @@ -143,11 +164,18 @@ async def GET_Attributes(request): if params.get("IncludeData"): include_data = True + max_data_size = 0 + if params.get("max_data_size"): + max_data_size = int(params["max_data_size"]) + pattern = None + if params.get("pattern"): + pattern = params["pattern"] + limit = None if "Limit" in params: try: limit = int(params["Limit"]) - log.info("GET_Links - using Limit: {}".format(limit)) + log.info(f"GET_Attributes - using Limit: {limit}") except ValueError: msg = "Bad Request: Expected int type for limit" log.error(msg) # should be validated by SN @@ -204,7 +232,14 @@ async def GET_Attributes(request): attr_list = [] for i in range(start_index, end_index): attr_name = titles[i] + if pattern: + if not globmatch(attr_name, pattern): + log.debug(f"attr_name: {attr_name} did not match pattern: {pattern}") + continue + kwargs = {"include_data": include_data, "encoding": encoding} + if include_data: + kwargs["max_data_size"] = max_data_size log.debug(f"_getAttribute kwargs: {kwargs}") des_attr = _getAttribute(attr_name, obj_json, **kwargs) attr_list.append(des_attr) @@ -249,6 +284,9 @@ async def POST_Attributes(request): if "IncludeData" in params and params["IncludeData"]: include_data = True log.debug("include attr data") + max_data_size = 0 + if params.get("max_data_size"): + max_data_size = int(params["max_data_size"]) if params.get("encoding"): encoding = params["encoding"] log.debug("POST_Attributes requested base64 encoding") @@ -269,22 +307,34 @@ async def POST_Attributes(request): kwargs = {"include_data": include_data} if encoding: kwargs["encoding"] = encoding + if max_data_size > 0: + kwargs["max_data_size"] = max_data_size + + missing_names = set() for attr_name in titles: if attr_name not in attr_dict: + missing_names.add(attr_name) continue des_attr = _getAttribute(attr_name, obj_json, **kwargs) attr_list.append(des_attr) resp_json = {"attributes": attr_list} - if not attr_list: - msg = f"POST attributes - requested {len(titles)} but none were found" - log.warn(msg) - raise HTTPNotFound() - if len(attr_list) != len(titles): + + if missing_names: msg = f"POST attributes - requested {len(titles)} attributes but only " msg += f"{len(attr_list)} were found" log.warn(msg) + # one or more attributes not found, check to 
see if any + deleted_attrs = app["deleted_attrs"] + if obj_id in deleted_attrs: + attr_delete_set = deleted_attrs[obj_id] + for attr_name in missing_names: + if attr_name in attr_delete_set: + log.info(f"attribute: {attr_name} was previously deleted, returning 410") + raise HTTPGone() + log.info("one or more attributes not found, returning 404") raise HTTPNotFound() log.debug(f"POST attributes returning: {resp_json}") resp = json_response(resp_json) @@ -392,18 +442,28 @@ async def PUT_Attributes(request): attributes = obj_json["attributes"] - # check for conflicts, also set timestamp create_time = time.time() - new_attribute = False # set this if we have any new attributes + # check for conflicts + new_attributes = set() # attribute names that are new or replacements for attr_name in items: attribute = items[attr_name] if attr_name in attributes: log.debug(f"attribute {attr_name} exists") - if replace: + old_item = attributes[attr_name] + try: + is_dup = isEqualAttr(attribute, old_item) + except TypeError: + log.error(f"isEqualAttr TypeError - new: {attribute} old: {old_item}") + raise HTTPInternalServerError() + if is_dup: + log.debug(f"duplicate attribute: {attr_name}") + continue + elif replace: # don't change the create timestamp log.debug(f"attribute {attr_name} exists, but will be updated") old_item = attributes[attr_name] attribute["created"] = old_item["created"] + new_attributes.add(attr_name) else: # Attribute already exists, return a 409 msg = f"Attempt to overwrite attribute: {attr_name} " @@ -414,18 +474,30 @@ async def PUT_Attributes(request): # set the timestamp log.debug(f"new attribute {attr_name}") attribute["created"] = create_time - new_attribute = True + new_attributes.add(attr_name) - # ok - all set, create the attributes - for attr_name in items: + # if any of the attribute names was previously deleted, + # remove from the deleted set + deleted_attrs = app["deleted_attrs"] + if obj_id in deleted_attrs: + attr_delete_set = deleted_attrs[obj_id] + else: + attr_delete_set = set() + + # ok - all set, add the attributes + for attr_name in new_attributes: log.debug(f"adding attribute {attr_name}") attr_json = items[attr_name] attributes[attr_name] = attr_json - - # write back to S3, save to metadata cache - await save_metadata_obj(app, obj_id, obj_json, bucket=bucket) - - if new_attribute: + if attr_name in attr_delete_set: + attr_delete_set.remove(attr_name) + + if new_attributes: + # update the obj lastModified + now = time.time() + obj_json["lastModified"] = now + # write back to S3, save to metadata cache + await save_metadata_obj(app, obj_id, obj_json, bucket=bucket) status = 201 else: status = 200 @@ -490,15 +562,35 @@ async def DELETE_Attributes(request): # return a list of attributes based on sorted dictionary keys attributes = obj_json["attributes"] + # add attribute names to deleted set, so we can return a 410 if they + # are requested in the future + deleted_attrs = app["deleted_attrs"] + if obj_id in deleted_attrs: + attr_delete_set = deleted_attrs[obj_id] + else: + attr_delete_set = set() + deleted_attrs[obj_id] = attr_delete_set + + save_obj = False # set to True if anything is actually modified for attr_name in attr_names: + if attr_name in attr_delete_set: + log.warn(f"attribute {attr_name} already deleted") + continue + if attr_name not in attributes: - msg = f"Attribute {attr_name} not found in objid: {obj_id}" + msg = f"Attribute {attr_name} not found in obj id: {obj_id}" log.warn(msg) raise HTTPNotFound() del 
attributes[attr_name] - - await save_metadata_obj(app, obj_id, obj_json, bucket=bucket) + attr_delete_set.add(attr_name) + save_obj = True + + if save_obj: + # update the object lastModified + now = time.time() + obj_json["lastModified"] = now + await save_metadata_obj(app, obj_id, obj_json, bucket=bucket) resp_json = {} resp = json_response(resp_json) diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index 579ae7ff..6f4ebd8e 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -18,9 +18,9 @@ from aiohttp.web import StreamResponse from json import JSONDecodeError -from .util.httpUtil import getHref -from .util.httpUtil import getAcceptType, jsonResponse -from .util.idUtil import isValidUuid, getCollectionForId, getRootObjId +from .util.httpUtil import getAcceptType, jsonResponse, getHref, getBooleanParam +from .util.globparser import globmatch +from .util.idUtil import isValidUuid, getRootObjId from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot @@ -57,25 +57,38 @@ async def GET_Attributes(request): log.warn(msg) raise HTTPBadRequest(reason=msg) - kwargs = {} + domain = getDomainFromRequest(request) + if not isValidDomain(domain): + msg = f"Invalid domain: {domain}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + bucket = getBucketForDomain(domain) + log.debug(f"bucket: {bucket}") + + follow_links = getBooleanParam(params, "follow_links") + if follow_links and collection != "groups": + msg = "follow_links can only be used with group ids" + log.warn(msg) + raise HTTPBadRequest(reason=msg) - ignore_nan = False - include_data = True - if "IncludeData" in params: - IncludeData = params["IncludeData"] - if not IncludeData or IncludeData == "0": - include_data = False - kwargs["include_data"] = False + log.debug(f"getAttributes follow_links: {follow_links}") + include_data = getBooleanParam(params, "IncludeData") log.debug(f"include_data: {include_data}") - if "ignore_nan" in params and params["ignore_nan"]: - ignore_nan = True - kwargs["ignore_nan"] = True + if "max_data_size" in params: + try: + max_data_size = int(params["max_data_size"]) + except ValueError: + msg = "expected int for max_data_size" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + max_data_size = 0 - if "CreateOrder" in params and params["CreateOrder"]: - kwargs["create_order"] = True + ignore_nan = getBooleanParam(params, "ignore_nan") + create_order = getBooleanParam(params, "CreateOrder") - limit = None if "Limit" in params: try: limit = int(params["Limit"]) @@ -83,11 +96,33 @@ async def GET_Attributes(request): msg = "Bad Request: Expected int type for limit" log.warn(msg) raise HTTPBadRequest(reason=msg) - kwargs["limit"] = limit - marker = None + else: + limit = None if "Marker" in params: marker = params["Marker"] - kwargs["marker"] = marker + else: + marker = None + if "encoding" in params: + encoding = params["encoding"] + if params["encoding"] != "base64": + msg = "only base64 encoding is supported" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + encoding = "base64" + else: + encoding = None + + if "pattern" in params and params["pattern"]: + pattern = params["pattern"] + try: + globmatch("abc", pattern) + except ValueError: + msg = f"invalid pattern: {pattern} for attribute matching" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + log.debug(f"using pattern: {pattern} for GET_Attributes") + else: + pattern = None username, pswd = 
getUserPasswordFromRequest(request) if username is None and app["allow_noauth"]: @@ -95,27 +130,60 @@ async def GET_Attributes(request): else: await validateUserPassword(app, username, pswd) - domain = getDomainFromRequest(request) - if not isValidDomain(domain): - msg = f"Invalid domain: {domain}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - bucket = getBucketForDomain(domain) - log.debug(f"bucket: {bucket}") - kwargs["bucket"] = bucket - - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "read") - attributes = await getAttributes(app, obj_id, **kwargs) - - log.debug(f"got attributes json from dn for obj_id: {obj_id}") + if follow_links: + # setup kwargs for DomainCrawler + kwargs = {"action": "get_attr", "follow_links": True, "bucket": bucket} + # mixin params + if include_data: + kwargs["include_data"] = True + if max_data_size > 0: + kwargs["max_data_size"] = max_data_size + if ignore_nan: + kwargs["ignore_nan"] = True + if limit: + kwargs["limit"] = limit + if encoding: + kwargs["encoding"] = encoding + if pattern: + kwargs["pattern"] = pattern + if create_order: + kwargs["create_order"] = True + items = [obj_id, ] + crawler = DomainCrawler(app, items, **kwargs) + # will raise exception on NotFound, etc. + await crawler.crawl() + attributes = crawler._obj_dict + msg = f"DomainCrawler returned: {len(attributes)} objects" + log.info(msg) + else: + # just get attributes for this objects + kwargs = {"bucket": bucket} + if include_data: + kwargs["include_data"] = True + if max_data_size > 0: + kwargs["max_data_size"] = max_data_size + if ignore_nan: + kwargs["ignore_nan"] = True + if limit: + kwargs["limit"] = limit + if marker: + kwargs["marker"] = marker + if encoding: + kwargs["encoding"] = encoding + if pattern: + kwargs["pattern"] = pattern + if create_order: + kwargs["create_order"] = True + attributes = await getAttributes(app, obj_id, **kwargs) + log.debug(f"got attributes json from dn for obj_id: {obj_id}") - # mixin hrefs - for attribute in attributes: - attr_name = attribute["name"] - attr_href = f"/{collection}/{obj_id}/attributes/{attr_name}" - attribute["href"] = getHref(request, attr_href) + # mixin hrefs + for attribute in attributes: + attr_name = attribute["name"] + attr_href = f"/{collection}/{obj_id}/attributes/{attr_name}" + attribute["href"] = getHref(request, attr_href) resp_json = {} resp_json["attributes"] = attributes @@ -166,18 +234,15 @@ async def GET_Attribute(request): raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "read") - if "ignore_nan" in params and params["ignore_nan"]: - ignore_nan = True - else: - ignore_nan = False + ignore_nan = getBooleanParam(params, "ignore_nan") - if "IncludeData" in params and not params["IncludeData"]: - include_data = False - else: + if "IncludeData" not in params: + # this boolean param breaks our usual rule of default False include_data = True + else: + include_data = getBooleanParam(params, "IncludeData") if params.get("encoding"): if params["encoding"] != "base64": @@ -518,7 +583,6 @@ async def PUT_Attribute(request): domain_json = await getDomainJson(app, domain) verifyRoot(domain_json) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, req_obj_id, username, "create") # get attribute from request body @@ -539,8 +603,7 @@ async def PUT_Attribute(request): status = await 
putAttributes(app, req_obj_id, attr_json, **kwargs) log.info(f"PUT Attributes status: {status}") - hrefs = [] # TBD - req_rsp = {"hrefs": hrefs} + req_rsp = {} # attribute creation successful resp = await jsonResponse(request, req_rsp, status=status) log.response(request, resp=resp) @@ -564,7 +627,6 @@ async def PUT_Attributes(request): msg = "PUT Attribute with no body" log.warn(msg) raise HTTPBadRequest(reason=msg) - try: body = await request.json() except JSONDecodeError: @@ -579,6 +641,10 @@ async def PUT_Attributes(request): raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) log.debug(f"got bucket: {bucket}") + if "replace" in params and params["replace"]: + replace = True + else: + replace = False # get domain JSON domain_json = await getDomainJson(app, domain) @@ -656,13 +722,8 @@ async def PUT_Attributes(request): log.debug(f"got {len(obj_ids)} obj_ids") - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, req_obj_id, username, "create") - kwargs = {"bucket": bucket} - if params.get("replace"): - kwargs["replace"] = True - count = len(obj_ids) if count == 0: msg = "no obj_ids defined" @@ -673,18 +734,17 @@ async def PUT_Attributes(request): obj_id = list(obj_ids.keys())[0] attr_json = obj_ids[obj_id] log.debug(f"got attr_json: {attr_json}") + kwargs = {"bucket": bucket, "attr_json": attr_json} + if replace: + kwargs["replace"] = True - status = await putAttributes(app, obj_id, attr_json, **kwargs) + status = await putAttributes(app, obj_id, **kwargs) else: # put multi obj - - # mixin some additonal kwargs - crawler_params = {"follow_links": False} - if bucket: - crawler_params["bucket"] = bucket - - kwargs = {"action": "put_attr", "raise_error": True, "params": crawler_params} + kwargs = {"action": "put_attr", "bucket": bucket} + if replace: + kwargs["replace"] = True crawler = DomainCrawler(app, obj_ids, **kwargs) # will raise exception on not found, server busy, etc. 
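(Reviewer aside, not part of the patch: the hunk above replaces the crawler's old nested `params` dict with plain keyword arguments. Below is a minimal usage sketch of the new call style; `app`, `obj_ids`, `bucket`, and `replace` are placeholders standing in for values the PUT_Attributes handler already has in scope, not a runnable server setup.)

```python
# Sketch only: keyword-style DomainCrawler construction introduced by this patch.
# Assumes an aiohttp app object and an obj_ids dict mapping object ids to the
# attribute items to write, as assembled by the service-node handler.
from hsds.domain_crawl import DomainCrawler

async def put_attrs_multi(app, obj_ids, bucket, replace=False):
    kwargs = {"action": "put_attr", "bucket": bucket}
    if replace:
        # overwrite existing attributes instead of returning a 409
        kwargs["replace"] = True
    crawler = DomainCrawler(app, obj_ids, **kwargs)
    # crawl() raises the matching HTTP error (e.g. HTTPBadRequest) if any object
    # came back with a failure status and ignore_error is left at its default
    await crawler.crawl()
    return crawler.get_status()  # highest status code seen across the objects
```

Note that the bucket must now be passed explicitly; the new constructor raises ValueError when it is omitted.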
@@ -694,8 +754,7 @@ async def PUT_Attributes(request): log.info("DomainCrawler done for put_attrs action") - hrefs = [] # TBD - req_rsp = {"hrefs": hrefs} + req_rsp = {} # attribute creation successful log.debug(f"PUT_Attributes returning status: {status}") resp = await jsonResponse(request, req_rsp, status=status) @@ -737,7 +796,6 @@ async def DELETE_Attribute(request): domain_json = await getDomainJson(app, domain) verifyRoot(domain_json) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "delete") attr_names = [attr_name, ] @@ -745,8 +803,7 @@ async def DELETE_Attribute(request): await deleteAttributes(app, obj_id, **kwargs) - hrefs = [] # TBD - req_rsp = {"hrefs": hrefs} + req_rsp = {} resp = await jsonResponse(request, req_rsp) log.response(request, resp=resp) return resp @@ -789,14 +846,11 @@ async def GET_AttributeValue(request): domain_json = await getDomainJson(app, domain) verifyRoot(domain_json) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "read") params = request.rel_url.query - if "ignore_nan" in params and params["ignore_nan"]: - ignore_nan = True - else: - ignore_nan = False + ignore_nan = getBooleanParam(params, "ignore_nan") + if "encoding" in params: encoding = params["encoding"] if encoding and encoding != "base64": @@ -807,7 +861,7 @@ async def GET_AttributeValue(request): encoding = None attr_names = [attr_name, ] - kwargs = {"attr_names": attr_names, "bucket": bucket} + kwargs = {"attr_names": attr_names, "bucket": bucket, "include_data": True} if ignore_nan: kwargs["ignore_nan"] = True @@ -967,7 +1021,6 @@ async def PUT_AttributeValue(request): domain_json = await getDomainJson(app, domain) verifyRoot(domain_json) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "update") attr_names = [attr_name, ] @@ -1097,8 +1150,7 @@ async def PUT_AttributeValue(request): else: log.info("PUT AttributesValue status: 200") - hrefs = [] # TBD - req_rsp = {"hrefs": hrefs} + req_rsp = {} # attribute creation successful resp = await jsonResponse(request, req_rsp) log.response(request, resp=resp) @@ -1106,7 +1158,7 @@ async def PUT_AttributeValue(request): async def POST_Attributes(request): - """HTTP method to get multiple attribute values""" + """HTTP method to get multiple attributes """ log.request(request) app = request.app log.info("POST_Attributes") @@ -1213,16 +1265,21 @@ async def POST_Attributes(request): domain_json = await getDomainJson(app, domain) verifyRoot(domain_json) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "read") params = request.rel_url.query log.debug(f"got params: {params}") - include_data = True - if "IncludeData" in params: - IncludeData = params["IncludeData"] - if not IncludeData or IncludeData == "0": - include_data = False + include_data = False + max_data_size = 0 + include_data = getBooleanParam(params, "IncludeData") + log.debug(f"include_data: {include_data}") + if "max_data_size" in params: + try: + max_data_size = int(params["max_data_size"]) + except ValueError: + msg = "expected int for max_data_size" + log.warn(msg) + raise HTTPBadRequest(reason=msg) if params.get("ignore_nan"): ignore_nan = True @@ -1247,40 +1304,37 @@ async def POST_Attributes(request): elif len(items) == 1: # just make a request the datanode obj_id = list(items.keys())[0] - collection = getCollectionForId(obj_id) 
attr_names = items[obj_id] kwargs = {"attr_names": attr_names, "bucket": bucket} - if not include_data: - kwargs["include_data"] = False + if include_data: + log.debug("setting include_data to True") + kwargs["include_data"] = True + if max_data_size > 0: + kwargs["max_data_size"] = max_data_size if ignore_nan: kwargs["ignore_nan"] = True if encoding: kwargs["encoding"] = encoding - + log.debug(f"getAttributes kwargs: {kwargs}") attributes = await getAttributes(app, obj_id, **kwargs) - # mixin hrefs - for attribute in attributes: - attr_name = attribute["name"] - attr_href = f"/{collection}/{obj_id}/attributes/{attr_name}" - attribute["href"] = getHref(request, attr_href) - resp_json["attributes"] = attributes else: # get multi obj # don't follow links! - crawler_params = {"follow_links": False, "bucket": bucket} - # mixin params - if not include_data: - crawler_params["include_data"] = False - + kwargs = {"action": "get_attr", "bucket": bucket, "follow_links": False} + kwargs["include_attrs"] = True + if include_data: + log.debug("setting include_data to True") + kwargs["include_data"] = True + if max_data_size > 0: + kwargs["max_data_size"] = max_data_size if ignore_nan: - crawler_params["ignore_nan"] = True - + kwargs["ignore_nan"] = True if encoding: - crawler_params["encoding"] = encoding - - kwargs = {"action": "get_attr", "raise_error": True, "params": crawler_params} + pass + # TBD: crawler_params["encoding"] = encoding + log.debug(f"DomainCrawler kwargs: {kwargs}") crawler = DomainCrawler(app, items, **kwargs) # will raise exception on NotFound, etc. await crawler.crawl() @@ -1288,31 +1342,16 @@ async def POST_Attributes(request): msg = f"DomainCrawler returned: {len(crawler._obj_dict)} objects" log.info(msg) attributes = crawler._obj_dict - # mixin hrefs + # log attributes returned for each obj_id for obj_id in attributes: obj_attributes = attributes[obj_id] msg = f"POST_Attributes, obj_id {obj_id} " msg += f"returned {len(obj_attributes)}" log.debug(msg) - collection = getCollectionForId(obj_id) - for attribute in obj_attributes: - log.debug(f"attribute: {attribute}") - attr_name = attribute["name"] - attr_href = f"/{collection}/{obj_id}/attributes/{attr_name}" - attribute["href"] = getHref(request, attr_href) log.debug(f"got {len(attributes)} attributes") resp_json["attributes"] = attributes - hrefs = [] - collection = getCollectionForId(req_id) - obj_uri = "/" + collection + "/" + req_id - href = getHref(request, obj_uri + "/attributes") - hrefs.append({"rel": "self", "href": href}) - hrefs.append({"rel": "home", "href": getHref(request, "/")}) - hrefs.append({"rel": "owner", "href": getHref(request, obj_uri)}) - resp_json["hrefs"] = hrefs - resp = await jsonResponse(request, resp_json, ignore_nan=ignore_nan) log.response(request, resp=resp) return resp @@ -1383,7 +1422,6 @@ async def DELETE_Attributes(request): domain_json = await getDomainJson(app, domain) verifyRoot(domain_json) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "delete") kwargs = {"attr_names": attr_names, "bucket": bucket, "separator": separator} diff --git a/hsds/basenode.py b/hsds/basenode.py index 4ae3dfed..c318a675 100644 --- a/hsds/basenode.py +++ b/hsds/basenode.py @@ -33,7 +33,7 @@ from .util.k8sClient import getDnLabelSelector, getPodIps from . 
import hsds_logger as log -HSDS_VERSION = "0.8.5" +HSDS_VERSION = "0.9.0.alpha0" def getVersion(): diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index b131ca9b..426cb169 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -81,9 +81,6 @@ async def write_chunk_hyperslab( np_arr: numpy array of data to be written """ - if not bucket: - bucket = config.get("bucket_name") - msg = f"write_chunk_hyperslab, chunk_id: {chunk_id}, slices: {slices}, " msg += f"bucket: {bucket}" log.info(msg) @@ -181,8 +178,6 @@ async def read_chunk_hyperslab( entire object) bucket: s3 bucket to read from """ - if not bucket: - bucket = config.get("bucket_name") if chunk_map is None: log.error("expected chunk_map to be set") @@ -444,9 +439,6 @@ async def read_point_sel( arr: numpy array to store read bytes """ - if not bucket: - bucket = config.get("bucket_name") - msg = f"read_point_sel, chunk_id: {chunk_id}, bucket: {bucket}" log.info(msg) @@ -549,9 +541,6 @@ async def write_point_sel( point_data: index of arr element to update for a given point """ - if not bucket: - bucket = config.get("bucket_name") - msg = f"write_point_sel, chunk_id: {chunk_id}, points: {point_list}, " msg += f"data: {point_data}" log.info(msg) diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index f3d0236e..ad1941e7 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -16,7 +16,7 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPGone from json import JSONDecodeError -from .util.httpUtil import http_post, http_put, http_delete, getHref, respJsonAssemble +from .util.httpUtil import http_post, getHref, respJsonAssemble from .util.httpUtil import jsonResponse from .util.idUtil import isValidUuid, getDataNodeUrl, createObjId from .util.authUtil import getUserPasswordFromRequest, aclCheck @@ -24,8 +24,8 @@ from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot from .util.hdf5dtype import validateTypeItem, getBaseTypeJson -from .servicenode_lib import getDomainJson, getObjectJson, validateAction -from .servicenode_lib import getObjectIdByPath, getPathForObjectId +from .servicenode_lib import getDomainJson, getObjectJson, validateAction, deleteObj +from .servicenode_lib import getObjectIdByPath, getPathForObjectId, putHardLink from . 
import hsds_logger as log @@ -223,22 +232,13 @@ async def POST_Datatype(request): ctype_json = {"id": ctype_id, "root": root_id, "type": datatype} log.debug(f"create named type, body: {ctype_json}") req = getDataNodeUrl(app, ctype_id) + "/datatypes" - params = {} - if bucket: - params["bucket"] = bucket + params = {"bucket": bucket} type_json = await http_post(app, req, data=ctype_json, params=params) # create link if requested if link_id and link_title: - link_json = {} - link_json["id"] = ctype_id - link_json["class"] = "H5L_TYPE_HARD" - link_req = getDataNodeUrl(app, link_id) - link_req += "/groups/" + link_id + "/links/" + link_title - log.debug("PUT link - : " + link_req) - put_rsp = await http_put(app, link_req, data=link_json, params=params) - log.debug(f"PUT Link resp: {put_rsp}") + await putHardLink(app, link_id, link_title, tgt_id=ctype_id, bucket=bucket) # datatype creation successful resp = await jsonResponse(request, type_json, status=201) @@ -251,8 +242,6 @@ async def DELETE_Datatype(request): """HTTP method to delete a committed type resource""" log.request(request) app = request.app - meta_cache = app["meta_cache"] - ctype_id = request.match_info.get("id") if not ctype_id: msg = "Missing committed type id" @@ -280,15 +269,9 @@ async def DELETE_Datatype(request): domain_json = await getDomainJson(app, domain) verifyRoot(domain_json) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, ctype_id, username, "delete") - req = getDataNodeUrl(app, ctype_id) + "/datatypes/" + ctype_id - - await http_delete(app, req, params=params) - - if ctype_id in meta_cache: - del meta_cache[ctype_id] # remove from cache + await deleteObj(app, ctype_id, bucket=bucket) resp = await jsonResponse(request, {}) log.response(request, resp=resp) diff --git a/hsds/datanode.py b/hsds/datanode.py index 1efd9063..50bb0307 100644 --- a/hsds/datanode.py +++ b/hsds/datanode.py @@ -30,7 +30,7 @@ from .domain_dn import GET_Domain, PUT_Domain, DELETE_Domain, PUT_ACL from .group_dn import GET_Group, POST_Group, DELETE_Group, PUT_Group from .group_dn import POST_Root -from .link_dn import GET_Links, GET_Link, PUT_Link, DELETE_Link +from .link_dn import GET_Links, POST_Links, PUT_Links, DELETE_Links from .attr_dn import GET_Attributes, POST_Attributes from .attr_dn import PUT_Attributes, DELETE_Attributes from .ctype_dn import GET_Datatype, POST_Datatype, DELETE_Datatype @@ -59,9 +59,9 @@ async def init(): app.router.add_route("PUT", "/groups/{id}", PUT_Group) app.router.add_route("POST", "/groups", POST_Group) app.router.add_route("GET", "/groups/{id}/links", GET_Links) - app.router.add_route("GET", "/groups/{id}/links/{title}", GET_Link) - app.router.add_route("DELETE", "/groups/{id}/links/{title}", DELETE_Link) - app.router.add_route("PUT", "/groups/{id}/links/{title}", PUT_Link) + app.router.add_route("POST", "/groups/{id}/links", POST_Links) + app.router.add_route("DELETE", "/groups/{id}/links", DELETE_Links) + app.router.add_route("PUT", "/groups/{id}/links", PUT_Links) app.router.add_route("GET", "/groups/{id}/attributes", GET_Attributes) app.router.add_route("POST", "/groups/{id}/attributes", POST_Attributes) app.router.add_route("DELETE", "/groups/{id}/attributes", DELETE_Attributes) @@ -299,6 +299,8 @@ def create_app(): } app["chunk_cache"] = LruCache(**kwargs) app["deleted_ids"] = set() + app["deleted_attrs"] = {} # map of objectid to set of deleted attribute names + app["deleted_links"] = {} # map of objectid to set of deleted link names # map of objids to 
timestamp and bucket of which they were last updated app["dirty_ids"] = {} # map of dataset ids to deflate levels (if compressed) diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index 84ee7a87..119ac442 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -18,10 +18,10 @@ from aiohttp.web_exceptions import HTTPServiceUnavailable, HTTPConflict, HTTPBadRequest from aiohttp.web_exceptions import HTTPInternalServerError, HTTPNotFound, HTTPGone - +from .util.httpUtil import isOK from .util.idUtil import getCollectionForId, getDataNodeUrl - -from .servicenode_lib import getObjectJson, getAttributes, putAttributes +from .util.globparser import globmatch +from .servicenode_lib import getObjectJson, getAttributes, putAttributes, getLinks, putLinks from . import hsds_logger as log @@ -31,26 +31,49 @@ def __init__( app, objs, action="get_obj", - params=None, + bucket=None, + follow_links=False, + include_links=False, + include_attrs=False, + include_data=False, + max_data_size=0, + ignore_nan=False, + encoding=None, + create_order=False, + pattern=None, + limit=None, + replace=False, + ignore_error=False, max_tasks=40, - max_objects_limit=0, - raise_error=False + max_objects_limit=0 ): - log.info(f"DomainCrawler.__init__ root_id: {len(objs)} objs") - log.debug(f"params: {params}") + log.info(f"DomainCrawler.__init__ action: {action} - {len(objs)} objs") self._app = app self._action = action self._max_objects_limit = max_objects_limit - self._params = params + self._follow_links = follow_links + self._include_links = include_links + self._include_attrs = include_attrs + self._include_data = include_data + self._max_data_size = max_data_size + self._ignore_nan = ignore_nan + self._encoding = encoding + self._create_order = create_order + self._pattern = pattern + self._limit = limit + self._replace = replace self._max_tasks = max_tasks self._q = asyncio.Queue() self._obj_dict = {} self.seen_ids = set() - self._raise_error = raise_error + self._ignore_error = ignore_error if not objs: log.error("no objs for crawler to crawl!") raise ValueError() - + if not bucket: + log.error("bucket not set for DomainCrawler") + raise ValueError() + self._bucket = bucket for obj_id in objs: log.debug(f"adding {obj_id} to the queue") self._q.put_nowait(obj_id) @@ -59,6 +82,52 @@ def __init__( else: self._objs = None + def follow_links(self, grp_id, links): + # add any linked obj ids to the lookup ids set + log.debug(f"follow links for {grp_id}, links: {links}") + if getCollectionForId(grp_id) != "groups": + log.warn(f"expected group id but got: {grp_id}") + return + link_count = 0 + for link in links: + log.debug(f"DomainCrawler - follow links for: {link}") + if isinstance(link, str): + # we were passed a dict of link titles to link_jsons + title = link + link_obj = links[title] + else: + # were passed a list of link jsons + if "title" not in link: + log.warn(f"expected to find title key in link: {link}") + continue + title = link["title"] + link_obj = link + log.debug(f"link {title}: {link_obj}") + if link_obj["class"] != "H5L_TYPE_HARD": + # just follow hardlinks + log.debug("not hard link, continue") + continue + link_id = link_obj["id"] + link_collection = getCollectionForId(link_id) + if self._action in ("get_link", "put_link") and link_collection != "groups": + # only groups can have links + log.debug(f"link id: {link_id} is not for a group, continue") + continue + num_objects = len(self._obj_dict) + if self._max_objects_limit and num_objects >= self._max_objects_limit: + msg = f"DomainCrawler 
reached limit of {self._max_objects_limit}" + log.info(msg) + break + if link_id not in self._obj_dict: + # haven't seen this object yet, get obj json + log.debug(f"DomainCrawler - adding link_id: {link_id} to queue") + self._obj_dict[link_id] = {} # placeholder for obj id + self._q.put_nowait(link_id) + link_count += 1 + else: + log.debug(f"link: {link_id} already in object dict") + log.debug(f"follow links done, added {link_count} ids to queue") + async def get_attributes(self, obj_id, attr_names): # get the given attributes for the obj_id msg = f"get_attributes for {obj_id}" @@ -66,12 +135,25 @@ async def get_attributes(self, obj_id, attr_names): msg += f", {len(attr_names)} attributes" log.debug(msg) - kwargs = {} - for key in ("include_data", "ignore_nan", "bucket"): - if key in self._params: - kwargs[key] = self._params[key] + kwargs = {"bucket": self._bucket} + if self._include_data: + kwargs["include_data"] = True + if self._ignore_nan: + kwargs["ignore_nan"] = True + if self._encoding: + kwargs["encoding"] = self._encoding if attr_names: kwargs["attr_names"] = attr_names + else: + # only apply these parameters if we are attempting to fetch all links + if self._limit: + kwargs["limit"] = self._limit + if self._create_order: + kwargs["create_order"] = True + if self._pattern: + kwargs["pattern"] = self._pattern + if self._max_data_size > 0: + kwargs["max_data_size"] = self._max_data_size log.debug(f"using kwargs: {kwargs}") status = 200 @@ -93,24 +175,46 @@ async def get_attributes(self, obj_id, attr_names): log.error(f"unexpected exception from post request: {e}") status = 500 - if status == 200: + if isOK(status): log.debug(f"got attributes: {attributes}") self._obj_dict[obj_id] = attributes else: log.warn(f"Domain crawler - got {status} status for obj_id {obj_id}") self._obj_dict[obj_id] = {"status": status} + collection = getCollectionForId(obj_id) + + if collection == "groups" and self._follow_links: + links = None + status = 200 + try: + links = await getLinks(self._app, obj_id, bucket=self._bucket) + except HTTPNotFound: + status = 404 + except HTTPServiceUnavailable: + status = 503 + except HTTPInternalServerError: + status = 500 + except Exception as e: + log.error(f"unexpected exception {e}") + status = 500 + + if status >= 500: + log.warn(f"getLinks for {obj_id} returned: {status}") + elif links: + self.follow_links(obj_id, links) + else: + log.debug(f"no links for {obj_id}") + async def put_attributes(self, obj_id, attr_items): # write the given attributes for the obj_id log.debug(f"put_attributes for {obj_id}, {len(attr_items)} attributes") req = getDataNodeUrl(self._app, obj_id) collection = getCollectionForId(obj_id) req += f"/{collection}/{obj_id}/attributes" - kwargs = {} - if "bucket" in self._params: - kwargs["bucket"] = self._params["bucket"] - if "replace" in self._params: - kwargs["replace"] = self._params["replace"] + kwargs = {"bucket": self._bucket} + if self._replace: + kwargs["replace"] = True status = None try: status = await putAttributes(self._app, obj_id, attr_items, **kwargs) @@ -129,20 +233,17 @@ async def put_attributes(self, obj_id, attr_items): async def get_obj_json(self, obj_id): """ get the given obj_json for the obj_id. 
- for each group found, search the links if include_links is set """ + for each group found, search the links if follow_links is set """ log.debug(f"get_obj_json: {obj_id}") collection = getCollectionForId(obj_id) - kwargs = {} + kwargs = {"bucket": self._bucket, "include_attrs": self._include_attrs} - for k in ("include_links", "include_attrs", "bucket"): - if k in self._params: - kwargs[k] = self._params[k] - if collection == "groups" and self._params.get("follow_links"): + if collection == "groups" and self._follow_links: follow_links = True kwargs["include_links"] = True # get them so we can follow them else: follow_links = False - if follow_links or self._params.get("include_attrs"): + if follow_links or self._include_attrs: kwargs["refresh"] = True # don't want a cached version in this case log.debug(f"follow_links: {follow_links}") @@ -181,34 +282,121 @@ async def get_obj_json(self, obj_id): # for groups iterate through all the hard links and # add to the lookup ids set - log.debug(f"gotCollection: {collection}") + log.debug(f"gotCollection: {collection}, follow_links: {follow_links}") if collection == "groups" and follow_links: if "links" not in obj_json: log.error("expected links key in obj_json") return links = obj_json["links"] - log.debug(f"DomainCrawler links: {links}") - for title in links: - log.debug(f"DomainCrawler - got link: {title}") - link_obj = links[title] - num_objects = len(self._obj_dict) - if self._params.get("max_objects_limit") is not None: - max_objects_limit = self._params["max_objects_limit"] - if num_objects >= max_objects_limit: - msg = "DomainCrawler reached limit of " - msg += f"{max_objects_limit}" - log.info(msg) - break - if link_obj["class"] != "H5L_TYPE_HARD": - # just follow hardlinks - continue - link_id = link_obj["id"] - if link_id not in self._obj_dict: - # haven't seen this object yet, get obj json - log.debug(f"DomainCrawler - adding link_id: {link_id}") - self._obj_dict[link_id] = {} # placeholder for obj id - self._q.put_nowait(link_id) + self.follow_links(obj_id, links) + + if not self._include_links: + # don't keep the links + del obj_json["links"] + + async def get_links(self, grp_id, titles=None): + """ if titles is set, get all the links in grp_id that + have a title in the list. Otherwise, return all links for the object. 
""" + log.debug(f"get_links: {grp_id} follow_links: {self._follow_links}") + pattern = None + if titles: + log.debug(f"titles; {titles}") + collection = getCollectionForId(grp_id) + if collection != "groups": + log.warn(f"get_links, expected groups id but got: {grp_id}") + return + kwargs = {"bucket": self._bucket} + if titles: + kwargs["titles"] = titles + else: + # only apply these parameters if we are attempting to fetch all links + if self._limit: + kwargs["limit"] = self._limit + if self._create_order: + kwargs["create_order"] = True + + if self._pattern: + if self._follow_links: + # apply the pattern after we get the links back, + # otherwise we won't get the groups links that we + # need to follow + log.debug("will apply pattern on return") + pattern = self._pattern + else: + kwargs["pattern"] = self._pattern + + log.debug(f"follow_links: {self._follow_links}") + log.debug(f"getLinks kwargs: {kwargs}") + + links = None + status = 200 + try: + links = await getLinks(self._app, grp_id, **kwargs) + except HTTPNotFound: + status = 404 + except HTTPServiceUnavailable: + status = 503 + except HTTPInternalServerError: + status = 500 + except Exception as e: + log.error(f"unexpected exception {e}") + status = 500 + log.debug(f"get_links status: {status}") + + if links is None: + msg = f"DomainCrawler - get_links for {grp_id} " + if status >= 500: + msg += f"failed, status: {status}" + log.error(msg) + else: + msg += f"returned status: {status}" + log.warn(msg) + return + + log.debug(f"DomainCrawler - got links for {grp_id}") + + if pattern: + log.debug(f"applying pattern: {pattern}") + filtered_links = [] + for link in links: + title = link["title"] + if globmatch(title, pattern): + filtered_links.append(link) + msg = f"getLinks with pattern: {pattern} returning " + msg += f"{len(filtered_links)} links from {len(links)}" + log.debug(msg) + log.debug(f"save to obj_dict: {filtered_links}") + self._obj_dict[grp_id] = filtered_links + else: + log.debug(f"save to obj_dict: {links}") + self._obj_dict[grp_id] = links # store the links + + # if follow_links, add any group links to the lookup ids set + if self._follow_links: + self.follow_links(grp_id, links) + + async def put_links(self, grp_id, link_items): + # write the given links for the obj_id + log.debug(f"put_links for {grp_id}, {len(link_items)} links") + req = getDataNodeUrl(self._app, grp_id) + req += f"/groups/{grp_id}/links" + kwargs = {"bucket": self._bucket} + status = None + try: + status = await putLinks(self._app, grp_id, link_items, **kwargs) + except HTTPConflict: + log.warn("DomainCrawler - got HTTPConflict from http_put") + status = 409 + except HTTPServiceUnavailable: + status = 503 + except HTTPInternalServerError: + status = 500 + except Exception as e: + log.error(f"unexpected exception {e}") + + log.debug(f"DomainCrawler fetch for {grp_id} - returning status: {status}") + self._obj_dict[grp_id] = {"status": status} def get_status(self): """ return the highest status of any of the returned objects """ @@ -242,13 +430,11 @@ async def crawl(self): status = self.get_status() if status: log.debug(f"DomainCrawler -- status: {status}") - log.debug(f"raise_error: {self._raise_error}") - if self._raise_error: - # throw the approriate exception if other than 200, 201 - if status == 200: + log.debug(f"ignore_error: {self._ignore_error}") + if not self._ignore_error: + # throw the appropriate exception if other than 200, 201 + if isOK(status): pass # ok - elif status == 201: - pass # also ok elif status == 400: log.warn("DomainCrawler 
- BadRequest") raise HTTPBadRequest(reason="unkown") @@ -279,7 +465,6 @@ async def work(self): async def fetch(self, obj_id): log.debug(f"DomainCrawler fetch for id: {obj_id}") - log.debug(f"action: {self._action}") if self._action == "get_obj": log.debug("DomainCrawler - get obj") # just get the obj json @@ -287,13 +472,10 @@ async def fetch(self, obj_id): elif self._action == "get_attr": log.debug("DomainCrawler - get attributes") # fetch the given attributes - if self._objs is None: - log.error("DomainCrawler - self._objs not set") - return - if obj_id not in self._objs: - log.error(f"couldn't find {obj_id} in self._objs") - return - attr_names = self._objs[obj_id] + if self._objs is None or obj_id not in self._objs: + attr_names = None # fetch all attributes for obj_id + else: + attr_names = self._objs[obj_id] if attr_names is None: log.debug(f"fetch all attributes for {obj_id}") else: @@ -304,7 +486,7 @@ async def fetch(self, obj_id): log.warn("expected at least one name in attr_names list") return - log.debug(f"DomainCrawler - got attribute names: {attr_names}") + log.debug(f"DomainCrawler - get attribute names: {attr_names}") await self.get_attributes(obj_id, attr_names) elif self._action == "put_attr": log.debug("DomainCrawler - put attributes") @@ -316,6 +498,36 @@ async def fetch(self, obj_id): log.debug(f"got {len(attr_items)} attr_items") await self.put_attributes(obj_id, attr_items) + elif self._action == "get_link": + log.debug("DomainCrawlwer - get links") + log.debug(f"self._objs: {self._objs}, type: {type(self._objs)}") + + if self._objs is None or obj_id not in self._objs: + link_titles = None # fetch all links for this object + else: + link_titles = self._objs[obj_id] + if link_titles is None: + log.debug(f"fetch all links for {obj_id}") + else: + if not isinstance(link_titles, list): + log.error("expected list for link titles") + return + if len(link_titles) == 0: + log.warn("expected at least one name in link titles list") + return + + log.debug(f"DomainCrawler - get link titles: {link_titles}") + await self.get_links(obj_id, link_titles) + elif self._action == "put_link": + log.debug("DomainCrawlwer - put links") + # write links + if self._objs and obj_id not in self._objs: + log.error(f"couldn't find {obj_id} in self._objs") + return + link_items = self._objs[obj_id] + log.debug(f"got {len(link_items)} link items for {obj_id}") + + await self.put_links(obj_id, link_items) else: msg = f"DomainCrawler: unexpected action: {self._action}" log.error(msg) diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index ad6baf70..ec6ce1fc 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -23,9 +23,8 @@ from aiohttp.web_exceptions import HTTPConflict, HTTPServiceUnavailable from aiohttp import ClientResponseError from aiohttp.web import json_response -from requests.sessions import merge_setting -from .util.httpUtil import getObjectClass, http_post, http_put, http_get, http_delete +from .util.httpUtil import getObjectClass, http_post, http_put, http_delete from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse from .util.idUtil import getDataNodeUrl, createObjId, getCollectionForId @@ -47,60 +46,54 @@ from . 
import config -async def get_collections(app, root_id, bucket=None): +async def get_collections(app, root_id, bucket=None, max_objects_limit=None): """Return the object ids for given root.""" log.info(f"get_collections for {root_id}") - groups = {} - datasets = {} - datatypes = {} - lookup_ids = set() - lookup_ids.add(root_id) - params = {"bucket": bucket} - - while lookup_ids: - grp_id = lookup_ids.pop() - req = getDataNodeUrl(app, grp_id) - req += "/groups/" + grp_id + "/links" - log.debug("collection get LINKS: " + req) - try: - # throws 404 if doesn't exist - links_json = await http_get(app, req, params=params) - except HTTPNotFound: - log.warn(f"get_collection, group {grp_id} not found") - continue - log.debug(f"got links json from dn for group_id: {grp_id}") - links = links_json["links"] - log.debug(f"get_collection: got links: {links}") - for link in links: - if link["class"] != "H5L_TYPE_HARD": - continue - link_id = link["id"] - obj_type = getCollectionForId(link_id) - if obj_type == "groups": - if link_id in groups: - continue # been here before - groups[link_id] = {} - lookup_ids.add(link_id) - elif obj_type == "datasets": - if link_id in datasets: - continue - datasets[link_id] = {} - elif obj_type == "datatypes": - if link_id in datatypes: - continue - datatypes[link_id] = {} - else: - msg = "get_collection: unexpected link object type: " - msg += f"{obj_type}" - log.error(merge_setting) - HTTPInternalServerError() + kwargs = { + "action": "get_obj", + "include_attrs": False, + "include_links": False, + "follow_links": True, + "bucket": bucket, + } + + if max_objects_limit: + kwargs["max_objects_limit"] = max_objects_limit + + crawler = DomainCrawler(app, [root_id, ], **kwargs) + await crawler.crawl() + if max_objects_limit and len(crawler._obj_dict) >= max_objects_limit: + msg = "get_collections - too many objects: " + msg += f"{len(crawler._obj_dict)}, returning None" + log.info(msg) + return None + else: + msg = f"DomainCrawler returned: {len(crawler._obj_dict)} object ids" + log.info(msg) + + group_ids = set() + dataset_ids = set() + datatype_ids = set() + + for obj_id in crawler._obj_dict: + obj_type = getCollectionForId(obj_id) + if obj_type == "groups": + group_ids.add(obj_id) + elif obj_type == "datasets": + dataset_ids.add(obj_id) + elif obj_type == "datatypes": + datatype_ids.add(obj_id) + else: + log.warn(f"get_collections - unexpected id type: {obj_id}") + if root_id in group_ids: + group_ids.remove(root_id) # don't include the root id result = {} - result["groups"] = groups - result["datasets"] = datasets - result["datatypes"] = datatypes + result["groups"] = group_ids + result["datasets"] = dataset_ids + result["datatypes"] = datatype_ids return result @@ -112,14 +105,16 @@ async def getDomainObjects(app, root_id, include_attrs=False, bucket=None): log.info(f"getDomainObjects for root: {root_id}, include_attrs: {include_attrs}") max_objects_limit = int(config.get("domain_req_max_objects_limit", default=500)) - crawler_params = { + kwargs = { + "action": "get_obj", "include_attrs": include_attrs, - "bucket": bucket, + "include_links": True, "follow_links": True, "max_objects_limit": max_objects_limit, + "bucket": bucket, } - crawler = DomainCrawler(app, [root_id, ], action="get_obj", params=crawler_params) + crawler = DomainCrawler(app, [root_id, ], **kwargs) await crawler.crawl() if len(crawler._obj_dict) >= max_objects_limit: msg = "getDomainObjects - too many objects: " @@ -263,15 +258,13 @@ async def get_domains(request): if pattern: # do a pattern match on 
the basename basename = op.basename(domain) - log.debug( - f"get_domains: checking {basename} against pattern: {pattern}" - ) + msg = f"get_domains: checking {basename} against pattern: {pattern}" + log.debug(msg) try: got_match = globmatch(basename, pattern) except ValueError as ve: - log.warn( - f"get_domains, invalid query pattern {pattern}, ValueError: {ve}" - ) + msg = f"get_domains, invalid query pattern {pattern}, ValueError: {ve}" + log.warn(msg) raise HTTPBadRequest(reason="invalid query pattern") if got_match: log.debug("get_domains - got_match") @@ -453,7 +446,7 @@ async def GET_Domain(request): bucket = getBucketForDomain(domain) log.debug(f"GET_Domain domain: {domain} bucket: {bucket}") - if not bucket and not config.get("bucket_name"): + if not bucket: # no bucket defined, raise 400 msg = "Bucket not provided" log.warn(msg) @@ -502,14 +495,14 @@ async def GET_Domain(request): h5path = params["h5path"] # select which object to perform path search under - root_id = parent_id if parent_id else domain_json["root"] + base_id = parent_id if parent_id else domain_json["root"] # getObjectIdByPath throws 404 if not found obj_id, domain, _ = await getObjectIdByPath( - app, root_id, h5path, bucket=bucket, domain=domain, + app, base_id, h5path, bucket=bucket, domain=domain, follow_soft_links=follow_soft_links, follow_external_links=follow_external_links) - log.info(f"get obj_id: {obj_id} from h5path: {h5path}") + log.info(f"got obj_id: {obj_id} from h5path: {h5path}") # get authoritative state for object from DN (even if # it's in the meta_cache). kwargs = {"refresh": True, "bucket": bucket, @@ -624,6 +617,146 @@ async def getScanTime(app, root_id, bucket=None): return root_scan +async def POST_Domain(request): + """ return object defined by h5path list """ + + log.request(request) + app = request.app + params = request.rel_url.query + log.debug(f"POST_Domain query params: {params}") + + parent_id = None + include_links = False + include_attrs = False + follow_soft_links = False + follow_external_links = False + + if "parent_id" in params and params["parent_id"]: + parent_id = params["parent_id"] + if "include_links" in params and params["include_links"]: + include_links = True + if "include_attrs" in params and params["include_attrs"]: + include_attrs = True + if "follow_soft_links" in params and params["follow_soft_links"]: + follow_soft_links = True + if "follow_external_links" in params and params["follow_external_links"]: + follow_external_links = True + + if not request.has_body: + msg = "POST Domain with no body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + try: + body = await request.json() + except json.JSONDecodeError: + msg = "Unable to load JSON body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if "h5paths" in body: + h5paths = body["h5paths"] + if not isinstance(h5paths, list): + msg = f"expected list for h5paths but got: {type(h5paths)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + msg = "expected h5paths key in body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + (username, pswd) = getUserPasswordFromRequest(request) + if username is None and app["allow_noauth"]: + username = "default" + else: + await validateUserPassword(app, username, pswd) + + domain = None + try: + domain = getDomainFromRequest(request) + except ValueError: + log.warn(f"Invalid domain: {domain}") + raise HTTPBadRequest(reason="Invalid domain name") + + bucket = getBucketForDomain(domain) + log.debug(f"GET_Domain domain: {domain} bucket: {bucket}") + + if not 
bucket: + # no bucket defined, raise 400 + msg = "Bucket not provided" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + if bucket: + checkBucketAccess(app, bucket) + + if not domain: + msg = "no domain given" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + log.info(f"got domain: {domain}") + + domain_json = await getDomainJson(app, domain, reload=True) + + if domain_json is None: + log.warn(f"domain: {domain} not found") + raise HTTPNotFound() + + if "acls" not in domain_json: + log.error("No acls key found in domain") + raise HTTPInternalServerError() + + if "root" not in domain_json: + msg = f"{domain} is a folder, not a domain" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + root_id = domain_json["root"] + + # select which object to perform path search under + base_id = parent_id if parent_id else root_id + + log.debug(f"POST_Domain with h5paths: {h5paths} from: {base_id}") + # validate that the requesting user has permission to read this domain + # aclCheck throws exception if not authorized + aclCheck(app, domain_json, "read", username) + + json_objs = {} + + # TBD: the following could be made more efficient for + # cases where a large number of h5paths are given... + for h5path in h5paths: + + # getObjectIdByPath throws 404 if not found + obj_id, domain, _ = await getObjectIdByPath( + app, base_id, h5path, bucket=bucket, domain=domain, + follow_soft_links=follow_soft_links, + follow_external_links=follow_external_links) + + log.info(f"got obj_id: {obj_id} from h5path: {h5path}") + # get authoritative state for object from DN (even if + # it's in the meta_cache). + kwargs = {"refresh": True, "bucket": bucket, + "include_attrs": include_attrs, "include_links": include_links} + log.debug(f"kwargs for getObjectJson: {kwargs}") + + obj_json = await getObjectJson(app, obj_id, **kwargs) + + obj_json = respJsonAssemble(obj_json, params, obj_id) + + obj_json["domain"] = getPathForDomain(domain) + + # client may not know class of object retrieved via path + obj_json["class"] = getObjectClass(obj_id) + + json_objs[h5path] = obj_json + + jsonRsp = {"h5paths": json_objs} + resp = await jsonResponse(request, jsonRsp) + log.response(request, resp=resp) + return resp + + async def PUT_Domain(request): """HTTP method to create a new domain""" log.request(request) @@ -1354,10 +1487,6 @@ async def GET_Datasets(request): raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) - if not bucket: - bucket = config.get("bucket_name") - else: - checkBucketAccess(app, bucket) # verify the domain try: @@ -1448,10 +1577,6 @@ async def GET_Groups(request): raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) - if not bucket: - bucket = config.get("bucket_name") - else: - checkBucketAccess(app, bucket) # use reload to get authoritative domain json try: @@ -1537,10 +1662,6 @@ async def GET_Datatypes(request): raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) - if not bucket: - bucket = config.get("bucket_name") - else: - checkBucketAccess(app, bucket) # use reload to get authoritative domain json try: diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 5534d2b6..ca7ec957 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -904,3 +904,21 @@ async def reduceShape(app, dset_json, shape_update, bucket=None): await removeChunks(app, delete_ids, bucket=bucket) else: log.info("no chunks need deletion for shape reduction") + + +async def deleteAllChunks(app, dset_id, bucket=None): + """ Delete any allocated chunks for the given dataset """ + + 
log.info(f"deleteAllChunks for {dset_id}") + + # get all chunk ids for chunks that have been allocated + chunk_ids = await getAllocatedChunkIds(app, dset_id, bucket=bucket) + + if chunk_ids: + chunk_ids = list(chunk_ids) + chunk_ids.sort() + msg = f"deleteAllChunks for {dset_id} - these chunks will need to be deleted: {chunk_ids}" + log.debug(msg) + await removeChunks(app, chunk_ids, bucket=bucket) + else: + log.info(f"deleteAllChunks for {dset_id} - no chunks need deletion") diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 7df44873..51a2c7ba 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -19,7 +19,7 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound from aiohttp.web_exceptions import HTTPConflict, HTTPInternalServerError -from .util.httpUtil import http_post, http_put, http_delete, getHref, respJsonAssemble +from .util.httpUtil import http_post, http_put, getHref, respJsonAssemble from .util.httpUtil import jsonResponse from .util.idUtil import isValidUuid, getDataNodeUrl, createObjId, isSchema2Id from .util.dsetUtil import getPreviewQuery, getFilterItem, getShapeDims @@ -34,8 +34,9 @@ from .util.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson from .util.hdf5dtype import getItemSize from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId -from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo, doFlush -from .dset_lib import reduceShape +from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo, doFlush, putHardLink +from .servicenode_lib import deleteObj +from .dset_lib import reduceShape, deleteAllChunks from . import config from . import hsds_logger as log @@ -107,7 +108,7 @@ async def validateChunkLayout(app, shape_json, item_size, layout, bucket=None): # reference to a dataset in a traditional HDF5 files with # contigious storage if item_size == "H5T_VARIABLE": - # can't be used with variable types.. + # can't be used with variable types... 
msg = "Datsets with variable types cannot be used with " msg += "reference layouts" log.warn(msg) @@ -527,7 +528,6 @@ async def PUT_DatasetShape(request): shape_update = None extend = 0 extend_dim = 0 - hrefs = [] # tBD - definae HATEOS refs to return dset_id = request.match_info.get("id") if not dset_id: @@ -638,7 +638,7 @@ async def PUT_DatasetShape(request): if shape_update == dims: log.info("shape update is same as current dims, no action needed") - json_resp = {"hrefs:", hrefs} + json_resp = {} resp = await jsonResponse(request, json_resp, status=200) log.response(request, resp=resp) return resp @@ -671,7 +671,7 @@ async def PUT_DatasetShape(request): # send request onto DN req = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id + "/shape" - json_resp = {"hrefs": hrefs} + json_resp = {} params = {} if bucket: params["bucket"] = bucket @@ -1170,22 +1170,13 @@ async def POST_Dataset(request): log.debug(f"create dataset: {dataset_json}") req = getDataNodeUrl(app, dset_id) + "/datasets" - params = {} - if bucket: - params["bucket"] = bucket + params = {"bucket": bucket} post_json = await http_post(app, req, data=dataset_json, params=params) # create link if requested if link_id and link_title: - link_json = {} - link_json["id"] = dset_id - link_json["class"] = "H5L_TYPE_HARD" - link_req = getDataNodeUrl(app, link_id) - link_req += "/groups/" + link_id + "/links/" + link_title - log.info("PUT link - : " + link_req) - put_rsp = await http_put(app, link_req, data=link_json, params=params) - log.debug(f"PUT Link resp: {put_rsp}") + await putHardLink(app, link_id, link_title, tgt_id=dset_id, bucket=bucket) # dataset creation successful resp = await jsonResponse(request, post_json, status=201) @@ -1198,7 +1189,6 @@ async def DELETE_Dataset(request): """HTTP method to delete a dataset resource""" log.request(request) app = request.app - meta_cache = app["meta_cache"] dset_id = request.match_info.get("id") if not dset_id: @@ -1224,18 +1214,12 @@ async def DELETE_Dataset(request): domain_json = await getDomainJson(app, domain) verifyRoot(domain_json) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, dset_id, username, "delete") - req = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id - - params = {} - if bucket: - params["bucket"] = bucket - await http_delete(app, req, params=params) + # free any allocated chunks + await deleteAllChunks(app, dset_id, bucket=bucket) - if dset_id in meta_cache: - del meta_cache[dset_id] # remove from cache + await deleteObj(app, dset_id, bucket=bucket) resp = await jsonResponse(request, {}) log.response(request, resp=resp) diff --git a/hsds/group_sn.py b/hsds/group_sn.py index 98d58ed1..8a8d54a4 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -16,15 +16,15 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPNotFound from json import JSONDecodeError -from .util.httpUtil import http_post, http_put, http_delete, getHref +from .util.httpUtil import http_post, getHref from .util.httpUtil import jsonResponse from .util.idUtil import isValidUuid, getDataNodeUrl, createObjId from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain, getPathForDomain, verifyRoot -from .servicenode_lib import getDomainJson, getObjectJson, validateAction -from .servicenode_lib import getObjectIdByPath, getPathForObjectId +from .servicenode_lib 
import getDomainJson, getObjectJson, validateAction, deleteObj +from .servicenode_lib import getObjectIdByPath, getPathForObjectId, putHardLink from . import hsds_logger as log @@ -223,23 +223,14 @@ async def POST_Group(request): group_json["creationProperties"] = creation_props log.debug(f"create group, body: {group_json}") req = getDataNodeUrl(app, group_id) + "/groups" - params = {} - if bucket: - params["bucket"] = bucket + params = {"bucket": bucket} group_json = await http_post(app, req, data=group_json, params=params) # create link if requested if link_id and link_title: - link_json = {} - link_json["id"] = group_id - link_json["class"] = "H5L_TYPE_HARD" - link_req = getDataNodeUrl(app, link_id) - link_req += "/groups/" + link_id + "/links/" + link_title - log.debug("PUT link - : " + link_req) - kwargs = {"data": link_json, "params": params} - put_json_rsp = await http_put(app, link_req, **kwargs) - log.debug(f"PUT Link resp: {put_json_rsp}") + await putHardLink(app, link_id, link_title, tgt_id=group_id, bucket=bucket) + log.debug("returning resp") # group creation successful resp = await jsonResponse(request, group_json, status=201) @@ -251,7 +242,6 @@ async def DELETE_Group(request): """HTTP method to delete a group resource""" log.request(request) app = request.app - meta_cache = app["meta_cache"] group_id = request.match_info.get("id") if not group_id: @@ -276,7 +266,6 @@ async def DELETE_Group(request): # get domain JSON domain_json = await getDomainJson(app, domain) - # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, group_id, username, "delete") verifyRoot(domain_json) @@ -287,17 +276,7 @@ async def DELETE_Group(request): log.warn(msg) raise HTTPForbidden() - req = getDataNodeUrl(app, group_id) - req += "/groups/" + group_id - params = {} - if bucket: - params["bucket"] = bucket - log.debug(f"http_delete req: {req} params: {params}") - - await http_delete(app, req, params=params) - - if group_id in meta_cache: - del meta_cache[group_id] # remove from cache + await deleteObj(app, group_id, bucket=bucket) resp = await jsonResponse(request, {}) log.response(request, resp=resp) diff --git a/hsds/link_dn.py b/hsds/link_dn.py index 27b55050..7c71baa0 100755 --- a/hsds/link_dn.py +++ b/hsds/link_dn.py @@ -17,12 +17,13 @@ from copy import copy from bisect import bisect_left -from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound, HTTPConflict +from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound, HTTPGone, HTTPConflict from aiohttp.web_exceptions import HTTPInternalServerError from aiohttp.web import json_response from .util.idUtil import isValidUuid -from .util.linkUtil import validateLinkName +from .util.globparser import globmatch +from .util.linkUtil import validateLinkName, getLinkClass, isEqualLink from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj from . 
import hsds_logger as log @@ -42,11 +43,34 @@ def _index(items, marker, create_order=False): return -1 +def _getTitles(links, create_order=False): + titles = [] + if create_order: + order_dict = {} + for title in links: + item = links[title] + if "created" not in item: + log.warning(f"expected to find 'created' key in link item {title}") + continue + order_dict[title] = item["created"] + log.debug(f"order_dict: {order_dict}") + # now sort by created + for k in sorted(order_dict.items(), key=lambda item: item[1]): + titles.append(k[0]) + log.debug(f"links by create order: {titles}") + else: + titles = list(links.keys()) + titles.sort() + log.debug(f"links by lexographic order: {titles}") + return titles + + async def GET_Links(request): """HTTP GET method to return JSON for a link collection""" log.request(request) app = request.app params = request.rel_url.query + log.debug(f"GET_Links params: {params}") group_id = get_obj_id(request) log.info(f"GET links: {group_id}") if not isValidUuid(group_id, obj_class="group"): @@ -80,9 +104,13 @@ async def GET_Links(request): log.warn(msg) raise HTTPBadRequest(reason=msg) + pattern = None + if "pattern" in params: + pattern = params["pattern"] + group_json = await get_metadata_obj(app, group_id, bucket=bucket) - log.info(f"for id: {group_id} got group json: {group_json}") + log.debug(f"for id: {group_id} got group json: {group_json}") if "links" not in group_json: msg.error(f"unexpected group data for id: {group_id}") raise HTTPInternalServerError() @@ -90,24 +118,17 @@ async def GET_Links(request): # return a list of links based on sorted dictionary keys link_dict = group_json["links"] - titles = [] - if create_order: - order_dict = {} - for title in link_dict: - item = link_dict[title] - if "created" not in item: - log.warning(f"expected to find 'created' key in link item {title}") - continue - order_dict[title] = item["created"] - log.debug(f"order_dict: {order_dict}") - # now sort by created - for k in sorted(order_dict.items(), key=lambda item: item[1]): - titles.append(k[0]) - log.debug(f"links by create order: {titles}") - else: - titles = list(link_dict.keys()) - titles.sort() # sort by key - log.debug(f"links by lexographic order: {titles}") + titles = _getTitles(link_dict, create_order=create_order) + + if pattern: + try: + titles = [x for x in titles if globmatch(x, pattern)] + except ValueError: + log.error(f"exception getting links using pattern: {pattern}") + raise HTTPBadRequest(reason=msg) + msg = f"getLinks with pattern: {pattern} returning {len(titles)} " + msg += f"links from {len(link_dict)}" + log.debug(msg) start_index = 0 if marker is not None: @@ -136,86 +157,153 @@ async def GET_Links(request): return resp -async def GET_Link(request): - """HTTP GET method to return JSON for a link""" +async def POST_Links(request): + """HTTP POST method to return JSON for a link or a given set of links """ log.request(request) app = request.app params = request.rel_url.query group_id = get_obj_id(request) - log.info(f"GET link: {group_id}") + log.info(f"POST_Links: {group_id}") if not isValidUuid(group_id, obj_class="group"): log.error(f"Unexpected group_id: {group_id}") raise HTTPInternalServerError() - link_title = request.match_info.get("title") + body = await request.json() + if "titles" not in body: + msg = f"POST_Links expected titles in body but got: {body.keys()}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + titles = body["titles"] # list of link names to fetch - validateLinkName(link_title) + for title in titles: + 
validateLinkName(title) bucket = None if "bucket" in params: bucket = params["bucket"] + if not bucket: - msg = "GET_Links - no bucket param" + msg = "POST_Links - no bucket param" log.warn(msg) raise HTTPBadRequest(reason=msg) group_json = await get_metadata_obj(app, group_id, bucket=bucket) log.info(f"for id: {group_id} got group json: {group_json}") + if "links" not in group_json: log.error(f"unexpected group data for id: {group_id}") raise HTTPInternalServerError() links = group_json["links"] - if link_title not in links: - log.info(f"Link name {link_title} not found in group: {group_id}") - raise HTTPNotFound() - link_json = links[link_title] + link_list = [] # links to be returned + + missing_names = set() + for title in titles: + if title not in links: + missing_names.add(title) + log.info(f"Link name {title} not found in group: {group_id}") + continue + link_json = links[title] + item = {} + if "class" not in link_json: + log.warn(f"expected to find class key for link: {title}") + continue + link_class = link_json["class"] + item["class"] = link_class + if "created" not in link_json: + log.warn(f"expected to find created time for link: {title}") + link_created = 0 + else: + link_created = link_json["created"] + item["created"] = link_created + if link_class == "H5L_TYPE_HARD": + if "id" not in link_json: + log.warn(f"expected to id for hard linK: {title}") + continue + item["id"] = link_json["id"] + elif link_class == "H5L_TYPE_SOFT": + if "h5path" not in link_json: + log.warn(f"expected to find h5path for soft link: {title}") + continue + item["h5path"] = link_json["h5path"] + elif link_class == "H5L_TYPE_EXTERNAL": + if "h5path" not in link_json: + log.warn(f"expected to find h5path for external link: {title}") + continue + item["h5path"] = link_json["h5path"] + if "h5domain" not in link_json: + log.warn(f"expted to find h5domain for external link: {title}") + continue + item["h5domain"] = link_json["h5domain"] + else: + log.warn(f"unexpected to link class {link_class} for link: {title}") + continue + + item["title"] = title - resp = json_response(link_json) + link_list.append(item) + + if missing_names: + msg = f"POST_links - requested {len(titles)} links but only " + msg += f"{len(link_list)} were found" + log.warn(msg) + # one or more links not found, check to see if any + # had been previously deleted + deleted_links = app["deleted_links"] + if group_id in deleted_links: + link_delete_set = deleted_links[group_id] + for link_name in missing_names: + if link_name in link_delete_set: + log.info(f"link: {link_name} was previously deleted, returning 410") + raise HTTPGone() + log.info("one or more links not found, returning 404") + raise HTTPNotFound() + + rspJson = {"links": link_list} + resp = json_response(rspJson) log.response(request, resp=resp) return resp -async def PUT_Link(request): - """Handler creating a new link""" +async def PUT_Links(request): + """Handler creating new links """ log.request(request) app = request.app params = request.rel_url.query group_id = get_obj_id(request) - log.info(f"PUT link: {group_id}") + log.info(f"PUT links: {group_id}") + if not isValidUuid(group_id, obj_class="group"): log.error(f"Unexpected group_id: {group_id}") raise HTTPInternalServerError() - link_title = request.match_info.get("title") - validateLinkName(link_title) - - log.info(f"link_title: {link_title}") - if not request.has_body: - msg = "PUT Link with no body" + msg = "PUT_Links with no body" log.warn(msg) raise HTTPBadRequest(reason=msg) body = await request.json() - if 
"class" not in body: - msg = "PUT Link with no class key body" + if "links" not in body: + msg = "PUT_Links with no links key in body" log.warn(msg) raise HTTPBadRequest(reason=msg) - link_class = body["class"] - link_json = {} - link_json["class"] = link_class + items = body["links"] - if "id" in body: - link_json["id"] = body["id"] - if "h5path" in body: - link_json["h5path"] = body["h5path"] - if "h5domain" in body: - link_json["h5domain"] = body["h5domain"] + # validate input + for title in items: + validateLinkName(title) + item = items[title] + try: + link_class = getLinkClass(item) + except ValueError: + raise HTTPBadRequest(reason="invalid link") + if "class" not in item: + item["class"] = link_class if "bucket" in params: bucket = params["bucket"] @@ -225,7 +313,7 @@ async def PUT_Link(request): bucket = None if not bucket: - msg = "GET_Links - no bucket param" + msg = "PUT_Links - no bucket provided" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -235,78 +323,145 @@ async def PUT_Link(request): raise HTTPInternalServerError() links = group_json["links"] - if link_title in links: - msg = f"Link name {link_title} already found in group: {group_id}" - log.warn(msg) - raise HTTPConflict() - - now = time.time() - link_json["created"] = now - - # add the link - links[link_title] = link_json - - # update the group lastModified - group_json["lastModified"] = now - - # write back to S3, save to metadata cache - await save_metadata_obj(app, group_id, group_json, bucket=bucket) + new_links = set() + for title in items: + if title in links: + link_json = items[title] + existing_link = links[title] + try: + is_dup = isEqualLink(link_json, existing_link) + except TypeError: + log.error(f"isEqualLink TypeError - new: {link_json}, old: {existing_link}") + raise HTTPInternalServerError() + + if is_dup: + # TBD: replace param for links? 
+ continue # dup + else: + msg = f"link {title} already exists, returning 409" + log.warn(msg) + raise HTTPConflict() + else: + new_links.add(title) + + # if any of the attribute names was previously deleted, + # remove from the deleted set + deleted_links = app["deleted_links"] + if group_id in deleted_links: + link_delete_set = deleted_links[group_id] + else: + link_delete_set = set() + + create_time = time.time() + for title in new_links: + item = items[title] + item["created"] = create_time + links[title] = item + log.debug(f"added link {title}: {item}") + if title in link_delete_set: + link_delete_set.remove(title) + + if new_links: + # update the group lastModified + group_json["lastModified"] = create_time + log.debug(f"tbd: group_json: {group_json}") + + # write back to S3, save to metadata cache + await save_metadata_obj(app, group_id, group_json, bucket=bucket) + + status = 201 + else: + # nothing to update + status = 200 - resp_json = {} + # put the status in the JSON response since the http_put function + # used by the the SN won't return it + resp_json = {"status": status} - resp = json_response(resp_json, status=201) + resp = json_response(resp_json, status=status) log.response(request, resp=resp) return resp -async def DELETE_Link(request): +async def DELETE_Links(request): """HTTP DELETE method for group links""" log.request(request) app = request.app params = request.rel_url.query group_id = get_obj_id(request) - log.info(f"DELETE link: {group_id}") + log.info(f"DELETE links: {group_id}") if not isValidUuid(group_id, obj_class="group"): msg = f"Unexpected group_id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) - link_title = request.match_info.get("title") - validateLinkName(link_title) + if "separator" in params: + separator = params["separator"] + else: + separator = "/" if "bucket" in params: bucket = params["bucket"] else: bucket = None + if not bucket: - msg = "GET_Links - no bucket param" + msg = "DELETE_Links - no bucket param" log.warn(msg) raise HTTPBadRequest(reason=msg) + if "titles" not in params: + msg = "expected titles for DELETE links" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + titles_param = params["titles"] + + titles = titles_param.split(separator) + + log.info(f"DELETE links {titles} in {group_id} bucket: {bucket}") + group_json = await get_metadata_obj(app, group_id, bucket=bucket) - # TBD: Possible race condition + if "links" not in group_json: log.error(f"unexpected group data for id: {group_id}") raise HTTPInternalServerError() links = group_json["links"] - if link_title not in links: - msg = f"Link name {link_title} not found in group: {group_id}" - log.warn(msg) - raise HTTPNotFound() - del links[link_title] # remove the link from dictionary + # add link titles to deleted set, so we can return a 410 if they + # are requested in the future + deleted_links = app["deleted_links"] + if group_id in deleted_links: + link_delete_set = deleted_links[group_id] + else: + link_delete_set = set() + deleted_links[group_id] = link_delete_set + + save_obj = False # set to True if anything actually updated + for title in titles: + if title not in links: + if title in link_delete_set: + log.warn(f"Link name {title} has already been deleted") + continue + msg = f"Link name {title} not found in group: {group_id}" + log.warn(msg) + raise HTTPNotFound() + + del links[title] # remove the link from dictionary + link_delete_set.add(title) + save_obj = True - # update the group lastModified - now = time.time() - group_json["lastModified"] = now + 
if save_obj: + # update the group lastModified + now = time.time() + group_json["lastModified"] = now - # write back to S3 - await save_metadata_obj(app, group_id, group_json, bucket=bucket) + # write back to S3 + await save_metadata_obj(app, group_id, group_json, bucket=bucket) - hrefs = [] # TBD - resp_json = {"href": hrefs} + resp_json = {} resp = json_response(resp_json) log.response(request, resp=resp) diff --git a/hsds/link_sn.py b/hsds/link_sn.py index cb75d9a2..609479c5 100755 --- a/hsds/link_sn.py +++ b/hsds/link_sn.py @@ -13,18 +13,20 @@ # service node of hsds cluster # -from aiohttp.web_exceptions import HTTPBadRequest, HTTPConflict +from aiohttp.web_exceptions import HTTPBadRequest from json import JSONDecodeError -from .util.httpUtil import http_get, http_put, http_delete, getHref +from .util.httpUtil import getHref, getBooleanParam from .util.httpUtil import jsonResponse +from .util.globparser import globmatch from .util.idUtil import isValidUuid, getDataNodeUrl, getCollectionForId from .util.authUtil import getUserPasswordFromRequest, validateUserPassword -from .util.domainUtil import getDomainFromRequest, isValidDomain +from .util.domainUtil import getDomainFromRequest, isValidDomain, verifyRoot from .util.domainUtil import getBucketForDomain -from .util.linkUtil import validateLinkName -from .servicenode_lib import validateAction, getObjectJson -from . import config +from .util.linkUtil import validateLinkName, getLinkClass +from .servicenode_lib import getDomainJson, validateAction +from .servicenode_lib import getLink, putLink, putLinks, getLinks, deleteLinks +from .domain_crawl import DomainCrawler from . import hsds_logger as log @@ -44,21 +46,6 @@ async def GET_Links(request): msg = f"Invalid group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) - limit = None - create_order = False - if "CreateOrder" in params and params["CreateOrder"]: - create_order = True - - if "Limit" in params: - try: - limit = int(params["Limit"]) - except ValueError: - msg = "Bad Request: Expected int type for limit" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - marker = None - if "Marker" in params: - marker = params["Marker"] username, pswd = getUserPasswordFromRequest(request) if username is None and app["allow_noauth"]: @@ -72,36 +59,91 @@ async def GET_Links(request): log.warn(msg) raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) - if not bucket: - bucket = config.get("bucket_name") await validateAction(app, domain, group_id, username, "read") - req = getDataNodeUrl(app, group_id) - req += "/groups/" + group_id + "/links" + follow_links = getBooleanParam(params, "follow_links") + + if "pattern" in params and params["pattern"]: + pattern = params["pattern"] + try: + globmatch("abc", pattern) + except ValueError: + msg = f"invlaid pattern: {pattern} for link matching" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + log.debug(f"using pattern: {pattern} for GET_Links") + else: + pattern = None + + create_order = getBooleanParam(params, "CreateOrder") - params = {} - if create_order: - params["CreateOrder"] = 1 - if limit is not None: - params["Limit"] = str(limit) - if marker is not None: - params["Marker"] = marker - if bucket: - params["bucket"] = bucket - links_json = await http_get(app, req, params=params) - log.debug(f"got links json from dn for group_id: {group_id}") - links = links_json["links"] - - # mix in collection key, target and hrefs - for link in links: - if link["class"] == "H5L_TYPE_HARD": - collection_name = 
getCollectionForId(link["id"]) - link["collection"] = collection_name - target_uri = "/" + collection_name + "/" + link["id"] - link["target"] = getHref(request, target_uri) - link_uri = "/groups/" + group_id + "/links/" + link["title"] - link["href"] = getHref(request, link_uri) + limit = None + if "Limit" in params: + try: + limit = int(params["Limit"]) + except ValueError: + msg = "Bad Request: Expected int type for limit" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if "Marker" in params: + marker = params["Marker"] + else: + marker = None + + if follow_links: + # Use DomainCrawler to fetch links from multiple objects. + # set the follow_links and bucket params + log.debug(f"GET_Links - following links starting with {group_id}") + + kwargs = {"action": "get_link", "bucket": bucket, "follow_links": True} + kwargs["include_links"] = True + items = [group_id, ] + crawler = DomainCrawler(app, items, **kwargs) + + # will raise exception on NotFound, etc. + await crawler.crawl() + + msg = f"DomainCrawler returned: {len(crawler._obj_dict)} objects" + log.info(msg) + links = crawler._obj_dict + if pattern: + for grp_id in links.keys(): + grp_links = links[grp_id] + ret_links = [] + for link in grp_links: + title = link["title"] + if globmatch(title, pattern): + ret_links.append(link) + links[grp_id] = ret_links + msg = f"getLinks for {grp_id}, matched {len((ret_links))} links " + msg += f"from {len(grp_links)} links with pattern {pattern}" + log.debug(msg) + else: + kwargs = {"bucket": bucket} + if create_order: + kwargs["create_order"] = True + if limit: + kwargs["limit"] = limit + if marker: + kwargs["marker"] = marker + if pattern: + kwargs["pattern"] = pattern + + links = await getLinks(app, group_id, **kwargs) + + log.debug(f"got {len(links)} links json from dn for group_id: {group_id}") + + # mix in collection key, target and hrefs + for link in links: + if link["class"] == "H5L_TYPE_HARD": + collection_name = getCollectionForId(link["id"]) + link["collection"] = collection_name + target_uri = "/" + collection_name + "/" + link["id"] + link["target"] = getHref(request, target_uri) + link_uri = "/groups/" + group_id + "/links/" + link["title"] + link["href"] = getHref(request, link_uri) resp_json = {} resp_json["links"] = links @@ -135,7 +177,10 @@ async def GET_Link(request): log.warn(msg) raise HTTPBadRequest(reason=msg) link_title = request.match_info.get("title") - validateLinkName(link_title) + try: + validateLinkName(link_title) + except ValueError: + raise HTTPBadRequest(reason="invalid link name") username, pswd = getUserPasswordFromRequest(request) if username is None and app["allow_noauth"]: @@ -149,19 +194,15 @@ async def GET_Link(request): log.warn(msg) raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) - if not bucket: - bucket = config.get("bucket_name") await validateAction(app, domain, group_id, username, "read") req = getDataNodeUrl(app, group_id) - req += "/groups/" + group_id + "/links/" + link_title + req += "/groups/" + group_id + "/links" log.debug("get LINK: " + req) - params = {} - if bucket: - params["bucket"] = bucket - link_json = await http_get(app, req, params=params) - log.debug("got link_json: " + str(link_json)) + + link_json = await getLink(app, group_id, link_title, bucket=bucket) + resp_link = {} resp_link["title"] = link_title link_class = link_json["class"] @@ -212,13 +253,9 @@ async def PUT_Link(request): msg = "Missing group id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(group_id, 
obj_class="Group"): - msg = f"Invalid group id: {group_id}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + link_title = request.match_info.get("title") log.info(f"PUT Link_title: [{link_title}]") - validateLinkName(link_title) username, pswd = getUserPasswordFromRequest(request) # write actions need auth @@ -236,99 +273,447 @@ async def PUT_Link(request): log.warn(msg) raise HTTPBadRequest(reason=msg) - link_json = {} - if "id" in body: - if not isValidUuid(body["id"]): - msg = "PUT Link with invalid id in body" + domain = getDomainFromRequest(request) + if not isValidDomain(domain): + msg = f"Invalid domain: {domain}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + bucket = getBucketForDomain(domain) + + await validateAction(app, domain, group_id, username, "create") + # putLink will validate these arguments + kwargs = {"bucket": bucket} + kwargs["tgt_id"] = body.get("id") + kwargs["h5path"] = body.get("h5path") + kwargs["h5domain"] = body.get("h5domain") + + status = await putLink(app, group_id, link_title, **kwargs) + + hrefs = [] # TBD + req_rsp = {"hrefs": hrefs} + # link creation successful + # returns 201 if new link was created, 200 if this is a duplicate + # of an existing link + resp = await jsonResponse(request, req_rsp, status=status) + log.response(request, resp=resp) + return resp + + +async def PUT_Links(request): + """HTTP method to create a new links """ + log.request(request) + params = request.rel_url.query + app = request.app + status = None + + log.debug("PUT_Links") + + username, pswd = getUserPasswordFromRequest(request) + # write actions need auth + await validateUserPassword(app, username, pswd) + + if not request.has_body: + msg = "PUT_Links with no body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + try: + body = await request.json() + except JSONDecodeError: + msg = "Unable to load JSON body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + domain = getDomainFromRequest(request) + if not isValidDomain(domain): + msg = f"Invalid domain: {domain}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + bucket = getBucketForDomain(domain) + log.debug(f"got bucket: {bucket}") + replace = getBooleanParam(params, "replace") + + # get domain JSON + domain_json = await getDomainJson(app, domain) + verifyRoot(domain_json) + + req_grp_id = request.match_info.get("id") + if not req_grp_id: + req_grp_id = domain_json["root"] + + if "links" in body: + link_items = body["links"] + if not isinstance(link_items, dict): + msg = f"PUT_Links expected dict for for links body, but got: {type(link_items)}" log.warn(msg) raise HTTPBadRequest(reason=msg) - link_json["id"] = body["id"] - link_json["class"] = "H5L_TYPE_HARD" - - elif "h5path" in body: - link_json["h5path"] = body["h5path"] - # could be hard or soft link - if "h5domain" in body: - link_json["h5domain"] = body["h5domain"] - link_json["class"] = "H5L_TYPE_EXTERNAL" + # validate the links + for title in link_items: + try: + validateLinkName(title) + link_item = link_items[title] + getLinkClass(link_item) + except ValueError: + raise HTTPBadRequest(reason="invalid link item") + else: + link_items = None + + if link_items: + log.debug(f"PUT Links {len(link_items)} links to add") + else: + log.debug("no links defined yet") + + # next, sort out where these attributes are going to + + grp_ids = {} + if "grp_ids" in body: + body_ids = body["grp_ids"] + if isinstance(body_ids, list): + # multi cast the links - each link in link_items + # will be written to each of the objects identified by obj_id + if not 
link_items: + msg = "no links provided" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + for grp_id in body_ids: + if not isValidUuid(grp_id): + msg = f"Invalid object id: {grp_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + grp_ids[grp_id] = link_items + + msg = f"{len(link_items)} links will be multicast to " + msg += f"{len(grp_ids)} objects" + log.info(msg) + elif isinstance(body_ids, dict): + # each value is body_ids is a set of links to write to the object + # unlike the above case, different attributes can be written to + # different objects + if link_items: + msg = "links defined outside the obj_ids dict" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + for grp_id in body_ids: + if not isValidUuid(grp_id): + msg = f"Invalid object id: {grp_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + id_json = body_ids[grp_id] + + if "links" not in id_json: + msg = f"PUT_links with no links for grp_id: {grp_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + link_items = id_json["links"] + if not isinstance(link_items, dict): + msg = f"PUT_Links expected dict for grp_id {grp_id}, " + msg += f"but got: {type(link_items)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + # validate link items + for title in link_items: + try: + validateLinkName(title) + link_item = link_items[title] + getLinkClass(link_item) + except ValueError: + raise HTTPBadRequest(reason="invalid link item") + grp_ids[grp_id] = link_items + + # write different attributes to different objects + msg = f"PUT_Links over {len(grp_ids)} objects" else: - # soft link - link_json["class"] = "H5L_TYPE_SOFT" + msg = f"unexpected type for grp_ids: {type(grp_ids)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) else: - msg = "PUT Link with no id or h5path keys" + # use the object id from the request + grp_id = request.match_info.get("id") + if not grp_id: + msg = "Missing object id" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + grp_ids[grp_id] = link_items # make it look like a list for consistency + + log.debug(f"got {len(grp_ids)} grp_ids") + + await validateAction(app, domain, req_grp_id, username, "create") + + count = len(grp_ids) + if count == 0: + msg = "no grp_ids defined" + log.warn(f"PUT_Attributes: {msg}") + raise HTTPBadRequest(reason=msg) + elif count == 1: + # just send one PUT Attributes request to the dn + kwargs = {"bucket": bucket} + if replace: + kwargs["replace"] = True + grp_id = list(grp_ids.keys())[0] + link_json = grp_ids[grp_id] + log.debug(f"got link_json: {link_json}") + + status = await putLinks(app, grp_id, link_json, **kwargs) + + else: + # put multi obj + kwargs = {"action": "put_link", "bucket": bucket} + if replace: + kwargs["replace"] = True + + crawler = DomainCrawler(app, grp_ids, **kwargs) + + # will raise exception on not found, server busy, etc. 
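+        # crawl() fans the put_link action out to every group id collected in grp_ids above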
+ await crawler.crawl() + + status = crawler.get_status() + + log.info("DomainCrawler done for put_links action") + + # link creation successful + log.debug(f"PUT_Links returning status: {status}") + req_rsp = {} + resp = await jsonResponse(request, req_rsp, status=status) + log.response(request, resp=resp) + return resp + + +async def DELETE_Links(request): + """HTTP method to delete multiple links """ + log.request(request) + app = request.app + params = request.rel_url.query + group_id = request.match_info.get("id") + if not group_id: + msg = "Missing group id" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + if not isValidUuid(group_id, obj_class="Group"): + msg = f"Invalid group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) + if "titles" not in params: + msg = "expected titles params" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + titles_param = params["titles"] + if "separator" in params: + separator = params["separator"] + else: + separator = "/" + titles = titles_param.split(separator) + + for title in titles: + try: + validateLinkName(title) + except ValueError: + raise HTTPBadRequest(reason="invalid link name") + + username, pswd = getUserPasswordFromRequest(request) + await validateUserPassword(app, username, pswd) + domain = getDomainFromRequest(request) if not isValidDomain(domain): - msg = f"Invalid domain: {domain}" + msg = f"domain: {domain}" log.warn(msg) raise HTTPBadRequest(reason=msg) + bucket = getBucketForDomain(domain) - if not bucket: - bucket = config.get("bucket_name") - await validateAction(app, domain, group_id, username, "create") + await validateAction(app, domain, group_id, username, "delete") + + await deleteLinks(app, group_id, titles=titles, bucket=bucket) + + rsp_json = {} + resp = await jsonResponse(request, rsp_json) + log.response(request, resp=resp) + return resp + + +async def POST_Links(request): + """HTTP method to get multiple links """ + log.request(request) + app = request.app + params = request.rel_url.query + log.debug(f"POST_Links params: {params}") + log.info("POST_Links") + req_id = request.match_info.get("id") - # for hard links, verify that the referenced id exists and is in - # this domain - if "id" in body: - ref_id = body["id"] - ref_json = await getObjectJson(app, ref_id, bucket=bucket) - group_json = await getObjectJson(app, group_id, bucket=bucket) - if ref_json["root"] != group_json["root"]: - msg = "Hard link must reference an object in the same domain" + follow_links = getBooleanParam(params, "follow_links") + + create_order = getBooleanParam(params, "CreateOrder") + + limit = None + if "Limit" in params: + try: + limit = int(params["Limit"]) + except ValueError: + msg = "Bad Request: Expected int type for limit" log.warn(msg) raise HTTPBadRequest(reason=msg) + if "pattern" in params: + pattern = params["pattern"] + else: + pattern = None + + if not request.has_body: + msg = "POST Links with no body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) - # ready to add link now - req = getDataNodeUrl(app, group_id) - req += "/groups/" + group_id + "/links/" + link_title - log.debug("PUT link - getting group: " + req) - params = {} - if bucket: - params["bucket"] = bucket try: - put_rsp = await http_put(app, req, data=link_json, params=params) - log.debug("PUT Link resp: " + str(put_rsp)) - dn_status = 201 - except HTTPConflict: - # check to see if this is just a duplicate put of an existing link - dn_status = 409 - log.warn(f"PUT Link: got conflict error for link_json: {link_json}") - existing_link = 
await http_get(app, req, params=params) - log.warn(f"PUT Link: fetched existing link: {existing_link}") - for prop in ("class", "id", "h5path", "h5domain"): - if prop in link_json: - if prop not in existing_link: - msg = f"PUT Link - prop {prop} not found in existing " - msg += "link, returning 409" - log.warn(msg) - break - if link_json[prop] != existing_link[prop]: - msg = f"PUT Link - prop {prop} value is different, old: " - msg += f"{existing_link[prop]}, new: {link_json[prop]}, " - msg += "returning 409" - log.warn(msg) - break + body = await request.json() + except JSONDecodeError: + msg = "Unable to load JSON body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if "titles" in body: + titles = body["titles"] + if not isinstance(titles, list): + msg = f"expected list for titles but got: {type(titles)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + titles = None + + if "group_ids" in body: + group_ids = body["group_ids"] + else: + group_ids = None + + if titles is None and group_ids is None: + msg = "expected body to contain one of titles, group_ids keys" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # construct an item list from titles and group_ids + items = {} + if group_ids is None: + if not req_id: + msg = "no object id in request" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + items[req_id] = titles + elif isinstance(group_ids, list): + if titles is None: + msg = "no titles - will return all links for each object" + log.debug(msg) + for group_id in group_ids: + items[group_id] = None + elif isinstance(group_ids, dict): + if titles is not None: + msg = "titles must not be provided if obj_ids is a dict" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + for group_id in group_ids: + names_for_id = group_ids[group_id] + if not isinstance(names_for_id, list): + msg = "expected list of titles" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + items[group_id] = names_for_id + + log.debug(f"POST Links items: {items}") + + # do a check that everything is as it should with the item list + for group_id in items: + if not isValidUuid(group_id, obj_class="Group"): + msg = f"Invalid group id: {group_id}" + log.warn(msg) + + titles = items[group_id] + + if titles is None: + log.debug(f"getting all links for {group_id}") + elif isinstance(titles, list): + for title in titles: + try: + validateLinkName(title) + except ValueError: + raise HTTPBadRequest(reason="invalid link name") else: - log.info("PUT link is identical to existing value returning OK") - # return 200 since we didn't actually create a resource - dn_status = 200 - if dn_status == 409: - raise # return 409 to client - hrefs = [] # TBD - req_rsp = {"hrefs": hrefs} - # link creation successful - # returns 201 if new link was created, 200 if this is a duplicate - # of an existing link - resp = await jsonResponse(request, req_rsp, status=dn_status) + msg = f"expected list for titles but got: {type(titles)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + username, pswd = getUserPasswordFromRequest(request) + if username is None and app["allow_noauth"]: + username = "default" + else: + await validateUserPassword(app, username, pswd) + + domain = getDomainFromRequest(request) + if not isValidDomain(domain): + msg = f"Invalid domain value: {domain}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + bucket = getBucketForDomain(domain) + + # get domain JSON + domain_json = await getDomainJson(app, domain) + verifyRoot(domain_json) + + await validateAction(app, domain, req_id, username, 
"read") + + resp_json = {} + + if len(items) == 0: + msg = "no group ids specified for POST Links" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + elif len(items) == 1 and not follow_links: + # just make a request to the datanode + group_id = list(items.keys())[0] + kwargs = {"bucket": bucket} + + titles = items[group_id] + if titles: + kwargs["titles"] = titles + else: + if limit: + kwargs["limit"] = limit + if create_order: + kwargs["create_order"] = True + if pattern: + kwargs["pattern"] = pattern + links = await getLinks(app, group_id, **kwargs) + + resp_json["links"] = links + else: + # Use DomainCrawler to fetch links from multiple object. + # set the follow_links and bucket params + kwargs = {"action": "get_link", "bucket": bucket, "include_links": True} + if follow_links: + kwargs["follow_links"] = True + if create_order: + kwargs["create_order"] = True + if limit: + kwargs["limit"] = limit + if pattern: + kwargs["pattern"] = pattern + crawler = DomainCrawler(app, items, **kwargs) + # will raise exception on NotFound, etc. + await crawler.crawl() + + msg = f"DomainCrawler returned: {len(crawler._obj_dict)} objects" + log.info(msg) + links = crawler._obj_dict + + log.debug(f"got {len(links)} links") + resp_json["links"] = links + + resp = await jsonResponse(request, resp_json) log.response(request, resp=resp) return resp async def DELETE_Link(request): - """HTTP method to delete a link""" + """HTTP method to delete one or more links """ log.request(request) app = request.app @@ -354,18 +739,12 @@ async def DELETE_Link(request): raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) - if not bucket: - bucket = config.get("bucket_name") await validateAction(app, domain, group_id, username, "delete") - req = getDataNodeUrl(app, group_id) - req += "/groups/" + group_id + "/links/" + link_title - params = {} - if bucket: - params["bucket"] = bucket - rsp_json = await http_delete(app, req, params=params) + await deleteLinks(app, group_id, titles=[link_title, ], bucket=bucket) + rsp_json = {} resp = await jsonResponse(request, rsp_json) log.response(request, resp=resp) return resp diff --git a/hsds/servicenode.py b/hsds/servicenode.py index 22b9822a..2ea85319 100755 --- a/hsds/servicenode.py +++ b/hsds/servicenode.py @@ -25,13 +25,14 @@ from .basenode import healthCheck, baseInit from . 
import hsds_logger as log from .util.authUtil import initUserDB, initGroupDB, setPassword -from .domain_sn import GET_Domain, PUT_Domain, DELETE_Domain, GET_Domains +from .domain_sn import GET_Domain, PUT_Domain, DELETE_Domain, GET_Domains, POST_Domain from .domain_sn import GET_Datasets, GET_Groups, GET_Datatypes from .domain_sn import GET_ACL, GET_ACLs, PUT_ACL from .group_sn import GET_Group, POST_Group, DELETE_Group -from .link_sn import GET_Links, GET_Link, PUT_Link, DELETE_Link -from .attr_sn import GET_Attributes, GET_Attribute, PUT_Attribute, PUT_Attributes, DELETE_Attribute -from .attr_sn import DELETE_Attributes, GET_AttributeValue, PUT_AttributeValue, POST_Attributes +from .link_sn import GET_Links, POST_Links, GET_Link, PUT_Link, PUT_Links +from .link_sn import DELETE_Link, DELETE_Links +from .attr_sn import GET_Attributes, GET_Attribute, PUT_Attribute, PUT_Attributes, POST_Attributes +from .attr_sn import DELETE_Attributes, DELETE_Attribute, GET_AttributeValue, PUT_AttributeValue from .ctype_sn import GET_Datatype, POST_Datatype, DELETE_Datatype from .dset_sn import GET_Dataset, POST_Dataset, DELETE_Dataset from .dset_sn import GET_DatasetShape, PUT_DatasetShape, GET_DatasetType @@ -52,6 +53,7 @@ async def init(): app.router.add_route("GET", path, GET_Domain) app.router.add_route("DELETE", path, DELETE_Domain) app.router.add_route("PUT", path, PUT_Domain) + app.router.add_route("POST", path, POST_Domain) path = "/domains" app.router.add_route("GET", path, GET_Domains) @@ -82,6 +84,9 @@ async def init(): path = "/groups/{id}/links" app.router.add_route("GET", path, GET_Links) + app.router.add_route("POST", path, POST_Links) + app.router.add_route("PUT", path, PUT_Links) + app.router.add_route("DELETE", path, DELETE_Links) path = "/groups/{id}/links/{title}" app.router.add_route("GET", path, GET_Link) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index c5f0c561..a3d128c6 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -22,8 +22,9 @@ from .util.authUtil import getAclKeys from .util.arrayUtil import encodeData -from .util.idUtil import getDataNodeUrl, getCollectionForId, isSchema2Id, getS3Key -from .util.linkUtil import h5Join +from .util.idUtil import getDataNodeUrl, getCollectionForId +from .util.idUtil import isSchema2Id, getS3Key, isValidUuid +from .util.linkUtil import h5Join, validateLinkName, getLinkClass from .util.storUtil import getStorJSONObj, isStorObj from .util.authUtil import aclCheck from .util.httpUtil import http_get, http_put, http_post, http_delete @@ -341,6 +342,217 @@ async def getDsetJson(app, dset_id, return dset_json +async def getLinks(app, group_id, + titles=None, + create_order=False, + limit=None, + marker=None, + pattern=None, + bucket=None): + + """ Get the link jsons for the given titles """ + + req = getDataNodeUrl(app, group_id) + req += "/groups/" + group_id + "/links" + params = {"bucket": bucket} + if pattern is not None: + params["pattern"] = pattern + log.debug(f"getLinks {group_id}") + + if titles: + # do a post request with the given title list + log.debug(f"getLinks for {group_id} - {len(titles)} titles") + log.debug(f" params: {params}") + data = {"titles": titles} + post_rsp = await http_post(app, req, data=data, params=params) + log.debug(f"got link_json: {post_rsp}") + if "links" not in post_rsp: + log.error("unexpected response from post links") + raise HTTPInternalServerError() + links = post_rsp["links"] + else: + # do a get for all links + if create_order: + params["CreateOrder"] = 1 + if 
limit is not None: + params["Limit"] = str(limit) + if marker is not None: + params["Marker"] = marker + log.debug(f"getLinks, all links for {group_id}, params: {params}") + + get_rsp = await http_get(app, req, params=params) + log.debug(f"got link_json: {get_rsp}") + if "links" not in get_rsp: + log.error("unexpected response from get links") + raise HTTPInternalServerError() + links = get_rsp["links"] + + return links + + +async def getLink(app, group_id, title, bucket=None): + """ Get the link json for the given title """ + + titles = [title, ] + links = await getLinks(app, group_id, titles=titles, bucket=bucket) + + if len(links) != 1: + log.error(f"expected 1 link but got: {len(links)}") + raise HTTPInternalServerError() + link_json = links[0] + + return link_json + + +async def putLink(app, group_id, title, tgt_id=None, h5path=None, h5domain=None, bucket=None): + """ create a new link. Return 201 if this is a new link, + or 200 if it's a duplicate of an existing link. """ + + try: + validateLinkName(title) + except ValueError: + raise HTTPBadRequest(reason="invalid link name") + + if h5path and tgt_id: + msg = "putLink - provide tgt_id or h5path, but not both" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + link_json = {} + if tgt_id: + link_json["id"] = tgt_id + if h5path: + link_json["h5path"] = h5path + if h5domain: + link_json["h5domain"] = h5domain + + try: + link_class = getLinkClass(link_json) + except ValueError: + raise HTTPBadRequest(reason="invalid link") + + link_json["class"] = link_class + + # for hard links, verify that the referenced id exists and is in + # this domain + if link_class == "H5L_TYPE_HARD": + tgt_id = link_json["id"] + ref_json = await getObjectJson(app, tgt_id, bucket=bucket) + group_json = await getObjectJson(app, group_id, bucket=bucket) + if ref_json["root"] != group_json["root"]: + msg = "Hard link must reference an object in the same domain" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # ready to add link now + req = getDataNodeUrl(app, group_id) + req += "/groups/" + group_id + "/links" + log.debug(f"PUT links - PUT request: {req}") + params = {"bucket": bucket} + + data = {"links": {title: link_json}} + + put_rsp = await http_put(app, req, data=data, params=params) + log.debug(f"PUT Link resp: {put_rsp}") + if "status" in put_rsp: + status = put_rsp["status"] + else: + status = 201 + return status + + +async def putHardLink(app, group_id, title, tgt_id=None, bucket=None): + """ create a new hard link. Return 201 if this is a new link, + or 200 if it's a duplicate of an existing link """ + + status = await putLink(app, group_id, title, tgt_id=tgt_id, bucket=bucket) + return status + + +async def putSoftLink(app, group_id, title, h5path=None, bucket=None): + """ create a new soft link. Return 201 if this is a new link, + or 200 if it's a duplicate of an existing link """ + + status = await putLink(app, group_id, title, h5path=h5path, bucket=bucket) + return status + + +async def putExternalLink(app, group_id, title, h5path=None, h5domain=None, bucket=None): + """ create a new external link. Return 201 if this is a new link, + or 200 if it's a duplicate of an existing link """ + + status = await putLink(app, group_id, title, h5path=h5path, h5domain=h5domain, bucket=bucket) + return status + + +async def putLinks(app, group_id, items, bucket=None): + """ create a new links. Return 201 if any item is a new link, + or 200 if it's a duplicate of an existing link. 
""" + + isValidUuid(group_id, obj_class="group") + group_json = None + + # validate input + for title in items: + try: + validateLinkName(title) + item = items[title] + link_class = getLinkClass(item) + except ValueError: + # invalid link + raise HTTPBadRequest(reason="invalid link") + + if link_class == "H5L_TYPE_HARD": + tgt_id = item["id"] + isValidUuid(tgt_id) + # for hard links, verify that the referenced id exists and is in + # this domain + ref_json = await getObjectJson(app, tgt_id, bucket=bucket) + if not group_json: + # just need to fetch this once + group_json = await getObjectJson(app, group_id, bucket=bucket) + if ref_json["root"] != group_json["root"]: + msg = "Hard link must reference an object in the same domain" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # ready to add links now + req = getDataNodeUrl(app, group_id) + req += "/groups/" + group_id + "/links" + log.debug(f"PUT links - PUT request: {req}") + params = {"bucket": bucket} + + data = {"links": items} + + put_rsp = await http_put(app, req, data=data, params=params) + log.debug(f"PUT Link resp: {put_rsp}") + if "status" in put_rsp: + status = put_rsp["status"] + else: + status = 201 + return status + + +async def deleteLinks(app, group_id, titles=None, separator="/", bucket=None): + """ delete the requested set of links from the given object """ + + if titles is None or len(titles) == 0: + msg = "provide a list of link names for deletion" + log.debug(msg) + raise HTTPBadRequest(reason=msg) + + node_url = getDataNodeUrl(app, group_id) + req = f"{node_url}/groups/{group_id}/links" + log.debug(f"deleteLinks: {req}") + params = {"separator": separator, "bucket": bucket} + + # stringify the list of link_names + titles_param = separator.join(titles) + params["titles"] = titles_param + log.debug(f"using params: {params}") + await http_delete(app, req, params=params) + + async def getObjectIdByPath(app, obj_id, h5path, bucket=None, refresh=False, domain=None, follow_soft_links=False, follow_external_links=False): """Find the object at the provided h5path location. 
@@ -379,13 +591,7 @@ async def getObjectIdByPath(app, obj_id, h5path, bucket=None, refresh=False, dom if not link: continue # skip empty link - req = getDataNodeUrl(app, obj_id) - req += "/groups/" + obj_id + "/links/" + link - log.debug("get LINK: " + req) - params = {} - if bucket: - params["bucket"] = bucket - link_json = await http_get(app, req, params=params) + link_json = await getLink(app, obj_id, link, bucket=bucket) if link_json["class"] == "H5L_TYPE_EXTERNAL": if not follow_external_links: @@ -646,9 +852,11 @@ async def doFlush(app, root_id, bucket=None): async def getAttributes(app, obj_id, attr_names=None, - include_data=True, + include_data=False, + max_data_size=0, ignore_nan=False, create_order=False, + pattern=None, encoding=None, limit=0, marker=None, @@ -674,6 +882,8 @@ async def getAttributes(app, obj_id, params["CreateOrder"] = 1 if encoding: params["encoding"] = encoding + if max_data_size > 0: + params["max_data_size"] = max_data_size if attr_names: # send names via a POST request @@ -687,6 +897,9 @@ async def getAttributes(app, obj_id, params["Limit"] = limit if marker: params["Marker"] = marker + if pattern: + params["pattern"] = pattern + log.debug(f"using params: {params}") # do a get to fetch all the attributes dn_json = await http_get(app, req, params=params) @@ -775,3 +988,21 @@ async def deleteAttributes(app, obj_id, attr_names=None, separator="/", bucket=N params["attr_names"] = attr_name_param log.debug(f"using params: {params}") await http_delete(app, req, params=params) + + +async def deleteObj(app, obj_id, bucket=None): + """ send delete request for group, datatype, or dataset obj """ + log.debug(f"deleteObj {obj_id}") + req = getDataNodeUrl(app, obj_id) + collection = getCollectionForId(obj_id) + req += f"/{collection}/{obj_id}" + params = {} + if bucket: + params["bucket"] = bucket + log.debug(f"http_delete req: {req} params: {params}") + + await http_delete(app, req, params=params) + + meta_cache = app["meta_cache"] + if obj_id in meta_cache: + del meta_cache[obj_id] # remove from cache diff --git a/hsds/util/attrUtil.py b/hsds/util/attrUtil.py index 68ef2cdc..06468f7e 100755 --- a/hsds/util/attrUtil.py +++ b/hsds/util/attrUtil.py @@ -51,3 +51,26 @@ def validateAttributeName(name): msg = f"attribute name must be a string, but got: {type(name)}" log.warn(msg) raise HTTPBadRequest(reason=msg) + + +def isEqualAttr(attr1, attr2): + """ compare to attributes, return True if the same, False if differnt """ + for obj in (attr1, attr2): + if not isinstance(obj, dict): + raise TypeError(f"unexpected type: {type(obj)}") + if "type" not in obj: + raise TypeError("expected type key for attribute") + if "shape" not in obj: + raise TypeError("expected shape key for attribute") + # value is optional (not set for null space attributes) + if attr1["type"] != attr2["type"]: + return False + if attr1["shape"] != attr2["shape"]: + return False + shape_class = attr1["shape"].get("class") + if shape_class == "H5S_NULL": + return True # nothing else to compare + for obj in (attr1, attr2): + if "value" not in obj: + raise TypeError("expected value key for attribute") + return attr1["value"] == attr2["value"] diff --git a/hsds/util/domainUtil.py b/hsds/util/domainUtil.py index 3fa2e7f7..140ec01a 100644 --- a/hsds/util/domainUtil.py +++ b/hsds/util/domainUtil.py @@ -50,6 +50,22 @@ def isIPAddress(s): return True +def getBucketForDomain(domain): + """get the bucket for the domain or None + if no bucket is given + """ + if not domain: + return None + if domain[0] == "/": + # no 
bucket specified + return None + index = domain.find("/") + if index < 0: + # invalid domain? + return None + return domain[:index] + + def getParentDomain(domain): """Get parent domain of given domain. E.g. getParentDomain("www.hdfgroup.org") returns "hdfgroup.org" @@ -263,22 +279,6 @@ def getPathForDomain(domain): return domain[(index):] -def getBucketForDomain(domain): - """get the bucket for the domain or None - if no bucket is given - """ - if not domain: - return None - if domain[0] == "/": - # no bucket specified - return None - index = domain.find("/") - if index < 0: - # invalid domain? - return None - return domain[:index] - - def verifyRoot(domain_json): """Throw bad request if we are expecting a domain, but got a folder instead diff --git a/hsds/util/httpUtil.py b/hsds/util/httpUtil.py index 1cc1e0dd..5df7bfcb 100644 --- a/hsds/util/httpUtil.py +++ b/hsds/util/httpUtil.py @@ -43,6 +43,32 @@ def getUrl(host, port): return f"http://{host}:{port}" +def getBooleanParam(params, key): + """ return False if the given key is not in the + params dict, or is it, but has the value, 0, or "0". + return True otherwise """ + + if not isinstance(key, str): + raise TypeError("expected str value for key") + + if key not in params: + return False + + value = params[key] + if not value: + return False + + try: + int_value = int(value) + except ValueError: + return True + + if int_value: + return True + else: + return False + + def getPortFromUrl(url): """Get Port number for given url""" if not url: diff --git a/hsds/util/linkUtil.py b/hsds/util/linkUtil.py index b16133d1..3469a8a1 100644 --- a/hsds/util/linkUtil.py +++ b/hsds/util/linkUtil.py @@ -13,20 +13,112 @@ # linkdUtil: # link related functions # -from aiohttp.web_exceptions import HTTPBadRequest from .. 
import hsds_logger as log def validateLinkName(name): + """ verify the link name is valid """ if not isinstance(name, str): msg = "Unexpected type for link name" - log.error(msg) - raise HTTPBadRequest(reason=msg) + log.warn(msg) + raise ValueError(msg) if name.find("/") >= 0: msg = "link name contains slash" - log.error(msg) - raise HTTPBadRequest(reason=msg) + log.warn(msg) + raise ValueError(msg) + + +def getLinkClass(link_json): + """ verify this is a valid link + returns the link class """ + if "class" in link_json: + link_class = link_json["class"] + else: + link_class = None + if "h5path" in link_json and "id" in link_json: + msg = "link tgt_id and h5path both set" + log.warn(msg) + raise ValueError(msg) + if "id" in link_json: + tgt_id = link_json["id"] + if not isinstance(tgt_id, str) or len(tgt_id) < 38: + msg = f"link with invalid id: {tgt_id}" + log.warn(msg) + raise ValueError(msg) + if tgt_id[:2] not in ("g-", "t-", "d-"): + msg = "link tgt must be group, datatype or dataset uuid" + log.warn(msg) + raise ValueError(msg) + if link_class: + if link_class != "H5L_TYPE_HARD": + msg = f"expected link class to be H5L_TYPE_HARD but got: {link_class}" + log.warn(msg) + raise ValueError(msg) + else: + link_class = "H5L_TYPE_HARD" + elif "h5path" in link_json: + h5path = link_json["h5path"] + log.debug(f"link path: {h5path}") + if "h5domain" in link_json: + if link_class: + if link_class != "H5L_TYPE_EXTERNAL": + msg = f"expected link class to be H5L_TYPE_EXTERNAL but got: {link_class}" + log.warn(msg) + raise ValueError(msg) + else: + link_class = "H5L_TYPE_EXTERNAL" + else: + if link_class: + if link_class != "H5L_TYPE_SOFT": + msg = f"expected link class to be H5L_TYPE_SOFT but got: {link_class}" + log.warn(msg) + raise ValueError(msg) + else: + link_class = "H5L_TYPE_SOFT" + else: + msg = "link with no id or h5path" + log.warn(msg) + raise ValueError(msg) + + return link_class + + +def isEqualLink(link1, link2): + """ Return True if the two links are the same """ + + for obj in (link1, link2): + if not isinstance(obj, dict): + raise TypeError(f"unexpected type: {type(obj)}") + if "class" not in obj: + raise TypeError("expected class key for link") + if link1["class"] != link2["class"]: + return False # different link types + link_class = link1["class"] + if link_class == "H5L_TYPE_HARD": + for obj in (link1, link2): + if "id" not in obj: + raise TypeError(f"expected id key for link: {obj}") + if link1["id"] != link2["id"]: + return False + elif link_class == "H5L_TYPE_SOFT": + for obj in (link1, link2): + if "h5path" not in obj: + raise TypeError(f"expected h5path key for link: {obj}") + if link1["h5path"] != link2["h5path"]: + return False + elif link_class == "H5L_TYPE_EXTERNAL": + for obj in (link1, link2): + for k in ("h5path", "h5domain"): + if k not in obj: + raise TypeError(f"expected {k} key for link: {obj}") + if link1["h5path"] != link2["h5path"]: + return False + if link1["h5domain"] != link2["h5domain"]: + return False + else: + raise TypeError(f"unexpected link class: {link_class}") + return True def h5Join(path, paths): diff --git a/hsds/util/storUtil.py b/hsds/util/storUtil.py index b80b7b3a..00fc6a9b 100644 --- a/hsds/util/storUtil.py +++ b/hsds/util/storUtil.py @@ -20,6 +20,7 @@ import numpy as np import numcodecs as codecs import bitshuffle +from json import JSONDecodeError from aiohttp.web_exceptions import HTTPInternalServerError from .. 
import hsds_logger as log @@ -389,6 +390,9 @@ async def getStorJSONObj(app, key, bucket=None): except UnicodeDecodeError: log.error(f"Error loading JSON at key: {key}") raise HTTPInternalServerError() + except JSONDecodeError: + log.error(f"unable to load json: {data}") + raise HTTPInternalServerError() msg = f"storage key {key} returned json object " msg += f"with {len(json_dict)} keys" diff --git a/tests/integ/attr_test.py b/tests/integ/attr_test.py index 6e203981..f669746b 100644 --- a/tests/integ/attr_test.py +++ b/tests/integ/attr_test.py @@ -97,6 +97,7 @@ def testListAttr(self): self.assertEqual(rspJson["attributeCount"], attr_count) for creation_order in (False, True): + print("creation_order:", creation_order) expected_names = copy(attr_names) if creation_order: @@ -245,6 +246,7 @@ def testObjAttr(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 404) # not found attr_payload = {"type": "H5T_STD_I32LE", "value": 42} + attr_payload2 = {"type": "H5T_STD_I32LE", "value": 84} # try adding the attribute as a different user user2_name = config.get("user2_name") @@ -266,6 +268,14 @@ def testObjAttr(self): rsp = self.session.put(req, data=json.dumps(attr_payload), headers=headers) self.assertEqual(rsp.status_code, 201) # created + # try resending + rsp = self.session.put(req, data=json.dumps(attr_payload), headers=headers) + self.assertEqual(rsp.status_code, 200) # ok + + # try with a different value + rsp = self.session.put(req, data=json.dumps(attr_payload2), headers=headers) + self.assertEqual(rsp.status_code, 409) # conflict + # read the attribute we just created rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) # create attribute @@ -287,11 +297,11 @@ def testObjAttr(self): rspJson = json.loads(rsp.text) self.assertEqual(rspJson["attributeCount"], 1) # one attribute - # try creating the attribute again - should return 409 + # try creating the attribute again - should return 200 req = f"{self.endpoint}/{col_name}/{obj1_id}/attributes/{attr_name}" rsp = self.session.put(req, data=json.dumps(attr_payload), headers=headers) - self.assertEqual(rsp.status_code, 409) # conflict + self.assertEqual(rsp.status_code, 200) # OK # set the replace param and we should get a 200 params = {"replace": 1} @@ -327,6 +337,10 @@ def testEmptyShapeAttr(self): rsp = self.session.put(req, headers=headers, data=json.dumps(attr_payload)) self.assertEqual(rsp.status_code, 201) # created + # retry + rsp = self.session.put(req, headers=headers, data=json.dumps(attr_payload)) + self.assertEqual(rsp.status_code, 200) # OK + # read back the attribute rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) # OK @@ -411,6 +425,10 @@ def testNoShapeAttr(self): rsp = self.session.put(req, headers=headers, data=json.dumps(attr_payload)) self.assertEqual(rsp.status_code, 201) # created + # try re-sending the put. Should return 200 + rsp = self.session.put(req, headers=headers, data=json.dumps(attr_payload)) + self.assertEqual(rsp.status_code, 200) + # read back the attribute rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) # OK @@ -465,6 +483,10 @@ def testPutFixedString(self): rsp = self.session.put(req, data=json.dumps(data), headers=headers) self.assertEqual(rsp.status_code, 201) + # try re-sending the put. 
Should return 200 + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 200) + # read attr rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) @@ -1671,7 +1693,6 @@ def testNonURLEncodableAttributeName(self): self.assertTrue("name" in rsp_attr) self.assertEqual(rsp_attr["name"], attr_name) - self.assertTrue("href" in rsp_attr) self.assertTrue("created" in rsp_attr) self.assertTrue("type" in rsp_attr) self.assertEqual(rsp_attr["type"], expected_type) @@ -1684,10 +1705,8 @@ def testNonURLEncodableAttributeName(self): rsp = self.session.delete(bad_req, headers=headers) self.assertEqual(rsp.status_code, 404) # not found - # send attribute name as an encoded query param - attr_names_param = base64.b64encode(attr_name.encode("utf8")).decode("ascii") # specify a separator since our attribute name has the default slash - params = {"attr_names": attr_names_param, "encoding": "base64", "separator": "!"} + params = {"attr_names": attr_names, "separator": "!"} rsp = self.session.delete(req, params=params, headers=headers) self.assertEqual(rsp.status_code, 200) @@ -1702,7 +1721,7 @@ def testNonURLEncodableAttributeName(self): def testPostAttributeSingle(self): domain = helper.getTestDomain("tall.h5") - print("testGetDomain", domain) + print("testPostAttributeSingle", domain) headers = helper.getRequestHeaders(domain=domain) headers["Origin"] = "https://www.hdfgroup.org" # test CORS headers_bin_rsp = helper.getRequestHeaders(domain=domain) @@ -1731,7 +1750,6 @@ def testPostAttributeSingle(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("hrefs" in rspJson) self.assertTrue("attributes" in rspJson) attributes = rspJson["attributes"] self.assertTrue(isinstance(attributes, list)) @@ -1749,7 +1767,6 @@ def testPostAttributeSingle(self): shapeJson = attrJson["shape"] self.assertEqual(shapeJson["class"], "H5S_SIMPLE") self.assertTrue("created" in attrJson) - self.assertTrue("href" in attrJson) self.assertTrue("value" not in attrJson) # test with returning all attribute values @@ -1758,7 +1775,6 @@ def testPostAttributeSingle(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("hrefs" in rspJson) self.assertTrue("attributes" in rspJson) attributes = rspJson["attributes"] self.assertTrue(isinstance(attributes, list)) @@ -1776,14 +1792,13 @@ def testPostAttributeSingle(self): shapeJson = attrJson["shape"] self.assertEqual(shapeJson["class"], "H5S_SIMPLE") self.assertTrue("created" in attrJson) - self.assertTrue("href" in attrJson) self.assertTrue("value" in attrJson) self.assertEqual(attrJson["value"], expected_values[i]) def testPostAttributeMultiple(self): """ Get attributes for multiple objs """ domain = helper.getTestDomain("tall.h5") - print("testGetDomain", domain) + print("testPostAttributeMultiple", domain) headers = helper.getRequestHeaders(domain=domain) headers["Origin"] = "https://www.hdfgroup.org" # test CORS headers_bin_rsp = helper.getRequestHeaders(domain=domain) @@ -1827,7 +1842,6 @@ def testPostAttributeMultiple(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("hrefs" in rspJson) self.assertTrue("attributes" in rspJson) attributes = rspJson["attributes"] self.assertTrue(isinstance(attributes, dict)) @@ -1853,7 +1867,6 @@ def testPostAttributeMultiple(self): shapeJson = attrJson["shape"] self.assertEqual(shapeJson["class"], "H5S_SIMPLE") self.assertTrue("created" in attrJson) - 
self.assertTrue("href" in attrJson) self.assertTrue("value" not in attrJson) # test with returning attribute values @@ -1862,7 +1875,6 @@ def testPostAttributeMultiple(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("hrefs" in rspJson) self.assertTrue("attributes" in rspJson) attributes = rspJson["attributes"] self.assertTrue(isinstance(attributes, dict)) @@ -1889,7 +1901,6 @@ def testPostAttributeMultiple(self): shapeJson = attrJson["shape"] self.assertEqual(shapeJson["class"], "H5S_SIMPLE") self.assertTrue("created" in attrJson) - self.assertTrue("href" in attrJson) self.assertTrue("value" in attrJson) self.assertEqual(attrJson["value"], expected_values[i]) @@ -1903,7 +1914,6 @@ def testPostAttributeMultiple(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("hrefs" in rspJson) self.assertTrue("attributes" in rspJson) attributes = rspJson["attributes"] self.assertTrue(isinstance(attributes, dict)) @@ -1934,7 +1944,6 @@ def testPostAttributeMultiple(self): rsp = self.session.post(req, data=json.dumps(data), headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("hrefs" in rspJson) self.assertTrue("attributes" in rspJson) attributes = rspJson["attributes"] self.assertEqual(len(attributes), 2) @@ -2040,9 +2049,9 @@ def testPutAttributeMultiple(self): self.assertEqual(len(attr_value), extent) self.assertEqual(attr_value, [i * 10 + j for j in range(extent)]) - # try writing again, should get 409 + # try writing again, should get 200 rsp = self.session.put(req, data=json.dumps(data), headers=headers) - self.assertEqual(rsp.status_code, 409) + self.assertEqual(rsp.status_code, 200) # write attributes to the three group objects data = {"obj_ids": grp_ids, "attributes": attributes} @@ -2103,10 +2112,10 @@ def testPutAttributeMultiple(self): self.assertEqual(len(attr_value), extent) self.assertEqual(attr_value, expected_value) - # try writing again, should get 409 + # try writing again, should get 200 req = self.endpoint + "/groups/" + root_id + "/attributes" rsp = self.session.put(req, data=json.dumps(data), headers=headers) - self.assertEqual(rsp.status_code, 409) + self.assertEqual(rsp.status_code, 200) def testDeleteAttributesMultiple(self): print("testDeleteAttributesMultiple", self.base_domain) @@ -2159,7 +2168,7 @@ def testDeleteAttributesMultiple(self): for i in range(attr_count): req = self.endpoint + "/groups/" + grp_id + "/attributes/" + attr_names[i] rsp = self.session.get(req, headers=headers) - self.assertEqual(rsp.status_code, 404) + self.assertEqual(rsp.status_code, 410) # Create another batch of attributes for i in range(attr_count): @@ -2181,7 +2190,306 @@ def testDeleteAttributesMultiple(self): for i in range(attr_count): req = self.endpoint + "/groups/" + grp_id + "/attributes/" + attr_names[i] rsp = self.session.get(req, headers=headers) - self.assertEqual(rsp.status_code, 404) + self.assertEqual(rsp.status_code, 410) + + def testMaxDataSize(self): + domain = helper.getTestDomain("tall.h5") + print("testMaxDataSize", domain) + headers = helper.getRequestHeaders(domain=domain) + + # verify domain exists + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + msg = f"WARNING: Failed to get domain: {domain}. Is test data setup?" 
+ print(msg) + return # abort rest of test + domainJson = json.loads(rsp.text) + root_id = domainJson["root"] + helper.validateId(root_id) + + attr_names = ["attr1", "attr2"] + + req = helper.getEndpoint() + "/groups/" + root_id + "/attributes" + params = {"IncludeData": 1} + + for max_data_size in (0, 10): + params["max_data_size"] = max_data_size + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + attributes = rspJson["attributes"] + self.assertTrue(isinstance(attributes, list)) + + self.assertEqual(len(attributes), len(attr_names)) + + for i in range(len(attr_names)): + attrJson = attributes[i] + self.assertTrue("name" in attrJson) + attr_name = attrJson["name"] + self.assertEqual(attr_name, attr_names[i]) + self.assertTrue("type" in attrJson) + self.assertTrue("shape" in attrJson) + shapeJson = attrJson["shape"] + self.assertEqual(shapeJson["class"], "H5S_SIMPLE") + self.assertTrue("created" in attrJson) + if max_data_size == 0 or attr_name == "attr1": + self.assertTrue("value" in attrJson) + else: + self.assertFalse("value" in attrJson) + + # do the same thing with a post request + data = {"attr_names": ["attr1", "attr2", ]} + for max_data_size in (0, 10): + params["max_data_size"] = max_data_size + rsp = self.session.post(req, data=json.dumps(data), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + attributes = rspJson["attributes"] + self.assertTrue(isinstance(attributes, list)) + + self.assertEqual(len(attributes), len(attr_names)) + + for i in range(len(attr_names)): + attrJson = attributes[i] + self.assertTrue("name" in attrJson) + attr_name = attrJson["name"] + self.assertEqual(attr_name, attr_names[i]) + self.assertTrue("type" in attrJson) + self.assertTrue("shape" in attrJson) + shapeJson = attrJson["shape"] + self.assertEqual(shapeJson["class"], "H5S_SIMPLE") + self.assertTrue("created" in attrJson) + if max_data_size == 0 or attr_name == "attr1": + self.assertTrue("value" in attrJson) + else: + self.assertFalse("value" in attrJson) + + def testGetPattern(self): + # test getting attributes from an existing domain, with a glob filter + domain = helper.getTestDomain("tall.h5") + print("testGetPattern", domain) + headers = helper.getRequestHeaders(domain=domain) + + # verify domain exists + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + print(f"WARNING: Failed to get domain: {domain}. 
Is test data setup?") + return # abort rest of test + + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + self.assertTrue(root_uuid.startswith("g-")) + # get the "/g1/g1.1/dset1.1.1" dset id + d111_uuid = helper.getUUIDByPath(domain, "/g1/g1.1/dset1.1.1", session=self.session) + + # do get with a glob pattern + req = helper.getEndpoint() + "/datasets/" + d111_uuid + "/attributes" + params = {"pattern": "*1", "IncludeData": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + attributes = rspJson["attributes"] + self.assertEqual(len(attributes), 1) # only attr1 should be returned + attr = attributes[0] + for name in ("created", "type", "shape", "value", "name", "href"): + self.assertTrue(name in attr) + self.assertEqual(attr["name"], "attr1") + + # do recursive get with a pattern + req = helper.getEndpoint() + "/groups/" + root_uuid + "/attributes" + params = {"pattern": "*1", "follow_links": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + obj_map = rspJson["attributes"] + self.assertEqual(len(obj_map), 10) # 10 objects in the domain + attr_count = 0 + for obj_id in obj_map: + attrs = obj_map[obj_id] + attr_count += len(attrs) + for attr in attrs: + self.assertEqual(attr["name"], "attr1") + self.assertEqual(attr_count, 2) + + def testGetRecursive(self): + # test getting all attributes from an existing domain + domain = helper.getTestDomain("tall.h5") + print("testGetRecursive", domain) + headers = helper.getRequestHeaders(domain=domain) + + # verify domain exists + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + print(f"WARNING: Failed to get domain: {domain}. 
Is test data setup?") + return # abort rest of test + + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + self.assertTrue(root_uuid.startswith("g-")) + # get the "/g1/g1.1/dset1.1.1" dset id + d111_uuid = helper.getUUIDByPath(domain, "/g1/g1.1/dset1.1.1", session=self.session) + + # do get with follow_links + req = helper.getEndpoint() + "/groups/" + root_uuid + "/attributes" + params = {"follow_links": "1", "IncludeData": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + obj_map = rspJson["attributes"] + self.assertEqual(len(obj_map), 10) + attr_count = 0 + for obj_id in obj_map: + attr_count += len(obj_map[obj_id]) + self.assertEqual(attr_count, 4) + for obj_id in (root_uuid, d111_uuid): + # these are the only two objects with attributes + self.assertTrue(obj_id in obj_map) + obj_attrs = obj_map[obj_id] + self.assertEqual(len(obj_attrs), 2) + for attrJson in obj_attrs: + self.assertTrue("name" in attrJson) + attr_name = attrJson["name"] + self.assertTrue(attr_name in ("attr1", "attr2")) + self.assertTrue("type" in attrJson) + self.assertTrue("shape" in attrJson) + shapeJson = attrJson["shape"] + self.assertEqual(shapeJson["class"], "H5S_SIMPLE") + self.assertTrue("created" in attrJson) + self.assertTrue("value" in attrJson) + + # same thing with Limit + req = helper.getEndpoint() + "/groups/" + root_uuid + "/attributes" + params = {"follow_links": "1", "Limit": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + obj_map = rspJson["attributes"] + self.assertEqual(len(obj_map), 10) + attr_count = 0 + for obj_id in obj_map: + self.assertTrue(len(obj_map[obj_id]) <= 1) + attr_count += len(obj_map[obj_id]) + self.assertEqual(attr_count, 2) + for obj_id in (root_uuid, d111_uuid): + # these are the only two objects with attributes + self.assertTrue(obj_id in obj_map) + obj_attrs = obj_map[obj_id] + self.assertEqual(len(obj_attrs), 1) + for attrJson in obj_attrs: + self.assertTrue("name" in attrJson) + attr_name = attrJson["name"] + self.assertTrue(attr_name in ("attr1", "attr2")) + self.assertTrue("type" in attrJson) + self.assertTrue("shape" in attrJson) + shapeJson = attrJson["shape"] + self.assertEqual(shapeJson["class"], "H5S_SIMPLE") + self.assertTrue("created" in attrJson) + self.assertFalse("value" in attrJson) + + # do a get with encoding + req = helper.getEndpoint() + "/groups/" + root_uuid + "/attributes" + params = {"follow_links": "1", "encoding": "base64", "IncludeData": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + obj_map = rspJson["attributes"] + self.assertEqual(len(obj_map), 10) + attr_count = 0 + for obj_id in obj_map: + attr_count += len(obj_map[obj_id]) + self.assertEqual(attr_count, 4) + for obj_id in (root_uuid, d111_uuid): + # these are the only two objects with attributes + self.assertTrue(obj_id in obj_map) + obj_attrs = obj_map[obj_id] + self.assertEqual(len(obj_attrs), 2) + for attrJson in obj_attrs: + self.assertTrue("name" in attrJson) + attr_name = attrJson["name"] + self.assertTrue(attr_name in ("attr1", "attr2")) + self.assertTrue("type" in attrJson) + self.assertTrue("shape" in attrJson) + shapeJson = attrJson["shape"] + 
self.assertEqual(shapeJson["class"], "H5S_SIMPLE") + self.assertTrue("created" in attrJson) + self.assertTrue("encoding" in attrJson) + self.assertEqual(attrJson["encoding"], "base64") + self.assertTrue("value" in attrJson) + self.assertTrue(isinstance(attrJson["value"], str)) + + # do a get with includeData set to false + req = helper.getEndpoint() + "/groups/" + root_uuid + "/attributes" + params = {"follow_links": "1", "IncludeData": "0"} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + obj_map = rspJson["attributes"] + self.assertEqual(len(obj_map), 10) + attr_count = 0 + for obj_id in obj_map: + attr_count += len(obj_map[obj_id]) + self.assertEqual(attr_count, 4) + for obj_id in (root_uuid, d111_uuid): + # these are the only two objects with attributes + self.assertTrue(obj_id in obj_map) + obj_attrs = obj_map[obj_id] + self.assertEqual(len(obj_attrs), 2) + for attrJson in obj_attrs: + self.assertTrue("name" in attrJson) + attr_name = attrJson["name"] + self.assertTrue(attr_name in ("attr1", "attr2")) + self.assertTrue("type" in attrJson) + self.assertTrue("shape" in attrJson) + shapeJson = attrJson["shape"] + self.assertEqual(shapeJson["class"], "H5S_SIMPLE") + self.assertTrue("created" in attrJson) + self.assertFalse("value" in attrJson) + + # do a get with max_data_size of 10 bytes + req = helper.getEndpoint() + "/groups/" + root_uuid + "/attributes" + params = {"follow_links": "1", "max_data_size": 10, "IncludeData": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("attributes" in rspJson) + obj_map = rspJson["attributes"] + self.assertEqual(len(obj_map), 10) + attr_count = 0 + for obj_id in obj_map: + attr_count += len(obj_map[obj_id]) + self.assertEqual(attr_count, 4) + for obj_id in (root_uuid, d111_uuid): + # these are the only two objects with attributes + self.assertTrue(obj_id in obj_map) + obj_attrs = obj_map[obj_id] + self.assertEqual(len(obj_attrs), 2) + for attrJson in obj_attrs: + self.assertTrue("name" in attrJson) + attr_name = attrJson["name"] + self.assertTrue(attr_name in ("attr1", "attr2")) + self.assertTrue("type" in attrJson) + self.assertTrue("shape" in attrJson) + shapeJson = attrJson["shape"] + self.assertEqual(shapeJson["class"], "H5S_SIMPLE") + self.assertTrue("created" in attrJson) + if obj_id == root_uuid and attr_name == "attr1": + self.assertTrue("value" in attrJson) + else: + # other attributes are larger than 10 bytes + self.assertFalse("value" in attrJson) if __name__ == "__main__": diff --git a/tests/integ/domain_test.py b/tests/integ/domain_test.py index e339aa6e..dbe21b50 100755 --- a/tests/integ/domain_test.py +++ b/tests/integ/domain_test.py @@ -189,6 +189,83 @@ def testGetDomain(self): rsp = self.session.get(req, params=params, headers=headers) self.assertEqual(rsp.status_code, 400) + def testPostDomainSingle(self): + domain = helper.getTestDomain("tall.h5") + print("testPostDomainSingle", domain) + headers = helper.getRequestHeaders(domain=domain) + + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + msg = f"WARNING: Failed to get domain: {domain}. Is test data setup?" 
+ print(msg) + return # abort rest of test + domainJson = json.loads(rsp.text) + self.assertTrue("root" in domainJson) + root_id = domainJson["root"] + g1_id = helper.getUUIDByPath(domain, "/g1", session=self.session) + g11_id = helper.getUUIDByPath(domain, "/g1/g1.1", session=self.session) + d111_id = helper.getUUIDByPath(domain, "/g1/g1.1/dset1.1.1", session=self.session) + + # Get group at /g1/g1.1 by using h5path + data = {"h5paths": ["/g1/g1.1", ]} + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("h5paths" in rspJson) + rsp_paths = rspJson["h5paths"] + self.assertTrue("/g1/g1.1" in rsp_paths) + obj_json = rsp_paths["/g1/g1.1"] + self.assertEqual(g11_id, obj_json["id"]) + self.assertTrue("root" in obj_json) + self.assertEqual(root_id, obj_json["root"]) + + # Get dataset /g1/g1.1/dset1.1.1 with a relative path and parent_id g1 + params = {"parent_id": g1_id} + data = {"h5paths": ["g1.1/dset1.1.1", ]} + rsp = self.session.post(req, data=json.dumps(data), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("h5paths" in rspJson) + rsp_paths = rspJson["h5paths"] + self.assertTrue("g1.1/dset1.1.1" in rsp_paths) + obj_json = rsp_paths["g1.1/dset1.1.1"] + self.assertEqual(d111_id, obj_json["id"]) + self.assertTrue("root" in obj_json) + self.assertEqual(root_id, obj_json["root"]) + + def testPostDomainMultiple(self): + domain = helper.getTestDomain("tall.h5") + print("testPostDomainMultiple", domain) + headers = helper.getRequestHeaders(domain=domain) + + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + msg = f"WARNING: Failed to get domain: {domain}. Is test data setup?" 
+ print(msg) + return # abort rest of test + domainJson = json.loads(rsp.text) + self.assertTrue("root" in domainJson) + root_id = domainJson["root"] + + # h5paths to fetch + h5paths = ["/g1/g1.1", "/g1/g1.2", "/g2/dset2.2"] + data = {"h5paths": h5paths} + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("h5paths" in rspJson) + rsp_paths = rspJson["h5paths"] + self.assertEqual(len(h5paths), len(rsp_paths)) + for h5path in h5paths: + self.assertTrue(h5path in rsp_paths) + obj_json = rsp_paths[h5path] + obj_id = helper.getUUIDByPath(domain, h5path, session=self.session) + self.assertEqual(obj_id, obj_json["id"]) + self.assertTrue("root" in obj_json) + self.assertEqual(root_id, obj_json["root"]) + def testGetByPath(self): domain = helper.getTestDomain("tall.h5") print("testGetByPath", domain) @@ -204,14 +281,19 @@ def testGetByPath(self): self.assertTrue("root" in domainJson) root_id = domainJson["root"] + # get ids that we'll need later + g1_id = helper.getUUIDByPath(domain, "/g1", session=self.session) + g11_id = helper.getUUIDByPath(domain, "/g1/g1.1", session=self.session) + d111_id = helper.getUUIDByPath(domain, "/g1/g1.1/dset1.1.1", session=self.session) + # Get group at /g1/g1.1 by using h5path params = {"h5path": "/g1/g1.1"} rsp = self.session.get(req, headers=headers, params=params) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) self.assertTrue("id" in rspJson) - g11id = helper.getUUIDByPath(domain, "/g1/g1.1", session=self.session) - self.assertEqual(g11id, rspJson["id"]) + + self.assertEqual(g11_id, rspJson["id"]) self.assertTrue("root" in rspJson) self.assertEqual(root_id, rspJson["root"]) @@ -221,10 +303,18 @@ def testGetByPath(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) self.assertTrue("id" in rspJson) - d111id = helper.getUUIDByPath( - domain, "/g1/g1.1/dset1.1.1", session=self.session - ) - self.assertEqual(d111id, rspJson["id"]) + + self.assertEqual(d111_id, rspJson["id"]) + self.assertTrue("root" in rspJson) + self.assertEqual(root_id, rspJson["root"]) + + # get /g1/g1.1/dset1.1.1 using a relative path with parent id g1 + params = {"h5path": "g1.1/dset1.1.1", "parent_id": g1_id} + rsp = self.session.get(req, headers=headers, params=params) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("id" in rspJson) + self.assertEqual(d111_id, rspJson["id"]) self.assertTrue("root" in rspJson) self.assertEqual(root_id, rspJson["root"]) diff --git a/tests/integ/helper.py b/tests/integ/helper.py index d5d5412d..dd5d125e 100644 --- a/tests/integ/helper.py +++ b/tests/integ/helper.py @@ -253,3 +253,22 @@ def getHDF5JSON(filename): with open(filename) as f: hdf5_json = json.load(f) return hdf5_json + + +def getLink(domain, grp_id, title): + headers = getRequestHeaders(domain=domain) + session = getSession() + req = getEndpoint() + "/groups/" + grp_id + "/links/" + title + rsp = session.get(req, headers=headers) + if rsp.status_code in (404, 410): + # not found or deleted + return None + elif rsp.status_code != 200: + raise ValueError(f"getLink exception: {rsp.status_code}") + + rspJson = json.loads(rsp.text) + if "link" not in rspJson: + raise KeyError(f"expected link key in {rspJson}") + link_json = rspJson["link"] + + return link_json diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py index 02392cdb..3d12335d 100755 --- a/tests/integ/link_test.py +++ 
b/tests/integ/link_test.py @@ -23,6 +23,7 @@ def __init__(self, *args, **kwargs): super(LinkTest, self).__init__(*args, **kwargs) self.base_domain = helper.getTestDomainName(self.__class__.__name__) helper.setupDomain(self.base_domain, folder=True) + self.endpoint = helper.getEndpoint() def setUp(self): self.session = helper.getSession() @@ -96,7 +97,7 @@ def testHardLink(self): self.assertEqual(rspLink["id"], grp1_id) self.assertEqual(rspLink["collection"], "groups") - # try creating the link again (be ok = PUT is idempotent) + # try creating the link again (should be ok - PUT is idempotent) rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 200) # OK @@ -343,121 +344,141 @@ def testGetLinks(self): req = helper.getEndpoint() + "/groups/" + root_id + "/links" - for creation_order in (False, True): - - # get all the links for the root group - params = {} - if creation_order: - params["CreateOrder"] = 1 - rsp = self.session.get(req, params=params, headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - self.assertTrue("links" in rspJson) - self.assertTrue("hrefs" in rspJson) - links = rspJson["links"] - self.assertEqual(len(links), len(link_names)) - ret_names = [] - for link in links: - self.assertTrue("title" in link) - self.assertTrue("class" in link) - self.assertEqual(link["class"], "H5L_TYPE_HARD") - self.assertTrue("collection" in link) - self.assertEqual(link["collection"], "groups") - self.assertTrue("created" in link) - ret_names.append(link["title"]) - - expected_names = copy(link_names) - - if creation_order: - # result should come back in sorted order - pass - else: - expected_names.sort() # lexographic order - # sorted list should be: - # ['eighth', 'eleventh', 'fifth', 'first', 'fourth', 'ninth', - # 'second', 'seventh', 'sixth', 'tenth', 'third', 'twelfth'] - # - - self.assertEqual(ret_names, expected_names) - - # get links with a result limit of 4 - limit = 4 - params = {"Limit": limit} - if creation_order: - params["CreateOrder"] = 1 - rsp = self.session.get(req, params=params, headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - self.assertTrue("links" in rspJson) - self.assertTrue("hrefs" in rspJson) - links = rspJson["links"] - self.assertEqual(len(links), limit) - last_link = links[-1] - self.assertEqual(last_link["title"], expected_names[limit - 1]) - - # get links after the one with name: "seventh" - marker = "seventh" - params = {"Marker": marker} - if creation_order: - params["CreateOrder"] = 1 - rsp = self.session.get(req, params=params, headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - self.assertTrue("links" in rspJson) - self.assertTrue("hrefs" in rspJson) - links = rspJson["links"] - if creation_order: - self.assertEqual(len(links), 5) - else: - self.assertEqual(len(links), 4) - last_link = links[-1] - # "twelfth" is last in either ordering - self.assertEqual(last_link["title"], "twelfth") - - # Use a marker that is not present (should return 404) - params["Marker"] = "foobar" - rsp = self.session.get(req, params=params, headers=headers) - self.assertEqual(rsp.status_code, 404) - - # get links starting with name: "seventh", and limit to 3 results - params["Marker"] = "seventh" - limit = 3 - params["Limit"] = limit - rsp = self.session.get(req, params=params, headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - self.assertTrue("links" in rspJson) - 
self.assertTrue("hrefs" in rspJson) - links = rspJson["links"] - self.assertEqual(len(links), 3) - last_link = links[-1] - if creation_order: - # expecting: "eighth", "ninth", "tenth" - self.assertEqual(last_link["title"], "tenth") - else: - # expecting: "sixth", "tenth", "third" - self.assertEqual(last_link["title"], "third") + for use_post in (False, True): + for creation_order in (False, True): + # get all the links for the root group + params = {} + if creation_order: + params["CreateOrder"] = 1 + + if use_post: + payload = {"group_ids": [root_id, ]} + data = json.dumps(payload) + rsp = self.session.post(req, data=data, params=params, headers=headers) + else: + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + if use_post: + pass # hrefs not returned for post + else: + self.assertTrue("hrefs" in rspJson) + links = rspJson["links"] + self.assertEqual(len(links), len(link_names)) + ret_names = [] + for link in links: + self.assertTrue("title" in link) + self.assertTrue("class" in link) + self.assertEqual(link["class"], "H5L_TYPE_HARD") + if use_post: + pass # href, collection not returned for post + else: + self.assertTrue("href" in link) + self.assertTrue("collection" in link) + self.assertEqual(link["collection"], "groups") + self.assertTrue("created" in link) + ret_names.append(link["title"]) + + expected_names = copy(link_names) + + if creation_order: + # result should come back in sorted order + pass + else: + expected_names.sort() # lexographic order + # sorted list should be: + # ['eighth', 'eleventh', 'fifth', 'first', 'fourth', 'ninth', + # 'second', 'seventh', 'sixth', 'tenth', 'third', 'twelfth'] + # + + self.assertEqual(ret_names, expected_names) + + # get links with a result limit of 4 + limit = 4 + params = {"Limit": limit} + if creation_order: + params["CreateOrder"] = 1 + if use_post: + payload = {"group_ids": [root_id, ]} + data = json.dumps(payload) + rsp = self.session.post(req, data=data, params=params, headers=headers) + else: + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + if use_post: + pass # no hrefs for post + else: + self.assertTrue("hrefs" in rspJson) + links = rspJson["links"] + self.assertEqual(len(links), limit) + last_link = links[-1] + self.assertEqual(last_link["title"], expected_names[limit - 1]) + + # get links after the one with name: "seventh" + marker = "seventh" + params = {"Marker": marker} + if creation_order: + params["CreateOrder"] = 1 + # Marker isn't supported for POST, so just run get twice + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + self.assertTrue("hrefs" in rspJson) + links = rspJson["links"] + if creation_order: + self.assertEqual(len(links), 5) + else: + self.assertEqual(len(links), 4) + last_link = links[-1] + # "twelfth" is last in either ordering + self.assertEqual(last_link["title"], "twelfth") + + # Use a marker that is not present (should return 404) + params["Marker"] = "foobar" + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 404) + + # get links starting with name: "seventh", and limit to 3 results + params["Marker"] = "seventh" + limit = 3 + params["Limit"] = limit + rsp = self.session.get(req, 
params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + self.assertTrue("hrefs" in rspJson) + links = rspJson["links"] + self.assertEqual(len(links), 3) + last_link = links[-1] + if creation_order: + # expecting: "eighth", "ninth", "tenth" + self.assertEqual(last_link["title"], "tenth") + else: + # expecting: "sixth", "tenth", "third" + self.assertEqual(last_link["title"], "third") def testGet(self): # test getting links from an existing domain domain = helper.getTestDomain("tall.h5") - print("testGetDomain", domain) + print("testGet", domain) headers = helper.getRequestHeaders(domain=domain) # verify domain exists req = helper.getEndpoint() + "/" rsp = self.session.get(req, headers=headers) if rsp.status_code != 200: - print( - "WARNING: Failed to get domain: {}. Is test data setup?".format(domain) - ) + print(f"WARNING: Failed to get domain: {domain}. Is test data setup?") return # abort rest of test rspJson = json.loads(rsp.text) root_uuid = rspJson["root"] self.assertTrue(root_uuid.startswith("g-")) - # get the "/g1" group + # get the "/g1/g1.2" group g1_2_uuid = helper.getUUIDByPath(domain, "/g1/g1.2", session=self.session) now = time.time() @@ -505,9 +526,7 @@ def testGet(self): self.assertTrue(g1_2_1_uuid is not None) self.assertTrue(extlink_file is not None) - expected_uuid = helper.getUUIDByPath( - domain, "/g1/g1.2/g1.2.1", session=self.session - ) + expected_uuid = helper.getUUIDByPath(domain, "/g1/g1.2/g1.2.1", session=self.session) self.assertEqual(expected_uuid, g1_2_1_uuid) # get link by title @@ -532,6 +551,175 @@ def testGet(self): self.assertEqual(link["title"], "slink") self.assertEqual(link["h5path"], "somevalue") + def testGetRecursive(self): + # test getting links from an existing domain, following links + domain = helper.getTestDomain("tall.h5") + print("testGetRecursive", domain) + headers = helper.getRequestHeaders(domain=domain) + + # verify domain exists + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + print(f"WARNING: Failed to get domain: {domain}. 
Is test data setup?") + return # abort rest of test + + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + self.assertTrue(root_uuid.startswith("g-")) + + # get links for root group and other groups recursively + req = helper.getEndpoint() + "/groups/" + root_uuid + "/links" + params = {"follow_links": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("hrefs" in rspJson) + hrefs = rspJson["hrefs"] + self.assertEqual(len(hrefs), 3) + self.assertTrue("links" in rspJson) + obj_map = rspJson["links"] # map of obj_ids to links + hardlink_count = 0 + softlink_count = 0 + extlink_count = 0 + expected_group_links = ("g1", "g2", "g1.1", "g1.2", "g1.2.1", ) + expected_dset_links = ("dset1.1.1", "dset1.1.2", "dset2.1", "dset2.2") + expected_soft_links = ("slink", ) + expected_external_links = ("extlink", ) + self.assertEqual(len(obj_map), 6) + for grp_id in obj_map: + helper.validateId(grp_id) + links = obj_map[grp_id] + for link in links: + self.assertTrue("title" in link) + link_title = link["title"] + self.assertTrue("class" in link) + link_class = link["class"] + if link_class == "H5L_TYPE_HARD": + hardlink_count += 1 + self.assertTrue("id" in link) + link_id = link["id"] + helper.validateId(link_id) + if link_id.startswith("g-"): + self.assertTrue(link_title in expected_group_links) + elif link_id.startswith("d-"): + self.assertTrue(link_title in expected_dset_links) + else: + self.assertTrue(False) # unexpected + elif link_class == "H5L_TYPE_SOFT": + softlink_count += 1 + self.assertTrue("h5path" in link) + self.assertFalse("h5domain" in link) + self.assertFalse("id" in link) + self.assertTrue(link_title in expected_soft_links) + elif link_class == "H5L_TYPE_EXTERNAL": + extlink_count += 1 + self.assertTrue("h5path" in link) + self.assertTrue("h5domain" in link) + self.assertFalse("id" in link) + self.assertTrue(link_title in expected_external_links) + else: + self.assertTrue(False) # unexpected + + self.assertEqual(hardlink_count, len(expected_dset_links) + len(expected_group_links)) + self.assertEqual(softlink_count, len(expected_soft_links)) + self.assertEqual(extlink_count, len(expected_external_links)) + + def testGetPattern(self): + # test getting links from an existing domain, with a glob filter + domain = helper.getTestDomain("tall.h5") + print("testGetPattern", domain) + headers = helper.getRequestHeaders(domain=domain) + + # verify domain exists + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + print(f"WARNING: Failed to get domain: {domain}. 
Is test data setup?") + return # abort rest of test + + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + self.assertTrue(root_uuid.startswith("g-")) + # get the "/g1/g1.2" group id + g1_2_uuid = helper.getUUIDByPath(domain, "/g1/g1.2", session=self.session) + now = time.time() + + # do get with a glob pattern + # get links for /g1/g1.2: + + for use_post in (False, True): + req = helper.getEndpoint() + "/groups/" + g1_2_uuid + "/links" + params = {"pattern": "ext*"} + if use_post: + payload = {"group_ids": [g1_2_uuid, ]} + data = json.dumps(payload) + rsp = self.session.post(req, data=data, params=params, headers=headers) + else: + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + links = rspJson["links"] + + self.assertEqual(len(links), 1) # only extlink should be returned + link = links[0] + for name in ("created", "class", "h5domain", "h5path", "title"): + self.assertTrue(name in link) + if use_post: + pass # no href with post + else: + self.assertTrue("href" in link) + self.assertEqual(link["class"], "H5L_TYPE_EXTERNAL") + self.assertEqual(link["title"], "extlink") + self.assertEqual(link["h5domain"], "somefile") + self.assertEqual(link["h5path"], "somepath") + self.assertTrue(link["created"] < now - 10) + + # get links for root group and other groups recursively + req = helper.getEndpoint() + "/groups/" + root_uuid + "/links" + params = {"follow_links": 1, "pattern": "dset*"} + if use_post: + payload = {"group_ids": [root_uuid, ]} + data = json.dumps(payload) + rsp = self.session.post(req, data=data, params=params, headers=headers) + else: + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + if use_post: + pass # hrefs not returned with post + else: + self.assertTrue("hrefs" in rspJson) + hrefs = rspJson["hrefs"] + self.assertEqual(len(hrefs), 3) + self.assertTrue("links" in rspJson) + obj_map = rspJson["links"] # map of grp ids to links + + expected_dset_links = ("dset1.1.1", "dset1.1.2", "dset2.1", "dset2.2") + + self.assertEqual(len(obj_map), 6) # 6 groups should be returned + link_count = 0 + + for grp_id in obj_map: + helper.validateId(grp_id) + links = obj_map[grp_id] + for link in links: + self.assertTrue("title" in link) + link_title = link["title"] + self.assertTrue(link_title in expected_dset_links) + self.assertTrue("class" in link) + link_class = link["class"] + # only hardlinks will be a match with this pattern + self.assertEqual(link_class, "H5L_TYPE_HARD") + link_count += 1 + self.assertTrue("id" in link) + link_id = link["id"] + helper.validateId(link_id) + self.assertTrue(link_id.startswith("d-")) # link to a dataset + + self.assertEqual(link_count, len(expected_dset_links)) + def testSoftLinkTraversal(self): # test that an object can be found via path with an external link # relative and absolute path @@ -865,6 +1053,556 @@ def testRootH5Path(self): self.assertTrue(k in cprops) self.assertEqual(cprops[k], creation_props[k]) + def testNonURLEncodableLinkName(self): + domain = self.base_domain + "/testNonURLEncodableLinkName.h5" + print("testNonURLEncodableLinkName", domain) + helper.setupDomain(domain) + + headers = helper.getRequestHeaders(domain=domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] 
+ helper.validateId(root_uuid) + + # create a subgroup + req = self.endpoint + "/groups" + rsp = self.session.post(req, headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + grp_id = rspJson["id"] + self.assertTrue(helper.validateId(grp_id)) + + # link as "grp1" + grp_name = "grp1" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + grp_name + payload = {"id": grp_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # created + + link_name = "#link1#" + data = {"h5path": "somewhere"} + req = self.endpoint + "/groups/" + grp_id + "/links" # request without name + bad_req = f"{req}/{link_name}" # this request will fail because of the hash char + + # create link + rsp = self.session.put(bad_req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 404) # regular put doesn't work + + links = {link_name: data} + body = {"links": links} + + rsp = self.session.put(req, data=json.dumps(body), headers=headers) + self.assertEqual(rsp.status_code, 201) # this is ok + + # get all links and verify the one we created is there + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + rsp_links = rspJson["links"] + self.assertEqual(len(rsp_links), 1) + rsp_link = rsp_links[0] + self.assertTrue("title" in rsp_link) + self.assertEqual(rsp_link["title"], link_name) + + # try doing a get on this specific link + rsp = self.session.get(bad_req, headers=headers) + self.assertEqual(rsp.status_code, 404) # can't do a get with the link name + + # do a post request with the link name + link_names = [link_name, ] + data = {"titles": link_names} + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + rsp_links = rspJson["links"] + self.assertEqual(len(rsp_links), 1) + rsp_links = rsp_links[0] + + self.assertTrue("title" in rsp_link) + self.assertEqual(rsp_link["title"], link_name) + + # try deleting the link by name + rsp = self.session.delete(bad_req, headers=headers) + self.assertEqual(rsp.status_code, 404) # not found + + # send link name as a query param + params = {"titles": link_names} + rsp = self.session.delete(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # verify the link is gone + rsp = self.session.get(req, headers=headers, params=params) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + rsp_links = rspJson["links"] + self.assertEqual(len(rsp_links), 0) + + def testPostLinkSingle(self): + domain = helper.getTestDomain("tall.h5") + print("testPostLinkSingle", domain) + headers = helper.getRequestHeaders(domain=domain) + headers["Origin"] = "https://www.hdfgroup.org" # test CORS + + # verify domain exists + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + msg = f"WARNING: Failed to get domain: {domain}. Is test data setup?" 
+ print(msg) + return # abort rest of test + + domainJson = json.loads(rsp.text) + root_id = domainJson["root"] + helper.validateId(root_id) + + # get the "/g1/g1.2" group + g1_2_uuid = helper.getUUIDByPath(domain, "/g1/g1.2", session=self.session) + + now = time.time() + + # get link "extlink" and "g1.2.1" for /g1/g1.2: + titles = ["extlink", "g1.2.1"] + payload = {"titles": titles} + req = helper.getEndpoint() + "/groups/" + g1_2_uuid + "/links" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + links = rspJson["links"] + self.assertEqual(len(links), 2) + g1_2_1_uuid = None + extlink_file = None + for link in links: + self.assertTrue("class" in link) + link_class = link["class"] + if link_class == "H5L_TYPE_HARD": + for name in ( + "created", + "class", + "id", + "title", + ): + self.assertTrue(name in link) + g1_2_1_uuid = link["id"] + self.assertTrue(g1_2_1_uuid.startswith("g-")) + self.assertEqual(link["title"], "g1.2.1") + self.assertTrue(link["created"] < now - 10) + else: + self.assertEqual(link_class, "H5L_TYPE_EXTERNAL") + for name in ("created", "class", "h5domain", "h5path", "title"): + self.assertTrue(name in link) + self.assertEqual(link["title"], "extlink") + extlink_file = link["h5domain"] + self.assertEqual(extlink_file, "somefile") + self.assertEqual(link["h5path"], "somepath") + self.assertTrue(link["created"] < now - 10) + + self.assertTrue(g1_2_1_uuid is not None) + self.assertTrue(extlink_file is not None) + expected_uuid = helper.getUUIDByPath(domain, "/g1/g1.2/g1.2.1", session=self.session) + self.assertEqual(expected_uuid, g1_2_1_uuid) + + def testPostLinkMultiple(self): + domain = helper.getTestDomain("tall.h5") + print("testPostLinkSingle", domain) + headers = helper.getRequestHeaders(domain=domain) + headers["Origin"] = "https://www.hdfgroup.org" # test CORS + + # verify domain exists + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + if rsp.status_code != 200: + msg = f"WARNING: Failed to get domain: {domain}. Is test data setup?" 
+ print(msg) + return # abort rest of test + + domainJson = json.loads(rsp.text) + root_id = domainJson["root"] + helper.validateId(root_id) + + # get the "/g1/g1.2" group + h5paths = ["/g1", "/g2", "/g1/g1.1", "/g1/g1.2", "/g2", "/g1/g1.2/g1.2.1"] + grp_map = {} + g1_id = None + g2_id = None + for h5path in h5paths: + grp_id = helper.getUUIDByPath(domain, h5path, session=self.session) + grp_map[grp_id] = h5path + if h5path == "/g1": + g1_id = grp_id # save + elif h5path == "/g2": + g2_id = grp_id + + # get all links for the given set of group ids + grp_ids = list(grp_map.keys()) + payload = {"group_ids": grp_ids} + req = helper.getEndpoint() + "/groups/" + root_id + "/links" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + obj_links = rspJson["links"] + self.assertTrue(len(obj_links), len(grp_ids)) + for grp_id in obj_links: + self.assertTrue(grp_id in grp_map) + h5path = grp_map[grp_id] + if h5path == "/g1/g1.2/g1.2.1": + expected_count = 1 + else: + expected_count = 2 # all the rest have two links + links = obj_links[grp_id] + self.assertEqual(len(links), expected_count) + for link in links: + title = link["title"] + expected = helper.getLink(domain, grp_id, title) + self.assertEqual(link["class"], expected["class"]) + link_class = link["class"] + if link_class == "H5L_TYPE_HARD": + self.assertEqual(link["id"], expected["id"]) + else: + # soft or external link + self.assertEqual(link["h5path"], expected["h5path"]) + if link_class == "H5L_TYPE_EXTERNAL": + self.assertEqual(link["h5domain"], expected["h5domain"]) + + # get just the requested links for each group + req = helper.getEndpoint() + "/groups/" + root_id + "/links" + link_map = {g1_id: ["g1.1", "g1.2"], g2_id: ["dset2.2", ]} + payload = {"group_ids": link_map} + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + obj_links = rspJson["links"] + self.assertEqual(len(obj_links), 2) + self.assertTrue(g1_id in obj_links) + g1_links = obj_links[g1_id] + self.assertEqual(len(g1_links), 2) + for link in g1_links: + self.assertTrue("class" in link) + self.assertEqual(link["class"], "H5L_TYPE_HARD") + self.assertTrue("title" in link) + self.assertTrue(link["title"] in ("g1.1", "g1.2")) + self.assertTrue("id" in link) + g2_links = obj_links[g2_id] + self.assertEqual(len(g2_links), 1) # two links in this group but just asked for dset2.2 + link = g2_links[0] + self.assertEqual(link["class"], "H5L_TYPE_HARD") + + # get all links for the domain by providing the root_id with the follow_links param + params = {"follow_links": 1} + grp_ids = [root_id, ] + payload = {"group_ids": grp_ids} + rsp = self.session.post(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + obj_links = rspJson["links"] + self.assertEqual(len(obj_links), 6) + expected_group_links = ("g1", "g2", "g1.1", "g1.2", "g1.2.1", ) + expected_dset_links = ("dset1.1.1", "dset1.1.2", "dset2.1", "dset2.2") + expected_soft_links = ("slink", ) + expected_external_links = ("extlink", ) + + # listify the returned links + links = [] + for obj_id in obj_links: + links.extend(obj_links[obj_id]) + self.assertEqual(len(links), 11) + for link in links: + self.assertTrue("title" in link) + title = 
link["title"] + self.assertTrue("class" in link) + link_class = link["class"] + if link_class == "H5L_TYPE_HARD": + link_id = link["id"] + if link_id.startswith("g-"): + self.assertTrue(title in expected_group_links) + elif link_id.startswith("d-"): + self.assertTrue(title in expected_dset_links) + else: + self.assertTrue(False) # unexpected + elif link_class == "H5L_TYPE_SOFT": + self.assertTrue(title in expected_soft_links) + elif link_class == "H5L_TYPE_EXTERNAL": + self.assertTrue(title in expected_external_links) + else: + self.assertTrue(False) # unexpected + + def testPutLinkMultiple(self): + domain = self.base_domain + "/testPutLinkMultiple.h5" + helper.setupDomain(domain) + print("testPutLinkMultiple", domain) + headers = helper.getRequestHeaders(domain=domain) + req = self.endpoint + "/" + + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_id = rspJson["root"] + + # create a group + req = self.endpoint + "/groups" + rsp = self.session.post(req, headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + grpA_id = rspJson["id"] + self.assertTrue(helper.validateId(grpA_id)) + + # link new obj as '/grpA' + req = self.endpoint + "/groups/" + root_id + "/links/grpA" + payload = {"id": grpA_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # created + + # create some groups under grp1 + grp_count = 3 + + grp_names = [f"grp{(i+1):04d}" for i in range(grp_count)] + grp_ids = [] + + for grp_name in grp_names: + # create sub_groups + req = self.endpoint + "/groups" + rsp = self.session.post(req, headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + grp_id = rspJson["id"] + self.assertTrue(helper.validateId(grp_id)) + grp_ids.append(grp_id) + + # create some links + links = {} + for i in range(grp_count): + title = grp_names[i] + links[title] = {"id": grp_ids[i]} + + # add a soft and external link as well + links["softlink"] = {"h5path": "a_path"} + links["extlink"] = {"h5path": "another_path", "h5domain": "/a_domain"} + link_count = len(links) + + # write links to the grpA + data = {"links": links} + req = self.endpoint + "/groups/" + grpA_id + "/links" + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # do a get on the links + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + ret_links = rspJson["links"] + self.assertEqual(len(ret_links), link_count) + for link in ret_links: + self.assertTrue("title" in link) + title = link["title"] + self.assertTrue("class" in link) + link_class = link["class"] + if link_class == "H5L_TYPE_HARD": + self.assertTrue("id" in link) + self.assertTrue(link["id"] in grp_ids) + self.assertTrue(title in grp_names) + elif link_class == "H5L_TYPE_SOFT": + self.assertTrue("h5path" in link) + h5path = link["h5path"] + self.assertEqual(h5path, "a_path") + elif link_class == "H5L_TYPE_EXTERNAL": + self.assertTrue("h5path" in link) + h5path = link["h5path"] + self.assertEqual(h5path, "another_path") + self.assertTrue("h5domain" in link) + h5domain = link["h5domain"] + self.assertEqual(h5domain, "/a_domain") + else: + self.assertTrue(False) # unexpected + + # try writing again, should get 200 (no new links) + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + 
+        self.assertEqual(rsp.status_code, 200)
+
+        # write some links to three group objects
+        links = {}
+        links["hardlink_multicast"] = {"id": root_id}
+        links["softlink_multicast"] = {"h5path": "multi_path"}
+        links["extlink_multicast"] = {"h5path": "multi_path", "h5domain": "/another_domain"}
+        link_count = len(links)
+        data = {"links": links, "grp_ids": grp_ids}
+        req = self.endpoint + "/groups/" + root_id + "/links"
+        rsp = self.session.put(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+
+        # check that the links got created
+        for grp_id in grp_ids:
+            req = self.endpoint + "/groups/" + grp_id + "/links"
+            rsp = self.session.get(req, headers=headers)
+            self.assertEqual(rsp.status_code, 200)
+            rspJson = json.loads(rsp.text)
+            self.assertTrue("links" in rspJson)
+            ret_links = rspJson["links"]
+            self.assertEqual(len(ret_links), 3)
+            for ret_link in ret_links:
+                self.assertTrue("class" in ret_link)
+                link_class = ret_link["class"]
+                if link_class == "H5L_TYPE_HARD":
+                    self.assertTrue("id" in ret_link)
+                    self.assertEqual(ret_link["id"], root_id)
+                elif link_class == "H5L_TYPE_SOFT":
+                    self.assertTrue("h5path" in ret_link)
+                    self.assertEqual(ret_link["h5path"], "multi_path")
+                elif link_class == "H5L_TYPE_EXTERNAL":
+                    self.assertTrue("h5path" in ret_link)
+                    self.assertEqual(ret_link["h5path"], "multi_path")
+                    self.assertTrue("h5domain" in ret_link)
+                    self.assertEqual(ret_link["h5domain"], "/another_domain")
+                else:
+                    self.assertTrue(False)  # unexpected
+
+        # write different links to three group objects
+        link_data = {}
+        for i in range(grp_count):
+            grp_id = grp_ids[i]
+            links = {}
+            links[f"hardlink_{i}"] = {"id": root_id}
+            links[f"softlink_{i}"] = {"h5path": f"multi_path_{i}"}
+            ext_link = {"h5path": f"multi_path_{i}", "h5domain": f"/another_domain/{i}"}
+            links[f"extlink_{i}"] = ext_link
+            link_data[grp_id] = {"links": links}
+
+        data = {"grp_ids": link_data}
+        req = self.endpoint + "/groups/" + root_id + "/links"
+        rsp = self.session.put(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+
+        # check that the new links got created
+        for i in range(grp_count):
+            grp_id = grp_ids[i]
+            titles = [f"hardlink_{i}", f"softlink_{i}", f"extlink_{i}", ]
+            data = {"titles": titles}
+            # do a post to just return the links we are interested in
+            req = self.endpoint + "/groups/" + grp_id + "/links"
+            rsp = self.session.post(req, data=json.dumps(data), headers=headers)
+            self.assertEqual(rsp.status_code, 200)
+            rspJson = json.loads(rsp.text)
+            self.assertTrue("links" in rspJson)
+            ret_links = rspJson["links"]
+            self.assertEqual(len(ret_links), len(titles))
+            for j in range(len(titles)):
+                ret_link = ret_links[j]
+                self.assertTrue("class" in ret_link)
+                link_class = ret_link["class"]
+                self.assertTrue("title" in ret_link)
+                link_title = ret_link["title"]
+                if link_class == "H5L_TYPE_HARD":
+                    self.assertEqual(link_title, f"hardlink_{i}")
+                    self.assertTrue("id" in ret_link)
+                    self.assertEqual(ret_link["id"], root_id)
+                elif link_class == "H5L_TYPE_SOFT":
+                    self.assertEqual(link_title, f"softlink_{i}")
+                    self.assertTrue("h5path" in ret_link)
+                    self.assertEqual(ret_link["h5path"], f"multi_path_{i}")
+                elif link_class == "H5L_TYPE_EXTERNAL":
+                    self.assertEqual(link_title, f"extlink_{i}")
+                    self.assertTrue("h5path" in ret_link)
+                    self.assertEqual(ret_link["h5path"], f"multi_path_{i}")
+                    self.assertTrue("h5domain" in ret_link)
+                    self.assertEqual(ret_link["h5domain"], f"/another_domain/{i}")
+                else:
+                    self.assertTrue(False)  # unexpected
+
+    def testDeleteLinkMultiple(self):
+        domain = self.base_domain + "/testDeleteLinkMultiple.h5"
+        helper.setupDomain(domain)
+
+        print("testDeleteLinkMultiple", domain)
+
+        headers = helper.getRequestHeaders(domain=domain)
+        req = self.endpoint + "/"
+
+        # Get root uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        root_uuid = rspJson["root"]
+        helper.validateId(root_uuid)
+
+        # create a subgroup
+        req = self.endpoint + "/groups"
+        rsp = self.session.post(req, headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+        rspJson = json.loads(rsp.text)
+        grp_id = rspJson["id"]
+        self.assertTrue(helper.validateId(grp_id))
+
+        # link as "grp1"
+        grp_name = "grp1"
+        req = self.endpoint + "/groups/" + root_uuid + "/links/" + grp_name
+        payload = {"id": grp_id}
+        rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)  # created
+
+        # create some links
+        titles = []
+        links = {}
+        title = "root"
+        links[title] = {"id": root_uuid}
+        titles.append(title)
+
+        # add a soft and external link as well
+        title = "softlink"
+        links[title] = {"h5path": "a_path"}
+        titles.append(title)
+        title = "extlink"
+        links[title] = {"h5path": "another_path", "h5domain": "/a_domain"}
+        titles.append(title)
+        link_count = len(links)
+
+        # write links to grp1
+        data = {"links": links}
+        req = self.endpoint + "/groups/" + grp_id + "/links"
+        rsp = self.session.put(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+
+        # Delete all links via the titles query parameter, using the default separator
+        separator = "/"
+        params = {"titles": separator.join(titles)}
+        req = self.endpoint + "/groups/" + grp_id + "/links"
+        rsp = self.session.delete(req, params=params, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+
+        # Attempt to read deleted links
+        for i in range(link_count):
+            req = self.endpoint + "/groups/" + grp_id + "/links/" + titles[i]
+            rsp = self.session.get(req, headers=headers)
+            self.assertEqual(rsp.status_code, 410)
+
+        # re-create links
+        req = self.endpoint + "/groups/" + grp_id + "/links"
+        rsp = self.session.put(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+
+        # Delete with a custom separator
+        separator = ":"
+        params = {"titles": separator.join(titles)}
+        params["separator"] = ":"
+        req = self.endpoint + "/groups/" + grp_id + "/links"
+        rsp = self.session.delete(req, params=params, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+
+        # Attempt to read the deleted links again
+        for i in range(link_count):
+            req = self.endpoint + "/groups/" + grp_id + "/links/" + titles[i]
+            rsp = self.session.get(req, headers=headers)
+            self.assertEqual(rsp.status_code, 410)
+
 if __name__ == "__main__":
     # setup test files