@staticmethod
def opendap_link_check(related_urls, key, extra=None):
    """
    Check whether the related URLs contain an OPeNDAP link.

    A URL object counts as an OPeNDAP link when its "URL" contains the
    configured keyword (case-insensitive), or when its "Type" or "Subtype"
    contains the configured type (case-insensitive). The first matching
    URL object wins.

    Args:
        related_urls (list, tuple, Mapping or None): The URL objects to
            inspect: a sequence of dict-like objects, or a single dict-like
            object (e.g. an OrderedDict). Any other input is treated as
            "no URLs". Non-mapping items in a sequence are skipped.
        key (Mapping or None): Rule data with optional "type" and
            "url_keyword" entries. Missing, empty, or null entries fall back
            to "OPENDAP DATA" and "opendap" respectively.
        extra (optional): Accepted only to match the expected call
            signature; ignored.

    Returns:
        dict: {"valid": bool, "value": str}. "value" is the matching URL
        exactly as it appears in the metadata, or the string "None" when no
        link was found or the matching object carries no URL.
    """
    # Tolerate absent or partial rule data. An empty keyword must not be
    # used as-is: "" is a substring of every string and would match any URL.
    config = key if isinstance(key, Mapping) else {}
    url_keyword = str(config.get("url_keyword") or "opendap").lower()
    type_to_check = str(config.get("type") or "OPENDAP DATA").upper()

    # Normalize the input to a sequence of candidate URL objects.
    if not related_urls:
        candidates = []
    elif isinstance(related_urls, Mapping):
        candidates = [related_urls]
    elif isinstance(related_urls, (list, tuple)):
        candidates = related_urls
    else:
        candidates = []

    for url_obj in candidates:
        if not isinstance(url_obj, Mapping):
            continue

        # `or ""` covers keys that are present but null, which
        # `.get(name, "")` alone would pass through as None.
        url = str(url_obj.get("URL") or "")
        type_field = str(url_obj.get("Type") or "").upper()
        subtype_field = str(url_obj.get("Subtype") or "").upper()

        if (
            url_keyword in url.lower()
            or type_to_check in type_field
            or type_to_check in subtype_field
        ):
            # Report the URL with its original casing; only the comparison
            # is case-insensitive.
            return {"valid": True, "value": url or "None"}

    return {"valid": False, "value": "None"}
An OPeNDAP link is recommended for data access.", + "help": { + "message": "OPeNDAP links allow for direct data access through the OPeNDAP protocol.", + "url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs" + }, + "remediation": "Recommend providing an OPeNDAP link in the granule's Online Resources or Related URLs fields for enhanced data accessibility." } } \ No newline at end of file diff --git a/pyQuARC/schemas/checks.json b/pyQuARC/schemas/checks.json index 778f4da3..ef303aa6 100644 --- a/pyQuARC/schemas/checks.json +++ b/pyQuARC/schemas/checks.json @@ -298,5 +298,10 @@ "data_type": "custom", "check_function": "count_check", "available": true + }, + "opendap_link_check": { + "data_type": "custom", + "check_function": "opendap_link_check", + "available": true } } diff --git a/pyQuARC/schemas/rule_mapping.json b/pyQuARC/schemas/rule_mapping.json index 2e3acc41..054df11e 100644 --- a/pyQuARC/schemas/rule_mapping.json +++ b/pyQuARC/schemas/rule_mapping.json @@ -3745,6 +3745,34 @@ "severity": "error", "check_id": "string_compare" }, + "opendap_link_check": { + "rule_name": "OPeNDAP Link Presence Check", + "fields_to_apply": { + "echo-g": [ + { + "fields": [ + "Granule/OnlineResources/OnlineResource" + ] + } + ], + "umm-g": [ + { + "fields": [ + "RelatedUrls" + ] + } + ] + }, + "data": [ + { + "type": "OPENDAP DATA", + "url_keyword": "opendap" + } + ], + "relation": "contains", + "check_id": "opendap_link_check", + "severity": "warning" + }, "location_keyword_presence_check": { "rule_name": "Location Keyword Presence Check", "fields_to_apply": {