Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new check for OPeNDAP for granules #305

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions pyQuARC/code/custom_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .string_validator import StringValidator

from .utils import cmr_request, if_arg, set_cmr_prms
from collections.abc import Mapping


class CustomValidator(BaseValidator):
Expand Down Expand Up @@ -277,3 +278,63 @@ def count_check(count, values, key):
items = [items]
num_items = len(items)
return {"valid": int(count) == num_items, "value": (count, num_items)}

@staticmethod
def opendap_link_check(related_urls, key, extra=None):
    """
    Check whether the given related URLs include an OPeNDAP link.

    An entry counts as an OPeNDAP link when its "URL" contains the configured
    keyword (case-insensitive) or when its "Type" or "Subtype" contains the
    configured type string (case-insensitive). The first matching entry wins.

    Args:
        related_urls (list or Mapping): A list of URL objects (dict-like), or a
            single dict-like URL object. Falsy input (None, empty list) is
            treated as "no URLs". List items that are not mappings are skipped.
        key (Mapping): Check configuration. Recognised keys:
            "url_keyword" (default "opendap") and "type" (default
            "OPENDAP DATA"). A missing/empty value falls back to its default.
        extra (optional): Accepted only to match the expected call signature;
            it is ignored.

    Returns:
        dict: {"valid": bool, "value": str}. "value" is the matching entry's
        URL exactly as it appears in the metadata, or the string "None" when
        no entry matched or the matching entry has no URL.
    """

    def _as_text(raw):
        # Fields may be present but null (or non-string); never call string
        # methods on them directly.
        return "" if raw is None else str(raw)

    # Normalize the input to a list of URL objects.
    if not related_urls:
        related_urls = []
    elif isinstance(related_urls, Mapping):
        related_urls = [related_urls]

    # Tolerate a missing configuration object by falling back to defaults.
    if not isinstance(key, Mapping):
        key = {}
    url_keyword = _as_text(key.get("url_keyword") or "opendap").lower()
    type_to_check = _as_text(key.get("type") or "OPENDAP DATA").upper()

    for url_obj in related_urls:
        # Only dictionary-like entries can carry URL/Type/Subtype fields.
        if not isinstance(url_obj, Mapping):
            continue

        url_value = _as_text(url_obj.get("URL"))

        # Match on the configured keyword (not a hard-coded one). Compare
        # case-insensitively but report the URL in its original case, since
        # URL paths are case-sensitive.
        if url_keyword in url_value.lower():
            return {"valid": True, "value": url_value}

        type_field = _as_text(url_obj.get("Type")).upper()
        subtype_field = _as_text(url_obj.get("Subtype")).upper()

        if type_to_check in type_field or type_to_check in subtype_field:
            return {"valid": True, "value": url_value if url_value else "None"}

    # No entry qualified as an OPeNDAP link.
    return {"valid": False, "value": "None"}

8 changes: 8 additions & 0 deletions pyQuARC/schemas/check_messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -1070,5 +1070,13 @@
"url": "https://wiki.earthdata.nasa.gov/display/CMR/Spatial+Extent"
},
"remediation": "Recommend providing the horizontal pixel resolution, if applicable. If provided, this information will be indexed in the EDSC 'Horizontal Data Resolution' search facet which allows users to search by spatial resolution."
},
"opendap_link_check": {
"failure": "No OPeNDAP URL is provided in the granule fields. An OPeNDAP link is recommended for data access.",
"help": {
"message": "OPeNDAP links allow for direct data access through the OPeNDAP protocol.",
"url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs"
},
"remediation": "Recommend providing an OPeNDAP link in the granule's Online Resources or Related URLs fields for enhanced data accessibility."
}
}
5 changes: 5 additions & 0 deletions pyQuARC/schemas/checks.json
Original file line number Diff line number Diff line change
Expand Up @@ -298,5 +298,10 @@
"data_type": "custom",
"check_function": "count_check",
"available": true
},
"opendap_link_check": {
"data_type": "custom",
"check_function": "opendap_link_check",
"available": true
}
}
28 changes: 28 additions & 0 deletions pyQuARC/schemas/rule_mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -3745,6 +3745,34 @@
"severity": "error",
"check_id": "string_compare"
},
"opendap_link_check": {
"rule_name": "OPeNDAP Link Presence Check",
"fields_to_apply": {
"echo-g": [
{
"fields": [
"Granule/OnlineResources/OnlineResource"
]
}
],
"umm-g": [
{
"fields": [
"RelatedUrls"
]
}
]
},
"data": [
{
"type": "OPENDAP DATA",
"url_keyword": "opendap"
}
],
"relation": "contains",
"check_id": "opendap_link_check",
"severity": "warning"
},
"location_keyword_presence_check": {
"rule_name": "Location Keyword Presence Check",
"fields_to_apply": {
Expand Down
Loading