Skip to content

Commit

Permalink
improve netcdf multi-group reading and handle metadata QC issue (#516)
Browse files Browse the repository at this point in the history
- ATL11 v006, ATL14 v003, and ATL15 v003 have a meaningless placeholder string (e.g. "SET_BY_PGE") set as the VersionID in their file metadata. As a temporary fix, the most recent version number is retrieved from CMR and used instead for those products.
- Adds handling to read in multiple variable groups from a netCDF file.
  • Loading branch information
JessicaS11 authored Jun 4, 2024
1 parent cc02758 commit ae4a20c
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 17 deletions.
16 changes: 15 additions & 1 deletion icepyx/core/is2ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ def _validate_product(product):
"""
Confirm a valid ICESat-2 product was specified
"""
error_msg = "A valid product string was not provided. Check user input, if given, or file metadata."
error_msg = (
"A valid product string was not provided. "
"Check user input, if given, or file metadata."
)
if isinstance(product, str):
product = str.upper(product)
assert product in [
Expand Down Expand Up @@ -428,6 +431,17 @@ def extract_version(filepath, auth=None):
"Unable to parse the version from file metadata"
).with_traceback(e.__traceback__)

# catch cases where the version number is an invalid string
# e.g. a VersionID of "SET_BY_PGE", causing issues where version needs to be a valid number
try:
float(version)
except ValueError:
raise Exception(
"There is an underlying issue with the version information"
"provided in the metadata of this file."
"Consider setting the version manually for further processing."
)

# Close the file reader
f.close()
return version
53 changes: 39 additions & 14 deletions icepyx/core/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,17 @@ def vars(self):
<icepyx.core.variables.Variables at [location]>
"""

# Fix to handle the fact that the VersionID metadata is wrong for some products
# (ATL11 v006, ATL14 v003, and ATL15 v003).
# see: https://forum.earthdata.nasa.gov/viewtopic.php?t=5154
# Note that this results in a login being required even for a local file,
# because otherwise Variables (variables.py) tries to get the version from the file (ln99).
bad_metadata = ["ATL11", "ATL14", "ATL15"]
if self._product in bad_metadata and not hasattr(self, "_read_vars"):
self._read_vars = Variables(
product=self._product, version=is2ref.latest_version(self._product)
)

if not hasattr(self, "_read_vars"):
self._read_vars = Variables(path=self.filelist[0])
return self._read_vars
Expand Down Expand Up @@ -621,8 +632,10 @@ def load(self):
all_dss.append(
self._build_single_file_dataset(file, groups_list)
) # wanted_groups, vgrp.keys()))
if isinstance(file, S3File):
file.close()

# Closing the file prevents further operations on the dataset
# if isinstance(file, S3File):
# file.close()

if len(all_dss) == 1:
return all_dss[0]
Expand Down Expand Up @@ -704,10 +717,16 @@ def _build_single_file_dataset(self, file, groups_list):
-------
Xarray Dataset
"""
# DEVNOTE: if and elif does not actually apply wanted variable list,
# returns wanted groups as a list of lists with group path string elements separated
_, wanted_groups_tiered = Variables.parse_var_list(
groups_list, tiered=True, tiered_vars=True
)

# DEVNOTE: elif does not actually apply wanted variable list,
# and has not been tested for merging multiple files into one ds
# if a gridded product
# of a gridded product
# TODO: all products need to be tested, and quicklook products added or explicitly excluded
# consider looking for netcdf file extension instead of using product
# Level 3b, gridded (netcdf): ATL14, 15, 16, 17, 18, 19, 20, 21
if self.product in [
"ATL14",
Expand All @@ -720,7 +739,22 @@ def _build_single_file_dataset(self, file, groups_list):
"ATL21",
"ATL23",
]:
is2ds = xr.open_dataset(file)
wanted_grouponly_set = set(wanted_groups_tiered[0])
wanted_groups_list = list(sorted(wanted_grouponly_set))
if len(wanted_groups_list) == 1:
is2ds = self._read_single_grp(file, grp_path=wanted_groups_list[0])
else:
is2ds = self._build_dataset_template(file)
while wanted_groups_list:
ds = self._read_single_grp(file, grp_path=wanted_groups_list[0])
wanted_groups_list = wanted_groups_list[1:]
is2ds = is2ds.merge(
ds, join="outer", combine_attrs="drop_conflicts"
)
if hasattr(is2ds, "description"):
is2ds.attrs["description"] = (
"Group-level data descriptions were removed during Dataset creation."
)

# Level 3b, hdf5: ATL11
elif self.product in ["ATL11"]:
Expand All @@ -739,11 +773,6 @@ def _build_single_file_dataset(self, file, groups_list):
# Note: the sorting is critical for datasets with highly nested groups
wanted_groups_list = ["ancillary_data"] + sorted(wanted_groups_set)

# returns wanted groups as a list of lists with group path string elements separated
_, wanted_groups_tiered = Variables.parse_var_list(
groups_list, tiered=True, tiered_vars=True
)

while wanted_groups_list:
# print(wanted_groups_list)
grp_path = wanted_groups_list[0]
Expand Down Expand Up @@ -772,10 +801,6 @@ def _build_single_file_dataset(self, file, groups_list):
wanted_groups_list = ["orbit_info", "ancillary_data"] + sorted(
wanted_groups_set
)
# returns wanted groups as a list of lists with group path string elements separated
_, wanted_groups_tiered = Variables.parse_var_list(
groups_list, tiered=True, tiered_vars=True
)

while wanted_groups_list:
grp_path = wanted_groups_list[0]
Expand Down
10 changes: 8 additions & 2 deletions icepyx/tests/test_is2ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,20 @@

def test_num_product():
dsnum = 6
ermsg = "A valid product string was not provided. Check user input, if given, or file metadata."
ermsg = (
"A valid product string was not provided. "
"Check user input, if given, or file metadata."
)
with pytest.raises(TypeError, match=ermsg):
is2ref._validate_product(dsnum)


def test_bad_product():
wrngds = "atl-6"
ermsg = "A valid product string was not provided. Check user input, if given, or file metadata."
ermsg = (
"A valid product string was not provided. "
"Check user input, if given, or file metadata."
)
with pytest.raises(AssertionError, match=ermsg):
is2ref._validate_product(wrngds)

Expand Down

0 comments on commit ae4a20c

Please sign in to comment.