-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support specifying single HDF Group in open_virtual_dataset #165
Merged
Merged
Changes from 11 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
646e72d
first pass at single hdf group
scottyhq 15d5178
merge with main
scottyhq 9a9ebda
Update virtualizarr/tests/test_xarray.py
scottyhq cf540b2
Update virtualizarr/xarray.py
scottyhq a896492
merge main
scottyhq 2d805ef
Merge branch 'main' into group
scottyhq bc440b9
refactor, basic test
scottyhq 9cfc6f6
add test
scottyhq ae08df9
fsspec doesnt like pytest tmp_path
scottyhq b1dba1f
document
scottyhq 16b1332
merge main
scottyhq 94a31f2
Merge branch 'main' into scotthq_group
TomNicholas 91ae76b
Merge branch 'main' into scotthq_group
TomNicholas b5de99e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
from collections.abc import Mapping | ||
from unittest.mock import patch | ||
|
||
import fsspec | ||
import numpy as np | ||
import pytest | ||
import xarray as xr | ||
|
@@ -349,8 +350,10 @@ class TestReadFromURL: | |
"hdf4", | ||
"https://github.com/corteva/rioxarray/raw/master/test/test_data/input/MOD09GA.A2008296.h14v17.006.2015181011753.hdf", | ||
), | ||
# https://github.com/zarr-developers/VirtualiZarr/issues/159 | ||
# ("hdf5", "https://github.com/fsspec/kerchunk/raw/main/kerchunk/tests/NEONDSTowerTemperatureData.hdf5"), | ||
( | ||
"hdf5", | ||
"https://nisar.asf.earthdatacloud.nasa.gov/NISAR-SAMPLE-DATA/GCOV/ALOS1_Rosamond_20081012/NISAR_L2_PR_GCOV_001_005_A_219_4020_SHNA_A_20081012T060910_20081012T060926_P01101_F_N_J_001.h5", | ||
), | ||
pytest.param( | ||
"tiff", | ||
"https://github.com/fsspec/kerchunk/raw/main/kerchunk/tests/lcmap_tiny_cog_2020.tif", | ||
|
@@ -375,10 +378,48 @@ def test_read_from_url(self, filetype, url): | |
if filetype in ["grib", "jpg", "hdf4"]: | ||
with pytest.raises(NotImplementedError): | ||
vds = open_virtual_dataset(url, reader_options={}, indexes={}) | ||
elif filetype == "hdf5": | ||
vds = open_virtual_dataset( | ||
url, | ||
group="science/LSAR/GCOV/grids/frequencyA", | ||
drop_variables=["listOfCovarianceTerms", "listOfPolarizations"], | ||
indexes={}, | ||
reader_options={}, | ||
) | ||
assert isinstance(vds, xr.Dataset) | ||
else: | ||
vds = open_virtual_dataset(url, indexes={}) | ||
assert isinstance(vds, xr.Dataset) | ||
|
||
def test_virtualizarr_vs_local_nisar(self): | ||
# Open group directly from locally cached file with xarray | ||
url = "https://nisar.asf.earthdatacloud.nasa.gov/NISAR-SAMPLE-DATA/GCOV/ALOS1_Rosamond_20081012/NISAR_L2_PR_GCOV_001_005_A_219_4020_SHNA_A_20081012T060910_20081012T060926_P01101_F_N_J_001.h5" | ||
tmpfile = fsspec.open_local( | ||
f"filecache::{url}", filecache=dict(cache_storage="/tmp", same_names=True) | ||
) | ||
hdf_group = "science/LSAR/GCOV/grids/frequencyA" | ||
dsXR = xr.open_dataset( | ||
tmpfile, | ||
engine="h5netcdf", | ||
group=hdf_group, | ||
drop_variables=["listOfCovarianceTerms", "listOfPolarizations"], | ||
phony_dims="access", | ||
) | ||
|
||
# save group reference file via virtualizarr, then open with engine="kerchunk" | ||
vds = open_virtual_dataset( | ||
tmpfile, | ||
group=hdf_group, | ||
indexes={}, | ||
drop_variables=["listOfCovarianceTerms", "listOfPolarizations"], | ||
) | ||
tmpref = "/tmp/cmip6.json" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. pytest has a fixture |
||
vds.virtualize.to_kerchunk(tmpref, format="json") | ||
dsV = xr.open_dataset(tmpref, engine="kerchunk") | ||
|
||
# xrt.assert_identical(dsXR, dsV) #Attribute order changes | ||
scottyhq marked this conversation as resolved.
Show resolved
Hide resolved
|
||
xrt.assert_equal(dsXR, dsV) | ||
|
||
|
||
class TestLoadVirtualDataset: | ||
def test_loadable_variables(self, netcdf4_file): | ||
|
@@ -406,6 +447,26 @@ def test_explicit_filetype(self, netcdf4_file): | |
with pytest.raises(NotImplementedError): | ||
open_virtual_dataset(netcdf4_file, filetype="grib") | ||
|
||
def test_group_kwarg(self, hdf5_groups_file): | ||
with pytest.raises(ValueError, match="Multiple HDF Groups found"): | ||
open_virtual_dataset(hdf5_groups_file) | ||
with pytest.raises(ValueError, match="not found in"): | ||
open_virtual_dataset(hdf5_groups_file, group="doesnt_exist") | ||
|
||
vars_to_load = ["air", "time"] | ||
vds = open_virtual_dataset( | ||
hdf5_groups_file, | ||
group="test/group", | ||
loadable_variables=vars_to_load, | ||
indexes={}, | ||
) | ||
full_ds = xr.open_dataset( | ||
hdf5_groups_file, group="test/group", decode_times=False | ||
) | ||
for name in full_ds.variables: | ||
if name in vars_to_load: | ||
xrt.assert_identical(vds.variables[name], full_ds.variables[name]) | ||
scottyhq marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@patch("virtualizarr.kerchunk.read_kerchunk_references_from_file") | ||
def test_open_virtual_dataset_passes_expected_args( | ||
self, mock_read_kerchunk, netcdf4_file | ||
|
@@ -415,6 +476,7 @@ def test_open_virtual_dataset_passes_expected_args( | |
args = { | ||
"filepath": netcdf4_file, | ||
"filetype": None, | ||
"group": None, | ||
"reader_options": reader_options, | ||
} | ||
mock_read_kerchunk.assert_called_once_with(**args) | ||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not familiar with this `fsspec` function. Is this not something that can just be done with `pathlib`?