Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enh/manage groups #66

Merged
merged 13 commits into from
Feb 6, 2024
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
0.5.0 (unreleased)
==================

**Breaking changes**
- Nested group handling:
Before this version, all groups were read, but variables sharing the same name in different groups would shadow one another. Now, similarly to xarray's ``open_dataset``, ``open_ncml`` accepts an optional ``group`` argument to specify which group should be read. When ``group`` is not specified, it defaults to the root group. Additionally, ``group`` can be set to ``'*'`` so that every group is read and the hierarchy is flattened. If variable or dimension names conflict across groups, each conflicting name is made unique by appending ``'__n'``, where ``n`` is a counter incremented for every new conflict (e.g. ``index``, ``index__1``, ``index__2``).


0.4.0 (2024-01-08)
==================

Expand Down
16 changes: 16 additions & 0 deletions tests/data/testGroup.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<variable name="toto" shape="" type="ushort">
<values>3</values>
</variable>
<group name="a_sub_group">
<variable name="group_var" shape="" type="ushort">
<values>1</values>
</variable>
</group>
<group name="another_sub_group">
<variable name="other_group_var" shape="" type="ushort">
<values>2</values>
</variable>
</group>
</netcdf>
11 changes: 11 additions & 0 deletions tests/data/testGroupConflictingDims.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<group name="gr_a">
<dimension name="index" length="42"/>
<variable name="gr_a_var" shape="index" type="ushort"></variable>
</group>
<group name="gr_b">
<dimension name="index" length="94"/>
<variable name="gr_b_var" shape="index" type="ushort"></variable>
</group>
</netcdf>
7 changes: 7 additions & 0 deletions tests/data/testGroupInvalidDim.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<variable name="toto" shape="myDim" type="ushort">
<values>3</values>
</variable>
<dimension name="myDim"></dimension>
</netcdf>
18 changes: 18 additions & 0 deletions tests/data/testGroupMultiLayers.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<variable name="a_var" shape="" type="ushort">
<values>2</values>
</variable>
<group name="gr_a">
<dimension name="index" length="42"/>
<group name="sub_gr">
<variable name="a_var" shape="index" type="ushort"></variable>
</group>
</group>
<group name="gr_b">
<dimension name="index" length="22"/>
<group name="sub_gr">
<variable name="a_var" shape="index" type="ushort"></variable>
</group>
</group>
</netcdf>
72 changes: 66 additions & 6 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,22 +310,22 @@ def test_unsigned_type():

def test_empty_scalar__no_values_tag():
"""
Scalar without values loose their type because we can't create a typed numpy
scalar which is empty
A scalar variable whose <values> tag is missing will have its value set to
the default value of its type.
"""
ds = xncml.open_ncml(data / 'testEmptyScalar.xml')
assert ds['empty_scalar_var'].dtype == np.dtype('O')
assert ds['empty_scalar_var'].item() is None
assert ds['empty_scalar_var'].dtype == np.dtype('float64')
assert ds['empty_scalar_var'].item() == 0


def test_empty_scalar__with_empty_values_tag():
"""A scalar variable with an empty <values> tag is invalid."""
"""A scalar with an empty <values> tag is invalid."""
with pytest.raises(ValueError, match='No values found for variable .*'):
xncml.open_ncml(data / 'testEmptyScalar_withValuesTag.xml')


def test_multiple_values_for_scalar():
"""Scalar with an multiple values in <values> tag is invalid."""
"""A scalar with multiple values in its <values> tag is invalid."""
with pytest.raises(ValueError, match='The expected size for variable .* was 1, .*'):
xncml.open_ncml(data / 'testEmptyScalar_withMultipleValues.xml')

Expand All @@ -343,6 +343,66 @@ def test_empty_attr():
assert ds.attrs['comment'] == ''


def test_read_group__read_only_root_group():
    """Without a ``group`` argument, only root-level variables are loaded."""
    root_ds = xncml.open_ncml(data / 'testGroup.xml')
    # The root variable is exposed on the dataset...
    assert root_ds.toto is not None
    # ...while variables declared inside sub-groups are left out.
    for nested_name in ('group_var', 'other_group_var'):
        assert root_ds.get(nested_name) is None


def test_read_group__read_sub_group():
    """Read the requested sub-group together with its parent (root) group.

    Variables of the root group and of ``a_sub_group`` must be present;
    variables of the sibling group ``another_sub_group`` must not.
    """
    ds = xncml.open_ncml(data / 'testGroup.xml', group='a_sub_group')
    assert ds.toto is not None
    assert ds.get('group_var') is not None
    # This used to be a bare assignment, which silently overwrote the
    # attribute instead of checking it.
    # NOTE(review): assumes the parser records the originating group in the
    # ``group_path`` attribute -- confirm against the parser implementation.
    assert ds.group_var.attrs['group_path'] == '/a_sub_group'
    assert ds.get('other_group_var') is None


def test_read_group__conflicting_dims():
    """Reading a single group must use that group's own dimension length.

    ``gr_a`` and ``gr_b`` both declare a dimension named ``index`` (42 and 94
    respectively); selecting ``gr_b`` must yield the length 94, unrenamed.
    """
    ds = xncml.open_ncml(data / 'testGroupConflictingDims.xml', group='gr_b')
    # ``Dataset.sizes`` is the name -> length mapping; using ``Dataset.dims``
    # as a mapping is deprecated in recent xarray releases.
    assert ds.sizes['index'] == 94
    assert 'index' in ds.gr_b_var.dims


def test_read__invalid_dim():
    """Opening a file whose variable uses an unresolved dimension raises ValueError."""
    invalid_ncml = data / 'testGroupInvalidDim.xml'
    with pytest.raises(ValueError, match="Unknown dimension 'myDim'.*"):
        xncml.open_ncml(invalid_ncml)


def test_flatten_groups():
    """Read every group (``group='*'``) and flatten them into one dataset.

    No names conflict in ``testGroup.xml``, so no ``__n`` suffix is expected.
    """
    ds = xncml.open_ncml(data / 'testGroup.xml', group='*')
    assert ds.toto is not None
    # ``toto`` is unique across groups, so it must not have been duplicated.
    assert ds.get('toto__1') is None
    assert ds.get('group_var') is not None
    # The two ``group_path`` checks used to be bare assignments, which
    # overwrote the attribute instead of verifying it.
    # NOTE(review): assumes the parser records the originating group in the
    # ``group_path`` attribute -- confirm against the parser implementation.
    assert ds.group_var.attrs['group_path'] == '/a_sub_group'
    assert ds.get('other_group_var') is not None
    assert ds.other_group_var.attrs['group_path'] == '/another_sub_group'


def test_flatten_groups__conflicting_dims():
    """Flattening groups renames dimensions whose names conflict.

    ``gr_a`` and ``gr_b`` both declare ``index`` (lengths 42 and 94); the
    first keeps its name and the second becomes ``index__1``.
    """
    ds = xncml.open_ncml(data / 'testGroupConflictingDims.xml', group='*')
    assert 'index' in ds.gr_a_var.dims
    assert 'index__1' in ds.gr_b_var.dims
    # Pin the actual lengths from the fixture: the previous ``is not None``
    # checks could not detect a dimension bound to the wrong group.
    # ``Dataset.sizes`` replaces the deprecated mapping use of ``Dataset.dims``.
    assert ds.sizes['index'] == 42
    assert ds.sizes['index__1'] == 94


def test_flatten_groups__sub_groups():
    """Flatten nested groups, renaming conflicting variables and dimensions.

    ``a_var`` exists at the root (scalar, value 2) and in ``gr_a/sub_gr`` and
    ``gr_b/sub_gr``, whose parents declare ``index`` with lengths 42 and 22.
    """
    ds = xncml.open_ncml(data / 'testGroupMultiLayers.xml', group='*')
    # ``Dataset.sizes`` is the name -> length mapping; using ``Dataset.dims``
    # as a mapping is deprecated in recent xarray releases.
    assert ds.sizes['index'] == 42
    assert ds.sizes['index__1'] == 22
    # The root-level variable keeps its name and scalar value.
    assert ds['a_var'].size == 1
    assert ds['a_var'].item() == 2
    # Nested variables are suffixed and sized by their parent's dimension.
    assert ds['a_var__1'].size == 42
    assert ds['a_var__2'].size == 22


# --- #
def check_dimension(ds):
assert len(ds['lat']) == 3
Expand Down
Loading
Loading