Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enh/manage groups #66

Merged
merged 13 commits into from
Feb 6, 2024
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repos:
- id: check-yaml
- id: double-quote-string-fixer

- repo: https://github.com/psf/black
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 23.12.1
hooks:
- id: black
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
0.5.0 (unreleased)
==================

**Breaking changes**
- Nested group handling:
Before this version, all groups were read, but conflicting variable names between groups would cause a loss of data.
Now, similarly to xarray ``open_dataset``, ``open_ncml`` accepts an optional `group` argument to specify which group should be read.
When ``group`` is missing, the root group is assumed.
Additionally, ``group`` can be set to ``'*'``. In that case every group is read and the hierarchy is flattened.
In the event of conflicting names, the variable/dimension name will be updated by appending '__n' where n is incremented.
bzah marked this conversation as resolved.
Show resolved Hide resolved


0.4.0 (2024-01-08)
==================

Expand Down
16 changes: 16 additions & 0 deletions tests/data/testGroup.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<variable name="toto" shape="" type="ushort">
<values>3</values>
</variable>
<group name="a_sub_group">
<variable name="group_var" shape="" type="ushort">
<values>1</values>
</variable>
</group>
<group name="another_sub_group">
<variable name="other_group_var" shape="" type="ushort">
<values>2</values>
</variable>
</group>
</netcdf>
11 changes: 11 additions & 0 deletions tests/data/testGroupConflictingDims.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<group name="gr_a">
<dimension name="index" length="42"/>
<variable name="gr_a_var" shape="index" type="ushort"></variable>
</group>
<group name="gr_b">
<dimension name="index" length="94"/>
<variable name="gr_b_var" shape="index" type="ushort"></variable>
</group>
</netcdf>
7 changes: 7 additions & 0 deletions tests/data/testGroupInvalidDim.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<variable name="toto" shape="myDim" type="ushort">
<values>3</values>
</variable>
<dimension name="myDim"></dimension>
</netcdf>
15 changes: 15 additions & 0 deletions tests/data/testGroupMultiLayers.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
<group name="gr_a">
<dimension name="index" length="42"/>
<group name="sub_gr">
<variable name="gr_a_var" shape="index" type="ushort"></variable>
</group>
</group>
<group name="gr_b">
<dimension name="index" length="22"/>
<group name="sub_gr">
<variable name="gr_b_var" shape="index" type="ushort"></variable>
</group>
</group>
</netcdf>
69 changes: 63 additions & 6 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,22 +310,22 @@ def test_unsigned_type():

def test_empty_scalar__no_values_tag():
    """
    A scalar declared without a <values> tag can still be read.

    The resulting variable is expected to be a float64 scalar equal to 0.
    """
    ds = xncml.open_ncml(data / 'testEmptyScalar.xml')
    # NOTE(review): an earlier wording of this test expected an untyped (object)
    # scalar holding None; confirm float64/0 is the behaviour the parser implements.
    assert ds['empty_scalar_var'].dtype == np.dtype('float64')
    assert ds['empty_scalar_var'].item() == 0


def test_empty_scalar__with_empty_values_tag():
    """A scalar with an empty <values> tag is invalid."""
    with pytest.raises(ValueError, match='No values found for variable .*'):
        xncml.open_ncml(data / 'testEmptyScalar_withValuesTag.xml')


def test_multiple_values_for_scalar():
    """A scalar with multiple values in its <values> tag is invalid."""
    with pytest.raises(ValueError, match='The expected size for variable .* was 1, .*'):
        xncml.open_ncml(data / 'testEmptyScalar_withMultipleValues.xml')

Expand All @@ -343,6 +343,63 @@ def test_empty_attr():
assert ds.attrs['comment'] == ''


def test_read_group__read_only_root_group():
    """Without a ``group`` argument, only the root group is read."""
    root_ds = xncml.open_ncml(data / 'testGroup.xml')
    # 'toto' is declared at the root of the file.
    assert root_ds.toto is not None
    # Variables declared inside the sub-groups must not be present.
    for nested_name in ('group_var', 'other_group_var'):
        assert root_ds.get(nested_name) is None


def test_read_group__read_sub_group():
    """Read specified sub group and its parents."""
    ds = xncml.open_ncml(data / 'testGroup.xml', group='a_sub_group')
    # 'toto' belongs to the root (parent) group and is expected alongside the sub group.
    assert ds.toto is not None
    assert ds.get('group_var') is not None
    # NOTE(review): assumes the parser records the source group under
    # attrs['group_path'] — confirm against the parser implementation.
    assert ds.group_var.attrs['group_path'] == '/a_sub_group'
    # A sibling group that was not requested must not be read.
    assert ds.get('other_group_var') is None


def test_read_group__conflicting_dims():
    """Read a group and ensure its dimension is correct."""
    ds = xncml.open_ncml(data / 'testGroupConflictingDims.xml', group='gr_b')
    # Both gr_a and gr_b declare an 'index' dimension; reading gr_b alone must
    # yield gr_b's length (94). ``sizes`` is the name -> length mapping.
    assert ds.sizes['index'] == 94
    assert 'index' in ds.gr_b_var.dims


def test_read__invalid_dim():
    """Opening testGroupInvalidDim.xml must fail with an 'Unknown dimension' ValueError."""
    expected_message = "Unknown dimension 'myDim'.*"
    invalid_file = data / 'testGroupInvalidDim.xml'
    with pytest.raises(ValueError, match=expected_message):
        xncml.open_ncml(invalid_file)


def test_flatten_groups():
    """Read every group and flatten everything in a single dataset/group."""
    ds = xncml.open_ncml(data / 'testGroup.xml', group='*')
    assert ds.toto is not None
    # Variable names in this file are all distinct, so no '__1' copy of 'toto' is expected.
    assert ds.get('toto__1') is None
    assert ds.get('group_var') is not None
    # NOTE(review): assumes the parser records the source group under
    # attrs['group_path'] — confirm against the parser implementation.
    assert ds.group_var.attrs['group_path'] == '/a_sub_group'
    assert ds.get('other_group_var') is not None
    assert ds.other_group_var.attrs['group_path'] == '/another_sub_group'


def test_flatten_groups__conflicting_dims():
    """Read every group; the second conflicting 'index' dimension is renamed 'index__1'."""
    ds = xncml.open_ncml(data / 'testGroupConflictingDims.xml', group='*')
    assert 'index' in ds.gr_a_var.dims
    # Lengths come from the <dimension> declarations of gr_a (42) and gr_b (94).
    assert ds.sizes['index'] == 42
    assert 'index__1' in ds.gr_b_var.dims
    assert ds.sizes['index__1'] == 94


def test_flatten_groups__sub_groups():
    """Flatten a file whose variables live in sub-groups nested inside other groups."""
    flattened = xncml.open_ncml(data / 'testGroupMultiLayers.xml', group='*')
    assert flattened.dims['index'] is not None
    # Both variables are declared two levels deep, one per top-level group.
    for nested_var in ('gr_a_var', 'gr_b_var'):
        assert flattened[nested_var] is not None


# --- #
def check_dimension(ds):
assert len(ds['lat']) == 3
Expand Down
Loading