From 29a0a69ef1a9e92b7bbb969b58111c3ce870b0bd Mon Sep 17 00:00:00 2001 From: Abel Aoun Date: Wed, 7 Feb 2024 17:07:37 +0100 Subject: [PATCH] MNT: Update enum parsing (#68) * MNT: Update enum parsing This aligns xncml enum parsing behavior with xarray's netCDF4 backend behavior. --- CHANGELOG.md | 2 +- tests/test_parser.py | 4 ++-- xncml/parser.py | 17 +++++++---------- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68f0d9c..3ba985f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ **Breaking changes** - Nested group handling: Before this version, all groups were read, but conflicting variable names in-between groups would shadow data. Now, similarly to xarray ``open_dataset``, ``open_ncml`` accepts an optional ``group`` argument to specify which group should be read. When ``group`` is not specified, it defaults to the root group. Additionally ``group`` can be set to ``'*'`` so that every group is read and the hierarchy is flattened. In the event of conflicting variable/dimension names across groups, the conflicting name will be modified by appending ``'__n'`` where n is incremented. - +- Enums are no longer transformed into CF flag_values and flag_meanings attributes; instead, they are stored in the ``encoding["dtype"].metadata`` of their respective variable. 
This is aligned with what is done on xarray v2024.01.0 0.4.0 (2024-01-08) ================== diff --git a/tests/test_parser.py b/tests/test_parser.py index 9676daa..fea0cce 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -333,8 +333,8 @@ def test_multiple_values_for_scalar(): def test_read_enum(): """A enum should be turned into CF flag_values and flag_meanings attributes.""" ds = xncml.open_ncml(data / 'testEnums.xml') - assert ds['be_or_not_to_be'].attrs['flag_values'] == [0, 1] - assert ds['be_or_not_to_be'].attrs['flag_meanings'] == ['false', 'true'] + assert ds.be_or_not_to_be.dtype.metadata['enum'] == {'false': 0, 'true': 1} + assert ds.be_or_not_to_be.dtype.metadata['enum_name'] == 'boolean' def test_empty_attr(): diff --git a/xncml/parser.py b/xncml/parser.py index a71f249..274b916 100644 --- a/xncml/parser.py +++ b/xncml/parser.py @@ -459,12 +459,9 @@ def read_enum(obj: EnumTypedef) -> dict[str, list]: Returns ------- dict: - A dictionary with CF flag_values and flag_meanings that describe the Enum. + A dictionary describing the Enum. 
""" - return { - 'flag_values': list(map(lambda e: e.key, obj.content)), - 'flag_meanings': list(map(lambda e: e.content[0], obj.content)), - } + return {e.content[0]: e.key for e in obj.content} def read_variable( @@ -472,7 +469,7 @@ def read_variable( ref: xr.Dataset, obj: Variable, dimensions: dict, - enums: dict, + enums: dict[str, dict[str, int]], group_path: str, ) -> xr.Dataset: """ @@ -576,10 +573,10 @@ def read_variable( raise NotImplementedError if obj.typedef in enums.keys(): - # TODO (@bzah): Update this once Enums are merged in xarray - # https://github.com/pydata/xarray/pull/8147 - out.attrs['flag_values'] = enums[obj.typedef]['flag_values'] - out.attrs['flag_meanings'] = enums[obj.typedef]['flag_meanings'] + dtype = out.dtype + new_dtype = np.dtype(dtype, metadata={'enum': enums[obj.typedef], 'enum_name': obj.typedef}) + out.encoding['dtype'] = new_dtype + out = out.astype(new_dtype) elif obj.typedef is not None: raise NotImplementedError import re