From 29a0a69ef1a9e92b7bbb969b58111c3ce870b0bd Mon Sep 17 00:00:00 2001
From: Abel Aoun <aoun.abel@gmail.com>
Date: Wed, 7 Feb 2024 17:07:37 +0100
Subject: [PATCH] MNT: Update enum parsing (#68)

* MNT: Update enum parsing

This aligns xncml enum parsing behavior with xarray's
netCDF4 backend behavior.
---
 CHANGELOG.md         |  2 +-
 tests/test_parser.py |  4 ++--
 xncml/parser.py      | 17 +++++++----------
 3 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 68f0d9c..3ba985f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@
 **Breaking changes**
 - Nested group handling:
   Before this version, all groups were read, but conflicting variable names in-between groups would shadow data.  Now, similarly to xarray ``open_dataset``, ``open_ncml`` accepts an optional ``group`` argument to specify which group should be read. When ``group`` is not specified, it defaults to the root group. Additionally ``group`` can be set to ``'*'`` so that every group is read and the hierarchy is flattened.   In the event of conflicting variable/dimension names across groups, the conflicting name will be modified by appending ``'__n'`` where n is incremented.
-
+- Enums are no longer transformed into CF ``flag_values`` and ``flag_meanings`` attributes; instead, they are stored in the ``encoding["dtype"].metadata`` of their respective variable. This aligns with the behavior of xarray v2024.01.0.
 
 0.4.0 (2024-01-08)
 ==================
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 9676daa..fea0cce 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -333,8 +333,8 @@ def test_multiple_values_for_scalar():
 def test_read_enum():
     """A enum should be turned into CF flag_values and flag_meanings attributes."""
     ds = xncml.open_ncml(data / 'testEnums.xml')
-    assert ds['be_or_not_to_be'].attrs['flag_values'] == [0, 1]
-    assert ds['be_or_not_to_be'].attrs['flag_meanings'] == ['false', 'true']
+    assert ds.be_or_not_to_be.dtype.metadata['enum'] == {'false': 0, 'true': 1}
+    assert ds.be_or_not_to_be.dtype.metadata['enum_name'] == 'boolean'
 
 
 def test_empty_attr():
diff --git a/xncml/parser.py b/xncml/parser.py
index a71f249..274b916 100644
--- a/xncml/parser.py
+++ b/xncml/parser.py
@@ -459,12 +459,9 @@ def read_enum(obj: EnumTypedef) -> dict[str, list]:
     Returns
     -------
     dict:
-        A dictionary with CF flag_values and flag_meanings that describe the Enum.
+        A dictionary mapping each Enum member name to its value.
     """
-    return {
-        'flag_values': list(map(lambda e: e.key, obj.content)),
-        'flag_meanings': list(map(lambda e: e.content[0], obj.content)),
-    }
+    return {e.content[0]: e.key for e in obj.content}
 
 
 def read_variable(
@@ -472,7 +469,7 @@ def read_variable(
     ref: xr.Dataset,
     obj: Variable,
     dimensions: dict,
-    enums: dict,
+    enums: dict[str, dict[str, int]],
     group_path: str,
 ) -> xr.Dataset:
     """
@@ -576,10 +573,10 @@ def read_variable(
         raise NotImplementedError
 
     if obj.typedef in enums.keys():
-        # TODO (@bzah): Update this once Enums are merged in xarray
-        #      https://github.com/pydata/xarray/pull/8147
-        out.attrs['flag_values'] = enums[obj.typedef]['flag_values']
-        out.attrs['flag_meanings'] = enums[obj.typedef]['flag_meanings']
+        dtype = out.dtype
+        new_dtype = np.dtype(dtype, metadata={'enum': enums[obj.typedef], 'enum_name': obj.typedef})
+        out.encoding['dtype'] = new_dtype
+        out = out.astype(new_dtype)
     elif obj.typedef is not None:
         raise NotImplementedError
     import re