diff --git a/modules/xarray/README.md b/modules/xarray/README.md new file mode 100644 index 00000000..25d9b66d --- /dev/null +++ b/modules/xarray/README.md @@ -0,0 +1,21 @@ +# Installation (from VCS) + +```sh +pip install "git+https://github.com/magnusuMET/fimex.git@feature/xarray#subdirectory=modules/xarray" +``` + +# Usage + +The module is installed as an engine which `xarray` can use directly. You can open a dataset using +```python +import xarray + +xarray.open_dataset("mydataset.nc", engine="fimex") +``` + +Pass `config` or `filetype` if necessary: +```python +import xarray + +xarray.open_dataset("mydataset", filtype="grbml", config="myconfig.xml", engine="fimex") +``` diff --git a/modules/xarray/fimex_xarray/__init__.py b/modules/xarray/fimex_xarray/__init__.py new file mode 100644 index 00000000..f1337666 --- /dev/null +++ b/modules/xarray/fimex_xarray/__init__.py @@ -0,0 +1 @@ +from .xr import FimexBackendEntrypoint, FimexDataVariable diff --git a/modules/xarray/fimex_xarray/xr.py b/modules/xarray/fimex_xarray/xr.py new file mode 100755 index 00000000..7a279709 --- /dev/null +++ b/modules/xarray/fimex_xarray/xr.py @@ -0,0 +1,180 @@ +#! /usr/bin/env python3 +import pyfimex0 +import numpy as np +import xarray +from xarray.backends import BackendEntrypoint, BackendArray +import os + + +DTYPE_MAPPINGS = { + pyfimex0.CDMDataType.FLOAT: np.float32, + pyfimex0.CDMDataType.DOUBLE: np.float64, + pyfimex0.CDMDataType.INT: np.intc, + pyfimex0.CDMDataType.INT64: np.int64, + pyfimex0.CDMDataType.UINT: np.uint, + pyfimex0.CDMDataType.UINT64: np.uint64, + pyfimex0.CDMDataType.SHORT: np.short, + pyfimex0.CDMDataType.USHORT: np.ushort, + pyfimex0.CDMDataType.STRING: str, + pyfimex0.CDMDataType.CHAR: np.byte, + pyfimex0.CDMDataType.UCHAR: np.ubyte, +} + + +class FimexBackendEntrypoint(BackendEntrypoint): + def open_dataset(self, filename_or_obj, drop_variables, *, config=None, filetype=None, decode_times=True, scale_offset=True): + if filetype is None: + filetype = "" + if config is None: + config = "" + fh = pyfimex0.createFileReader(filetype, filename_or_obj, config) + cdm = fh.getCDM() + + var_names = set(cdm.getVariableNames()) + attr_names = cdm.getGlobalAttributeNames() + + attrs = dict() + for aname in attr_names: + attr = cdm.getGlobalAttribute(aname) + attrv = attr.getData().values() + attrs[aname] = attrv + + coord_names = cdm.getDimensionNames() + coords = dict() + for cname in set(coord_names).intersection(var_names): + var = cdm.getVariable(cname) + vardata = var.getData() + if vardata is not None: + values = vardata.values() + shape = var.getShape() + if len(shape) != 1: + raise Exception() + else: + values = fh.getData(cname).values() + shape = [len(values)] + + if scale_offset: + try: + scale_factor = cdm.getAttribute(cname, "scale_factor").getData().values() + except RuntimeError: + scale_factor = 1 + try: + add_offset = cdm.getAttribute(cname, "add_offset").getData().values() + except RuntimeError: + add_offset = 0 + values = values*scale_factor + add_offset + coords[cname] = values + try: + var_names.remove(cname) + except KeyError: + pass + + vars = dict() + for vname in var_names: + var = cdm.getVariable(vname) + dims = list(reversed(var.getShape())) + attrs = dict() + for aname in cdm.getAttributeNames(vname): + attr = cdm.getAttribute(vname, aname) + attrv = attr.getData().values() + if len(attrv) == 1: + attrv = attrv[0] + attrs[aname] = attrv + + data = FimexDataVariable(fh, cdm, var, scale_offset=scale_offset) + data = xarray.core.indexing.LazilyIndexedArray(data) + var = xarray.Variable(data=data, dims=dims, attrs=attrs) + vars[vname] = var + + if decode_times: + if "time" in coord_names: + var = coords["time"] + var = np.array([np.datetime64(int(v), "s") for v in var]) + + coords["time"] = var + + ds = xarray.Dataset(data_vars=vars, attrs=attrs, coords=coords) + + return ds + + open_dataset_parameters = ["filename_or_obj", "drop_variables", "config", "filetype", "decode_times", "scale_offset"] + + def guess_can_open(self, filename_or_obj): + try: + _, ext = os.path.splitext(filename_or_obj) + except TypeError: + return False + + return ext in {".grbml", ".ncml", ".grb", ".nc"} + + description = "Read using fimex" + url = "https://github.com/metno/fimex" + + +class FimexDataVariable(BackendArray): + def __init__(self, fh, cdm, var, scale_offset): + super().__init__() + self.fh = fh + self.var = var + self.cdm = cdm + self.data = var.getData() + self.scale_offset = scale_offset + + shape = [] + for s in var.getShape(): + shape.append(cdm.getDimension(s).getLength()) + self.shape = list(reversed(shape)) + try: + dtype = self.data.getDataType() + self.dtype = np.dtype(DTYPE_MAPPINGS[dtype]) + except AttributeError: + self.dtype = np.dtype(np.float64) + + def __getitem__( + self, key: xarray.core.indexing.ExplicitIndexer + ) -> np.typing.ArrayLike: + return xarray.core.indexing.explicit_indexing_adapter( + key, + self.shape, + xarray.core.indexing.IndexingSupport.BASIC, + self._raw_indexing_method, + ) + + def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike: + vname = self.var.getName() + + slicebuilder = pyfimex0.SliceBuilder(self.cdm, vname, False) + dimsizes = [] + for k, dim, dimname in zip(key, self.shape, reversed(self.var.getShape())): + ignore_dim = dimname + if isinstance(k, int): + slicebuilder.setStartAndSize(dimname, k, 1) + elif isinstance(k, slice): + start = k.start if k.start is not None else 0 + step = k.step if k.step is not None else 1 + stop = k.stop if k.stop is not None else dim + size = (stop - start) // step + slicebuilder.setStartAndSize(dimname, start, size) + dimsizes.append(size) + else: + raise TypeError(f"Unknown type of {k}: {type(k)}") + + data = self.fh.getDataSliceSB(vname, slicebuilder) + if len(dimsizes) == 0: + # Bug? Fimex can't read scalar data? + data_shaped = -1 + else: + datav = data.values() + data_shaped = np.reshape(datav, dimsizes) + if self.scale_offset: + try: + scale_factor = self.cdm.getAttribute(vname, "scale_factor").getData().values() + except RuntimeError: + scale_factor = 1 + try: + add_offset = self.cdm.getAttribute(vname, "add_offset").getData().values() + except RuntimeError: + add_offset = 0 + data_shaped = data_shaped*scale_factor + add_offset + + return data_shaped diff --git a/modules/xarray/pyproject.toml b/modules/xarray/pyproject.toml new file mode 100644 index 00000000..a4f2f87f --- /dev/null +++ b/modules/xarray/pyproject.toml @@ -0,0 +1,22 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "fimex_xarray" +version = "0.0.1" +readme = "README.md" + +dependencies = [ + # "pyfimex0", + "numpy", + "xarray", +] + +[project.optional-dependencies] +dev = [ + "pytest", +] + +[project.entry-points."xarray.backends"] +fimex = "fimex_xarray.xr:FimexBackendEntrypoint" diff --git a/modules/xarray/tests/test_fimex.py b/modules/xarray/tests/test_fimex.py new file mode 100644 index 00000000..3acaf1db --- /dev/null +++ b/modules/xarray/tests/test_fimex.py @@ -0,0 +1,35 @@ +import xarray +import os +import numpy as np + +test_srcdir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "test") + + +def test_open_hirlam12(): + path = os.path.join(test_srcdir, "hirlam12.nc") + path = os.path.join(test_srcdir, "verticalPressure.nc") + path = os.path.join(test_srcdir, "erai.sfc.40N.0.75d.200301011200.nc") + path = os.path.join(test_srcdir, "verticalOceanSG2.nc") + ds = xarray.open_dataset(path, engine="fimex", decode_times=False) + + temp = ds["temp"] + selection = temp.isel(ocean_time=3, s_rho=[4, 6, 9], xi_rho=range(4, 7)) + values = selection.values + + +def test_isel_slice(): + path = os.path.join(test_srcdir, "verticalOceanSG2.nc") + ds = xarray.open_dataset(path, engine="fimex", decode_times=False) + + zeta = ds["zeta"] + + zeta_sub = ds.isel(eta_rho=slice(None, None, None))["zeta"] + assert np.all(zeta == zeta_sub) + + zeta_sub = ds.isel(eta_rho=slice(2, None, None))["zeta"] + zeta_isel = zeta.isel(eta_rho=slice(2, None, None)) + assert np.all(zeta_isel == zeta_sub) + + zeta_sub = ds.isel(eta_rho=slice(2, 10, None))["zeta"] + zeta_isel = zeta.isel(eta_rho=slice(2, 10, None)) + assert np.all(zeta_isel == zeta_sub)