-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Initial xarray support #35
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Installation (from VCS) | ||
|
||
```sh | ||
pip install "git+https://github.com/magnusuMET/fimex.git@feature/xarray#subdirectory=modules/xarray" | ||
``` | ||
|
||
# Usage | ||
|
||
The module is installed as an engine which `xarray` can use directly. You can open a dataset using | ||
```python | ||
import xarray | ||
|
||
xarray.open_dataset("mydataset.nc", engine="fimex") | ||
``` | ||
|
||
Pass `config` or `filetype` if necessary: | ||
```python | ||
import xarray | ||
|
||
xarray.open_dataset("mydataset", filtype="grbml", config="myconfig.xml", engine="fimex") | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .xr import FimexBackendEntrypoint, FimexDataVariable |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
#! /usr/bin/env python3 | ||
import pyfimex0 | ||
import numpy as np | ||
import xarray | ||
from xarray.backends import BackendEntrypoint, BackendArray | ||
import os | ||
|
||
|
||
DTYPE_MAPPINGS = { | ||
pyfimex0.CDMDataType.FLOAT: np.float32, | ||
pyfimex0.CDMDataType.DOUBLE: np.float64, | ||
pyfimex0.CDMDataType.INT: np.intc, | ||
pyfimex0.CDMDataType.INT64: np.int64, | ||
pyfimex0.CDMDataType.UINT: np.uint, | ||
pyfimex0.CDMDataType.UINT64: np.uint64, | ||
pyfimex0.CDMDataType.SHORT: np.short, | ||
pyfimex0.CDMDataType.USHORT: np.ushort, | ||
pyfimex0.CDMDataType.STRING: str, | ||
pyfimex0.CDMDataType.CHAR: np.byte, | ||
pyfimex0.CDMDataType.UCHAR: np.ubyte, | ||
} | ||
|
||
|
||
class FimexBackendEntrypoint(BackendEntrypoint): | ||
def open_dataset(self, filename_or_obj, drop_variables, *, config=None, filetype=None, decode_times=True, scale_offset=True): | ||
if filetype is None: | ||
filetype = "" | ||
if config is None: | ||
config = "" | ||
fh = pyfimex0.createFileReader(filetype, filename_or_obj, config) | ||
cdm = fh.getCDM() | ||
|
||
var_names = set(cdm.getVariableNames()) | ||
attr_names = cdm.getGlobalAttributeNames() | ||
|
||
attrs = dict() | ||
for aname in attr_names: | ||
attr = cdm.getGlobalAttribute(aname) | ||
attrv = attr.getData().values() | ||
attrs[aname] = attrv | ||
|
||
coord_names = cdm.getDimensionNames() | ||
coords = dict() | ||
for cname in set(coord_names).intersection(var_names): | ||
var = cdm.getVariable(cname) | ||
vardata = var.getData() | ||
if vardata is not None: | ||
values = vardata.values() | ||
shape = var.getShape() | ||
if len(shape) != 1: | ||
raise Exception() | ||
else: | ||
values = fh.getData(cname).values() | ||
shape = [len(values)] | ||
|
||
if scale_offset: | ||
try: | ||
scale_factor = cdm.getAttribute(cname, "scale_factor").getData().values() | ||
except RuntimeError: | ||
scale_factor = 1 | ||
try: | ||
add_offset = cdm.getAttribute(cname, "add_offset").getData().values() | ||
except RuntimeError: | ||
add_offset = 0 | ||
values = values*scale_factor + add_offset | ||
coords[cname] = values | ||
try: | ||
var_names.remove(cname) | ||
except KeyError: | ||
pass | ||
|
||
vars = dict() | ||
for vname in var_names: | ||
var = cdm.getVariable(vname) | ||
dims = list(reversed(var.getShape())) | ||
attrs = dict() | ||
for aname in cdm.getAttributeNames(vname): | ||
attr = cdm.getAttribute(vname, aname) | ||
attrv = attr.getData().values() | ||
if len(attrv) == 1: | ||
attrv = attrv[0] | ||
attrs[aname] = attrv | ||
|
||
data = FimexDataVariable(fh, cdm, var, scale_offset=scale_offset) | ||
data = xarray.core.indexing.LazilyIndexedArray(data) | ||
var = xarray.Variable(data=data, dims=dims, attrs=attrs) | ||
vars[vname] = var | ||
|
||
if decode_times: | ||
if "time" in coord_names: | ||
var = coords["time"] | ||
var = np.array([np.datetime64(int(v), "s") for v in var]) | ||
|
||
coords["time"] = var | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really know what this should do in xarray, but the time-variable in netcdf is every variable with a time unit, i.e. 'days since 2005-09-03 +15:00'. One cannot rely on the dimension-name, and that there is only one "time"-axis per file. |
||
ds = xarray.Dataset(data_vars=vars, attrs=attrs, coords=coords) | ||
|
||
return ds | ||
|
||
open_dataset_parameters = ["filename_or_obj", "drop_variables", "config", "filetype", "decode_times", "scale_offset"] | ||
|
||
def guess_can_open(self, filename_or_obj): | ||
try: | ||
_, ext = os.path.splitext(filename_or_obj) | ||
except TypeError: | ||
return False | ||
|
||
return ext in {".grbml", ".ncml", ".grb", ".nc"} | ||
|
||
description = "Read using fimex" | ||
url = "https://github.com/metno/fimex" | ||
|
||
|
||
class FimexDataVariable(BackendArray): | ||
def __init__(self, fh, cdm, var, scale_offset): | ||
super().__init__() | ||
self.fh = fh | ||
self.var = var | ||
self.cdm = cdm | ||
self.data = var.getData() | ||
self.scale_offset = scale_offset | ||
|
||
shape = [] | ||
for s in var.getShape(): | ||
shape.append(cdm.getDimension(s).getLength()) | ||
self.shape = list(reversed(shape)) | ||
try: | ||
dtype = self.data.getDataType() | ||
self.dtype = np.dtype(DTYPE_MAPPINGS[dtype]) | ||
except AttributeError: | ||
self.dtype = np.dtype(np.float64) | ||
|
||
def __getitem__( | ||
self, key: xarray.core.indexing.ExplicitIndexer | ||
) -> np.typing.ArrayLike: | ||
return xarray.core.indexing.explicit_indexing_adapter( | ||
key, | ||
self.shape, | ||
xarray.core.indexing.IndexingSupport.BASIC, | ||
self._raw_indexing_method, | ||
) | ||
|
||
def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike: | ||
vname = self.var.getName() | ||
|
||
slicebuilder = pyfimex0.SliceBuilder(self.cdm, vname, False) | ||
dimsizes = [] | ||
for k, dim, dimname in zip(key, self.shape, reversed(self.var.getShape())): | ||
ignore_dim = dimname | ||
if isinstance(k, int): | ||
slicebuilder.setStartAndSize(dimname, k, 1) | ||
elif isinstance(k, slice): | ||
start = k.start if k.start is not None else 0 | ||
step = k.step if k.step is not None else 1 | ||
stop = k.stop if k.stop is not None else dim | ||
size = (stop - start) // step | ||
slicebuilder.setStartAndSize(dimname, start, size) | ||
dimsizes.append(size) | ||
else: | ||
raise TypeError(f"Unknown type of {k}: {type(k)}") | ||
|
||
data = self.fh.getDataSliceSB(vname, slicebuilder) | ||
if len(dimsizes) == 0: | ||
# Bug? Fimex can't read scalar data? | ||
data_shaped = -1 | ||
else: | ||
datav = data.values() | ||
data_shaped = np.reshape(datav, dimsizes) | ||
if self.scale_offset: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider using getScaledDataSliceSB and let fimex do the scale/offset handling |
||
try: | ||
scale_factor = self.cdm.getAttribute(vname, "scale_factor").getData().values() | ||
except RuntimeError: | ||
scale_factor = 1 | ||
try: | ||
add_offset = self.cdm.getAttribute(vname, "add_offset").getData().values() | ||
except RuntimeError: | ||
add_offset = 0 | ||
data_shaped = data_shaped*scale_factor + add_offset | ||
|
||
return data_shaped |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
[build-system] | ||
requires = ["setuptools", "wheel"] | ||
build-backend = "setuptools.build_meta" | ||
|
||
[project] | ||
name = "fimex_xarray" | ||
version = "0.0.1" | ||
readme = "README.md" | ||
|
||
dependencies = [ | ||
# "pyfimex0", | ||
"numpy", | ||
"xarray", | ||
] | ||
|
||
[project.optional-dependencies] | ||
dev = [ | ||
"pytest", | ||
] | ||
|
||
[project.entry-points."xarray.backends"] | ||
fimex = "fimex_xarray.xr:FimexBackendEntrypoint" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import xarray | ||
import os | ||
import numpy as np | ||
|
||
test_srcdir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "test") | ||
|
||
|
||
def test_open_hirlam12(): | ||
path = os.path.join(test_srcdir, "hirlam12.nc") | ||
path = os.path.join(test_srcdir, "verticalPressure.nc") | ||
path = os.path.join(test_srcdir, "erai.sfc.40N.0.75d.200301011200.nc") | ||
path = os.path.join(test_srcdir, "verticalOceanSG2.nc") | ||
ds = xarray.open_dataset(path, engine="fimex", decode_times=False) | ||
|
||
temp = ds["temp"] | ||
selection = temp.isel(ocean_time=3, s_rho=[4, 6, 9], xi_rho=range(4, 7)) | ||
values = selection.values | ||
|
||
|
||
def test_isel_slice(): | ||
path = os.path.join(test_srcdir, "verticalOceanSG2.nc") | ||
ds = xarray.open_dataset(path, engine="fimex", decode_times=False) | ||
|
||
zeta = ds["zeta"] | ||
|
||
zeta_sub = ds.isel(eta_rho=slice(None, None, None))["zeta"] | ||
assert np.all(zeta == zeta_sub) | ||
|
||
zeta_sub = ds.isel(eta_rho=slice(2, None, None))["zeta"] | ||
zeta_isel = zeta.isel(eta_rho=slice(2, None, None)) | ||
assert np.all(zeta_isel == zeta_sub) | ||
|
||
zeta_sub = ds.isel(eta_rho=slice(2, 10, None))["zeta"] | ||
zeta_isel = zeta.isel(eta_rho=slice(2, 10, None)) | ||
assert np.all(zeta_isel == zeta_sub) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use getScaledData