From 9bd301442297d94e6d3cac9b6d3e8ade4ff2c2a9 Mon Sep 17 00:00:00 2001 From: Jonas Hoersch Date: Fri, 1 Nov 2024 16:17:18 +0100 Subject: [PATCH 1/2] fix: Skip previous encoding workaround for fixed xarray versions see reported issue https://github.com/pydata/xarray/issues/7691 and pr https://github.com/pydata/xarray/pull/8713 which was included into xarray v2024.03.0 --- atlite/datasets/era5.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 786e31b4..eedea403 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -12,6 +12,7 @@ import os import warnings import weakref +from importlib.metadata import version from tempfile import mkstemp import cdsapi @@ -21,6 +22,7 @@ from dask import compute, delayed from dask.array import arctan2, sqrt from numpy import atleast_1d +from packaging.version import parse from atlite.gis import maybe_swap_spatial_dims from atlite.pv.solar_position import SolarPosition @@ -365,10 +367,12 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates): # saving due to how xarray handles netcdf compression (only float encoded as short int seem affected) # Fixes issue by keeping "float32" encoded as "float32" instead of internally saving as "short int", see: # https://stackoverflow.com/questions/75755441/why-does-saving-to-netcdf-without-encoding-change-some-values-to-nan - # and hopefully fixed soon (could then remove), see https://github.com/pydata/xarray/issues/7691 - for v in ds.data_vars: - if ds[v].encoding["dtype"] == "int16": - ds[v].encoding.clear() + # see https://github.com/pydata/xarray/issues/7691 and https://github.com/pydata/xarray/pull/8713 + # Fix was included in v2024.03.0 + if parse(version("xarray")) < parse("2024.03.0"): + for v in ds.data_vars: + if ds[v].encoding["dtype"] == "int16": + ds[v].encoding.clear() return ds From 332a5d14f8261d3174772212e046e54a86d51504 Mon Sep 17 00:00:00 2001 From: Jonas Hoersch Date: Sat, 9 Nov 2024 18:25:23 +0100 Subject: [PATCH 2/2] Replace workaround by a new xarray lower bound --- atlite/datasets/era5.py | 13 ------------- pyproject.toml | 4 ++-- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index eedea403..b70b311f 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -12,7 +12,6 @@ import os import warnings import weakref -from importlib.metadata import version from tempfile import mkstemp import cdsapi @@ -22,7 +21,6 @@ from dask import compute, delayed from dask.array import arctan2, sqrt from numpy import atleast_1d -from packaging.version import parse from atlite.gis import maybe_swap_spatial_dims from atlite.pv.solar_position import SolarPosition @@ -363,17 +361,6 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates): logger.debug(f"Adding finalizer for {target}") weakref.finalize(ds._file_obj._manager, noisy_unlink, target) - # Remove default encoding we get from CDSAPI, which can lead to NaN values after loading with subsequent - # saving due to how xarray handles netcdf compression (only float encoded as short int seem affected) - # Fixes issue by keeping "float32" encoded as "float32" instead of internally saving as "short int", see: - # https://stackoverflow.com/questions/75755441/why-does-saving-to-netcdf-without-encoding-change-some-values-to-nan - # see https://github.com/pydata/xarray/issues/7691 and https://github.com/pydata/xarray/pull/8713 - # Fix was included in v2024.03.0 - if parse(version("xarray")) < parse("2024.03.0"): - for v in ds.data_vars: - if ds[v].encoding["dtype"] == "int16": - ds[v].encoding.clear() - return ds diff --git a/pyproject.toml b/pyproject.toml index 4a098ca4..6b6612d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "pandas>=0.25", "bottleneck", "numexpr", - "xarray>=0.20", + "xarray>=2024.03.0", "netcdf4", "dask>=2021.10.0", "toolz", @@ -116,4 +116,4 @@ ignore = [ 'D415', # First line should end with a period, question mark, or exclamation point 'D417', # Missing argument descriptions in the docstring -] \ No newline at end of file +]