Skip to content

Commit

Permalink
Merge pull request #164 from ericpre/markers_update
Browse files Browse the repository at this point in the history
Update of HyperSpy Markers API changes for the `hspy`/`zspy` format
  • Loading branch information
ericpre authored Oct 5, 2023
2 parents e4e71ad + 0255321 commit 42574d2
Show file tree
Hide file tree
Showing 9 changed files with 202 additions and 129 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ jobs:
- os: ubuntu
PYTHON_VERSION: '3.8'
# Set pillow and scikit-image version to be compatible with imageio and scipy
DEPENDENCIES: matplotlib==3.1.3 numpy==1.20.0 scipy==1.5 imagecodecs==2020.1.31 tifffile==2020.2.16 dask[array]==2021.3.1 numba==0.52 imageio==2.16 pillow==8.3.2 scikit-image==0.18.0
# matplotlib needs 3.5 to support markers in hyperspy 2.0 (requires `collection.set_offset_transform`)
DEPENDENCIES: matplotlib==3.5 numpy==1.20.0 scipy==1.5 imagecodecs==2020.1.31 tifffile==2020.2.16 dask[array]==2021.3.1 numba==0.52 imageio==2.16 pillow==8.3.2 scikit-image==0.18.0
LABEL: '-oldest'
# test minimum requirement
- os: ubuntu
Expand Down
6 changes: 6 additions & 0 deletions docs/supported_formats/hspy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ the experiments and that will be accessible as attributes of the
Changelog
^^^^^^^^^

v3.3
""""
- Rename the ``ragged_shapes`` dataset to ``_ragged_shapes_{key}``, where ``key``
is the name of the corresponding ragged dataset.


v3.2
""""
- Deprecated ``record_by`` attribute is removed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ file = "COPYING.txt"
[project.optional-dependencies]
blockfile = ["scikit-image>=0.18"]
mrcz = ["blosc>=1.5", "mrcz>=0.3.6"]
scalebar_export = ["matplotlib-scalebar", "matplotlib>=3.1.3"]
scalebar_export = ["matplotlib-scalebar", "matplotlib>=3.5"]
tiff = ["tifffile>=2020.2.16", "imagecodecs>=2020.1.31"]
# Add sidpy dependency and pinning as workaround to fix pyUSID import
# Remove sidpy dependency once https://github.com/pycroscopy/pyUSID/issues/85 is fixed.
Expand Down
70 changes: 49 additions & 21 deletions rsciio/_hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from rsciio.utils.tools import ensure_unicode


version = "3.2"
version = "3.3"

default_version = Version(version)

Expand Down Expand Up @@ -225,6 +225,26 @@ def read(self, lazy):

return exp_dict_list

@staticmethod
def _read_array(group, dataset_key):
    """
    Read ``group[dataset_key]``, rebuilding a ragged array when a
    companion shape dataset is stored next to it.

    This is a workaround for the lack of support for n-d ragged array
    in h5py and zarr. There is work in progress for implementation in zarr:
    https://github.com/zarr-developers/zarr-specs/issues/62 which may be
    relevant to implement here when available.

    Parameters
    ----------
    group : h5py/zarr group (or any mapping supporting ``in`` and ``[]``)
        Container holding the dataset and, optionally, its shape dataset.
    dataset_key : str
        Name of the dataset to read.

    Returns
    -------
    The object stored at ``group[dataset_key]``, returned untouched when
    no shape dataset is found. Otherwise a numpy array of dtype object
    with the same shape, in which every element has been reshaped to the
    shape stored at the same index of the shape dataset.
    """
    data = group[dataset_key]

    shape_key = f"_ragged_shapes_{dataset_key}"
    if shape_key not in group and "ragged_shapes" in group:
        # For file saved with rosettaSciIO <= 0.1
        # rename from `ragged_shapes` to `_ragged_shapes_{key}` in v3.3
        # The legacy name is only a fallback: when the dataset-specific
        # name exists it takes precedence, so that a stale legacy dataset
        # can not override the shapes written for this dataset.
        shape_key = "ragged_shapes"

    if shape_key not in group:
        # Not a ragged array: hand back the dataset as is
        return data

    ragged_shape = group[shape_key]
    new_data = np.empty(shape=data.shape, dtype=object)
    for i in np.ndindex(data.shape):
        # Each element is stored flattened; restore its original shape
        new_data[i] = np.reshape(data[i], ragged_shape[i])
    return new_data

def group2signaldict(self, group, lazy=False):
"""
Reads a h5py/zarr group and returns a signal dictionary.
Expand Down Expand Up @@ -253,8 +273,12 @@ def group2signaldict(self, group, lazy=False):
exp = {
"metadata": self._group2dict(group[metadata], lazy=lazy),
"original_metadata": self._group2dict(group[original_metadata], lazy=lazy),
"attributes": {},
}
if "attributes" in group:
# RosettaSciIO version is > 0.1
exp["attributes"] = self._group2dict(group["attributes"], lazy=lazy)
else:
exp["attributes"] = {}
if "package" in group.attrs:
# HyperSpy version is >= 1.5
exp["package"] = group.attrs["package"]
Expand All @@ -266,20 +290,13 @@ def group2signaldict(self, group, lazy=False):
exp["package"] = ""
exp["package_version"] = ""

data = group["data"]
try:
ragged_shape = group["ragged_shapes"]
new_data = np.empty(shape=data.shape, dtype=object)
for i in np.ndindex(data.shape):
new_data[i] = np.reshape(data[i], ragged_shape[i])
data = new_data
except KeyError:
pass
data = self._read_array(group, "data")
if lazy:
data = da.from_array(data, chunks=data.chunks)
exp["attributes"]["_lazy"] = True
else:
data = np.asanyarray(data)
exp["attributes"]["_lazy"] = False
exp["data"] = data
axes = []
for i in range(len(exp["data"].shape)):
Expand Down Expand Up @@ -514,21 +531,22 @@ def _group2dict(self, group, dictionary=None, lazy=False):
dictionary[key] = value
if not isinstance(group, self.Dataset):
for key in group.keys():
if key.startswith("_sig_"):
if key.startswith("_ragged_shapes_"):
# Shape arrays used to rebuild ragged arrays must be skipped here;
# otherwise they would be wrongly read as kwargs when reading
# variable-length markers, which are stored as ragged arrays.
pass
elif key.startswith("_sig_"):
dictionary[key] = self.group2signaldict(group[key])
elif isinstance(group[key], self.Dataset):
dat = group[key]
dat = self._read_array(group, key)
kn = key
if key.startswith("_list_"):
if h5py.check_string_dtype(dat.dtype) and hasattr(dat, "asstr"):
# h5py 3.0 and newer
# https://docs.h5py.org/en/3.0.0/strings.html
dat = dat.asstr()[:]
ans = np.array(dat)
ans = self._parse_iterable(dat)
ans = ans.tolist()
kn = key[6:]
elif key.startswith("_tuple_"):
ans = np.array(dat)
ans = self._parse_iterable(dat)
ans = tuple(ans.tolist())
kn = key[7:]
elif dat.dtype.char == "S":
Expand Down Expand Up @@ -574,6 +592,14 @@ def _group2dict(self, group, dictionary=None, lazy=False):

return dictionary

@staticmethod
def _parse_iterable(data):
    """
    Convert a stored list/tuple dataset to a numpy array.

    When ``h5py.check_string_dtype`` reports a string dtype and the
    dataset exposes ``asstr``, the content is read through ``asstr()``
    before conversion.

    Parameters
    ----------
    data : dataset-like
        Object with a ``dtype`` attribute, as read from a group.

    Returns
    -------
    numpy.ndarray
    """
    string_info = h5py.check_string_dtype(data.dtype)
    if string_info and hasattr(data, "asstr"):
        # h5py 3.0 and newer
        # https://docs.h5py.org/en/3.0.0/strings.html
        return np.array(data.asstr()[:])
    return np.array(data)


class HierarchicalWriter:
"""
Expand Down Expand Up @@ -687,10 +713,10 @@ def overwrite_dataset(cls, group, data, key, signal_axes=None, chunks=None, **kw
new_data[i] = data[i].ravel()
shapes[i] = np.array(data[i].shape)
shape_dset = cls._get_object_dset(
group, shapes, "ragged_shapes", shapes.shape, **kwds
group, shapes, f"_ragged_shapes_{key}", shapes.shape, **kwds
)
cls._store_data(
shapes, shape_dset, group, "ragged_shapes", chunks=shapes.shape
shapes, shape_dset, group, f"_ragged_shapes_{key}", chunks=shapes.shape
)
cls._store_data(new_data, dset, group, key, chunks)
else:
Expand Down Expand Up @@ -738,6 +764,8 @@ def write_signal(self, signal, group, write_dataset=True, chunks=None, **kwds):
self.dict2group(signal["original_metadata"], original_par, **kwds)
learning_results = group.require_group("learning_results")
self.dict2group(signal["learning_results"], learning_results, **kwds)
attributes = group.require_group("attributes")
self.dict2group(signal["attributes"], attributes, **kwds)

if signal["models"]:
model_group = self.file.require_group("Analysis/models")
Expand Down
2 changes: 1 addition & 1 deletion rsciio/hspy/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _get_object_dset(group, data, key, chunks, **kwds):
if chunks is None:
chunks = 1
dset = group.require_dataset(
key, chunks, dtype=h5py.special_dtype(vlen=data[0].dtype), **kwds
key, chunks, dtype=h5py.special_dtype(vlen=data.flatten()[0].dtype), **kwds
)
return dset

Expand Down
Loading

0 comments on commit 42574d2

Please sign in to comment.