Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update of HyperSpy Markers API changes for the hspy/zspy format #164

Merged
merged 8 commits into from
Oct 5, 2023
3 changes: 2 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ jobs:
- os: ubuntu
PYTHON_VERSION: '3.8'
# Set pillow and scikit-image version to be compatible with imageio and scipy
DEPENDENCIES: matplotlib==3.1.3 numpy==1.20.0 scipy==1.5 imagecodecs==2020.1.31 tifffile==2020.2.16 dask[array]==2021.3.1 numba==0.52 imageio==2.16 pillow==8.3.2 scikit-image==0.18.0
# matplotlib needs 3.5 to support markers in hyperspy 2.0 (requires `collection.set_offset_transform`)
DEPENDENCIES: matplotlib==3.5 numpy==1.20.0 scipy==1.5 imagecodecs==2020.1.31 tifffile==2020.2.16 dask[array]==2021.3.1 numba==0.52 imageio==2.16 pillow==8.3.2 scikit-image==0.18.0
LABEL: '-oldest'
# test minimum requirement
- os: ubuntu
Expand Down
6 changes: 6 additions & 0 deletions docs/supported_formats/hspy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ the experiments and that will be accessible as attributes of the
Changelog
^^^^^^^^^

v3.3
""""
- Rename the ``ragged_shapes`` dataset to ``_ragged_shapes_{key}``, where ``key``
is the name of the corresponding ragged dataset.


v3.2
""""
- Deprecated ``record_by`` attribute is removed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ file = "COPYING.txt"
[project.optional-dependencies]
blockfile = ["scikit-image>=0.18"]
mrcz = ["blosc>=1.5", "mrcz>=0.3.6"]
scalebar_export = ["matplotlib-scalebar", "matplotlib>=3.1.3"]
scalebar_export = ["matplotlib-scalebar", "matplotlib>=3.5"]
tiff = ["tifffile>=2020.2.16", "imagecodecs>=2020.1.31"]
# Add sidpy dependency and pinning as workaround to fix pyUSID import
# Remove sidpy dependency once https://github.com/pycroscopy/pyUSID/issues/85 is fixed.
Expand Down
66 changes: 45 additions & 21 deletions rsciio/_hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from rsciio.utils.tools import ensure_unicode


version = "3.2"
version = "3.3"

default_version = Version(version)

Expand Down Expand Up @@ -225,6 +225,22 @@

return exp_dict_list

# Read the dataset stored under `dataset_key` in `group`. If a companion
# dataset holding per-element shapes is present in the same group, the data
# is treated as a ragged array and each element is reshaped accordingly;
# otherwise the dataset is returned as read.
@staticmethod
def _read_array(group, dataset_key):
data = group[dataset_key]
CSSFrancis marked this conversation as resolved.
Show resolved Hide resolved
# Name under which the shapes of a ragged `dataset_key` are looked up
key = f"_ragged_shapes_{dataset_key}"
if "ragged_shapes" in group:
# For files saved with RosettaSciIO <= 0.1: the shapes dataset was named
# `ragged_shapes`; it was renamed to `_ragged_shapes_{key}` in v3.3
key = "ragged_shapes"

Check warning on line 235 in rsciio/_hierarchical.py

View check run for this annotation

Codecov / codecov/patch

rsciio/_hierarchical.py#L235

Added line #L235 was not covered by tests
# A shapes dataset exists: rebuild the ragged array element by element
if key in group:
ragged_shape = group[key]
# Object-dtype container with the same shape as the stored data
new_data = np.empty(shape=data.shape, dtype=object)
for i in np.ndindex(data.shape):
# Reshape the element at index `i` to the shape stored at the same index
new_data[i] = np.reshape(data[i], ragged_shape[i])
data = new_data
return data

def group2signaldict(self, group, lazy=False):
"""
Reads a h5py/zarr group and returns a signal dictionary.
Expand Down Expand Up @@ -253,8 +269,12 @@
exp = {
"metadata": self._group2dict(group[metadata], lazy=lazy),
"original_metadata": self._group2dict(group[original_metadata], lazy=lazy),
"attributes": {},
}
if "attributes" in group:
# RosettaSciIO version is > 0.1
exp["attributes"] = self._group2dict(group["attributes"], lazy=lazy)
else:
exp["attributes"] = {}
if "package" in group.attrs:
# HyperSpy version is >= 1.5
exp["package"] = group.attrs["package"]
Expand All @@ -266,20 +286,13 @@
exp["package"] = ""
exp["package_version"] = ""

data = group["data"]
try:
ragged_shape = group["ragged_shapes"]
new_data = np.empty(shape=data.shape, dtype=object)
for i in np.ndindex(data.shape):
new_data[i] = np.reshape(data[i], ragged_shape[i])
data = new_data
except KeyError:
pass
data = self._read_array(group, "data")
if lazy:
data = da.from_array(data, chunks=data.chunks)
exp["attributes"]["_lazy"] = True
else:
data = np.asanyarray(data)
exp["attributes"]["_lazy"] = False
exp["data"] = data
axes = []
for i in range(len(exp["data"].shape)):
Expand Down Expand Up @@ -514,21 +527,22 @@
dictionary[key] = value
if not isinstance(group, self.Dataset):
for key in group.keys():
if key.startswith("_sig_"):
if key.startswith("_ragged_shapes_"):
# array used to parse ragged array, need to skip it
CSSFrancis marked this conversation as resolved.
Show resolved Hide resolved
# otherwise, it will wrongly read kwargs when reading
# variable length markers as they use ragged arrays
pass
elif key.startswith("_sig_"):
dictionary[key] = self.group2signaldict(group[key])
elif isinstance(group[key], self.Dataset):
dat = group[key]
dat = self._read_array(group, key)
kn = key
if key.startswith("_list_"):
if h5py.check_string_dtype(dat.dtype) and hasattr(dat, "asstr"):
# h5py 3.0 and newer
# https://docs.h5py.org/en/3.0.0/strings.html
dat = dat.asstr()[:]
ans = np.array(dat)
ans = self._parse_iterable(dat)
ans = ans.tolist()
kn = key[6:]
elif key.startswith("_tuple_"):
ans = np.array(dat)
ans = self._parse_iterable(dat)
ans = tuple(ans.tolist())
kn = key[7:]
elif dat.dtype.char == "S":
Expand Down Expand Up @@ -574,6 +588,14 @@

return dictionary

@staticmethod
def _parse_iterable(data):
    """
    Convert a dataset to a numpy array, reading string data as ``str``.

    Parameters
    ----------
    data : dataset or array-like
        Object exposing a ``dtype`` attribute. When its dtype is reported
        as a string dtype by ``h5py.check_string_dtype`` and the object
        provides ``asstr``, the content is read through ``asstr()``.

    Returns
    -------
    numpy.ndarray
        Array built from ``data`` (or from its ``asstr()`` view).
    """
    string_info = h5py.check_string_dtype(data.dtype)
    if string_info and hasattr(data, "asstr"):
        # h5py 3.0 and newer
        # https://docs.h5py.org/en/3.0.0/strings.html
        return np.array(data.asstr()[:])
    return np.array(data)


class HierarchicalWriter:
"""
Expand Down Expand Up @@ -687,10 +709,10 @@
new_data[i] = data[i].ravel()
shapes[i] = np.array(data[i].shape)
shape_dset = cls._get_object_dset(
group, shapes, "ragged_shapes", shapes.shape, **kwds
group, shapes, f"_ragged_shapes_{key}", shapes.shape, **kwds
)
cls._store_data(
shapes, shape_dset, group, "ragged_shapes", chunks=shapes.shape
shapes, shape_dset, group, f"_ragged_shapes_{key}", chunks=shapes.shape
)
cls._store_data(new_data, dset, group, key, chunks)
else:
Expand Down Expand Up @@ -738,6 +760,8 @@
self.dict2group(signal["original_metadata"], original_par, **kwds)
learning_results = group.require_group("learning_results")
self.dict2group(signal["learning_results"], learning_results, **kwds)
attributes = group.require_group("attributes")
self.dict2group(signal["attributes"], attributes, **kwds)

if signal["models"]:
model_group = self.file.require_group("Analysis/models")
Expand Down
2 changes: 1 addition & 1 deletion rsciio/hspy/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _get_object_dset(group, data, key, chunks, **kwds):
if chunks is None:
chunks = 1
dset = group.require_dataset(
key, chunks, dtype=h5py.special_dtype(vlen=data[0].dtype), **kwds
key, chunks, dtype=h5py.special_dtype(vlen=data.flatten()[0].dtype), **kwds
)
return dset

Expand Down
Loading
Loading