diff --git a/CHANGELOG.md b/CHANGELOG.md index 96c75e8f..4a0845ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Added support for Pathlib paths. @mavaylon1 [#212](https://github.com/hdmf-dev/hdmf-zarr/pull/212) * Updated packages used for testing and readthedocs configuration. @mavaylon1, @rly [#214](https://github.com/hdmf-dev/hdmf-zarr/pull/214) * Add `force_overwite` parameter for `ZarrIO.__init__` to allow overwriting an existing file or directory. @oruebel [#229](https://github.com/hdmf-dev/hdmf-zarr/pull/229) +* Remove allowance of `hdmf.Array` in `__init__` of `AbstractZarrTableDataset` and `ZarrDataset` to be compatible with HDMF 4.0. @rly [#236](https://github.com/hdmf-dev/hdmf-zarr/pull/236) ### Bug Fixes * Fix reading of cached specs and caching of specs during export. @rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232) diff --git a/requirements.txt b/requirements.txt index 840b4815..ff40e127 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ # pinned dependencies to reproduce an entire development environment to use HDMF-ZARR hdmf==3.14.5 -zarr==2.18.3 +zarr==2.18.3; python_version >= "3.10" # zarr 2.18.3 dropped support for python 3.9 +zarr==2.18.2; python_version < "3.10" pynwb==2.8.2 numpy==2.1.3 numcodecs==0.13.1 diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 6702a415..147ce52d 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -731,7 +731,7 @@ def __is_ref(self, dtype): elif isinstance(dtype, np.dtype): return False else: - return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE + return dtype == DatasetBuilder.OBJECT_REF_TYPE def resolve_ref(self, zarr_ref): """ @@ -790,7 +790,7 @@ def _create_ref(self, ref_object, ref_link_source=None): elif isinstance(ref_object, ReferenceBuilder): builder = ref_object.builder - path = self.__get_path(builder) # This is the internal path in the store to the item. + path = self.__get_path(builder) # This is the internal path in the store to the item. # get the object id if available object_id = builder.get('object_id', None) @@ -1074,12 +1074,7 @@ def write_dataset(self, **kwargs): # noqa: C901 # and will be refactored/removed. if self.__is_ref(dts['dtype']): refs.append(i) - ref_tmp = self._create_ref(data[0][i], ref_link_source=self.path) - if isinstance(ref_tmp, ZarrReference): - dts_str = 'object' - else: - dts_str = 'region' - type_str.append({'name': dts['name'], 'dtype': dts_str}) + type_str.append({'name': dts['name'], 'dtype': 'object'}) else: i = list([dts, ]) t = self.__resolve_dtype_helper__(i) @@ -1208,7 +1203,6 @@ def write_dataset(self, **kwargs): # noqa: C901 "ref": ZarrReference, "reference": ZarrReference, "object": ZarrReference, - "region": ZarrReference, } @classmethod @@ -1535,20 +1529,15 @@ def __read_dataset(self, zarr_obj, name): # Check compound dataset where one of the subsets contains references has_reference = False for i, dts in enumerate(dtype): - if dts['dtype'] in ['object', 'region']: # check items for object reference + if dts['dtype'] == 'object': # check items for object reference has_reference = True break retrieved_dtypes = [dtype_dict['dtype'] for dtype_dict in dtype] if has_reference: - # TODO: BuilderZarrTableDataset does not yet support region reference data = BuilderZarrTableDataset(zarr_obj, self, retrieved_dtypes) elif self.__is_ref(dtype): # Array of references - if dtype == 'object': - data = BuilderZarrReferenceDataset(data, self) - # TODO: Resolution of Region reference not yet supported by BuilderZarrRegionDataset - # elif dtype == 'region': - # data = BuilderZarrRegionDataset(data, self) + data = BuilderZarrReferenceDataset(data, self) kwargs['data'] = data if name is None: @@ -1571,9 +1560,6 @@ def __read_attrs(self, zarr_obj): ret[k] = self.__read_group(target_zarr_obj, target_name) else: ret[k] = self.__read_dataset(target_zarr_obj, target_name) - # TODO Need to implement region references for attributes - elif v['zarr_dtype'] == 'region': - raise NotImplementedError("Read of region references from attributes not implemented in ZarrIO") else: raise NotImplementedError("Unsupported zarr_dtype for attribute " + str(v)) else: diff --git a/src/hdmf_zarr/zarr_utils.py b/src/hdmf_zarr/zarr_utils.py index 9252510d..c01623d0 100644 --- a/src/hdmf_zarr/zarr_utils.py +++ b/src/hdmf_zarr/zarr_utils.py @@ -7,7 +7,7 @@ from copy import copy import numpy as np -from zarr import Array as ZarrArray +from zarr import Array from hdmf.build import DatasetBuilder from hdmf.data_utils import append_data @@ -20,7 +20,7 @@ class ZarrDataset(HDMFDataset): Extension of HDMFDataset to add Zarr compatibility """ - @docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray), 'doc': 'the Zarr file lazily evaluate'}, + @docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'}, {'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'}) def __init__(self, **kwargs): self.__io = popargs('io', kwargs) @@ -129,7 +129,7 @@ class AbstractZarrTableDataset(DatasetOfReferences): references in compound datasets to either Builders and Containers. """ - @docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray), 'doc': 'the Zarr file lazily evaluate'}, + @docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'}, {'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'}, {'name': 'types', 'type': (list, tuple), 'doc': 'the list/tuple of reference types'}) @@ -138,8 +138,6 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.__refgetters = dict() for i, t in enumerate(types): - # if t is RegionReference: # TODO: Region References not yet supported - # self.__refgetters[i] = self.__get_regref if t == DatasetBuilder.OBJECT_REF_TYPE: self.__refgetters[i] = self._get_ref elif t is str: @@ -153,7 +151,6 @@ def __init__(self, **kwargs): sub = self.dataset.dtype[i] if np.issubdtype(sub, np.dtype('O')): tmp.append('object') - # TODO: Region References are not yet supported if sub.metadata: if 'vlen' in sub.metadata: t = sub.metadata['vlen'] @@ -223,24 +220,6 @@ def dtype(self): return 'object' -class AbstractZarrRegionDataset(AbstractZarrReferenceDataset): - """ - Extension of DatasetOfReferences to serve as the base class for resolving Zarr - references in datasets to either Builders and Containers. - - Note: Region References are not yet supported. - """ - - def __getitem__(self, arg): - obj = super().__getitem__(arg) - ref = self.dataset[arg] - return obj[ref] - - @property - def dtype(self): - return 'region' - - class ContainerZarrTableDataset(ContainerResolverMixin, AbstractZarrTableDataset): """ A reference-resolving dataset for resolving references inside tables @@ -283,29 +262,3 @@ class BuilderZarrReferenceDataset(BuilderResolverMixin, AbstractZarrReferenceDat @classmethod def get_inverse_class(cls): return ContainerZarrReferenceDataset - - -class ContainerZarrRegionDataset(ContainerResolverMixin, AbstractZarrRegionDataset): - """ - A reference-resolving dataset for resolving region references that returns - resolved references as Containers - - Note: Region References are not yet supported. - """ - - @classmethod - def get_inverse_class(cls): - return BuilderZarrRegionDataset - - -class BuilderZarrRegionDataset(BuilderResolverMixin, AbstractZarrRegionDataset): - """ - A reference-resolving dataset for resolving region references that returns - resolved references as Builders. - - Note: Region References are not yet supported. - """ - - @classmethod - def get_inverse_class(cls): - return ContainerZarrRegionDataset