diff --git a/virtualizarr/readers/tiff.py b/virtualizarr/readers/tiff.py index a9b6df33..9d170450 100644 --- a/virtualizarr/readers/tiff.py +++ b/virtualizarr/readers/tiff.py @@ -1,23 +1,15 @@ -import warnings from typing import Iterable, Mapping, Optional -from xarray import Dataset, Index +from xarray import DataArray, Dataset, Index +import zarr -from virtualizarr.readers.common import ( - VirtualBackend, - construct_virtual_dataset, - open_loadable_vars_and_indexes, -) -from virtualizarr.translators.kerchunk import ( - virtual_vars_and_metadata_from_kerchunk_refs, -) -from virtualizarr.types.kerchunk import KerchunkStoreRefs -from virtualizarr.utils import check_for_collisions +from virtualizarr.readers.common import VirtualBackend +from virtualizarr.readers.zarr import virtual_variable_from_zarr_array class TIFFVirtualBackend(VirtualBackend): @staticmethod - def open_virtual_dataset( + def open_virtual_dataarray( filepath: str, group: str | None = None, drop_variables: Iterable[str] | None = None, @@ -26,48 +18,40 @@ def open_virtual_dataset( indexes: Mapping[str, Index] | None = None, reader_options: Optional[dict] = None, ) -> Dataset: - from kerchunk.tiff import tiff_to_zarr + + from tifffile import imread - drop_variables, loadable_variables = check_for_collisions( - drop_variables=drop_variables, loadable_variables=loadable_variables - ) + store = imread(filepath, aszarr=True) - if reader_options is None: - reader_options = {} + # TODO exception handling for TIFF files with multiple arrays + za = zarr.open_array(store=store, mode="r") - reader_options.pop("storage_options", {}) - warnings.warn( - "storage_options have been dropped from reader_options as they are not supported by kerchunk.tiff.tiff_to_zarr", - UserWarning, - ) + vv = virtual_variable_from_zarr_array(za) - # handle inconsistency in kerchunk, see GH issue https://github.com/zarr-developers/VirtualiZarr/issues/160 - refs = KerchunkStoreRefs({"refs": tiff_to_zarr(filepath, **reader_options)}) 
+ # TODO should we generate any pixel coordinate arrays like kerchunk seems to do? - print(refs) + return DataArray(data=vv, dims=vv.dims, attrs=za.attrs) - # refs = extract_group(refs, group) + @staticmethod + def open_virtual_dataset( + filepath: str, + group: str | None = None, + drop_variables: Iterable[str] | None = None, + loadable_variables: Iterable[str] | None = None, + decode_times: bool | None = None, + indexes: Mapping[str, Index] | None = None, + reader_options: Optional[dict] = None, + ) -> Dataset: + + from tifffile import imread - virtual_vars, attrs, coord_names = virtual_vars_and_metadata_from_kerchunk_refs( - refs, - loadable_variables, - drop_variables, - ) + store = imread(filepath, aszarr=True) - loadable_vars, indexes = open_loadable_vars_and_indexes( - filepath, - loadable_variables=loadable_variables, - reader_options=reader_options, - drop_variables=drop_variables, - indexes=indexes, - group=group, - decode_times=decode_times, - ) + try: + zg = zarr.open_group(store, mode="r") + except zarr.errors.ContainsArrayError as err: + # TODO tidy this up + print("TIFF file contains only a single array, please use `open_virtual_dataarray` instead") + raise - return construct_virtual_dataset( - virtual_vars=virtual_vars, - loadable_vars=loadable_vars, - indexes=indexes, - coord_names=coord_names, - attrs=attrs, - ) + raise NotImplementedError() diff --git a/virtualizarr/readers/zarr.py b/virtualizarr/readers/zarr.py new file mode 100644 index 00000000..4fda7e5d --- /dev/null +++ b/virtualizarr/readers/zarr.py @@ -0,0 +1,50 @@ +import io + +from xarray import Variable +import zarr + +from virtualizarr.zarr import ZArray +from virtualizarr.manifests import ChunkManifest, ManifestArray + + +def virtual_variable_from_zarr_array(za: zarr.Array) -> Variable: + """ + Create a virtual xarray.Variable wrapping a ManifestArray from a single zarr.Array. 
+ """ + + # TODO this only works with zarr-python v2 for now + + attrs = dict(za.attrs) + + # extract _ARRAY_DIMENSIONS and remove it from attrs + # TODO handle v3 DIMENSION_NAMES too + dims = attrs.pop("_ARRAY_DIMENSIONS") + + zarray = ZArray( + shape=za.shape, + chunks=za.chunks, + dtype=za.dtype, + fill_value=za.fill_value, + order=za.order, + compressor=za.compressor, + filters=za.filters, + #zarr_format=za.zarr_format, + ) + + manifest = chunkmanifest_from_zarr_array(za) + + ma = ManifestArray(chunkmanifest=manifest, zarray=zarray) + + return Variable(data=ma, dims=dims, attrs=attrs) + + +def chunkmanifest_from_zarr_array(za: zarr.Array) -> ChunkManifest: + import ujson + + of2 = io.StringIO() + + # TODO handle remote urls + za.store.write_fsspec(of2)# , url=url) + out = ujson.loads(of2.getvalue()) + + print(out) diff --git a/virtualizarr/tests/test_readers/test_tiff.py b/virtualizarr/tests/test_readers/test_tiff.py index 60abaa0a..48c8e043 100644 --- a/virtualizarr/tests/test_readers/test_tiff.py +++ b/virtualizarr/tests/test_readers/test_tiff.py @@ -1,20 +1,16 @@ import numpy as np -from xarray import Dataset +from xarray import DataArray -from virtualizarr import open_virtual_dataset +from virtualizarr import open_virtual_dataarray from virtualizarr.manifests import ManifestArray from virtualizarr.tests import requires_pillow @requires_pillow def test_random_tiff(random_tiff): - vds = open_virtual_dataset(random_tiff, indexes={}) + vda = open_virtual_dataarray(random_tiff, indexes={}) - assert isinstance(vds, Dataset) - - # TODO what is the name of this array expected to be?? - assert list(vds.variables) == ["foo"] - vda = vds["foo"] + assert isinstance(vda, DataArray) assert vda.sizes == {"X": 128, "Y": 128} assert vda.dtype == np.uint8