From b4a7de66f1c849a7e8952731b0d61f93fb7fc8c6 Mon Sep 17 00:00:00 2001 From: Martin Schorb <35071867+martinschorb@users.noreply.github.com> Date: Wed, 14 Feb 2024 11:43:19 +0100 Subject: [PATCH] add test for 2D CLI (#128) Change default filetype to ome.zarr and bump version --------- Co-authored-by: Martin Schorb Co-authored-by: Constantin Pape --- .gitignore | 1 + mobie/__version__.py | 2 +- mobie/htm/data_import.py | 4 +- mobie/image_data.py | 6 +-- mobie/import_data/from_node_labels.py | 4 +- mobie/import_data/image.py | 4 +- mobie/import_data/segmentation.py | 4 +- mobie/import_data/traces.py | 22 +++++++-- mobie/import_data/utils.py | 2 +- mobie/registration.py | 4 +- mobie/segmentation.py | 2 +- mobie/traces.py | 2 +- test/import_data/test_image.py | 28 +++++++++--- test/import_data/test_segmentation.py | 7 ++- test/test_image_data.py | 64 +++++++++++++++++++++++---- test/test_segmentation.py | 2 +- test/test_spots.py | 3 +- test/test_traces.py | 2 +- test/test_utils.py | 2 +- 19 files changed, 121 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index d4f21a5..cc4d53c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ tmp*/ *.n5 *.h5 .idea/ +test/test-folder/ diff --git a/mobie/__version__.py b/mobie/__version__.py index 707578a..4f92c0a 100644 --- a/mobie/__version__.py +++ b/mobie/__version__.py @@ -1,2 +1,2 @@ -__version__ = "0.4.5" +__version__ = "0.4.6" SPEC_VERSION = "0.3.0" diff --git a/mobie/htm/data_import.py b/mobie/htm/data_import.py index f32cb09..5a38525 100644 --- a/mobie/htm/data_import.py +++ b/mobie/htm/data_import.py @@ -87,7 +87,7 @@ def _add_sources(dataset_folder, source_names, paths, def add_images(files, root, dataset_name, image_names, resolution, scale_factors, chunks, - key=None, file_format="bdv.n5", + key=None, file_format="ome.zarr", tmp_folder=None, target="local", max_jobs=multiprocessing.cpu_count(), unit="micrometer", is_default_dataset=False, is2d=None): assert len(files) == len(image_names), f"{len(files)}, {len(image_names)}" @@ -114,7 +114,7 @@ def add_images(files, root, def add_segmentations(files, root, dataset_name, segmentation_names, resolution, scale_factors, chunks, - key=None, file_format="bdv.n5", + key=None, file_format="ome.zarr", tmp_folder=None, target="local", max_jobs=multiprocessing.cpu_count(), add_default_tables=True, unit="micrometer", is_default_dataset=False, is2d=None): diff --git a/mobie/image_data.py b/mobie/image_data.py index 6eaba1c..fe7dcaf 100644 --- a/mobie/image_data.py +++ b/mobie/image_data.py @@ -158,7 +158,7 @@ def add_bdv_image(xml_path, root, dataset_name, def add_image(input_path, input_key, root, dataset_name, image_name, resolution, scale_factors, chunks, - file_format="bdv.n5", menu_name=None, + file_format="ome.zarr", menu_name=None, tmp_folder=None, target="local", max_jobs=multiprocessing.cpu_count(), view=None, transformation=None, @@ -185,7 +185,7 @@ def add_image(input_path, input_key, chunks [list[int]] - chunks for the data. menu_name [str] - menu name for this source. If none is given will be created based on the image name. (default: None) - file_format [str] - the file format used to store the data internally (default: bdv.n5) + file_format [str] - the file format used to store the data internally (default: ome.zarr) tmp_folder [str] - folder for temporary files (default: None) target [str] - computation target (default: "local") max_jobs [int] - number of jobs (default: number of cores) @@ -234,7 +234,7 @@ def add_image(input_path, input_key, if move_only: if int_to_uint: - raise ValueError("Conversio of integer to unsigned integer is not possible with move_only") + raise ValueError("Conversion of integer to unsigned integer is not possible with move_only") shutil.move(input_path, data_path) if "bdv." in file_format: shutil.move(os.path.splitext(input_path)[0]+".xml", image_metadata_path) diff --git a/mobie/import_data/from_node_labels.py b/mobie/import_data/from_node_labels.py index 52db6e8..6f18755 100644 --- a/mobie/import_data/from_node_labels.py +++ b/mobie/import_data/from_node_labels.py @@ -37,7 +37,7 @@ def import_segmentation_from_node_labels(in_path, in_key, out_path, resolution, scale_factors, chunks, tmp_folder, target, max_jobs, block_shape=None, unit="micrometer", - source_name=None, file_format="bdv.n5"): + source_name=None, file_format="ome.zarr"): """ Import segmentation data into mobie format from a paintera dataset Arguments: @@ -56,7 +56,7 @@ def import_segmentation_from_node_labels(in_path, in_key, out_path, By default, same as chunks. (default:None) unit [str] - physical unit of the coordinate system (default: micrometer) source_name [str] - name of the source (default: None) - file_format [str] - the file format (default: "bdv.n5") + file_format [str] - the file format (default: "ome.zarr") """ out_key = get_scale_key(file_format) diff --git a/mobie/import_data/image.py b/mobie/import_data/image.py index c77b2e0..4357606 100644 --- a/mobie/import_data/image.py +++ b/mobie/import_data/image.py @@ -6,7 +6,7 @@ def import_image_data(in_path, in_key, out_path, resolution, scale_factors, chunks, tmp_folder=None, target="local", max_jobs=mp.cpu_count(), block_shape=None, unit="micrometer", - source_name=None, file_format="bdv.n5", + source_name=None, file_format="ome.zarr", int_to_uint=False, channel=None): """ Import image data to mobie format. @@ -24,7 +24,7 @@ def import_image_data(in_path, in_key, out_path, By default, same as chunks. (default:None) unit [str] - physical unit of the coordinate system (default: micrometer) source_name [str] - name of the source (default: None) - file_format [str] - the file format (default: "bdv.n5") + file_format [str] - the file format (default: "ome.zarr") int_to_uint [bool] - whether to convert signed to unsigned integer (default: False) channel [int] - the channel to load from the data. Currently only supported for the ome.zarr format (default: None) diff --git a/mobie/import_data/segmentation.py b/mobie/import_data/segmentation.py index a6264c2..9fc5e89 100644 --- a/mobie/import_data/segmentation.py +++ b/mobie/import_data/segmentation.py @@ -6,7 +6,7 @@ def import_segmentation(in_path, in_key, out_path, tmp_folder, target, max_jobs, block_shape=None, with_max_id=True, unit="micrometer", source_name=None, - file_format="bdv.n5"): + file_format="ome.zarr"): """ Import segmentation data into mobie format. Arguments: @@ -24,7 +24,7 @@ def import_segmentation(in_path, in_key, out_path, with_max_id [bool] - whether to add the max id attribute unit [str] - physical unit of the coordinate system (default: micrometer) source_name [str] - name of the source (default: None) - file_format [str] - the file format (default: "bdv.n5") + file_format [str] - the file format (default: "ome.zarr") """ # we allow 2d data for ome.zarr file format if file_format != "ome.zarr": diff --git a/mobie/import_data/traces.py b/mobie/import_data/traces.py index 25fb718..5a3b8d3 100644 --- a/mobie/import_data/traces.py +++ b/mobie/import_data/traces.py @@ -13,6 +13,16 @@ from tqdm import tqdm +def is_ome_zarr(path): + return path.endswith("ome.zarr") + + +def get_key_ome_zarr(path): + with open_file(path, "r") as f: + key = f.attrs["multiscales"][0]["datasets"][0]["path"] + return key + + def coords_to_vol(coords, nid, radius=5): bb_min = coords.min(axis=0) bb_max = coords.max(axis=0) + 1 @@ -162,14 +172,18 @@ def import_traces(input_folder, out_path, traces = parse_traces(input_folder) # check that we are compatible with bdv (ids need to be smaller than int16 max) - max_id = np.iinfo('int16').max + max_id = np.iinfo("int16").max max_trace_id = max(traces.keys()) if max_trace_id > max_id: raise RuntimeError("Can't export id %i > %i" % (max_trace_id, max_id)) - is_h5 = is_h5py(reference_path) - ref_key = get_key(is_h5, timepoint=0, setup_id=0, scale=reference_scale) - with open_file(reference_path, 'r') as f: + if is_ome_zarr(reference_path): + ref_key = get_key_ome_zarr(reference_path) + else: + is_h5 = is_h5py(reference_path) + ref_key = get_key(is_h5, timepoint=0, setup_id=0, scale=reference_scale) + + with open_file(reference_path, "r") as f: ds = f[ref_key] shape = ds.shape if chunks is None: diff --git a/mobie/import_data/utils.py b/mobie/import_data/utils.py index 8c12180..5d34709 100644 --- a/mobie/import_data/utils.py +++ b/mobie/import_data/utils.py @@ -63,7 +63,7 @@ def downscale(in_path, in_key, out_path, resolution, scale_factors, chunks, tmp_folder, target, max_jobs, block_shape, library="vigra", library_kwargs=None, - metadata_format="bdv.n5", out_key="", + metadata_format="ome.zarr", out_key="", unit="micrometer", source_name=None, roi_begin=None, roi_end=None, int_to_uint=False, channel=None): diff --git a/mobie/registration.py b/mobie/registration.py index 719bfaa..da427e4 100644 --- a/mobie/registration.py +++ b/mobie/registration.py @@ -17,7 +17,7 @@ def add_registered_source(input_path, input_key, transformation, root, dataset_name, source_name, resolution, scale_factors, chunks, method, - menu_name=None, file_format="bdv.n5", + menu_name=None, file_format="ome.zarr", shape=None, source_type='image', view=None, add_default_table=True, fiji_executable=None, elastix_directory=None, @@ -46,7 +46,7 @@ def add_registered_source(input_path, input_key, transformation, 'transformix': apply transformation using transformix menu_name [str] - menu name for this source. If none is given will be created based on the image name. (default: None) - file_format [str] - the file format used to store the data internally (default: bdv.n5) + file_format [str] - the file format used to store the data internally (default: ome.zarr) shape [tuple[int]] - shape of the output volume. If None, the shape specified in the elastix transformation file will be used. (default: None) source_type [str] - type of the data, can be either 'image', 'segmentation' or 'mask' diff --git a/mobie/segmentation.py b/mobie/segmentation.py index 3772ad8..bb160bf 100644 --- a/mobie/segmentation.py +++ b/mobie/segmentation.py @@ -17,7 +17,7 @@ def add_segmentation(input_path, input_key, root, dataset_name, segmentation_name, resolution, scale_factors, chunks, - menu_name=None, file_format="bdv.n5", + menu_name=None, file_format="ome.zarr", node_label_path=None, node_label_key=None, tmp_folder=None, target="local", max_jobs=multiprocessing.cpu_count(), diff --git a/mobie/traces.py b/mobie/traces.py index 7002962..b4b5ebd 100644 --- a/mobie/traces.py +++ b/mobie/traces.py @@ -35,7 +35,7 @@ def add_traces(input_folder, root, dataset_name, traces_name, scale_factors [list[list[int]]] - scale factors used for down-sampling. menu_name [str] - menu item for this source. If none is given will be created based on the image name. (default: None) - file_format [str] - the file format used to store the data internally (default: bdv.n5) + file_format [str] - the file format used to store the data internally (default: ome.zarr) view [dict] - default view settings for this source (default: None) chunks [list[int]] - chunks for the data. max_jobs [int] - number of jobs (default: number of cores) diff --git a/test/import_data/test_image.py b/test/import_data/test_image.py index e322257..c9911d6 100644 --- a/test/import_data/test_image.py +++ b/test/import_data/test_image.py @@ -19,7 +19,7 @@ class TestImportImage(unittest.TestCase): test_folder = "./test-folder" tmp_folder = "./test-folder/tmp" - out_path = "./test-folder/imported-data.n5" + out_path = "./test-folder/imported-data.ome.zarr" n_jobs = min(4, cpu_count()) def setUp(self): @@ -51,6 +51,7 @@ def check_data(self, exp_data, scales, is_h5=False, out_path=None): self._check_data(exp_data, scale_data, scales) def check_data_ome_zarr(self, exp_data, scales, out_path, resolution, scale_factors): + out_path = self.out_path if out_path is None else out_path scale_data = [] with open_file(out_path, "r") as f: @@ -96,7 +97,7 @@ def create_h5_input_data(self, shape=3*(64,)): return test_path, key, data # - # test imports from different file formats (to default output format = bdv.n5) + # test imports from different file formats (to default output format = ome.zarr) # def test_import_tif(self): @@ -106,27 +107,31 @@ def test_import_tif(self): im_folder = os.path.join(self.test_folder, "im-stack") os.makedirs(im_folder, exist_ok=True) + + resolution=(0.25, 1, 1) + for z in range(shape[0]): path = os.path.join(im_folder, "z_%03i.tif" % z) imageio.imsave(path, data[z]) scales = [[1, 2, 2], [1, 2, 2], [2, 2, 2]] import_image_data(im_folder, "*.tif", self.out_path, - resolution=(0.25, 1, 1), chunks=(16, 64, 64), + resolution=resolution, chunks=(16, 64, 64), scale_factors=scales, tmp_folder=self.tmp_folder, target="local", max_jobs=self.n_jobs) - self.check_data(data, scales) + self.check_data_ome_zarr(data, scales, self.out_path, resolution, scales) def test_import_hdf5(self): from mobie.import_data import import_image_data test_path, key, data = self.create_h5_input_data() scales = [[2, 2, 2], [2, 2, 2], [2, 2, 2]] + resolution=(1, 1, 1) import_image_data(test_path, key, self.out_path, - resolution=(1, 1, 1), chunks=(32, 32, 32), + resolution=resolution, chunks=(32, 32, 32), scale_factors=scales, tmp_folder=self.tmp_folder, target="local", max_jobs=self.n_jobs) - self.check_data(data, scales) + self.check_data_ome_zarr(data, scales, self.out_path, resolution, scales) # TODO @unittest.skipIf(mrcfile is None, "Need mrcfile") @@ -148,6 +153,17 @@ def test_import_bdv_hdf5(self): target="local", max_jobs=1, file_format="bdv.hdf5") self.check_data(data, scales, is_h5=True, out_path=out_path) + def test_import_bdv_n5(self): + from mobie.import_data import import_image_data + test_path, key, data = self.create_h5_input_data() + scales = [[2, 2, 2], [2, 2, 2], [2, 2, 2]] + out_path = os.path.join(self.test_folder, "imported_data.n5") + import_image_data(test_path, key, out_path, + resolution=(1, 1, 1), chunks=(32, 32, 32), + scale_factors=scales, tmp_folder=self.tmp_folder, + target="local", max_jobs=1, file_format="bdv.n5") + self.check_data(data, scales, is_h5=False, out_path=out_path) + def test_import_ome_zarr(self): from mobie.import_data import import_image_data test_path, key, data = self.create_h5_input_data() diff --git a/test/import_data/test_segmentation.py b/test/import_data/test_segmentation.py index 0087625..58934e8 100644 --- a/test/import_data/test_segmentation.py +++ b/test/import_data/test_segmentation.py @@ -5,14 +5,13 @@ import numpy as np from elf.io import open_file -from pybdv.util import get_key from pybdv.downsample import sample_shape class TestImportSegmentation(unittest.TestCase): test_folder = './test-folder' tmp_folder = './test-folder/tmp' - out_path = './test-folder/imported-data.n5' + out_path = './test-folder/imported-data.ome.zarr' n_jobs = multiprocessing.cpu_count() def setUp(self): @@ -22,7 +21,7 @@ def tearDown(self): rmtree(self.test_folder) def check_seg(self, exp_data, scales): - key = get_key(False, 0, 0, 0) + key = "s0" with open_file(self.out_path, 'r') as f: ds = f[key] data = ds[:] @@ -33,7 +32,7 @@ def check_seg(self, exp_data, scales): exp_shape = data.shape for scale, scale_facor in enumerate(scales, 1): - key = get_key(False, 0, 0, scale) + key = f"s{scale}" with open_file(self.out_path, 'r') as f: self.assertIn(key, f) this_shape = f[key].shape diff --git a/test/test_image_data.py b/test/test_image_data.py index 7904c23..3d7fe8d 100644 --- a/test/test_image_data.py +++ b/test/test_image_data.py @@ -73,7 +73,7 @@ def make_hdf5_data(self, path, key, shape, func=None): f.create_dataset(key, data=data) def init_h5_dataset( - self, dataset_name, raw_name, shape, file_format="bdv.n5", func=None, int_to_uint=False + self, dataset_name, raw_name, shape, file_format="ome.zarr", func=None, int_to_uint=False ): data_path = os.path.join(self.test_folder, "data.h5") @@ -169,11 +169,11 @@ def test_bdv_hdf5(self): shape = (64, 64, 64) self.init_h5_dataset(dataset_name, raw_name, shape, file_format="bdv.hdf5") - def test_ome_zarr(self): + def test_n5(self): dataset_name = "test" raw_name = "test-raw" shape = (64, 64, 64) - self.init_h5_dataset(dataset_name, raw_name, shape, file_format="ome.zarr") + self.init_h5_dataset(dataset_name, raw_name, shape, file_format="bdv.n5") # # tests with existing dataset @@ -233,6 +233,44 @@ def test_cli(self): dataset_folder = os.path.join(self.root, self.dataset_name) self.check_data(dataset_folder, im_name) + # 2D + @unittest.skipIf(platform == "win32", "CLI does not work on windows") + def test_cli_2D(self): + + shape = (1, 512, 512) + + im_folder = os.path.join(self.test_folder, "im-stack") + self.make_tif_data(im_folder, shape) + + dataset_name = "test" + im_name = "test-cli-2D" + + resolution = json.dumps([1., 1.]) + scales = json.dumps([[2, 2], [2, 2]]) + chunks = json.dumps([64, 64]) + + tmp_folder = os.path.join(self.test_folder, "cli-im2D") + + in_path = os.path.join(im_folder, "z_000.tif") + + cmd = ["mobie.add_image", + "--input_path", in_path, + "--input_key", "", + "--root", self.root, + "--dataset_name", self.dataset_name, + "--name", im_name, + "--resolution", resolution, + "--scale_factors", scales, + "--chunks", chunks, + "--tmp_folder", tmp_folder] + subprocess.run(cmd) + + exp_data = imageio.imread(in_path) + + + dataset_folder = os.path.join(self.root, dataset_name) + self.check_data(dataset_folder, im_name, exp_data=exp_data) + # # test with numpy data # @@ -272,7 +310,7 @@ def _test_with_trafo(self, file_format, transformation): chunks=(64, 64, 64), tmp_folder=self.tmp_folder, target="local", max_jobs=self.max_jobs, transformation=transformation, file_format=file_format) - self.check_data(os.path.join(self.root, self.dataset_name), im_name) + self.check_data(os.path.join(self.root, self.dataset_name), im_name, file_format=file_format) # TODO implement the test once ome.zarr v0.5 is released def test_with_trafo_ome_zarr(self): @@ -318,7 +356,7 @@ def test_skip_metadata(self): # data validation # - def check_dataset(self, dataset_folder, exp_shape, raw_name, file_format="bdv.n5"): + def check_dataset(self, dataset_folder, exp_shape, raw_name, file_format="ome.zarr"): # validate the full project mobie.validation.validate_project( self.root, assert_true=self.assertTrue, assert_in=self.assertIn, assert_equal=self.assertEqual @@ -342,8 +380,9 @@ def check_dataset(self, dataset_folder, exp_shape, raw_name, file_format="bdv.n5 self.assertEqual(shape, exp_shape) self.assertFalse(np.allclose(data, 0.)) - def check_data(self, dataset_folder, name): - exp_data = self.data + def check_data(self, dataset_folder, name, exp_data=None, file_format="ome.zarr"): + if exp_data is None: + exp_data = self.data # check the image metadata metadata = mobie.metadata.read_dataset_metadata(dataset_folder) @@ -352,9 +391,16 @@ def check_data(self, dataset_folder, name): mobie.validation.validate_source_metadata(name, sources[name], dataset_folder) # check the image data - im_path = os.path.join(dataset_folder, "images", "bdv-n5", f"{name}.n5") + + if file_format == "bdv.n5": + im_path = os.path.join(dataset_folder, "images", "bdv-n5", f"{name}.n5") + key = get_key(False, 0, 0, 0) + else: + im_path = os.path.join(dataset_folder, "images", "ome-zarr", f"{name}.ome.zarr") + key = "s0" + self.assertTrue(os.path.exists(im_path)) - key = get_key(False, 0, 0, 0) + with open_file(im_path, "r") as f: data = f[key][:] self.assertTrue(np.array_equal(data, exp_data)) diff --git a/test/test_segmentation.py b/test/test_segmentation.py index 172b7b6..3bba31c 100644 --- a/test/test_segmentation.py +++ b/test/test_segmentation.py @@ -22,7 +22,7 @@ class TestSegmentation(unittest.TestCase): def setUp(self): os.makedirs(self.test_folder, exist_ok=True) - self.seg_path = os.path.join(self.test_folder, "seg.n5") + self.seg_path = os.path.join(self.test_folder, "seg.ome.zarr") self.seg_key = "seg" self.data = np.random.randint(0, 100, size=self.shape) with open_file(self.seg_path, "a") as f: diff --git a/test/test_spots.py b/test/test_spots.py index 9a72157..5a2a744 100644 --- a/test/test_spots.py +++ b/test/test_spots.py @@ -33,7 +33,8 @@ def setUp(self): chunks = (8, 32, 32) mobie.add_image(data_path, data_key, self.root, self.dataset_name, self.image_source_name, resolution=self.resolution, scale_factors=scale_factors, chunks=chunks, - unit="nanometer", tmp_folder=os.path.join(self.test_folder, "tmp_image")) + unit="nanometer", tmp_folder=os.path.join(self.test_folder, "tmp_image"), + file_format="bdv.n5") gene_names = ["aaa", "bbb", "ccc", "xyz", "123", "456"] table = { diff --git a/test/test_traces.py b/test/test_traces.py index f3c54df..9091b81 100644 --- a/test/test_traces.py +++ b/test/test_traces.py @@ -33,7 +33,7 @@ def init_dataset(self): scales = [[2, 2, 2]] mobie.add_image(data_path, data_key, self.root, self.dataset_name, raw_name, resolution=(1, 1, 1), chunks=(64, 64, 64), scale_factors=scales, - tmp_folder=tmp_folder) + tmp_folder=tmp_folder, file_format="bdv.n5") def generate_trace(self, trace_id): path = os.path.join(self.trace_folder, f"trace_{trace_id}.swc") diff --git a/test/test_utils.py b/test/test_utils.py index f4493bd..8764555 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -28,7 +28,7 @@ def init_dataset(self): scales = [[2, 2, 2]] add_image(data_path, data_key, self.root, self.dataset_name, raw_name, resolution=(1, 1, 1), chunks=(32,)*3, scale_factors=scales, - tmp_folder=tmp_folder) + tmp_folder=tmp_folder, file_format="bdv.n5") def setUp(self): os.makedirs(self.test_folder, exist_ok=True)