From 98ef0678a479ca9b37df3abda469431125af2364 Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Mon, 1 Apr 2024 03:11:11 -0400 Subject: [PATCH 1/6] Fix SBX functions for older tifffile versions and Windows (close mmaps after using) --- caiman/tests/test_sbx.py | 62 ++++++++++++++++++++------------------- caiman/utils/sbx_utils.py | 13 ++++---- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/caiman/tests/test_sbx.py b/caiman/tests/test_sbx.py index 9aa1538ae..342c9d799 100644 --- a/caiman/tests/test_sbx.py +++ b/caiman/tests/test_sbx.py @@ -91,50 +91,52 @@ def test_load_subind(): def test_sbx_to_tif(): - tif_file = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif') + tif_filename = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif') + tif_file = None + try: file_2d = os.path.join(TESTDATA_PATH, '2d_sbx.sbx') data_2d_from_sbx = cm.load(file_2d) - sbx_utils.sbx_to_tif(file_2d, fileout=tif_file) - data_2d_from_tif = cm.load(tif_file) + sbx_utils.sbx_to_tif(file_2d, fileout=tif_filename) + data_2d_from_tif = cm.load(tif_filename) npt.assert_array_almost_equal(data_2d_from_sbx, data_2d_from_tif, err_msg='Data do not match when loaded from .sbx vs. .tif') - + file_3d = os.path.join(TESTDATA_PATH, '3d_sbx_1.sbx') data_3d_from_sbx = cm.load(file_3d, is3D=True) - sbx_utils.sbx_to_tif(file_3d, fileout=tif_file) - data_3d_from_tif = cm.load(tif_file, is3D=True) + sbx_utils.sbx_to_tif(file_3d, fileout=tif_filename) + data_3d_from_tif = cm.load(tif_filename, is3D=True) npt.assert_array_almost_equal(data_3d_from_sbx, data_3d_from_tif, err_msg='3D data do not match when loaded from .sbx vs. .tif') # make sure individual planes are not saved as 3D (i.e. RGB) - Y = tifffile.TiffFile(tif_file).series[0] - assert Y.shape == SHAPE_3D, 'Shape of data in tif file is wrong' - if Y[0].shape != SHAPE_3D[2:]: - if Y[0].shape == SHAPE_3D[1:]: - assert False, 'Tif "plane" is 3-dimensional (i.e., has channel dimension)' - else: - assert False, 'Shape of tif plane is wrong' - + with tifffile.TiffFile(tif_filename) as tif_file: + Y = tif_file.series[0] + assert Y.shape == SHAPE_3D, 'Shape of data in tif file is wrong' + if Y[0].shape != SHAPE_3D[2:]: + if Y[0].shape == SHAPE_3D[1:]: + assert False, 'Tif "plane" is 3-dimensional (i.e., has channel dimension)' + else: + assert False, 'Shape of tif plane is wrong' + # with subindices subinds = (slice(0, None, 2), [0, 1, 3], slice(None)) - sbx_utils.sbx_to_tif(file_2d, fileout=tif_file, subindices=subinds) - sub_data_from_tif = cm.load(tif_file) + sbx_utils.sbx_to_tif(file_2d, fileout=tif_filename, subindices=subinds) + sub_data_from_tif = cm.load(tif_filename) npt.assert_array_almost_equal(data_2d_from_sbx[subinds_to_ix(subinds, data_2d_from_sbx.shape)], sub_data_from_tif) # with plane - sbx_utils.sbx_to_tif(file_3d, fileout=tif_file, plane=0) - plane_data_from_tif = cm.load(tif_file) + sbx_utils.sbx_to_tif(file_3d, fileout=tif_filename, plane=0) + plane_data_from_tif = cm.load(tif_filename) npt.assert_array_almost_equal(data_3d_from_sbx[:, :, :, 0], plane_data_from_tif) - finally: # cleanup - if os.path.isfile(tif_file): - os.remove(tif_file) + if os.path.isfile(tif_filename): + os.remove(tif_filename) def test_sbx_chain_to_tif(): - tif_file = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif') + tif_filename = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif') try: file_3d_1 = os.path.join(TESTDATA_PATH, '3d_sbx_1.sbx') data_3d_1 = sbx_utils.sbxread(file_3d_1) @@ -142,16 +144,16 @@ def test_sbx_chain_to_tif(): data_3d_2 = sbx_utils.sbxread(file_3d_2) # normal chain - sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file) - data_chain_tif = cm.load(tif_file, is3D=True) + sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_filename) + data_chain_tif = cm.load(tif_filename, is3D=True) data_chain_gt = np.concatenate([data_3d_1, data_3d_2], axis=0) npt.assert_array_almost_equal(data_chain_tif, data_chain_gt, err_msg='Tif from chain does not match expected') # matching subindices - sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file, + sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_filename, subindices=(slice(None), slice(0, None, 2))) - data_chain_tif = cm.load(tif_file, is3D=True) + data_chain_tif = cm.load(tif_filename, is3D=True) data_chain_gt = data_chain_gt[:, ::2] npt.assert_array_almost_equal(data_chain_tif, data_chain_gt, err_msg='Tif from chain with subindices does not match expected') @@ -159,9 +161,9 @@ def test_sbx_chain_to_tif(): # non-matching subindices with compatible shapes subinds_1 = (slice(None), [0, 1, 3], slice(0, None, 2), [0, 2]) subinds_2 = (slice(1, None), [-4, -2, -1], slice(1, None, 2), [1, 3]) - sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file, + sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_filename, subindices=[subinds_1, subinds_2]) - data_chain_tif = cm.load(tif_file, is3D=True) + data_chain_tif = cm.load(tif_filename, is3D=True) data_chain_gt = np.concatenate([data_3d_1[subinds_to_ix(subinds_1, data_3d_1.shape)], data_3d_2[subinds_to_ix(subinds_2, data_3d_2.shape)]], axis=0) npt.assert_array_almost_equal(data_chain_tif, data_chain_gt, @@ -169,6 +171,6 @@ def test_sbx_chain_to_tif(): finally: # cleanup - if os.path.isfile(tif_file): - os.remove(tif_file) + if os.path.isfile(tif_filename): + os.remove(tif_filename) \ No newline at end of file diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py index f3001cdf5..f12c4a486 100644 --- a/caiman/utils/sbx_utils.py +++ b/caiman/utils/sbx_utils.py @@ -131,6 +131,7 @@ def sbx_to_tif(filename: str, fileout: Optional[str] = None, subindices: Optiona # Open tif as a memmap and copy to it memmap_tif = tifffile.memmap(fileout, shape=save_shape, dtype='uint16', photometric='MINISBLACK') _sbxread_helper(filename, subindices=subindices, channel=channel, out=memmap_tif, plane=plane, chunk_size=chunk_size) + memmap_tif._mmap.close() def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[ChainSubindices] = slice(None), @@ -187,7 +188,7 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch channel = 0 elif any(shape[0] <= channel for shape in all_shapes): raise Exception('Not all files have the requested channel') - + # Allocate empty tif file with the final shape (do this first to ensure any existing file is overwritten) common_shape = tuple(map(int, all_shapes_out[0, 1:])) Ns = list(map(int, all_shapes_out[:, 0])) @@ -204,16 +205,18 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch fileout = fileout + '.tif' dtype = np.float32 if to32 else np.uint16 - tifffile.imwrite(fileout, data=None, mode='w', shape=save_shape, bigtiff=bigtiff, imagej=imagej, - dtype=dtype, photometric='MINISBLACK') + with tifffile.TiffWriter(fileout, bigtiff=bigtiff, imagej=imagej) as tif: + tif.write(None, shape=save_shape, dtype=dtype, photometric='MINISBLACK') # Now convert each file - tif_memmap = tifffile.memmap(fileout, series=0) + memmap_tif = tifffile.memmap(fileout, series=0) offset = 0 for filename, subind, file_N in zip(filenames, subindices, Ns): - _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], plane=plane, chunk_size=chunk_size) + _sbxread_helper(filename, subindices=subind, channel=channel, out=memmap_tif[offset:offset+file_N], plane=plane, chunk_size=chunk_size) offset += file_N + memmap_tif._mmap.close() + def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int, int, int]: """ From 4a08717a8d1903d6753c100337132fede7198143 Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Wed, 10 Apr 2024 00:34:27 -0400 Subject: [PATCH 2/6] Use memmap to load only indexed parts of files --- caiman/utils/sbx_utils.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py index f12c4a486..aeaabce84 100644 --- a/caiman/utils/sbx_utils.py +++ b/caiman/utils/sbx_utils.py @@ -449,35 +449,35 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha raise Exception('Plane cannot be specified for 2D data') save_shape = save_shape[:3] - frame_stride = _interpret_subindices(subindices[0], n_frames)[1] - if out is not None and out.shape != save_shape: raise Exception('Existing memmap is the wrong shape to hold loaded data') # Open File with open(filename + '.sbx') as sbx_file: - def get_chunk(n: int): + def get_chunk(n: int, step: int): + mmap = np.memmap(sbx_file, mode='c', dtype='uint16', shape=data_shape[:4] + (n*step,), order='F') + mmap = np.transpose(mmap, (0, 4, 2, 1, 3)) # to (chans, frames, Y, X, Z) + inds = (channel, slice(0, None, step)) + np.ix_(*subindices[1:]) + if not is3D: + inds = inds + (0,) # squeeze out singleton plane dim # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones - chunk = np.invert(np.fromfile(sbx_file, dtype='uint16', count=frame_size//2 * n)) - chunk = np.reshape(chunk, data_shape[:4] + (n,), order='F') # (chans, X, Y, Z, frames) - chunk = np.transpose(chunk, (0, 4, 2, 1, 3))[channel] # to (frames, Y, X, Z) - if not is3D: # squeeze out singleton plane dim - chunk = np.squeeze(chunk, axis=3) - chunk = chunk[(slice(None),) + np.ix_(*subindices[1:])] + chunk = mmap[inds] + np.invert(chunk, out=chunk) # avoid copying, may be large if is3D and len(save_shape) == 3: # squeeze out plane dim after selecting chunk = chunk[:, :, :, plane] return chunk - if frame_stride == 1: + if isinstance(subindices[0], range): + frame_stride = subindices[0].step sbx_file.seek(subindices[0][0] * frame_size, 0) if chunk_size is None: # load a contiguous block all at once if out is None: # avoid allocating an extra array just to copy into it - out = get_chunk(N) + out = get_chunk(N, frame_stride) else: - out[:] = get_chunk(N) + out[:] = get_chunk(N, frame_stride) else: # load a contiguous block of data, but chunk it to not use too much memory at once if out is None: @@ -487,7 +487,7 @@ def get_chunk(n: int): offset = 0 while n_remaining > 0: this_chunk_size = min(n_remaining, chunk_size) - chunk = get_chunk(this_chunk_size) + chunk = get_chunk(this_chunk_size, frame_stride) out[offset:offset+this_chunk_size] = chunk n_remaining -= this_chunk_size offset += this_chunk_size @@ -501,7 +501,7 @@ def get_chunk(n: int): if counter % 100 == 0: logging.debug(f'Reading iteration: {k}') sbx_file.seek(k * frame_size, 0) - out[ind] = get_chunk(1) + out[ind] = get_chunk(1, 1) if isinstance(out, np.memmap): out.flush() @@ -523,7 +523,10 @@ def _interpret_subindices(subindices: DimSubindices, dim_extent: int) -> tuple[I f'(requested up to {subindices.stop})') else: iterable_elements = subindices - skip = 0 + if isinstance(subindices, range): + skip = subindices.step + else: + skip = 0 return iterable_elements, skip From 163022fcb4bdc738c3ae21268b9a172e92cd6db9 Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Wed, 10 Apr 2024 02:03:16 -0400 Subject: [PATCH 3/6] Simplify by using the same loading code for all time index types --- caiman/tests/test_sbx.py | 13 +++++++ caiman/utils/sbx_utils.py | 76 +++++++++++++++------------------------ 2 files changed, 42 insertions(+), 47 deletions(-) diff --git a/caiman/tests/test_sbx.py b/caiman/tests/test_sbx.py index 342c9d799..09092c2a2 100644 --- a/caiman/tests/test_sbx.py +++ b/caiman/tests/test_sbx.py @@ -4,6 +4,7 @@ import numpy.testing as npt import os import tifffile +import tracemalloc import caiman as cm from caiman.paths import caiman_datadir @@ -90,6 +91,18 @@ def test_load_subind(): npt.assert_array_equal(data_3d_plane0_3d[:, :, :, 0], data_3d_plane0_2d) +def test_load_efficiency(): + # Make sure that when loading, excess copies are not being made and + # data outside subindices are not being loaded into memory + file_2d = os.path.join(TESTDATA_PATH, '2d_sbx.sbx') + tracemalloc.start() + data_2d_sliced = sbx_utils.sbxread(file_2d, subindices=(slice(None), slice(None, None, 2))) + curr_mem, peak_mem = tracemalloc.get_traced_memory() + assert peak_mem / curr_mem < 1.1, 'Too much memory allocated when loading' + del data_2d_sliced + tracemalloc.stop() + + def test_sbx_to_tif(): tif_filename = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif') tif_file = None diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py index aeaabce84..61dd8bdf3 100644 --- a/caiman/utils/sbx_utils.py +++ b/caiman/utils/sbx_utils.py @@ -454,58 +454,40 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha # Open File with open(filename + '.sbx') as sbx_file: - def get_chunk(n: int, step: int): - mmap = np.memmap(sbx_file, mode='c', dtype='uint16', shape=data_shape[:4] + (n*step,), order='F') - mmap = np.transpose(mmap, (0, 4, 2, 1, 3)) # to (chans, frames, Y, X, Z) - inds = (channel, slice(0, None, step)) + np.ix_(*subindices[1:]) - if not is3D: - inds = inds + (0,) # squeeze out singleton plane dim + sbx_mmap = np.memmap(sbx_file, mode='r', dtype='uint16', shape=data_shape, order='F') + sbx_mmap = np.transpose(sbx_mmap, (0, 4, 2, 1, 3)) # to (chans, frames, Y, X, Z) + sbx_mmap = sbx_mmap[channel] + if not is3D: # squeeze out singleton plane dim + sbx_mmap = sbx_mmap[..., 0] + elif plane is not None: # select plane relative to subindices + sbx_mmap = sbx_mmap[..., subindices[-1][plane]] + subindices = subindices[:-1] + inds = np.ix_(*subindices) + + if chunk_size is None: + # load a contiguous block all at once + chunk_size = N + elif out is None: + # Pre-allocate destination when loading in chunks + out = np.empty(save_shape, dtype=np.uint16) + + n_remaining = N + offset = 0 + while n_remaining > 0: + this_chunk_size = min(n_remaining, chunk_size) # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones - chunk = mmap[inds] + chunk = sbx_mmap[(inds[0][offset:offset+this_chunk_size],) + inds[1:]] np.invert(chunk, out=chunk) # avoid copying, may be large - if is3D and len(save_shape) == 3: # squeeze out plane dim after selecting - chunk = chunk[:, :, :, plane] - return chunk - - if isinstance(subindices[0], range): - frame_stride = subindices[0].step - sbx_file.seek(subindices[0][0] * frame_size, 0) - - if chunk_size is None: - # load a contiguous block all at once - if out is None: - # avoid allocating an extra array just to copy into it - out = get_chunk(N, frame_stride) - else: - out[:] = get_chunk(N, frame_stride) - else: - # load a contiguous block of data, but chunk it to not use too much memory at once - if out is None: - out = np.empty(save_shape, dtype=np.uint16) - - n_remaining = N - offset = 0 - while n_remaining > 0: - this_chunk_size = min(n_remaining, chunk_size) - chunk = get_chunk(this_chunk_size, frame_stride) - out[offset:offset+this_chunk_size] = chunk - n_remaining -= this_chunk_size - offset += this_chunk_size - else: - # load one frame at a time, sorting indices for fastest access if out is None: - out = np.empty(save_shape, dtype=np.uint16) - - for counter, (k, ind) in enumerate(sorted(zip(subindices[0], range(N)))): - if counter % 100 == 0: - logging.debug(f'Reading iteration: {k}') - sbx_file.seek(k * frame_size, 0) - out[ind] = get_chunk(1, 1) + out = chunk # avoid copying when loading all data + else: + out[offset:offset+this_chunk_size] = chunk + n_remaining -= this_chunk_size + offset += this_chunk_size - if isinstance(out, np.memmap): - out.flush() - + if isinstance(out, np.memmap): + out.flush() return out From 2647b590bf027256888a7568213b1561bc9d1042 Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Thu, 11 Apr 2024 03:03:36 -0400 Subject: [PATCH 4/6] Revert tifffile write to what is in dev since it works --- caiman/utils/sbx_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py index 61dd8bdf3..7fca35217 100644 --- a/caiman/utils/sbx_utils.py +++ b/caiman/utils/sbx_utils.py @@ -205,17 +205,17 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch fileout = fileout + '.tif' dtype = np.float32 if to32 else np.uint16 - with tifffile.TiffWriter(fileout, bigtiff=bigtiff, imagej=imagej) as tif: - tif.write(None, shape=save_shape, dtype=dtype, photometric='MINISBLACK') + tifffile.imwrite(fileout, data=None, shape=save_shape, bigtiff=bigtiff, imagej=imagej, + dtype=dtype, photometric='MINISBLACK') # Now convert each file - memmap_tif = tifffile.memmap(fileout, series=0) + tif_memmap = tifffile.memmap(fileout, series=0) offset = 0 for filename, subind, file_N in zip(filenames, subindices, Ns): - _sbxread_helper(filename, subindices=subind, channel=channel, out=memmap_tif[offset:offset+file_N], plane=plane, chunk_size=chunk_size) + _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], plane=plane, chunk_size=chunk_size) offset += file_N - memmap_tif._mmap.close() + tif_memmap._mmap.close() def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int, int, int]: From c75a4f81fe382ffbd3e35336061e9c711344167a Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Fri, 12 Apr 2024 00:51:03 -0400 Subject: [PATCH 5/6] Use del to close memmaps rather than _mmap; reduce code duplication --- caiman/utils/sbx_utils.py | 107 +++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 59 deletions(-) diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py index 7fca35217..5b9881323 100644 --- a/caiman/utils/sbx_utils.py +++ b/caiman/utils/sbx_utils.py @@ -80,6 +80,7 @@ def sbxread(filename: str, subindices: Optional[FileSubindices] = slice(None), c def sbx_to_tif(filename: str, fileout: Optional[str] = None, subindices: Optional[FileSubindices] = slice(None), + bigtiff: Optional[bool] = True, imagej: bool = False, to32: bool = False, channel: Optional[int] = None, plane: Optional[int] = None, chunk_size: int = 1000): """ Convert a single .sbx file to .tif format @@ -95,6 +96,9 @@ def sbx_to_tif(filename: str, fileout: Optional[str] = None, subindices: Optiona which frames to read (defaults to all) if a tuple of non-scalars, specifies slices of up to 4 dimensions in the order (frame, Y, X, Z). + to32: bool + whether to save in float32 format (default is to keep as uint16) + channel: int | None which channel to save (required if data has >1 channel) @@ -106,32 +110,17 @@ def sbx_to_tif(filename: str, fileout: Optional[str] = None, subindices: Optiona how many frames to load into memory at once (None = load the whole thing) """ # Check filenames - basename, ext = os.path.splitext(filename) - if ext == '.sbx': - filename = basename - if fileout is None: + basename, ext = os.path.splitext(filename) + if ext == '.sbx': + filename = basename fileout = filename + '.tif' - else: - # Add a '.tif' extension if not already present - extension = os.path.splitext(fileout)[1].lower() - if extension not in ['.tif', '.tiff', '.btf']: - fileout = fileout + '.tif' if subindices is None: subindices = slice(None) - # Find the shape of the file we need - save_shape = _get_output_shape(filename, subindices)[0] - if plane is not None: - if len(save_shape) < 4: - raise Exception('Plane cannot be specified for 2D data') - save_shape = save_shape[:3] - - # Open tif as a memmap and copy to it - memmap_tif = tifffile.memmap(fileout, shape=save_shape, dtype='uint16', photometric='MINISBLACK') - _sbxread_helper(filename, subindices=subindices, channel=channel, out=memmap_tif, plane=plane, chunk_size=chunk_size) - memmap_tif._mmap.close() + sbx_chain_to_tif([filename], fileout, [subindices], bigtiff=bigtiff, imagej=imagej, to32=to32, + channel=channel, plane=plane, chunk_size=chunk_size) def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[ChainSubindices] = slice(None), @@ -150,11 +139,8 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch see subindices for sbx_to_tif can specify separate subindices for each file if nested 2 levels deep; X, Y, and Z sizes must match for all files after indexing. - - to32: bool - whether to save in float32 format (default is to keep as uint16) - channel, plane, chunk_size: see sbx_to_tif + to32, channel, plane, chunk_size: see sbx_to_tif """ if subindices is None: subindices = slice(None) @@ -200,13 +186,15 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch raise Exception('Plane cannot be specified for 2D data') save_shape = save_shape[:3] + # Add a '.tif' extension if not already present extension = os.path.splitext(fileout)[1].lower() if extension not in ['.tif', '.tiff', '.btf']: fileout = fileout + '.tif' dtype = np.float32 if to32 else np.uint16 + # Make the file first so we can pass in bigtiff and imagej options; otherwise could create using tifffile.memmap directly tifffile.imwrite(fileout, data=None, shape=save_shape, bigtiff=bigtiff, imagej=imagej, - dtype=dtype, photometric='MINISBLACK') + dtype=dtype, photometric='MINISBLACK', align=tifffile.TIFF.ALLOCATIONGRANULARITY) # Now convert each file tif_memmap = tifffile.memmap(fileout, series=0) @@ -215,7 +203,7 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], plane=plane, chunk_size=chunk_size) offset += file_N - tif_memmap._mmap.close() + del tif_memmap # important to make sure file is closed (on Windows) def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int, int, int]: @@ -452,39 +440,40 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha if out is not None and out.shape != save_shape: raise Exception('Existing memmap is the wrong shape to hold loaded data') - # Open File - with open(filename + '.sbx') as sbx_file: - sbx_mmap = np.memmap(sbx_file, mode='r', dtype='uint16', shape=data_shape, order='F') - sbx_mmap = np.transpose(sbx_mmap, (0, 4, 2, 1, 3)) # to (chans, frames, Y, X, Z) - sbx_mmap = sbx_mmap[channel] - if not is3D: # squeeze out singleton plane dim - sbx_mmap = sbx_mmap[..., 0] - elif plane is not None: # select plane relative to subindices - sbx_mmap = sbx_mmap[..., subindices[-1][plane]] - subindices = subindices[:-1] - inds = np.ix_(*subindices) - - if chunk_size is None: - # load a contiguous block all at once - chunk_size = N - elif out is None: - # Pre-allocate destination when loading in chunks - out = np.empty(save_shape, dtype=np.uint16) - - n_remaining = N - offset = 0 - while n_remaining > 0: - this_chunk_size = min(n_remaining, chunk_size) - # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones - chunk = sbx_mmap[(inds[0][offset:offset+this_chunk_size],) + inds[1:]] - np.invert(chunk, out=chunk) # avoid copying, may be large - - if out is None: - out = chunk # avoid copying when loading all data - else: - out[offset:offset+this_chunk_size] = chunk - n_remaining -= this_chunk_size - offset += this_chunk_size + # Read from .sbx file, using memmap to avoid loading until necessary + sbx_mmap = np.memmap(filename + '.sbx', mode='r', dtype='uint16', shape=data_shape, order='F') + sbx_mmap = np.transpose(sbx_mmap, (0, 4, 2, 1, 3)) # to (chans, frames, Y, X, Z) + sbx_mmap = sbx_mmap[channel] + if not is3D: # squeeze out singleton plane dim + sbx_mmap = sbx_mmap[..., 0] + elif plane is not None: # select plane relative to subindices + sbx_mmap = sbx_mmap[..., subindices[-1][plane]] + subindices = subindices[:-1] + inds = np.ix_(*subindices) + + if chunk_size is None: + # load a contiguous block all at once + chunk_size = N + elif out is None: + # Pre-allocate destination when loading in chunks + out = np.empty(save_shape, dtype=np.uint16) + + n_remaining = N + offset = 0 + while n_remaining > 0: + this_chunk_size = min(n_remaining, chunk_size) + # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones + chunk = sbx_mmap[(inds[0][offset:offset+this_chunk_size],) + inds[1:]] + np.invert(chunk, out=chunk) # avoid copying, may be large + + if out is None: + out = chunk # avoid copying when loading all data + else: + out[offset:offset+this_chunk_size] = chunk + n_remaining -= this_chunk_size + offset += this_chunk_size + + del sbx_mmap # Important to close file (on Windows) if isinstance(out, np.memmap): out.flush() From 6c836c028e1d72d4f52b6251714d5eb85386ebcc Mon Sep 17 00:00:00 2001 From: Ethan Blackwood Date: Fri, 12 Apr 2024 12:00:49 -0400 Subject: [PATCH 6/6] Add comment about not viewing memmap and rename Ns --- caiman/utils/sbx_utils.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py index 5b9881323..f349b9d0b 100644 --- a/caiman/utils/sbx_utils.py +++ b/caiman/utils/sbx_utils.py @@ -177,9 +177,9 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch # Allocate empty tif file with the final shape (do this first to ensure any existing file is overwritten) common_shape = tuple(map(int, all_shapes_out[0, 1:])) - Ns = list(map(int, all_shapes_out[:, 0])) - N = sum(Ns) - save_shape = (N,) + common_shape + all_n_frames_out = list(map(int, all_shapes_out[:, 0])) + n_frames_out = sum(all_n_frames_out) + save_shape = (n_frames_out,) + common_shape if plane is not None: if len(save_shape) < 4: @@ -199,7 +199,7 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch # Now convert each file tif_memmap = tifffile.memmap(fileout, series=0) offset = 0 - for filename, subind, file_N in zip(filenames, subindices, Ns): + for filename, subind, file_N in zip(filenames, subindices, all_n_frames_out): _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], plane=plane, chunk_size=chunk_size) offset += file_N @@ -273,7 +273,7 @@ def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int else: info['max_idx'] = filesize / info['bytesPerBuffer'] * factor - 1 - N = info['max_idx'] + 1 # Last frame + n_frames = info['max_idx'] + 1 # Last frame # Determine whether we are looking at a z-stack # Only consider optotune z-stacks - knobby schedules have too many possibilities and @@ -282,9 +282,9 @@ def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int n_planes = info['otparam'][2] else: n_planes = 1 - N //= n_planes + n_frames //= n_planes - x = (int(info['nChan']), int(info['sz'][1]), int(info['recordsPerBuffer']), int(n_planes), int(N)) + x = (int(info['nChan']), int(info['sz'][1]), int(info['recordsPerBuffer']), int(n_planes), int(n_frames)) return x @@ -431,7 +431,7 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha raise Exception('Invalid scanbox metadata') save_shape, subindices = _get_output_shape(data_shape, subindices) - N = save_shape[0] + n_frames_out = save_shape[0] if plane is not None: if len(save_shape) < 4: raise Exception('Plane cannot be specified for 2D data') @@ -453,17 +453,19 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha if chunk_size is None: # load a contiguous block all at once - chunk_size = N + chunk_size = n_frames_out elif out is None: # Pre-allocate destination when loading in chunks out = np.empty(save_shape, dtype=np.uint16) - n_remaining = N + n_remaining = n_frames_out offset = 0 while n_remaining > 0: this_chunk_size = min(n_remaining, chunk_size) - # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones + # Note: important to copy the data here instead of making a view, + # so the memmap can be closed (achieved by advanced indexing) chunk = sbx_mmap[(inds[0][offset:offset+this_chunk_size],) + inds[1:]] + # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones np.invert(chunk, out=chunk) # avoid copying, may be large if out is None: