From 98ef0678a479ca9b37df3abda469431125af2364 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood <ethanbblackwood@gmail.com>
Date: Mon, 1 Apr 2024 03:11:11 -0400
Subject: [PATCH 1/6] Fix SBX functions for older tifffile versions and Windows
 (close mmaps after using)

---
 caiman/tests/test_sbx.py  | 62 ++++++++++++++++++++-------------------
 caiman/utils/sbx_utils.py | 13 ++++----
 2 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/caiman/tests/test_sbx.py b/caiman/tests/test_sbx.py
index 9aa1538ae..342c9d799 100644
--- a/caiman/tests/test_sbx.py
+++ b/caiman/tests/test_sbx.py
@@ -91,50 +91,52 @@ def test_load_subind():
 
 
 def test_sbx_to_tif():
-    tif_file = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif')
+    tif_filename = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif')
+    tif_file = None
+
     try:
         file_2d = os.path.join(TESTDATA_PATH, '2d_sbx.sbx')
         data_2d_from_sbx = cm.load(file_2d)
-        sbx_utils.sbx_to_tif(file_2d, fileout=tif_file)
-        data_2d_from_tif = cm.load(tif_file)
+        sbx_utils.sbx_to_tif(file_2d, fileout=tif_filename)
+        data_2d_from_tif = cm.load(tif_filename)
         npt.assert_array_almost_equal(data_2d_from_sbx, data_2d_from_tif,
                                       err_msg='Data do not match when loaded from .sbx vs. .tif')
-        
+
         file_3d = os.path.join(TESTDATA_PATH, '3d_sbx_1.sbx')
         data_3d_from_sbx = cm.load(file_3d, is3D=True)
-        sbx_utils.sbx_to_tif(file_3d, fileout=tif_file)
-        data_3d_from_tif = cm.load(tif_file, is3D=True)
+        sbx_utils.sbx_to_tif(file_3d, fileout=tif_filename)
+        data_3d_from_tif = cm.load(tif_filename, is3D=True)
         npt.assert_array_almost_equal(data_3d_from_sbx, data_3d_from_tif,
                                       err_msg='3D data do not match when loaded from .sbx vs. .tif')
         # make sure individual planes are not saved as 3D (i.e. RGB)
-        Y = tifffile.TiffFile(tif_file).series[0]
-        assert Y.shape == SHAPE_3D, 'Shape of data in tif file is wrong'
-        if Y[0].shape != SHAPE_3D[2:]:
-            if Y[0].shape == SHAPE_3D[1:]:
-                assert False, 'Tif "plane" is 3-dimensional (i.e., has channel dimension)'
-            else:
-                assert False, 'Shape of tif plane is wrong'
-        
+        with tifffile.TiffFile(tif_filename) as tif_file:
+            Y = tif_file.series[0]
+            assert Y.shape == SHAPE_3D, 'Shape of data in tif file is wrong'
+            if Y[0].shape != SHAPE_3D[2:]:
+                if Y[0].shape == SHAPE_3D[1:]:
+                    assert False, 'Tif "plane" is 3-dimensional (i.e., has channel dimension)'
+                else:
+                    assert False, 'Shape of tif plane is wrong'
+
         # with subindices
         subinds = (slice(0, None, 2), [0, 1, 3], slice(None))
-        sbx_utils.sbx_to_tif(file_2d, fileout=tif_file, subindices=subinds)
-        sub_data_from_tif = cm.load(tif_file)
+        sbx_utils.sbx_to_tif(file_2d, fileout=tif_filename, subindices=subinds)
+        sub_data_from_tif = cm.load(tif_filename)
         npt.assert_array_almost_equal(data_2d_from_sbx[subinds_to_ix(subinds, data_2d_from_sbx.shape)], sub_data_from_tif)
 
         # with plane
-        sbx_utils.sbx_to_tif(file_3d, fileout=tif_file, plane=0)
-        plane_data_from_tif = cm.load(tif_file)
+        sbx_utils.sbx_to_tif(file_3d, fileout=tif_filename, plane=0)
+        plane_data_from_tif = cm.load(tif_filename)
         npt.assert_array_almost_equal(data_3d_from_sbx[:, :, :, 0], plane_data_from_tif)
 
-
     finally:
         # cleanup
-        if os.path.isfile(tif_file):
-            os.remove(tif_file)
+        if os.path.isfile(tif_filename):
+            os.remove(tif_filename)
 
 
 def test_sbx_chain_to_tif():
-    tif_file = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif')
+    tif_filename = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif')
     try:
         file_3d_1 = os.path.join(TESTDATA_PATH, '3d_sbx_1.sbx')
         data_3d_1 = sbx_utils.sbxread(file_3d_1)
@@ -142,16 +144,16 @@ def test_sbx_chain_to_tif():
         data_3d_2 = sbx_utils.sbxread(file_3d_2)
 
         # normal chain
-        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file)
-        data_chain_tif = cm.load(tif_file, is3D=True)
+        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_filename)
+        data_chain_tif = cm.load(tif_filename, is3D=True)
         data_chain_gt = np.concatenate([data_3d_1, data_3d_2], axis=0)
         npt.assert_array_almost_equal(data_chain_tif, data_chain_gt,
                                       err_msg='Tif from chain does not match expected')
 
         # matching subindices
-        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file,
+        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_filename,
                                    subindices=(slice(None), slice(0, None, 2)))
-        data_chain_tif = cm.load(tif_file, is3D=True)
+        data_chain_tif = cm.load(tif_filename, is3D=True)
         data_chain_gt = data_chain_gt[:, ::2]
         npt.assert_array_almost_equal(data_chain_tif, data_chain_gt,
                                       err_msg='Tif from chain with subindices does not match expected')
@@ -159,9 +161,9 @@ def test_sbx_chain_to_tif():
         # non-matching subindices with compatible shapes
         subinds_1 = (slice(None), [0, 1, 3], slice(0, None, 2), [0, 2])
         subinds_2 = (slice(1, None), [-4, -2, -1], slice(1, None, 2), [1, 3])
-        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file,
+        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_filename,
                                     subindices=[subinds_1, subinds_2])
-        data_chain_tif = cm.load(tif_file, is3D=True)
+        data_chain_tif = cm.load(tif_filename, is3D=True)
         data_chain_gt = np.concatenate([data_3d_1[subinds_to_ix(subinds_1, data_3d_1.shape)],
                                         data_3d_2[subinds_to_ix(subinds_2, data_3d_2.shape)]], axis=0)
         npt.assert_array_almost_equal(data_chain_tif, data_chain_gt,
@@ -169,6 +171,6 @@ def test_sbx_chain_to_tif():
 
     finally:
         # cleanup
-        if os.path.isfile(tif_file):
-            os.remove(tif_file)
+        if os.path.isfile(tif_filename):
+            os.remove(tif_filename)
     
\ No newline at end of file
diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py
index f3001cdf5..f12c4a486 100644
--- a/caiman/utils/sbx_utils.py
+++ b/caiman/utils/sbx_utils.py
@@ -131,6 +131,7 @@ def sbx_to_tif(filename: str, fileout: Optional[str] = None, subindices: Optiona
     # Open tif as a memmap and copy to it
     memmap_tif = tifffile.memmap(fileout, shape=save_shape, dtype='uint16', photometric='MINISBLACK')
     _sbxread_helper(filename, subindices=subindices, channel=channel, out=memmap_tif, plane=plane, chunk_size=chunk_size)
+    memmap_tif._mmap.close()
 
 
 def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[ChainSubindices] = slice(None),
@@ -187,7 +188,7 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch
         channel = 0
     elif any(shape[0] <= channel for shape in all_shapes):
         raise Exception('Not all files have the requested channel')
-    
+
     # Allocate empty tif file with the final shape (do this first to ensure any existing file is overwritten)
     common_shape = tuple(map(int, all_shapes_out[0, 1:]))
     Ns = list(map(int, all_shapes_out[:, 0]))
@@ -204,16 +205,18 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch
         fileout = fileout + '.tif'
 
     dtype = np.float32 if to32 else np.uint16
-    tifffile.imwrite(fileout, data=None, mode='w', shape=save_shape, bigtiff=bigtiff, imagej=imagej,
-                     dtype=dtype, photometric='MINISBLACK')
+    with tifffile.TiffWriter(fileout, bigtiff=bigtiff, imagej=imagej) as tif:
+        tif.write(None, shape=save_shape, dtype=dtype, photometric='MINISBLACK')
 
     # Now convert each file
-    tif_memmap = tifffile.memmap(fileout, series=0)
+    memmap_tif = tifffile.memmap(fileout, series=0)
     offset = 0
     for filename, subind, file_N in zip(filenames, subindices, Ns):
-        _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], plane=plane, chunk_size=chunk_size)
+        _sbxread_helper(filename, subindices=subind, channel=channel, out=memmap_tif[offset:offset+file_N], plane=plane, chunk_size=chunk_size)
         offset += file_N
 
+    memmap_tif._mmap.close()
+
 
 def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int, int, int]:
     """

From 4a08717a8d1903d6753c100337132fede7198143 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood <ethanbblackwood@gmail.com>
Date: Wed, 10 Apr 2024 00:34:27 -0400
Subject: [PATCH 2/6] Use memmap to load only indexed parts of files

---
 caiman/utils/sbx_utils.py | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py
index f12c4a486..aeaabce84 100644
--- a/caiman/utils/sbx_utils.py
+++ b/caiman/utils/sbx_utils.py
@@ -449,35 +449,35 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha
             raise Exception('Plane cannot be specified for 2D data')
         save_shape = save_shape[:3]
 
-    frame_stride = _interpret_subindices(subindices[0], n_frames)[1]
-
     if out is not None and out.shape != save_shape:
         raise Exception('Existing memmap is the wrong shape to hold loaded data')
 
     # Open File
     with open(filename + '.sbx') as sbx_file:
-        def get_chunk(n: int):
+        def get_chunk(n: int, step: int):
+            mmap = np.memmap(sbx_file, mode='c', dtype='uint16', shape=data_shape[:4] + (n*step,), order='F')
+            mmap = np.transpose(mmap, (0, 4, 2, 1, 3))  # to (chans, frames, Y, X, Z)
+            inds = (channel, slice(0, None, step)) + np.ix_(*subindices[1:])
+            if not is3D:
+                inds = inds + (0,)  # squeeze out singleton plane dim
             # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones
-            chunk = np.invert(np.fromfile(sbx_file, dtype='uint16', count=frame_size//2 * n))
-            chunk = np.reshape(chunk, data_shape[:4] + (n,), order='F')  # (chans, X, Y, Z, frames)
-            chunk = np.transpose(chunk, (0, 4, 2, 1, 3))[channel]  # to (frames, Y, X, Z)
-            if not is3D:  # squeeze out singleton plane dim
-                chunk = np.squeeze(chunk, axis=3)
-            chunk = chunk[(slice(None),) + np.ix_(*subindices[1:])]
+            chunk = mmap[inds]
+            np.invert(chunk, out=chunk)  # avoid copying, may be large
             if is3D and len(save_shape) == 3:  # squeeze out plane dim after selecting
                 chunk = chunk[:, :, :, plane]
             return chunk
 
-        if frame_stride == 1:
+        if isinstance(subindices[0], range):
+            frame_stride = subindices[0].step
             sbx_file.seek(subindices[0][0] * frame_size, 0)
 
             if chunk_size is None:
                 # load a contiguous block all at once
                 if out is None:
                     # avoid allocating an extra array just to copy into it
-                    out = get_chunk(N)
+                    out = get_chunk(N, frame_stride)
                 else:
-                    out[:] = get_chunk(N)
+                    out[:] = get_chunk(N, frame_stride)
             else:
                 # load a contiguous block of data, but chunk it to not use too much memory at once
                 if out is None:
@@ -487,7 +487,7 @@ def get_chunk(n: int):
                 offset = 0
                 while n_remaining > 0:
                     this_chunk_size = min(n_remaining, chunk_size)
-                    chunk = get_chunk(this_chunk_size)
+                    chunk = get_chunk(this_chunk_size, frame_stride)
                     out[offset:offset+this_chunk_size] = chunk
                     n_remaining -= this_chunk_size
                     offset += this_chunk_size
@@ -501,7 +501,7 @@ def get_chunk(n: int):
                 if counter % 100 == 0:
                     logging.debug(f'Reading iteration: {k}')
                 sbx_file.seek(k * frame_size, 0)
-                out[ind] = get_chunk(1)
+                out[ind] = get_chunk(1, 1)
 
         if isinstance(out, np.memmap):
             out.flush()
@@ -523,7 +523,10 @@ def _interpret_subindices(subindices: DimSubindices, dim_extent: int) -> tuple[I
                             f'(requested up to {subindices.stop})')
     else:
         iterable_elements = subindices
-        skip = 0
+        if isinstance(subindices, range):
+            skip = subindices.step
+        else:
+            skip = 0
 
     return iterable_elements, skip
 

From 163022fcb4bdc738c3ae21268b9a172e92cd6db9 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood <ethanbblackwood@gmail.com>
Date: Wed, 10 Apr 2024 02:03:16 -0400
Subject: [PATCH 3/6] Simplify by using the same loading code for all time
 index types

---
 caiman/tests/test_sbx.py  | 13 +++++++
 caiman/utils/sbx_utils.py | 76 +++++++++++++++------------------------
 2 files changed, 42 insertions(+), 47 deletions(-)

diff --git a/caiman/tests/test_sbx.py b/caiman/tests/test_sbx.py
index 342c9d799..09092c2a2 100644
--- a/caiman/tests/test_sbx.py
+++ b/caiman/tests/test_sbx.py
@@ -4,6 +4,7 @@
 import numpy.testing as npt
 import os
 import tifffile
+import tracemalloc
 
 import caiman as cm
 from caiman.paths import caiman_datadir
@@ -90,6 +91,18 @@ def test_load_subind():
     npt.assert_array_equal(data_3d_plane0_3d[:, :, :, 0], data_3d_plane0_2d)
 
 
+def test_load_efficiency():
+    # Make sure that when loading, excess copies are not being made and 
+    # data outside subindices are not being loaded into memory
+    file_2d = os.path.join(TESTDATA_PATH, '2d_sbx.sbx')
+    tracemalloc.start()
+    data_2d_sliced = sbx_utils.sbxread(file_2d, subindices=(slice(None), slice(None, None, 2)))
+    curr_mem, peak_mem = tracemalloc.get_traced_memory()
+    assert peak_mem / curr_mem < 1.1, 'Too much memory allocated when loading'
+    del data_2d_sliced
+    tracemalloc.stop()    
+
+
 def test_sbx_to_tif():
     tif_filename = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif')
     tif_file = None
diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py
index aeaabce84..61dd8bdf3 100644
--- a/caiman/utils/sbx_utils.py
+++ b/caiman/utils/sbx_utils.py
@@ -454,58 +454,40 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha
 
     # Open File
     with open(filename + '.sbx') as sbx_file:
-        def get_chunk(n: int, step: int):
-            mmap = np.memmap(sbx_file, mode='c', dtype='uint16', shape=data_shape[:4] + (n*step,), order='F')
-            mmap = np.transpose(mmap, (0, 4, 2, 1, 3))  # to (chans, frames, Y, X, Z)
-            inds = (channel, slice(0, None, step)) + np.ix_(*subindices[1:])
-            if not is3D:
-                inds = inds + (0,)  # squeeze out singleton plane dim
+        sbx_mmap = np.memmap(sbx_file, mode='r', dtype='uint16', shape=data_shape, order='F')
+        sbx_mmap = np.transpose(sbx_mmap, (0, 4, 2, 1, 3))  # to (chans, frames, Y, X, Z)
+        sbx_mmap = sbx_mmap[channel]
+        if not is3D:  # squeeze out singleton plane dim
+            sbx_mmap = sbx_mmap[..., 0]
+        elif plane is not None:  # select plane relative to subindices
+            sbx_mmap = sbx_mmap[..., subindices[-1][plane]]
+            subindices = subindices[:-1]
+        inds = np.ix_(*subindices)
+
+        if chunk_size is None:
+            # load a contiguous block all at once
+            chunk_size = N
+        elif out is None:
+            # Pre-allocate destination when loading in chunks
+            out = np.empty(save_shape, dtype=np.uint16)
+
+        n_remaining = N
+        offset = 0
+        while n_remaining > 0:
+            this_chunk_size = min(n_remaining, chunk_size)
             # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones
-            chunk = mmap[inds]
+            chunk = sbx_mmap[(inds[0][offset:offset+this_chunk_size],) + inds[1:]]
             np.invert(chunk, out=chunk)  # avoid copying, may be large
-            if is3D and len(save_shape) == 3:  # squeeze out plane dim after selecting
-                chunk = chunk[:, :, :, plane]
-            return chunk
-
-        if isinstance(subindices[0], range):
-            frame_stride = subindices[0].step
-            sbx_file.seek(subindices[0][0] * frame_size, 0)
-
-            if chunk_size is None:
-                # load a contiguous block all at once
-                if out is None:
-                    # avoid allocating an extra array just to copy into it
-                    out = get_chunk(N, frame_stride)
-                else:
-                    out[:] = get_chunk(N, frame_stride)
-            else:
-                # load a contiguous block of data, but chunk it to not use too much memory at once
-                if out is None:
-                    out = np.empty(save_shape, dtype=np.uint16)
-
-                n_remaining = N
-                offset = 0
-                while n_remaining > 0:
-                    this_chunk_size = min(n_remaining, chunk_size)
-                    chunk = get_chunk(this_chunk_size, frame_stride)
-                    out[offset:offset+this_chunk_size] = chunk
-                    n_remaining -= this_chunk_size
-                    offset += this_chunk_size
 
-        else:
-            # load one frame at a time, sorting indices for fastest access
             if out is None:
-                out = np.empty(save_shape, dtype=np.uint16)
-
-            for counter, (k, ind) in enumerate(sorted(zip(subindices[0], range(N)))):
-                if counter % 100 == 0:
-                    logging.debug(f'Reading iteration: {k}')
-                sbx_file.seek(k * frame_size, 0)
-                out[ind] = get_chunk(1, 1)
+                out = chunk  # avoid copying when loading all data
+            else:
+                out[offset:offset+this_chunk_size] = chunk
+            n_remaining -= this_chunk_size
+            offset += this_chunk_size
 
-        if isinstance(out, np.memmap):
-            out.flush()
-        
+    if isinstance(out, np.memmap):
+        out.flush()    
     return out
 
 

From 2647b590bf027256888a7568213b1561bc9d1042 Mon Sep 17 00:00:00 2001
From: Ethan Blackwood <ethanbblackwood@gmail.com>
Date: Thu, 11 Apr 2024 03:03:36 -0400
Subject: [PATCH 4/6] Revert tifffile write to what is in dev since it works

---
 caiman/utils/sbx_utils.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py
index 61dd8bdf3..7fca35217 100644
--- a/caiman/utils/sbx_utils.py
+++ b/caiman/utils/sbx_utils.py
@@ -205,17 +205,17 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch
         fileout = fileout + '.tif'
 
     dtype = np.float32 if to32 else np.uint16
-    with tifffile.TiffWriter(fileout, bigtiff=bigtiff, imagej=imagej) as tif:
-        tif.write(None, shape=save_shape, dtype=dtype, photometric='MINISBLACK')
+    tifffile.imwrite(fileout, data=None, shape=save_shape, bigtiff=bigtiff, imagej=imagej,
+                     dtype=dtype, photometric='MINISBLACK')
 
     # Now convert each file
-    memmap_tif = tifffile.memmap(fileout, series=0)
+    tif_memmap = tifffile.memmap(fileout, series=0)
     offset = 0
     for filename, subind, file_N in zip(filenames, subindices, Ns):
-        _sbxread_helper(filename, subindices=subind, channel=channel, out=memmap_tif[offset:offset+file_N], plane=plane, chunk_size=chunk_size)
+        _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], plane=plane, chunk_size=chunk_size)
         offset += file_N
 
-    memmap_tif._mmap.close()
+    tif_memmap._mmap.close()
 
 
 def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int, int, int]:

From c75a4f81fe382ffbd3e35336061e9c711344167a Mon Sep 17 00:00:00 2001
From: Ethan Blackwood <ethanbblackwood@gmail.com>
Date: Fri, 12 Apr 2024 00:51:03 -0400
Subject: [PATCH 5/6] Use del to close memmaps rather than _mmap; reduce code
 duplication

---
 caiman/utils/sbx_utils.py | 107 +++++++++++++++++---------------------
 1 file changed, 48 insertions(+), 59 deletions(-)

diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py
index 7fca35217..5b9881323 100644
--- a/caiman/utils/sbx_utils.py
+++ b/caiman/utils/sbx_utils.py
@@ -80,6 +80,7 @@ def sbxread(filename: str, subindices: Optional[FileSubindices] = slice(None), c
 
 
 def sbx_to_tif(filename: str, fileout: Optional[str] = None, subindices: Optional[FileSubindices] = slice(None),
+               bigtiff: Optional[bool] = True, imagej: bool = False, to32: bool = False,
                channel: Optional[int] = None, plane: Optional[int] = None, chunk_size: int = 1000):
     """
     Convert a single .sbx file to .tif format
@@ -95,6 +96,9 @@ def sbx_to_tif(filename: str, fileout: Optional[str] = None, subindices: Optiona
             which frames to read (defaults to all)
             if a tuple of non-scalars, specifies slices of up to 4 dimensions in the order (frame, Y, X, Z).
 
+        to32: bool
+            whether to save in float32 format (default is to keep as uint16)
+
         channel: int | None
             which channel to save (required if data has >1 channel)
 
@@ -106,32 +110,17 @@ def sbx_to_tif(filename: str, fileout: Optional[str] = None, subindices: Optiona
             how many frames to load into memory at once (None = load the whole thing)
     """
     # Check filenames
-    basename, ext = os.path.splitext(filename)
-    if ext == '.sbx':
-        filename = basename
-
     if fileout is None:
+        basename, ext = os.path.splitext(filename)
+        if ext == '.sbx':
+            filename = basename
         fileout = filename + '.tif'
-    else:
-        # Add a '.tif' extension if not already present
-        extension = os.path.splitext(fileout)[1].lower()
-        if extension not in ['.tif', '.tiff', '.btf']:
-            fileout = fileout + '.tif'
 
     if subindices is None:
         subindices = slice(None)
 
-    # Find the shape of the file we need
-    save_shape = _get_output_shape(filename, subindices)[0]
-    if plane is not None:
-        if len(save_shape) < 4:
-            raise Exception('Plane cannot be specified for 2D data')
-        save_shape = save_shape[:3]
-
-    # Open tif as a memmap and copy to it
-    memmap_tif = tifffile.memmap(fileout, shape=save_shape, dtype='uint16', photometric='MINISBLACK')
-    _sbxread_helper(filename, subindices=subindices, channel=channel, out=memmap_tif, plane=plane, chunk_size=chunk_size)
-    memmap_tif._mmap.close()
+    sbx_chain_to_tif([filename], fileout, [subindices], bigtiff=bigtiff, imagej=imagej, to32=to32,
+                     channel=channel, plane=plane, chunk_size=chunk_size)
 
 
 def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[ChainSubindices] = slice(None),
@@ -150,11 +139,8 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch
             see subindices for sbx_to_tif
             can specify separate subindices for each file if nested 2 levels deep; 
             X, Y, and Z sizes must match for all files after indexing.
-        
-        to32: bool
-            whether to save in float32 format (default is to keep as uint16)
 
-        channel, plane, chunk_size: see sbx_to_tif
+        to32, channel, plane, chunk_size: see sbx_to_tif
     """
     if subindices is None:
         subindices = slice(None)
@@ -200,13 +186,15 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch
             raise Exception('Plane cannot be specified for 2D data')
         save_shape = save_shape[:3]
 
+    # Add a '.tif' extension if not already present
     extension = os.path.splitext(fileout)[1].lower()
     if extension not in ['.tif', '.tiff', '.btf']:
         fileout = fileout + '.tif'
 
     dtype = np.float32 if to32 else np.uint16
+    # Make the file first so we can pass in bigtiff and imagej options; otherwise could create using tifffile.memmap directly
     tifffile.imwrite(fileout, data=None, shape=save_shape, bigtiff=bigtiff, imagej=imagej,
-                     dtype=dtype, photometric='MINISBLACK')
+                     dtype=dtype, photometric='MINISBLACK', align=tifffile.TIFF.ALLOCATIONGRANULARITY)
 
     # Now convert each file
     tif_memmap = tifffile.memmap(fileout, series=0)
@@ -215,7 +203,7 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch
         _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], plane=plane, chunk_size=chunk_size)
         offset += file_N
 
-    tif_memmap._mmap.close()
+    del tif_memmap  # important to make sure file is closed (on Windows)
 
 
 def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int, int, int]:
@@ -452,39 +440,40 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha
     if out is not None and out.shape != save_shape:
         raise Exception('Existing memmap is the wrong shape to hold loaded data')
 
-    # Open File
-    with open(filename + '.sbx') as sbx_file:
-        sbx_mmap = np.memmap(sbx_file, mode='r', dtype='uint16', shape=data_shape, order='F')
-        sbx_mmap = np.transpose(sbx_mmap, (0, 4, 2, 1, 3))  # to (chans, frames, Y, X, Z)
-        sbx_mmap = sbx_mmap[channel]
-        if not is3D:  # squeeze out singleton plane dim
-            sbx_mmap = sbx_mmap[..., 0]
-        elif plane is not None:  # select plane relative to subindices
-            sbx_mmap = sbx_mmap[..., subindices[-1][plane]]
-            subindices = subindices[:-1]
-        inds = np.ix_(*subindices)
-
-        if chunk_size is None:
-            # load a contiguous block all at once
-            chunk_size = N
-        elif out is None:
-            # Pre-allocate destination when loading in chunks
-            out = np.empty(save_shape, dtype=np.uint16)
-
-        n_remaining = N
-        offset = 0
-        while n_remaining > 0:
-            this_chunk_size = min(n_remaining, chunk_size)
-            # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones
-            chunk = sbx_mmap[(inds[0][offset:offset+this_chunk_size],) + inds[1:]]
-            np.invert(chunk, out=chunk)  # avoid copying, may be large
-
-            if out is None:
-                out = chunk  # avoid copying when loading all data
-            else:
-                out[offset:offset+this_chunk_size] = chunk
-            n_remaining -= this_chunk_size
-            offset += this_chunk_size
+    # Read from .sbx file, using memmap to avoid loading until necessary
+    sbx_mmap = np.memmap(filename + '.sbx', mode='r', dtype='uint16', shape=data_shape, order='F')
+    sbx_mmap = np.transpose(sbx_mmap, (0, 4, 2, 1, 3))  # to (chans, frames, Y, X, Z)
+    sbx_mmap = sbx_mmap[channel]
+    if not is3D:  # squeeze out singleton plane dim
+        sbx_mmap = sbx_mmap[..., 0]
+    elif plane is not None:  # select plane relative to subindices
+        sbx_mmap = sbx_mmap[..., subindices[-1][plane]]
+        subindices = subindices[:-1]
+    inds = np.ix_(*subindices)
+
+    if chunk_size is None:
+        # load a contiguous block all at once
+        chunk_size = N
+    elif out is None:
+        # Pre-allocate destination when loading in chunks
+        out = np.empty(save_shape, dtype=np.uint16)
+
+    n_remaining = N
+    offset = 0
+    while n_remaining > 0:
+        this_chunk_size = min(n_remaining, chunk_size)
+        # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones
+        chunk = sbx_mmap[(inds[0][offset:offset+this_chunk_size],) + inds[1:]]
+        np.invert(chunk, out=chunk)  # avoid copying, may be large
+
+        if out is None:
+            out = chunk  # avoid copying when loading all data
+        else:
+            out[offset:offset+this_chunk_size] = chunk
+        n_remaining -= this_chunk_size
+        offset += this_chunk_size
+
+    del sbx_mmap  # Important to close file (on Windows)
 
     if isinstance(out, np.memmap):
         out.flush()    

From 6c836c028e1d72d4f52b6251714d5eb85386ebcc Mon Sep 17 00:00:00 2001
From: Ethan Blackwood <ethanbblackwood@gmail.com>
Date: Fri, 12 Apr 2024 12:00:49 -0400
Subject: [PATCH 6/6] Add comment about not viewing memmap and rename Ns

---
 caiman/utils/sbx_utils.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py
index 5b9881323..f349b9d0b 100644
--- a/caiman/utils/sbx_utils.py
+++ b/caiman/utils/sbx_utils.py
@@ -177,9 +177,9 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch
 
     # Allocate empty tif file with the final shape (do this first to ensure any existing file is overwritten)
     common_shape = tuple(map(int, all_shapes_out[0, 1:]))
-    Ns = list(map(int, all_shapes_out[:, 0]))
-    N = sum(Ns)
-    save_shape = (N,) + common_shape
+    all_n_frames_out = list(map(int, all_shapes_out[:, 0]))
+    n_frames_out = sum(all_n_frames_out)
+    save_shape = (n_frames_out,) + common_shape
 
     if plane is not None:
         if len(save_shape) < 4:
@@ -199,7 +199,7 @@ def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: Optional[Ch
     # Now convert each file
     tif_memmap = tifffile.memmap(fileout, series=0)
     offset = 0
-    for filename, subind, file_N in zip(filenames, subindices, Ns):
+    for filename, subind, file_N in zip(filenames, subindices, all_n_frames_out):
         _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], plane=plane, chunk_size=chunk_size)
         offset += file_N
 
@@ -273,7 +273,7 @@ def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int
     else:
         info['max_idx'] = filesize / info['bytesPerBuffer'] * factor - 1
 
-    N = info['max_idx'] + 1    # Last frame
+    n_frames = info['max_idx'] + 1    # Last frame
 
     # Determine whether we are looking at a z-stack
     # Only consider optotune z-stacks - knobby schedules have too many possibilities and
@@ -282,9 +282,9 @@ def sbx_shape(filename: str, info: Optional[dict] = None) -> tuple[int, int, int
         n_planes = info['otparam'][2]
     else:
         n_planes = 1
-    N //= n_planes
+    n_frames //= n_planes
 
-    x = (int(info['nChan']), int(info['sz'][1]), int(info['recordsPerBuffer']), int(n_planes), int(N))
+    x = (int(info['nChan']), int(info['sz'][1]), int(info['recordsPerBuffer']), int(n_planes), int(n_frames))
     return x
 
 
@@ -431,7 +431,7 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha
         raise Exception('Invalid scanbox metadata')
 
     save_shape, subindices = _get_output_shape(data_shape, subindices)
-    N = save_shape[0]
+    n_frames_out = save_shape[0]
     if plane is not None:
         if len(save_shape) < 4:
             raise Exception('Plane cannot be specified for 2D data')
@@ -453,17 +453,19 @@ def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), cha
 
     if chunk_size is None:
         # load a contiguous block all at once
-        chunk_size = N
+        chunk_size = n_frames_out
     elif out is None:
         # Pre-allocate destination when loading in chunks
         out = np.empty(save_shape, dtype=np.uint16)
 
-    n_remaining = N
+    n_remaining = n_frames_out
     offset = 0
     while n_remaining > 0:
         this_chunk_size = min(n_remaining, chunk_size)
-        # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones
+        # Note: important to copy the data here instead of making a view,
+        # so the memmap can be closed (achieved by advanced indexing)
         chunk = sbx_mmap[(inds[0][offset:offset+this_chunk_size],) + inds[1:]]
+        # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones
         np.invert(chunk, out=chunk)  # avoid copying, may be large
 
         if out is None: