diff --git a/.gitignore b/.gitignore
index 1d06306d5..147a0db9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ __pycache__/
 *.pyc
 *.npy
 *.mat
+!testdata/*.mat
 *.hdf5
 *.mmap
 *.npz
diff --git a/caiman/base/movies.py b/caiman/base/movies.py
index e94ddd218..665ae0015 100644
--- a/caiman/base/movies.py
+++ b/caiman/base/movies.py
@@ -34,6 +34,7 @@
 import caiman.base.traces
 import caiman.mmapping
 import caiman.summary_images
+import caiman.utils.sbx_utils
 import caiman.utils.visualization
 
 try:
@@ -1310,7 +1311,7 @@ def load(file_name: Union[str, list[str]],
         logging.error('movies.py:load(): channel parameter is not supported for single movie input')
 
     if os.path.exists(file_name):
-        _, extension = os.path.splitext(file_name)[:2]
+        basename, extension = os.path.splitext(file_name)
         extension = extension.lower()
 
         if extension == '.mat':
@@ -1575,10 +1576,11 @@
 
         elif extension == '.sbx':
             logging.debug('sbx')
-            if subindices is not None:
-                return movie(sbxreadskip(file_name[:-4], subindices), fr=fr).astype(outtype)
-            else:
-                return movie(sbxread(file_name[:-4], k=0, n_frames=np.inf), fr=fr).astype(outtype)
+            meta_data = caiman.utils.sbx_utils.sbx_meta_data(basename)
+            input_arr = caiman.utils.sbx_utils.sbxread(basename, subindices)
+            return movie(input_arr, fr=fr,
+                         file_name=os.path.split(file_name)[-1],
+                         meta_data=meta_data).astype(outtype)
 
         elif extension == '.sima':
             raise Exception("movies.py:load(): FATAL: sima support was removed in 1.9.8")
@@ -1706,229 +1708,6 @@ def _load_behavior(file_name:str) -> Any:
                  file_name=os.path.split(file_name)[-1],
                  meta_data=meta_data)
 
-####
-# TODO: Consider pulling these functions that work with .mat files into a separate file
-
-
-def loadmat_sbx(filename: str):
-    """
-    this wrapper should be called instead of directly calling spio.loadmat
-
-    It solves the problem of not properly recovering python dictionaries
-    from mat files. It calls the function check keys to fix all entries
-    which are still mat-objects
-    """
-    data_ = scipy.io.loadmat(filename, struct_as_record=False, squeeze_me=True)
-    _check_keys(data_)
-    return data_
-
-
-def _check_keys(checkdict:dict) -> None:
-    """
-    checks if entries in dictionary are mat-objects. If yes todict is called to change them to nested dictionaries.
-    Modifies its parameter in-place.
- """ - - for key in checkdict: - if isinstance(checkdict[key], scipy.io.matlab.mio5_params.mat_struct): - checkdict[key] = _todict(checkdict[key]) - - -def _todict(matobj) -> dict: - """ - A recursive function which constructs from matobjects nested dictionaries - """ - - ret = {} - for strg in matobj._fieldnames: - elem = matobj.__dict__[strg] - if isinstance(elem, scipy.io.matlab.mio5_params.mat_struct): - ret[strg] = _todict(elem) - else: - ret[strg] = elem - return ret - - -def sbxread(filename: str, k: int = 0, n_frames=np.inf) -> np.ndarray: - """ - Args: - filename: str - filename should be full path excluding .sbx - """ - # Check if contains .sbx and if so just truncate - if '.sbx' in filename: - filename = filename[:-4] - - # Load info - info = loadmat_sbx(filename + '.mat')['info'] - - # Defining number of channels/size factor - if info['channels'] == 1: - info['nChan'] = 2 - factor = 1 - elif info['channels'] == 2: - info['nChan'] = 1 - factor = 2 - elif info['channels'] == 3: - info['nChan'] = 1 - factor = 2 - - # Determine number of frames in whole file - max_idx = os.path.getsize(filename + '.sbx') / info['recordsPerBuffer'] / info['sz'][1] * factor / 4 - 1 - - # Parameters - N = max_idx + 1 # Last frame - N = np.minimum(N, n_frames) - - nSamples = info['sz'][1] * info['recordsPerBuffer'] * 2 * info['nChan'] - - # Open File - fo = open(filename + '.sbx') - - # Note: SBX files store the values strangely, its necessary to subtract the values from the max int16 to get the correct ones - fo.seek(k * nSamples, 0) - ii16 = np.iinfo(np.uint16) - x = ii16.max - np.fromfile(fo, dtype='uint16', count=int(nSamples / 2 * N)) - x = x.reshape((int(info['nChan']), int(info['sz'][1]), int(info['recordsPerBuffer']), int(N)), order='F') - - x = x[0, :, :, :] - - fo.close() - - return x.transpose([2, 1, 0]) - - -def sbxreadskip(filename: str, subindices: slice) -> np.ndarray: - """ - Args: - filename: str - filename should be full path excluding .sbx - - slice: pass a slice to slice along the last dimension - """ - # Check if contains .sbx and if so just truncate - if '.sbx' in filename: - filename = filename[:-4] - - # Load info - info = loadmat_sbx(filename + '.mat')['info'] - - # Defining number of channels/size factor - if info['channels'] == 1: - info['nChan'] = 2 - factor = 1 - elif info['channels'] == 2: - info['nChan'] = 1 - factor = 2 - elif info['channels'] == 3: - info['nChan'] = 1 - factor = 2 - - # Determine number of frames in whole file - max_idx = int(os.path.getsize(filename + '.sbx') / info['recordsPerBuffer'] / info['sz'][1] * factor / 4 - 1) - - # Parameters - if isinstance(subindices, slice): - if subindices.start is None: - start = 0 - else: - start = subindices.start - - if subindices.stop is None: - N = max_idx + 1 # Last frame - else: - N = np.minimum(subindices.stop, max_idx + 1).astype(int) - - if subindices.step is None: - skip = 1 - else: - skip = subindices.step - - iterable_elements = range(start, N, skip) - - else: - - N = len(subindices) - iterable_elements = subindices - skip = 0 - - N_time = len(list(iterable_elements)) - - nSamples = info['sz'][1] * info['recordsPerBuffer'] * 2 * info['nChan'] - assert nSamples >= 0 - - # Open File - fo = open(filename + '.sbx') - - # Note: SBX files store the values strangely, its necessary to subtract the values from the max int16 to get the correct ones - - counter = 0 - - if skip == 1: - # Note: SBX files store the values strangely, its necessary to subtract the values from the max int16 to get the correct ones - assert 
-        fo.seek(start * nSamples, 0)
-        ii16 = np.iinfo(np.uint16)
-        x = ii16.max - np.fromfile(fo, dtype='uint16', count=int(nSamples / 2 * (N - start)))
-        x = x.reshape((int(info['nChan']), int(info['sz'][1]), int(info['recordsPerBuffer']), int(N - start)),
-                      order='F')
-
-        x = x[0, :, :, :]
-
-    else:
-        for k in iterable_elements:
-            assert k >= 0
-            if counter % 100 == 0:
-                logging.debug(f'Reading Iteration: {k}')
-            fo.seek(k * nSamples, 0)
-            ii16 = np.iinfo(np.uint16)
-            tmp = ii16.max - \
-                np.fromfile(fo, dtype='uint16', count=int(nSamples / 2 * 1))
-
-            tmp = tmp.reshape((int(info['nChan']), int(info['sz'][1]), int(info['recordsPerBuffer'])), order='F')
-            if counter == 0:
-                x = np.zeros((tmp.shape[0], tmp.shape[1], tmp.shape[2], N_time))
-
-            x[:, :, :, counter] = tmp
-            counter += 1
-
-        x = x[0, :, :, :]
-    fo.close()
-
-    return x.transpose([2, 1, 0])
-
-
-def sbxshape(filename: str) -> tuple[int, int, int]:
-    """
-    Args:
-        filename should be full path excluding .sbx
-    """
-    # TODO: Document meaning of return values
-
-    # Check if contains .sbx and if so just truncate
-    if '.sbx' in filename:
-        filename = filename[:-4]
-
-    # Load info
-    info = loadmat_sbx(filename + '.mat')['info']
-
-    # Defining number of channels/size factor
-    if info['channels'] == 1:
-        info['nChan'] = 2
-        factor = 1
-    elif info['channels'] == 2:
-        info['nChan'] = 1
-        factor = 2
-    elif info['channels'] == 3:
-        info['nChan'] = 1
-        factor = 2
-
-    # Determine number of frames in whole file
-    max_idx = os.path.getsize(filename + '.sbx') / info['recordsPerBuffer'] / info['sz'][1] * factor / 4 - 1
-    N = max_idx + 1    # Last frame
-    x = (int(info['sz'][1]), int(info['recordsPerBuffer']), int(N))
-    return x
-
 
 def to_3D(mov2D:np.ndarray, shape:tuple, order='F') -> np.ndarray:
     """
@@ -2285,23 +2064,9 @@ def get_file_size(file_name, var_name_hdf5:str='mov') -> tuple[tuple, Union[int,
                 raise Exception('Variable not found. Use one of the above')
             T, dims = siz[0], siz[1:]
     elif extension in ('.sbx'):
-        info = loadmat_sbx(file_name[:-4]+ '.mat')['info']
-        dims = tuple((info['sz']).astype(int))
-        # Defining number of channels/size factor
-        if info['channels'] == 1:
-            info['nChan'] = 2
-            factor = 1
-        elif info['channels'] == 2:
-            info['nChan'] = 1
-            factor = 2
-        elif info['channels'] == 3:
-            info['nChan'] = 1
-            factor = 2
-
-        # Determine number of frames in whole file
-        T = int(os.path.getsize(
-            file_name[:-4] + '.sbx') / info['recordsPerBuffer'] / info['sz'][1] * factor / 4 - 1)
-
+        shape = caiman.utils.sbx_utils.sbx_shape(file_name[:-4])
+        T = shape[-1]
+        dims = (shape[2], shape[1])
     else:
        raise Exception('Unknown file type')
    dims = tuple(dims)
diff --git a/caiman/tests/test_sbx.py b/caiman/tests/test_sbx.py
new file mode 100644
index 000000000..f71f9e5e4
--- /dev/null
+++ b/caiman/tests/test_sbx.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+
+import numpy as np
+import numpy.testing as npt
+import os
+
+import caiman as cm
+from caiman.paths import caiman_datadir
+from caiman.utils import sbx_utils
+
+testdata_path = os.path.join(caiman_datadir(), 'testdata')
+
+def subinds_to_ix(subinds, array_shape):
+    """Helper to avoid advanced slicing"""
+    fixed_subinds = [range(s)[inds] if isinstance(inds, slice) else inds
+                     for inds, s in zip(subinds, array_shape)]
+    return np.ix_(*fixed_subinds)
+
+
+def test_load_2d():
+    file_2d = os.path.join(testdata_path, '2d_sbx.sbx')
+    data_2d = sbx_utils.sbxread(file_2d)
+    meta_2d = sbx_utils.sbx_meta_data(file_2d)
+
+    assert data_2d.ndim == 3, 'Loaded 2D data has wrong dimensionality'
+    assert data_2d.shape == (10, 512, 796), 'Loaded 2D data has wrong shape'
+    assert data_2d.shape == (meta_2d['num_frames'], *meta_2d['frame_size']), 'Shape in metadata does not match loaded data'
+    npt.assert_array_equal(data_2d[0, 0, :10], [712, 931, 1048, 825, 1383, 882, 601, 798, 1022, 966], 'Loaded 2D data has wrong values')
+
+    data_2d_movie = cm.load(file_2d)
+    assert data_2d_movie.ndim == data_2d.ndim, 'Movie loaded with cm.load has wrong dimensionality'
+    assert data_2d_movie.shape == data_2d.shape, 'Movie loaded with cm.load has wrong shape'
+    npt.assert_array_almost_equal(data_2d_movie, data_2d, err_msg='Movie loaded with cm.load has wrong values')
+
+
+def test_load_3d():
+    file_3d = os.path.join(testdata_path, '3d_sbx_1.sbx')
+    data_3d = sbx_utils.sbxread(file_3d)
+    meta_3d = sbx_utils.sbx_meta_data(file_3d)
+
+    assert data_3d.ndim == 4, 'Loaded 3D data has wrong dimensionality'
+    assert data_3d.shape == (2, 512, 796, 4), 'Loaded 3D data has wrong shape'
+    assert data_3d.shape == (meta_3d['num_frames'], *meta_3d['frame_size'], meta_3d['num_planes']), 'Shape in metadata does not match loaded data'
+    npt.assert_array_equal(data_3d[0, 0, :10, 0], [2167, 2525, 1713, 1747, 1887, 1741, 1873, 1244, 1747, 1637], 'Loaded 3D data has wrong values')
+
+    data_3d_movie = cm.load(file_3d, is3D=True)
+    assert data_3d_movie.ndim == data_3d.ndim, 'Movie loaded with cm.load has wrong dimensionality'
+    assert data_3d_movie.shape == data_3d.shape, 'Movie loaded with cm.load has wrong shape'
+    npt.assert_array_almost_equal(data_3d_movie, data_3d, err_msg='Movie loaded with cm.load has wrong values')
+
+
+def test_load_subind():
+    file_2d = os.path.join(testdata_path, '2d_sbx.sbx')
+    data_2d_full = cm.load(file_2d)
+
+    # Just frame subset
+    data_2d_head3 = cm.load(file_2d, subindices=slice(0, 3))
+    npt.assert_array_equal(data_2d_full[:3], data_2d_head3)
+    data_2d_ds2 = cm.load(file_2d, subindices=slice(0, None, 2))
+    npt.assert_array_equal(data_2d_full[::2], data_2d_ds2)
+    arb_subind = [0, 3, 5, 8]
+    data_2d_arb = cm.load(file_2d, subindices=arb_subind)
+    npt.assert_array_equal(data_2d_full[arb_subind], data_2d_arb)
+
+    # Subset on multiple dimensions
+    subind_t_y = (slice(0, 3), [0, *range(2, 512)])
+    data_2d_t_y = cm.load(file_2d, subindices=subind_t_y)
+    npt.assert_array_equal(data_2d_full[subinds_to_ix(subind_t_y, data_2d_full.shape)], data_2d_t_y)
+    subind_t_y_x = subind_t_y + (slice(0, None, 2),)
+    data_2d_t_y_x = cm.load(file_2d, subindices=subind_t_y_x)
+    npt.assert_array_equal(data_2d_full[subinds_to_ix(subind_t_y_x, data_2d_full.shape)], data_2d_t_y_x)
+
+    # Check 3D file
+    file_3d = os.path.join(testdata_path, '3d_sbx_1.sbx')
+    data_3d_full = cm.load(file_3d, is3D=True)
+    data_3d_first = cm.load(file_3d, is3D=True, subindices=slice(0, 1))
+    npt.assert_array_equal(data_3d_full[:1], data_3d_first)
+    subind_t_y_x_z = (slice(0, 1), [0, *range(2, 512)], slice(0, None, 2), [0, 2])
+    data_3d_t_y_x_z = cm.load(file_3d, is3D=True, subindices=subind_t_y_x_z)
+    npt.assert_array_equal(data_3d_full[subinds_to_ix(subind_t_y_x_z, data_3d_full.shape)], data_3d_t_y_x_z)
+
+
+def test_sbx_to_tif():
+    tif_file = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif')
+    try:
+        file_2d = os.path.join(testdata_path, '2d_sbx.sbx')
+        data_2d_from_sbx = cm.load(file_2d)
+        sbx_utils.sbx_to_tif(file_2d, fileout=tif_file)
+        data_2d_from_tif = cm.load(tif_file)
+        npt.assert_array_almost_equal(data_2d_from_sbx, data_2d_from_tif,
+                                      err_msg='Data do not match when loaded from .sbx vs. .tif')
+
+        file_3d = os.path.join(testdata_path, '3d_sbx_1.sbx')
+        data_3d_from_sbx = cm.load(file_3d, is3D=True)
+        sbx_utils.sbx_to_tif(file_3d, fileout=tif_file)
+        data_3d_from_tif = cm.load(tif_file, is3D=True)
+        npt.assert_array_almost_equal(data_3d_from_sbx, data_3d_from_tif,
+                                      err_msg='3D data do not match when loaded from .sbx vs. .tif')
+
+        # with subindices
+        subinds = (slice(0, None, 2), [0, 1, 3], slice(None))
+        sbx_utils.sbx_to_tif(file_2d, fileout=tif_file, subindices=subinds)
+        sub_data_from_tif = cm.load(tif_file)
+        npt.assert_array_almost_equal(data_2d_from_sbx[subinds_to_ix(subinds, data_2d_from_sbx.shape)], sub_data_from_tif)
+
+    finally:
+        # cleanup
+        if os.path.isfile(tif_file):
+            os.remove(tif_file)
+
+
+def test_sbx_chain_to_tif():
+    tif_file = os.path.join(caiman_datadir(), 'temp', 'from_sbx.tif')
+    try:
+        file_3d_1 = os.path.join(testdata_path, '3d_sbx_1.sbx')
+        data_3d_1 = sbx_utils.sbxread(file_3d_1)
+        file_3d_2 = os.path.join(testdata_path, '3d_sbx_2.sbx')
+        data_3d_2 = sbx_utils.sbxread(file_3d_2)
+
+        # normal chain
+        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file)
+        data_chain_tif = cm.load(tif_file, is3D=True)
+        data_chain_gt = np.concatenate([data_3d_1, data_3d_2], axis=0)
+        npt.assert_array_almost_equal(data_chain_tif, data_chain_gt,
+                                      err_msg='Tif from chain does not match expected')
+
+        # matching subindices
+        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file,
+                                   subindices=(slice(None), slice(0, None, 2)))
+        data_chain_tif = cm.load(tif_file, is3D=True)
+        data_chain_gt = data_chain_gt[:, ::2]
+        npt.assert_array_almost_equal(data_chain_tif, data_chain_gt,
+                                      err_msg='Tif from chain with subindices does not match expected')
+
+        # non-matching subindices with compatible shapes
+        subinds_1 = (slice(None), [0, 1, 3], slice(0, None, 2), [0, 2])
+        subinds_2 = (slice(1, None), [-4, -2, -1], slice(1, None, 2), [1, 3])
+        sbx_utils.sbx_chain_to_tif([file_3d_1, file_3d_2], fileout=tif_file,
+                                   subindices=[subinds_1, subinds_2])
+        data_chain_tif = cm.load(tif_file, is3D=True)
+        data_chain_gt = np.concatenate([data_3d_1[subinds_to_ix(subinds_1, data_3d_1.shape)],
+                                        data_3d_2[subinds_to_ix(subinds_2, data_3d_2.shape)]], axis=0)
+        npt.assert_array_almost_equal(data_chain_tif, data_chain_gt,
+                                      err_msg='Tif from chain with non-matching subindices does not match expected')
+
+    finally:
+        # cleanup
+        if os.path.isfile(tif_file):
+            os.remove(tif_file)
\ No newline at end of file
diff --git a/caiman/utils/sbx_utils.py b/caiman/utils/sbx_utils.py
new file mode 100644
index 000000000..05f0f05a4
--- /dev/null
+++ b/caiman/utils/sbx_utils.py
@@ -0,0 +1,525 @@
+#!/usr/bin/env python
+
+"""
+Utility functions for Neurolabware Scanbox files (.sbx)
+"""
+
+import logging
+import numpy as np
+import os
+import scipy
+import tifffile
+from typing import Iterable
+
+DimSubindices = Iterable[int] | slice
+FileSubindices = DimSubindices | Iterable[DimSubindices]  # can have inds for just frames or also for y, x, z
+ChainSubindices = FileSubindices | Iterable[FileSubindices]  # one to apply to each file, or separate for each file
+
+def loadmat_sbx(filename: str) -> dict:
+    """
+    This wrapper should be called instead of directly calling spio.loadmat.
+
+    It solves the problem of not properly recovering python dictionaries
+    from mat files. It calls the function _check_keys to fix all entries
+    which are still mat-objects.
+    """
+    data_ = scipy.io.loadmat(filename, struct_as_record=False, squeeze_me=True)
+    _check_keys(data_)
+    return data_
+
+
+def _check_keys(checkdict: dict) -> None:
+    """
+    Checks if entries in a dictionary are mat-objects. If so, _todict is called to change them to nested dictionaries.
+    Modifies its parameter in-place.
+ """ + + for key in checkdict: + if isinstance(checkdict[key], scipy.io.matlab.mio5_params.mat_struct): + checkdict[key] = _todict(checkdict[key]) + + +def _todict(matobj) -> dict: + """ + A recursive function which constructs from matobjects nested dictionaries + """ + + ret = {} + for strg in matobj._fieldnames: + elem = matobj.__dict__[strg] + if isinstance(elem, scipy.io.matlab.mio5_params.mat_struct): + ret[strg] = _todict(elem) + else: + ret[strg] = elem + return ret + + +def sbxread(filename: str, subindices: FileSubindices | None = slice(None), channel: int | None = None) -> np.ndarray: + """ + Load frames of an .sbx file into a new NumPy array + + Args: + filename: str + filename should be full path excluding .sbx + + subindices: slice | array-like | tuple[slice | array-like, ...] + which frames to read (defaults to all) + if a tuple of non-scalars, specifies slices of up to 4 dimensions in the order (frame, Y, X, Z). + + channel: int | None + which channel to save (required if data has >1 channel) + """ + if subindices is None: + subindices = slice(None) + return _sbxread_helper(filename, subindices=subindices, channel=channel, chunk_size=None) + + +def sbx_to_tif(filename: str, fileout: str | None = None, subindices: FileSubindices | None = slice(None), + channel: int | None = None, chunk_size: int = 1000): + """ + Convert a single .sbx file to .tif format + + Args: + filename: str + filename should be full path excluding .sbx + + fileout: str | None + filename to save (defaults to `filename` with .sbx replaced with .tif) + + subindices: slice | array-like | tuple[slice | array-like, ...] + which frames to read (defaults to all) + if a tuple of non-scalars, specifies slices of up to 4 dimensions in the order (frame, Y, X, Z). + + channel: int | None + which channel to save (required if data has >1 channel) + + chunk_size: int | None + how many frames to load into memory at once (None = load the whole thing) + """ + # Check filenames + basename, ext = os.path.splitext(filename) + if ext == '.sbx': + filename = basename + + if fileout is None: + fileout = filename + '.tif' + else: + # Add a '.tif' extension if not already present + extension = os.path.splitext(fileout)[1].lower() + if extension not in ['.tif', '.tiff', '.btf']: + fileout = fileout + '.tif' + + if subindices is None: + subindices = slice(None) + + # Find the shape of the file we need + save_shape = _get_output_shape(filename, subindices)[0] + + # Open tif as a memmap and copy to it + memmap_tif = tifffile.memmap(fileout, shape=save_shape, dtype='uint16') + _sbxread_helper(filename, subindices=subindices, channel=channel, out=memmap_tif, chunk_size=chunk_size) + + +def sbx_chain_to_tif(filenames: list[str], fileout: str, subindices: ChainSubindices | None = slice(None), + bigtiff: bool | None = True, imagej: bool = False, to32: bool = False, + channel: int | None = None, chunk_size: int = 1000) -> None: + """ + Concatenate a list of sbx files into one tif file. + Args: + filenames: list[str] + each filename should be full path excluding .sbx + + fileout: str + filename to save, including the .tif suffix + + subindices: Iterable[int] | slice | Iterable[Iterable[int] | slice | tuple[Iterable[int] | slice, ...]] + see subindices for sbx_to_tif + can specify separate subindices for each file if nested 2 levels deep; + X, Y, and Z sizes must match for all files after indexing. 
+
+        to32: bool
+            whether to save in float32 format (default is to keep as uint16)
+
+        channel, chunk_size: see sbx_to_tif
+    """
+    if subindices is None:
+        subindices = slice(None)
+
+    # Validate aggressively to avoid failing after waiting to copy a lot of data
+    if isinstance(subindices, slice) or np.isscalar(subindices[0]):
+        # One set of subindices to repeat for each file
+        subindices = [(subindices,) for _ in filenames]
+
+    elif isinstance(subindices[0], slice) or np.isscalar(subindices[0][0]):
+        # Interpret this as being an iterable over dimensions to repeat for each file
+        subindices = [subindices for _ in filenames]
+
+    elif len(subindices) != len(filenames):
+        # Must be a separate subindices for each file; must match number of files
+        raise Exception('Length of subindices does not match length of file list')
+
+    # Get the total size of the file
+    all_shapes = [sbx_shape(file) for file in filenames]
+    all_shapes_out = np.stack([_get_output_shape(file, subind)[0] for (file, subind) in zip(filenames, subindices)])
+
+    # Check that X, Y, and Z are consistent
+    for dimname, shapes in zip(('Y', 'X', 'Z'), all_shapes_out.T[1:]):
+        if np.any(np.diff(shapes) != 0):
+            raise Exception(f'Given files have inconsistent shapes in the {dimname} dimension')
+
+    # Check that all files have the requested channel
+    if channel is None:
+        if any(shape[0] > 1 for shape in all_shapes):
+            raise Exception('At least one file has multiple channels; must specify channel')
+        channel = 0
+    elif any(shape[0] <= channel for shape in all_shapes):
+        raise Exception('Not all files have the requested channel')
+
+    # Allocate empty tif file with the final shape (do this first to ensure any existing file is overwritten)
+    common_shape = tuple(map(int, all_shapes_out[0, 1:]))
+    Ns = list(map(int, all_shapes_out[:, 0]))
+    N = sum(Ns)
+    save_shape = (N,) + common_shape
+
+    extension = os.path.splitext(fileout)[1].lower()
+    if extension not in ['.tif', '.tiff', '.btf']:
+        fileout = fileout + '.tif'
+
+    dtype = np.float32 if to32 else np.uint16
+    tifffile.imwrite(fileout, data=None, mode='w', shape=save_shape, bigtiff=bigtiff, imagej=imagej, dtype=dtype)
+
+    # Now convert each file
+    tif_memmap = tifffile.memmap(fileout, series=0)
+    offset = 0
+    for filename, subind, file_N in zip(filenames, subindices, Ns):
+        _sbxread_helper(filename, subindices=subind, channel=channel, out=tif_memmap[offset:offset+file_N], chunk_size=chunk_size)
+        offset += file_N
+
+
+def sbx_shape(filename: str, info: dict | None = None) -> tuple[int, int, int, int, int]:
+    """
+    Args:
+        filename: str
+            filename should be full path excluding .sbx
+
+        info: dict | None
+            info struct for sbx file (to avoid re-loading)
+
+    Output: tuple (chans, X, Y, Z, frames) representing shape of scanbox data
+    """
+    basename, ext = os.path.splitext(filename)
+    if ext == '.sbx':
+        filename = basename
+
+    # Load info
+    if info is None:
+        info = loadmat_sbx(filename + '.mat')['info']
+
+    # Image size
+    if 'sz' not in info:
+        info['sz'] = np.array([512, 796])
+
+    # Scan mode (0 indicates bidirectional)
+    if 'scanmode' in info and info['scanmode'] == 0:
+        info['recordsPerBuffer'] *= 2
+
+    # Fold lines (multiple subframes per scan) - basically means the frames are smaller and
+    # there are more of them than is reflected in the info file
+    if 'fold_lines' in info and info['fold_lines'] > 0:
+        if info['recordsPerBuffer'] % info['fold_lines'] != 0:
+            raise Exception('Non-integer folds per frame not supported')
+        n_folds = round(info['recordsPerBuffer'] / info['fold_lines'])
+        info['recordsPerBuffer'] = info['fold_lines']
+        info['sz'][0] = info['fold_lines']
+        if 'bytesPerBuffer' in info:
+            info['bytesPerBuffer'] /= n_folds
+    else:
+        n_folds = 1
+
+    # Defining number of channels/size factor
+    if 'chan' in info:
+        info['nChan'] = info['chan']['nchan']
+        factor = 1  # should not be used
+    else:
+        if info['channels'] == 1:
+            info['nChan'] = 2
+            factor = 1
+        elif info['channels'] == 2:
+            info['nChan'] = 1
+            factor = 2
+        elif info['channels'] == 3:
+            info['nChan'] = 1
+            factor = 2
+
+    # Determine number of frames in whole file
+    filesize = os.path.getsize(filename + '.sbx')
+    if 'scanbox_version' in info:
+        if info['scanbox_version'] == 2:
+            info['max_idx'] = filesize / info['recordsPerBuffer'] / info['sz'][1] * factor / 4 - 1
+        elif info['scanbox_version'] == 3:
+            info['max_idx'] = filesize / np.prod(info['sz']) / info['nChan'] / 2 - 1
+        else:
+            raise Exception('Invalid Scanbox version')
+    else:
+        info['max_idx'] = filesize / info['bytesPerBuffer'] * factor - 1
+
+    N = info['max_idx'] + 1    # Last frame
+
+    # Determine whether we are looking at a z-stack
+    # Only consider optotune z-stacks - knobby schedules have too many possibilities and
+    # can't determine whether it was actually armed from the saved info.
+    if info['volscan']:
+        n_planes = info['otparam'][2]
+    else:
+        n_planes = 1
+    N //= n_planes
+
+    x = (int(info['nChan']), int(info['sz'][1]), int(info['recordsPerBuffer']), int(n_planes), int(N))
+    return x
+
+
+def sbx_meta_data(filename: str):
+    """
+    Get metadata for an .sbx file
+    Thanks to sbxreader for much of this: https://github.com/jcouto/sbxreader
+    Field names and values are equivalent to sbxreader as much as possible
+
+    Args:
+        filename: str
+            filename should be full path excluding .sbx
+    """
+    basename, ext = os.path.splitext(filename)
+    if ext == '.sbx':
+        filename = basename
+
+    info = loadmat_sbx(filename + '.mat')['info']
+
+    meta_data = dict()
+    n_chan, n_x, n_y, n_planes, n_frames = sbx_shape(filename, info)
+
+    # Frame rate
+    # use uncorrected recordsPerBuffer here b/c we want the actual # of resonant scans per frame
+    meta_data['frame_rate'] = info['resfreq'] / info['recordsPerBuffer'] / n_planes
+
+    # Spatial resolution
+    magidx = info['config']['magnification'] - 1
+    if 'dycal' in info and 'dxcal' in info:
+        meta_data['um_per_pixel_x'] = info['dxcal']
+        meta_data['um_per_pixel_y'] = info['dycal']
+    else:
+        try:
+            meta_data['um_per_pixel_x'] = info['calibration'][magidx]['x']
+            meta_data['um_per_pixel_y'] = info['calibration'][magidx]['y']
+        except (KeyError, TypeError):
+            pass
+
+    # Optotune depths
+    if n_planes == 1:
+        meta_data['etl_pos'] = []
+    else:
+        if 'otwave' in info and not isinstance(info['otwave'], int) and len(info['otwave']):
+            meta_data['etl_pos'] = [a for a in info['otwave']]
+
+        if 'etl_table' in info:
+            meta_data['etl_pos'] = [a[0] for a in info['etl_table']]
+
+    meta_data['scanning_mode'] = 'bidirectional' if info['scanmode'] == 0 else 'unidirectional'
+    meta_data['num_frames'] = n_frames
+    meta_data['num_channels'] = n_chan
+    meta_data['num_planes'] = n_planes
+    meta_data['frame_size'] = info['sz']
+    meta_data['num_target_frames'] = info['config']['frames']
+    meta_data['num_stored_frames'] = info['max_idx'] + 1
+    meta_data['stage_pos'] = [info['config']['knobby']['pos']['x'],
+                              info['config']['knobby']['pos']['y'],
+                              info['config']['knobby']['pos']['z']]
+    meta_data['stage_angle'] = info['config']['knobby']['pos']['a']
+    meta_data['filename'] = os.path.basename(filename + '.sbx')
+    meta_data['resonant_freq'] = info['resfreq']
+    meta_data['scanbox_version'] = info['scanbox_version']
+    meta_data['records_per_buffer'] = info['recordsPerBuffer']
+    meta_data['magnification'] = float(info['config']['magnification_list'][magidx])
+    meta_data['objective'] = info['objective']
+
+    for i in range(4):
+        if f'pmt{i}_gain' in info['config']:
+            meta_data[f'pmt{i}_gain'] = info['config'][f'pmt{i}_gain']
+
+    possible_fields = ['messages', 'event_id', 'usernotes', 'ballmotion',
+                       ('frame', 'event_frame'), ('line', 'event_line')]
+
+    for field in possible_fields:
+        if isinstance(field, tuple):
+            field, fieldout = field
+        else:
+            fieldout = field
+
+        if field in info:
+            meta_data[fieldout] = info[field]
+
+    return meta_data
+
+
+def _sbxread_helper(filename: str, subindices: FileSubindices = slice(None), channel: int | None = None,
+                    out: np.memmap | None = None, chunk_size: int | None = 1000) -> np.ndarray:
+    """
+    Load frames of an .sbx file into a new NumPy array, or into the given memory-mapped file.
+
+    Args:
+        filename: str
+            filename should be full path excluding .sbx
+
+        subindices: slice | array-like
+            which frames to read (defaults to all)
+
+        channel: int | None
+            which channel to load (required if data has >1 channel)
+
+        out: np.memmap | None
+            existing memory-mapped array to write into rather than allocating a new one
+
+        chunk_size: int | None
+            how many frames to load into memory at once (None = load the whole thing)
+    """
+    basename, ext = os.path.splitext(filename)
+    if ext == '.sbx':
+        filename = basename
+
+    # Normalize so subindices is a list over dimensions
+    if isinstance(subindices, slice) or np.isscalar(subindices[0]):
+        subindices = [subindices]
+    else:
+        subindices = list(subindices)
+
+    # Load info
+    info = loadmat_sbx(filename + '.mat')['info']
+
+    # Get shape (and update info)
+    data_shape = sbx_shape(filename, info)  # (chans, X, Y, Z, frames)
+    n_chans, n_x, n_y, n_planes, n_frames = data_shape
+    is3D = n_planes > 1
+
+    # Fill in missing dimensions in subindices
+    subindices += [slice(None) for _ in range(max(0, 3 + is3D - len(subindices)))]
+
+    if channel is None:
+        if n_chans > 1:
+            raise Exception('Channel input required for multi-channel data')
+        channel = 0
+    elif channel >= n_chans:
+        raise Exception(f'Channel input out of range (data has {n_chans} channels)')
+
+    if 'scanbox_version' in info and info['scanbox_version'] == 3:
+        frame_size = round(np.prod(info['sz']) * info['nChan'] * 2 * n_planes)
+    else:
+        frame_size = round(info['sz'][1] * info['recordsPerBuffer'] * 2 * info['nChan'] * n_planes)
+    if frame_size <= 0:
+        raise Exception('Invalid scanbox metadata')
+
+    save_shape, subindices = _get_output_shape(data_shape, subindices)
+    frame_stride = _interpret_subindices(subindices[0], n_frames)[1]
+    N = save_shape[0]
+
+    if out is not None and out.shape != save_shape:
+        raise Exception('Existing memmap is the wrong shape to hold loaded data')
+
+    # Open file in binary mode - the data are raw uint16
+    with open(filename + '.sbx', 'rb') as sbx_file:
+        def get_chunk(n: int):
+            # Note: SBX files store the values strangely, it's necessary to invert each uint16 value to get the correct ones
+            chunk = np.invert(np.fromfile(sbx_file, dtype='uint16', count=frame_size//2 * n))
+            chunk = np.reshape(chunk, data_shape[:4] + (n,), order='F')  # (chans, X, Y, Z, frames)
+            chunk = np.transpose(chunk, (0, 4, 2, 1, 3))[channel]  # to (frames, Y, X, Z)
+            if not is3D:
+                chunk = np.squeeze(chunk, axis=3)
+            return chunk[:, *np.ix_(*subindices[1:])]
+
+        if frame_stride == 1:
+            sbx_file.seek(subindices[0][0] * frame_size, 0)
+
+            if chunk_size is None:
+                # load a contiguous block all at once
+                if out is None:
+                    # avoid allocating an extra array just to copy into it
+                    out = get_chunk(N)
+                else:
+                    out[:] = get_chunk(N)
+            else:
+                # load a contiguous block of data, but chunk it to not use too much memory at once
+                if out is None:
+                    out = np.empty(save_shape, dtype=np.uint16)
+
+                n_remaining = N
+                offset = 0
+                while n_remaining > 0:
+                    this_chunk_size = min(n_remaining, chunk_size)
+                    chunk = get_chunk(this_chunk_size)
+                    out[offset:offset+this_chunk_size] = chunk
+                    n_remaining -= this_chunk_size
+                    offset += this_chunk_size
+
+        else:
+            # load one frame at a time, sorting indices for fastest access
+            if out is None:
+                out = np.empty(save_shape, dtype=np.uint16)
+
+            for counter, (k, ind) in enumerate(sorted(zip(subindices[0], range(N)))):
+                if counter % 100 == 0:
+                    logging.debug(f'Reading iteration: {k}')
+                sbx_file.seek(k * frame_size, 0)
+                out[ind] = get_chunk(1)
+
+    if isinstance(out, np.memmap):
+        out.flush()
+
+    return out
+
+
+def _interpret_subindices(subindices: DimSubindices, dim_extent: int) -> tuple[Iterable[int], int]:
+    """
+    Given the extent of a dimension in the corresponding recording, obtain an iterable over subindices
+    and the step size (or 0 if the step size is not uniform).
+    """
+    if isinstance(subindices, slice):
+        iterable_elements = range(dim_extent)[subindices]
+        skip = iterable_elements.step
+
+        if subindices.stop is not None and np.isfinite(subindices.stop) and subindices.stop > dim_extent:
+            logging.warning(f'Only {dim_extent} frames or pixels available to load ' +
+                            f'(requested up to {subindices.stop})')
+    else:
+        iterable_elements = subindices
+        skip = 0
+
+    return iterable_elements, skip
+
+
+def _get_output_shape(filename_or_shape: str | tuple[int, ...], subindices: FileSubindices
+                      ) -> tuple[tuple[int, ...], FileSubindices]:
+    """
+    Helper to determine what shape will be loaded/saved given subindices
+    Also returns back the subindices with slices transformed to ranges, for convenience
+    """
+    if isinstance(subindices, slice) or np.isscalar(subindices[0]):
+        subindices = (subindices,)
+
+    n_inds = len(subindices)  # number of dimensions that are indexed
+
+    if isinstance(filename_or_shape, str):
+        data_shape = sbx_shape(filename_or_shape)
+    else:
+        data_shape = filename_or_shape
+
+    n_x, n_y, n_planes, n_frames = data_shape[1:]
+    is3D = n_planes > 1
+    if n_inds > 3 + is3D:
+        raise Exception('Too many dimensions in subindices')
+
+    shape_out = [n_frames, n_y, n_x, n_planes] if is3D else [n_frames, n_y, n_x]
+    subinds_out = []
+    for i, (dim, subind) in enumerate(zip(shape_out, subindices)):
+        iterable_elements = _interpret_subindices(subind, dim)[0]
+        shape_out[i] = len(iterable_elements)
+        subinds_out.append(iterable_elements)
+
+    return tuple(shape_out), tuple(subinds_out)
\ No newline at end of file
diff --git a/testdata/2d_sbx.mat b/testdata/2d_sbx.mat
new file mode 100644
index 000000000..174177def
Binary files /dev/null and b/testdata/2d_sbx.mat differ
diff --git a/testdata/2d_sbx.sbx b/testdata/2d_sbx.sbx
new file mode 100644
index 000000000..b2dca2b2f
Binary files /dev/null and b/testdata/2d_sbx.sbx differ
diff --git a/testdata/3d_sbx_1.mat b/testdata/3d_sbx_1.mat
new file mode 100644
index 000000000..9c6bafa53
Binary files /dev/null and b/testdata/3d_sbx_1.mat differ
diff --git a/testdata/3d_sbx_1.sbx b/testdata/3d_sbx_1.sbx
new file mode 100644
index 000000000..9ee2d70b2
Binary files /dev/null and b/testdata/3d_sbx_1.sbx differ
diff --git a/testdata/3d_sbx_2.mat b/testdata/3d_sbx_2.mat
new file mode 100644
index 000000000..2a55b6e01
Binary files /dev/null and b/testdata/3d_sbx_2.mat differ
diff --git a/testdata/3d_sbx_2.sbx b/testdata/3d_sbx_2.sbx
new file mode 100644
index 000000000..93ab43171
Binary files /dev/null and b/testdata/3d_sbx_2.sbx differ
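
Usage sketch (not part of the patch): a minimal example of the loading path this diff adds, assuming the bundled testdata above is installed under the default CaImAn data directory. All function and parameter names come from the diff itself; the output filename 'from_sbx.tif' is arbitrary.

    import os
    import caiman as cm
    from caiman.paths import caiman_datadir
    from caiman.utils import sbx_utils

    fname = os.path.join(caiman_datadir(), 'testdata', '2d_sbx.sbx')

    # Read every other frame directly into a uint16 array; subindices may
    # address up to (frame, Y, X, Z) dimensions.
    arr = sbx_utils.sbxread(fname, subindices=slice(0, None, 2))

    # cm.load now routes .sbx files through sbx_utils and attaches the
    # Scanbox metadata to the returned movie.
    mov = cm.load(fname, subindices=slice(0, None, 2))

    # Convert to TIFF, reading at most 500 frames into memory at a time.
    sbx_utils.sbx_to_tif(fname, fileout='from_sbx.tif', chunk_size=500)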