From 8126b8aed8fb293e3f0e8cd622019841fc5b3e42 Mon Sep 17 00:00:00 2001 From: EricThomson Date: Wed, 21 Feb 2024 18:21:47 -0500 Subject: [PATCH 1/2] handle problems that can come up with auto-percentile calculations for dff --- caiman/utils/stats.py | 47 ++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/caiman/utils/stats.py b/caiman/utils/stats.py index 4264ab7c2..94d75c46b 100644 --- a/caiman/utils/stats.py +++ b/caiman/utils/stats.py @@ -169,6 +169,8 @@ def df_percentile(inputData, axis=None): Extracting the percentile of the data where the mode occurs and its value. Used to determine the filtering level for DF/F extraction. Note that computation can be inaccurate for short traces. + + If errors occur, will just return fallback value of median (50% percentile) """ if axis is not None: @@ -181,24 +183,37 @@ def fnc(x): else: # Create the function that we can use for the half-sample mode err = True - while err: + err_count = 0 + max_err_count = 10 + while err==True and err_count < max_err_count: try: bandwidth, mesh, density, cdf = kde(inputData) err = False - except: - logging.warning('Percentile computation failed. Duplicating ' + 'and trying again.') + except ValueError: + err_count += 1 + logging.warning(f"kde() failure {err_count}. Concatenating traces and recomputing.") if not isinstance(inputData, list): inputData = inputData.tolist() inputData += inputData - data_prct = cdf[np.argmax(density)] * 100 - val = mesh[np.argmax(density)] + # There are three ways the above calculation can go wrong. + # For each case: just use median. + # if cdf was never defined, it means kde never ran without error + try: + cdf + except NameError: + logging.warning('Max err count reached. Reverting to median.') + data_prct = 50 + val = np.median(np.array(inputData)) + else: + data_prct = cdf[np.argmax(density)] * 100 + val = mesh[np.argmax(density)] + if data_prct >= 100 or data_prct < 0: - logging.warning('Invalid percentile computed possibly due ' + 'short trace. Duplicating and recomuputing.') - if not isinstance(inputData, list): - inputData = inputData.tolist() - inputData *= 2 - err = True + logging.warning('Invalid percentile (<0 or >100) computed. Reverting to median.') + data_prct = 50 + val = np.median(np.array(inputData)) + if np.isnan(data_prct): logging.warning('NaN percentile computed. Reverting to median.') data_prct = 50 @@ -215,8 +230,6 @@ def fnc(x): Daniel B. Smith, PhD Updated 1-23-2013 """ - - def kde(data, N=None, MIN=None, MAX=None): # Parameters to set up the mesh on which to calculate @@ -241,12 +254,13 @@ def kde(data, N=None, MIN=None, MAX=None): SqDCTData = (DCTData[1:] / 2)**2 # The fixed point calculation finds the bandwidth = t_star + # fixed_point is function defined below guess = 0.1 try: - t_star = scipy.optimize.brentq(fixed_point, 0, guess, args=(M, I, SqDCTData)) - except ValueError: - print('Oops!') - return None + t_star = scipy.optimize.brentq(fixed_point, 0, guess, args=(M, I, SqDCTData)) + except ValueError as e: + # + raise ValueError(f"Unable to find root: {str(e)}") # Smooth the DCTransformed data using t_star SmDCTData = DCTData * np.exp(-np.arange(N)**2 * np.pi**2 * t_star / 2) @@ -262,6 +276,7 @@ def kde(data, N=None, MIN=None, MAX=None): def fixed_point(t, M, I, a2): + # TODO: document this l = 7 I = np.float64(I) M = np.float64(M) From dc8237cffbce2d7c9ac47c242fef4b1c5c83f90f Mon Sep 17 00:00:00 2001 From: EricThomson Date: Wed, 21 Feb 2024 20:37:52 -0500 Subject: [PATCH 2/2] updated based on review --- caiman/utils/stats.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/caiman/utils/stats.py b/caiman/utils/stats.py index 94d75c46b..a5688fe36 100644 --- a/caiman/utils/stats.py +++ b/caiman/utils/stats.py @@ -170,7 +170,7 @@ def df_percentile(inputData, axis=None): Used to determine the filtering level for DF/F extraction. Note that computation can be inaccurate for short traces. - If errors occur, will just return fallback value of median (50% percentile) + If errors occur, return fallback values """ if axis is not None: @@ -185,7 +185,7 @@ def fnc(x): err = True err_count = 0 max_err_count = 10 - while err==True and err_count < max_err_count: + while err and err_count < max_err_count: try: bandwidth, mesh, density, cdf = kde(inputData) err = False @@ -226,7 +226,8 @@ def fnc(x): An implementation of the kde bandwidth selection method outlined in: Z. I. Botev, J. F. Grotowski, and D. P. Kroese. Kernel density estimation via diffusion. The Annals of Statistics, 38(5):2916-2957, 2010. -Based on the implementation in Matlab by Zdravko Botev. +Based on the implementation in Matlab by Zdravko Botev: +See: https://www.mathworks.com/matlabcentral/fileexchange/14034-kernel-density-estimator Daniel B. Smith, PhD Updated 1-23-2013 """ @@ -253,8 +254,7 @@ def kde(data, N=None, MIN=None, MAX=None): I = [iN * iN for iN in range(1, N)] SqDCTData = (DCTData[1:] / 2)**2 - # The fixed point calculation finds the bandwidth = t_star - # fixed_point is function defined below + # The fixed point calculation finds the bandwidth = t_star for smoothing (see paper cited above) guess = 0.1 try: t_star = scipy.optimize.brentq(fixed_point, 0, guess, args=(M, I, SqDCTData)) @@ -276,7 +276,8 @@ def kde(data, N=None, MIN=None, MAX=None): def fixed_point(t, M, I, a2): - # TODO: document this + # TODO: document this: + # From Matlab code: this implements the function t-zeta*gamma^[l](t) l = 7 I = np.float64(I) M = np.float64(M)