handle problems that can come up with auto-percentile calculations fo…

…r dff
flatironinstitute · Feb 21, 2024 · 8126b8a · 8126b8a
1 parent ca7012b
commit 8126b8a
Showing 1 changed file with 31 additions and 16 deletions.
diff --git a/caiman/utils/stats.py b/caiman/utils/stats.py
@@ -169,6 +169,8 @@ def df_percentile(inputData, axis=None):
     Extracting the percentile of the data where the mode occurs and its value.
     Used to determine the filtering level for DF/F extraction. Note that
     computation can be inaccurate for short traces.
+
+    If errors occur, will just return fallback value of median (50% percentile)
     """
     if axis is not None:
 
@@ -181,24 +183,37 @@ def fnc(x):
     else:
         # Create the function that we can use for the half-sample mode
         err = True
-        while err:
+        err_count = 0
+        max_err_count = 10
+        while err==True and err_count < max_err_count:
             try:
                 bandwidth, mesh, density, cdf = kde(inputData)
                 err = False
-            except:
-                logging.warning('Percentile computation failed. Duplicating ' + 'and trying again.')
+            except ValueError:
+                err_count += 1
+                logging.warning(f"kde() failure {err_count}. Concatenating traces and recomputing.")
                 if not isinstance(inputData, list):
                     inputData = inputData.tolist()
                 inputData += inputData
 
-        data_prct = cdf[np.argmax(density)] * 100
-        val = mesh[np.argmax(density)]
+        # There are three ways the above calculation can go wrong. 
+        # For each case: just use median.
+        # if cdf was never defined, it means kde never ran without error
+        try:
+            cdf
+        except NameError:
+            logging.warning('Max err count reached. Reverting to median.')
+            data_prct = 50
+            val = np.median(np.array(inputData))
+        else:
+            data_prct = cdf[np.argmax(density)] * 100
+            val = mesh[np.argmax(density)]
+
         if data_prct >= 100 or data_prct < 0:
-            logging.warning('Invalid percentile computed possibly due ' + 'short trace. Duplicating and recomuputing.')
-            if not isinstance(inputData, list):
-                inputData = inputData.tolist()
-            inputData *= 2
-            err = True
+            logging.warning('Invalid percentile (<0 or >100) computed. Reverting to median.')
+            data_prct = 50
+            val = np.median(np.array(inputData))
+
         if np.isnan(data_prct):
             logging.warning('NaN percentile computed. Reverting to median.')
             data_prct = 50
@@ -215,8 +230,6 @@ def fnc(x):
 Daniel B. Smith, PhD
 Updated 1-23-2013
 """
-
-
 def kde(data, N=None, MIN=None, MAX=None):
 
     # Parameters to set up the mesh on which to calculate
@@ -241,12 +254,13 @@ def kde(data, N=None, MIN=None, MAX=None):
     SqDCTData = (DCTData[1:] / 2)**2
 
     # The fixed point calculation finds the bandwidth = t_star
+    # fixed_point is function defined below
     guess = 0.1
     try:
-        t_star = scipy.optimize.brentq(fixed_point, 0, guess, args=(M, I, SqDCTData))
-    except ValueError:
-        print('Oops!')
-        return None
+       t_star = scipy.optimize.brentq(fixed_point, 0, guess, args=(M, I, SqDCTData))
+    except ValueError as e:
+       # 
+       raise ValueError(f"Unable to find root: {str(e)}")
 
     # Smooth the DCTransformed data using t_star
     SmDCTData = DCTData * np.exp(-np.arange(N)**2 * np.pi**2 * t_star / 2)
@@ -262,6 +276,7 @@ def kde(data, N=None, MIN=None, MAX=None):
 
 
 def fixed_point(t, M, I, a2):
+    # TODO: document this
     l = 7
     I = np.float64(I)
     M = np.float64(M)