Check all DBSCAN clusters in case their labels are not ordered by score

flatironinstitute · Oct 12, 2020 · ea58975 · ea58975
1 parent 71a75c0
commit ea58975
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/inferelator_prior/processor/prior.py b/inferelator_prior/processor/prior.py
@@ -321,7 +321,8 @@ def _find_outliers_dbscan(tf_data, max_sparsity=0.05):
     labels = DBSCAN(min_samples=max(int(scores.size * 0.001), 10), eps=1, n_jobs=None)\
         .fit_predict(scores.reshape(-1, 1), sample_weight=weights)
 
-    min_score = np.min(scores[labels == np.unique(labels)[-1]])
+    largest_cluster = np.argmax(np.array([np.min(scores[labels == i]) for i in range(np.max(labels) + 1)]))
+    min_score = np.min(scores[labels == largest_cluster])
 
     # If the largest cluster is less than max_sparsity, keep it and any outliers greater than it
     keep_all_values = tf_data >= min_score