From bffa4083c46077706e5a3c2a3c7543503a575948 Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 14 Nov 2024 11:05:35 -0500 Subject: [PATCH 1/4] Revert "Revert "AL-875: Add memory saving options to compute_weight_threshold sigma_clip call" (#315)" This reverts commit 63d52808d1122596225447d06355f09b6126e589, reversing changes made to 5299646bd96ccce9322cda441d9046ba3d65623f. --- src/stcal/outlier_detection/utils.py | 7 ++++++- tests/outlier_detection/test_utils.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index 886fac37..ba3bf041 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -77,7 +77,12 @@ def compute_weight_threshold(weight, maskpt): weight_masked = np.ma.array(weight, mask=np.logical_or( mask_zero_weight, mask_nans)) # Sigma-clip the unmasked data - weight_masked = sigma_clip(weight_masked, sigma=3, maxiters=5) + weight_masked = sigma_clip(weight_masked, + sigma=3, + maxiters=5, + masked=False, + copy=False, + ) mean_weight = np.mean(weight_masked) # Mask pixels where weight falls below maskpt percent weight_threshold = mean_weight * maskpt diff --git a/tests/outlier_detection/test_utils.py b/tests/outlier_detection/test_utils.py index fd72d9dc..4e14c508 100644 --- a/tests/outlier_detection/test_utils.py +++ b/tests/outlier_detection/test_utils.py @@ -16,6 +16,7 @@ reproject, medfilt, ) +from stcal.testing_helpers import MemoryThreshold @pytest.mark.parametrize("shape,diff", [ @@ -81,6 +82,23 @@ def test_compute_weight_threshold_zeros(): np.testing.assert_allclose(result, 21) +def test_compute_weight_threshold_memory(): + """Test that weight threshold function modifies + the weight array in place""" + arr = np.zeros([500, 500], dtype=np.float32) + arr[:250, :250] = 42 + arr[10,10] = 0 + arr[-10,-10] = np.nan + + # buffer to account for memory overhead needs to be small enough + # to ensure that the 
array was not copied + fractional_memory_buffer = 1.9 + expected_mem = int(arr.nbytes*fractional_memory_buffer) + with MemoryThreshold(str(expected_mem) + " B"): + result = compute_weight_threshold(arr, 0.5) + np.testing.assert_allclose(result, 21) + + def test_flag_crs(): sci = np.zeros((10, 10), dtype=np.float32) err = np.ones_like(sci) From 0d7b16bfd76c067a2223c680fe5cef28bed14d55 Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 14 Nov 2024 11:21:01 -0500 Subject: [PATCH 2/4] reduce memory for compute_weight_threshold --- src/stcal/outlier_detection/utils.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/stcal/outlier_detection/utils.py b/src/stcal/outlier_detection/utils.py index ba3bf041..00b577ac 100644 --- a/src/stcal/outlier_detection/utils.py +++ b/src/stcal/outlier_detection/utils.py @@ -68,25 +68,15 @@ def compute_weight_threshold(weight, maskpt): float The weight threshold for this integration. ''' - # necessary in order to assure that mask gets applied correctly - if hasattr(weight, '_mask'): - del weight._mask - mask_zero_weight = np.equal(weight, 0.) 
- mask_nans = np.isnan(weight) - # Combine the masks - weight_masked = np.ma.array(weight, mask=np.logical_or( - mask_zero_weight, mask_nans)) - # Sigma-clip the unmasked data - weight_masked = sigma_clip(weight_masked, - sigma=3, - maxiters=5, - masked=False, - copy=False, - ) - mean_weight = np.mean(weight_masked) - # Mask pixels where weight falls below maskpt percent - weight_threshold = mean_weight * maskpt - return weight_threshold + return np.mean( + sigma_clip( + weight[np.isfinite(weight) & (weight != 0)], + sigma=3, + maxiters=5, + masked=False, + copy=False, + ), + dtype='f8') * maskpt def _abs_deriv(array): From 38289f144e1371d1ae4b998446433a73d1575e52 Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 14 Nov 2024 13:32:39 -0500 Subject: [PATCH 3/4] reduce threshold --- tests/outlier_detection/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/outlier_detection/test_utils.py b/tests/outlier_detection/test_utils.py index 4e14c508..05a6dea7 100644 --- a/tests/outlier_detection/test_utils.py +++ b/tests/outlier_detection/test_utils.py @@ -92,7 +92,7 @@ def test_compute_weight_threshold_memory(): # buffer to account for memory overhead needs to be small enough # to ensure that the array was not copied - fractional_memory_buffer = 1.9 + fractional_memory_buffer = 0.9 expected_mem = int(arr.nbytes*fractional_memory_buffer) with MemoryThreshold(str(expected_mem) + " B"): result = compute_weight_threshold(arr, 0.5) From d953b1687b41cfe1581704a4d384347883ff6fbb Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 21 Nov 2024 10:11:02 -0500 Subject: [PATCH 4/4] add changelog --- changes/319.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/319.bugfix.rst diff --git a/changes/319.bugfix.rst b/changes/319.bugfix.rst new file mode 100644 index 00000000..0fd87fd4 --- /dev/null +++ b/changes/319.bugfix.rst @@ -0,0 +1 @@ +Update weight threshold calculation in outlier detection to work around numpy bug that introduces 
small numerical differences in the mean of a masked array.