From 0668d9473290dfcba4f0edfdd0cc1d230ed79197 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 25 Sep 2024 12:07:18 -0600 Subject: [PATCH 1/4] Adding ignore_range keyword to add_persistence_test(). --- act/qc/qctests.py | 18 ++++++++++++---- tests/qc/test_qctests.py | 46 ++++++++++++++++++++++++++++++++++------ 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/act/qc/qctests.py b/act/qc/qctests.py index 3b9916d1f8..2f27df036f 100644 --- a/act/qc/qctests.py +++ b/act/qc/qctests.py @@ -948,6 +948,7 @@ def add_persistence_test( test_number=None, flag_value=False, prepend_text=None, + ignore_range=None, ): """ Method to perform a persistence test over 1-D data.. @@ -968,21 +969,26 @@ def add_persistence_test( center : boolean Optional where within the moving window to report the standard deviation values. Used in the .rolling.std() calculation with xarray. - test_meaning : str + test_meaning : None or str The optional text description to add to flag_meanings describing the test. Will add a default if not set. test_assessment : str Optional single word describing the assessment of the test. Will set a default if not set. - test_number : int + test_number : None or int Optional test number to use. If not set will ues next available test number. flag_value : boolean Indicates that the tests are stored as integers not bit packed values in quality control variable. - prepend_text : str + prepend_text : None or str Optional text to prepend to the test meaning. Example is indicate what institution added the test. + ignore_range : None, tuple, list + Optional list of minimum and maximum data values used to define a range. + Data values within this range (inclusive) are not flagged, even when a + persistence is detected. Can be used when a specific range of values is + often persistent, e.g. RH at 100% during a rain event. 
Returns ------- @@ -1008,7 +1014,11 @@ def add_persistence_test( with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=RuntimeWarning) stddev = data.rolling(time=window, min_periods=min_periods, center=True).std() - index = stddev < test_limit + index = stddev <= test_limit + + if ignore_range is not None: + ignore_index = (data >= ignore_range[0]) & (data <= ignore_range[1]) + index = index & ~ignore_index result = self._ds.qcfilter.add_test( var_name, diff --git a/tests/qc/test_qctests.py b/tests/qc/test_qctests.py index 332f88cdc8..b6dd77cbca 100644 --- a/tests/qc/test_qctests.py +++ b/tests/qc/test_qctests.py @@ -337,10 +337,9 @@ def test_qctests_dos(): ds = read_arm_netcdf(EXAMPLE_IRT25m20s) var_name = 'inst_up_long_dome_resist' - # persistence test data = ds[var_name].values data[1000:2400] = data[1000] - data = np.around(data, decimals=3) + data = np.around(data, decimals=5) ds[var_name].values = data result = ds.qcfilter.add_persistence_test(var_name) qc_var_name = result['qc_variable_name'] @@ -348,10 +347,10 @@ def test_qctests_dos(): 'Data failing persistence test. Standard Deviation over a ' 'window of 10 values less than 0.0001.' ) - assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_meaning - # There is a precision issue with GitHub testing that makes the number of tests - # tripped off. This isclose() option is to account for that. - assert np.isclose(np.sum(ds[qc_var_name].values), 1399, atol=2) + assert ds[qc_var_name].attrs['flag_meanings'] == [test_meaning] + # There is a precision issue with hardware/VM used in testing that makes the + # number of tests tripped off. This isclose() option is to account for that. 
+ assert np.isclose(np.sum(ds[qc_var_name].values), 1406, atol=2) ds.qcfilter.add_persistence_test(var_name, window=10000, prepend_text='DQO') test_meaning = ( @@ -360,6 +359,41 @@ def test_qctests_dos(): ) assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_meaning + ds.close() + del ds + + # Test the ignore range in persistence test + ds = read_arm_netcdf(EXAMPLE_IRT25m20s) + data = ds[var_name].values + data[1000:1400] = data[1000] + data[2000:2400] = 14.2 + data = np.around(data, decimals=3) + ds[var_name].values = data + result = ds.qcfilter.add_persistence_test(var_name, window=20, min_periods=20, test_limit=0.01) + + assert np.isclose(np.sum(ds[qc_var_name].values), 768, atol=2) + + del ds[qc_var_name] + result = ds.qcfilter.add_persistence_test( + var_name, + window=20, + min_periods=20, + test_limit=0.01, + ignore_range=[13.1, 14.8], + test_assessment='Suspect', + ) + + assert np.isclose(np.sum(ds[qc_var_name].values), 387, atol=2) + assert ds[qc_var_name].attrs['flag_assessments'] == ['Suspect'] + test_meaning = ( + 'Data failing persistence test. Standard Deviation over a window ' + 'of 20 values less than 0.01.' + ) + assert ds[qc_var_name].attrs['flag_meanings'] == [test_meaning] + + ds.close() + del ds + def test_add_atmospheric_pressure_test(): ds = read_arm_netcdf(EXAMPLE_MET1, cleanup_qc=True) From babcdc8786e2c48c4492bc1727f57319836a4f20 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 25 Sep 2024 12:11:05 -0600 Subject: [PATCH 2/4] Pulling min and max values from ignore_range keyword instead of expecting an order. 
--- act/qc/qctests.py | 2 +- tests/qc/test_qctests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/act/qc/qctests.py b/act/qc/qctests.py index 2f27df036f..4d57661489 100644 --- a/act/qc/qctests.py +++ b/act/qc/qctests.py @@ -1017,7 +1017,7 @@ def add_persistence_test( index = stddev <= test_limit if ignore_range is not None: - ignore_index = (data >= ignore_range[0]) & (data <= ignore_range[1]) + ignore_index = (data >= min(ignore_range)) & (data <= max(ignore_range)) index = index & ~ignore_index result = self._ds.qcfilter.add_test( diff --git a/tests/qc/test_qctests.py b/tests/qc/test_qctests.py index b6dd77cbca..5807f7e62b 100644 --- a/tests/qc/test_qctests.py +++ b/tests/qc/test_qctests.py @@ -379,7 +379,7 @@ def test_qctests_dos(): window=20, min_periods=20, test_limit=0.01, - ignore_range=[13.1, 14.8], + ignore_range=[14.8, 13.1], test_assessment='Suspect', ) From 8405922af10a3aa1a62fe1169a8edb9a3fda7283 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 25 Sep 2024 13:06:06 -0600 Subject: [PATCH 3/4] Updated absolute tolerance in tests to accommodate different values because of hardware/VM. --- tests/qc/test_qctests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/qc/test_qctests.py b/tests/qc/test_qctests.py index 5807f7e62b..dc7d98bd37 100644 --- a/tests/qc/test_qctests.py +++ b/tests/qc/test_qctests.py @@ -350,7 +350,7 @@ def test_qctests_dos(): assert ds[qc_var_name].attrs['flag_meanings'] == [test_meaning] # There is a precision issue with hardware/VM used in testing that makes the # number of tests tripped off. This isclose() option is to account for that. 
- assert np.isclose(np.sum(ds[qc_var_name].values), 1406, atol=2) + assert np.isclose(np.sum(ds[qc_var_name].values), 1400, atol=10) ds.qcfilter.add_persistence_test(var_name, window=10000, prepend_text='DQO') test_meaning = ( @@ -371,7 +371,7 @@ def test_qctests_dos(): ds[var_name].values = data result = ds.qcfilter.add_persistence_test(var_name, window=20, min_periods=20, test_limit=0.01) - assert np.isclose(np.sum(ds[qc_var_name].values), 768, atol=2) + assert np.isclose(np.sum(ds[qc_var_name].values), 768, atol=5) del ds[qc_var_name] result = ds.qcfilter.add_persistence_test( @@ -383,7 +383,7 @@ def test_qctests_dos(): test_assessment='Suspect', ) - assert np.isclose(np.sum(ds[qc_var_name].values), 387, atol=2) + assert np.isclose(np.sum(ds[qc_var_name].values), 387, atol=5) assert ds[qc_var_name].attrs['flag_assessments'] == ['Suspect'] test_meaning = ( 'Data failing persistence test. Standard Deviation over a window ' From 61ab85cb86454c69326130615dab0aed45e3f738 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 25 Sep 2024 14:57:50 -0600 Subject: [PATCH 4/4] Adjusting the test values and rounding to ensure tests work correctly across hardware. --- tests/qc/test_qctests.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/qc/test_qctests.py b/tests/qc/test_qctests.py index dc7d98bd37..0a244eea7c 100644 --- a/tests/qc/test_qctests.py +++ b/tests/qc/test_qctests.py @@ -348,8 +348,9 @@ def test_qctests_dos(): 'window of 10 values less than 0.0001.' ) assert ds[qc_var_name].attrs['flag_meanings'] == [test_meaning] + # There is a precision issue with hardware/VM used in testing that makes the - # number of tests tripped off. This isclose() option is to account for that. + # number of tests tripped different than listed value. The isclose() option is to account for that. 
assert np.isclose(np.sum(ds[qc_var_name].values), 1400, atol=10) ds.qcfilter.add_persistence_test(var_name, window=10000, prepend_text='DQO') @@ -367,11 +368,11 @@ def test_qctests_dos(): data = ds[var_name].values data[1000:1400] = data[1000] data[2000:2400] = 14.2 - data = np.around(data, decimals=3) + data = np.around(data, decimals=5) ds[var_name].values = data result = ds.qcfilter.add_persistence_test(var_name, window=20, min_periods=20, test_limit=0.01) - assert np.isclose(np.sum(ds[qc_var_name].values), 768, atol=5) + assert np.isclose(np.sum(ds[qc_var_name].values), 779, atol=5) del ds[qc_var_name] result = ds.qcfilter.add_persistence_test( @@ -383,7 +384,7 @@ def test_qctests_dos(): test_assessment='Suspect', ) - assert np.isclose(np.sum(ds[qc_var_name].values), 387, atol=5) + assert np.isclose(np.sum(ds[qc_var_name].values), 398, atol=5) assert ds[qc_var_name].attrs['flag_assessments'] == ['Suspect'] test_meaning = ( 'Data failing persistence test. Standard Deviation over a window '