Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Persistence test ignore range #857

Merged
merged 4 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions act/qc/qctests.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,7 @@ def add_persistence_test(
test_number=None,
flag_value=False,
prepend_text=None,
ignore_range=None,
):
"""
Method to perform a persistence test over 1-D data.
Expand All @@ -968,21 +969,26 @@ def add_persistence_test(
center : boolean
Optional where within the moving window to report the standard
deviation values. Used in the .rolling.std() calculation with xarray.
test_meaning : str
test_meaning : None or str
The optional text description to add to flag_meanings
describing the test. Will add a default if not set.
test_assessment : str
Optional single word describing the assessment of the test.
Will set a default if not set.
test_number : int
test_number : None or int
Optional test number to use. If not set will use the next
available test number.
flag_value : boolean
Indicates that the tests are stored as integers
not bit packed values in quality control variable.
prepend_text : str
prepend_text : None or str
Optional text to prepend to the test meaning.
An example is to indicate which institution added the test.
ignore_range : None, tuple, list
Optional list of minimum and maximum data values used to define an inclusive
range in which the test will not flag data, even when a persistence is detected.
The two values may be given in either order. Can be used when a specific range
of values is often legitimately persistent, e.g. RH at 100% during a rain event.

Returns
-------
Expand All @@ -1008,7 +1014,11 @@ def add_persistence_test(
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=RuntimeWarning)
stddev = data.rolling(time=window, min_periods=min_periods, center=True).std()
index = stddev < test_limit
index = stddev <= test_limit

if ignore_range is not None:
ignore_index = (data >= min(ignore_range)) & (data <= max(ignore_range))
index = index & ~ignore_index

result = self._ds.qcfilter.add_test(
var_name,
Expand Down
47 changes: 41 additions & 6 deletions tests/qc/test_qctests.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,21 +337,21 @@ def test_qctests_dos():
ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
var_name = 'inst_up_long_dome_resist'

# persistence test
data = ds[var_name].values
data[1000:2400] = data[1000]
data = np.around(data, decimals=3)
data = np.around(data, decimals=5)
ds[var_name].values = data
result = ds.qcfilter.add_persistence_test(var_name)
qc_var_name = result['qc_variable_name']
test_meaning = (
'Data failing persistence test. Standard Deviation over a '
'window of 10 values less than 0.0001.'
)
assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_meaning
# There is a precision issue with GitHub testing that makes the number of tests
# tripped off. This isclose() option is to account for that.
assert np.isclose(np.sum(ds[qc_var_name].values), 1399, atol=2)
assert ds[qc_var_name].attrs['flag_meanings'] == [test_meaning]

# Precision differences in the hardware/VM used for testing can make the number
# of tripped tests differ from the listed value. The isclose() tolerance accounts for that.
assert np.isclose(np.sum(ds[qc_var_name].values), 1400, atol=10)

ds.qcfilter.add_persistence_test(var_name, window=10000, prepend_text='DQO')
test_meaning = (
Expand All @@ -360,6 +360,41 @@ def test_qctests_dos():
)
assert ds[qc_var_name].attrs['flag_meanings'][-1] == test_meaning

ds.close()
del ds

# Test the ignore range in persistence test
ds = read_arm_netcdf(EXAMPLE_IRT25m20s)
data = ds[var_name].values
data[1000:1400] = data[1000]
data[2000:2400] = 14.2
data = np.around(data, decimals=5)
ds[var_name].values = data
result = ds.qcfilter.add_persistence_test(var_name, window=20, min_periods=20, test_limit=0.01)

assert np.isclose(np.sum(ds[qc_var_name].values), 779, atol=5)

del ds[qc_var_name]
result = ds.qcfilter.add_persistence_test(
var_name,
window=20,
min_periods=20,
test_limit=0.01,
ignore_range=[14.8, 13.1],
test_assessment='Suspect',
)

assert np.isclose(np.sum(ds[qc_var_name].values), 398, atol=5)
assert ds[qc_var_name].attrs['flag_assessments'] == ['Suspect']
test_meaning = (
'Data failing persistence test. Standard Deviation over a window '
'of 20 values less than 0.01.'
)
assert ds[qc_var_name].attrs['flag_meanings'] == [test_meaning]

ds.close()
del ds


def test_add_atmospheric_pressure_test():
ds = read_arm_netcdf(EXAMPLE_MET1, cleanup_qc=True)
Expand Down
Loading