diff --git a/act/io/arm.py b/act/io/arm.py
index e501726a4d..893bc84e28 100644
--- a/act/io/arm.py
+++ b/act/io/arm.py
@@ -549,8 +549,9 @@ def write_netcdf(
         make_copy=True,
         cf_compliant=False,
         delete_global_attrs=['qc_standards_version', 'qc_method', 'qc_comment'],
-        FillValue=-9999,
+        FillValue=True,
         cf_convention='CF-1.8',
+        encoding={},
         **kwargs,
     ):
         """
@@ -573,7 +574,8 @@
            white space between words.
        join_char : str
            The character sting to use for replacing white spaces between words when converting
-           a list of strings to single character string attributes.
+           a list of strings to single character string attributes. Main use is with the
+           flag_meanings attribute.
        make_copy : boolean
            Make a copy before modifying Dataset to write. For large Datasets this
            may add processing time and memory. If modifying the Dataset is OK
@@ -587,14 +589,18 @@
            Optional global attributes to be deleted. Defaults to some standard QC
            attributes that are not needed. Can add more or set to
            None to not remove the attributes.
-       FillValue : int, float
-           The value to use as a _FillValue in output file. This is used to fix
-           issues with how Xarray handles missing_value upon reading. It's confusing
-           so not a perfect fix. Set to None to leave Xarray to do what it wants.
-           Set to a value to be the value used as _FillValue in the file and data
-           array. This should then remove missing_value attribute from the file as well.
+       FillValue : boolean
+           Xarray assumes all float type variables had the missing value indicator converted
+           to NaN upon reading. to_netcdf() will then write a _FillValue attribute set to NaN.
+           Set FillValue to False to suppress adding the _FillValue=NaN variable attribute to
+           the written file. Set to True to allow to_netcdf() to add the attribute.
+           If the Dataset variable already has a _FillValue attribute or a _FillValue key
+           is provided in the encoding dictionary those will not be changed and a _FillValue
+           will be written to the NetCDF file.
        cf_convention : str
            The Climate and Forecast convention string to add to Conventions attribute.
+       encoding : dict
+           The encoding dictionary used with the to_netcdf() method.
        **kwargs : keywords
            Keywords to pass through to Dataset.to_netcdf()
 
@@ -607,98 +613,102 @@
         """
 
         if make_copy:
-            write_ds = copy.deepcopy(self._ds)
+            ds = copy.deepcopy(self._ds)
         else:
-            write_ds = self._ds
+            ds = self._ds
 
-        encoding = {}
         if cleanup_global_atts:
-            for attr in list(write_ds.attrs):
+            for attr in list(ds.attrs):
                 if attr.startswith('_'):
-                    del write_ds.attrs[attr]
+                    del ds.attrs[attr]
 
         if cleanup_qc_atts:
             check_atts = ['flag_meanings', 'flag_assessments']
-            for var_name in list(write_ds.data_vars):
-                if 'standard_name' not in write_ds[var_name].attrs.keys():
+            for var_name in list(ds.data_vars):
+                if 'standard_name' not in ds[var_name].attrs.keys():
+                    continue
+
+                if ds[var_name].attrs['standard_name'] != "quality_flag":
                     continue
 
                 for attr_name in check_atts:
                     try:
-                        att_values = write_ds[var_name].attrs[attr_name]
+                        att_values = ds[var_name].attrs[attr_name]
                         if isinstance(att_values, (list, tuple)):
                             att_values = [
                                 att_value.replace(' ', join_char) for att_value in att_values
                             ]
-                            write_ds[var_name].attrs[attr_name] = ' '.join(att_values)
+                            ds[var_name].attrs[attr_name] = ' '.join(att_values)
                     except KeyError:
                         pass
 
-                # Tell .to_netcdf() to not add a _FillValue attribute for
-                # quality control variables.
-                if FillValue is not None:
-                    encoding[var_name] = {'_FillValue': None}
 
+        # Xarray makes an assumption that float type variables were read in and converted
+        # missing value indicator to NaN. .to_netcdf() will then automatically assign
+        # _FillValue attribute set to NaN when writing. If requested will set _FillValue
+        # key in encoding to None which will suppress to_netcdf() from adding a _FillValue.
+        # If _FillValue attribute or _FillValue key in encoding is already set, will not
+        # override and the _FillValue will be written to the file.
+        if not FillValue:
+            all_var_names = list(ds.coords.keys()) + list(ds.data_vars)
+            for var_name in all_var_names:
+                if '_FillValue' in ds[var_name].attrs:
+                    continue
 
-        # Clean up _FillValue vs missing_value mess by creating an
-        # encoding dictionary with each variable's _FillValue set to
-        # requested fill value. May need to improve upon this for data type
-        # and other issues in the future.
-        if FillValue is not None:
-            skip_variables = ['base_time', 'time_offset', 'qc_time'] + list(encoding.keys())
-            for var_name in list(write_ds.data_vars):
-                if var_name not in skip_variables:
-                    encoding[var_name] = {'_FillValue': FillValue}
+                if var_name not in encoding.keys():
+                    encoding[var_name] = {'_FillValue': None}
+                elif '_FillValue' not in encoding[var_name].keys():
+                    encoding[var_name]['_FillValue'] = None
 
         if delete_global_attrs is not None:
             for attr in delete_global_attrs:
                 try:
-                    del write_ds.attrs[attr]
+                    del ds.attrs[attr]
                 except KeyError:
                     pass
 
-        for var_name in list(write_ds.keys()):
-            if 'string' in list(write_ds[var_name].attrs.keys()):
-                att = write_ds[var_name].attrs['string']
-                write_ds[var_name].attrs[var_name + '_string'] = att
-                del write_ds[var_name].attrs['string']
+        for var_name in list(ds.keys()):
+            if 'string' in list(ds[var_name].attrs.keys()):
+                att = ds[var_name].attrs['string']
+                ds[var_name].attrs[var_name + '_string'] = att
+                del ds[var_name].attrs['string']
 
         # If requested update global attributes and variables attributes for required
         # CF attributes.
         if cf_compliant:
             # Get variable names and standard name for each variable
-            var_names = list(write_ds.keys())
+            var_names = list(ds.keys())
             standard_names = []
             for var_name in var_names:
                 try:
-                    standard_names.append(write_ds[var_name].attrs['standard_name'])
+                    standard_names.append(ds[var_name].attrs['standard_name'])
                 except KeyError:
                     standard_names.append(None)
 
             # Check if time varible has axis and standard_name attribute
             coord_name = 'time'
             try:
-                write_ds[coord_name].attrs['axis']
+                ds[coord_name].attrs['axis']
             except KeyError:
                 try:
-                    write_ds[coord_name].attrs['axis'] = 'T'
+                    ds[coord_name].attrs['axis'] = 'T'
                 except KeyError:
                     pass
 
             try:
-                write_ds[coord_name].attrs['standard_name']
+                ds[coord_name].attrs['standard_name']
             except KeyError:
                 try:
-                    write_ds[coord_name].attrs['standard_name'] = 'time'
+                    ds[coord_name].attrs['standard_name'] = 'time'
                 except KeyError:
                     pass
 
             # Try to determine type of dataset by coordinate dimention named time
             # and other factors
             try:
-                write_ds.attrs['FeatureType']
+                ds.attrs['FeatureType']
             except KeyError:
-                dim_names = list(write_ds.dims)
+                dim_names = list(ds.dims)
                 FeatureType = None
                 if dim_names == ['time']:
                     FeatureType = 'timeSeries'
@@ -706,15 +716,15 @@
                     FeatureType = 'timeSeries'
                 elif len(dim_names) >= 2 and 'time' in dim_names:
                     for var_name in var_names:
-                        dims = list(write_ds[var_name].dims)
+                        dims = list(ds[var_name].dims)
                         if len(dims) == 2 and 'time' in dims:
                             prof_dim = list(set(dims) - {'time'})[0]
-                            if write_ds[prof_dim].values.size > 2:
+                            if ds[prof_dim].values.size > 2:
                                 FeatureType = 'timeSeriesProfile'
                                 break
 
                 if FeatureType is not None:
-                    write_ds.attrs['FeatureType'] = FeatureType
+                    ds.attrs['FeatureType'] = FeatureType
 
             # Add axis and positive attributes to variables with standard_name
             # equal to 'altitude'
@@ -723,18 +733,18 @@
             ]
             for var_name in alt_variables:
                 try:
-                    write_ds[var_name].attrs['axis']
+                    ds[var_name].attrs['axis']
                 except KeyError:
-                    write_ds[var_name].attrs['axis'] = 'Z'
+                    ds[var_name].attrs['axis'] = 'Z'
                 try:
-                    write_ds[var_name].attrs['positive']
+                    ds[var_name].attrs['positive']
                 except KeyError:
-                    write_ds[var_name].attrs['positive'] = 'up'
+                    ds[var_name].attrs['positive'] = 'up'
 
             # Check if the Conventions global attribute lists the CF convention
             try:
-                Conventions = write_ds.attrs['Conventions']
+                Conventions = ds.attrs['Conventions']
                 Conventions = Conventions.split()
                 cf_listed = False
                 for ii in Conventions:
@@ -743,37 +753,30 @@
                         break
                 if not cf_listed:
                     Conventions.append(cf_convention)
-                    write_ds.attrs['Conventions'] = ' '.join(Conventions)
+                    ds.attrs['Conventions'] = ' '.join(Conventions)
             except KeyError:
-                write_ds.attrs['Conventions'] = str(cf_convention)
+                ds.attrs['Conventions'] = str(cf_convention)
 
         # Reorder global attributes to ensure history is last
         try:
-            history = copy.copy(write_ds.attrs['history'])
-            del write_ds.attrs['history']
-            write_ds.attrs['history'] = history
+            history = copy.copy(ds.attrs['history'])
+            del ds.attrs['history']
+            ds.attrs['history'] = history
         except KeyError:
             pass
 
-        current_time = dt.datetime.now().replace(microsecond=0)
-        if 'history' in list(write_ds.attrs.keys()):
-            write_ds.attrs['history'] += ''.join(
-                [
-                    '\n',
-                    str(current_time),
-                    ' created by ACT ',
-                    str(act.__version__),
-                    ' act.io.write.write_netcdf',
-                ]
-            )
-
-        if 'time_bounds' in encoding.keys():
-            encoding['time_bounds']['dtype'] = 'float64'
-        if hasattr(write_ds, 'time_bounds') and not write_ds.time.encoding:
-            write_ds.time.encoding.update(write_ds.time_bounds.encoding)
+        current_time = dt.datetime.utcnow().replace(microsecond=0)
+        history_value = (
+            f'Written to file by ACT-{act.__version__} '
+            f'with write_netcdf() at {current_time} UTC'
+        )
+        if 'history' in list(ds.attrs.keys()):
+            ds.attrs['history'] += f" ; {history_value}"
+        else:
+            ds.attrs['history'] = history_value
 
-        write_ds.to_netcdf(encoding=encoding, **kwargs)
+        ds.to_netcdf(encoding=encoding, **kwargs)
 
 
 def check_if_tar_gz_file(filenames):
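To illustrate the reworked `write_netcdf()` interface above, a minimal usage sketch follows, assuming the usual `ds.write.write_netcdf()` accessor entry point; the file names and the encoding entry are illustrative, not part of the change:

```python
import act

# Read an ARM-style NetCDF file into an Xarray Dataset (file name is hypothetical).
ds = act.io.arm.read_arm_netcdf('sgpmetE13.b1.20190101.000000.cdf')

# FillValue=False suppresses the automatic _FillValue=NaN attribute on float
# variables; the encoding dictionary is passed through to Dataset.to_netcdf(),
# and any _FillValue it already defines is left untouched.
ds.write.write_netcdf(
    path='sgpmetE13.b1.20190101.000000.nc',
    FillValue=False,
    encoding={'temp_mean': {'dtype': 'float32'}},
)
```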
diff --git a/act/qc/arm.py b/act/qc/arm.py
index bf323e77ac..a0c99f7095 100644
--- a/act/qc/arm.py
+++ b/act/qc/arm.py
@@ -8,6 +8,7 @@
 import numpy as np
 import requests
 import json
+from dateutil import parser
 
 from act.config import DEFAULT_DATASTREAM_NAME
 
@@ -22,6 +23,7 @@ def add_dqr_to_qc(
     cleanup_qc=True,
     dqr_link=False,
     skip_location_vars=False,
+    create_missing_qc_variables=True,
 ):
     """
     Function to query the ARM DQR web service for reports and
@@ -68,6 +70,9 @@
    skip_location_vars : boolean
        Does not apply DQRs to location variables. This can be useful in the event
        the submitter has erroneously selected all variables.
+   create_missing_qc_variables : boolean
+       If a quality control variable for the data variable does not exist,
+       create the quality control variable and apply DQR.
 
    Returns
    -------
@@ -102,8 +107,35 @@
     if cleanup_qc:
         ds.clean.cleanup()
 
-    start_date = ds['time'].values[0].astype('datetime64[s]').astype(dt.datetime).strftime('%Y%m%d')
-    end_date = ds['time'].values[-1].astype('datetime64[s]').astype(dt.datetime).strftime('%Y%m%d')
+    # Get time from Dataset
+    time = ds['time'].values
+
+    # If the time is not a datetime64 because the read routine was not asked to
+    # convert CF variables, convert the time variable for this routine only.
+    if not np.issubdtype(time.dtype, np.datetime64):
+        units_strings = [
+            'seconds since ',
+            'minutes since ',
+            'hours since ',
+            'days since ',
+            'milliseconds since ',
+            'months since ',
+            'years since ',
+        ]
+        td64_strings = ['s', 'm', 'h', 'D', 'ms', 'M', 'Y']
+        units = ds['time'].attrs['units']
+        for ii, _ in enumerate(units_strings):
+            if units.startswith(units_strings[ii]):
+                units = units.replace(units_strings[ii], '')
+                td64_string = td64_strings[ii]
+                break
+
+        start_time = parser.parse(units)
+        start_time = np.datetime64(start_time, td64_string)
+        time = start_time + ds['time'].values.astype('timedelta64[s]')
+
+    start_date = time[0].astype('datetime64[s]').astype(dt.datetime).strftime('%Y%m%d')
+    end_date = time[-1].astype('datetime64[s]').astype(dt.datetime).strftime('%Y%m%d')
 
     # Clean up assessment to ensure it is a string with no spaces.
     if isinstance(assessment, (list, tuple)):
@@ -152,7 +184,7 @@
         for time_range in docs[quality_category][dqr_number]['dates']:
             starttime = np.datetime64(time_range['start_date'])
             endtime = np.datetime64(time_range['end_date'])
-            ind = np.where((ds['time'].values >= starttime) & (ds['time'].values <= endtime))
+            ind = np.where((time >= starttime) & (time <= endtime))
             if ind[0].size > 0:
                 index = np.append(index, ind[0])
 
@@ -181,6 +213,10 @@
         if skip_location_vars and var_name in loc_vars:
             continue
 
+        # Do not process time variables
+        if var_name in ['time', 'time_offset', 'time_bounds']:
+            continue
+
         # Only process provided variable names
         if variable is not None and var_name not in variable:
             continue
@@ -193,6 +229,12 @@
         except KeyError:
             pass
 
+        if (
+            create_missing_qc_variables is False
+            and ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False) is None
+        ):
+            continue
+
         try:
             ds.qcfilter.add_test(
                 var_name,
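A hedged sketch of how the new `create_missing_qc_variables` keyword to `add_dqr_to_qc()` might be exercised; the datastream file and variable name are placeholders:

```python
import act

# Dataset read from an ARM datastream file (placeholder name); the DQR query
# uses the datastream name and the time range derived from the Dataset.
ds = act.io.arm.read_arm_netcdf('sgpmetE13.b1.20190101.000000.cdf')

# Apply DQRs only to variables that already carry ancillary QC variables,
# leaving variables without an existing qc_ variable untouched.
ds = act.qc.arm.add_dqr_to_qc(ds, variable='temp_mean', create_missing_qc_variables=False)
```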
diff --git a/act/qc/clean.py b/act/qc/clean.py
index 2c0bb7baa7..2b951e4a3e 100644
--- a/act/qc/clean.py
+++ b/act/qc/clean.py
@@ -792,7 +792,7 @@ def normalize_assessment(
         self,
         variables=None,
         exclude_variables=None,
-        qc_lookup={'Incorrect': 'Bad', 'Suspect': 'Indeterminate'},
+        qc_lookup={'Bad': 'Incorrect', 'Indeterminate': 'Suspect'},
     ):
         """
         Method to clean up assessment terms used to be consistent between
diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py
index ff6518dde0..d96375abff 100644
--- a/act/qc/qc_summary.py
+++ b/act/qc/qc_summary.py
@@ -8,6 +8,9 @@
 """
 
 import datetime
+import copy
+import xarray as xr
+import warnings
 
 
 class QCSummary:
@@ -23,7 +26,12 @@ def __init__(self, ds):
         """initialize"""
         self._ds = ds
 
-    def create_qc_summary(self, cleanup_qc=False):
+    def create_qc_summary(
+        self,
+        cleanup_qc=False,
+        remove_attrs=['fail_min', 'fail_max', 'fail_delta'],
+        normalize_assessment=True,
+    ):
         """
         Method to convert embedded quality control to summary QC that
         utilzes flag values instead of flag masks and summarizes the assessments to only
@@ -34,6 +42,11 @@
            Call clean.cleanup() method to convert to standardized ancillary
            quality control variables. The quality control summary requires the current
            embedded quality control variables to use ACT standards.
+       remove_attrs : None, list
+           Quality Control variable attributes to remove after creating the summary.
+       normalize_assessment : bool
+           Option to clean up assessments to use the same terminology.
+
 
        Returns
        -------
@@ -42,22 +55,19 @@
         """
 
-        standard_assessments = [
-            'Suspect',
-            'Indeterminate',
-            'Incorrect',
-            'Bad',
-        ]
-        standard_meanings = [
-            "Data suspect, further analysis recommended",
-            "Data suspect, further analysis recommended",
-            "Data incorrect, use not recommended",
-            "Data incorrect, use not recommended",
-        ]
+        standard_meanings = {
+            'Suspect': "Data suspect further analysis recommended",
+            'Indeterminate': "Data suspect further analysis recommended",
+            'Incorrect': "Data incorrect use not recommended",
+            'Bad': "Data incorrect use not recommended",
+        }
 
         if cleanup_qc:
             self._ds.clean.cleanup()
 
+        if normalize_assessment:
+            self._ds.clean.normalize_assessment()
+
         return_ds = self._ds.copy()
 
         added = False
@@ -67,11 +77,15 @@
             if qc_var_name is None:
                 continue
 
-            added = True
-
-            assessments = list(set(self._ds[qc_var_name].attrs['flag_assessments']))
+            # Do not really know how to handle scalars yet.
+            if return_ds[qc_var_name].ndim == 0:
+                warnings.warn(
+                    f'Unable to process scalar variable {var_name}. '
+                    'Scalar variables currently not implemented.'
+                )
+                continue
 
-            import xarray as xr
+            added = True
 
             result = xr.zeros_like(return_ds[qc_var_name])
             for attr in ['flag_masks', 'flag_meanings', 'flag_assessments', 'flag_values']:
@@ -91,34 +105,50 @@
                 flag_value=True,
             )
 
-            for ii, assessment in enumerate(standard_assessments):
-                if assessment not in assessments:
-                    continue
+            flag_assessments = list(standard_meanings.keys())
+            added_assessments = set(self._ds[qc_var_name].attrs['flag_assessments']) - set(
+                flag_assessments
+            )
+            flag_assessments += list(added_assessments)
+            for ii, assessment in enumerate(flag_assessments):
+                try:
+                    standard_meaning = standard_meanings[assessment.capitalize()]
+                except KeyError:
+                    standard_meaning = f"Data {assessment}"
 
                 qc_mask = self.get_masked_data(
                     var_name, rm_assessments=assessment, return_mask_only=True
                 )
 
-                # Do not really know how to handle scalars yet.
-                if qc_mask.ndim == 0:
-                    continue
+                # # Do not really know how to handle scalars yet.
 
                 return_ds.qcfilter.add_test(
                     var_name,
                     index=qc_mask,
-                    test_meaning=standard_meanings[ii],
+                    test_meaning=standard_meaning,
                    test_assessment=assessment,
                    flag_value=True,
                )
 
+            # Remove fail limit variable attributes
+            if remove_attrs is not None:
+                for att_name in copy.copy(list(return_ds[qc_var_name].attrs.keys())):
+                    if att_name in remove_attrs:
+                        del return_ds[qc_var_name].attrs[att_name]
+
             self._ds.update({qc_var_name: return_ds[qc_var_name]})
 
         if added:
-            history = return_ds.attrs['history']
-            history += (
-                " ; Quality control summary implemented by ACT at "
-                f"{datetime.datetime.utcnow().isoformat()} UTC."
+            from act import __version__ as version
+
+            history_value = (
+                f"Quality control summary implemented by ACT-{version} at "
+                f"{datetime.datetime.utcnow().replace(microsecond=0)} UTC"
             )
-            return_ds.attrs['history'] = history
+
+            if 'history' in list(return_ds.attrs.keys()):
+                return_ds.attrs['history'] += f" ; {history_value}"
+            else:
+                return_ds.attrs['history'] = history_value
 
         return return_ds
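For reference, a short sketch of the summary call with the new keywords, with the defaults written out explicitly; `ds` stands for any Dataset carrying ACT-standard embedded QC:

```python
# Collapse per-test QC bit flags into flag_values with standardized assessments.
# remove_attrs drops the per-test limit attributes from the summary QC variable and
# normalize_assessment maps Bad/Indeterminate to Incorrect/Suspect beforehand.
summary_ds = ds.qcfilter.create_qc_summary(
    cleanup_qc=True,
    remove_attrs=['fail_min', 'fail_max', 'fail_delta'],
    normalize_assessment=True,
)
```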
diff --git a/act/qc/qcfilter.py b/act/qc/qcfilter.py
index c8a64ca5e4..c24e655de9 100644
--- a/act/qc/qcfilter.py
+++ b/act/qc/qcfilter.py
@@ -10,6 +10,7 @@
 import xarray as xr
 
 from act.qc import comparison_tests, qctests, bsrn_tests, qc_summary
+from act.utils.data_utils import get_missing_value
 
 
 @xr.register_dataset_accessor('qcfilter')
@@ -957,6 +958,7 @@ def datafilter(
         rm_tests=None,
         verbose=False,
         del_qc_var=False,
+        no_NaN=False,
     ):
         """
         Method to apply quality control variables to data variables by
@@ -987,6 +989,12 @@
            and xarray method processing would also process the quality control
            variables, the default is to remove the quality control data
            variables. Defaults to False.
+       no_NaN : boolean
+           Option to not use NaN as the missing value indicator in the returned Dataset.
+           If Xarray did not convert the _FillValue or missing_value to NaN upon reading,
+           the same missing value indicator should be used. If set to True will try to determine
+           the current missing_value or _FillValue set in the file and use that value. If neither
+           are set as a variable attribute it will use the default value (most likely -9999).
 
        Examples
        --------
@@ -1048,6 +1056,12 @@
                 var_name, rm_assessments=rm_assessments, rm_tests=rm_tests, return_nan_array=True
             )
 
+            if no_NaN:
+                missing_value = get_missing_value(self._ds, var_name, add_if_missing_in_ds=True)
+                index = np.isnan(data)
+                if np.any(index):
+                    data[index] = missing_value
+
             # If data was orginally stored as Dask array return values to Dataset as Dask array
             # else set as Numpy array.
             try:
@@ -1070,35 +1084,45 @@
                 pass
 
             # Add comment to history for each test that's filtered out
-            if isinstance(rm_tests, int):
-                rm_tests = [rm_tests]
             if rm_tests is not None:
-                for test in list(rm_tests):
-                    test = 2 ** (test - 1)
-                    if test in flag_masks:
-                        index = flag_masks.index(test)
-                        comment = ''.join(['act.qc.datafilter: ', flag_meanings[index]])
-                        if 'history' in self._ds[var_name].attrs.keys():
-                            self._ds[var_name].attrs['history'] += '\n' + comment
-                        else:
-                            self._ds[var_name].attrs['history'] = comment
+                if isinstance(rm_tests, int):
+                    rm_tests = [rm_tests]
+
+                for test in rm_tests:
+                    try:
+                        index = flag_masks.index(set_bit(0, test))
+                    except ValueError:
+                        continue
+
+                    comment = f'act.qc.datafilter: {flag_meanings[index]}'
+                    if 'history' in self._ds[var_name].attrs.keys():
+                        self._ds[var_name].attrs['history'] += f'\n {comment}'
+                    else:
+                        self._ds[var_name].attrs['history'] = comment
 
-            if isinstance(rm_assessments, str):
-                rm_assessments = [rm_assessments]
             if rm_assessments is not None:
+                if isinstance(rm_assessments, str):
+                    rm_assessments = [rm_assessments]
+
                 for assessment in rm_assessments:
                     if assessment in flag_assessments:
                         index = [i for i, e in enumerate(flag_assessments) if e == assessment]
                         for ind in index:
-                            comment = ''.join(['act.qc.datafilter: ', flag_meanings[ind]])
+                            comment = f'act.qc.datafilter: {flag_meanings[ind]}'
                             if 'history' in self._ds[var_name].attrs.keys():
-                                self._ds[var_name].attrs['history'] += '\n' + comment
+                                self._ds[var_name].attrs['history'] += f'\n {comment}'
                             else:
                                 self._ds[var_name].attrs['history'] = comment
 
             # If requested delete quality control variable
             if del_qc_var:
                 del self._ds[qc_var_name]
 
+                try:
+                    if self._ds[var_name].attrs['ancillary_variables'] == qc_var_name:
+                        del self._ds[var_name].attrs['ancillary_variables']
+                except KeyError:
+                    pass
+
                 if verbose:
                     print(f'Deleting {qc_var_name} from dataset')
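And a sketch of `datafilter()` with the new `no_NaN` keyword; the assessment names are illustrative:

```python
# Remove data failing Bad/Incorrect tests. With no_NaN=True the filtered samples
# are set back to the variable's missing value indicator (e.g. -9999) rather than
# NaN, matching files read without CF decoding.
ds.qcfilter.datafilter(rm_assessments=['Bad', 'Incorrect'], no_NaN=True)
```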
diff --git a/tests/discovery/test_asos.py b/tests/discovery/test_asos.py
index e0f44842df..7bac2e8eb0 100644
--- a/tests/discovery/test_asos.py
+++ b/tests/discovery/test_asos.py
@@ -1,5 +1,5 @@
 from datetime import datetime
-
+import pytest
 import numpy as np
 
 import act
@@ -22,6 +22,9 @@ def test_get_region():
     time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 12, 10, 0)]
     lat_window = (41.8781 - 0.5, 41.8781 + 0.5)
     lon_window = (-87.6298 - 0.5, -87.6298 + 0.5)
-    my_asoses = act.discovery.get_asos_data(time_window, lat_range=lat_window, lon_range=lon_window)
+    with pytest.warns(UserWarning, match="No data available at station"):
+        my_asoses = act.discovery.get_asos_data(
+            time_window, lat_range=lat_window, lon_range=lon_window
+        )
     asos_keys = list(my_asoses.keys())
     assert asos_keys == my_keys
diff --git a/tests/io/test_ameriflux.py b/tests/io/test_ameriflux.py
index 395983de39..bf2820a160 100644
--- a/tests/io/test_ameriflux.py
+++ b/tests/io/test_ameriflux.py
@@ -1,13 +1,15 @@
 import act
 import glob
 import xarray as xr
+import pytest
 
 
 def test_convert_to_ameriflux():
     files = glob.glob(act.tests.sample_files.EXAMPLE_ECORSF_E39)
     ds_ecor = act.io.arm.read_arm_netcdf(files)
 
-    df = act.io.ameriflux.convert_to_ameriflux(ds_ecor)
+    with pytest.warns(UserWarning, match="mapping was not provided"):
+        df = act.io.ameriflux.convert_to_ameriflux(ds_ecor)
 
     assert 'FC' in df
     assert 'WS_MAX' in df
@@ -16,7 +18,8 @@
     ds_sebs = act.io.arm.read_arm_netcdf(files)
     ds = xr.merge([ds_ecor, ds_sebs])
 
-    df = act.io.ameriflux.convert_to_ameriflux(ds)
+    with pytest.warns(UserWarning, match="mapping was not provided"):
+        df = act.io.ameriflux.convert_to_ameriflux(ds)
 
     assert 'SWC_2_1_1' in df
     assert 'TS_3_1_1' in df
@@ -26,7 +29,8 @@
     ds_stamp = act.io.arm.read_arm_netcdf(files)
     ds = xr.merge([ds_ecor, ds_sebs, ds_stamp], compat='override')
 
-    df = act.io.ameriflux.convert_to_ameriflux(ds)
+    with pytest.warns(UserWarning, match="mapping was not provided"):
+        df = act.io.ameriflux.convert_to_ameriflux(ds)
 
     assert 'SWC_6_10_1' in df
     assert 'G_2_1_1' in df
diff --git a/tests/plotting/test_distributiondisplay.py b/tests/plotting/test_distributiondisplay.py
index ad906ab443..eea1deb0cf 100644
--- a/tests/plotting/test_distributiondisplay.py
+++ b/tests/plotting/test_distributiondisplay.py
@@ -419,7 +419,12 @@ def test_plot_pie_chart():
     ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_AOSACSM)
     fields = ['sulfate', 'ammonium', 'nitrate', 'chloride']
     display = DistributionDisplay(ds)
-    display.plot_pie_chart(fields)
+    with pytest.warns(UserWarning, match="contains negatives values, consider using a threshold."):
+        with pytest.warns(
+            UserWarning,
+            match="No time parameter used, calculating a mean for each field for the whole dataset.",
+        ):
+            display.plot_pie_chart(fields)
     ds.close()
 
     try:
@@ -435,12 +440,16 @@ def test_plot_pie_chart_kwargs():
     threshold = 0.0
     fill_value = 0.0
     display = DistributionDisplay(ds)
-    display.plot_pie_chart(
-        fields,
-        threshold=threshold,
-        fill_value=fill_value,
-        colors=['olivedrab', 'rosybrown', 'gray', 'saddlebrown'],
-    )
+    with pytest.warns(
+        UserWarning,
+        match="No time parameter used, calculating a mean for each field for the whole dataset.",
+    ):
+        display.plot_pie_chart(
+            fields,
+            threshold=threshold,
+            fill_value=fill_value,
+            colors=['olivedrab', 'rosybrown', 'gray', 'saddlebrown'],
+        )
     ds.close()
 
     try:
diff --git a/tests/plotting/test_skewtdisplay.py b/tests/plotting/test_skewtdisplay.py
index 21a45281a9..8047bba58b 100644
--- a/tests/plotting/test_skewtdisplay.py
+++ b/tests/plotting/test_skewtdisplay.py
@@ -67,7 +67,8 @@ def test_multi_skewt_plot():
 
 def test_enhanced_skewt_plot():
     ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1)
     display = act.plotting.SkewTDisplay(ds)
-    display.plot_enhanced_skewt(color_field='alt', component_range=85)
+    with pytest.warns():
+        display.plot_enhanced_skewt(color_field='alt', component_range=85)
     ds.close()
     return display.fig
diff --git a/tests/plotting/test_timeseriesdisplay.py b/tests/plotting/test_timeseriesdisplay.py
index d1461b3779..508331f421 100644
--- a/tests/plotting/test_timeseriesdisplay.py
+++ b/tests/plotting/test_timeseriesdisplay.py
@@ -462,7 +462,8 @@ def test_plot_barbs_from_u_v4():
     fake_ds = xr.Dataset(
         {'xbins': xbins, 'ybins': ybins, 'ydata': y_array, 'xdata': x_array, 'pres': pres}
     )
-    BarbDisplay = TimeSeriesDisplay(fake_ds)
+    with pytest.warns(UserWarning, match="Could not discern datastreamname and dict or tuple"):
+        BarbDisplay = TimeSeriesDisplay(fake_ds)
     BarbDisplay.plot_barbs_from_u_v(
         'xdata', 'ydata', None, set_title='test', use_var_for_y='pres', cmap='jet'
     )
@@ -488,7 +489,8 @@ def test_plot_barbs_from_u_v5():
     fake_ds = xr.Dataset(
         {'xbins': xbins, 'ybins': ybins, 'ydata': y_array, 'xdata': x_array, 'pres': pres}
     )
-    BarbDisplay = TimeSeriesDisplay(fake_ds)
+    with pytest.warns(UserWarning, match="Could not discern datastreamname and dict or tuple"):
+        BarbDisplay = TimeSeriesDisplay(fake_ds)
     BarbDisplay.plot_barbs_from_u_v(
         'xdata',
         'ydata',
diff --git a/tests/plotting/test_windrosedisplay.py b/tests/plotting/test_windrosedisplay.py
index 7566e9d8f0..19934b052c 100644
--- a/tests/plotting/test_windrosedisplay.py
+++ b/tests/plotting/test_windrosedisplay.py
@@ -138,15 +138,16 @@ def test_groupby_plot():
     # Create Plot Display
     display = WindRoseDisplay(ds, figsize=(15, 15), subplot_shape=(3, 3))
     groupby = display.group_by('day')
-    groupby.plot_group(
-        'plot_data',
-        None,
-        dir_field='wdir_vec_mean',
-        spd_field='wspd_vec_mean',
-        data_field='temp_mean',
-        num_dirs=12,
-        plot_type='line',
-    )
+    with pytest.warns(RuntimeWarning):
+        groupby.plot_group(
+            'plot_data',
+            None,
+            dir_field='wdir_vec_mean',
+            spd_field='wspd_vec_mean',
+            data_field='temp_mean',
+            num_dirs=12,
+            plot_type='line',
+        )
 
     # Set theta tick markers for each axis inside display to be inside the polar axes
     for i in range(3):
diff --git a/tests/plotting/test_xsectiondisplay.py b/tests/plotting/test_xsectiondisplay.py
index 732165cd2e..8dede9047a 100644
--- a/tests/plotting/test_xsectiondisplay.py
+++ b/tests/plotting/test_xsectiondisplay.py
@@ -57,7 +57,11 @@ def test_xsection_plot_map():
         sample_files.EXAMPLE_VISST, combine='nested', concat_dim='time'
     )
     try:
-        xsection = XSectionDisplay(radar_ds, figsize=(15, 8))
+        with pytest.warns(
+            UserWarning,
+            match="Could not discern datastreamname and dict or tuple were not provided. Using defaultname of act_datastream!",
+        ):
+            xsection = XSectionDisplay(radar_ds, figsize=(15, 8))
         xsection.plot_xsection_map(
             None,
             'ir_temperature',
diff --git a/tests/qc/test_qc_summary.py b/tests/qc/test_qc_summary.py
index 40cd8c5fc0..53f9e29af3 100644
--- a/tests/qc/test_qc_summary.py
+++ b/tests/qc/test_qc_summary.py
@@ -36,14 +36,24 @@
     assert 'flag_masks' not in result[qc_var_name].attrs.keys()
     assert isinstance(result[qc_var_name].attrs['flag_values'], list)
 
-    assert np.sum(result[qc_var_name].values) == 610
+    assert np.sum(result[qc_var_name].values) == 880
 
-    qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Indeterminate')
-    assert np.all(np.where(qc_ma.mask)[0] == np.arange(100, 170))
+    qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Suspect')
+    assert np.sum(np.where(qc_ma.mask)) == 9415
 
-    qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Bad')
-    index = np.concatenate([index_1, index_2, index_3])
-    assert np.all(np.where(qc_ma.mask)[0] == index)
+    qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Incorrect')
+    assert np.sum(np.where(qc_ma.mask)) == 89415
+
+    att_names = [
+        'fail_min',
+        'fail_max',
+        'fail_delta',
+        'valid_min',
+        'valid_max',
+        'valid_delta',
+    ]
+    for att_name in att_names:
+        assert att_name not in ds[f'qc_{var_name}'].attrs
 
     assert "Quality control summary implemented by ACT" in result.attrs['history']
 
@@ -78,7 +88,7 @@
         var_name, index=index_6, test_meaning='Testing Incorrect', test_assessment='Incorrect'
     )
 
-    result = ds.qcfilter.create_qc_summary()
+    result = ds.qcfilter.create_qc_summary(normalize_assessment=False)
 
     assert result[qc_var_name].attrs['flag_assessments'] == [
         'Not failing',
@@ -105,6 +115,118 @@ def test_qc_summary_multiple_assessment_names():
     assert np.sum(np.where(qc_ma.mask)[0]) == 884575
 
 
+def test_qc_summary_unexpected_assessment_name():
+    var_name = 'temp_mean'
+    ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=var_name)
+
+    test_meanings = [
+        'Testing Bad',
+        'Testing Boomer',
+        'Testing Boomer Second',
+        'Testing Incorrect',
+        'Testing Indeterminate',
+        'Testing Sooner',
+        'Testing Suspect',
+    ]
+    test_assessments = [
+        'Bad',
+        'Boomer',
+        'boomer',
+        'Incorrect',
+        'Indeterminate',
+        'Sooner',
+        'Suspect',
+    ]
+
+    test_index_sums = [4950, 39900, 39900, 34950, 44950, 54950, 64950]
+
+    for ii, _ in enumerate(test_assessments):
+        ds.qcfilter.add_test(
+            var_name,
+            index=np.arange(ii * 100, ii * 100 + 100),
+            test_meaning=test_meanings[ii],
+            test_assessment=test_assessments[ii],
+        )
+
+    ds = ds.qcfilter.create_qc_summary(normalize_assessment=False)
+
+    qc_var_name = ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False)
+
+    # Make sure flag meanings are correct with new assessments.
+    assert sorted(ds[qc_var_name].attrs['flag_meanings']) == [
+        'Data Boomer',
+        'Data Sooner',
+        'Data incorrect use not recommended',
+        'Data incorrect use not recommended',
+        'Data suspect further analysis recommended',
+        'Data suspect further analysis recommended',
+        'Not failing quality control tests',
+    ]
+    assert sorted(ds[qc_var_name].attrs['flag_assessments']) == [
+        'Bad',
+        'Boomer',
+        'Incorrect',
+        'Indeterminate',
+        'Not failing',
+        'Sooner',
+        'Suspect',
+    ]
+    # Make sure the values and order of first 5 are as expected. The other non-standard
+    # assessments may be in different order with set operations.
+    assert ds[qc_var_name].attrs['flag_assessments'][:5] == [
+        'Not failing',
+        'Suspect',
+        'Indeterminate',
+        'Incorrect',
+        'Bad',
+    ]
+
+    for assessment, index_sum in zip(test_assessments, test_index_sums):
+        qc_ma = ds.qcfilter.get_masked_data(var_name, rm_assessments=assessment)
+        assert np.sum(np.where(qc_ma.mask)[0]) == index_sum
+
+    qc_ma = ds.qcfilter.get_masked_data(var_name, rm_assessments=['Bucky'])
+    assert np.sum(np.where(qc_ma.mask)[0]) == 0
+
+    qc_ma = ds.qcfilter.get_masked_data(var_name, rm_assessments=['Boomer', 'Sooner'])
+    assert np.sum(np.where(qc_ma.mask)[0]) == 94850
+
+    qc_ma = ds.qcfilter.get_masked_data(
+        var_name,
+        rm_assessments=['Boomer', 'Sooner', 'Indeterminate', 'Suspect', 'Bad', 'Incorrect'],
+    )
+    assert np.sum(np.where(qc_ma.mask)[0]) == 244650
+
+    del ds
+
+
+def test_qc_summary_scalar():
+    # Test scalar variables. Currently not implemented so just check that we
+    # don't do anything.
+    var_names = ['alt', 'temp_mean']
+    ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=var_names)
+
+    test_meanings = ['Testing Incorrect', 'Testing Suspect']
+    test_assessments = ['Incorrect', 'Suspect']
+
+    for var_name in var_names:
+        for ii, _ in enumerate(test_assessments):
+            ds.qcfilter.add_test(
+                var_name,
+                index=0,
+                test_meaning=test_meanings[ii],
+                test_assessment=test_assessments[ii],
+            )
+
+    with pytest.warns(UserWarning, match="Unable to process scalar variable"):
+        ds = ds.qcfilter.create_qc_summary(normalize_assessment=False)
+
+    assert 'flag_masks' in ds[f'qc_{var_names[0]}'].attrs.keys()
+    assert 'flag_values' not in ds[f'qc_{var_names[0]}'].attrs.keys()
+    assert 'flag_masks' not in ds[f'qc_{var_names[1]}'].attrs.keys()
+    assert 'flag_values' in ds[f'qc_{var_names[1]}'].attrs.keys()
+
+
 @pytest.mark.big
 @pytest.mark.skipif('ARCHIVE_DATA' not in environ, reason="Running outside ADC system.")
 def test_qc_summary_big_data():
@@ -150,31 +272,13 @@
         'zrh',
         'osc',
     ]
-    skip_datastream_codes = [
-        'mmcrmom',
-        # 'microbasepi',
-        # 'lblch1a',
-        # '30co2flx4mmet',
-        # 'microbasepi2',
-        # '30co2flx60m',
-        # 'bbhrpavg1mlawer',
-        # 'co',
-        # 'lblch1b',
-        # '30co2flx25m',
-        # '30co2flx4m',
-        # 'armbeatm',
-        # 'armtrajcld',
-        # '1swfanalsiros1long',
-    ]
-    # skip_datastreams = ['nimmfrsraod5chcorM1.c1', 'anxaoso3M1.b0']
+    skip_datastream_codes = ['mmcrmom']
 
     num_files = 3
     expected_assessments = ['Not failing', 'Suspect', 'Indeterminate', 'Incorrect', 'Bad']
 
     testing_files = []
 
-    single_test = False
     if len(testing_files) == 0:
-        single_test = True
         filename = (
            f'test_qc_summary_big_data.{datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")}.txt'
        )
@@ -192,9 +296,6 @@
            if '-' in datastream_dir.name:
                continue
 
-           # if datastream_dir.name in skip_datastreams:
-           #     continue
-
            fn_obj = DatastreamParserARM(datastream_dir.name)
            facility = fn_obj.facility
            if facility is not None and facility[0] in ['A', 'X', 'U', 'F', 'N']:
@@ -216,8 +317,7 @@
            for ii in range(0, num_tests):
                testing_files.append(random.choice(files))
 
-    if single_test:
-        print(f"Testing {len(testing_files)} files\n")
+    print(f"\nTesting {len(testing_files)} files\n")
     print(f"Output file name = {output_file}\n")
 
     for file in testing_files:
diff --git a/tests/qc/test_qcfilter.py b/tests/qc/test_qcfilter.py
index 163206f56f..d8ddd2d904 100644
--- a/tests/qc/test_qcfilter.py
+++ b/tests/qc/test_qcfilter.py
@@ -45,10 +45,10 @@ def test_arm_qc():
     except ValueError:
         return
 
-    assert 'Suspect' not in ds[qc_variable].attrs['flag_assessments']
-    assert 'Incorrect' not in ds[qc_variable].attrs['flag_assessments']
-    assert 'Bad' in ds[qc_variable].attrs['flag_assessments']
-    assert 'Indeterminate' in ds[qc_variable].attrs['flag_assessments']
+    assert 'Suspect' in ds[qc_variable].attrs['flag_assessments']
+    assert 'Incorrect' in ds[qc_variable].attrs['flag_assessments']
+    assert 'Bad' not in ds[qc_variable].attrs['flag_assessments']
+    assert 'Indeterminate' not in ds[qc_variable].attrs['flag_assessments']
 
     # Check that defualt will update all variables in DQR
     for var_name in ['wdir_vec_mean', 'wdir_vec_std', 'wspd_arith_mean', 'wspd_vec_mean']:
@@ -409,29 +409,50 @@ def test_datafilter():
     data_var_names.sort()
     qc_var_names.sort()
 
-    var_name = 'atmos_pressure'
+    var_name = 'rh_mean'
 
-    ds_1 = ds.mean()
+    ds_1 = ds.sum()
 
-    ds.qcfilter.add_less_test(var_name, 99, test_assessment='Bad')
+    ds.qcfilter.add_less_test(var_name, 80, test_assessment='Bad')
+    ds.qcfilter.add_less_test(var_name, 70, test_assessment='Suspect')
 
     ds_filtered = copy.deepcopy(ds)
     ds_filtered.qcfilter.datafilter(rm_assessments='Bad')
-    ds_2 = ds_filtered.mean()
-    assert np.isclose(ds_1[var_name].values, 98.86, atol=0.01)
-    assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01)
+    ds_2 = ds_filtered.sum()
+    assert np.isclose(ds_1[var_name].values, 104602.23, atol=0.01)
+    assert np.isclose(ds_2[var_name].values, 7466.4004, atol=0.01)
     assert isinstance(ds_1[var_name].data, da.core.Array)
     assert 'act.qc.datafilter' in ds_filtered[var_name].attrs['history']
+    assert 'ancillary_variables' in ds_filtered[var_name].attrs.keys()
 
     ds_filtered = copy.deepcopy(ds)
     ds_filtered.qcfilter.datafilter(rm_assessments='Bad', variables=var_name, del_qc_var=True)
-    ds_2 = ds_filtered.mean()
-    assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01)
+    ds_2 = ds_filtered.sum()
+    assert np.isclose(ds_2[var_name].values, 7466.40, atol=0.01)
     expected_var_names = sorted(list(set(data_var_names + qc_var_names) - {'qc_' + var_name}))
     assert sorted(list(ds_filtered.data_vars)) == expected_var_names
 
     ds_filtered = copy.deepcopy(ds)
-    ds_filtered.qcfilter.datafilter(rm_assessments='Bad', del_qc_var=True)
+    ds_filtered.qcfilter.datafilter(rm_assessments='Suspect', del_qc_var=True)
+    ds_2 = ds_filtered.sum()
+    assert np.isclose(ds_2[var_name].values, 80244.33, atol=0.01)
     assert sorted(list(ds_filtered.data_vars)) == data_var_names
+    assert 'ancillary_variables' not in ds_filtered[var_name].attrs.keys()
+
+    ds_filtered = copy.deepcopy(ds)
+    ds_filtered.qcfilter.datafilter(rm_assessments=['Bad', 'Suspect'])
+    ds_2 = ds_filtered.sum()
+    assert np.isclose(ds_2[var_name].values, 7466.40, atol=0.01)
+
+    ds_filtered = copy.deepcopy(ds)
+    ds_filtered.qcfilter.datafilter(rm_assessments=['Sponge', 'Bob'])
+    ds_2 = ds_filtered.sum()
+    assert np.isclose(ds_2[var_name].values, 104602.23, atol=0.01)
+
+    ds_filtered = copy.deepcopy(ds)
+    ds_filtered.qcfilter.datafilter(rm_assessments=['Sponge', 'Bob', 'suspect'], variables=var_name)
+    ds_2 = ds_filtered.sum()
+    assert np.isclose(ds_2[var_name].values, 80244.33, atol=0.01)
+    assert np.isclose(ds_2['temp_mean'].values, np.sum(ds_filtered['temp_mean'].values), atol=0.01)
 
     ds.close()
     del ds
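The test updates above follow one pattern: calls that now emit warnings are wrapped in `pytest.warns` so the expected warning is asserted rather than leaking into the test output. A self-contained sketch of that pattern (the warning text and helper are invented for the example):

```python
import warnings

import pytest


def _emit():
    # Stand-in for an ACT call that warns, e.g. when no station data is available.
    warnings.warn("No data available at station XYZ", UserWarning)


def test_emit_warns():
    # match is a regular expression searched against the warning message.
    with pytest.warns(UserWarning, match="No data available at station"):
        _emit()
```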