From 5b1fed49069eb72c5150958ef5d3385fd224fe6d Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Thu, 8 Aug 2024 16:53:22 +0000 Subject: [PATCH 01/31] Not creating QC variable for time variables. --- act/qc/arm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/act/qc/arm.py b/act/qc/arm.py index bf323e77ac..edb9f45c73 100644 --- a/act/qc/arm.py +++ b/act/qc/arm.py @@ -181,6 +181,10 @@ def add_dqr_to_qc( if skip_location_vars and var_name in loc_vars: continue + # Do not process time varibles + if var_name in ['time', 'time_offset']: + continue + # Only process provided variable names if variable is not None and var_name not in variable: continue From 74892bd4f2cdff17e330ddf42e0c766b9a7d7b8c Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Thu, 8 Aug 2024 16:54:05 +0000 Subject: [PATCH 02/31] Updates to make writing more CF compliant. --- act/io/arm.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/act/io/arm.py b/act/io/arm.py index e501726a4d..635af00397 100644 --- a/act/io/arm.py +++ b/act/io/arm.py @@ -623,6 +623,9 @@ def write_netcdf( if 'standard_name' not in write_ds[var_name].attrs.keys(): continue + if write_ds[var_name].attrs['standard_name'] != "quality_flag": + continue + for attr_name in check_atts: try: att_values = write_ds[var_name].attrs[attr_name] @@ -637,18 +640,22 @@ def write_netcdf( # Tell .to_netcdf() to not add a _FillValue attribute for # quality control variables. - if FillValue is not None: + if FillValue is not False: encoding[var_name] = {'_FillValue': None} - # Clean up _FillValue vs missing_value mess by creating an - # encoding dictionary with each variable's _FillValue set to - # requested fill value. May need to improve upon this for data type - # and other issues in the future.
- if FillValue is not None: - skip_variables = ['base_time', 'time_offset', 'qc_time'] + list(encoding.keys()) - for var_name in list(write_ds.data_vars): - if var_name not in skip_variables: - encoding[var_name] = {'_FillValue': FillValue} + # Clean up _FillValue vs missing_value mess by creating an + # encoding dictionary with each variable's _FillValue set to + # requested fill value. May need to improve upon this for data type + # and other issues in the future. + if FillValue is not False: + for var_name in list(write_ds.data_vars): + if '_FillValue' in write_ds[var_name].attrs: + continue + + if var_name not in encoding.keys(): + encoding[var_name] = {'_FillValue': FillValue} + elif '_FillValue' not in encoding[var_name].keys(): + encoding[var_name] = {'_FillValue': FillValue} if delete_global_attrs is not None: for attr in delete_global_attrs: @@ -767,8 +774,14 @@ def write_netcdf( ] ) - if 'time_bounds' in encoding.keys(): - encoding['time_bounds']['dtype'] = 'float64' + # Correct time variable from having a _FillValue attribute written to the file. + try: + if 'time' not in encoding.keys(): + encoding['time'] = {} + + encoding['time']['_FillValue'] = None + except KeyError: + pass if hasattr(write_ds, 'time_bounds') and not write_ds.time.encoding: write_ds.time.encoding.update(write_ds.time_bounds.encoding) From e53eb1e1eb4116e307be16f7a9af5e4937bb194f Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 9 Aug 2024 14:07:59 -0600 Subject: [PATCH 03/31] Updated how to handle _FillValue. Changed all Xarray Datasets to ds. Improved how history attribute is modified. 
--- act/io/arm.py | 154 +++++++++++++++++++++++--------------------------- 1 file changed, 72 insertions(+), 82 deletions(-) diff --git a/act/io/arm.py b/act/io/arm.py index 635af00397..893bc84e28 100644 --- a/act/io/arm.py +++ b/act/io/arm.py @@ -549,8 +549,9 @@ def write_netcdf( make_copy=True, cf_compliant=False, delete_global_attrs=['qc_standards_version', 'qc_method', 'qc_comment'], - FillValue=-9999, + FillValue=True, cf_convention='CF-1.8', + encoding={}, **kwargs, ): """ @@ -573,7 +574,8 @@ def write_netcdf( white space between words. join_char : str The character sting to use for replacing white spaces between words when converting - a list of strings to single character string attributes. + a list of strings to single character string attributes. Main use is with the + flag_meanings attribute. make_copy : boolean Make a copy before modifying Dataset to write. For large Datasets this may add processing time and memory. If modifying the Dataset is OK @@ -587,14 +589,18 @@ def write_netcdf( Optional global attributes to be deleted. Defaults to some standard QC attributes that are not needed. Can add more or set to None to not remove the attributes. - FillValue : int, float - The value to use as a _FillValue in output file. This is used to fix - issues with how Xarray handles missing_value upon reading. It's confusing - so not a perfect fix. Set to None to leave Xarray to do what it wants. - Set to a value to be the value used as _FillValue in the file and data - array. This should then remove missing_value attribute from the file as well. + FillValue : boolean + Xarray assumes all float type variables had the missing value indicator converted + to NaN upon reading. to_netcdf() will then write a _FillValue attribute set to NaN. + Set FillValue to False to supress adding the _FillValue=NaN variable attribute to + the written file. Set to True to allow to_netcdf() to add the attribute. 
+ If the Dataset variable already has a _FillValue attribute or a _FillValue key + is provided in the encoding dictionary those will not be changed and a _FillValue + will be written to NetCDF file. cf_convention : str The Climate and Forecast convention string to add to Conventions attribute. + encoding : dict + The encoding dictionary used with to_netcdf() method. **kwargs : keywords Keywords to pass through to Dataset.to_netcdf() @@ -607,105 +613,102 @@ def write_netcdf( """ if make_copy: - write_ds = copy.deepcopy(self._ds) + ds = copy.deepcopy(self._ds) else: - write_ds = self._ds + ds = self._ds - encoding = {} if cleanup_global_atts: - for attr in list(write_ds.attrs): + for attr in list(ds.attrs): if attr.startswith('_'): - del write_ds.attrs[attr] + del ds.attrs[attr] if cleanup_qc_atts: check_atts = ['flag_meanings', 'flag_assessments'] - for var_name in list(write_ds.data_vars): - if 'standard_name' not in write_ds[var_name].attrs.keys(): + for var_name in list(ds.data_vars): + if 'standard_name' not in ds[var_name].attrs.keys(): continue - if write_ds[var_name].attrs['standard_name'] != "quality_flag": + if ds[var_name].attrs['standard_name'] != "quality_flag": continue for attr_name in check_atts: try: - att_values = write_ds[var_name].attrs[attr_name] + att_values = ds[var_name].attrs[attr_name] if isinstance(att_values, (list, tuple)): att_values = [ att_value.replace(' ', join_char) for att_value in att_values ] - write_ds[var_name].attrs[attr_name] = ' '.join(att_values) + ds[var_name].attrs[attr_name] = ' '.join(att_values) except KeyError: pass - # Tell .to_netcdf() to not add a _FillValue attribute for - # quality control variables. - if FillValue is not False: - encoding[var_name] = {'_FillValue': None} - - # Clean up _FillValue vs missing_value mess by creating an - # encoding dictionary with each variable's _FillValue set to - # requested fill value. May need to improve upon this for data type - # and other issues in the future. 
- if FillValue is not False: - for var_name in list(write_ds.data_vars): - if '_FillValue' in write_ds[var_name].attrs: + # Xarray makes an assumption that float type variables were read in and converted + # missing value indicator to NaN. .to_netcdf() will then automatically assign + # _FillValue attribute set to NaN when writing. If requested will set _FillValue + # key in encoding to None which will supress to_netcdf() from adding a _FillValue. + # If _FillValue attribute or _FillValue key in encoding is already set, will not + # override and the _FillValue will be written to the file. + if not FillValue: + all_var_names = list(ds.coords.keys()) + list(ds.data_vars) + for var_name in all_var_names: + if '_FillValue' in ds[var_name].attrs: continue if var_name not in encoding.keys(): - encoding[var_name] = {'_FillValue': FillValue} + encoding[var_name] = {'_FillValue': None} elif '_FillValue' not in encoding[var_name].keys(): - encoding[var_name] = {'_FillValue': FillValue} + encoding[var_name]['_FillValue'] = None if delete_global_attrs is not None: for attr in delete_global_attrs: try: - del write_ds.attrs[attr] + del ds.attrs[attr] except KeyError: pass - for var_name in list(write_ds.keys()): - if 'string' in list(write_ds[var_name].attrs.keys()): - att = write_ds[var_name].attrs['string'] - write_ds[var_name].attrs[var_name + '_string'] = att - del write_ds[var_name].attrs['string'] + for var_name in list(ds.keys()): + if 'string' in list(ds[var_name].attrs.keys()): + att = ds[var_name].attrs['string'] + ds[var_name].attrs[var_name + '_string'] = att + del ds[var_name].attrs['string'] # If requested update global attributes and variables attributes for required # CF attributes. 
if cf_compliant: # Get variable names and standard name for each variable - var_names = list(write_ds.keys()) + var_names = list(ds.keys()) standard_names = [] for var_name in var_names: try: - standard_names.append(write_ds[var_name].attrs['standard_name']) + standard_names.append(ds[var_name].attrs['standard_name']) except KeyError: standard_names.append(None) # Check if time varible has axis and standard_name attribute coord_name = 'time' try: - write_ds[coord_name].attrs['axis'] + ds[coord_name].attrs['axis'] except KeyError: try: - write_ds[coord_name].attrs['axis'] = 'T' + ds[coord_name].attrs['axis'] = 'T' except KeyError: pass try: - write_ds[coord_name].attrs['standard_name'] + ds[coord_name].attrs['standard_name'] except KeyError: try: - write_ds[coord_name].attrs['standard_name'] = 'time' + ds[coord_name].attrs['standard_name'] = 'time' except KeyError: pass # Try to determine type of dataset by coordinate dimention named time # and other factors try: - write_ds.attrs['FeatureType'] + ds.attrs['FeatureType'] except KeyError: - dim_names = list(write_ds.dims) + dim_names = list(ds.dims) FeatureType = None if dim_names == ['time']: FeatureType = 'timeSeries' @@ -713,15 +716,15 @@ def write_netcdf( FeatureType = 'timeSeries' elif len(dim_names) >= 2 and 'time' in dim_names: for var_name in var_names: - dims = list(write_ds[var_name].dims) + dims = list(ds[var_name].dims) if len(dims) == 2 and 'time' in dims: prof_dim = list(set(dims) - {'time'})[0] - if write_ds[prof_dim].values.size > 2: + if ds[prof_dim].values.size > 2: FeatureType = 'timeSeriesProfile' break if FeatureType is not None: - write_ds.attrs['FeatureType'] = FeatureType + ds.attrs['FeatureType'] = FeatureType # Add axis and positive attributes to variables with standard_name # equal to 'altitude' @@ -730,18 +733,18 @@ def write_netcdf( ] for var_name in alt_variables: try: - write_ds[var_name].attrs['axis'] + ds[var_name].attrs['axis'] except KeyError: - write_ds[var_name].attrs['axis'] = 'Z' 
+ ds[var_name].attrs['axis'] = 'Z' try: - write_ds[var_name].attrs['positive'] + ds[var_name].attrs['positive'] except KeyError: - write_ds[var_name].attrs['positive'] = 'up' + ds[var_name].attrs['positive'] = 'up' # Check if the Conventions global attribute lists the CF convention try: - Conventions = write_ds.attrs['Conventions'] + Conventions = ds.attrs['Conventions'] Conventions = Conventions.split() cf_listed = False for ii in Conventions: @@ -750,43 +753,30 @@ def write_netcdf( break if not cf_listed: Conventions.append(cf_convention) - write_ds.attrs['Conventions'] = ' '.join(Conventions) + ds.attrs['Conventions'] = ' '.join(Conventions) except KeyError: - write_ds.attrs['Conventions'] = str(cf_convention) + ds.attrs['Conventions'] = str(cf_convention) # Reorder global attributes to ensure history is last try: - history = copy.copy(write_ds.attrs['history']) - del write_ds.attrs['history'] - write_ds.attrs['history'] = history + history = copy.copy(ds.attrs['history']) + del ds.attrs['history'] + ds.attrs['history'] = history except KeyError: pass - current_time = dt.datetime.now().replace(microsecond=0) - if 'history' in list(write_ds.attrs.keys()): - write_ds.attrs['history'] += ''.join( - [ - '\n', - str(current_time), - ' created by ACT ', - str(act.__version__), - ' act.io.write.write_netcdf', - ] - ) - - # Correct time variable from having a _FillValue attribute written to the file. 
- try: - if 'time' not in encoding.keys(): - encoding['time'] = {} - encoding['time']['_FillValue'] = None - except KeyError: - pass - - if hasattr(write_ds, 'time_bounds') and not write_ds.time.encoding: - write_ds.time.encoding.update(write_ds.time_bounds.encoding) + current_time = dt.datetime.utcnow().replace(microsecond=0) + history_value = ( + f'Written to file by ACT-{act.__version__} ' + f'with write_netcdf() at {current_time} UTC' + ) + if 'history' in list(ds.attrs.keys()): + ds.attrs['history'] += f" ; {history_value}" + else: + ds.attrs['history'] = history_value - write_ds.to_netcdf(encoding=encoding, **kwargs) + ds.to_netcdf(encoding=encoding, **kwargs) def check_if_tar_gz_file(filenames): From b50fd2141c14522627b77f7649a2793fbb9b522c Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 9 Aug 2024 14:09:41 -0600 Subject: [PATCH 04/31] Adding option to suppress adding QC variables. Checking if time is numpy.datetime64. If not will convert to work with method to add DQR information. --- act/qc/arm.py | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/act/qc/arm.py b/act/qc/arm.py index edb9f45c73..9e9699dbc5 100644 --- a/act/qc/arm.py +++ b/act/qc/arm.py @@ -8,6 +8,7 @@ import numpy as np import requests import json +from dateutil import parser from act.config import DEFAULT_DATASTREAM_NAME @@ -22,6 +23,7 @@ def add_dqr_to_qc( cleanup_qc=True, dqr_link=False, skip_location_vars=False, + create_missing_qc_variables=True, ): """ Function to query the ARM DQR web service for reports and @@ -68,6 +70,9 @@ def add_dqr_to_qc( skip_location_vars : boolean Does not apply DQRs to location variables. This can be useful in the event the submitter has erroneously selected all variables. + create_missing_qc_variables : boolean + If a quality control varible for the data varialbe does not exist, + create the quality control varible and apply DQR.
Returns ------- @@ -102,8 +107,35 @@ def add_dqr_to_qc( if cleanup_qc: ds.clean.cleanup() - start_date = ds['time'].values[0].astype('datetime64[s]').astype(dt.datetime).strftime('%Y%m%d') - end_date = ds['time'].values[-1].astype('datetime64[s]').astype(dt.datetime).strftime('%Y%m%d') + # Get time from Dataset + time = ds['time'].values + + # If the time is not a datetime64 because the read routine was not asked to + # convert CF variables, convert the time varible for this routine only. + if not np.issubdtype(time.dtype, np.datetime64): + units_strings = [ + 'seconds since ', + 'minutes since ', + 'hours since ', + 'days since ', + 'milliseconds since ', + 'months since ', + 'years since ', + ] + td64_strings = ['s', 'm', 'h', 'D', 'ms', 'M', 'Y'] + units = ds['time'].attrs['units'] + for ii, _ in enumerate(units_strings): + if units.startswith(units_strings[ii]): + units = units.replace(units_strings[ii], '') + td64_string = td64_strings[ii] + break + + start_time = parser.parse(units) + start_time = np.datetime64(start_time, td64_string) + time = start_time + ds['time'].values.astype('timedelta64[s]') + + start_date = time[0].astype(dt.datetime).strftime('%Y%m%d') + end_date = time[-1].astype(dt.datetime).strftime('%Y%m%d') # Clean up assessment to ensure it is a string with no spaces. 
if isinstance(assessment, (list, tuple)): @@ -152,7 +184,7 @@ def add_dqr_to_qc( for time_range in docs[quality_category][dqr_number]['dates']: starttime = np.datetime64(time_range['start_date']) endtime = np.datetime64(time_range['end_date']) - ind = np.where((ds['time'].values >= starttime) & (ds['time'].values <= endtime)) + ind = np.where((time >= starttime) & (time <= endtime)) if ind[0].size > 0: index = np.append(index, ind[0]) @@ -182,7 +214,7 @@ def add_dqr_to_qc( continue # Do not process time varibles - if var_name in ['time', 'time_offset']: + if var_name in ['time', 'time_offset', 'time_bounds']: continue # Only process provided variable names @@ -197,6 +229,12 @@ def add_dqr_to_qc( except KeyError: pass + if ( + create_missing_qc_variables is False + and ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False) is None + ): + continue + try: ds.qcfilter.add_test( var_name, From 85516b2234cf40c35faca77b8c4a4024b433c0c5 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 9 Aug 2024 14:11:17 -0600 Subject: [PATCH 05/31] Adding option to remove QC variable attributes. Updated method to add info to history attribute. Removed command that updates original Dataset.
--- act/qc/qc_summary.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py index ff6518dde0..097bdb131b 100644 --- a/act/qc/qc_summary.py +++ b/act/qc/qc_summary.py @@ -8,6 +8,7 @@ """ import datetime +import copy class QCSummary: @@ -23,7 +24,9 @@ def __init__(self, ds): """initialize""" self._ds = ds - def create_qc_summary(self, cleanup_qc=False): + def create_qc_summary( + self, cleanup_qc=False, remove_attrs=['fail_min', 'fail_max', 'fail_delta'] + ): """ Method to convert embedded quality control to summary QC that utilzes flag values instead of flag masks and summarizes the assessments to only @@ -34,6 +37,8 @@ def create_qc_summary(self, cleanup_qc=False): Call clean.cleanup() method to convert to standardized ancillary quality control variables. The quality control summary requires the current embedded quality control variables to use ACT standards. + remove_attrs : None, list + Quality Control variable attributes to remove after creating the summary. 
Returns ------- @@ -49,17 +54,17 @@ def create_qc_summary(self, cleanup_qc=False): 'Bad', ] standard_meanings = [ - "Data suspect, further analysis recommended", - "Data suspect, further analysis recommended", - "Data incorrect, use not recommended", - "Data incorrect, use not recommended", + "Data suspect further analysis recommended", + "Data suspect further analysis recommended", + "Data incorrect use not recommended", + "Data incorrect use not recommended", ] - if cleanup_qc: - self._ds.clean.cleanup() - return_ds = self._ds.copy() + if cleanup_qc: + return_ds.clean.cleanup() + added = False for var_name in list(self._ds.data_vars): qc_var_name = self.check_for_ancillary_qc(var_name, add_if_missing=False, cleanup=False) @@ -111,14 +116,23 @@ def create_qc_summary(self, cleanup_qc=False): flag_value=True, ) - self._ds.update({qc_var_name: return_ds[qc_var_name]}) + # Remove fail limit variable attributes + if remove_attrs is not None: + for att_name in copy.copy(list(return_ds[qc_var_name].attrs.keys())): + if att_name in remove_attrs: + del return_ds[qc_var_name].attrs[att_name] if added: - history = return_ds.attrs['history'] - history += ( - " ; Quality control summary implemented by ACT at " - f"{datetime.datetime.utcnow().isoformat()} UTC." + from act import __version__ as version + + history_value = ( + f"Quality control summary implemented by ACT-{version} at " + f"{datetime.datetime.utcnow().replace(microsecond=0)} UTC" ) - return_ds.attrs['history'] = history + + if 'history' in list(return_ds.attrs.keys()): + return_ds.attrs['history'] += f" ; {history_value}" + else: + return_ds.attrs['history'] = history_value return return_ds From ba46940c62bdb12dcc110ac3a4d39d750953f8c1 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Sat, 10 Aug 2024 04:02:03 +0000 Subject: [PATCH 06/31] Adding a method to ensure datatype is datetime64. 
--- act/qc/arm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/act/qc/arm.py b/act/qc/arm.py index 9e9699dbc5..f070f55cff 100644 --- a/act/qc/arm.py +++ b/act/qc/arm.py @@ -134,8 +134,8 @@ def add_dqr_to_qc( start_time = np.datetime64(start_time, td64_string) time = start_time + ds['time'].values.astype('timedelta64[s]') - start_date = time[0].astype(dt.datetime).strftime('%Y%m%d') - end_date = time[-1].astype(dt.datetime).strftime('%Y%m%d') + start_date = time[0].astype('datetime64[s]').astype(dt.datetime).strftime('%Y%m%d') + end_date = time[-1].astype('datetime64[s]').astype(dt.datetime).strftime('%Y%m%d') # Clean up assessment to ensure it is a string with no spaces. if isinstance(assessment, (list, tuple)): From a44ad88d0da522fa7c74d0dd6f522b9363c775a8 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Sat, 10 Aug 2024 04:03:14 +0000 Subject: [PATCH 07/31] Returning order to correct format for making copy. Returning .update() method to ensure the attribute removal takes hold. --- act/qc/qc_summary.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py index 097bdb131b..c183b77609 100644 --- a/act/qc/qc_summary.py +++ b/act/qc/qc_summary.py @@ -60,10 +60,10 @@ def create_qc_summary( "Data incorrect use not recommended", ] - return_ds = self._ds.copy() - if cleanup_qc: - return_ds.clean.cleanup() + self._ds.clean.cleanup() + + return_ds = self._ds.copy() added = False for var_name in list(self._ds.data_vars): @@ -122,6 +122,8 @@ def create_qc_summary( if att_name in remove_attrs: del return_ds[qc_var_name].attrs[att_name] + self._ds.update({qc_var_name: return_ds[qc_var_name]}) + if added: from act import __version__ as version From d870cae1cfc3a5de71e93f2f59f9132cc5ea534d Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 23 Aug 2024 09:24:55 -0600 Subject: [PATCH 08/31] Changing the default from Internal QC Assessment terms to DQR Assessment terms. 
--- act/qc/clean.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/act/qc/clean.py b/act/qc/clean.py index 2c0bb7baa7..c2797b5c6a 100644 --- a/act/qc/clean.py +++ b/act/qc/clean.py @@ -792,7 +792,8 @@ def normalize_assessment( self, variables=None, exclude_variables=None, - qc_lookup={'Incorrect': 'Bad', 'Suspect': 'Indeterminate'}, + # qc_lookup={'Incorrect': 'Bad', 'Suspect': 'Indeterminate'}, + qc_lookup={'Bad': 'Incorrect', 'Indeterminate': 'Suspect'}, ): """ Method to clean up assessment terms used to be consistent between From 2229dbe6ddf19b6f1634b7aac177d69102ac0b4c Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 23 Aug 2024 09:26:44 -0600 Subject: [PATCH 09/31] Adding option to normalize assessment terms used. --- act/qc/qc_summary.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py index c183b77609..7626c0ffd4 100644 --- a/act/qc/qc_summary.py +++ b/act/qc/qc_summary.py @@ -25,7 +25,10 @@ def __init__(self, ds): self._ds = ds def create_qc_summary( - self, cleanup_qc=False, remove_attrs=['fail_min', 'fail_max', 'fail_delta'] + self, + cleanup_qc=False, + remove_attrs=['fail_min', 'fail_max', 'fail_delta'], + normalize_assessment=True, ): """ Method to convert embedded quality control to summary QC that utilzes @@ -39,6 +42,9 @@ def create_qc_summary( control variables to use ACT standards. remove_attrs : None, list Quality Control variable attributes to remove after creating the summary. + normalize_assessment : bool + Option to clean up assessments to use the same terminology. 
+ Returns ------- @@ -63,6 +69,9 @@ def create_qc_summary( if cleanup_qc: self._ds.clean.cleanup() + if normalize_assessment: + self._ds.clean.normalize_assessment() + return_ds = self._ds.copy() added = False From 2efa1396fb0e32318a447fabc3afa96aa7362839 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 23 Aug 2024 10:45:59 -0600 Subject: [PATCH 10/31] Adding option to set the missing value indicator to be a value other than NaN. --- act/qc/qcfilter.py | 54 +++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/act/qc/qcfilter.py b/act/qc/qcfilter.py index c8a64ca5e4..c24e655de9 100644 --- a/act/qc/qcfilter.py +++ b/act/qc/qcfilter.py @@ -10,6 +10,7 @@ import xarray as xr from act.qc import comparison_tests, qctests, bsrn_tests, qc_summary +from act.utils.data_utils import get_missing_value @xr.register_dataset_accessor('qcfilter') @@ -957,6 +958,7 @@ def datafilter( rm_tests=None, verbose=False, del_qc_var=False, + no_NaN=False, ): """ Method to apply quality control variables to data variables by @@ -987,6 +989,12 @@ def datafilter( and xarray method processing would also process the quality control variables, the default is to remove the quality control data variables. Defaults to False. + no_NaN : boolean + Should the returned Xarray Dataset use NaN as the missing value indicator. + If Xarray did not convert the _FillValue or missing_value to NaN upon reading + the same missing value indicator should be used. If set to true will try to determine + the current missing_value or _FillValue set in the file and use that value. If neither + are set as a variable attribute it will use the default value (most likely -9999).
Examples -------- @@ -1048,6 +1056,12 @@ def datafilter( var_name, rm_assessments=rm_assessments, rm_tests=rm_tests, return_nan_array=True ) + if no_NaN: + missing_value = get_missing_value(self._ds, var_name, add_if_missing_in_ds=True) + index = np.isnan(data) + if np.any(index): + data[index] = missing_value + # If data was orginally stored as Dask array return values to Dataset as Dask array # else set as Numpy array. try: @@ -1070,35 +1084,45 @@ def datafilter( pass # Add comment to history for each test that's filtered out - if isinstance(rm_tests, int): - rm_tests = [rm_tests] if rm_tests is not None: - for test in list(rm_tests): - test = 2 ** (test - 1) - if test in flag_masks: - index = flag_masks.index(test) - comment = ''.join(['act.qc.datafilter: ', flag_meanings[index]]) - if 'history' in self._ds[var_name].attrs.keys(): - self._ds[var_name].attrs['history'] += '\n' + comment - else: - self._ds[var_name].attrs['history'] = comment + if isinstance(rm_tests, int): + rm_tests = [rm_tests] + + for test in rm_tests: + try: + index = flag_masks.index(set_bit(0, test)) + except ValueError: + continue + + comment = f'act.qc.datafilter: {flag_meanings[index]}' + if 'history' in self._ds[var_name].attrs.keys(): + self._ds[var_name].attrs['history'] += f'\n {comment}' + else: + self._ds[var_name].attrs['history'] = comment - if isinstance(rm_assessments, str): - rm_assessments = [rm_assessments] if rm_assessments is not None: + if isinstance(rm_assessments, str): + rm_assessments = [rm_assessments] + for assessment in rm_assessments: if assessment in flag_assessments: index = [i for i, e in enumerate(flag_assessments) if e == assessment] for ind in index: - comment = ''.join(['act.qc.datafilter: ', flag_meanings[ind]]) + comment = f'act.qc.datafilter: {flag_meanings[ind]}' if 'history' in self._ds[var_name].attrs.keys(): - self._ds[var_name].attrs['history'] += '\n' + comment + self._ds[var_name].attrs['history'] += f'\n {comment}' else: 
self._ds[var_name].attrs['history'] = comment # If requested delete quality control variable if del_qc_var: del self._ds[qc_var_name] + try: + if self._ds[var_name].attrs['ancillary_variables'] == qc_var_name: + del self._ds[var_name].attrs['ancillary_variables'] + except KeyError: + pass + if verbose: print(f'Deleting {qc_var_name} from dataset') From 82a3b57fffb5758fa0a003dc9cca1852b8495f45 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 23 Aug 2024 10:46:32 -0600 Subject: [PATCH 11/31] Changed to use Suspect and Incorrect --- tests/qc/test_qc_summary.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/qc/test_qc_summary.py b/tests/qc/test_qc_summary.py index 40cd8c5fc0..4555e77fd5 100644 --- a/tests/qc/test_qc_summary.py +++ b/tests/qc/test_qc_summary.py @@ -38,10 +38,10 @@ def test_qc_summary(): assert np.sum(result[qc_var_name].values) == 610 - qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Indeterminate') + qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Suspect') assert np.all(np.where(qc_ma.mask)[0] == np.arange(100, 170)) - qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Bad') + qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Incorrect') index = np.concatenate([index_1, index_2, index_3]) assert np.all(np.where(qc_ma.mask)[0] == index) @@ -78,7 +78,7 @@ def test_qc_summary_multiple_assessment_names(): var_name, index=index_6, test_meaning='Testing Incorrect', test_assessment='Incorrect' ) - result = ds.qcfilter.create_qc_summary() + result = ds.qcfilter.create_qc_summary(normalize_assessment=False) assert result[qc_var_name].attrs['flag_assessments'] == [ 'Not failing', From f831cbe57643f22056044f3e4324aaf0f732b39b Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 23 Aug 2024 10:47:11 -0600 Subject: [PATCH 12/31] Updated to match new default values for normalized assessments --- tests/qc/test_qcfilter.py | 8 ++++---- 1 file changed, 4 insertions(+), 
4 deletions(-) diff --git a/tests/qc/test_qcfilter.py b/tests/qc/test_qcfilter.py index 163206f56f..adc8edf967 100644 --- a/tests/qc/test_qcfilter.py +++ b/tests/qc/test_qcfilter.py @@ -45,10 +45,10 @@ def test_arm_qc(): except ValueError: return - assert 'Suspect' not in ds[qc_variable].attrs['flag_assessments'] - assert 'Incorrect' not in ds[qc_variable].attrs['flag_assessments'] - assert 'Bad' in ds[qc_variable].attrs['flag_assessments'] - assert 'Indeterminate' in ds[qc_variable].attrs['flag_assessments'] + assert 'Suspect' in ds[qc_variable].attrs['flag_assessments'] + assert 'Incorrect' in ds[qc_variable].attrs['flag_assessments'] + assert 'Bad' not in ds[qc_variable].attrs['flag_assessments'] + assert 'Indeterminate' not in ds[qc_variable].attrs['flag_assessments'] # Check that defualt will update all variables in DQR for var_name in ['wdir_vec_mean', 'wdir_vec_std', 'wspd_arith_mean', 'wspd_vec_mean']: From f6121d9125d76e173b4372b8d0f992a47bb8846c Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Fri, 23 Aug 2024 11:16:02 -0600 Subject: [PATCH 13/31] Removing commented code. 
--- tests/qc/test_qc_summary.py | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/tests/qc/test_qc_summary.py b/tests/qc/test_qc_summary.py index 4555e77fd5..22abc50e56 100644 --- a/tests/qc/test_qc_summary.py +++ b/tests/qc/test_qc_summary.py @@ -45,6 +45,17 @@ def test_qc_summary(): index = np.concatenate([index_1, index_2, index_3]) assert np.all(np.where(qc_ma.mask)[0] == index) + att_names = [ + 'fail_min', + 'fail_max', + 'fail_delta', + 'valid_min', + 'valid_max', + 'valid_delta', + ] + for att_name in att_names: + assert att_name not in ds[f'qc_{var_name}'].attrs + assert "Quality control summary implemented by ACT" in result.attrs['history'] del ds @@ -150,31 +161,13 @@ def test_qc_summary_big_data(): 'zrh', 'osc', ] - skip_datastream_codes = [ - 'mmcrmom', - # 'microbasepi', - # 'lblch1a', - # '30co2flx4mmet', - # 'microbasepi2', - # '30co2flx60m', - # 'bbhrpavg1mlawer', - # 'co', - # 'lblch1b', - # '30co2flx25m', - # '30co2flx4m', - # 'armbeatm', - # 'armtrajcld', - # '1swfanalsiros1long', - ] - # skip_datastreams = ['nimmfrsraod5chcorM1.c1', 'anxaoso3M1.b0'] + skip_datastream_codes = ['mmcrmom'] num_files = 3 expected_assessments = ['Not failing', 'Suspect', 'Indeterminate', 'Incorrect', 'Bad'] testing_files = [] - single_test = False if len(testing_files) == 0: - single_test = True filename = ( f'test_qc_summary_big_data.{datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")}.txt' ) @@ -192,9 +185,6 @@ def test_qc_summary_big_data(): if '-' in datastream_dir.name: continue - # if datastream_dir.name in skip_datastreams: - # continue - fn_obj = DatastreamParserARM(datastream_dir.name) facility = fn_obj.facility if facility is not None and facility[0] in ['A', 'X', 'U', 'F', 'N']: @@ -216,8 +206,7 @@ def test_qc_summary_big_data(): for ii in range(0, num_tests): testing_files.append(random.choice(files)) - if single_test: - print(f"Testing {len(testing_files)} files\n") + print(f"\nTesting 
{len(testing_files)} files\n") print(f"Output file name = {output_file}\n") for file in testing_files: From 29a335933a9a2f42317cc2af72bee8ab490d9a2c Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Tue, 27 Aug 2024 13:02:08 -0600 Subject: [PATCH 14/31] Improving the datafilter test. Checking ancillary_variables attribute. Checking rm_assessments keyword. --- tests/qc/test_qcfilter.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/tests/qc/test_qcfilter.py b/tests/qc/test_qcfilter.py index adc8edf967..d8ddd2d904 100644 --- a/tests/qc/test_qcfilter.py +++ b/tests/qc/test_qcfilter.py @@ -409,29 +409,50 @@ def test_datafilter(): data_var_names.sort() qc_var_names.sort() - var_name = 'atmos_pressure' + var_name = 'rh_mean' - ds_1 = ds.mean() + ds_1 = ds.sum() - ds.qcfilter.add_less_test(var_name, 99, test_assessment='Bad') + ds.qcfilter.add_less_test(var_name, 80, test_assessment='Bad') + ds.qcfilter.add_less_test(var_name, 70, test_assessment='Suspect') ds_filtered = copy.deepcopy(ds) ds_filtered.qcfilter.datafilter(rm_assessments='Bad') - ds_2 = ds_filtered.mean() - assert np.isclose(ds_1[var_name].values, 98.86, atol=0.01) - assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01) + ds_2 = ds_filtered.sum() + assert np.isclose(ds_1[var_name].values, 104602.23, atol=0.01) + assert np.isclose(ds_2[var_name].values, 7466.4004, atol=0.01) assert isinstance(ds_1[var_name].data, da.core.Array) assert 'act.qc.datafilter' in ds_filtered[var_name].attrs['history'] + assert 'ancillary_variables' in ds_filtered[var_name].attrs.keys() ds_filtered = copy.deepcopy(ds) ds_filtered.qcfilter.datafilter(rm_assessments='Bad', variables=var_name, del_qc_var=True) - ds_2 = ds_filtered.mean() - assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01) + ds_2 = ds_filtered.sum() + assert np.isclose(ds_2[var_name].values, 7466.40, atol=0.01) expected_var_names = sorted(list(set(data_var_names + qc_var_names) - {'qc_' + var_name})) 
assert sorted(list(ds_filtered.data_vars)) == expected_var_names ds_filtered = copy.deepcopy(ds) - ds_filtered.qcfilter.datafilter(rm_assessments='Bad', del_qc_var=True) + ds_filtered.qcfilter.datafilter(rm_assessments='Suspect', del_qc_var=True) + ds_2 = ds_filtered.sum() + assert np.isclose(ds_2[var_name].values, 80244.33, atol=0.01) assert sorted(list(ds_filtered.data_vars)) == data_var_names + assert 'ancillary_variables' not in ds_filtered[var_name].attrs.keys() + + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments=['Bad', 'Suspect']) + ds_2 = ds_filtered.sum() + assert np.isclose(ds_2[var_name].values, 7466.40, atol=0.01) + + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments=['Sponge', 'Bob']) + ds_2 = ds_filtered.sum() + assert np.isclose(ds_2[var_name].values, 104602.23, atol=0.01) + + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments=['Sponge', 'Bob', 'suspect'], variables=var_name) + ds_2 = ds_filtered.sum() + assert np.isclose(ds_2[var_name].values, 80244.33, atol=0.01) + assert np.isclose(ds_2['temp_mean'].values, np.sum(ds_filtered['temp_mean'].values), atol=0.01) ds.close() del ds From b18dd53df379f4ed91956bf8ec5572e3a3d7f128 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 14:54:37 -0600 Subject: [PATCH 15/31] Adding more testing to qc_summary. 
--- tests/qc/test_qc_summary.py | 119 ++++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 4 deletions(-) diff --git a/tests/qc/test_qc_summary.py b/tests/qc/test_qc_summary.py index 22abc50e56..53f9e29af3 100644 --- a/tests/qc/test_qc_summary.py +++ b/tests/qc/test_qc_summary.py @@ -36,14 +36,13 @@ def test_qc_summary(): assert 'flag_masks' not in result[qc_var_name].attrs.keys() assert isinstance(result[qc_var_name].attrs['flag_values'], list) - assert np.sum(result[qc_var_name].values) == 610 + assert np.sum(result[qc_var_name].values) == 880 qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Suspect') - assert np.all(np.where(qc_ma.mask)[0] == np.arange(100, 170)) + assert np.sum(np.where(qc_ma.mask)) == 9415 qc_ma = result.qcfilter.get_masked_data(var_name, rm_assessments='Incorrect') - index = np.concatenate([index_1, index_2, index_3]) - assert np.all(np.where(qc_ma.mask)[0] == index) + assert np.sum(np.where(qc_ma.mask)) == 89415 att_names = [ 'fail_min', @@ -116,6 +115,118 @@ def test_qc_summary_multiple_assessment_names(): assert np.sum(np.where(qc_ma.mask)[0]) == 884575 +def test_qc_summary_unexpected_assessment_name(): + var_name = 'temp_mean' + ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=var_name) + + test_meanings = [ + 'Testing Bad', + 'Testing Boomer', + 'Testing Boomer Second', + 'Testing Incorrect', + 'Testing Indeterminate', + 'Testing Sooner', + 'Testing Suspect', + ] + test_assessments = [ + 'Bad', + 'Boomer', + 'boomer', + 'Incorrect', + 'Indeterminate', + 'Sooner', + 'Suspect', + ] + + test_index_sums = [4950, 39900, 39900, 34950, 44950, 54950, 64950] + + for ii, _ in enumerate(test_assessments): + ds.qcfilter.add_test( + var_name, + index=np.arange(ii * 100, ii * 100 + 100), + test_meaning=test_meanings[ii], + test_assessment=test_assessments[ii], + ) + + ds = ds.qcfilter.create_qc_summary(normalize_assessment=False) + + qc_var_name = ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False) + 
+ # Make sure flag meanings are correct with new assessments. + assert sorted(ds[qc_var_name].attrs['flag_meanings']) == [ + 'Data Boomer', + 'Data Sooner', + 'Data incorrect use not recommended', + 'Data incorrect use not recommended', + 'Data suspect further analysis recommended', + 'Data suspect further analysis recommended', + 'Not failing quality control tests', + ] + assert sorted(ds[qc_var_name].attrs['flag_assessments']) == [ + 'Bad', + 'Boomer', + 'Incorrect', + 'Indeterminate', + 'Not failing', + 'Sooner', + 'Suspect', + ] + # Make sure the values and order of first 5 are as expected. The other non-standard + # assessments may be in different order with set operations. + assert ds[qc_var_name].attrs['flag_assessments'][:5] == [ + 'Not failing', + 'Suspect', + 'Indeterminate', + 'Incorrect', + 'Bad', + ] + + for assessment, index_sum in zip(test_assessments, test_index_sums): + qc_ma = ds.qcfilter.get_masked_data(var_name, rm_assessments=assessment) + assert np.sum(np.where(qc_ma.mask)[0]) == index_sum + + qc_ma = ds.qcfilter.get_masked_data(var_name, rm_assessments=['Bucky']) + assert np.sum(np.where(qc_ma.mask)[0]) == 0 + + qc_ma = ds.qcfilter.get_masked_data(var_name, rm_assessments=['Boomer', 'Sooner']) + assert np.sum(np.where(qc_ma.mask)[0]) == 94850 + + qc_ma = ds.qcfilter.get_masked_data( + var_name, + rm_assessments=['Boomer', 'Sooner', 'Indeterminate', 'Suspect', 'Bad', 'Incorrect'], + ) + assert np.sum(np.where(qc_ma.mask)[0]) == 244650 + + del ds + + +def test_qc_summary_scalar(): + # Test scalar variables. Currently not implemented so just check that we + # don't do anything. 
+ var_names = ['alt', 'temp_mean'] + ds = read_arm_netcdf(EXAMPLE_MET1, keep_variables=var_names) + + test_meanings = ['Testing Incorrect', 'Testing Suspect'] + test_assessments = ['Incorrect', 'Suspect'] + + for var_name in var_names: + for ii, _ in enumerate(test_assessments): + ds.qcfilter.add_test( + var_name, + index=0, + test_meaning=test_meanings[ii], + test_assessment=test_assessments[ii], + ) + + with pytest.warns(UserWarning, match="Unable to process scalar variable"): + ds = ds.qcfilter.create_qc_summary(normalize_assessment=False) + + assert 'flag_masks' in ds[f'qc_{var_names[0]}'].attrs.keys() + assert 'flag_values' not in ds[f'qc_{var_names[0]}'].attrs.keys() + assert 'flag_masks' not in ds[f'qc_{var_names[1]}'].attrs.keys() + assert 'flag_values' in ds[f'qc_{var_names[1]}'].attrs.keys() + + @pytest.mark.big @pytest.mark.skipif('ARCHIVE_DATA' not in environ, reason="Running outside ADC system.") def test_qc_summary_big_data(): From f5119e6887d63b7953e3323a4743d794532ac614 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 14:56:04 -0600 Subject: [PATCH 16/31] Updated to handle flag_assessments outside the standard 4. Correctly skips scalar qc variable. 
--- act/qc/qc_summary.py | 55 ++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py index 7626c0ffd4..fee27d9e75 100644 --- a/act/qc/qc_summary.py +++ b/act/qc/qc_summary.py @@ -9,6 +9,8 @@ import datetime import copy +import xarray as xr +import warnings class QCSummary: @@ -53,18 +55,12 @@ def create_qc_summary( """ - standard_assessments = [ - 'Suspect', - 'Indeterminate', - 'Incorrect', - 'Bad', - ] - standard_meanings = [ - "Data suspect further analysis recommended", - "Data suspect further analysis recommended", - "Data incorrect use not recommended", - "Data incorrect use not recommended", - ] + standard_meanings = { + 'Suspect': "Data suspect further analysis recommended", + 'Indeterminate': "Data suspect further analysis recommended", + 'Incorrect': "Data incorrect use not recommended", + 'Bad': "Data incorrect use not recommended", + } if cleanup_qc: self._ds.clean.cleanup() @@ -81,11 +77,15 @@ def create_qc_summary( if qc_var_name is None: continue - added = True - - assessments = list(set(self._ds[qc_var_name].attrs['flag_assessments'])) + # Do not really know how to handle scalars yet. + if return_ds[qc_var_name].ndim == 0: + warnings.warn( + f'Unable to process scalar variable {var_name}. ' + 'Scalar variables currently not implemented.' 
+ ) + continue - import xarray as xr + added = True result = xr.zeros_like(return_ds[qc_var_name]) for attr in ['flag_masks', 'flag_meanings', 'flag_assessments', 'flag_values']: @@ -105,22 +105,31 @@ def create_qc_summary( flag_value=True, ) - for ii, assessment in enumerate(standard_assessments): - if assessment not in assessments: - continue + flag_assessments = list(standard_meanings.keys()) + added_assessments = set(self._ds[qc_var_name].attrs['flag_assessments']) - set( + flag_assessments + ) + flag_assessments += list(added_assessments) + for ii, assessment in enumerate(flag_assessments): + try: + standard_meaning = standard_meanings[assessment.capitalize()] + except KeyError: + standard_meaning = f"Data {assessment}" qc_mask = self.get_masked_data( var_name, rm_assessments=assessment, return_mask_only=True ) - # Do not really know how to handle scalars yet. - if qc_mask.ndim == 0: - continue + # # Do not really know how to handle scalars yet. + # if return_ds[var_name].ndim == 0: + # warnings.warn(f'Unable to process scalar variable {var_name}. 
' + # 'Scalar variables currently not implemented.') + # continue return_ds.qcfilter.add_test( var_name, index=qc_mask, - test_meaning=standard_meanings[ii], + test_meaning=standard_meaning, test_assessment=assessment, flag_value=True, ) From e06191ce9db8a5b115d340d2fb74cd946484746b Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 16:05:10 -0600 Subject: [PATCH 17/31] Catching warning with pytest to ensure the warning was issued --- tests/discovery/test_asos.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/discovery/test_asos.py b/tests/discovery/test_asos.py index e0f44842df..7bac2e8eb0 100644 --- a/tests/discovery/test_asos.py +++ b/tests/discovery/test_asos.py @@ -1,5 +1,5 @@ from datetime import datetime - +import pytest import numpy as np import act @@ -22,6 +22,9 @@ def test_get_region(): time_window = [datetime(2020, 2, 4, 2, 0), datetime(2020, 2, 12, 10, 0)] lat_window = (41.8781 - 0.5, 41.8781 + 0.5) lon_window = (-87.6298 - 0.5, -87.6298 + 0.5) - my_asoses = act.discovery.get_asos_data(time_window, lat_range=lat_window, lon_range=lon_window) + with pytest.warns(UserWarning, match="No data available at station"): + my_asoses = act.discovery.get_asos_data( + time_window, lat_range=lat_window, lon_range=lon_window + ) asos_keys = list(my_asoses.keys()) assert asos_keys == my_keys From 440f821d38be1ed2cd52cf701134a388a773a173 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 16:05:29 -0600 Subject: [PATCH 18/31] Catching warning with pytest to ensure the warning was issued --- tests/io/test_ameriflux.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/io/test_ameriflux.py b/tests/io/test_ameriflux.py index 395983de39..bf2820a160 100644 --- a/tests/io/test_ameriflux.py +++ b/tests/io/test_ameriflux.py @@ -1,13 +1,15 @@ import act import glob import xarray as xr +import pytest def test_convert_to_ameriflux(): files = 
glob.glob(act.tests.sample_files.EXAMPLE_ECORSF_E39) ds_ecor = act.io.arm.read_arm_netcdf(files) - df = act.io.ameriflux.convert_to_ameriflux(ds_ecor) + with pytest.warns(UserWarning, match="mapping was not provided"): + df = act.io.ameriflux.convert_to_ameriflux(ds_ecor) assert 'FC' in df assert 'WS_MAX' in df @@ -16,7 +18,8 @@ def test_convert_to_ameriflux(): ds_sebs = act.io.arm.read_arm_netcdf(files) ds = xr.merge([ds_ecor, ds_sebs]) - df = act.io.ameriflux.convert_to_ameriflux(ds) + with pytest.warns(UserWarning, match="mapping was not provided"): + df = act.io.ameriflux.convert_to_ameriflux(ds) assert 'SWC_2_1_1' in df assert 'TS_3_1_1' in df @@ -26,7 +29,8 @@ def test_convert_to_ameriflux(): ds_stamp = act.io.arm.read_arm_netcdf(files) ds = xr.merge([ds_ecor, ds_sebs, ds_stamp], compat='override') - df = act.io.ameriflux.convert_to_ameriflux(ds) + with pytest.warns(UserWarning, match="mapping was not provided"): + df = act.io.ameriflux.convert_to_ameriflux(ds) assert 'SWC_6_10_1' in df assert 'G_2_1_1' in df From 01d4b0e7ec7437a0b75e459078c1bedad991cbbe Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 16:06:05 -0600 Subject: [PATCH 19/31] Catching warning with pytest to ensure the warning was issued --- tests/plotting/test_distributiondisplay.py | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tests/plotting/test_distributiondisplay.py b/tests/plotting/test_distributiondisplay.py index ad906ab443..eea1deb0cf 100644 --- a/tests/plotting/test_distributiondisplay.py +++ b/tests/plotting/test_distributiondisplay.py @@ -419,7 +419,12 @@ def test_plot_pie_chart(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_AOSACSM) fields = ['sulfate', 'ammonium', 'nitrate', 'chloride'] display = DistributionDisplay(ds) - display.plot_pie_chart(fields) + with pytest.warns(UserWarning, match="contains negatives values, consider using a threshold."): + with pytest.warns( + UserWarning, + match="No time parameter used, 
calculating a mean for each field for the whole dataset.", + ): + display.plot_pie_chart(fields) ds.close() try: @@ -435,12 +440,16 @@ def test_plot_pie_chart_kwargs(): threshold = 0.0 fill_value = 0.0 display = DistributionDisplay(ds) - display.plot_pie_chart( - fields, - threshold=threshold, - fill_value=fill_value, - colors=['olivedrab', 'rosybrown', 'gray', 'saddlebrown'], - ) + with pytest.warns( + UserWarning, + match="No time parameter used, calculating a mean for each field for the whole dataset.", + ): + display.plot_pie_chart( + fields, + threshold=threshold, + fill_value=fill_value, + colors=['olivedrab', 'rosybrown', 'gray', 'saddlebrown'], + ) ds.close() try: From 778227c2d9e5cec0e07bc39a9d622c3ca9a147e9 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 16:06:29 -0600 Subject: [PATCH 20/31] Catching warning with pytest to ensure the warning was issued --- tests/plotting/test_skewtdisplay.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/plotting/test_skewtdisplay.py b/tests/plotting/test_skewtdisplay.py index 21a45281a9..8047bba58b 100644 --- a/tests/plotting/test_skewtdisplay.py +++ b/tests/plotting/test_skewtdisplay.py @@ -67,7 +67,8 @@ def test_multi_skewt_plot(): def test_enhanced_skewt_plot(): ds = act.io.arm.read_arm_netcdf(sample_files.EXAMPLE_SONDE1) display = act.plotting.SkewTDisplay(ds) - display.plot_enhanced_skewt(color_field='alt', component_range=85) + with pytest.warns(): + display.plot_enhanced_skewt(color_field='alt', component_range=85) ds.close() return display.fig From b8e3c4d23d1a5e4ab592377963f49db3e52a1362 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 16:06:53 -0600 Subject: [PATCH 21/31] Catching warning with pytest to ensure the warning was issued --- tests/plotting/test_timeseriesdisplay.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/plotting/test_timeseriesdisplay.py b/tests/plotting/test_timeseriesdisplay.py index 
d1461b3779..508331f421 100644 --- a/tests/plotting/test_timeseriesdisplay.py +++ b/tests/plotting/test_timeseriesdisplay.py @@ -462,7 +462,8 @@ def test_plot_barbs_from_u_v4(): fake_ds = xr.Dataset( {'xbins': xbins, 'ybins': ybins, 'ydata': y_array, 'xdata': x_array, 'pres': pres} ) - BarbDisplay = TimeSeriesDisplay(fake_ds) + with pytest.warns(UserWarning, match="Could not discern datastreamname and dict or tuple"): + BarbDisplay = TimeSeriesDisplay(fake_ds) BarbDisplay.plot_barbs_from_u_v( 'xdata', 'ydata', None, set_title='test', use_var_for_y='pres', cmap='jet' ) @@ -488,7 +489,8 @@ def test_plot_barbs_from_u_v5(): fake_ds = xr.Dataset( {'xbins': xbins, 'ybins': ybins, 'ydata': y_array, 'xdata': x_array, 'pres': pres} ) - BarbDisplay = TimeSeriesDisplay(fake_ds) + with pytest.warns(UserWarning, match="Could not discern datastreamname and dict or tuple"): + BarbDisplay = TimeSeriesDisplay(fake_ds) BarbDisplay.plot_barbs_from_u_v( 'xdata', 'ydata', From 4b2f0eb010f5d27d4b9c4f7b3ffed6574bbc377b Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 16:07:19 -0600 Subject: [PATCH 22/31] Catching warning with pytest to ensure the warning was issued --- tests/plotting/test_windrosedisplay.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/plotting/test_windrosedisplay.py b/tests/plotting/test_windrosedisplay.py index 7566e9d8f0..19934b052c 100644 --- a/tests/plotting/test_windrosedisplay.py +++ b/tests/plotting/test_windrosedisplay.py @@ -138,15 +138,16 @@ def test_groupby_plot(): # Create Plot Display display = WindRoseDisplay(ds, figsize=(15, 15), subplot_shape=(3, 3)) groupby = display.group_by('day') - groupby.plot_group( - 'plot_data', - None, - dir_field='wdir_vec_mean', - spd_field='wspd_vec_mean', - data_field='temp_mean', - num_dirs=12, - plot_type='line', - ) + with pytest.warns(RuntimeWarning): + groupby.plot_group( + 'plot_data', + None, + dir_field='wdir_vec_mean', + spd_field='wspd_vec_mean', + 
data_field='temp_mean', + num_dirs=12, + plot_type='line', + ) # Set theta tick markers for each axis inside display to be inside the polar axes for i in range(3): From c0f2423a8d14934c59f538bd48329b6424a06f98 Mon Sep 17 00:00:00 2001 From: Ken Kehoe Date: Wed, 28 Aug 2024 16:07:57 -0600 Subject: [PATCH 23/31] Catching warning with pytest to ensure the warning was issued --- tests/plotting/test_xsectiondisplay.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/plotting/test_xsectiondisplay.py b/tests/plotting/test_xsectiondisplay.py index 732165cd2e..8dede9047a 100644 --- a/tests/plotting/test_xsectiondisplay.py +++ b/tests/plotting/test_xsectiondisplay.py @@ -57,7 +57,11 @@ def test_xsection_plot_map(): sample_files.EXAMPLE_VISST, combine='nested', concat_dim='time' ) try: - xsection = XSectionDisplay(radar_ds, figsize=(15, 8)) + with pytest.warns( + UserWarning, + match="Could not discern datastreamname and dict or tuple were not provided. Using defaultname of act_datastream!", + ): + xsection = XSectionDisplay(radar_ds, figsize=(15, 8)) xsection.plot_xsection_map( None, 'ir_temperature', From 99e852449cda13df94874730491e0881d64cdd8c Mon Sep 17 00:00:00 2001 From: Zach Sherman <19153455+zssherman@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:39:40 -0500 Subject: [PATCH 24/31] DOC: Fix spelling --- act/qc/arm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/act/qc/arm.py b/act/qc/arm.py index f070f55cff..b652e9ddd7 100644 --- a/act/qc/arm.py +++ b/act/qc/arm.py @@ -111,7 +111,7 @@ def add_dqr_to_qc( time = ds['time'].values # If the time is not a datetime64 because the read routine was not asked to - # convert CF variables, convert the time varible for this routine only. + # convert CF variables, convert the time variable for this routine only. 
if not np.issubdtype(time.dtype, np.datetime64): units_strings = [ 'seconds since ', From 4bba281933a7c9848e020b70bd400d76b6887c82 Mon Sep 17 00:00:00 2001 From: Zach Sherman <19153455+zssherman@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:39:51 -0500 Subject: [PATCH 25/31] DOC: Fix spelling --- act/qc/arm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/act/qc/arm.py b/act/qc/arm.py index b652e9ddd7..f8a1f5a737 100644 --- a/act/qc/arm.py +++ b/act/qc/arm.py @@ -71,7 +71,7 @@ def add_dqr_to_qc( Does not apply DQRs to location variables. This can be useful in the event the submitter has erroneously selected all variables. create_missing_qc_variables : boolean - If a quality control varible for the data varialbe does not exist, + If a quality control variable for the data variable does not exist, create the quality control varible and apply DQR. Returns From 2d7dc76d9167a8ff2d29498b721d0ed130bbdf6b Mon Sep 17 00:00:00 2001 From: Zach Sherman <19153455+zssherman@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:39:58 -0500 Subject: [PATCH 26/31] DOC: Fix spelling --- act/qc/arm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/act/qc/arm.py b/act/qc/arm.py index f8a1f5a737..a0c99f7095 100644 --- a/act/qc/arm.py +++ b/act/qc/arm.py @@ -213,7 +213,7 @@ def add_dqr_to_qc( if skip_location_vars and var_name in loc_vars: continue - # Do not process time varibles + # Do not process time variables if var_name in ['time', 'time_offset', 'time_bounds']: continue From 6ccc537ea68bc198934571c37e261b4ff621dc9a Mon Sep 17 00:00:00 2001 From: Zach Sherman <19153455+zssherman@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:40:20 -0500 Subject: [PATCH 27/31] MTN: Remove old code --- act/qc/clean.py | 1 - 1 file changed, 1 deletion(-) diff --git a/act/qc/clean.py b/act/qc/clean.py index c2797b5c6a..2b951e4a3e 100644 --- a/act/qc/clean.py +++ b/act/qc/clean.py @@ -792,7 +792,6 @@ def normalize_assessment( self, variables=None, 
exclude_variables=None, - # qc_lookup={'Incorrect': 'Bad', 'Suspect': 'Indeterminate'}, qc_lookup={'Bad': 'Incorrect', 'Indeterminate': 'Suspect'}, ): """ From 05541fcb6f97f9f2000066616f75db4f9925cc86 Mon Sep 17 00:00:00 2001 From: Zach Sherman <19153455+zssherman@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:40:29 -0500 Subject: [PATCH 28/31] MTN: Remove old code --- act/qc/qc_summary.py | 1 - 1 file changed, 1 deletion(-) diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py index fee27d9e75..b8a128d8ba 100644 --- a/act/qc/qc_summary.py +++ b/act/qc/qc_summary.py @@ -121,7 +121,6 @@ def create_qc_summary( ) # # Do not really know how to handle scalars yet. - # if return_ds[var_name].ndim == 0: # warnings.warn(f'Unable to process scalar variable {var_name}. ' # 'Scalar variables currently not implemented.') # continue From 4681905d14527e8058f87b7d2ea695d8c0e58856 Mon Sep 17 00:00:00 2001 From: Zach Sherman <19153455+zssherman@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:40:39 -0500 Subject: [PATCH 29/31] MNT: Remove old code --- act/qc/qc_summary.py | 1 - 1 file changed, 1 deletion(-) diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py index b8a128d8ba..82c20f7da6 100644 --- a/act/qc/qc_summary.py +++ b/act/qc/qc_summary.py @@ -121,7 +121,6 @@ def create_qc_summary( ) # # Do not really know how to handle scalars yet. - # warnings.warn(f'Unable to process scalar variable {var_name}. 
' # 'Scalar variables currently not implemented.') # continue From ac5967c4f72ffac62adf7bf97ce1db959294a303 Mon Sep 17 00:00:00 2001 From: Zach Sherman <19153455+zssherman@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:40:48 -0500 Subject: [PATCH 30/31] MNT: Remove old code --- act/qc/qc_summary.py | 1 - 1 file changed, 1 deletion(-) diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py index 82c20f7da6..35ecf8c396 100644 --- a/act/qc/qc_summary.py +++ b/act/qc/qc_summary.py @@ -121,7 +121,6 @@ def create_qc_summary( ) # # Do not really know how to handle scalars yet. - # 'Scalar variables currently not implemented.') # continue return_ds.qcfilter.add_test( From 8bdff06712835a898dcf2ea4c7fb726023e44dd6 Mon Sep 17 00:00:00 2001 From: Zach Sherman <19153455+zssherman@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:40:56 -0500 Subject: [PATCH 31/31] MNT: Remove old code --- act/qc/qc_summary.py | 1 - 1 file changed, 1 deletion(-) diff --git a/act/qc/qc_summary.py b/act/qc/qc_summary.py index 35ecf8c396..d96375abff 100644 --- a/act/qc/qc_summary.py +++ b/act/qc/qc_summary.py @@ -121,7 +121,6 @@ def create_qc_summary( ) # # Do not really know how to handle scalars yet. - # continue return_ds.qcfilter.add_test( var_name,