From 76624551c644eadadf7ced89c107faf62c6cc3a5 Mon Sep 17 00:00:00 2001 From: George McCabe <23407799+georgemccabe@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:49:36 -0600 Subject: [PATCH] Feature #2651 SeriesAnalysis -aggr argument and new use case (#2701) * exclude build directory from PyCharm project * refactor to satisfy SonarQube complaints * Per #2651, add support for setting -aggr argument with file path. Refactor functions to resolve SonarQube complaints. Enhance unit tests to test multiple init times and -aggr argument * add new config variables to basic use case * change settings to match MET unit test * added new config variables to set -aggr argument in SeriesAnalysis wrapper to documentation * Per #2651, add new use case to demonstrate using the -aggr argument to SeriesAnalysis * turn on new use case to test it in GHA * remove blank line * turn off use case to prepare for PR --- .github/parm/use_case_groups.json | 5 + .idea/METplus.iml | 4 +- docs/Users_Guide/glossary.rst | 13 + docs/Users_Guide/wrappers.rst | 2 + .../SeriesAnalysis/SeriesAnalysis_aggr.py | 115 +++++ .../series_analysis/test_series_analysis.py | 54 ++- internal/tests/use_cases/all_use_cases.txt | 1 + metplus/wrappers/series_analysis_wrapper.py | 409 +++++++++--------- .../SeriesAnalysis/SeriesAnalysis.conf | 3 + .../SeriesAnalysis/SeriesAnalysis_aggr.conf | 215 +++++++++ 10 files changed, 587 insertions(+), 234 deletions(-) create mode 100644 docs/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.py create mode 100644 parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json index e53d706aad..05cc0b6196 100644 --- a/.github/parm/use_case_groups.json +++ b/.github/parm/use_case_groups.json @@ -9,6 +9,11 @@ "index_list": "30-58", "run": false }, + { + "category": "met_tool_wrapper", + "index_list": "65", + "run": false + }, { "category": "air_quality_and_comp", "index_list": 
"0", diff --git a/.idea/METplus.iml b/.idea/METplus.iml index 5fdd65ba2a..54d4a167a4 100644 --- a/.idea/METplus.iml +++ b/.idea/METplus.iml @@ -1,7 +1,9 @@ - + + + diff --git a/docs/Users_Guide/glossary.rst b/docs/Users_Guide/glossary.rst index 9e029bf4be..6bc690bf76 100644 --- a/docs/Users_Guide/glossary.rst +++ b/docs/Users_Guide/glossary.rst @@ -12978,3 +12978,16 @@ METplus Configuration Glossary See: :term:`_CLIMO_STDEV_VAR_OPTIONS` | *Used by:* SeriesAnalysis + + SERIES_ANALYSIS_AGGR_INPUT_TEMPLATE + Template used to specify the file path to pass to SeriesAnalysis using the + -aggr command line argument. This file is the output NetCDF file from a + previous SeriesAnalysis run. + + | *Used by:* SeriesAnalysis + + SERIES_ANALYSIS_AGGR_INPUT_DIR + Directory containing SeriesAnalysis output to be read by SeriesAnalysis + using the -aggr command line argument. + + | *Used by:* SeriesAnalysis diff --git a/docs/Users_Guide/wrappers.rst b/docs/Users_Guide/wrappers.rst index 3ebe53b17a..c4da8506ad 100644 --- a/docs/Users_Guide/wrappers.rst +++ b/docs/Users_Guide/wrappers.rst @@ -7941,6 +7941,7 @@ METplus Configuration | :term:`OBS_SERIES_ANALYSIS_INPUT_DIR` | :term:`BOTH_SERIES_ANALYSIS_INPUT_DIR` | :term:`SERIES_ANALYSIS_TC_STAT_INPUT_DIR` +| :term:`SERIES_ANALYSIS_AGGR_INPUT_DIR` | :term:`SERIES_ANALYSIS_OUTPUT_DIR` | :term:`FCST_SERIES_ANALYSIS_INPUT_TEMPLATE` | :term:`OBS_SERIES_ANALYSIS_INPUT_TEMPLATE` @@ -7949,6 +7950,7 @@ METplus Configuration | :term:`OBS_SERIES_ANALYSIS_INPUT_FILE_LIST` | :term:`BOTH_SERIES_ANALYSIS_INPUT_FILE_LIST` | :term:`SERIES_ANALYSIS_TC_STAT_INPUT_TEMPLATE` +| :term:`SERIES_ANALYSIS_AGGR_INPUT_TEMPLATE` | :term:`SERIES_ANALYSIS_OUTPUT_TEMPLATE` | :term:`SERIES_ANALYSIS_CLIMO_MEAN_FILE_NAME` | :term:`SERIES_ANALYSIS_CLIMO_MEAN_VAR_NAME` diff --git a/docs/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.py b/docs/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.py new file mode 100644 index 0000000000..2145858909 
--- /dev/null +++ b/docs/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.py @@ -0,0 +1,115 @@ +""" +SeriesAnalysis: Aggregate Output Use Case +========================================= + +met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf + +""" +############################################################################## +# Scientific Objective +# -------------------- +# +# Read in output from a previous SeriesAnalysis run into SeriesAnalysis to +# aggregate the results. + +############################################################################## +# Datasets +# -------- +# +# | **Forecast:** GFS 6 hour precipitation accumulation +# | **Observation:** STAGE4 6 hour precipitation accumulation +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. +# | + +############################################################################## +# METplus Components +# ------------------ +# +# This use case utilizes the METplus SeriesAnalysis wrapper to search for +# files that are valid at a given run time and generates a command to run +# the MET tool series_analysis if all required files are found. + +############################################################################## +# METplus Workflow +# ---------------- +# +# SeriesAnalysis is the only tool called in this example. 
It processes the following +# run times: +# +# | **Init:** 2012-04-09_0Z +# | **Forecast lead:** 30, 36, and 42 hour +# | + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads all of the configuration files found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line, +# e.g. parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf + +############################################################################## +# MET Configuration +# --------------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`SeriesAnalysis MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. 
literalinclude:: ../../../../parm/met_config/SeriesAnalysisConfig_wrapped + +############################################################################## +# Running METplus +# --------------- +# +# Pass the use case configuration file to the run_metplus.py script +# along with any user-specific system configuration files if desired:: +# +# run_metplus.py /path/to/METplus/parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf /path/to/user_system.conf +# +# See :ref:`running-metplus` for more information. + + +############################################################################## +# Expected Output +# --------------- +# +# A successful run will output the following both to the screen and to the logfile:: +# +# INFO: METplus has successfully finished running. +# +# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. +# Output for this use case will be found in series_analysis (relative to **OUTPUT_BASE**) +# and will contain the following file: +# +# * series_analysis_AGGR_CMD_LINE_APCP_06_2012040900_to_2012041000.nc + +############################################################################## +# Keywords +# -------- +# +# .. note:: +# +# * SeriesAnalysisUseCase +# * DiagnosticsUseCase +# * RuntimeFreqUseCase +# * GRIBFileUseCase +# +# Navigate to the :ref:`quick-search` page to discover other similar use cases. 
+# +# +# +# sphinx_gallery_thumbnail_path = '_static/met_tool_wrapper-SeriesAnalysis.png' +# diff --git a/internal/tests/pytests/wrappers/series_analysis/test_series_analysis.py b/internal/tests/pytests/wrappers/series_analysis/test_series_analysis.py index acbffecb95..0f10114c25 100644 --- a/internal/tests/pytests/wrappers/series_analysis/test_series_analysis.py +++ b/internal/tests/pytests/wrappers/series_analysis/test_series_analysis.py @@ -18,11 +18,14 @@ obs_fmt = (f'field = [{{ name="{obs_name}"; ' f'level="{obs_level_no_quotes}"; }}];') time_fmt = '%Y%m%d%H' -#run_times = ['2005080700', '2005080712'] -run_times = ['2005080700',] +run_times = ['2005080700', '2005080712'] stat_list = 'TOTAL,RMSE,FBAR,OBAR' stat_list_quotes = '", "'.join(stat_list.split(',')) stat_list_fmt = f'output_stats = {{cnt = ["{stat_list_quotes}"];}}' +aggr_dir = '/some/fake/path/for' +aggr_rel = 'aggr_file_<RUN_TIME>.nc' +aggr_template = 'aggr_file_{init?fmt=%Y%m%d%H}.nc' +both_file_list = '/some/fake/path/for/both/file_list.txt' def get_input_dirs(config): @@ -613,6 +616,10 @@ def test_series_analysis_missing_inputs(metplus_config, get_test_data_dir, 'time_interp_method = NEAREST;' 'match_month = TRUE;day_interval = 30;' 'hour_interval = 12;}')}), + ({'SERIES_ANALYSIS_AGGR_INPUT_TEMPLATE': os.path.join(aggr_dir, aggr_template), }, + {}), + ({'SERIES_ANALYSIS_AGGR_INPUT_DIR': aggr_dir, 'SERIES_ANALYSIS_AGGR_INPUT_TEMPLATE': aggr_template,}, + {}), ] ) @pytest.mark.wrapper_a @@ -638,31 +645,34 @@ def test_series_analysis_single_field(metplus_config, config_overrides, config_file = wrapper.c_dict.get('CONFIG_FILE') out_dir = wrapper.c_dict.get('OUTPUT_DIR') prefix = 'series_analysis_files_' - suffix = '_init_20050807000000_valid_ALL_lead_ALL.txt' + suffix = '_init_<RUN_TIME>0000_valid_ALL_lead_ALL.txt' fcst_file = f'{prefix}fcst{suffix}' obs_file = f'{prefix}obs{suffix}' - + + extra_args = ' ' + if 'SERIES_ANALYSIS_AGGR_INPUT_TEMPLATE' in config_overrides: + extra_args += f'-aggr {os.path.join(aggr_dir, 
aggr_rel)} ' + if is_both: - expected_cmds = [(f"{app_path} " - f"-both {out_dir}/{fcst_file} " - f"-out {out_dir}/2005080700 " - f"-config {config_file} {verbosity}"), - ] + file_args = f"-both {out_dir}/{fcst_file}" else: - expected_cmds = [(f"{app_path} " - f"-fcst {out_dir}/{fcst_file} " - f"-obs {out_dir}/{obs_file} " - f"-out {out_dir}/2005080700 " - f"-config {config_file} {verbosity}"), - ] + file_args = f"-fcst {out_dir}/{fcst_file} -obs {out_dir}/{obs_file}" - all_cmds = wrapper.run_all_times() + expected_cmds = [] + for run_time in run_times: + cmd = (f"{app_path} {file_args} -out {out_dir}/<RUN_TIME>{extra_args}" + f"-config {config_file} {verbosity}") + expected_cmds.append(cmd.replace('<RUN_TIME>', run_time)) + all_cmds = wrapper.run_all_times() expected_len = len(expected_cmds) + compare_cmds = all_cmds if 'SERIES_ANALYSIS_GENERATE_PLOTS' in config_overrides: - expected_len += 8 + expected_len += 8 * len(expected_cmds) + compare_cmds = all_cmds[0::9][0:len(expected_cmds)] if 'SERIES_ANALYSIS_GENERATE_ANIMATIONS' in config_overrides: expected_len += 4 + assert len(all_cmds) == expected_len special_values = { @@ -672,7 +682,7 @@ def test_series_analysis_single_field(metplus_config, config_overrides, if 'METPLUS_OUTPUT_STATS_DICT' not in env_var_values: special_values['METPLUS_OUTPUT_STATS_DICT'] = stat_list_fmt # only compare first command since the rest are not series_analysis - compare_command_and_env_vars(all_cmds[0:1], expected_cmds, env_var_values, + compare_command_and_env_vars(compare_cmds, expected_cmds, env_var_values, wrapper, special_values) @@ -1204,9 +1214,9 @@ def test_get_netcdf_min_max(tmp_path_factory, wrapper = series_analysis_wrapper(metplus_config) - min, max = wrapper._get_netcdf_min_max(filepath, variable_name) - assert min == expected_min - assert max == expected_max + min_val, max_val = wrapper._get_netcdf_min_max(filepath, variable_name) + assert min_val == expected_min + assert max_val == expected_max @pytest.mark.wrapper_a @@ -1237,7 +1247,7 @@ 
def test_run_once_per_lead(metplus_config): assert wrapper.isOK assert actual is True - # lead_hours = None + # lead_hours None with mock.patch.object(saw, 'ti_get_hours_from_lead', return_value=None): actual = wrapper.run_once_per_lead(None) assert actual is True diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index 321bbd130f..1522963abe 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -64,6 +64,7 @@ Category: met_tool_wrapper 62::TCDiag:: met_tool_wrapper/TCDiag/TCDiag.conf 63::WaveletStat:: met_tool_wrapper/WaveletStat/WaveletStat.conf 64::MADIS2NC:: met_tool_wrapper/MADIS2NC/MADIS2NC.conf +65::SeriesAnalysis_aggr:: met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf::netcdf4_env Category: air_quality_and_comp 0::EnsembleStat_fcstICAP_obsMODIS_aod::model_applications/air_quality_and_comp/EnsembleStat_fcstICAP_obsMODIS_aod.conf diff --git a/metplus/wrappers/series_analysis_wrapper.py b/metplus/wrappers/series_analysis_wrapper.py index ac14097840..cea1a44b4a 100755 --- a/metplus/wrappers/series_analysis_wrapper.py +++ b/metplus/wrappers/series_analysis_wrapper.py @@ -144,29 +144,7 @@ def create_c_dict(self): extra_args={'remove_quotes': True}) # handle all output_stats dictionary values - output_stats_dict = {} - for key in self.OUTPUT_STATS: - nicknames = [ - f'SERIES_ANALYSIS_OUTPUT_STATS_{key.upper()}', - f'SERIES_ANALYSIS_{key.upper()}_LIST', - f'SERIES_ANALYSIS_{key.upper()}' - ] - # add legacy support for STAT_LIST for cnt - if key == 'cnt': - nicknames.append('SERIES_ANALYSIS_STAT_LIST') - # read cnt stat list to get stats to loop over for plotting - self.add_met_config(name='cnt', - data_type='list', - env_var_name='STAT_LIST', - metplus_configs=nicknames) - c_dict['STAT_LIST'] = getlist( - self.get_env_var_value('METPLUS_STAT_LIST') - ) - - value = ('list', None, None, nicknames) - output_stats_dict[key] = value - - 
self.add_met_config_dict('output_stats', output_stats_dict) + self._handle_output_stats_dict(c_dict) self.handle_mask(single_value=True) @@ -180,128 +158,19 @@ def create_c_dict(self): data_types=('FCST', 'OBS'), app_name=self.app_name) - # get input dir, template, and datatype for FCST, OBS, and BOTH - for data_type in ('FCST', 'OBS', 'BOTH'): - - # check if {data_type}_{app}_FILE_LIST is set - c_dict[f'{data_type}_INPUT_FILE_LIST'] = ( - self.config.getraw( - 'config', - f'{data_type}_SERIES_ANALYSIS_INPUT_FILE_LIST' - ) - ) - - c_dict[f'{data_type}_INPUT_DIR'] = ( - self.config.getdir(f'{data_type}_SERIES_ANALYSIS_INPUT_DIR', '') - ) - c_dict[f'{data_type}_INPUT_TEMPLATE'] = ( - self.config.getraw('config', - f'{data_type}_SERIES_ANALYSIS_INPUT_TEMPLATE', - '') - ) - - c_dict[f'{data_type}_INPUT_DATATYPE'] = ( - self.config.getstr('config', - f'{data_type}_SERIES_ANALYSIS_INPUT_DATATYPE', - '') - ) - - # read and set file type env var for FCST and OBS - if data_type == 'BOTH': - continue - - self.add_met_config( - name='file_type', - data_type='string', - env_var_name=f'{data_type}_FILE_TYPE', - metplus_configs=[f'{data_type}_SERIES_ANALYSIS_FILE_TYPE', - f'SERIES_ANALYSIS_{data_type}_FILE_TYPE', - f'{data_type}_FILE_TYPE', - f'{data_type}_SERIES_ANALYSIS_INPUT_DATATYPE', - 'SERIES_ANALYSIS_FILE_TYPE'], - extra_args={'remove_quotes': True, - 'uppercase': True}) - - self.add_met_config( - name='cat_thresh', - data_type='list', - env_var_name=f'METPLUS_{data_type}_CAT_THRESH', - metplus_configs=[f'{data_type}_SERIES_ANALYSIS_CAT_THRESH', - f'SERIES_ANALYSIS_{data_type}_CAT_THRESH', - f'{data_type}_CAT_THRESH'], - extra_args={'remove_quotes': True} - ) - - c_dict['USING_BOTH'] = (c_dict['BOTH_INPUT_TEMPLATE'] or - c_dict['BOTH_INPUT_FILE_LIST']) + self.get_input_templates(c_dict, { + 'FCST': {'prefix': 'FCST_SERIES_ANALYSIS', 'required': False}, + 'OBS': {'prefix': 'OBS_SERIES_ANALYSIS', 'required': False}, + 'BOTH': {'prefix': 'BOTH_SERIES_ANALYSIS', 'required': 
False}, + 'TC_STAT': {'prefix': 'SERIES_ANALYSIS_TC_STAT', 'required': False}, + 'AGGR': {'prefix': 'SERIES_ANALYSIS_AGGR', 'required': False}, + }) - if c_dict['USING_BOTH']: - - # check if using explicit file list for BOTH - if c_dict['BOTH_INPUT_FILE_LIST']: - c_dict['EXPLICIT_FILE_LIST'] = True - else: - # set *_WINDOW_* variables for BOTH - # used in CommandBuilder.find_data function) - self.handle_file_window_variables(c_dict, data_types=['BOTH']) - - prob_thresh = self.config.getraw( - 'config', - 'BOTH_SERIES_ANALYSIS_PROB_THRESH' - ) - c_dict['FCST_PROB_THRESH'] = prob_thresh - c_dict['OBS_PROB_THRESH'] = prob_thresh + self._handle_fcst_obs_or_both_c_dict(c_dict) - # if BOTH is not set, both FCST or OBS must be set - else: - fcst_input_list = c_dict['FCST_INPUT_FILE_LIST'] - obs_input_list = c_dict['OBS_INPUT_FILE_LIST'] - if fcst_input_list and obs_input_list: - c_dict['EXPLICIT_FILE_LIST'] = True - elif not fcst_input_list and not obs_input_list: - if (not c_dict['FCST_INPUT_TEMPLATE'] or - not c_dict['OBS_INPUT_TEMPLATE']): - self.log_error( - "Must either set " - "BOTH_SERIES_ANALYSIS_INPUT_TEMPLATE or both " - "FCST_SERIES_ANALYSIS_INPUT_TEMPLATE and " - "OBS_SERIES_ANALYSIS_INPUT_TEMPLATE to run " - "SeriesAnalysis wrapper." 
- ) - - # set *_WINDOW_* variables for FCST and OBS - self.handle_file_window_variables(c_dict) - # if fcst input list or obs input list are not set - else: - self.log_error('Cannot set ' - 'FCST_SERIES_ANALYSIS_INPUT_FILE_LIST ' - 'without OBS_SERIES_ANALYSIS_INPUT_FILE_LIST ' - 'and vice versa') - - c_dict['FCST_PROB_THRESH'] = ( - self.config.getraw('config', - 'FCST_SERIES_ANALYSIS_PROB_THRESH') - ) - - c_dict['OBS_PROB_THRESH'] = ( - self.config.getraw('config', - 'OBS_SERIES_ANALYSIS_PROB_THRESH') - ) - - c_dict['TC_STAT_INPUT_DIR'] = ( - self.config.getdir('SERIES_ANALYSIS_TC_STAT_INPUT_DIR', '') - ) - - c_dict['TC_STAT_INPUT_TEMPLATE'] = ( - self.config.getraw('config', - 'SERIES_ANALYSIS_TC_STAT_INPUT_TEMPLATE') - ) - - c_dict['OUTPUT_DIR'] = self.config.getdir('SERIES_ANALYSIS_OUTPUT_DIR', - '') + c_dict['OUTPUT_DIR'] = self.config.getdir('SERIES_ANALYSIS_OUTPUT_DIR', '') c_dict['OUTPUT_TEMPLATE'] = ( - self.config.getraw('config', - 'SERIES_ANALYSIS_OUTPUT_TEMPLATE') + self.config.getraw('config', 'SERIES_ANALYSIS_OUTPUT_TEMPLATE') ) if not c_dict['OUTPUT_DIR']: self.log_error("Must set SERIES_ANALYSIS_OUTPUT_DIR to run.") @@ -370,6 +239,125 @@ def create_c_dict(self): return c_dict + def _handle_output_stats_dict(self, c_dict): + output_stats_dict = {} + for key in self.OUTPUT_STATS: + nicknames = [ + f'SERIES_ANALYSIS_OUTPUT_STATS_{key.upper()}', + f'SERIES_ANALYSIS_{key.upper()}_LIST', + f'SERIES_ANALYSIS_{key.upper()}' + ] + # add legacy support for STAT_LIST for cnt + if key == 'cnt': + nicknames.append('SERIES_ANALYSIS_STAT_LIST') + # read cnt stat list to get stats to loop over for plotting + self.add_met_config(name='cnt', + data_type='list', + env_var_name='STAT_LIST', + metplus_configs=nicknames) + c_dict['STAT_LIST'] = getlist( + self.get_env_var_value('METPLUS_STAT_LIST') + ) + + value = ('list', None, None, nicknames) + output_stats_dict[key] = value + + self.add_met_config_dict('output_stats', output_stats_dict) + + def 
_handle_fcst_obs_or_both_c_dict(self, c_dict): + # get input datatype for FCST, OBS, and BOTH (dir/template are set by get_input_templates) + for data_type in ('FCST', 'OBS', 'BOTH'): + c_dict[f'{data_type}_INPUT_DATATYPE'] = ( + self.config.getstr('config', + f'{data_type}_SERIES_ANALYSIS_INPUT_DATATYPE', + '') + ) + + # read and set file type env var for FCST and OBS + if data_type == 'BOTH': + continue + + self.add_met_config( + name='file_type', + data_type='string', + env_var_name=f'{data_type}_FILE_TYPE', + metplus_configs=[f'{data_type}_SERIES_ANALYSIS_FILE_TYPE', + f'SERIES_ANALYSIS_{data_type}_FILE_TYPE', + f'{data_type}_FILE_TYPE', + f'{data_type}_SERIES_ANALYSIS_INPUT_DATATYPE', + 'SERIES_ANALYSIS_FILE_TYPE'], + extra_args={'remove_quotes': True, + 'uppercase': True}) + + self.add_met_config( + name='cat_thresh', + data_type='list', + env_var_name=f'METPLUS_{data_type}_CAT_THRESH', + metplus_configs=[f'{data_type}_SERIES_ANALYSIS_CAT_THRESH', + f'SERIES_ANALYSIS_{data_type}_CAT_THRESH', + f'{data_type}_CAT_THRESH'], + extra_args={'remove_quotes': True} + ) + + c_dict['USING_BOTH'] = (c_dict['BOTH_INPUT_TEMPLATE'] or + c_dict.get('BOTH_INPUT_FILE_LIST')) + + if c_dict['USING_BOTH']: + + # check if using explicit file list for BOTH + if c_dict.get('BOTH_INPUT_FILE_LIST'): + c_dict['EXPLICIT_FILE_LIST'] = True + else: + # set *_WINDOW_* variables for BOTH + # used in CommandBuilder.find_data function + self.handle_file_window_variables(c_dict, data_types=['BOTH']) + + prob_thresh = self.config.getraw( + 'config', + 'BOTH_SERIES_ANALYSIS_PROB_THRESH' + ) + c_dict['FCST_PROB_THRESH'] = prob_thresh + c_dict['OBS_PROB_THRESH'] = prob_thresh + return + + c_dict['FCST_PROB_THRESH'] = ( + self.config.getraw('config', + 'FCST_SERIES_ANALYSIS_PROB_THRESH') + ) + + c_dict['OBS_PROB_THRESH'] = ( + self.config.getraw('config', + 'OBS_SERIES_ANALYSIS_PROB_THRESH') + ) + + # if BOTH is not set, both FCST and OBS must be set + fcst_input_list = c_dict.get('FCST_INPUT_FILE_LIST', '') + obs_input_list 
= c_dict.get('OBS_INPUT_FILE_LIST', '') + if fcst_input_list and obs_input_list: + c_dict['EXPLICIT_FILE_LIST'] = True + return + + if not fcst_input_list and not obs_input_list: + if (not c_dict['FCST_INPUT_TEMPLATE'] or + not c_dict['OBS_INPUT_TEMPLATE']): + self.log_error( + "Must either set " + "BOTH_SERIES_ANALYSIS_INPUT_TEMPLATE or both " + "FCST_SERIES_ANALYSIS_INPUT_TEMPLATE and " + "OBS_SERIES_ANALYSIS_INPUT_TEMPLATE to run " + "SeriesAnalysis wrapper." + ) + + # set *_WINDOW_* variables for FCST and OBS + self.handle_file_window_variables(c_dict) + return + + # if fcst input list or obs input list are not set + self.log_error('Cannot set ' + 'FCST_SERIES_ANALYSIS_INPUT_FILE_LIST ' + 'without OBS_SERIES_ANALYSIS_INPUT_FILE_LIST ' + 'and vice versa') + def _plot_data_plane_init(self): """! Set values to allow successful initialization of PlotDataPlane wrapper @@ -409,7 +397,7 @@ def run_all_times(self): def run_once_per_lead(self, custom): """! Run once per forecast lead - @param value of current CUSTOM_LOOP_LIST iteration + @param custom value of current CUSTOM_LOOP_LIST iteration @returns True if all runs were successful, False otherwise """ self.logger.debug("Running once for forecast lead time") @@ -501,11 +489,7 @@ def run_at_time_once(self, time_info, lead_group=None): lead_group) ) if not fcst_path or not obs_path: - msg = 'No ASCII file lists were created. Skipping.' - if self.c_dict['ALLOW_MISSING_INPUTS']: - self.logger.warning(msg) - else: - self.log_error(msg) + self._log_allow_missing('No ASCII file lists were created. Skipping.') continue # Build up the arguments to and then run the MET tool series_analysis. 
@@ -662,52 +646,9 @@ def _get_fcst_and_obs_path(self, time_info, storm_id, lead_group): # if file list are explicitly specified, # return the file list file paths if self.c_dict.get('EXPLICIT_FILE_LIST', False): - # set forecast lead to last lead in list to set in output filename - if leads: - time_info['lead'] = leads[-1] - - if self.c_dict['USING_BOTH']: - both_path = do_string_sub(self.c_dict['BOTH_INPUT_FILE_LIST'], - **time_info) - self.logger.debug(f"Explicit BOTH file list file: {both_path}") - if not os.path.exists(both_path): - msg = f'Could not find file: {both_path}' - if self.c_dict['ALLOW_MISSING_INPUTS']: - self.logger.warning(msg) - else: - self.log_error(msg) - return None, None - - return both_path, both_path - - fcst_path = do_string_sub(self.c_dict['FCST_INPUT_FILE_LIST'], - **time_info) - self.logger.debug(f"Explicit FCST file list file: {fcst_path}") - if not os.path.exists(fcst_path): - msg = f'Could not find forecast file: {fcst_path}' - if self.c_dict['ALLOW_MISSING_INPUTS']: - self.logger.warning(msg) - else: - self.log_error(msg) - - fcst_path = None - - obs_path = do_string_sub(self.c_dict['OBS_INPUT_FILE_LIST'], - **time_info) - self.logger.debug(f"Explicit OBS file list file: {obs_path}") - if not os.path.exists(obs_path): - msg = f'Could not find observation file: {obs_path}' - if self.c_dict['ALLOW_MISSING_INPUTS']: - self.logger.warning(msg) - else: - self.log_error(msg) - - obs_path = None - - return fcst_path, obs_path + return self._handle_explicit_file_list(leads, time_info) output_dir = self.get_output_dir(time_info, storm_id, label) - list_file_dict = self.subset_input_files(time_info, output_dir=output_dir, leads=leads, @@ -724,6 +665,43 @@ def _get_fcst_and_obs_path(self, time_info, storm_id, lead_group): obs_path = list_file_dict[obs_key] return fcst_path, obs_path + def _handle_explicit_file_list(self, leads, time_info): + # set forecast lead to last lead in list to set in output filename + if leads: + time_info['lead'] = 
leads[-1] + + if self.c_dict['USING_BOTH']: + both_path = do_string_sub(self.c_dict.get('BOTH_INPUT_FILE_LIST', ''), + **time_info) + self.logger.debug(f"Explicit BOTH file list file: {both_path}") + if not os.path.exists(both_path): + self._log_allow_missing(f'Could not find file: {both_path}') + return None, None + + return both_path, both_path + + fcst_path = do_string_sub(self.c_dict.get('FCST_INPUT_FILE_LIST', ''), + **time_info) + self.logger.debug(f"Explicit FCST file list file: {fcst_path}") + if not os.path.exists(fcst_path): + self._log_allow_missing(f'Could not find forecast file: {fcst_path}') + fcst_path = None + + obs_path = do_string_sub(self.c_dict.get('OBS_INPUT_FILE_LIST', ''), + **time_info) + self.logger.debug(f"Explicit OBS file list file: {obs_path}") + if not os.path.exists(obs_path): + self._log_allow_missing(f'Could not find observation file: {obs_path}') + obs_path = None + + return fcst_path, obs_path + + def _log_allow_missing(self, msg): + if self.c_dict['ALLOW_MISSING_INPUTS']: + self.logger.warning(msg) + else: + self.log_error(msg) + def _check_python_embedding(self): """! Check if any of the field names contain a Python embedding script. See CommandBuilder.check_for_python_embedding for more info. @@ -750,8 +728,7 @@ def get_output_dir(self, time_info, storm_id, label): @param time_info dictionary containing time information for current run @param storm_id storm ID to process - @param label label defined for forecast lead groups to identify - them + @param label defined for forecast lead groups to identify them @returns path to output directory with filename templates substituted with the information for the current run """ @@ -780,11 +757,11 @@ def build_and_run_series_request(self, time_info, fcst_path, obs_path): """! Build up the -obs, -fcst, -out necessary for running the series_analysis MET tool, then invoke series_analysis. 
- @param time_info dictionary containing time information for - current run - @param storm_id storm ID to process - @returns True if all runs succeeded, False if there was a problem - with any of the runs + @param time_info dictionary containing time information for current run + @param fcst_path path to forecast file + @param obs_path path to observation file + @returns True if all runs succeeded, False if there was a problem + with any of the runs """ success = True @@ -836,12 +813,19 @@ def set_command_line_arguments(self, time_info): """ # add input data format if set if self.c_dict['PAIRED']: - self.args.append(" -paired") + self.args.append("-paired") + + # add -aggr argument if set + if self.c_dict.get('AGGR_INPUT_TEMPLATE'): + template = os.path.join(self.c_dict['AGGR_INPUT_DIR'], + self.c_dict['AGGR_INPUT_TEMPLATE']) + filepath = do_string_sub(template, **time_info) + self.args.append(f"-aggr {filepath}") # add config file - passing through do_string_sub # to get custom string if set config_file = do_string_sub(self.c_dict['CONFIG_FILE'], **time_info) - self.args.append(f" -config {config_file}") + self.args.append(f"-config {config_file}") def get_command(self): """! 
Build command to run @@ -860,7 +844,8 @@ def get_command(self): cmd += f' -out {self.get_output_path()}' # add arguments - cmd += ''.join(self.args) + if self.args: + cmd += ' ' + ' '.join(self.args) # add verbosity cmd += ' -v ' + self.c_dict['VERBOSITY'] @@ -1006,15 +991,8 @@ def get_fcst_file_info(self, fcst_path): beg = None end = None for filepath in files_of_interest: - filepath = filepath.strip() - found = False - for template in templates: - file_time_info = parse_template(template, filepath, self.logger) - if file_time_info: - found = True - break - - if not found: + file_time_info = self._get_time_from_templates(templates, filepath) + if file_time_info is None: continue lead = ti_get_seconds_from_lead(file_time_info.get('lead'), @@ -1031,6 +1009,14 @@ def get_fcst_file_info(self, fcst_path): return num, beg, end + def _get_time_from_templates(self, templates, filepath): + filepath = filepath.strip() + for template in templates: + file_time_info = parse_template(template, filepath, self.logger) + if file_time_info: + return file_time_info + return None + @staticmethod def _get_netcdf_min_max(filepath, variable_name): """! 
Determine the min and max for all lead times for each @@ -1165,6 +1151,7 @@ def _get_times_from_file_list(file_path, templates): for file_name in file_list: found = False + file_time_info = None for template in templates: file_time_info = parse_template(template, file_name) if file_time_info: diff --git a/parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis.conf b/parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis.conf index b1a19e7e40..278a7cfc20 100644 --- a/parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis.conf +++ b/parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis.conf @@ -54,6 +54,9 @@ OBS_SERIES_ANALYSIS_INPUT_TEMPLATE = ST2ml{valid?fmt=%Y%m%d%H}_A03h.nc SERIES_ANALYSIS_TC_STAT_INPUT_DIR = SERIES_ANALYSIS_TC_STAT_INPUT_TEMPLATE = +SERIES_ANALYSIS_AGGR_INPUT_DIR = +SERIES_ANALYSIS_AGGR_INPUT_TEMPLATE = + SERIES_ANALYSIS_OUTPUT_DIR = {OUTPUT_BASE}/met_tool_wrapper/SeriesAnalysis SERIES_ANALYSIS_OUTPUT_TEMPLATE = {init?fmt=%Y%m%d%H}_sa.nc diff --git a/parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf b/parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf new file mode 100644 index 0000000000..f0facffa20 --- /dev/null +++ b/parm/use_cases/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.conf @@ -0,0 +1,215 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/met_tool_wrapper/SeriesAnalysis/SeriesAnalysis_aggr.html + +# For additional information, please see the METplus Users Guide. 
+# https://metplus.readthedocs.io/en/latest/Users_Guide + +### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + +PROCESS_LIST = SeriesAnalysis + + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +SERIES_ANALYSIS_RUNTIME_FREQ = RUN_ONCE_PER_INIT_OR_VALID + +LOOP_BY = INIT +INIT_TIME_FMT = %Y%m%d%H +INIT_BEG=2012040900 +INIT_END=2012040900 +INIT_INCREMENT = 1d + +LEAD_SEQ = 30, 36, 42 + + +SERIES_ANALYSIS_RUN_ONCE_PER_STORM_ID = False + +SERIES_ANALYSIS_CUSTOM_LOOP_LIST = + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +FCST_SERIES_ANALYSIS_INPUT_DIR = {INPUT_BASE}/met_test/new/unit_test/model_data/grib1/gfs_hmt +FCST_SERIES_ANALYSIS_INPUT_TEMPLATE = gfs_{init?fmt=%Y%m%d%H}_F{lead?fmt=%3H}.grib + +OBS_SERIES_ANALYSIS_INPUT_DIR = {INPUT_BASE}/met_test/new/unit_test/obs_data/stage4_hmt +OBS_SERIES_ANALYSIS_INPUT_TEMPLATE = stage4_{valid?fmt=%Y%m%d%H}_06h.grib + +SERIES_ANALYSIS_TC_STAT_INPUT_DIR = +SERIES_ANALYSIS_TC_STAT_INPUT_TEMPLATE = + +SERIES_ANALYSIS_AGGR_INPUT_DIR = {INPUT_BASE}/met_test/new/test_out/series_analysis +SERIES_ANALYSIS_AGGR_INPUT_TEMPLATE = series_analysis_CMD_LINE_APCP_06_{init?fmt=%Y%m%d%H}_to_{init?fmt=%Y%m%d%H?shift=1d}.nc + +SERIES_ANALYSIS_OUTPUT_DIR = {OUTPUT_BASE}/series_analysis +SERIES_ANALYSIS_OUTPUT_TEMPLATE = series_analysis_AGGR_CMD_LINE_APCP_06_{init?fmt=%Y%m%d%H}_to_{init?fmt=%Y%m%d%H?shift=1d}.nc + +SERIES_ANALYSIS_CLIMO_MEAN_INPUT_DIR = +SERIES_ANALYSIS_CLIMO_MEAN_INPUT_TEMPLATE 
= + +SERIES_ANALYSIS_CLIMO_STDEV_INPUT_DIR = +SERIES_ANALYSIS_CLIMO_STDEV_INPUT_TEMPLATE = + + +### +# Field Info +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info +### + +FCST_CAT_THRESH = >0.0, >5.0 +OBS_CAT_THRESH = >0.0, >5.0 + +FCST_VAR1_NAME = APCP +FCST_VAR1_LEVELS = A06 + +OBS_VAR1_NAME = APCP +OBS_VAR1_LEVELS = A06 + + +### +# SeriesAnalysis Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#seriesanalysis +### + +MODEL = GFS +OBTYPE = STAGE4 + +#LOG_SERIES_ANALYSIS_VERBOSITY = 2 + +SERIES_ANALYSIS_CONFIG_FILE = {PARM_BASE}/met_config/SeriesAnalysisConfig_wrapped + +SERIES_ANALYSIS_IS_PAIRED = False + +#SERIES_ANALYSIS_DESC = + +#SERIES_ANALYSIS_CAT_THRESH = + +#SERIES_ANALYSIS_VLD_THRESH = + +#SERIES_ANALYSIS_BLOCK_SIZE = + +#SERIES_ANALYSIS_REGRID_TO_GRID = +#SERIES_ANALYSIS_REGRID_METHOD = +#SERIES_ANALYSIS_REGRID_WIDTH = +#SERIES_ANALYSIS_REGRID_VLD_THRESH = +#SERIES_ANALYSIS_REGRID_SHAPE = +#SERIES_ANALYSIS_REGRID_CONVERT = +#SERIES_ANALYSIS_REGRID_CENSOR_THRESH = +#SERIES_ANALYSIS_REGRID_CENSOR_VAL = + +#SERIES_ANALYSIS_CLIMO_MEAN_FILE_NAME = +#SERIES_ANALYSIS_CLIMO_MEAN_FIELD = +#SERIES_ANALYSIS_CLIMO_MEAN_REGRID_METHOD = +#SERIES_ANALYSIS_CLIMO_MEAN_REGRID_WIDTH = +#SERIES_ANALYSIS_CLIMO_MEAN_REGRID_VLD_THRESH = +#SERIES_ANALYSIS_CLIMO_MEAN_REGRID_SHAPE = +#SERIES_ANALYSIS_CLIMO_MEAN_TIME_INTERP_METHOD = +#SERIES_ANALYSIS_CLIMO_MEAN_MATCH_MONTH = +#SERIES_ANALYSIS_CLIMO_MEAN_DAY_INTERVAL = +#SERIES_ANALYSIS_CLIMO_MEAN_HOUR_INTERVAL = +#SERIES_ANALYSIS_CLIMO_MEAN_FILE_TYPE = + +#SERIES_ANALYSIS_CLIMO_STDEV_FILE_NAME = +#SERIES_ANALYSIS_CLIMO_STDEV_FIELD = +#SERIES_ANALYSIS_CLIMO_STDEV_REGRID_METHOD = +#SERIES_ANALYSIS_CLIMO_STDEV_REGRID_WIDTH = +#SERIES_ANALYSIS_CLIMO_STDEV_REGRID_VLD_THRESH = +#SERIES_ANALYSIS_CLIMO_STDEV_REGRID_SHAPE = +#SERIES_ANALYSIS_CLIMO_STDEV_TIME_INTERP_METHOD = +#SERIES_ANALYSIS_CLIMO_STDEV_MATCH_MONTH = +#SERIES_ANALYSIS_CLIMO_STDEV_DAY_INTERVAL = 
+#SERIES_ANALYSIS_CLIMO_STDEV_HOUR_INTERVAL = +#SERIES_ANALYSIS_CLIMO_STDEV_FILE_TYPE = + +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_FILE_NAME = +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_FIELD = +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_REGRID_METHOD = +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_REGRID_WIDTH = +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_REGRID_VLD_THRESH = +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_REGRID_SHAPE = +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_TIME_INTERP_METHOD = +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_DAY_INTERVAL = +#SERIES_ANALYSIS_FCST_CLIMO_MEAN_HOUR_INTERVAL = + +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_FILE_NAME = +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_FIELD = +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_REGRID_METHOD = +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_REGRID_WIDTH = +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_REGRID_VLD_THRESH = +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_REGRID_SHAPE = +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_TIME_INTERP_METHOD = +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_DAY_INTERVAL = +#SERIES_ANALYSIS_FCST_CLIMO_STDEV_HOUR_INTERVAL = + +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_FILE_NAME = +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_FIELD = +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_REGRID_METHOD = +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_REGRID_WIDTH = +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_REGRID_VLD_THRESH = +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_REGRID_SHAPE = +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_TIME_INTERP_METHOD = +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_DAY_INTERVAL = +#SERIES_ANALYSIS_OBS_CLIMO_MEAN_HOUR_INTERVAL = + +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_FILE_NAME = +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_FIELD = +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_REGRID_METHOD = +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_REGRID_WIDTH = +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_REGRID_VLD_THRESH = +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_REGRID_SHAPE = +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_TIME_INTERP_METHOD = +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_DAY_INTERVAL = +#SERIES_ANALYSIS_OBS_CLIMO_STDEV_HOUR_INTERVAL = + +#SERIES_ANALYSIS_CLIMO_CDF_BINS = +#SERIES_ANALYSIS_CLIMO_CDF_CENTER_BINS = +#SERIES_ANALYSIS_CLIMO_CDF_DIRECT_PROB = + 
+#SERIES_ANALYSIS_HSS_EC_VALUE = + +#FCST_SERIES_ANALYSIS_PROB_THRESH = + +SERIES_ANALYSIS_OUTPUT_STATS_FHO = F_RATE, O_RATE +SERIES_ANALYSIS_OUTPUT_STATS_CTC = ALL +SERIES_ANALYSIS_OUTPUT_STATS_CTS = CSI, GSS +SERIES_ANALYSIS_OUTPUT_STATS_MCTC = ALL +SERIES_ANALYSIS_OUTPUT_STATS_MCTS = ACC, ACC_NCL, ACC_NCU +SERIES_ANALYSIS_OUTPUT_STATS_CNT = TOTAL, ME, ME_NCL, ME_NCU +SERIES_ANALYSIS_OUTPUT_STATS_SL1L2 = ALL +#SERIES_ANALYSIS_OUTPUT_STATS_SAL1L2 = +#SERIES_ANALYSIS_OUTPUT_STATS_PCT = +#SERIES_ANALYSIS_OUTPUT_STATS_PSTD = +#SERIES_ANALYSIS_OUTPUT_STATS_PJC = +#SERIES_ANALYSIS_OUTPUT_STATS_PRC = + +#SERIES_ANALYSIS_MASK_GRID = +#SERIES_ANALYSIS_MASK_POLY = + + +### +# SeriesAnalysis Plotting +### + +SERIES_ANALYSIS_GENERATE_PLOTS = no + +PLOT_DATA_PLANE_TITLE = + +SERIES_ANALYSIS_GENERATE_ANIMATIONS = no + +#SERIES_ANALYSIS_TIME_OFFSET_WARNING =