From 5236fe57b7378451ea2cbe9856bc5588034f9aa0 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Mon, 30 Sep 2024 12:28:39 -0700 Subject: [PATCH] Add month-averaging of grazing/mortality & biology growth rate variable groups to `make_averaged_dataset` (#297) * Add more month-avg reshapr configs for make_averaged_dataset These YAML configuration files resample daily biological growth rate and grazing/mortality fields to monthly averages for the SalishSeaCast v202111 model. They specify the variables to extract, the resampling method, and the destination for the output files. * Add support for month-avgs of grazing & growth var groups Extended the make_averaged_dataset worker and related test cases to include grazing and growth dataset variable groups. Updated configuration files and added parameter-specific validations for these new variable groups. * Add day grazing and growth failure messages Updated the nowcast.yaml configuration file to include failure messages for grazing and growth dataset day-averaging. Corresponding test cases were also added to ensure proper error handling for these new scenarios. Missed in ce694e2fe. Necessary because grazing and growth dataset day-averaging operation requests are rejected to avoid redundant work; those datasets are calculated and output by our NEMO configuration. * Add month-avg grazing and growth to automation Expanded the next_workers module to calculate month-average grazing and growth datasets at month-end. Added corresponding test functions to ensure the new make_averaged_dataset worker instances are launched correctly. 
--- config/nowcast.yaml | 12 +++++ ...month-average_202111_bio_growth_rates.yaml | 32 ++++++++++++ ...onth-average_202111_grazing_mortality.yaml | 38 ++++++++++++++ nowcast/next_workers.py | 31 +++++++++++ nowcast/workers/make_averaged_dataset.py | 8 ++- tests/test_next_workers.py | 42 ++++++++++++++- tests/workers/test_make_averaged_dataset.py | 51 ++++++++++++++++++- 7 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 config/reshapr/month-average_202111_bio_growth_rates.yaml create mode 100644 config/reshapr/month-average_202111_grazing_mortality.yaml diff --git a/config/nowcast.yaml b/config/nowcast.yaml index 0a1e52df..b3f02806 100644 --- a/config/nowcast.yaml +++ b/config/nowcast.yaml @@ -442,6 +442,12 @@ averaged datasets: chemistry: reshapr config: month-average_202111_chemistry.yaml file pattern: "SalishSeaCast_1m_chem_T_{yyyymmdd}_{yyyymmdd}.nc" + grazing: + reshapr config: month-average_202111_grazing_mortality.yaml + file pattern: "SalishSeaCast_1m_graz_T_{yyyymmdd}_{yyyymmdd}.nc" + growth: + reshapr config: month-average_202111_bio_growth_rates.yaml + file pattern: "SalishSeaCast_1m_prod_T_{yyyymmdd}_{yyyymmdd}.nc" physics: reshapr config: month-average_202111_physics.yaml file pattern: "SalishSeaCast_1m_grid_T_{yyyymmdd}_{yyyymmdd}.nc" @@ -1581,12 +1587,18 @@ message registry: failure day biology: biology dataset day-averaging failed success day chemistry: chemistry dataset day-averaged failure day chemistry: chemistry dataset day-averaging failed + failure day grazing: grazing dataset day-averaging failed + failure day growth: growth dataset day-averaging failed success day physics: physics dataset day-averaged failure day physics: physics dataset day-averaging failed success month biology: biology dataset month-averaged failure month biology: biology dataset month-averaging failed success month chemistry: chemistry dataset month-averaged failure month chemistry: chemistry dataset month-averaging failed + success month grazing: 
grazing dataset month-averaged + failure month grazing: grazing dataset month-averaging failed + success month growth: biology growth rates dataset month-averaged + failure month growth: biology growth rates dataset month-averaging failed success month physics: physics dataset month-averaged failure month physics: physics dataset month-averaging failed crash: make_averaged_dataset worker crashed diff --git a/config/reshapr/month-average_202111_bio_growth_rates.yaml b/config/reshapr/month-average_202111_bio_growth_rates.yaml new file mode 100644 index 00000000..6100d5d8 --- /dev/null +++ b/config/reshapr/month-average_202111_bio_growth_rates.yaml @@ -0,0 +1,32 @@ +# `reshapr extract` config to resample v202111 day-average biology growth rate fields +# to month-average + +dataset: + model profile: SalishSeaCast-202111-salish.yaml + time base: day + variables group: biology growth rates + +dask cluster: tcp://142.103.36.12:4386 + +# Placeholder start/end dates that are overridden by worker's run-date arg +start date: 2007-01-01 +end date: 2007-01-31 + +extract variables: + - PPDIAT + - PPPHY + - PPDIATNO3 + - PPPHYNO3 + - TQ10 + +resample: + time interval: 1M + aggregation: mean + +extracted dataset: + name: SalishSeaCast_1m_prod_T + description: Month-averaged biology growth rate variables resampled from + v202111 SalishSea_1d_*_prod_T.nc + deflate: True + format: NETCDF4 + dest dir: /results2/SalishSea/month-avg.202111/ diff --git a/config/reshapr/month-average_202111_grazing_mortality.yaml b/config/reshapr/month-average_202111_grazing_mortality.yaml new file mode 100644 index 00000000..db0e80d6 --- /dev/null +++ b/config/reshapr/month-average_202111_grazing_mortality.yaml @@ -0,0 +1,38 @@ +# `reshapr extract` config to resample v202111 day-average grazing & mortality fields +# to month-average + +dataset: + model profile: SalishSeaCast-202111-salish.yaml + time base: day + variables group: grazing + +dask cluster: tcp://142.103.36.12:4386 + +# Placeholder start/end 
dates that are overridden by worker's run-date arg +start date: 2007-01-01 +end date: 2007-01-31 + +extract variables: + - MORTPHY + - MORTDIAT + - MORTMICZ + - GRMESZDIAT + - GRMESZPHY + - GRMESZPON + - GRMESZMICZ + - GRMICZDIAT + - GRMICZPHY + - GRMICZPON + - GRMICZMICZ + +resample: + time interval: 1M + aggregation: mean + +extracted dataset: + name: SalishSeaCast_1m_graz_T + description: Month-averaged grazing and mortality variables resampled from + v202111 SalishSea_1d_*_graz_T.nc + deflate: True + format: NETCDF4 + dest dir: /results2/SalishSea/month-avg.202111/ diff --git a/nowcast/next_workers.py b/nowcast/next_workers.py index 6fb0d59f..7e68c66d 100644 --- a/nowcast/next_workers.py +++ b/nowcast/next_workers.py @@ -1557,6 +1557,8 @@ def after_make_averaged_dataset(msg, config, checklist): "crash": [], "failure day biology": [], "failure day chemistry": [], + "failure day grazing": [], + "failure day growth": [], "failure day physics": [], "failure month biology": [], "failure month chemistry": [], @@ -1566,6 +1568,8 @@ def after_make_averaged_dataset(msg, config, checklist): "success day physics": [], "success month biology": [], "success month chemistry": [], + "success month grazing": [], + "success month growth": [], "success month physics": [], } if msg.type.startswith("success day"): @@ -1580,6 +1584,33 @@ def after_make_averaged_dataset(msg, config, checklist): host="localhost", ) ) + if msg.type.startswith("success month"): + *_, reshapr_var_group = msg.type.split() + match reshapr_var_group: + case "physics": + run_date = arrow.get(msg.payload["month physics"]["run date"]).format( + "YYYY-MM-DD" + ) + next_workers[msg.type].append( + NextWorker( + "nowcast.workers.make_averaged_dataset", + args=["month", "grazing", "--run-date", run_date], + host="localhost", + ) + ) + case "grazing": + run_date = arrow.get(msg.payload["month grazing"]["run date"]).format( + "YYYY-MM-DD" + ) + next_workers[msg.type].append( + NextWorker( + 
"nowcast.workers.make_averaged_dataset", + args=["month", "growth", "--run-date", run_date], + host="localhost", + ) + ) + case _: + pass return next_workers[msg.type] diff --git a/nowcast/workers/make_averaged_dataset.py b/nowcast/workers/make_averaged_dataset.py index eb1c8018..e9c5a470 100644 --- a/nowcast/workers/make_averaged_dataset.py +++ b/nowcast/workers/make_averaged_dataset.py @@ -54,7 +54,7 @@ def main(): ) worker.cli.add_argument( "reshapr_var_group", - choices={"biology", "chemistry", "physics"}, + choices={"biology", "chemistry", "grazing", "growth", "physics"}, help="Dataset variable group to run extraction for", ) worker.cli.add_date_option( @@ -154,6 +154,12 @@ def make_averaged_dataset(parsed_args, config, *args): f"run_date = {run_date.format('YYYY-MM-DD')}" ) raise WorkerError + if avg_time_interval == "day" and reshapr_var_group in {"grazing", "growth"}: + logger.error( + f"Day-average {reshapr_var_group} datasets are calculated by NEMO; " + f"use this worker for month-averaging" + ) + raise WorkerError reshapr_config_dir = Path(config["averaged datasets"]["reshapr config dir"]) reshapr_config_yaml = config["averaged datasets"][avg_time_interval][ reshapr_var_group diff --git a/tests/test_next_workers.py b/tests/test_next_workers.py index d5589997..e62ba773 100644 --- a/tests/test_next_workers.py +++ b/tests/test_next_workers.py @@ -2216,10 +2216,12 @@ class TestAfterMakeAveragedDataset: "crash", "failure day biology", "failure day chemistry", + "failure day grazing", + "failure day growth", "failure day physics", "success month biology", "success month chemistry", - "success month physics", + "success month growth", "failure month biology", "failure month chemistry", "failure month physics", @@ -2261,6 +2263,44 @@ def test_month_end_day_success_launch_month_average( ) assert expected in workers + def test_month_physics_success_launch_month_grazing(self, config, checklist): + msg = Message( + "make_averaged_dataset", + "success month physics", 
+ payload={ + "month physics": { + "run date": "2024-09-01", + "file path": "SalishSea_1m_20240901_20240930_grid_T.nc", + } + }, + ) + workers = next_workers.after_make_averaged_dataset(msg, config, checklist) + expected = NextWorker( + "nowcast.workers.make_averaged_dataset", + args=["month", "grazing", "--run-date", "2024-09-01"], + host="localhost", + ) + assert expected in workers + + def test_month_grazing_success_launch_month_growth(self, config, checklist): + msg = Message( + "make_averaged_dataset", + "success month grazing", + payload={ + "month grazing": { + "run date": "2024-09-01", + "file path": "SalishSea_1m_20240901_20240930_graz_T.nc", + } + }, + ) + workers = next_workers.after_make_averaged_dataset(msg, config, checklist) + expected = NextWorker( + "nowcast.workers.make_averaged_dataset", + args=["month", "growth", "--run-date", "2024-09-01"], + host="localhost", + ) + assert expected in workers + class TestAfterArchiveTarball: """Unit tests for the after_archive_tarball function.""" diff --git a/tests/workers/test_make_averaged_dataset.py b/tests/workers/test_make_averaged_dataset.py index 55bec617..283e102b 100644 --- a/tests/workers/test_make_averaged_dataset.py +++ b/tests/workers/test_make_averaged_dataset.py @@ -59,6 +59,12 @@ def config(base_config): chemistry: reshapr config: month-average_202111_chemistry.yaml file pattern: "SalishSeaCast_1m_chem_T_{yyyymmdd}_{yyyymmdd}.nc" + grazing: + reshapr config: month-average_202111_grazing_mortality.yaml + file pattern: "SalishSeaCast_1m_graz_T_{yyyymmdd}_{yyyymmdd}.nc" + growth: + reshapr config: month-average_202111_bio_growth_rates.yaml + file pattern: "SalishSeaCast_1m_prod_T_{yyyymmdd}_{yyyymmdd}.nc" physics: reshapr config: month-average_202111_physics.yaml file pattern: "SalishSeaCast_1m_grid_T_{yyyymmdd}_{yyyymmdd}.nc" @@ -100,6 +106,8 @@ def test_add_reshapr_var_group_arg(self, mock_worker): assert worker.cli.parser._actions[4].choices == { "biology", "chemistry", + "grazing", + "growth", 
"physics", } assert worker.cli.parser._actions[4].help @@ -136,18 +144,24 @@ def test_message_registry_keys(self, prod_config): "failure day biology", "success day chemistry", "failure day chemistry", + "failure day grazing", + "failure day growth", "success day physics", "failure day physics", "success month biology", "failure month biology", "success month chemistry", "failure month chemistry", + "success month grazing", + "failure month grazing", + "success month growth", + "failure month growth", "success month physics", "failure month physics", "crash", ] - def test_averaged_datasets(self, prod_config): + def test_reshapr_configs(self, prod_config): averaged_datasets = prod_config["averaged datasets"] expected = "/SalishSeaCast/SalishSeaNowcast/config/reshapr/" @@ -196,6 +210,16 @@ def test_day_averaged_datasets( "month-average_202111_chemistry.yaml", "SalishSeaCast_1m_chem_T_{yyyymmdd}_{yyyymmdd}.nc", ), + ( + "grazing", + "month-average_202111_grazing_mortality.yaml", + "SalishSeaCast_1m_graz_T_{yyyymmdd}_{yyyymmdd}.nc", + ), + ( + "growth", + "month-average_202111_bio_growth_rates.yaml", + "SalishSeaCast_1m_prod_T_{yyyymmdd}_{yyyymmdd}.nc", + ), ( "physics", "month-average_202111_physics.yaml", @@ -247,6 +271,8 @@ def test_day_average_success(self, avg_time_interval, reshapr_var_group, caplog) ( ("month", "biology"), ("month", "chemistry"), + ("month", "grazing"), + ("month", "growth"), ("month", "physics"), ), ) @@ -301,6 +327,8 @@ def test_day_average_failure(self, avg_time_interval, reshapr_var_group, caplog) ( ("month", "biology"), ("month", "chemistry"), + ("month", "grazing"), + ("month", "growth"), ("month", "physics"), ), ) @@ -385,6 +413,8 @@ def mock_extract_netcdf(reshapr_config, reshapr_config_yaml): ( ("month", "biology"), ("month", "chemistry"), + ("month", "grazing"), + ("month", "growth"), ("month", "physics"), ), ) @@ -446,3 +476,22 @@ def test_bad_month_avg_run_date(self, caplog, config): assert caplog.records[0].levelname == "ERROR" 
expected = f"Month-averaging must start on the first day of a month but run_date = 2022-11-10" assert caplog.messages[0] == expected + + @pytest.mark.parametrize("reshapr_var_group", ("grazing", "growth")) + def test_month_avg_only_var_groups(self, reshapr_var_group, caplog, config): + parsed_args = SimpleNamespace( + avg_time_interval="day", + run_date=arrow.get("2024-09-24"), + reshapr_var_group=reshapr_var_group, + ) + caplog.set_level(logging.DEBUG) + + with pytest.raises(WorkerError): + make_averaged_dataset.make_averaged_dataset(parsed_args, config) + + assert caplog.records[0].levelname == "ERROR" + expected = ( + f"Day-average {reshapr_var_group} datasets are calculated by NEMO; " + f"use this worker for month-averaging" + ) + assert caplog.messages[0] == expected