From 6dc02188c73e7314db44d50226f6c45d7d86b03b Mon Sep 17 00:00:00 2001 From: Spencer Wong Date: Fri, 11 Oct 2024 16:54:16 +1100 Subject: [PATCH 1/7] Initial log compression for cice4 --- payu/models/cice.py | 28 +++++++++ payu/models/cice5.py | 3 + test/models/test_cice.py | 129 +++++++++++++++++++++++++++++++++------ 3 files changed, 140 insertions(+), 20 deletions(-) diff --git a/payu/models/cice.py b/payu/models/cice.py index 50f29c1f..2ae47c0e 100644 --- a/payu/models/cice.py +++ b/payu/models/cice.py @@ -17,6 +17,8 @@ import sys import shutil import datetime +import re +import tarfile # Extensions import f90nml @@ -51,6 +53,13 @@ def __init__(self, expt, name, config): self.copy_inputs = False + # regex patterns for matching log files. When empty, no logs compressed + self.logs_to_compress = [r"iceout[0-9]{3}", + r"debug\.root\.[0-9]{2}", + r"ice_diag\.d", + r"ice_diag_out"] + self.log_tar_name = "logfiles.tar.gz" + def set_model_pathnames(self): super(Cice, self).set_model_pathnames() @@ -333,6 +342,25 @@ def archive(self, **kwargs): else: shutil.rmtree(self.work_input_path) + if self.expt.config.get('compress_logs', False): + self.compress_log_files() + + def get_log_files(self): + log_files = [] + for filename in os.listdir(self.work_path): + if any((re.match(pattern, filename) + for pattern in self.logs_to_compress)): + log_files.append(os.path.join(self.work_path, filename)) + return log_files + + def compress_log_files(self): + log_files = self.get_log_files() + with tarfile.open(name=os.path.join(self.work_path, self.log_tar_name), + mode="w:gz") as tar: + for file in log_files: + tar.add(file, arcname=os.path.basename(file)) + os.remove(file) + def collate(self): pass diff --git a/payu/models/cice5.py b/payu/models/cice5.py index bc5618af..1c8fcfac 100644 --- a/payu/models/cice5.py +++ b/payu/models/cice5.py @@ -42,6 +42,9 @@ def __init__(self, expt, name, config): self.copy_restarts = True self.copy_inputs = True + # Empty list means no log files will be compressed + self.logs_to_compress = [] + def set_local_timestep(self, t_step): dt = self.ice_in['setup_nml']['dt'] npt = self.ice_in['setup_nml']['npt'] diff --git a/test/models/test_cice.py b/test/models/test_cice.py index 15e6cb54..150e9c27 100644 --- a/test/models/test_cice.py +++ b/test/models/test_cice.py @@ -3,6 +3,7 @@ import pytest import f90nml +import tarfile import payu @@ -124,41 +125,47 @@ def empty_workdir(): workdir.symlink_to(expt_workdir) yield expt_workdir - shutil.rmtree(expt_workdir) + try: + shutil.rmtree(expt_workdir) + except FileNotFoundError: + pass workdir.unlink() +@pytest.fixture +def cice_nml(): + nml_path = os.path.join(ctrldir, CICE_NML_NAME) + f90nml.write(DEFAULT_CICE_NML, nml_path) + + yield nml_path + + # Cleanup + os.remove(nml_path) + # Important to test None case without separate ice history file @pytest.fixture(params=[None, {"icefields_nml": {"f_icy": "m"}}, {"icefields_nml": {"f_icy": "m", "f_new": "y"}}]) -def cice_config_files(request): +def cice_history_nml(request): """ - Write the default cice_in.nml namelist, and if included, separate ice - history namelist used by ESM1.5. + Write separate ice history namelist used by ESM1.5, if provided. """ - cice_nml = DEFAULT_CICE_NML ice_history = request.param + ice_history_path = os.path.join(ctrldir, HIST_NML_NAME) - with cd(ctrldir): - # 2. Create config.nml - f90nml.write(cice_nml, CICE_NML_NAME) - - if ice_history: - f90nml.write(ice_history, HIST_NML_NAME) + if ice_history: + f90nml.write(ice_history, ice_history_path) yield {'ice_history': ice_history} # cleanup - with cd(ctrldir): - os.remove(CICE_NML_NAME) - if ice_history: - os.remove(HIST_NML_NAME) + if ice_history: + os.remove(ice_history_path) @pytest.mark.parametrize("config", [DEFAULT_CONFIG], indirect=True) -def test_setup(config, cice_config_files): +def test_setup(config, cice_nml, cice_history_nml): """ Confirm that 1: payu overwrites cice_in with ice_history @@ -183,9 +190,9 @@ def test_setup(config, cice_config_files): # Check cice_in was patched with ice_history work_input_fpath = os.path.join(model.work_path, CICE_NML_NAME) input_nml = f90nml.read(work_input_fpath) - if cice_config_files['ice_history']: + if cice_history_nml['ice_history']: assert (input_nml["icefields_nml"] == - cice_config_files["ice_history"]["icefields_nml"]) + cice_history_nml["ice_history"]["icefields_nml"]) else: assert input_nml["icefields_nml"] == DEFAULT_CICE_NML["icefields_nml"] @@ -238,7 +245,7 @@ def prior_restart_cice4(run_timing_params): @pytest.mark.parametrize("config", [CONFIG_WITH_RESTART], indirect=True) -def test_restart_setup(config, cice_config_files, prior_restart_cice4, +def test_restart_setup(config, cice_nml, cice_history_nml, prior_restart_cice4, run_timing_params): """ Test that seting up an experiment from a cloned control directory @@ -280,7 +287,7 @@ def test_restart_setup(config, cice_config_files, prior_restart_cice4, @pytest.mark.parametrize("config", [DEFAULT_CONFIG], indirect=True) -def test_no_restart_ptr(config, cice_config_files): +def test_no_restart_ptr(config, cice_nml, cice_history_nml): """ Test that payu raises an error if no prior restart path is specified, restart is `true` in cice_in.nml, and the restart pointer is missing. @@ -300,3 +307,85 @@ def test_no_restart_ptr(config, cice_config_files): with pytest.raises(RuntimeError, match="Cannot find previous restart file"): model.setup() + + +CONFIG_WITH_COMPRESSION = { + "laboratory": "lab", + "jobname": "testrun", + "model": "cice", + "exe": "test.exe", + "experiment": ctrldir_basename, + "metadata": {"enable": False}, + "compress_logs": True +} + + +@pytest.fixture +def cice4_log_files(): + """ + Create cice log files matching those produced during ESM1.5 simulations. + """ + log_names = ["ice_diag_out", "ice_diag.d", "debug.root.03", + "iceout086", "iceout088", "iceout090", "iceout092", + "iceout094", "iceout096", "iceout085", "iceout087", + "iceout089", "iceout091", "iceout093", "iceout095"] + log_paths = [os.path.join(expt_workdir, name) for name in log_names] + + for log_file in log_paths: + with open(log_file, "w") as f: + f.close() + + yield log_paths + + # Cleanup + for log_file in log_paths: + try: + os.remove(log_file) + except FileNotFoundError: + pass + + +@pytest.fixture +def non_log_file(): + """ + Create a cice4 output file to be ignored by log compression. + Use cice_in.nml which is copied to the work directory in ESM1.5. + """ + non_log_path = os.path.join(expt_workdir, CICE_NML_NAME) + with open(non_log_path, "w") as f: + f.close() + + yield non_log_path + + # Cleanup + os.remove(non_log_path) + + +@pytest.mark.parametrize("config", [CONFIG_WITH_COMPRESSION], + indirect=True) +def test_log_compression(config, cice4_log_files, non_log_file, + cice_nml # Required by expt.__init__ + ): + """ + Test that logfiles produced by cice during ESM1.5 simulations are + properly compressed into a tarball by cice.compress_log_files(). + """ + with cd(ctrldir): + # Initialise laboratory and experiment + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + model = expt.models[0] + + # Function to test + model.compress_log_files() + + # Check that log tarball created and no original logs remain + assert set(os.listdir(expt_workdir)) == {model.log_tar_name, + os.path.basename(non_log_file)} + + # Check all logs present in tarball + log_file_names = {os.path.basename(log_path) for + log_path in cice4_log_files} + with tarfile.open(os.path.join(expt_workdir, model.log_tar_name), + "r") as tar: + assert set(tar.getnames()) == log_file_names From d6c057ffc101883c9d3673b087db5307954f832a Mon Sep 17 00:00:00 2001 From: Spencer Wong Date: Wed, 16 Oct 2024 14:37:11 +1100 Subject: [PATCH 2/7] PEP8 --- test/models/test_cice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/models/test_cice.py b/test/models/test_cice.py index 150e9c27..cd860b55 100644 --- a/test/models/test_cice.py +++ b/test/models/test_cice.py @@ -364,7 +364,7 @@ def non_log_file(): @pytest.mark.parametrize("config", [CONFIG_WITH_COMPRESSION], indirect=True) def test_log_compression(config, cice4_log_files, non_log_file, - cice_nml # Required by expt.__init__ + cice_nml # Required by expt.__init__ ): """ Test that logfiles produced by cice during ESM1.5 simulations are From 0b2fcc957f736aac1762026f24184601596c70ac Mon Sep 17 00:00:00 2001 From: Spencer Wong Date: Thu, 17 Oct 2024 16:02:02 +1100 Subject: [PATCH 3/7] Move compression settings into archive dict, add docstrings, review cleanup suggestions --- payu/experiment.py | 21 +++++++++++++++++++-- payu/models/cice.py | 24 +++++++++++++++++++++++- test/models/test_cice.py | 27 +++++++++++++-------------- 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/payu/experiment.py b/payu/experiment.py index 40832736..03d1223e 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -503,7 +503,7 @@ def setup(self, force_archive=False): # Check restart pruning for valid configuration values and # warns user if more restarts than expected would be pruned - if self.config.get('archive', True): + if self.archiving(): self.get_restarts_to_prune() def run(self, *user_flags): @@ -769,8 +769,25 @@ def run(self, *user_flags): if run_script: self.run_userscript(run_script) + def archiving(self): + """ + Determine whether to run archive step based on config.yaml settings. + Default to True when archive settings are absent. + """ + archive_config = self.config.get('archive', {}) + if isinstance(archive_config, dict): + return archive_config.get('enable', True) + + # Backwards compatibility for configs with boolean archive setting + elif isinstance(archive_config, bool): + return archive_config + + else: + msg = "Incorrect format for archive settings in config.yaml" + raise RuntimeError(msg) + def archive(self, force_prune_restarts=False): - if not self.config.get('archive', True): + if not self.archiving(): print('payu: not archiving due to config.yaml setting.') return diff --git a/payu/models/cice.py b/payu/models/cice.py index 2ae47c0e..3d99cb51 100644 --- a/payu/models/cice.py +++ b/payu/models/cice.py @@ -342,10 +342,29 @@ def archive(self, **kwargs): else: shutil.rmtree(self.work_input_path) - if self.expt.config.get('compress_logs', False): + if self.compression_enabled(): self.compress_log_files() + def compression_enabled(self): + """ + Determine whether to run log compression based on config.yaml settings. + Default to True when 'compress_logs' setting is absent. + """ + archive_config = self.expt.config.get('archive', {}) + if isinstance(archive_config, dict): + return archive_config.get('compress_logs', True) + else: + return True + def get_log_files(self): + """ + Find model log files in the work directory based on regex patterns + in self.logs_to_compress. + + Returns + ------- + log_files: list of paths to model log files. + """ log_files = [] for filename in os.listdir(self.work_path): if any((re.match(pattern, filename) @@ -354,6 +373,9 @@ def get_log_files(self): return log_files def compress_log_files(self): + """ + Compress model log files into tarball. + """ log_files = self.get_log_files() with tarfile.open(name=os.path.join(self.work_path, self.log_tar_name), mode="w:gz") as tar: diff --git a/test/models/test_cice.py b/test/models/test_cice.py index cd860b55..5b78c3a6 100644 --- a/test/models/test_cice.py +++ b/test/models/test_cice.py @@ -4,6 +4,7 @@ import pytest import f90nml import tarfile +from pathlib import Path import payu @@ -142,6 +143,7 @@ def cice_nml(): # Cleanup os.remove(nml_path) + # Important to test None case without separate ice history file @pytest.fixture(params=[None, {"icefields_nml": {"f_icy": "m"}}, @@ -325,22 +327,19 @@ def cice4_log_files(): """ Create cice log files matching those produced during ESM1.5 simulations. """ - log_names = ["ice_diag_out", "ice_diag.d", "debug.root.03", - "iceout086", "iceout088", "iceout090", "iceout092", - "iceout094", "iceout096", "iceout085", "iceout087", - "iceout089", "iceout091", "iceout093", "iceout095"] - log_paths = [os.path.join(expt_workdir, name) for name in log_names] + log_names = (["ice_diag_out", "ice_diag.d", "debug.root.03"] + + [f'iceout{x:03d}' for x in range(85, 96)]) + log_paths = [Path(expt_workdir)/name for name in log_names] for log_file in log_paths: - with open(log_file, "w") as f: - f.close() + log_file.touch() yield log_paths # Cleanup for log_file in log_paths: try: - os.remove(log_file) + log_file.unlink() except FileNotFoundError: pass @@ -351,14 +350,13 @@ def non_log_file(): Create a cice4 output file to be ignored by log compression. Use cice_in.nml which is copied to the work directory in ESM1.5. """ - non_log_path = os.path.join(expt_workdir, CICE_NML_NAME) - with open(non_log_path, "w") as f: - f.close() + non_log_path = Path(expt_workdir)/CICE_NML_NAME + non_log_path.touch() yield non_log_path # Cleanup - os.remove(non_log_path) + non_log_path.unlink() @pytest.mark.parametrize("config", [CONFIG_WITH_COMPRESSION], @@ -381,11 +379,12 @@ def test_log_compression(config, cice4_log_files, non_log_file, # Check that log tarball created and no original logs remain assert set(os.listdir(expt_workdir)) == {model.log_tar_name, - os.path.basename(non_log_file)} + non_log_file.name} # Check all logs present in tarball - log_file_names = {os.path.basename(log_path) for + log_file_names = {log_path.name for log_path in cice4_log_files} + with tarfile.open(os.path.join(expt_workdir, model.log_tar_name), "r") as tar: assert set(tar.getnames()) == log_file_names From 44a0b07637368c9e137818b371b738db88634d4f Mon Sep 17 00:00:00 2001 From: Spencer Wong Date: Tue, 5 Nov 2024 10:08:41 +1100 Subject: [PATCH 4/7] Add docs for new archive settings --- docs/source/config.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/source/config.rst b/docs/source/config.rst index 829edeb7..5236d0f8 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -273,6 +273,21 @@ section for details. POSIX filesystem. +Archiving +--------- + +``archiving`` + On completion of a model run, payu moves model output, restart, and log + files from the temporary work area to the experiment archive directory. + The following settings control the steps taken during the archive step: + ``enable`` (*Default:* ``True``) + Flag to enable/disable the archive step. If ``False`` all output, restart, + and log files will remain in the work directory, and any collation, post-processing, + and syncing will not be run. + ``compress_logs`` (*Default:* ``True``) + Compress model log files into a tarball. Currently only implemented for CICE4. + + Collation --------- From 2d4d016fd1d2267346ae472a125a95206a32da1a Mon Sep 17 00:00:00 2001 From: Spencer Wong Date: Mon, 18 Nov 2024 12:09:22 +1100 Subject: [PATCH 5/7] Review suggestions: Delete files only after whole archive written. Test that correct file contents written to archive --- payu/models/cice.py | 5 ++++- test/models/test_cice.py | 43 +++++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/payu/models/cice.py b/payu/models/cice.py index 3d99cb51..9ad09cb7 100644 --- a/payu/models/cice.py +++ b/payu/models/cice.py @@ -381,7 +381,10 @@ def compress_log_files(self): mode="w:gz") as tar: for file in log_files: tar.add(file, arcname=os.path.basename(file)) - os.remove(file) + + # Delete files after tarball is written + for file in log_files: + os.remove(file) def collate(self): pass diff --git a/test/models/test_cice.py b/test/models/test_cice.py index 5b78c3a6..8f9453a7 100644 --- a/test/models/test_cice.py +++ b/test/models/test_cice.py @@ -325,16 +325,28 @@ def test_no_restart_ptr(config, cice_nml, cice_history_nml): @pytest.fixture def cice4_log_files(): """ - Create cice log files matching those produced during ESM1.5 simulations. + Create cice log files based on ESM1.5 logs. """ - log_names = (["ice_diag_out", "ice_diag.d", "debug.root.03"] - + [f'iceout{x:03d}' for x in range(85, 96)]) - log_paths = [Path(expt_workdir)/name for name in log_names] - - for log_file in log_paths: - log_file.touch() - - yield log_paths + non_pe_logs = { + "ice_diag_out": "block id, proc, local_block:", + "ice_diag.d": "istep0 = ******", + "debug.root.03": "oasis_io_read_avfile:av2_isst_ia:NetCDF:" + } + pe_logs = { + f'iceout{x:03d}': "Fake iceout file {x}" + for x in range(85, 96) + } + + log_files = non_pe_logs | pe_logs + + log_paths = [] + for log_name, log_contents in log_files.items(): + log_path = Path(expt_workdir/log_name) + with open(log_path, "w") as log: + log.write(log_contents) + log_paths.append(log_path) + + yield log_files # Cleanup for log_file in log_paths: @@ -382,9 +394,16 @@ def test_log_compression(config, cice4_log_files, non_log_file, non_log_file.name} # Check all logs present in tarball - log_file_names = {log_path.name for - log_path in cice4_log_files} + log_file_names = {log_name for + log_name in cice4_log_files} with tarfile.open(os.path.join(expt_workdir, model.log_tar_name), - "r") as tar: + mode="r") as tar: assert set(tar.getnames()) == log_file_names + + # Check contents of compressed files + for entry in tar: + entry_name = entry.name + with tar.extractfile(entry) as open_entry: + file_contents = open_entry.read().decode("utf-8") + assert file_contents == cice4_log_files[entry_name] From d110caef4afb660419470d7603dbc8af8e9931cf Mon Sep 17 00:00:00 2001 From: Spencer Wong Date: Thu, 5 Dec 2024 11:14:25 +1100 Subject: [PATCH 6/7] Use more efficient regex pattern --- payu/models/cice.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/payu/models/cice.py b/payu/models/cice.py index 9ad09cb7..33324bb7 100644 --- a/payu/models/cice.py +++ b/payu/models/cice.py @@ -367,8 +367,7 @@ def get_log_files(self): """ log_files = [] for filename in os.listdir(self.work_path): - if any((re.match(pattern, filename) - for pattern in self.logs_to_compress)): + if re.match("|".join(self.logs_to_compress), filename): log_files.append(os.path.join(self.work_path, filename)) return log_files From c111d6bb4fd54de0262557a415f057a99914250c Mon Sep 17 00:00:00 2001 From: Spencer Wong Date: Thu, 5 Dec 2024 12:27:35 +1100 Subject: [PATCH 7/7] Convert legacy archive config to dict in fsops.py. Simplify its parsing --- docs/source/config.rst | 3 ++- payu/experiment.py | 11 +---------- payu/fsops.py | 6 ++++++ payu/models/cice.py | 15 +++------------ test/test_payu.py | 1 + 5 files changed, 13 insertions(+), 23 deletions(-) diff --git a/docs/source/config.rst b/docs/source/config.rst index 5236d0f8..d6141fa9 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -276,10 +276,11 @@ section for details. Archiving --------- -``archiving`` +``archive`` On completion of a model run, payu moves model output, restart, and log files from the temporary work area to the experiment archive directory. The following settings control the steps taken during the archive step: + ``enable`` (*Default:* ``True``) Flag to enable/disable the archive step. If ``False`` all output, restart, and log files will remain in the work directory, and any collation, post-processing, diff --git a/payu/experiment.py b/payu/experiment.py index 03d1223e..c3366543 100644 --- a/payu/experiment.py +++ b/payu/experiment.py @@ -775,16 +775,7 @@ def archiving(self): Default to True when archive settings are absent. """ archive_config = self.config.get('archive', {}) - if isinstance(archive_config, dict): - return archive_config.get('enable', True) - - # Backwards compatibility for configs with boolean archive setting - elif isinstance(archive_config, bool): - return archive_config - - else: - msg = "Incorrect format for archive settings in config.yaml" - raise RuntimeError(msg) + return archive_config.get('enable', True) def archive(self, force_prune_restarts=False): if not self.archiving(): diff --git a/payu/fsops.py b/payu/fsops.py index 6560eade..9ce2a1e3 100644 --- a/payu/fsops.py +++ b/payu/fsops.py @@ -126,6 +126,12 @@ def read_config(config_fname=None): config['collate'] = collate_config + # Transform legacy archive config options + archive_config = config.pop('archive', {}) + if type(archive_config) is bool: + archive_config = {'enable': archive_config} + config['archive'] = archive_config + # Transform legacy modules config options modules_config = config.pop('modules', {}) if type(modules_config) is list: diff --git a/payu/models/cice.py b/payu/models/cice.py index 33324bb7..7793ae2e 100644 --- a/payu/models/cice.py +++ b/payu/models/cice.py @@ -342,19 +342,10 @@ def archive(self, **kwargs): else: shutil.rmtree(self.work_input_path) - if self.compression_enabled(): - self.compress_log_files() - - def compression_enabled(self): - """ - Determine whether to run log compression based on config.yaml settings. - Default to True when 'compress_logs' setting is absent. - """ archive_config = self.expt.config.get('archive', {}) - if isinstance(archive_config, dict): - return archive_config.get('compress_logs', True) - else: - return True + compressing_logs = archive_config.get('compress_logs', True) + if compressing_logs: + self.compress_log_files() def get_log_files(self): """ diff --git a/test/test_payu.py b/test/test_payu.py index 7ab218cf..6cc1b689 100644 --- a/test/test_payu.py +++ b/test/test_payu.py @@ -150,6 +150,7 @@ def test_read_config(): assert(config.pop('collate') == {}) assert(config.pop('control_path') == os.getcwd()) assert(config.pop('modules') == {}) + assert(config.pop('archive') == {}) assert(config == {}) os.remove(config_tmp)