From bb7175cdec2fbf64cbade33c4ebda5c973dcb0ae Mon Sep 17 00:00:00 2001 From: Jian Sun Date: Wed, 22 Mar 2023 23:39:30 -0600 Subject: [PATCH 001/176] Add Jim's changes for Gust's GPU options, based on https://github.com/jedwards4b/cime/compare/28b7431..3f4b1ab modified: CIME/Tools/Makefile modified: CIME/XML/env_batch.py modified: CIME/XML/env_mach_specific.py modified: CIME/build.py modified: CIME/case/case.py modified: CIME/data/config/xml_schemas/config_machines.xsd modified: CIME/data/config/xml_schemas/env_mach_specific.xsd modified: CIME/scripts/create_newcase.py modified: CIME/test_scheduler.py modified: CIME/tests/test_unit_case.py --- CIME/Tools/Makefile | 3 + CIME/XML/env_batch.py | 3 + CIME/XML/env_mach_specific.py | 3 +- CIME/build.py | 14 ++++ CIME/case/case.py | 76 ++++++++++--------- .../config/xml_schemas/config_machines.xsd | 4 + .../config/xml_schemas/env_mach_specific.xsd | 4 + CIME/scripts/create_newcase.py | 18 +++++ CIME/test_scheduler.py | 9 ++- CIME/tests/test_unit_case.py | 4 + 10 files changed, 101 insertions(+), 37 deletions(-) diff --git a/CIME/Tools/Makefile b/CIME/Tools/Makefile index 8cf5ba1104e..d01eeebd0d6 100644 --- a/CIME/Tools/Makefile +++ b/CIME/Tools/Makefile @@ -613,6 +613,9 @@ endif # Remove arch flag if it exists F90_LDFLAGS := $(filter-out -arch%,$(LDFLAGS)) +ifdef GPUFLAGS + F90_LDFLAGS += $(GPUFLAGS) +endif # Machine stuff to appear last on the link step ifndef MLIBS diff --git a/CIME/XML/env_batch.py b/CIME/XML/env_batch.py index 21fba92ad90..5a960119cbe 100644 --- a/CIME/XML/env_batch.py +++ b/CIME/XML/env_batch.py @@ -620,6 +620,9 @@ def _process_args(self, case, submit_arg_nodes, job): if name: if "$" in name: rflag = self._resolve_argument(case, flag, name, job) + # This is to prevent -gpu_type=none in qsub args + if rflag.endswith("=none"): + continue if len(rflag) > len(flag): submitargs += " {}".format(rflag) else: diff --git a/CIME/XML/env_mach_specific.py b/CIME/XML/env_mach_specific.py index 03e84f0faee..4652f2a7d0a 100644 --- a/CIME/XML/env_mach_specific.py +++ b/CIME/XML/env_mach_specific.py @@ -320,7 +320,8 @@ def _compute_resource_actions(self, resource_nodes, case, job=None): def _compute_actions(self, nodes, child_tag, case, job=None): result = [] # list of tuples ("name", "argument") - compiler, mpilib = case.get_value("COMPILER"), case.get_value("MPILIB") + compiler = case.get_value("COMPILER") + mpilib = case.get_value("MPILIB") for node in nodes: if self._match_attribs(self.attrib(node), case, job=job): diff --git a/CIME/build.py b/CIME/build.py index 0e232396f3a..99506ff9bf7 100644 --- a/CIME/build.py +++ b/CIME/build.py @@ -239,6 +239,20 @@ def get_standard_cmake_args(case, sharedpath): cmake_args += " -Dcompile_threaded={} ".format( stringify_bool(case.get_build_threaded()) ) + # check settings for GPU + gpu_type = case.get_value("GPU_TYPE") + gpu_offload = case.get_value("GPU_OFFLOAD") + if gpu_type != "none": + expect( + gpu_offload != "none", + "Both GPU_TYPE and GPU_OFFLOAD must be defined if either is", + ) + cmake_args += f" -DGPU_TYPE={gpu_type} -DGPU_OFFLOAD={gpu_offload}" + else: + expect( + gpu_offload == "none", + "Both GPU_TYPE and GPU_OFFLOAD must be defined if either is", + ) ocn_model = case.get_value("COMP_OCN") atm_model = case.get_value("COMP_ATM") diff --git a/CIME/case/case.py b/CIME/case/case.py index 4924baf8cda..1000f944acd 100644 --- a/CIME/case/case.py +++ b/CIME/case/case.py @@ -123,6 +123,7 @@ def __init__(self, case_root=None, read_only=True, record=False, non_local=False self._env_generic_files = [] self._files = [] self._comp_interface = None + self.gpu_enabled = None self._non_local = non_local self.read_xml() @@ -451,6 +452,12 @@ def get_values(self, item, attribute=None, resolved=True, subgroup=None): return [] def get_value(self, item, attribute=None, resolved=True, subgroup=None): + if item == "GPU_ENABLED": + if self.gpu_enabled == None: + if self.get_value("GPU_TYPE") != "none": + self.gpu_enabled = True + return "true" if self.gpu_enabled else "false" + result = None for env_file in self._files: # Wait and resolve in self rather than in env_file @@ -1262,6 +1269,8 @@ def configure( extra_machines_dir=None, case_group=None, ngpus_per_node=0, + gpu_type=None, + gpu_offload=None, ): expect( @@ -1384,14 +1393,19 @@ def configure( if not dmax: dmax = machobj.get_value(name) if dmax: + print(f"here name is {name} and dmax is {dmax}") self.set_value(name, dmax) elif name == "MAX_GPUS_PER_NODE": logger.debug( - "Variable {} not defined for machine {}".format(name, machine_name) + "Variable {} not defined for machine {} and compiler {}".format( + name, machine_name, compiler + ) ) else: logger.warning( - "Variable {} not defined for machine {}".format(name, machine_name) + "Variable {} not defined for machine {} and compiler {}".format( + name, machine_name, compiler + ) ) machdir = machobj.get_machines_dir() @@ -1509,47 +1523,37 @@ def configure( self.set_value("TEST", True) # ---------------------------------------------------------------------------------------------------------- - # Sanity check: - # 1. We assume that there is always a string "gpu" in the compiler name if we want to enable GPU - # 2. For compilers without the string "gpu" in the name: - # 2.1. the ngpus-per-node argument would not update the NGPUS_PER_NODE XML variable, as long as - # the MAX_GPUS_PER_NODE XML variable is not defined (i.e., this argument is not in effect). - # 2.2. if the MAX_GPUS_PER_NODE XML variable is defined, then the ngpus-per-node argument - # must be set to 0. Otherwise, an error will be triggered. - # 3. For compilers with the string "gpu" in the name: - # 3.1. if ngpus-per-node argument is smaller than 0, an error will be triggered. - # 3.2. if ngpus_per_node argument is larger than the value of MAX_GPUS_PER_NODE, the NGPUS_PER_NODE + # Sanity check for a GPU run: + # 1. GPU_TYPE and GPU_OFFLOAD must both be defined to use GPUS + # 2. if ngpus_per_node argument is larger than the value of MAX_GPUS_PER_NODE, the NGPUS_PER_NODE # XML variable in the env_mach_pes.xml file would be set to MAX_GPUS_PER_NODE automatically. - # 3.3. if ngpus-per-node argument is equal to 0, it will be updated to 1 automatically. + # 3. if ngpus-per-node argument is equal to 0, it will be updated to 1 automatically. # ---------------------------------------------------------------------------------------------------------- max_gpus_per_node = self.get_value("MAX_GPUS_PER_NODE") - if max_gpus_per_node: - if "gpu" in compiler: - if not ngpus_per_node: - ngpus_per_node = 1 - logger.warning( - "Setting ngpus_per_node to 1 for compiler {}".format(compiler) - ) - expect( - ngpus_per_node > 0, - " ngpus_per_node is expected > 0 for compiler {}; current value is {}".format( - compiler, ngpus_per_node - ), - ) - else: - expect( - ngpus_per_node == 0, - " ngpus_per_node is expected = 0 for compiler {}; current value is {}".format( - compiler, ngpus_per_node - ), - ) + if gpu_type: + expect( + max_gpus_per_node, + f"GPUS are not defined for machine={machine_name} and compiler={compiler}", + ) + expect( + gpu_offload, + "Both gpu-type and gpu-offload must be defined if either is defined", + ) + self.set_value("GPU_TYPE", gpu_type) + self.set_value("GPU_OFFLOAD", gpu_offload) + self.gpu_enabled = True if ngpus_per_node >= 0: self.set_value( "NGPUS_PER_NODE", - ngpus_per_node + max(1, ngpus_per_node) if ngpus_per_node <= max_gpus_per_node else max_gpus_per_node, ) + elif gpu_offload: + expect( + False, + "Both gpu-type and gpu-offload must be defined if either is defined", + ) self.initialize_derived_attributes() @@ -2354,6 +2358,8 @@ def create( extra_machines_dir=None, case_group=None, ngpus_per_node=0, + gpu_type=None, + gpu_offload=None, ): try: # Set values for env_case.xml @@ -2427,6 +2433,8 @@ def create( extra_machines_dir=extra_machines_dir, case_group=case_group, ngpus_per_node=ngpus_per_node, + gpu_type=gpu_type, + gpu_offload=gpu_offload, ) self.create_caseroot() diff --git a/CIME/data/config/xml_schemas/config_machines.xsd b/CIME/data/config/xml_schemas/config_machines.xsd index d6e3c280a93..d5cc7d5c597 100644 --- a/CIME/data/config/xml_schemas/config_machines.xsd +++ b/CIME/data/config/xml_schemas/config_machines.xsd @@ -6,6 +6,8 @@ + + @@ -249,6 +251,8 @@ + + diff --git a/CIME/data/config/xml_schemas/env_mach_specific.xsd b/CIME/data/config/xml_schemas/env_mach_specific.xsd index f86c6b9f6e1..7778635592b 100644 --- a/CIME/data/config/xml_schemas/env_mach_specific.xsd +++ b/CIME/data/config/xml_schemas/env_mach_specific.xsd @@ -9,6 +9,8 @@ + + @@ -102,6 +104,8 @@ + + diff --git a/CIME/scripts/create_newcase.py b/CIME/scripts/create_newcase.py index 3faea5d6553..ee3df32dc76 100755 --- a/CIME/scripts/create_newcase.py +++ b/CIME/scripts/create_newcase.py @@ -269,6 +269,18 @@ def parse_command_line(args, cimeroot, description): help="Specify number of GPUs used for simulation. ", ) + parser.add_argument( + "--gpu-type", + default=None, + help="Specify type of GPU hardware - currently supported are v100, a100, mi250", + ) + + parser.add_argument( + "--gpu-offload", + default=None, + help="Specify gpu offload method - currently supported are openacc, openmp, combined", + ) + args = CIME.utils.parse_args_and_handle_standard_logging_options(args, parser) if args.srcroot is not None: @@ -345,6 +357,8 @@ def parse_command_line(args, cimeroot, description): args.extra_machines_dir, args.case_group, args.ngpus_per_node, + args.gpu_type, + args.gpu_offload, ) @@ -382,6 +396,8 @@ def _main_func(description=None): extra_machines_dir, case_group, ngpus_per_node, + gpu_type, + gpu_offload, ) = parse_command_line(sys.argv, cimeroot, description) if script_root is None: @@ -447,6 +463,8 @@ def _main_func(description=None): extra_machines_dir=extra_machines_dir, case_group=case_group, ngpus_per_node=ngpus_per_node, + gpu_type=gpu_type, + gpu_offload=gpu_offload, ) # Called after create since casedir does not exist yet diff --git a/CIME/test_scheduler.py b/CIME/test_scheduler.py index a657e2a6b39..d6de0801cfa 100644 --- a/CIME/test_scheduler.py +++ b/CIME/test_scheduler.py @@ -661,8 +661,13 @@ def _create_newcase_phase(self, test): pesize = case_opt[1:] create_newcase_cmd += " --pecount {}".format(pesize) elif case_opt.startswith("G"): - ngpus_per_node = case_opt[1:] - create_newcase_cmd += " --ngpus-per-node {}".format(ngpus_per_node) + if "-" in case_opt: + ngpus_per_node, gpu_type, gpu_offload = case_opt[1:].split("-") + else: + error = "GPU test argument format is ngpus_per_node-gpu_type-gpu_offload" + self._log_output(test, error) + return False, error + create_newcase_cmd += " --ngpus-per-node {} --gpu-type {} --gpu-offload {}".format(ngpus_per_node,gpu_type,gpu_offload) elif case_opt.startswith("V"): self._cime_driver = case_opt[1:] create_newcase_cmd += " --driver {}".format(self._cime_driver) diff --git a/CIME/tests/test_unit_case.py b/CIME/tests/test_unit_case.py index ed473cea21f..dd4d18edf66 100755 --- a/CIME/tests/test_unit_case.py +++ b/CIME/tests/test_unit_case.py @@ -251,6 +251,8 @@ def test_copy( extra_machines_dir=None, case_group=None, ngpus_per_node=0, + gpu_type=None, + gpu_offload=None, ) create_caseroot.assert_called() apply_user_mods.assert_called() @@ -326,6 +328,8 @@ def test_create( extra_machines_dir=None, case_group=None, ngpus_per_node=0, + gpu_type=None, + gpu_offload=None, ) create_caseroot.assert_called() apply_user_mods.assert_called() From 6667b4061546c807e7f7bfab0c9874e0d1b2b700 Mon Sep 17 00:00:00 2001 From: Jian Sun Date: Thu, 23 Mar 2023 11:11:21 -0600 Subject: [PATCH 002/176] Change the settings for Gust GPU nodes modified: CIME/XML/env_mach_pes.py modified: CIME/case/case.py modified: CIME/config.py modified: doc/source/users_guide/cime-customize.rst --- CIME/XML/env_mach_pes.py | 6 ++++- CIME/case/case.py | 32 ++++++++++++++++++----- CIME/config.py | 2 +- doc/source/users_guide/cime-customize.rst | 2 +- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/CIME/XML/env_mach_pes.py b/CIME/XML/env_mach_pes.py index c7635573f95..f5dfa68e889 100644 --- a/CIME/XML/env_mach_pes.py +++ b/CIME/XML/env_mach_pes.py @@ -167,11 +167,15 @@ def get_tasks_per_node(self, total_tasks, max_thread_count): "totaltasks > 0 expected, totaltasks = {}".format(total_tasks), ) if self._comp_interface == "nuopc" and self.get_value("ESMF_AWARE_THREADING"): - tasks_per_node = self.get_value("MAX_MPITASKS_PER_NODE") + if self.get_value("NGPUS_PER_NODE") > 0: + tasks_per_node = self.get_value("MAX_CPUTASKS_PER_GPU_NODE") + else: + tasks_per_node = self.get_value("MAX_MPITASKS_PER_NODE") else: tasks_per_node = min( self.get_value("MAX_TASKS_PER_NODE") // max_thread_count, self.get_value("MAX_MPITASKS_PER_NODE"), + self.get_value("MAX_CPUTASKS_PER_GPU_NODE"), total_tasks, ) return tasks_per_node if tasks_per_node > 0 else 1 diff --git a/CIME/case/case.py b/CIME/case/case.py index 1000f944acd..26eacfe2b1f 100644 --- a/CIME/case/case.py +++ b/CIME/case/case.py @@ -123,7 +123,7 @@ def __init__(self, case_root=None, read_only=True, record=False, non_local=False self._env_generic_files = [] self._files = [] self._comp_interface = None - self.gpu_enabled = None + self.gpu_enabled = False self._non_local = non_local self.read_xml() @@ -276,6 +276,9 @@ def initialize_derived_attributes(self): if max_gpus_per_node: self.ngpus_per_node = self.get_value("NGPUS_PER_NODE") + # update the maximum MPI tasks for a GPU node (could differ from a pure-CPU node) + if self.ngpus_per_node > 0: + max_mpitasks_per_node = self.get_value("MAX_CPUTASKS_PER_GPU_NODE") self.tasks_per_numa = int(math.ceil(self.tasks_per_node / 2.0)) smt_factor = max( @@ -453,7 +456,7 @@ def get_values(self, item, attribute=None, resolved=True, subgroup=None): def get_value(self, item, attribute=None, resolved=True, subgroup=None): if item == "GPU_ENABLED": - if self.gpu_enabled == None: + if not self.gpu_enabled: if self.get_value("GPU_TYPE") != "none": self.gpu_enabled = True return "true" if self.gpu_enabled else "false" @@ -1353,6 +1356,7 @@ def configure( and "MPILIB" not in x and "MAX_MPITASKS_PER_NODE" not in x and "MAX_TASKS_PER_NODE" not in x + and "MAX_CPUTASKS_PER_GPU_NODE" not in x and "MAX_GPUS_PER_NODE" not in x ] @@ -1387,6 +1391,7 @@ def configure( for name in ( "MAX_TASKS_PER_NODE", "MAX_MPITASKS_PER_NODE", + "MAX_CPUTASKS_PER_GPU_NODE", "MAX_GPUS_PER_NODE", ): dmax = machobj.get_value(name, {"compiler": compiler}) @@ -1395,6 +1400,12 @@ def configure( if dmax: print(f"here name is {name} and dmax is {dmax}") self.set_value(name, dmax) + elif name == "MAX_CPUTASKS_PER_GPU_NODE": + logger.debug( + "Variable {} not defined for machine {} and compiler {}".format( + name, machine_name, compiler + ) + ) elif name == "MAX_GPUS_PER_NODE": logger.debug( "Variable {} not defined for machine {} and compiler {}".format( @@ -2057,11 +2068,18 @@ def get_mpirun_cmd(self, job=None, allow_unresolved_envvars=True, overrides=None ngpus_per_node = self.get_value("NGPUS_PER_NODE") if ngpus_per_node and ngpus_per_node > 0 and config.gpus_use_set_device_rank: - # 1. this setting is tested on Casper only and may not work on other machines - # 2. need to be revisited in the future for a more adaptable implementation - rundir = self.get_value("RUNDIR") - output_name = rundir + "/set_device_rank.sh" - mpi_arg_string = mpi_arg_string + " " + output_name + " " + if self.get_value("MACH") == "Gust": + mpi_arg_string = mpi_arg_string + " get_local_rank " + else: + # this wrapper script only works with OpenMPI library + # has been tested on Casper + expect( + self.get_value("MPILIB") == "openmpi", + "The wrapper script only works with OpenMPI library; {} is currently used".format(self.get_value("MPILIB")), + ) + rundir = self.get_value("RUNDIR") + output_name = rundir + "/set_device_rank.sh" + mpi_arg_string = mpi_arg_string + " " + output_name + " " return self.get_resolved_value( "{} {} {} {}".format( diff --git a/CIME/config.py b/CIME/config.py index 8491b2f3f2e..9666439cb2e 100644 --- a/CIME/config.py +++ b/CIME/config.py @@ -180,7 +180,7 @@ def __init__(self): self._set_attribute( "gpus_use_set_device_rank", True, - desc="If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` is appended when the MPI run command is generated.", + desc="If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` or `get_local_rank` (a global script on Gust) is appended when the MPI run command is generated.", ) self._set_attribute( "test_custom_project_machine", diff --git a/doc/source/users_guide/cime-customize.rst b/doc/source/users_guide/cime-customize.rst index ed90e21472a..2c65d1ab954 100644 --- a/doc/source/users_guide/cime-customize.rst +++ b/doc/source/users_guide/cime-customize.rst @@ -44,7 +44,7 @@ default_short_term_archiving True bool If set to `Tr driver_choices ('mct', 'nuopc') tuple Sets the available driver choices for the model. driver_default nuopc str Sets the default driver for the model. enable_smp True bool If set to `True` then `SMP=` is added to model compile command. -gpus_use_set_device_rank True bool If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` is appended when the MPI run command is generated. +gpus_use_set_device_rank True bool If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` or `get_local_rank` (a global script on Gust) is appended when the MPI run command is generated. make_case_run_batch_script False bool If set to `True` and case is not a test then `case.run.sh` is created in case directory from `$MACHDIR/template.case.run.sh`. mct_path {srcroot}/libraries/mct str Sets the path to the mct library. serialize_sharedlib_builds True bool If set to `True` then the TestScheduler will use `proc_pool + 1` processors to build shared libraries otherwise a single processor is used. From e0d625ef3e404a77a49c741072bc7d9fdf8bb3ef Mon Sep 17 00:00:00 2001 From: Jian Sun Date: Thu, 23 Mar 2023 22:16:25 -0600 Subject: [PATCH 003/176] Add missing definition of max_cputasks_per_gpu_node modified: CIME/data/config/xml_schemas/config_machines.xsd --- CIME/data/config/xml_schemas/config_machines.xsd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CIME/data/config/xml_schemas/config_machines.xsd b/CIME/data/config/xml_schemas/config_machines.xsd index d5cc7d5c597..53bd359a503 100644 --- a/CIME/data/config/xml_schemas/config_machines.xsd +++ b/CIME/data/config/xml_schemas/config_machines.xsd @@ -58,6 +58,7 @@ + @@ -168,6 +169,9 @@ + + - + + + + + @@ -50,6 +50,7 @@ + diff --git a/CIME/hist_utils.py b/CIME/hist_utils.py index 32b22abbdbc..83f738dda95 100644 --- a/CIME/hist_utils.py +++ b/CIME/hist_utils.py @@ -71,6 +71,8 @@ def copy_histfiles(case, suffix, match_suffix=None): comments = "Copying hist files to suffix '{}'\n".format(suffix) num_copied = 0 for model in _iter_model_file_substrs(case): + if case.get_value("TEST") and archive.exclude_testing(model): + continue comments += " Copying hist files for model '{}'\n".format(model) test_hists = archive.get_latest_hist_files( casename, model, rundir, suffix=match_suffix, ref_case=ref_case diff --git a/CIME/tests/test_unit_hist_utils.py b/CIME/tests/test_unit_hist_utils.py new file mode 100644 index 00000000000..1412ba3c02e --- /dev/null +++ b/CIME/tests/test_unit_hist_utils.py @@ -0,0 +1,66 @@ +import io +import unittest +from unittest import mock + +from CIME.hist_utils import copy_histfiles +from CIME.XML.archive import Archive + + +class TestHistUtils(unittest.TestCase): + @mock.patch("CIME.hist_utils.safe_copy") + def test_copy_histfiles_exclude(self, safe_copy): + case = mock.MagicMock() + + case.get_env.return_value.get_latest_hist_files.side_effect = [ + ["/tmp/testing.cpl.hi.nc"], + ["/tmp/testing.atm.hi.nc"] + ] + + case.get_env.return_value.exclude_testing.side_effect = [True, False] + + case.get_value.side_effect = [ + "/tmp", # RUNDIR + None, # RUN_REFCASE + "testing", # CASE + True, # TEST + True # TEST + ] + + case.get_compset_components.return_value = ["atm"] + + test_files = [ + "testing.cpl.hi.nc", + ] + + with mock.patch("os.listdir", return_value=test_files): + comments, num_copied = copy_histfiles(case, "base") + + assert num_copied == 1 + + @mock.patch("CIME.hist_utils.safe_copy") + def test_copy_histfiles(self, safe_copy): + case = mock.MagicMock() + + case.get_env.return_value.get_latest_hist_files.return_value = [ + "/tmp/testing.cpl.hi.nc", + ] + + case.get_env.return_value.exclude_testing.return_value = False + + case.get_value.side_effect = [ + "/tmp", # RUNDIR + None, # RUN_REFCASE + "testing", # CASE + True, # TEST + ] + + case.get_compset_components.return_value = [] + + test_files = [ + "testing.cpl.hi.nc", + ] + + with mock.patch("os.listdir", return_value=test_files): + comments, num_copied = copy_histfiles(case, "base") + + assert num_copied == 1 diff --git a/CIME/tests/test_unit_xml_archive_base.py b/CIME/tests/test_unit_xml_archive_base.py index 1a5039c7f57..44d035a3418 100644 --- a/CIME/tests/test_unit_xml_archive_base.py +++ b/CIME/tests/test_unit_xml_archive_base.py @@ -22,6 +22,18 @@ """ +EXCLUDE_TEST_CONFIG = """ + + unique\.name\.unique.nc + + + unique\.name\.unique.nc + + + unique\.name\.unique.nc + +""" + class TestXMLArchiveBase(unittest.TestCase): @contextmanager @@ -32,6 +44,23 @@ def _setup_environment(self, test_files): yield temp_dir + def test_exclude_testing(self): + archiver = ArchiveBase() + + archiver.read_fd(io.StringIO(EXCLUDE_TEST_CONFIG)) + + # no attribute + assert not archiver.exclude_testing("eam") + + # not in config + assert not archiver.exclude_testing("mpassi") + + # set false + assert not archiver.exclude_testing("mpasso") + + # set true + assert archiver.exclude_testing("cpl") + def test_match_files(self): archiver = ArchiveBase() From e9d1b0835767b4fc9f66f81a97817409637c2233 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 5 Jul 2023 13:37:58 -0700 Subject: [PATCH 091/176] Fixes formatting --- CIME/tests/test_unit_hist_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/CIME/tests/test_unit_hist_utils.py b/CIME/tests/test_unit_hist_utils.py index 1412ba3c02e..fe6d4866c34 100644 --- a/CIME/tests/test_unit_hist_utils.py +++ b/CIME/tests/test_unit_hist_utils.py @@ -13,17 +13,17 @@ def test_copy_histfiles_exclude(self, safe_copy): case.get_env.return_value.get_latest_hist_files.side_effect = [ ["/tmp/testing.cpl.hi.nc"], - ["/tmp/testing.atm.hi.nc"] + ["/tmp/testing.atm.hi.nc"], ] case.get_env.return_value.exclude_testing.side_effect = [True, False] case.get_value.side_effect = [ - "/tmp", # RUNDIR - None, # RUN_REFCASE + "/tmp", # RUNDIR + None, # RUN_REFCASE "testing", # CASE - True, # TEST - True # TEST + True, # TEST + True, # TEST ] case.get_compset_components.return_value = ["atm"] @@ -48,10 +48,10 @@ def test_copy_histfiles(self, safe_copy): case.get_env.return_value.exclude_testing.return_value = False case.get_value.side_effect = [ - "/tmp", # RUNDIR - None, # RUN_REFCASE + "/tmp", # RUNDIR + None, # RUN_REFCASE "testing", # CASE - True, # TEST + True, # TEST ] case.get_compset_components.return_value = [] From 75c7fbe6626dd3d55d3742c572d5738719a93631 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 5 Jul 2023 13:49:42 -0700 Subject: [PATCH 092/176] Refactors exclude_testing method --- CIME/XML/archive_base.py | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/CIME/XML/archive_base.py b/CIME/XML/archive_base.py index dcb1f8b46d4..1463d1ea460 100644 --- a/CIME/XML/archive_base.py +++ b/CIME/XML/archive_base.py @@ -9,32 +9,40 @@ class ArchiveBase(GenericXML): - def get_entry(self, compname): - """ - Returns an xml node corresponding to compname in comp_archive_spec - """ - return self.scan_optional_child( - "comp_archive_spec", attributes={"compname": compname} - ) - def exclude_testing(self, compname): """ Checks if component should be excluded from testing. """ + value = self._get_attribute(compname, "exclude_testing") - entry = self.get_entry(compname) - - if entry is None: + if value is None: return False - attrs = self.attrib(entry) + return convert_to_type(value, "logical") - value = attrs.get("exclude_testing", None) + def _get_attribute(self, compname, attr_name): + attrib = self.get_entry_attributes(compname) - if not value: - return False + if attrib is None: + return None - return convert_to_type(value, "logical") + return attrib.get(attr_name, None) + + def get_entry_attributes(self, compname): + entry = self.get_entry(compname) + + if entry is None: + return None + + return self.attrib(entry) + + def get_entry(self, compname): + """ + Returns an xml node corresponding to compname in comp_archive_spec + """ + return self.scan_optional_child( + "comp_archive_spec", attributes={"compname": compname} + ) def _get_file_node_text(self, attnames, archive_entry): """ From b5b80e9b15c96891e171352d1ce67887399db522 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 6 Jul 2023 19:38:27 -0700 Subject: [PATCH 093/176] Links CIME developer's guide in user guide --- doc/source/users_guide/testing.rst | 57 +++--------------------------- 1 file changed, 5 insertions(+), 52 deletions(-) diff --git a/doc/source/users_guide/testing.rst b/doc/source/users_guide/testing.rst index 5c0339fa38b..8ea7c29467c 100644 --- a/doc/source/users_guide/testing.rst +++ b/doc/source/users_guide/testing.rst @@ -450,56 +450,9 @@ If this test will only be run as a single test, you can now create a test name and follow the individual_ test instructions for create_test. If you want this test to be part of a suite, then it must be described in the relevant testlists_YYY.xml file. -=============================== -CIME's scripts regression tests -=============================== -.. _`CIME's scripts regression tests`: +====================== +CIME Developer's guide +====================== +.. _`CIME Developer's guide`: -**$CIMEROOT/scripts/lib/CIME/tests/scripts_regression_tests.py** is the suite of internal tests we run -for the stand-alone CIME testing. With no arguments, it will run the full suite. You can limit testing to a specific -test class or even a specific test within a test class. - -**CCSM_CPRNC should be defined in config_machines.xml for your system prior to running scripts_regression_tests.py** - -Run full suite:: - - python scripts/lib/CIME/tests/scripts_regression_tests.py - -Run a test class:: - - python scripts/lib/CIME/tests/scripts_regression_tests.py CIME.tests.test_unit_case - -Run a specific test:: - - python scripts/lib/CIME/tests/scripts_regression_tests.py CIME.tests.test_unit_case.TestCaseSubmit.test_check_case - -If a test fails, the unittest module that drives scripts_regression_tests wil note the failure, but -won't print the output of the test until testing has completed. When there are failures for a -test, the case directories for that test will not be cleaned up so that the user can do a post-mortem -analysis. The user will be notified of the specific directories that will be left for them to -examine. - -The test suite can also be ran with `pytest` and `pytest-cov`. After the test suite is done running -a coverage report will be presented. - -Install dependencies:: - - python -m pip install pytest pytest-cov - -Run full suite:: - - pytest -vvv - -Run just unit tests:: - - pytest -vvv scripts/lib/CIME/tests/test_unit* - -Run a test class:: - - pytest -vvv scripts/lib/CIME/tests/test_unit_case.py - -Run a specific test:: - - pytest -vvv scripts/lib/CIME/tests/test_unit_case.py::TestCaseSubmit::test_check_case - -More description can be found in https://github.com/ESCOMP/ctsm/wiki/System-Testing-Guide +The CIME Developer's guide can be found on the project's GitHub `wiki `_. From e288589bd622c95f3e4a33bfb953456a3e87358c Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 6 Jul 2023 19:42:06 -0700 Subject: [PATCH 094/176] Fixes creating directory for input data --- docker/entrypoint.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 0743181a4db..c89d6a9d375 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -106,6 +106,8 @@ function init_e3sm() { update_cime "${install_path}/cime" + mkdir -p /storage/inputdata + rsync -vr /cache/ /storage/inputdata/ cd "${install_path}/cime" From bc8b1a3a1daa419950e5bd7b1b04614d7732b271 Mon Sep 17 00:00:00 2001 From: Bill Sacks Date: Fri, 7 Jul 2023 15:26:46 -0600 Subject: [PATCH 095/176] Fix gen_domain installation instructions --- tools/mapping/gen_domain_files/INSTALL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/mapping/gen_domain_files/INSTALL b/tools/mapping/gen_domain_files/INSTALL index 71d7caefcae..b9d474e0cc9 100644 --- a/tools/mapping/gen_domain_files/INSTALL +++ b/tools/mapping/gen_domain_files/INSTALL @@ -3,7 +3,7 @@ HOW TO BUILD ============ (1) $ cd src -(2) $ ../../../configure --macros-format Makefile --mpilib mpi-serial +(2) $ ../../../../CIME/scripts/configure --macros-format Makefile --mpilib mpi-serial Bash users: (3) $ (. ./.env_mach_specific.sh ; gmake) csh users: From a608d9930fbc0eda44786645bd76be6520487f3c Mon Sep 17 00:00:00 2001 From: Bill Sacks Date: Fri, 7 Jul 2023 15:27:03 -0600 Subject: [PATCH 096/176] Use standard Fortran functions for getting command-line arguments Resolves ESMCI/cime#4455 --- .../gen_domain_files/src/gen_domain.F90 | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/mapping/gen_domain_files/src/gen_domain.F90 b/tools/mapping/gen_domain_files/src/gen_domain.F90 index f0b97f20e4e..b06f1e7442a 100644 --- a/tools/mapping/gen_domain_files/src/gen_domain.F90 +++ b/tools/mapping/gen_domain_files/src/gen_domain.F90 @@ -53,7 +53,7 @@ program fmain set_omask = .false. ! Make sure we have arguments - nargs = iargc() + nargs = command_argument_count() if (nargs == 0) then write(6,*)'invoke gen_domain -h for usage' stop @@ -64,47 +64,47 @@ program fmain n = 1 do while (n <= nargs) arg = ' ' - call getarg (n, arg) + call get_command_argument (n, arg) n = n + 1 select case (arg) case ('-m') ! input mapping file - call getarg (n, arg) + call get_command_argument (n, arg) n = n + 1 fmap = trim(arg) cmdline = trim(cmdline) // ' -m ' // trim(arg) case ('-o') ! output ocean grid name - call getarg (n, arg) + call get_command_argument (n, arg) n = n + 1 fn1_out = trim(arg) cmdline = trim(cmdline) // ' -o ' // trim(arg) case ('-l') ! output land grid name - call getarg (n, arg) + call get_command_argument (n, arg) n = n + 1 fn2_out = trim(arg) cmdline = trim(cmdline) // ' -l ' // trim(arg) case ('-p') ! set pole on this grid [0,1,2] - call getarg (n, arg) + call get_command_argument (n, arg) n = n + 1 set_fv_pole_yc = ichar(trim(arg))-48 write(6,*)'set_fv_pole_yc is ',set_fv_pole_yc case ('--fminval') ! set fminval (min allowable land fraction) - call getarg (n, arg) + call get_command_argument (n, arg) n = n + 1 read(arg,*) fminval case ('--fmaxval') ! set fminval (min allowable land fraction) - call getarg (n, arg) + call get_command_argument (n, arg) n = n + 1 read(arg,*) fmaxval case ('-c') ! user comment - call getarg (n, arg) + call get_command_argument (n, arg) n = n + 1 usercomment = trim(arg) case ('--set-omask') From 2878fbc62f2039a3532fe1a75e5526987369ce4a Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 13 Jul 2023 15:36:41 -0700 Subject: [PATCH 097/176] Fixes docs workflow --- .github/workflows/docs.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c7d10f3de44..cd818a8dd53 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,6 +9,8 @@ on: branches: - master + workflow_dispatch: + permissions: contents: read jobs: @@ -20,9 +22,8 @@ jobs: steps: - uses: actions/checkout@v3 with: - fetch-depth: 0 - lfs: true - - uses: tj-actions/changed-files@v32 + fetch-depth: 2 + - uses: tj-actions/changed-files@v37 id: changed-check with: files: doc/** @@ -31,9 +32,7 @@ jobs: contents: write # for peaceiris/actions-gh-pages to push pull-requests: write # to comment on pull requests needs: check-changes - if: | - needs.check-changes.outputs.any_changed == 'true' && - github.event.pull_request.head.repo.full_name == github.repository + if: needs.check-changes.outputs.any_changed == 'true' name: Build and deploy documentation runs-on: ubuntu-latest steps: @@ -61,7 +60,9 @@ jobs: run: | make BUILDDIR=${PWD}/_build -C doc/ html - name: Push PR preview - if: ${{ github.event_name == 'pull_request' }} + if: | + github.event_name == 'pull_request' && + github.event.pull_request.head.repo.full_name == github.repository uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{secrets.GITHUB_TOKEN}} From 585138e899d1356abd52abdd479c0c685dc4b3b3 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 13 Jul 2023 15:41:15 -0700 Subject: [PATCH 098/176] Test commit --- doc/source/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/index.rst b/doc/source/index.rst index 89cc7155218..acaaa14f96e 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -11,6 +11,8 @@ The Common Infrastructure for Modeling the Earth (CIME - pronounced Earth system models, data and stub model components, a driver and associated tools and libraries. +Test + Table of contents ----------------- .. toctree:: From e6baa7c369d3ac8c2ea122776091cf5613ea7004 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 13 Jul 2023 15:49:33 -0700 Subject: [PATCH 099/176] Adds path filtering --- .github/workflows/docs.yml | 4 ++++ .github/workflows/testing.yml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index cd818a8dd53..479ed00a349 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -4,10 +4,14 @@ on: push: branches: - master + paths: + - 'doc/**' pull_request: branches: - master + paths: + - 'doc/**' workflow_dispatch: diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index b2d6c00e09e..fe526c49553 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -4,10 +4,14 @@ on: push: branches: - master + paths: + - 'CIME/**' pull_request: branches: - master + paths: + - 'CIME/**' concurrency: group: ${{ github.ref }} From a7e08ab530bc302898d8a7a839754e05d51104cb Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 13 Jul 2023 15:57:50 -0700 Subject: [PATCH 100/176] Skip testing workflow when only docs are updated --- .github/workflows/testing.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index fe526c49553..e92b0b8c337 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -4,14 +4,14 @@ on: push: branches: - master - paths: - - 'CIME/**' + paths-ignore: + - 'doc/**' pull_request: branches: - master - paths: - - 'CIME/**' + paths-ignore: + - 'doc/**' concurrency: group: ${{ github.ref }} From a3c59c824e9ad2f73f9f9433149434e4b25bf84e Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 13 Jul 2023 16:26:34 -0700 Subject: [PATCH 101/176] Removes check-changes and reorders jobs --- .github/workflows/docs.yml | 65 +++++++++++++++----------------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 479ed00a349..5a450876543 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -18,25 +18,40 @@ on: permissions: contents: read jobs: - check-changes: - name: Check for changes to documentation + cleanup: + permissions: + contents: write # for git push + name: Cleanup branch previews runs-on: ubuntu-latest - outputs: - any_changed: ${{ steps.changed-check.outputs.any_changed }} + if: ${{ github.event_name == 'push' }} steps: - uses: actions/checkout@v3 with: - fetch-depth: 2 - - uses: tj-actions/changed-files@v37 - id: changed-check - with: - files: doc/** + ref: 'gh-pages' + fetch-depth: 0 + lfs: true + path: gh-pages + - name: Remove branch previews + run: | + for name in `ls branch/` + do + if [[ -z "$(git show-ref --quiet ${name})" ]] + then + git rm -rf branch/${name} + fi + done + - name: Commit and push local changes to gh-pages + run: | + git config user.name github-actions[bot] + git config user.email github-actions[bot]@users.noreply.github.com + git commit -m "Clean up branch previews" + git push build-and-deploy: permissions: contents: write # for peaceiris/actions-gh-pages to push pull-requests: write # to comment on pull requests - needs: check-changes - if: needs.check-changes.outputs.any_changed == 'true' + needs: cleanup + if: ${{ always() }} name: Build and deploy documentation runs-on: ubuntu-latest steps: @@ -104,31 +119,3 @@ jobs: destination_dir: './versions/master/html' user_name: 'github-actions[bot]' user_email: 'github-actions[bot]@users.noreply.github.com' - cleanup: - permissions: - contents: write # for git push - needs: build-and-deploy - name: Cleanup branch previews - runs-on: ubuntu-latest - if: ${{ github.event_name == 'push' }} - steps: - - uses: actions/checkout@v3 - with: - ref: 'gh-pages' - fetch-depth: 0 - lfs: true - - name: Remove branch previews - run: | - for name in `ls branch/` - do - if [[ -z "$(git show-ref --quiet ${name})" ]] - then - git rm -rf branch/${name} - fi - done - - name: Commit and push local changes to gh-pages - run: | - git config user.name github-actions[bot] - git config user.email github-actions[bot]@users.noreply.github.com - git commit -m "Clean up branch previews" - git push From 22a036366deeb93babb53959f5d5de2563a7b9cf Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 13 Jul 2023 16:31:12 -0700 Subject: [PATCH 102/176] Test change --- doc/source/index.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/index.rst b/doc/source/index.rst index acaaa14f96e..89cc7155218 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -11,8 +11,6 @@ The Common Infrastructure for Modeling the Earth (CIME - pronounced Earth system models, data and stub model components, a driver and associated tools and libraries. -Test - Table of contents ----------------- .. toctree:: From baf870133eaa403f0cdf22bb6d31939e87839bfd Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Thu, 13 Jul 2023 16:36:50 -0700 Subject: [PATCH 103/176] Quick fix --- .github/workflows/docs.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 5a450876543..098cd4c8c01 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -33,6 +33,8 @@ jobs: path: gh-pages - name: Remove branch previews run: | + pushd $GITHUB_WORKSPACE/gh-pages + for name in `ls branch/` do if [[ -z "$(git show-ref --quiet ${name})" ]] @@ -40,8 +42,7 @@ jobs: git rm -rf branch/${name} fi done - - name: Commit and push local changes to gh-pages - run: | + git config user.name github-actions[bot] git config user.email github-actions[bot]@users.noreply.github.com git commit -m "Clean up branch previews" From 777fc3cac517f3b0f4e8da1b5eb0c326d48b70c9 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 17 Jul 2023 08:25:07 -0600 Subject: [PATCH 104/176] replace PROJECT arg with nothing if PROJECT not required --- CIME/XML/env_batch.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/CIME/XML/env_batch.py b/CIME/XML/env_batch.py index f12db7e2f0f..507f04c247b 100644 --- a/CIME/XML/env_batch.py +++ b/CIME/XML/env_batch.py @@ -607,7 +607,6 @@ def _process_args(self, case, submit_arg_nodes, job, resolve=True): flag, name = self._get_argument(case, arg) except ValueError: continue - if self._batchtype == "cobalt" and job == "case.st_archive": if flag == "-n": name = "task_count" @@ -964,8 +963,19 @@ def _submit_single_job( return - submitargs = case.get_value("BATCH_COMMAND_FLAGS", subgroup=job) + submitargs = case.get_value("BATCH_COMMAND_FLAGS", subgroup=job, resolved=False) + + project = case.get_value("PROJECT", subgroup=job) + if not project: + # If there is no project then we need to remove the project flag + # slurm defines --account only on machines that require it, so this strip isn't required + if batch_system == "pbs" or batch_system=="cobalt": + submitargs = submitargs.replace("-A","") + elif batch_system == "lsf": + submitargs = submitargs.replace("-P","") + + if dep_jobs is not None and len(dep_jobs) > 0: logger.debug("dependencies: {}".format(dep_jobs)) if allow_fail: @@ -1023,7 +1033,8 @@ def _submit_single_job( if mail_type: mail_type = mail_type.split(",") # pylint: disable=no-member - + + if mail_type: mail_type_flag = self.get_value("batch_mail_type_flag", subgroup=None) if mail_type_flag is not None: From 3fe2e88096420dd869585e8b488600d80fa1d318 Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Mon, 17 Jul 2023 08:53:26 -0600 Subject: [PATCH 105/176] apply black --- CIME/XML/env_batch.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/CIME/XML/env_batch.py b/CIME/XML/env_batch.py index 507f04c247b..93545a146a0 100644 --- a/CIME/XML/env_batch.py +++ b/CIME/XML/env_batch.py @@ -970,12 +970,11 @@ def _submit_single_job( if not project: # If there is no project then we need to remove the project flag # slurm defines --account only on machines that require it, so this strip isn't required - if batch_system == "pbs" or batch_system=="cobalt": - submitargs = submitargs.replace("-A","") + if batch_system == "pbs" or batch_system == "cobalt": + submitargs = submitargs.replace("-A", "") elif batch_system == "lsf": - submitargs = submitargs.replace("-P","") - - + submitargs = submitargs.replace("-P", "") + if dep_jobs is not None and len(dep_jobs) > 0: logger.debug("dependencies: {}".format(dep_jobs)) if allow_fail: @@ -1033,8 +1032,7 @@ def _submit_single_job( if mail_type: mail_type = mail_type.split(",") # pylint: disable=no-member - - + if mail_type: mail_type_flag = self.get_value("batch_mail_type_flag", subgroup=None) if mail_type_flag is not None: From 4167d9a6342b415ac1e73f114189cabf325ddc8e Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 19 Jul 2023 08:42:52 -0700 Subject: [PATCH 106/176] Adds missing conda packages --- docker/Dockerfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index b2499b47ce5..7b4e2260cea 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -44,6 +44,8 @@ RUN mamba install --yes -c conda-forge \ rm -rf /opt/conda/pkgs/* # Install version locked packages +# gcc, gxx, gfortran provide symlinks for x86_64-conda-linux-gnu-* +# ar and ranlib are not symlinked RUN mamba install --yes -c conda-forge \ libnetcdf=${LIBNETCDF_VERSION}=*openmpi* \ netcdf-fortran=${NETCDF_FORTRAN_VERSION}=*openmpi* \ @@ -51,7 +53,10 @@ RUN mamba install --yes -c conda-forge \ gcc_linux-64=${GCC_VERSION} \ gxx_linux-64=${GCC_VERSION} \ openmpi-mpifort \ - gfortran_linux-64=${GCC_VERSION} && \ + gfortran_linux-64=${GCC_VERSION} \ + gcc \ + gxx \ + gfortran && \ rm -rf /opt/conda/pkgs/* && \ ln -sf /opt/conda/bin/x86_64-conda-linux-gnu-ar /opt/conda/bin/ar && \ ln -sf /opt/conda/bin/x86_64-conda-linux-gnu-ranlib /opt/conda/bin/ranlib From 283988c7e67dee25c527d21960486450cdaead15 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 19 Jul 2023 08:48:44 -0700 Subject: [PATCH 107/176] Removes duplicates from gnu.cmake and path specific set's there are now correct symlinks for compilers --- docker/docker.cmake | 66 +++------------------------------------------ 1 file changed, 3 insertions(+), 63 deletions(-) diff --git a/docker/docker.cmake b/docker/docker.cmake index eb95c7113ce..cf367dcfd9e 100644 --- a/docker/docker.cmake +++ b/docker/docker.cmake @@ -1,71 +1,11 @@ -set(AR "/opt/conda/bin/x86_64-conda-linux-gnu-ar") -string(APPEND CFLAGS " -mcmodel=medium") -if (compile_threaded) - string(APPEND CFLAGS " -fopenmp") -endif() -if (DEBUG) - string(APPEND CFLAGS " -g -Wall -fbacktrace -fcheck=bounds -ffpe-trap=invalid,zero,overflow") -endif() -if (NOT DEBUG) - string(APPEND CFLAGS " -O") -endif() -if (COMP_NAME STREQUAL csm_share) - string(APPEND CFLAGS " -std=c99") -endif() string(APPEND CXXFLAGS " -std=c++14") -if (compile_threaded) - string(APPEND CXXFLAGS " -fopenmp") -endif() -if (DEBUG) - string(APPEND CXXFLAGS " -g -Wall -fbacktrace") -endif() -if (NOT DEBUG) - string(APPEND CXXFLAGS " -O") -endif() -if (COMP_NAME STREQUAL cism) - string(APPEND CMAKE_OPTS " -D CISM_GNU=ON") -endif() -string(APPEND CMAKE_OPTS " -D CMAKE_AR=/opt/conda/bin/x86_64-conda-linux-gnu-ar") -string(APPEND CMAKE_OPTS " -DCMAKE_Fortran_COMPILER_RANLIB=/opt/conda/bin/x86_64-conda-linux-gnu-ranlib") -string(APPEND CMAKE_OPTS " -DCMAKE_C_COMPILER_RANLIB=/opt/conda/bin/x86_64-conda-linux-gnu-ranlib") -string(APPEND CMAKE_OPTS " -DCMAKE_CXX_COMPILER_RANLIB=/opt/conda/bin/x86_64-conda-linux-gnu-ranlib") -string(APPEND CPPDEFS " -DFORTRANUNDERSCORE -DNO_R16 -DCPRGNU") -if (DEBUG) - string(APPEND CPPDEFS " -DYAKL_DEBUG") -endif() -set(SLIBS "-L/opt/conda/lib -lnetcdf -lnetcdff") -set(CXX_LIBS "-lstdc++") -set(CXX_LINKER "FORTRAN") -string(APPEND FC_AUTO_R8 " -fdefault-real-8") -string(APPEND FFLAGS " -I/opt/conda/include -mcmodel=medium -fconvert=big-endian -ffree-line-length-none -ffixed-line-length-none") -if (compile_threaded) - string(APPEND FFLAGS " -fopenmp") -endif() -if (DEBUG) - string(APPEND FFLAGS " -g -Wall -fbacktrace -fcheck=bounds -ffpe-trap=zero,overflow") -endif() -if (NOT DEBUG) - string(APPEND FFLAGS " -O") -endif() -string(APPEND FFLAGS_NOOPT " -O0") -string(APPEND FIXEDFLAGS " -ffixed-form") -string(APPEND FREEFLAGS " -ffree-form") -set(HAS_F2008_CONTIGUOUS "FALSE") -if (compile_threaded) - string(APPEND LDFLAGS " -fopenmp") -endif() -set(SLIBS " -L/opt/conda/lib -lnetcdff -lnetcdf") +string(APPEND CXX_LIBS " -lstdc++") +string(APPEND SLIBS " -L/opt/conda/lib -lnetcdf -lnetcdff") +string(APPEND FFLAGS " -I/opt/conda/include") set(MPI_PATH "/opt/conda") -set(MPICC "/opt/conda/bin/mpicc") -set(MPICXX "/opt/conda/bin/mpicxx") -set(MPIFC "/opt/conda/bin/mpif90") set(NETCDF_C_PATH "/opt/conda") set(NETCDF_FORTRAN_PATH "/opt/conda") set(PNETCDF_PATH "/opt/conda") -set(SCC "/opt/conda/bin/x86_64-conda-linux-gnu-gcc") -set(SCXX "/opt/conda/bin/x86_64-conda-linux-gnu-g++") -set(SFC "/opt/conda/bin/x86_64-conda-linux-gnu-gfortran") -set(SUPPORTS_CXX "TRUE") if (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER_EQUAL 10) string(APPEND FFLAGS " -fallow-argument-mismatch -fallow-invalid-boz ") endif() From f2cd932ca4bea8c01abde1ca2005fd31da824974 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 19 Jul 2023 10:02:43 -0700 Subject: [PATCH 108/176] Adds comment --- CIME/scripts/create_newcase.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CIME/scripts/create_newcase.py b/CIME/scripts/create_newcase.py index 3faea5d6553..eb82d392994 100755 --- a/CIME/scripts/create_newcase.py +++ b/CIME/scripts/create_newcase.py @@ -239,6 +239,8 @@ def parse_command_line(args, cimeroot, description): parser.add_argument( "--driver", + # use get_cime_default_driver rather than config.driver_default as it considers + # environment, user config then config.driver_default default=get_cime_default_driver(), choices=drv_choices, help=drv_help, From 9708936ef211a8fb553dd7a367cf58e489645db5 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 19 Jul 2023 10:08:38 -0700 Subject: [PATCH 109/176] Fixes using driver_choices from model customize config --- CIME/scripts/create_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CIME/scripts/create_test.py b/CIME/scripts/create_test.py index 44ccb1687be..65fcc03b359 100755 --- a/CIME/scripts/create_test.py +++ b/CIME/scripts/create_test.py @@ -219,7 +219,7 @@ def parse_command_line(args, description): parser.add_argument( "--driver", - choices=("mct", "nuopc", "moab"), + choices=model_config.driver_choices, help="Override driver specified in tests and use this one.", ) From c71f9eaf54805e2dc983108ac53a071492f746e8 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 19 Jul 2023 10:14:44 -0700 Subject: [PATCH 110/176] Adds --driver and deprecates --comp_interface --- CIME/scripts/query_config.py | 14 +++++++++++--- CIME/utils.py | 7 +++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/CIME/scripts/query_config.py b/CIME/scripts/query_config.py index 674713e0485..b746087911f 100755 --- a/CIME/scripts/query_config.py +++ b/CIME/scripts/query_config.py @@ -8,7 +8,7 @@ from CIME.Tools.standard_script_setup import * import re -from CIME.utils import expect +from CIME.utils import expect, get_cime_default_driver, deprecate_action from CIME.XML.files import Files from CIME.XML.component import Component from CIME.XML.compsets import Compsets @@ -314,8 +314,16 @@ def parse_command_line(args, description): parser.add_argument( "--comp_interface", - choices=supported_comp_interfaces, + choices=supported_comp_interfaces, # same as config.driver_choices default="mct", + action=deprecate_action(", use --driver argument"), + help="DEPRECATED: Use --driver argument", + ) + + parser.add_argument( + "--driver", + choices=config.driver_choices, + default=get_cime_default_driver(), help="Coupler/Driver interface", ) @@ -332,7 +340,7 @@ def parse_command_line(args, description): args.machines, args.long, args.xml, - files[args.comp_interface], + files[args.driver], ) diff --git a/CIME/utils.py b/CIME/utils.py index b79fcd5116e..78bbf324045 100644 --- a/CIME/utils.py +++ b/CIME/utils.py @@ -8,6 +8,7 @@ import importlib.util import errno, signal, warnings, filecmp import stat as statlib +from argparse import Action from contextlib import contextmanager from distutils import file_util @@ -21,6 +22,12 @@ GLOBAL = {} +def deprecate_action(message): + class ActionStoreDeprecated(Action): + def __call__(self, parser, namespace, values, option_string=None): + raise DeprecationWarning(f"{option_string} is deprecated{message}") + return ActionStoreDeprecated + def import_from_file(name, file_path): loader = importlib.machinery.SourceFileLoader(name, file_path) From 76add4d90b71877b42b9c34c69bc5bb5126cb939 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Wed, 19 Jul 2023 11:58:06 -0700 Subject: [PATCH 111/176] Fixes black formatting --- CIME/scripts/query_config.py | 2 +- CIME/utils.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CIME/scripts/query_config.py b/CIME/scripts/query_config.py index b746087911f..88d2151d1c1 100755 --- a/CIME/scripts/query_config.py +++ b/CIME/scripts/query_config.py @@ -314,7 +314,7 @@ def parse_command_line(args, description): parser.add_argument( "--comp_interface", - choices=supported_comp_interfaces, # same as config.driver_choices + choices=supported_comp_interfaces, # same as config.driver_choices default="mct", action=deprecate_action(", use --driver argument"), help="DEPRECATED: Use --driver argument", diff --git a/CIME/utils.py b/CIME/utils.py index 78bbf324045..4f0e133ae73 100644 --- a/CIME/utils.py +++ b/CIME/utils.py @@ -26,8 +26,10 @@ def deprecate_action(message): class ActionStoreDeprecated(Action): def __call__(self, parser, namespace, values, option_string=None): raise DeprecationWarning(f"{option_string} is deprecated{message}") + return ActionStoreDeprecated + def import_from_file(name, file_path): loader = importlib.machinery.SourceFileLoader(name, file_path) From 33bfa2a8faacf22a848764532ebf25d769a19216 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Mon, 24 Jul 2023 10:03:14 -0600 Subject: [PATCH 112/176] Add support for MPASO_USE_PETSC --- CIME/build.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CIME/build.py b/CIME/build.py index 675c19f002b..c99d66f0f90 100644 --- a/CIME/build.py +++ b/CIME/build.py @@ -1116,6 +1116,7 @@ def _case_build_impl( ninst_build = case.get_value("NINST_BUILD") smp_value = case.get_value("SMP_VALUE") clm_use_petsc = case.get_value("CLM_USE_PETSC") + mpaso_use_petsc = case.get_value("MPASO_USE_PETSC") cism_use_trilinos = case.get_value("CISM_USE_TRILINOS") mali_use_albany = case.get_value("MALI_USE_ALBANY") mach = case.get_value("MACH") @@ -1140,7 +1141,7 @@ def _case_build_impl( # the future there may be others -- so USE_PETSC will be true if # ANY of those are true. - use_petsc = clm_use_petsc + use_petsc = clm_use_petsc or mpaso_use_petsc case.set_value("USE_PETSC", use_petsc) # Set the overall USE_TRILINOS variable to TRUE if any of the From 226053ab5e1cd72a27e72d674e5489b57af21d07 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Mon, 24 Jul 2023 11:36:15 -0600 Subject: [PATCH 113/176] Fix --- CIME/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CIME/build.py b/CIME/build.py index c99d66f0f90..c48db5ea24a 100644 --- a/CIME/build.py +++ b/CIME/build.py @@ -1141,7 +1141,7 @@ def _case_build_impl( # the future there may be others -- so USE_PETSC will be true if # ANY of those are true. - use_petsc = clm_use_petsc or mpaso_use_petsc + use_petsc = bool(clm_use_petsc) or bool(mpaso_use_petsc) case.set_value("USE_PETSC", use_petsc) # Set the overall USE_TRILINOS variable to TRUE if any of the From b0c93839b210ca06215bfdd4e111485b300b14d7 Mon Sep 17 00:00:00 2001 From: James Edwards Date: Mon, 24 Jul 2023 13:36:51 -0600 Subject: [PATCH 114/176] improve the account remove and add support for slurm --- CIME/XML/env_batch.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/CIME/XML/env_batch.py b/CIME/XML/env_batch.py index 93545a146a0..780166e4898 100644 --- a/CIME/XML/env_batch.py +++ b/CIME/XML/env_batch.py @@ -969,11 +969,16 @@ def _submit_single_job( if not project: # If there is no project then we need to remove the project flag - # slurm defines --account only on machines that require it, so this strip isn't required - if batch_system == "pbs" or batch_system == "cobalt": + if ( + batch_system == "pbs" + or batch_system == "cobalt" + and " -A " in submitargs + ): submitargs = submitargs.replace("-A", "") - elif batch_system == "lsf": + elif batch_system == "lsf" and " -P " in submitargs: submitargs = submitargs.replace("-P", "") + elif batch_system == "slurm" and " --account " in submitargs: + submitargs = submitargs.replace("--account", "") if dep_jobs is not None and len(dep_jobs) > 0: logger.debug("dependencies: {}".format(dep_jobs)) From fcdf8dfc5a94522639285539dfdbf29fa85eaa9e Mon Sep 17 00:00:00 2001 From: James Edwards Date: Mon, 24 Jul 2023 13:45:41 -0600 Subject: [PATCH 115/176] improve the account remove and add support for slurm --- CIME/XML/env_batch.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CIME/XML/env_batch.py b/CIME/XML/env_batch.py index 780166e4898..9cf4136b539 100644 --- a/CIME/XML/env_batch.py +++ b/CIME/XML/env_batch.py @@ -970,10 +970,8 @@ def _submit_single_job( if not project: # If there is no project then we need to remove the project flag if ( - batch_system == "pbs" - or batch_system == "cobalt" - and " -A " in submitargs - ): + batch_system == "pbs" or batch_system == "cobalt" + ) and " -A " in submitargs: submitargs = submitargs.replace("-A", "") elif batch_system == "lsf" and " -P " in submitargs: submitargs = submitargs.replace("-P", "") From ca2386bdad06dcff82a2301bbf9dc89133f9f170 Mon Sep 17 00:00:00 2001 From: Jason Boutte Date: Sat, 29 Jul 2023 00:39:15 -0700 Subject: [PATCH 116/176] Adds skip-submit argument to case.build --- CIME/SystemTests/system_tests_common.py | 13 +++++++++++++ CIME/Tools/case.build | 11 +++++++++++ CIME/test_status.py | 11 +++++++++++ CIME/tests/test_unit_test_status.py | 14 ++++++++++++++ 4 files changed, 49 insertions(+) diff --git a/CIME/SystemTests/system_tests_common.py b/CIME/SystemTests/system_tests_common.py index ea757d1220c..e977e6f08e9 100644 --- a/CIME/SystemTests/system_tests_common.py +++ b/CIME/SystemTests/system_tests_common.py @@ -154,6 +154,7 @@ def build( ninja=False, dry_run=False, separate_builds=False, + skip_submit=False, ): """ Do NOT override this method, this method is the framework that @@ -164,6 +165,9 @@ def build( self._ninja = ninja self._dry_run = dry_run self._user_separate_builds = separate_builds + + was_run_pend = self._test_status.current_is(RUN_PHASE, TEST_PEND_STATUS) + for phase_name, phase_bool in [ (SHAREDLIB_BUILD_PHASE, not model_only), (MODEL_BUILD_PHASE, not sharedlib_only), @@ -202,6 +206,15 @@ def build( comments=("time={:d}".format(int(time_taken))), ) + # Building model while job is queued and awaiting run + if ( + skip_submit + and was_run_pend + and self._test_status.current_is(SUBMIT_PHASE, TEST_PEND_STATUS) + ): + with self._test_status: + self._test_status.set_status(SUBMIT_PHASE, TEST_PASS_STATUS) + return success def build_phase(self, sharedlib_only=False, model_only=False): diff --git a/CIME/Tools/case.build b/CIME/Tools/case.build index c8e4d54c467..4edf177198e 100755 --- a/CIME/Tools/case.build +++ b/CIME/Tools/case.build @@ -80,6 +80,14 @@ def parse_command_line(args, description): help="Build each component one at a time, separately, with output going to separate logs", ) + parser.add_argument( + "--skip-submit", + action="store_true", + help="Sets the current test phase to RUN, skipping the SUBMIT phase. This " + "may be useful if rebuilding the model while this test is in the batch queue. " + "ONLY USE IF A TEST CASE, OTHERWISE IGNORED.", + ) + parser.add_argument( "--dry-run", action="store_true", @@ -173,6 +181,7 @@ def parse_command_line(args, description): args.separate_builds, args.ninja, args.dry_run, + args.skip_submit, ) @@ -191,6 +200,7 @@ def _main_func(description): separate_builds, ninja, dry_run, + skip_submit, ) = parse_command_line(sys.argv, description) success = True @@ -234,6 +244,7 @@ def _main_func(description): ninja=ninja, dry_run=dry_run, separate_builds=separate_builds, + skip_submit=skip_submit, ) else: diff --git a/CIME/test_status.py b/CIME/test_status.py index 20d2cef93a9..90714631eb8 100644 --- a/CIME/test_status.py +++ b/CIME/test_status.py @@ -274,6 +274,17 @@ def get_status(self, phase): def get_comment(self, phase): return self._phase_statuses[phase][1] if phase in self._phase_statuses else None + def current_is(self, phase, status): + try: + latest = self.get_latest_phase() + except KeyError: + return False + + return latest == phase and self.get_status(phase) == status + + def get_latest_phase(self): + return list(self._phase_statuses.keys())[-1] + def phase_statuses_dump( self, prefix="", skip_passes=False, skip_phase_list=None, xfails=None ): diff --git a/CIME/tests/test_unit_test_status.py b/CIME/tests/test_unit_test_status.py index 0b79c8bac6a..9b3036801fc 100755 --- a/CIME/tests/test_unit_test_status.py +++ b/CIME/tests/test_unit_test_status.py @@ -41,6 +41,20 @@ def _set_phase_to_status(self, phase, status): with self._ts: self._ts.set_status(phase, status) + def test_get_latest_phase(self): + assert self._ts.get_latest_phase() == test_status.RUN_PHASE + + def test_current_is(self): + assert self._ts.current_is(test_status.RUN_PHASE, test_status.TEST_PASS_STATUS) + + assert not self._ts.current_is( + test_status.RUN_PHASE, test_status.TEST_PEND_STATUS + ) + + assert not self._ts.current_is( + test_status.SUBMIT_PHASE, test_status.TEST_PASS_STATUS + ) + # ------------------------------------------------------------------------ # Tests of TestStatus.phase_statuses_dump # ------------------------------------------------------------------------ From a5779ab9c319b039663b5cd662877a1055279ec7 Mon Sep 17 00:00:00 2001 From: James Edwards Date: Tue, 1 Aug 2023 07:28:56 -0600 Subject: [PATCH 117/176] if the config_machines.xml file cannot be found give a meaningful error --- CIME/XML/grids.py | 4 ++++ CIME/XML/machines.py | 5 ++++- CIME/tests/test_unit_xml_machines.py | 4 ++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CIME/XML/grids.py b/CIME/XML/grids.py index 819838edddd..b98a89b963b 100644 --- a/CIME/XML/grids.py +++ b/CIME/XML/grids.py @@ -25,6 +25,10 @@ def __init__(self, infile=None, files=None, comp_interface=None): infile = files.get_value("GRIDS_SPEC_FILE") logger.debug(" Grid specification file is {}".format(infile)) schema = files.get_schema("GRIDS_SPEC_FILE") + expect( + os.path.isfile(infile) and os.access(infile, os.R_OK), + f"ERROR: grid file not found {infile}", + ) try: GenericXML.__init__(self, infile, schema) except: diff --git a/CIME/XML/machines.py b/CIME/XML/machines.py index 54c6edea515..1b45cf5b580 100644 --- a/CIME/XML/machines.py +++ b/CIME/XML/machines.py @@ -41,9 +41,12 @@ def __init__(self, infile=None, files=None, machine=None, extra_machines_dir=Non logger.debug("Verifying using schema {}".format(schema)) self.machines_dir = os.path.dirname(infile) + if os.path.exists(infile): + checked_files.append(infile) + else: + expect(False, f"file not found {infile}") GenericXML.__init__(self, infile, schema) - checked_files.append(infile) # Append the contents of $HOME/.cime/config_machines.xml if it exists. # diff --git a/CIME/tests/test_unit_xml_machines.py b/CIME/tests/test_unit_xml_machines.py index df6b9ce90d3..3f47a02b968 100644 --- a/CIME/tests/test_unit_xml_machines.py +++ b/CIME/tests/test_unit_xml_machines.py @@ -140,8 +140,8 @@ class TestUnitXMLMachines(unittest.TestCase): def setUp(self): Machines._FILEMAP = {} - - self.machine = Machines() + # read_only=False for github testing + self.machine = Machines(machine="centos7-linux") self.machine.read_fd(io.StringIO(MACHINE_TEST_XML)) From 565712540458fdaf6854dd40fe7046313a1930e7 Mon Sep 17 00:00:00 2001 From: James Edwards Date: Tue, 1 Aug 2023 08:03:18 -0600 Subject: [PATCH 118/176] remove redundent ERROR --- CIME/XML/grids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CIME/XML/grids.py b/CIME/XML/grids.py index b98a89b963b..e34aacf2d01 100644 --- a/CIME/XML/grids.py +++ b/CIME/XML/grids.py @@ -27,7 +27,7 @@ def __init__(self, infile=None, files=None, comp_interface=None): schema = files.get_schema("GRIDS_SPEC_FILE") expect( os.path.isfile(infile) and os.access(infile, os.R_OK), - f"ERROR: grid file not found {infile}", + f" grid file not found {infile}", ) try: GenericXML.__init__(self, infile, schema) From 2031fb5912d70293011ae57080cd1031dba105cd Mon Sep 17 00:00:00 2001 From: David <36640545+daianzhuo@users.noreply.github.com> Date: Wed, 2 Aug 2023 17:17:01 +0100 Subject: [PATCH 119/176] Update create-a-case.rst with how to use SourceMods Emphasised two points on how to use SourceMods to modify a case. 1. The name of the xmlchange shell file should be shell_commands 2. SourceMods changes do not need to have the same strucutre as the component source codes --- doc/source/users_guide/create-a-case.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/users_guide/create-a-case.rst b/doc/source/users_guide/create-a-case.rst index 71b39b5cbd8..4089be063c8 100644 --- a/doc/source/users_guide/create-a-case.rst +++ b/doc/source/users_guide/create-a-case.rst @@ -207,6 +207,12 @@ As an example, the directory could contain the following files: :: > shell_commands (this would contain ./xmlchange commands) > SourceMods/src.cam/dyncomp.F90 +It is important to note that the file containing the **xmlchange** +commands must be named ``shell_commands`` in order for it to be recognised +and run upon case creation. + +The structure of the component directories do not need to be the same as in the component source code. As an example, should the user want to modify the ``src/dynamics/eul/dyncomp.F90`` file within the CAM source code, the modified file should be put into the ``SourceMods/src.cam`` directly. There is no need to mimic the source code structure, such as ``SourceMods/src.cam/dynamics/eul``. + When the user calls **create_newcase** with the ``--user-mods-dir`` pointing to the full pathname of the directory containing these changes, then the ``CASEROOT`` will be created with these changes applied. From 08d33b3f790b4604a8d91d7ba683756773f6c242 Mon Sep 17 00:00:00 2001 From: David <36640545+daianzhuo@users.noreply.github.com> Date: Wed, 2 Aug 2023 17:24:18 +0100 Subject: [PATCH 120/176] Wrap lines in create-a-case.rst --- doc/source/users_guide/create-a-case.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/users_guide/create-a-case.rst b/doc/source/users_guide/create-a-case.rst index 4089be063c8..c9257da19a6 100644 --- a/doc/source/users_guide/create-a-case.rst +++ b/doc/source/users_guide/create-a-case.rst @@ -211,7 +211,12 @@ It is important to note that the file containing the **xmlchange** commands must be named ``shell_commands`` in order for it to be recognised and run upon case creation. -The structure of the component directories do not need to be the same as in the component source code. As an example, should the user want to modify the ``src/dynamics/eul/dyncomp.F90`` file within the CAM source code, the modified file should be put into the ``SourceMods/src.cam`` directly. There is no need to mimic the source code structure, such as ``SourceMods/src.cam/dynamics/eul``. +The structure of the component directories do not need to be the +same as in the component source code. As an example, should the user +want to modify the ``src/dynamics/eul/dyncomp.F90`` file within the +CAM source code, the modified file should be put into the directory +``SourceMods/src.cam`` directly. There is no need to mimic the source +code structure, such as ``SourceMods/src.cam/dynamics/eul``. When the user calls **create_newcase** with the ``--user-mods-dir`` pointing to the full pathname of the directory containing these changes, then the ``CASEROOT`` will be From 234185085a0d392a4d986769bf6a8d71d7084f2b Mon Sep 17 00:00:00 2001 From: Jian Sun Date: Sun, 6 Aug 2023 22:02:12 -0600 Subject: [PATCH 121/176] fix the usage of wrapper script for Derecho --- CIME/case/case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CIME/case/case.py b/CIME/case/case.py index fffa1e51d08..ec552d57bec 100644 --- a/CIME/case/case.py +++ b/CIME/case/case.py @@ -2107,7 +2107,7 @@ def get_mpirun_cmd(self, job=None, allow_unresolved_envvars=True, overrides=None ngpus_per_node = self.get_value("NGPUS_PER_NODE") if ngpus_per_node and ngpus_per_node > 0 and config.gpus_use_set_device_rank: - if self.get_value("MACH") == "gust": + if self.get_value("MACH") == "gust" or self.get_value("MACH") == "derecho": mpi_arg_string = mpi_arg_string + " get_local_rank " else: # this wrapper script only works with OpenMPI library From de5476ab9aa83563f8fde727f538be593438b171 Mon Sep 17 00:00:00 2001 From: Jian Sun Date: Mon, 7 Aug 2023 12:09:08 -0600 Subject: [PATCH 122/176] update description for Derecho --- CIME/config.py | 2 +- doc/source/users_guide/cime-customize.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CIME/config.py b/CIME/config.py index 9666439cb2e..0c0a8cc04a2 100644 --- a/CIME/config.py +++ b/CIME/config.py @@ -180,7 +180,7 @@ def __init__(self): self._set_attribute( "gpus_use_set_device_rank", True, - desc="If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` or `get_local_rank` (a global script on Gust) is appended when the MPI run command is generated.", + desc="If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` or `get_local_rank` (a global script on Derecho/Gust) is appended when the MPI run command is generated.", ) self._set_attribute( "test_custom_project_machine", diff --git a/doc/source/users_guide/cime-customize.rst b/doc/source/users_guide/cime-customize.rst index 2c65d1ab954..58dbe9a6a40 100644 --- a/doc/source/users_guide/cime-customize.rst +++ b/doc/source/users_guide/cime-customize.rst @@ -44,7 +44,7 @@ default_short_term_archiving True bool If set to `Tr driver_choices ('mct', 'nuopc') tuple Sets the available driver choices for the model. driver_default nuopc str Sets the default driver for the model. enable_smp True bool If set to `True` then `SMP=` is added to model compile command. -gpus_use_set_device_rank True bool If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` or `get_local_rank` (a global script on Gust) is appended when the MPI run command is generated. +gpus_use_set_device_rank True bool If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` or `get_local_rank` (a global script on Derecho/Gust) is appended when the MPI run command is generated. make_case_run_batch_script False bool If set to `True` and case is not a test then `case.run.sh` is created in case directory from `$MACHDIR/template.case.run.sh`. mct_path {srcroot}/libraries/mct str Sets the path to the mct library. serialize_sharedlib_builds True bool If set to `True` then the TestScheduler will use `proc_pool + 1` processors to build shared libraries otherwise a single processor is used. From f6c42fd12bd4a27f13f3755c147430d437cc1275 Mon Sep 17 00:00:00 2001 From: Jian Sun Date: Tue, 8 Aug 2023 09:25:24 -0600 Subject: [PATCH 123/176] use a generic way to apply the MPI wrapper script --- CIME/case/case.py | 17 +++-------- CIME/case/case_setup.py | 28 ------------------- CIME/config.py | 5 ---- .../config/xml_schemas/config_machines.xsd | 4 +++ doc/source/users_guide/cime-customize.rst | 1 - 5 files changed, 8 insertions(+), 47 deletions(-) diff --git a/CIME/case/case.py b/CIME/case/case.py index ec552d57bec..df881db3201 100644 --- a/CIME/case/case.py +++ b/CIME/case/case.py @@ -2106,19 +2106,10 @@ def get_mpirun_cmd(self, job=None, allow_unresolved_envvars=True, overrides=None mpi_arg_string += " : " ngpus_per_node = self.get_value("NGPUS_PER_NODE") - if ngpus_per_node and ngpus_per_node > 0 and config.gpus_use_set_device_rank: - if self.get_value("MACH") == "gust" or self.get_value("MACH") == "derecho": - mpi_arg_string = mpi_arg_string + " get_local_rank " - else: - # this wrapper script only works with OpenMPI library - # has been tested on Casper - expect( - self.get_value("MPILIB") == "openmpi", - "The wrapper script only works with OpenMPI library; {} is currently used".format(self.get_value("MPILIB")), - ) - rundir = self.get_value("RUNDIR") - output_name = rundir + "/set_device_rank.sh" - mpi_arg_string = mpi_arg_string + " " + output_name + " " + if ngpus_per_node and ngpus_per_node > 0: + mpi_gpu_run_script = self.get_value("MPI_GPU_WRAPPER_SCRIPT") + if mpi_gpu_run_script: + mpi_arg_string = mpi_arg_string + " " + mpi_gpu_run_script return self.get_resolved_value( "{} {} {} {}".format( diff --git a/CIME/case/case_setup.py b/CIME/case/case_setup.py index aa8fb8b6b6c..d45c775b74a 100644 --- a/CIME/case/case_setup.py +++ b/CIME/case/case_setup.py @@ -482,31 +482,3 @@ def case_setup(self, clean=False, test_mode=False, reset=False, keep=None): caseroot=caseroot, is_batch=is_batch, ) - - # put the following section here to make sure the rundir is generated first - machdir = self.get_value("MACHDIR") - mach = self.get_value("MACH") - ngpus_per_node = self.get_value("NGPUS_PER_NODE") - overrides = {} - overrides["ngpus_per_node"] = ngpus_per_node - input_template = os.path.join(machdir, "mpi_run_gpu.{}".format(mach)) - if os.path.isfile(input_template): - # update the wrapper script that sets the device id for each MPI rank - output_text = transform_vars( - open(input_template, "r").read(), case=self, overrides=overrides - ) - - # write it out to the run dir - rundir = self.get_value("RUNDIR") - output_name = os.path.join(rundir, "set_device_rank.sh") - logger.info("Creating file {}".format(output_name)) - with open(output_name, "w") as f: - f.write(output_text) - - # make the wrapper script executable - if os.path.isfile(output_name): - os.system("chmod +x " + output_name) - else: - expect( - False, "The file {} is not written out correctly.".format(output_name) - ) diff --git a/CIME/config.py b/CIME/config.py index 0c0a8cc04a2..3cef6cc0530 100644 --- a/CIME/config.py +++ b/CIME/config.py @@ -177,11 +177,6 @@ def __init__(self): False, desc="If set to `True` then COMP_ROOT_DIR_CPL is set using UFS_DRIVER if defined.", ) - self._set_attribute( - "gpus_use_set_device_rank", - True, - desc="If set to `True` and NGPUS_PER_NODE > 0 then `$RUNDIR/set_device_rank.sh` or `get_local_rank` (a global script on Derecho/Gust) is appended when the MPI run command is generated.", - ) self._set_attribute( "test_custom_project_machine", "melvin", diff --git a/CIME/data/config/xml_schemas/config_machines.xsd b/CIME/data/config/xml_schemas/config_machines.xsd index e8c24144b7e..b025c4039e0 100644 --- a/CIME/data/config/xml_schemas/config_machines.xsd +++ b/CIME/data/config/xml_schemas/config_machines.xsd @@ -61,6 +61,7 @@ + @@ -178,6 +179,9 @@ + +