diff --git a/utils/omega/ctest/README.md b/utils/omega/ctest/README.md new file mode 100644 index 000000000..84a271ea7 --- /dev/null +++ b/utils/omega/ctest/README.md @@ -0,0 +1,93 @@ +# Omega CTest utility + +This utility helps Omega developers build the model and run +CTests for a given compiler. + +The utility will check out submodules that Omega needs and build Omega with +the requested compiler (see below). On a compute node, the utility will also +run CTests directly. On a login node, it will create a job script for running +CTests and can optionally submit the job script. + +## Instructions + +1. You must have followed the instructions for configuring + Polaris on a [supported machine](https://e3sm-project.github.io/polaris/main/developers_guide/quick_start.html#supported-machines), + specifying the compiler for which you wish to test Omega. The result + should be an activation script like: + ``` + load_dev_polaris_0.3.0-alpha.1_chrysalis_intel_openmpi.sh + ``` + You must be on a machine that supports both E3SM (present in Omega's + [config_machines.xml](https://github.com/E3SM-Project/Omega/blob/develop/cime_config/machines/config_machines.xml)) + and Polaris (see [supported machines](https://e3sm-project.github.io/polaris/main/developers_guide/machines/index.html#supported-machines)). + +2. Source the Polaris load script for the desired compiler, e.g.: + ``` + source load_dev_polaris_0.3.0-alpha.1_chrysalis_intel_openmpi.sh + ``` + +3. Run the utility: + ``` + ./utils/omega/ctest/omega_ctest.py + ``` + The utility will check out submodules and build Omega with the compiler + associated with the Polaris load script (e.g. `intel` in the example above). + + The code is built in a subdirectory `build_omega/build_<machine>_<compiler>` + within the current directory (e.g. the base of the Polaris branch, so + typically not within the Omega branch).
+ + **Flags**: + + ``` + usage: omega_ctest.py [-h] [-o OMEGA_BRANCH] [-c] [-s] [-d] + [--cmake_flags CMAKE_FLAGS] + ``` + + * `-o <omega_branch>`: the path to the local Omega branch to test + (`e3sm_submodules/Omega` by default) + + * `-c`: indicates that the build subdirectory should be removed first to + allow a clean build + + * `-s`: if running the utility on a login node, submit the job script that + the utility generates (does nothing on a compute node) + + * `-d`: build Omega in debug mode + + * `--cmake_flags="<flags>"`: extra flags to pass to the `cmake` command + +4. If you are on a login node and didn't use the `-s` flag, you will need + to submit the batch job to run CTests yourself (perhaps after editing the + job script), e.g.: + ``` + sbatch build_omega/job_build_and_ctest_omega_chrysalis_intel.sh + ``` + +If all goes well, you will see something like: +``` +$ cat omega_ctest_chrysalis_intel.o464153 +Test project /gpfs/fs1/home/ac.xylar/e3sm_work/polaris/add-omega-ctest-util/build_omega/build_chrysalis_intel + Start 1: DATA_TYPES_TEST +1/9 Test #1: DATA_TYPES_TEST .................. Passed 0.38 sec + Start 2: MACHINE_ENV_TEST +2/9 Test #2: MACHINE_ENV_TEST ................. Passed 0.98 sec + Start 3: BROADCAST_TEST +3/9 Test #3: BROADCAST_TEST ................... Passed 1.13 sec + Start 4: LOGGING_TEST +4/9 Test #4: LOGGING_TEST ..................... Passed 0.03 sec + Start 5: DECOMP_TEST +5/9 Test #5: DECOMP_TEST ...................... Passed 1.20 sec + Start 6: HALO_TEST +6/9 Test #6: HALO_TEST ........................ Passed 1.08 sec + Start 7: IO_TEST +7/9 Test #7: IO_TEST .......................... Passed 2.94 sec + Start 8: CONFIG_TEST +8/9 Test #8: CONFIG_TEST ...................... Passed 1.01 sec + Start 9: YAKL_TEST +9/9 Test #9: YAKL_TEST ........................
Passed 0.03 sec
+
+100% tests passed, 0 tests failed out of 9
+
+Total Test time (real) = 8.91 sec
+```
diff --git a/utils/omega/ctest/build_and_ctest.template b/utils/omega/ctest/build_and_ctest.template
new file mode 100644
index 000000000..304a9de04
--- /dev/null
+++ b/utils/omega/ctest/build_and_ctest.template
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+cwd=${PWD}
+
+module load cmake
+
+# quit on errors
+set -e
+# trace commands
+set -x
+
+{% if update_omega_submodule %}
+cd {{ polaris_source_dir }}
+git submodule update --init e3sm_submodules/Omega
+{% endif %}
+
+cd {{ omega_base_dir }}
+
+git submodule update --init --recursive externals/YAKL externals/ekat \
+    externals/scorpio cime
+
+cd ${cwd}
+
+{% if clean %}
+rm -rf build_omega/{{ build_dir }}
+{% endif %}
+mkdir -p build_omega/{{ build_dir }}
+cd build_omega/{{ build_dir }}
+
+export METIS_ROOT={{ metis_root }}
+export PARMETIS_ROOT={{ parmetis_root }}
+
+cmake \
+   -DOMEGA_BUILD_TYPE={{ build_type }} \
+   -DOMEGA_CIME_COMPILER={{ compiler }} \
+   -DOMEGA_CIME_MACHINE={{ machine }} \
+   -DOMEGA_METIS_ROOT=${METIS_ROOT} \
+   -DOMEGA_PARMETIS_ROOT=${PARMETIS_ROOT} \
+   -DOMEGA_BUILD_TEST=ON \
+   -Wno-dev \
+   -S {{ omega_base_dir }}/components/omega \
+   -B . {{ cmake_flags }}
+
+./omega_build.sh
+
+cd test
+
+ln -sfn {{ omega_mesh_filename }} OmegaMesh.nc
+
+{% if run_ctest %}
+./omega_ctest.sh
+{% endif %}
diff --git a/utils/omega/ctest/job_script.template b/utils/omega/ctest/job_script.template
new file mode 100644
index 000000000..785b1d57f
--- /dev/null
+++ b/utils/omega/ctest/job_script.template
@@ -0,0 +1,21 @@
+#!/bin/bash
+#SBATCH --job-name={{ job_name }}
+{% if account != '' -%}
+#SBATCH --account={{ account}}
+{%- endif %}
+#SBATCH --nodes={{ nodes }}
+#SBATCH --output={{ job_name }}.o%j
+#SBATCH --exclusive
+#SBATCH --time={{ wall_time }}
+{% if qos != '' -%}
+#SBATCH --qos={{ qos }}
+{%- endif %}
+{% if partition != '' -%}
+#SBATCH --partition={{ partition }}
+{%- endif %}
+{% if constraint != '' -%}
+#SBATCH --constraint={{ constraint }}
+{%- endif %}
+
+cd {{ build_dir }}
+./omega_ctest.sh
diff --git a/utils/omega/ctest/omega_ctest.py b/utils/omega/ctest/omega_ctest.py
new file mode 100755
index 000000000..51a8744b8
--- /dev/null
+++ b/utils/omega/ctest/omega_ctest.py
@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import subprocess
+
+from jinja2 import Template
+
+from polaris.config import PolarisConfigParser
+from polaris.io import download
+from polaris.job import _clean_up_whitespace, get_slurm_options
+
+
+def make_build_script(machine, compiler, branch, build_only, mesh_filename,
+                      debug, clean, cmake_flags):
+    """
+    Make a shell script for checking out Omega and its submodules, building
+    Omega and its ctests, linking to testing data files, and running ctests.
+
+    The ``POLARIS_BRANCH``, ``METIS_ROOT`` and ``PARMETIS_ROOT`` environment
+    variables must be set.
+
+    Parameters
+    ----------
+    machine : str
+        The machine name, used in the build directory and script names
+    compiler : str
+        The compiler name, used in the build directory and script names
+    branch : str
+        The path to the local Omega branch to build
+    build_only : bool
+        Whether to build Omega without running ctests afterward
+    mesh_filename : str
+        The path of the mesh file to symlink as ``OmegaMesh.nc``
+    debug : bool
+        Whether to build in ``Debug`` rather than ``Release`` mode
+    clean : bool
+        Whether to remove the build directory before building
+    cmake_flags : str or None
+        Extra flags to append to the ``cmake`` command
+
+    Returns
+    -------
+    script_filename : str
+        The absolute path of the build script that was written
+    """
+
+    polaris_source_dir = os.environ['POLARIS_BRANCH']
+    metis_root = os.environ['METIS_ROOT']
+    parmetis_root = os.environ['PARMETIS_ROOT']
+
+    build_dir = f'build_{machine}_{compiler}'
+
+    branch = os.path.abspath(branch)
+    # normalize both paths the same way so redundant separators or ``..`` in
+    # POLARIS_BRANCH do not hide that we are building the Omega submodule
+    omega_submodule = os.path.abspath(
+        os.path.join(polaris_source_dir, 'e3sm_submodules/Omega'))
+    update_omega_submodule = (branch == omega_submodule)
+
+    this_dir = os.path.realpath(
+        os.path.join(os.getcwd(), os.path.dirname(__file__)))
+
+    template_filename = os.path.join(this_dir, 'build_and_ctest.template')
+
+    with open(template_filename, 'r', encoding='utf-8') as f:
+        template = Template(f.read())
+
+    build_type = 'Debug' if debug else 'Release'
+
+    if cmake_flags is None:
+        cmake_flags = ''
+
+    script = template.render(update_omega_submodule=update_omega_submodule,
+                             polaris_source_dir=polaris_source_dir,
+                             omega_base_dir=branch,
+                             build_dir=build_dir,
+                             machine=machine,
+                             compiler=compiler,
+                             metis_root=metis_root,
+                             parmetis_root=parmetis_root,
+                             omega_mesh_filename=mesh_filename,
+                             run_ctest=(not build_only),
+                             build_type=build_type,
+                             clean=clean,
+                             cmake_flags=cmake_flags)
+
+    script = _clean_up_whitespace(script)
+
+    build_omega_dir = os.path.abspath('build_omega')
+    os.makedirs(build_omega_dir, exist_ok=True)
+
+    if build_only:
+        script_filename = f'build_omega_{machine}_{compiler}.sh'
+    else:
+        script_filename = f'build_and_ctest_omega_{machine}_{compiler}.sh'
+
+    script_filename = os.path.join(build_omega_dir, script_filename)
+
+    with open(script_filename, 'w', encoding='utf-8') as f:
+        f.write(script)
+
+    return script_filename
+
+
+def download_mesh(config):
+    """
+    Download the mesh file used for testing.
+
+    Parameters
+    ----------
+    config : polaris.config.PolarisConfigParser
+        Config options providing ``server_base_url`` in the ``download``
+        section and ``database_root`` in the ``paths`` section
+
+    Returns
+    -------
+    download_target : str
+        The value returned by ``polaris.io.download()``, presumably the local
+        path of the mesh file (it is used as the ``OmegaMesh.nc`` link target)
+    """
+    base_url = config.get('download', 'server_base_url')
+    database_root = config.get('paths', 'database_root')
+
+    filepath = 'ocean/polaris_cache/global_convergence/icos/cosine_bell/' \
+               'Icos480/mesh/mesh.230220.nc'
+
+    url = f'{base_url}/{filepath}'
+    download_path = os.path.join(database_root, filepath)
+    download_target = download(url, download_path, config)
+    return download_target
+
+
+def write_job_script(config, machine, compiler, submit):
+    """
+    Write a job script for running ctests in the build directory and
+    optionally submit it.
+
+    Parameters
+    ----------
+    config : polaris.config.PolarisConfigParser
+        Config options; ``account``, ``partitions`` and ``qos`` from the
+        ``parallel`` section are used if present
+    machine : str
+        The machine name, used in the job, build directory and script names
+    compiler : str
+        The compiler name, used in the job, build directory and script names
+    submit : bool
+        Whether to submit the job script with ``sbatch`` after writing it
+    """
+
+    if config.has_option('parallel', 'account'):
+        account = config.get('parallel', 'account')
+    else:
+        account = ''
+
+    nodes = 1
+
+    partition, qos, constraint, _ = get_slurm_options(
+        config, machine, nodes)
+
+    wall_time = '0:15:00'
+
+    # see if we can find a debug partition
+    if config.has_option('parallel', 'partitions'):
+        partition_list = config.getlist('parallel', 'partitions')
+        for partition_local in partition_list:
+            if 'debug' in partition_local:
+                partition = partition_local
+                break
+
+    # see if we can find a debug qos
+    if config.has_option('parallel', 'qos'):
+        qos_list = config.getlist('parallel', 'qos')
+        for qos_local in qos_list:
+            if 'debug' in qos_local:
+                qos = qos_local
+                break
+
+    job_name = f'omega_ctest_{machine}_{compiler}'
+
+    this_dir = os.path.realpath(
+        os.path.join(os.getcwd(), os.path.dirname(__file__)))
+    template_filename = os.path.join(this_dir, 'job_script.template')
+
+    with open(template_filename, 'r', encoding='utf-8') as f:
+        template = Template(f.read())
+
+    build_dir = os.path.abspath(
+        os.path.join('build_omega', f'build_{machine}_{compiler}'))
+
+    script = template.render(job_name=job_name, account=account,
+                             nodes=f'{nodes}', wall_time=wall_time, qos=qos,
+                             partition=partition, constraint=constraint,
+                             build_dir=build_dir)
+    script = _clean_up_whitespace(script)
+
+    build_omega_dir = os.path.abspath('build_omega')
+    script_filename = f'job_build_and_ctest_omega_{machine}_{compiler}.sh'
+    script_filename = os.path.join(build_omega_dir, script_filename)
+
+    with open(script_filename, 'w', encoding='utf-8') as f:
+        f.write(script)
+
+    if submit:
+        args = ['sbatch', script_filename]
+        print(f'\nRunning:\n {" ".join(args)}\n')
+        subprocess.run(args=args, check=True)
+
+
+def main():
+    """
+    Main function for building Omega and performing ctests
+
+    The ``POLARIS_MACHINE`` and ``POLARIS_COMPILER`` environment variables
+    must be set. If ``SLURM_JOB_ID`` is set (i.e. on a compute node), ctests
+    are run by the build script; otherwise, Omega is only built. A job script
+    for running ctests is written in both cases and is submitted only when
+    ``-s`` is given on a login node.
+    """
+    parser = argparse.ArgumentParser(
+        description='Check out submodules, build Omega and run ctest')
+    parser.add_argument('-o', '--omega_branch', dest='omega_branch',
+                        default='e3sm_submodules/Omega',
+                        help='The local Omega branch to test.')
+    parser.add_argument('-c', '--clean', dest='clean', action='store_true',
+                        help='Whether to remove the build directory and start '
+                             'fresh')
+    parser.add_argument('-s', '--submit', dest='submit', action='store_true',
+                        help='Whether to submit a job to run ctests')
+    parser.add_argument('-d', '--debug', dest='debug', action='store_true',
+                        help='Whether to build Omega in debug mode')
+    parser.add_argument('--cmake_flags', dest='cmake_flags',
+                        help='Quoted string with additional cmake flags')
+
+    args = parser.parse_args()
+
+    machine = os.environ['POLARIS_MACHINE']
+    compiler = os.environ['POLARIS_COMPILER']
+
+    config = PolarisConfigParser()
+    config.add_from_package('polaris', 'default.cfg')
+    config.add_from_package('mache.machines', f'{machine}.cfg')
+    config.add_from_package('polaris.machines', f'{machine}.cfg')
+
+    submit = args.submit
+    branch = args.omega_branch
+    debug = args.debug
+    clean = args.clean
+    cmake_flags = args.cmake_flags
+
+    if 'SLURM_JOB_ID' in os.environ:
+        # already on a compute node so we will just run ctests directly
+        # instead of submitting a job
+        build_only = False
+        submit = False
+    else:
+        # on a login node, only build; ctests are run by the job script
+        build_only = True
+
+    mesh_filename = download_mesh(config=config)
+
+    script_filename = make_build_script(machine=machine, compiler=compiler,
+                                        branch=branch, build_only=build_only,
+                                        mesh_filename=mesh_filename,
+                                        debug=debug, clean=clean,
+                                        cmake_flags=cmake_flags)
+
+    # clear environment variables and start fresh with those from login
+    # so spack doesn't get confused by conda; the command is passed as a list
+    # so a script path containing spaces is not split by a shell
+    home = os.environ.get('HOME', '')
+    subprocess.check_call(
+        ['env', '-i', f'HOME={home}', 'bash', '-l', script_filename])
+
+    write_job_script(config=config, machine=machine, compiler=compiler,
+                     submit=submit)
+
+
+if __name__ == '__main__':
+    main()