diff --git a/.github/workflows/build_workflow.yml b/.github/workflows/build_workflow.yml index cdca3a6ad..258c86359 100644 --- a/.github/workflows/build_workflow.yml +++ b/.github/workflows/build_workflow.yml @@ -88,6 +88,7 @@ jobs: - if: ${{ steps.skip_check.outputs.should_skip != 'true' }} name: Install polaris run: | + git config --global url."https://github.com/".insteadOf "git@github.com:" ./configure_polaris_envs.py \ --conda_env_only \ --env_name polaris_test \ diff --git a/.github/workflows/docs_workflow.yml b/.github/workflows/docs_workflow.yml index b057b5ed0..2bca4270f 100644 --- a/.github/workflows/docs_workflow.yml +++ b/.github/workflows/docs_workflow.yml @@ -44,6 +44,7 @@ jobs: - if: ${{ steps.skip_check.outputs.should_skip != 'true' }} name: Install polaris run: | + git config --global url."https://github.com/".insteadOf "git@github.com:" ./configure_polaris_envs.py \ --conda_env_only \ --env_name polaris_test \ diff --git a/.gitmodules b/.gitmodules index 7219981e2..5d48517b9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "e3sm_submodules/Omega"] path = e3sm_submodules/Omega url = git@github.com:E3SM-Project/Omega.git +[submodule "jigsaw-python"] + path = jigsaw-python + url = git@github.com:dengwirda/jigsaw-python.git diff --git a/configure_polaris_envs.py b/configure_polaris_envs.py index 43318b924..85abd955e 100755 --- a/configure_polaris_envs.py +++ b/configure_polaris_envs.py @@ -43,7 +43,12 @@ def setup_install_env(env_name, activate_base, use_local, logger, recreate, channels = '--use-local' else: channels = '' - packages = f'jinja2 {mache} packaging progressbar2' + if mache == '': + # development mache so include dependencies + packages = 'importlib_resources jinja2 lxml packaging progressbar2 ' \ + 'pyyaml' + else: + packages = f'jinja2 {mache} packaging progressbar2' if recreate or not os.path.exists(env_path): print('Setting up a conda environment for installing polaris\n') commands = f'{activate_base} && ' \ @@ -114,7 +119,8 @@ def main(): f'git clone -b {args.mache_branch} ' \ f'git@github.com:{args.mache_fork}.git mache && ' \ f'cd mache && ' \ - f'python -m pip install .' + f'conda install -y --file spec-file.txt && ' \ + f'python -m pip install --no-deps .' 
check_call(commands, logger=logger) diff --git a/deploy/albany_supported.txt b/deploy/albany_supported.txt index e09445797..25d67f13a 100644 --- a/deploy/albany_supported.txt +++ b/deploy/albany_supported.txt @@ -4,5 +4,9 @@ anvil, gnu, openmpi chicoma-cpu, gnu, mpich chrysalis, gnu, openmpi compy, gnu, openmpi +frontier, gnu, mpich +frontier, gnugpu, mpich +frontier, crayclang, mpich +frontier, crayclanggpu, mpich pm-cpu, gnu, mpich morpheus, gnu, openmpi diff --git a/deploy/bootstrap.py b/deploy/bootstrap.py index dd4300c95..8aee7690a 100755 --- a/deploy/bootstrap.py +++ b/deploy/bootstrap.py @@ -9,6 +9,7 @@ import socket import stat import subprocess +import time from configparser import ConfigParser from typing import Dict @@ -214,13 +215,16 @@ def get_env_setup(args, config, machine, compiler, mpi, env_type, source_path, ver = version.parse(polaris_version) release_version = '.'.join(str(vr) for vr in ver.release) spack_env = f'dev_polaris_{release_version}{env_suffix}' + conda_env = f'dev_polaris_{polaris_version}{env_suffix}' elif env_type == 'test_release': spack_env = f'test_polaris_{polaris_version}{env_suffix}' + conda_env = spack_env else: spack_env = f'polaris_{polaris_version}{env_suffix}' + conda_env = spack_env if env_name is None or env_type != 'dev': - env_name = spack_env + env_name = conda_env # add the compiler and MPI library to the spack env name spack_env = f'{spack_env}_{compiler}_{mpi}{lib_suffix}' @@ -241,7 +245,7 @@ def get_env_setup(args, config, machine, compiler, mpi, env_type, source_path, def build_conda_env(config, env_type, recreate, mpi, conda_mpi, version, python, source_path, conda_template_path, conda_base, env_name, env_path, activate_base, use_local, - local_conda_build, logger, local_mache): + local_conda_build, logger, local_mache, update_jigsaw): if env_type != 'dev': install_miniforge(conda_base, activate_base, logger) @@ -287,9 +291,9 @@ def build_conda_env(config, env_type, recreate, mpi, conda_mpi, version, mpi_prefix=mpi_prefix, include_mache=not local_mache) - for package in ['esmf', 'geometric_features', 'jigsaw', 'jigsawpy', - 'mache', 'mpas_tools', 'netcdf_c', 'netcdf_fortran', - 'otps', 'parallelio', 'pnetcdf']: + for package in ['esmf', 'geometric_features', 'mache', 'metis', + 'mpas_tools', 'netcdf_c', 'netcdf_fortran', 'otps', + 'parallelio', 'pnetcdf']: replacements[package] = config.get('deploy', package) spec_file = template.render(**replacements) @@ -300,7 +304,10 @@ def build_conda_env(config, env_type, recreate, mpi, conda_mpi, version, else: spec_filename = None - if not os.path.exists(env_path) or recreate: + if not os.path.exists(env_path): + recreate = True + + if recreate: print(f'creating {env_name}') if env_type == 'dev': # install dev dependencies and polaris itself @@ -309,13 +316,6 @@ def build_conda_env(config, env_type, recreate, mpi, conda_mpi, version, f'conda create -y -n {env_name} {channels} ' \ f'--file {spec_filename} {packages}' check_call(commands, logger=logger) - - commands = \ - f'{activate_env} && ' \ - f'cd {source_path} && ' \ - f'python -m pip install --no-deps -e .' 
- check_call(commands, logger=logger) - else: # conda packages don't like dashes version_conda = version.replace('-', '') @@ -332,16 +332,22 @@ def build_conda_env(config, env_type, recreate, mpi, conda_mpi, version, f'conda install -y -n {env_name} {channels} ' \ f'--file {spec_filename} {packages}' check_call(commands, logger=logger) - - commands = \ - f'{activate_env} && ' \ - f'cd {source_path} && ' \ - f'python -m pip install --no-deps -e .' - check_call(commands, logger=logger) else: print(f'{env_name} already exists') if env_type == 'dev': + if recreate or update_jigsaw: + build_jigsaw(activate_env, source_path, env_path, logger) + + # install (or reinstall) polaris in edit mode + print('Installing polaris\n') + commands = \ + f'{activate_env} && ' \ + f'cd {source_path} && ' \ + f'rm -rf polaris.egg-info && ' \ + f'python -m pip install --no-deps -e .' + check_call(commands, logger=logger) + print('Installing pre-commit\n') commands = \ f'{activate_env} && ' \ @@ -350,6 +356,57 @@ def build_conda_env(config, env_type, recreate, mpi, conda_mpi, version, check_call(commands, logger=logger) +def build_jigsaw(activate_env, source_path, env_path, logger): + # remove conda jigsaw and jigsaw-python + t0 = time.time() + commands = \ + f'{activate_env} && ' \ + f'conda remove -y --force-remove jigsaw jigsawpy' + check_call(commands, logger=logger) + + commands = \ + f'{activate_env} && ' \ + f'cd {source_path} && ' \ + f'git submodule update --init jigsaw-python' + check_call(commands, logger=logger) + + print('Building JIGSAW\n') + # add build tools to deployment env, not polaris env + jigsaw_build_deps = 'cxx-compiler cmake' + netcdf_lib = f'{env_path}/lib/libnetcdf.so' + cmake_args = f'-DCMAKE_BUILD_TYPE=Release -DNETCDF_LIBRARY={netcdf_lib}' + + commands = \ + f'conda install -y {jigsaw_build_deps} && ' \ + f'cd {source_path}/jigsaw-python/external/jigsaw && ' \ + f'rm -rf tmp && ' \ + f'mkdir tmp && ' \ + f'cd tmp && ' \ + f'cmake .. {cmake_args} && ' \ + f'cmake --build . --config Release --target install --parallel 4 && ' \ + f'cd {source_path}/jigsaw-python && ' \ + f'rm -rf jigsawpy/_bin jigsawpy/_lib && ' \ + f'cp -r external/jigsaw/bin/ jigsawpy/_bin && ' \ + f'cp -r external/jigsaw/lib/ jigsawpy/_lib' + check_call(commands, logger=logger) + + print('Installing JIGSAW and JIGSAW-Python\n') + commands = \ + f'{activate_env} && ' \ + f'cd {source_path}/jigsaw-python && ' \ + f'python -m pip install --no-deps -e . && ' \ + f'cp jigsawpy/_bin/* ${{CONDA_PREFIX}}/bin' + check_call(commands, logger=logger) + + t1 = time.time() + total = int(t1 - t0 + 0.5) + message = f'JIGSAW install took {total:.1f} s.' 
+ if logger is None: + print(message) + else: + logger.info(message) + + def get_env_vars(machine, compiler, mpilib): if machine is None: @@ -400,14 +457,106 @@ def get_env_vars(machine, compiler, mpilib): return env_vars -def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 - spack_env, spack_base, spack_template_path, env_vars, - tmpdir, logger): +def build_spack_soft_env(config, update_spack, machine, env_type, # noqa: C901 + polaris_version, source_path, spack_base, + spack_template_path, tmpdir): + + if not config.has_option('deploy', 'software_compiler'): + return None + + compiler = config.get('deploy', 'software_compiler') + if not config.has_option('deploy', f'mpi_{compiler}'): + raise ValueError(f'Machine config file for {machine} is missing ' + f'mpi_{compiler}, the MPI library for the software ' + f'compiler.') + mpi = config.get('deploy', f'mpi_{compiler}') + + if machine is not None: + spack_base = get_spack_base(spack_base, config) + + if spack_base is None: + return None + + if env_type == 'dev': + ver = version.parse(polaris_version) + release_version = '.'.join(str(vr) for vr in ver.release) + spack_env = f'dev_polaris_soft_{release_version}' + elif env_type == 'test_release': + spack_env = f'test_polaris_soft_{polaris_version}' + else: + spack_env = f'polaris_soft_{polaris_version}' + + spack_env = spack_env.replace('.', '_') + + build_dir = f'deploy_tmp/build_soft_{machine}' + + try: + shutil.rmtree(build_dir) + except OSError: + pass + try: + os.makedirs(build_dir) + except FileExistsError: + pass + + os.chdir(build_dir) + + esmf = config.get('deploy', 'esmf') + + spack_branch_base = f'{spack_base}/{spack_env}' + + specs = list() + + e3sm_hdf5_netcdf = config.getboolean('deploy', 'use_e3sm_hdf5_netcdf') + if not e3sm_hdf5_netcdf: + hdf5 = config.get('deploy', 'hdf5') + netcdf_c = config.get('deploy', 'netcdf_c') + netcdf_fortran = config.get('deploy', 'netcdf_fortran') + specs.extend([ + f'"hdf5@{hdf5}+cxx+fortran+hl+mpi+shared"', + f'"netcdf-c@{netcdf_c}+mpi~parallel-netcdf"', + f'"netcdf-fortran@{netcdf_fortran}"']) + + if esmf != 'None': + specs.append(f'"esmf@{esmf}+mpi+netcdf~pnetcdf~external-parallelio"') + + yaml_template: str | None = None + template_path = f'{spack_template_path}/{machine}_{compiler}_{mpi}.yaml' + if os.path.exists(template_path): + yaml_template = template_path + + if machine is not None: + here = os.path.abspath(os.path.dirname(__file__)) + machine_config = os.path.join(here, '..', 'polaris', 'machines', + f'{machine}.cfg') + else: + machine_config = None + + if update_spack: + make_spack_env(spack_path=spack_branch_base, env_name=spack_env, + spack_specs=specs, compiler=compiler, mpi=mpi, + machine=machine, config_file=machine_config, + include_e3sm_hdf5_netcdf=e3sm_hdf5_netcdf, + yaml_template=yaml_template, tmpdir=tmpdir) + + spack_view = f'{spack_branch_base}/var/spack/environments/' \ + f'{spack_env}/.spack-env/view' + + os.chdir(source_path) + + return spack_view + + +def build_spack_libs_env(config, update_spack, machine, compiler, # noqa: C901 + mpi, spack_env, spack_base, spack_template_path, + env_vars, tmpdir, logger): albany = config.get('deploy', 'albany') cmake = config.get('deploy', 'cmake') - esmf = config.get('deploy', 'esmf') lapack = config.get('deploy', 'lapack') + metis = config.get('deploy', 'metis') + moab = config.get('deploy', 'moab') + parmetis = config.get('deploy', 'parmetis') petsc = config.get('deploy', 'petsc') scorpio = config.get('deploy', 'scorpio') @@ -430,29 +579,25 @@ def 
build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 f'"netcdf-fortran@{netcdf_fortran}"', f'"parallel-netcdf@{pnetcdf}+cxx+fortran"']) - if esmf != 'None': - specs.append(f'"esmf@{esmf}+mpi+netcdf~pnetcdf~external-parallelio"') if lapack != 'None': specs.append(f'"netlib-lapack@{lapack}"') include_e3sm_lapack = False else: include_e3sm_lapack = True + if metis != 'None': + specs.append(f'"metis@{metis}~shared"') + if moab != 'None': + specs.append( + f'"moab@{moab}+mpi+hdf5+netcdf+pnetcdf+metis+parmetis+tempest"') + if parmetis != 'None': + specs.append(f'"parmetis@{parmetis}~shared"') if petsc != 'None': specs.append(f'"petsc@{petsc}+mpi+batch"') - custom_spack = '' if scorpio != 'None': specs.append( f'"scorpio' f'@{scorpio}+pnetcdf~timing+internal-timing~tools+malloc"') - # make sure scorpio, not esmf, libraries are linked - lib_path = f'{spack_branch_base}/var/spack/environments/' \ - f'{spack_env}/.spack-env/view/lib' - scorpio_lib_path = '$(spack find --format "{prefix}" scorpio)' - custom_spack = \ - f'{custom_spack}' \ - f'ln -sfn {scorpio_lib_path}/lib/libpioc.a {lib_path}\n' \ - f'ln -sfn {scorpio_lib_path}/lib/libpiof.a {lib_path}\n' if albany != 'None': specs.append(f'"albany@{albany}+mpas"') @@ -475,16 +620,8 @@ def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 machine=machine, config_file=machine_config, include_e3sm_lapack=include_e3sm_lapack, include_e3sm_hdf5_netcdf=e3sm_hdf5_netcdf, - yaml_template=yaml_template, tmpdir=tmpdir, - custom_spack=custom_spack) - - # remove ESMC/ESMF include files that interfere with MPAS time keeping - include_path = f'{spack_branch_base}/var/spack/environments/' \ - f'{spack_env}/.spack-env/view/include' - for prefix in ['ESMC', 'esmf']: - files = glob.glob(os.path.join(include_path, f'{prefix}*')) - for filename in files: - os.remove(filename) + yaml_template=yaml_template, tmpdir=tmpdir) + set_ld_library_path(spack_branch_base, spack_env, logger) spack_script = get_spack_script( @@ -526,12 +663,20 @@ def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 f'export LAPACK={spack_view}\n' \ f'export USE_LAPACK=true\n' + if metis != 'None': + env_vars = f'{env_vars}' \ + f'export METIS_ROOT={spack_view}\n' + + if parmetis != 'None': + env_vars = f'{env_vars}' \ + f'export PARMETIS_ROOT={spack_view}\n' + if petsc != 'None': env_vars = f'{env_vars}' \ f'export PETSC={spack_view}\n' \ f'export USE_PETSC=true\n' - return spack_branch_base, spack_script, env_vars + return spack_script, env_vars def set_ld_library_path(spack_branch_base, spack_env, logger): @@ -954,6 +1099,10 @@ def main(): # noqa: C901 permissions_dirs = [] activ_path = None + soft_spack_view = build_spack_soft_env( + config, args.update_spack, machine, env_type, polaris_version, + source_path, args.spack_base, spack_template_path, args.tmpdir) + for compiler, mpi in zip(compilers, mpis): python, recreate, conda_mpi, activ_suffix, env_suffix, \ @@ -992,14 +1141,17 @@ def main(): # noqa: C901 config, env_type, recreate, mpi, conda_mpi, polaris_version, python, source_path, conda_template_path, conda_base, conda_env_name, conda_env_path, activate_base, args.use_local, - args.local_conda_build, logger, local_mache) + args.local_conda_build, logger, local_mache, + args.update_jigsaw) if local_mache: print('Install local mache\n') commands = f'source {conda_base}/etc/profile.d/conda.sh && ' \ f'conda activate {conda_env_name} && ' \ - 'cd ../build_mache/mache && ' \ - 'python -m pip install .' 
+ f'conda install -y importlib_resources jinja2' \ + f' lxml pyyaml progressbar2 && ' \ + f'cd ../build_mache/mache && ' \ + f'python -m pip install --no-deps .' check_call(commands, logger=logger) previous_conda_env = conda_env_name @@ -1011,7 +1163,8 @@ def main(): # noqa: C901 if compiler is not None: env_vars = get_env_vars(machine, compiler, mpi) if spack_base is not None: - _, spack_script, env_vars = build_spack_env( + + spack_script, env_vars = build_spack_libs_env( config, args.update_spack, machine, compiler, mpi, spack_env, spack_base, spack_template_path, env_vars, args.tmpdir, logger) @@ -1024,6 +1177,14 @@ def main(): # noqa: C901 f'{env_vars}' \ f'export PIO={conda_env_path}\n' \ f'export OPENMP_INCLUDE=-I"{conda_env_path}/include"\n' + + if soft_spack_view is None: + raise ValueError('A software compiler or a spack base was not ' + 'defined so required software was not ' + 'installed with spack.') + env_vars = f'{env_vars}' \ + f'export PATH="{soft_spack_view}/bin:$PATH"\n' + else: env_vars = '' @@ -1067,6 +1228,7 @@ def main(): # noqa: C901 if args.update_spack or env_type != 'dev': # we need to update permissions on shared stuff + update_permissions(config, env_type, activ_path, permissions_dirs) diff --git a/deploy/conda-dev-spec.template b/deploy/conda-dev-spec.template index ec26b1765..547e1a4a7 100644 --- a/deploy/conda-dev-spec.template +++ b/deploy/conda-dev-spec.template @@ -16,15 +16,13 @@ holoviews hvplot importlib_resources ipython -jigsaw={{ jigsaw }} -jigsawpy={{ jigsawpy }} jupyter lxml {% if include_mache %} mache={{ mache }} {% endif %} matplotlib-base >=3.6.0,!=3.7.2 -metis +metis={{ metis }} mpas_tools={{ mpas_tools }} nco netcdf4=*=nompi_* diff --git a/deploy/default.cfg b/deploy/default.cfg index 38bc13a07..0b0014011 100644 --- a/deploy/default.cfg +++ b/deploy/default.cfg @@ -21,15 +21,15 @@ mpi = nompi # versions of conda packages geometric_features = 1.2.0 -jigsaw = 0.9.14 -jigsawpy = 0.3.3 -mache = 1.16.0 +mache = 1.19.0 mpas_tools = 0.27.0 otps = 2021.10 parallelio = 2.6.0 # versions of conda or spack packages (depending on machine type) esmf = 8.4.2 +metis = 5.1.0 +moab = 5.5.1 netcdf_c = 4.9.2 netcdf_fortran = 4.6.1 pnetcdf = 1.12.3 @@ -40,5 +40,6 @@ albany = develop cmake = 3.23.0: hdf5 = 1.14.1 lapack = 3.9.1 +parmetis = 4.0.3 petsc = 3.19.1 -scorpio = 1.4.1 +scorpio = 1.4.2 diff --git a/deploy/petsc_supported.txt b/deploy/petsc_supported.txt index 5bc50ab6a..0ece6897f 100644 --- a/deploy/petsc_supported.txt +++ b/deploy/petsc_supported.txt @@ -7,4 +7,5 @@ chicoma-cpu, gnu, mpich chrysalis, intel, openmpi chrysalis, gnu, openmpi compy, intel, impi +frontier, gnu, mpich pm-cpu, gnu, mpich diff --git a/deploy/shared.py b/deploy/shared.py index fb2ea5365..1567961d7 100644 --- a/deploy/shared.py +++ b/deploy/shared.py @@ -35,6 +35,10 @@ def parse_args(bootstrap): "for building E3SM components).") parser.add_argument("--recreate", dest="recreate", action='store_true', help="Recreate the environment if it exists.") + parser.add_argument("--update_jigsaw", dest="update_jigsaw", + action='store_true', + help="Reinstall JIGSAW even if not recreating conda " + "environment.") parser.add_argument("-f", "--config_file", dest="config_file", help="Config file to override deployment config " "options.") @@ -177,7 +181,6 @@ def install_miniforge(conda_base, activate_base, logger): commands = f'{activate_base} && ' \ f'conda config --add channels conda-forge && ' \ f'conda config --set channel_priority strict && ' \ - f'conda install -y "conda>=23.1.0" && ' \ 
f'conda update -y --all && ' \ f'conda init --no-user' diff --git a/deploy/unsupported.txt b/deploy/unsupported.txt index 4490368e8..d75dfb9ca 100644 --- a/deploy/unsupported.txt +++ b/deploy/unsupported.txt @@ -13,10 +13,11 @@ compy, intel, mvapich2 compy, gnu, openmpi compy, pgi, impi compy, pgi, mvapich2 -pm-cpu, nvidia, mpich pm-cpu, aocc, mpich pm-cpu, amdclang, mpich +# cannot build MOAB in spack +anvil, gnu, mvapich # compiles but tests unreliable (errors or hanging), # see https://github.com/MPAS-Dev/compass/issues/336 diff --git a/docs/developers_guide/deploying_spack.md b/docs/developers_guide/deploying_spack.md index 55ffdbf59..0245299b2 100644 --- a/docs/developers_guide/deploying_spack.md +++ b/docs/developers_guide/deploying_spack.md @@ -21,15 +21,29 @@ re-deploy shared spack environments on each supported machine. ### Spack -Spack is for libraries used by MPAS and tools that need system MPI: +Spack is used to build libraries used by E3SM components and tools that need +system MPI: - ESMF +- MOAB - SCORPIO +- Metis +- Parmetis +- Trilinos - Albany - PETSc - Netlib LAPACK -When we update the versions of any of these libraries in Polaris, we also need +We build one spack environment for tools (e.g. ESMF and MOAB) and another for +libraries. This allows us to build the tools with one set of compilers and +MPI libraries and the libraries with another. This is sometimes necessary, +since ESMF, MOAB and/or their dependencies can't always be built or don't +run correctly with all compiler and MPI combinations. For example, we have +experienced problems running ESMF built with intel compilers on Perlmutter. +We are also not able to build ESMF or the Eigen dependency of MOAB using +`nvidiagpu` compilers. + +When we update the versions of any of these packages in Polaris, we also need to bump the Polaris version (typically either the major or the minor version) and then re-deploy shared spack environments on each supported machine. @@ -37,8 +51,8 @@ and then re-deploy shared spack environments on each supported machine. Conda (via conda-forge) is used for python packages and related dependencies that don’t need system MPI. Conda environments aren’t shared between -developers because the polaris you’re developing is part of the conda -environment. +developers because the polaris python package you’re developing is part of the +conda environment. When we update the constraints on conda dependencies, we also need to bump the Polaris alpha, beta or rc version.
We do not need to re-deploy spack @@ -67,6 +81,7 @@ These config options are shared across packages including: - E3SM_Diags - zppy - polaris +- compass - E3SM-Unified Polaris uses these config options to know how to make a job script, where to diff --git a/docs/developers_guide/machines/anvil.md b/docs/developers_guide/machines/anvil.md index 9c8556527..ab2f8fe47 100644 --- a/docs/developers_guide/machines/anvil.md +++ b/docs/developers_guide/machines/anvil.md @@ -17,7 +17,7 @@ Then, you can build the MPAS model with make [DEBUG=true] [OPENMP=true] intel-mpi ``` -For other MPI libraries (`openmpi` or `mvapich` instead of `impi`), use +For other MPI libraries (`openmpi` instead of `impi`), use ```bash make [DEBUG=true] [OPENMP=true] ifort diff --git a/docs/developers_guide/machines/frontier.md b/docs/developers_guide/machines/frontier.md new file mode 100644 index 000000000..465b5cd68 --- /dev/null +++ b/docs/developers_guide/machines/frontier.md @@ -0,0 +1,31 @@ +# Frontier + +## frontier, gnu + +If you've set things up for this compiler, you should be able to source a load +script similar to: + +```bash +source load_dev_polaris_0.3.0-alpha.1_frontier_gnu_mpich.sh +``` + +Then, you can build the MPAS model with + +```bash +make [DEBUG=true] gnu-cray +``` + +## frontier, crayclang + +Similarly to `gnu`, for `crayclang`, if you've set things up right, sourcing +the load script will look something like: + +```bash +source load_dev_polaris_0.3.0-alpha.1_frontier_crayclang_mpich.sh +``` + +To build MPAS components, use: + +```bash +make [DEBUG=true] cray-cray +``` diff --git a/docs/developers_guide/machines/index.md b/docs/developers_guide/machines/index.md index 8f17fd3ee..72f74405b 100644 --- a/docs/developers_guide/machines/index.md +++ b/docs/developers_guide/machines/index.md @@ -2,11 +2,11 @@ # Machines -Polaris attempts to be aware of the capabilities of the machine it is running -on. This is a particular advantage for so-called "supported" machines with a -config file defined for them in the `polaris` package. But even for "unknown" -machines, it is not difficult to set a few config options in your user config -file to describe your machine. Then, polaris can use this data to make sure +Polaris attempts to be aware of the capabilities of the machine it is running +on. This is a particular advantage for so-called "supported" machines with a +config file defined for them in the `polaris` package. But even for "unknown" +machines, it is not difficult to set a few config options in your user config +file to describe your machine. Then, polaris can use this data to make sure test cases are configured in a way that is appropriate for your machine. (dev-supported-machines)= @@ -45,8 +45,6 @@ supported for those configurations with `gnu` compilers. | | | openmpi | ifort | | +------------+-----------+-------------------+ | | gnu | openmpi | gfortran | -| | +-----------+-------------------+ -| | | mvapich | gfortran | +--------------+------------+-----------+-------------------+ | chicoma-cpu | gnu | mpich | gnu-cray | +--------------+------------+-----------+-------------------+ @@ -56,7 +54,13 @@ supported for those configurations with `gnu` compilers.
+--------------+------------+-----------+-------------------+ | compy | intel | impi | intel-mpi | +--------------+------------+-----------+-------------------+ +| frontier | gnu | mpich | gnu-cray | +| +------------+-----------+-------------------+ +| | crayclang | mpich | cray-cray | ++--------------+------------+-----------+-------------------+ | pm-cpu | gnu | mpich | gnu-cray | +| +------------+-----------+-------------------+ +| | intel | mpich | intel-cray | +--------------+------------+-----------+-------------------+ ``` @@ -69,6 +73,7 @@ anvil chicoma chrysalis compy +frontier perlmutter ``` @@ -83,13 +88,13 @@ rather than system compilers. To create a development conda environment and an activation script for it, on Linux, run: ```bash -./conda/configure_polaris_envs.py --conda -c gnu -i mpich +./configure_polaris_envs.py --conda -c gnu -i mpich ``` and on OSX run: ```bash -./conda/configure_polaris_envs.py --conda -c clang -i mpich +./configure_polaris_envs.py --conda -c clang -i mpich ``` You may use `openmpi` instead of `mpich` but we have had better experiences @@ -288,8 +293,8 @@ spack: modules: [] environment: {} extra_rpaths: [] -``` - +``` + Typically your system will already have compilers if nothing else, and this is what we assume here. Give the appropriate path (replace `/usr` with the appropriate path on your system). We have had better luck with `gcc` than diff --git a/docs/developers_guide/machines/perlmutter.md b/docs/developers_guide/machines/perlmutter.md index cca5cb004..56a524efd 100644 --- a/docs/developers_guide/machines/perlmutter.md +++ b/docs/developers_guide/machines/perlmutter.md @@ -14,3 +14,18 @@ Then, you can build the MPAS model with ```bash make [DEBUG=true] gnu-cray ``` + +## pm-cpu, intel + +Similarly to `gnu`, for `intel`, if you've set things up right, sourcing the +load script will look something like: + +```bash +source load_dev_polaris_0.3.0-alpha.1_pm-cpu_intel_mpich.sh +``` + +To build MPAS components, use: + +```bash +make [DEBUG=true] intel-cray +``` diff --git a/docs/developers_guide/quick_start.md b/docs/developers_guide/quick_start.md index 5b7dd49e0..ebf155761 100644 --- a/docs/developers_guide/quick_start.md +++ b/docs/developers_guide/quick_start.md @@ -168,6 +168,10 @@ with the developers. In addition to installing Miniforge3 and creating the conda environment for you, this script will also: +- install [Jigsaw](https://github.com/dengwirda/jigsaw) and + [Jigsaw-Python](https://github.com/dengwirda/jigsaw-python) from source + from the `jigsaw-python` submodule. These tools are used to create many of + the meshes used in Polaris. - install the `polaris` package from the local branch in "development" mode so changes you make to the repo are immediately reflected in the conda environment. @@ -212,10 +216,14 @@ this script will also: to something other than the default (`dev_polaris_` or `dev_polaris__`). -`--with_albany` +`--update_jigsaw` -: Install Albany for full MALI support (currently only with `gnu` - compilers) +: Used to reinstall Jigsaw and Jigsaw-Python into the conda environment if + you have made changes to the Jigsaw (C++) code in the `jigsaw-python` + submodule. You should not need to reinstall Jigsaw-Python if you have made + changes only to the python code in `jigsaw-python`, as the python package + is installed in + [edit mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html). ### Activating the environment @@ -329,14 +337,14 @@ current branch.
If the two don't match, an error like the following results and the environment is not activated: ``` -$ source load_polaris_test_morpheus_gnu_openmpi.sh +$ source load_polaris_test_morpheus_gnu_openmpi.sh This load script is for a different version of polaris: __version__ = '0.2.0' Your code is version: __version__ = '0.3.0-alpha.1' -You need to run ./configure_polaris_envs.py to update your conda +You need to run ./configure_polaris_envs.py to update your conda environment and load script. ``` @@ -378,7 +386,7 @@ the root directory of your polaris branch. The activation script will also check if the current polaris version matches the one used to create the activation script, thus catching situations where the dependencies are out of date and the configure script needs to be rerun. Since sourcing the -activation script is substantially faster than rerunning the configure script, +activation script is substantially faster than rerunning the configure script, it is best to try the activation script first and run the configure script only if you have to. ::: diff --git a/docs/index.md b/docs/index.md index 50fc6e0d3..59a8ccd01 100644 --- a/docs/index.md +++ b/docs/index.md @@ -5,41 +5,49 @@ :width: 384 px ``` -Polaris is a python package that provides an automated system to set up test -cases or analysis tasks for several components of the Exascale Energy Earth +Polaris is a python package that provides an automated system to set up test +cases or analysis tasks for several components of the Energy Exascale Earth System Model ([E3SM](https://e3sm.org/)). The development version -of polaris will be kept closely synchronized with the development repositories -for the components it supports. Release versions will be compatible with +of Polaris will be kept closely synchronized with the development repositories +for the components it supports. Release versions will be compatible with specific tags of the MPAS components. -Many polaris tasks are idealized, and are used for things like +Many Polaris tasks are idealized, and are used for things like performing convergence tests or regression tests on particular parts of the -model code. Many other polaris tasks, such as those under the -{ref}`ocean-global-ocean` and {ref}`landice-greenland` test -groups, are "realistic" in the sense that they use data sets from observations +model code. Many other Polaris tasks, such as those under the +{ref}`ocean-global-ocean` and {ref}`landice-greenland` test +groups, are "realistic" in the sense that they use data sets from observations to create global and regional meshes, initial conditions, and boundary conditions. Polaris will be the tool used to create new land-ice and ocean meshes and -initial conditions for future versions of E3SM. +initial conditions for future versions of E3SM. ```{note} Polaris does *not* provide the tools for creating many of the files needed for full E3SM coupling, a process that requires expert help from the E3SM development team. ``` -The ``polaris`` python package defines the tasks and analysis tasks along -with the commands to list and set up both tasks and suites (groups -of tasks or analysis tasks). Polaris currently supports ``landice`` +The ``polaris`` python package defines the tasks and analysis tasks along +with the commands to list and set up both tasks and suites (groups +of tasks or analysis tasks). Polaris currently supports ``landice``
Nearly all tasks include calls that launch one -of these E3SM components, built in "standalone" (uncoupled) mode. These runs -are configured with config files (e.g. YAML or namelist files) and one of the -benefits of using polaris over attempting to run one of the components directly -is that polaris begins with default values for all these config options -for a given version of the component, modifying only those options where the -default is not appropriate. In this way, polaris requires little alteration +of these E3SM components, built in "standalone" (uncoupled) mode. These runs +are configured with config files (e.g. YAML or namelist files) and one of the +benefits of using Polaris over attempting to run one of the components directly +is that Polaris begins with default values for all these config options +for a given version of the component, modifying only those options where the +default is not appropriate. In this way, Polaris requires little alteration as the model components themselves evolves and new functionality is added. +Polaris makes extensive use of the +[Jigsaw](https://github.com/dengwirda/jigsaw) and +[Jigsaw-Python](https://github.com/dengwirda/jigsaw-python) tools to make all +but the simplest meshes for Polaris tasks. These tools, without which Polaris' + mesh generation capabilities would not be possible, are developed primarily by +[Darren Engwirda](https://dengwirda.github.io/). + + ```{toctree} :caption: User's guide :maxdepth: 2 @@ -91,7 +99,7 @@ glossary (compass)= # Compass -The ``compass`` package is the predecessor of polaris. Documentation for +The ``compass`` package is the predecessor of Polaris. Documentation for compass can be found at: diff --git a/docs/users_guide/machines/anvil.md b/docs/users_guide/machines/anvil.md index 57c8ae3cf..be0d5d10a 100644 --- a/docs/users_guide/machines/anvil.md +++ b/docs/users_guide/machines/anvil.md @@ -70,38 +70,8 @@ partitions = acme-small, acme-medium, acme-large qos = regular, acme_high ``` -## Intel on Anvil +## Loading and running Polaris on Anvil -To load the polaris environment and modules, and set appropriate environment -variables: - -```bash -source /lcrc/soft/climate/polaris/anvil/load_latest_polaris_intel_impi.sh -``` - -To build the MPAS model with - -```bash -make [DEBUG=true] [OPENMP=true] intel-mpi -``` - -For other MPI libraries (`openmpi` or `mvapich` instead of `impi`), use - -```bash -make [DEBUG=true] [OPENMP=true] ifort -``` - -## Gnu on Anvil - -To load the polaris environment and modules, and set appropriate environment -variables: - -```bash -source /lcrc/soft/climate/polaris/anvil/load_latest_polaris_gnu_openmpi.sh -``` - -To build the MPAS model with - -```bash -make [DEBUG=true] [OPENMP=true] [ALBANY=true] gfortran -``` +Follow the developer's guide at {ref}`dev-machines` to get set up. There are +currently no plans to support a different deployment strategy (e.g. a shared +environoment) for users. 
diff --git a/docs/users_guide/machines/chicoma.md b/docs/users_guide/machines/chicoma.md index f9a126eed..284bcc552 100644 --- a/docs/users_guide/machines/chicoma.md +++ b/docs/users_guide/machines/chicoma.md @@ -153,17 +153,8 @@ modules_before = False modules_after = False ``` -### Gnu on Chicoma-CPU +## Loading and running Polaris on Chicoma -To load the polaris environment and modules, and set appropriate environment -variables: - -```bash -source /usr/projects/climate/SHARED_CLIMATE/polaris/chicoma-cpu/load_latest_polaris_gnu_mpich.sh -``` - -To build the MPAS model with - -```bash -make [DEBUG=true] [OPENMP=true] [ALBANY=true] gnu-cray -``` +Follow the developer's guide at {ref}`dev-machines` to get set up. There are +currently no plans to support a different deployment strategy (e.g. a shared +environment) for users. diff --git a/docs/users_guide/machines/chrysalis.md b/docs/users_guide/machines/chrysalis.md index b2b4edff0..09b97c40f 100644 --- a/docs/users_guide/machines/chrysalis.md +++ b/docs/users_guide/machines/chrysalis.md @@ -60,32 +60,8 @@ cores_per_node = 128 partitions = debug, compute, high ``` -## Intel on Chrysalis +## Loading and running Polaris on Chrysalis -To load the polaris environment and modules, and set appropriate environment -variables: - -```bash -source /lcrc/soft/climate/polaris/chrysalis/load_latest_polaris_intel_openmpi.sh -``` - -To build the MPAS model with - -```bash -make [DEBUG=true] [OPENMP=true] ifort -``` - -## Gnu on Chrysalis - -To load the polaris environment and modules, and set appropriate environment -variables: - -```bash -source /lcrc/soft/climate/polaris/chrysalis/load_latest_polaris_gnu_openmpi.sh -``` - -To build the MPAS model with - -```bash -make [DEBUG=true] [OPENMP=true] [ALBANY=true] gfortran -``` +Follow the developer's guide at {ref}`dev-machines` to get set up. There are +currently no plans to support a different deployment strategy (e.g. a shared +environment) for users. diff --git a/docs/users_guide/machines/compy.md b/docs/users_guide/machines/compy.md index 72c3d167c..50caca861 100644 --- a/docs/users_guide/machines/compy.md +++ b/docs/users_guide/machines/compy.md @@ -68,17 +68,8 @@ partitions = slurm qos = regular -## Intel on CompyMcNodeFace +## Loading and running Polaris on CompyMcNodeFace -To load the polaris environment and modules, and set appropriate environment -variables: - -```bash -source source /share/apps/E3SM/conda_envs/polaris/load_latest_polaris_intel_impi.sh -``` - -To build the MPAS model with - -```bash -make [DEBUG=true] [OPENMP=true] intel-mpi -``` +Follow the developer's guide at {ref}`dev-machines` to get set up. There are +currently no plans to support a different deployment strategy (e.g. a shared +environment) for users.
diff --git a/docs/users_guide/machines/frontier.md b/docs/users_guide/machines/frontier.md new file mode 100644 index 000000000..30cd7e563 --- /dev/null +++ b/docs/users_guide/machines/frontier.md @@ -0,0 +1,119 @@ +# Frontier + +login: `ssh @frontier.olcf.ornl.gov` + +interactive login: + +```bash +# for CPU: +salloc -A cli115 --partition=batch --nodes=1 --time=30:00 -C cpu + +# for GPU: +salloc -A cli115 --partition=batch --nodes=1 --time=30:00 -C gpu +``` + +Here is a link to the +[Frontier User Guide](https://docs.olcf.ornl.gov/systems/frontier_user_guide.html) + +## config options + +Here are the default config options added when you have configured Polaris on +a Frontier login node (or specified `./configure_polaris_envs.py -m frontier`): + +```cfg +# The paths section describes paths for data and environments +[paths] + +# A shared root directory where polaris data can be found +database_root = /lustre/orion/cli115/world-shared/polaris + +# the path to the base conda environment where polaris environments have +# been created +polaris_envs = /ccs/proj/cli115/software/polaris/frontier/conda/base + + +# Options related to deploying a polaris conda and spack environments +[deploy] + +# the compiler set to use for system libraries and MPAS builds +compiler = gnu + +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = gnu + +# the system MPI library to use for gnu compiler +mpi_gnu = mpich + +# the system MPI library to use for gnugpu compiler +mpi_gnugpu = mpich + +# the system MPI library to use for crayclang compiler +mpi_crayclang = mpich + +# the system MPI library to use for crayclanggpu compiler +mpi_crayclanggpu = mpich + +# the base path for spack environments used by polaris +spack = /ccs/proj/cli115/software/polaris/frontier/spack + +# whether to use the same modules for hdf5, netcdf-c, netcdf-fortran and +# pnetcdf as E3SM (spack modules are used otherwise) +use_e3sm_hdf5_netcdf = True + +# The parallel section describes options related to running jobs in parallel. +# Most options in this section come from mache so here we just add or override +# some defaults +[parallel] + +# cores per node on the machine +cores_per_node = 56 + +# threads per core (set to 1 because hyperthreading requires extra sbatch +# flag --threads-per-core that polaris doesn't yet support) +threads_per_core = 1 +``` + +Additionally, some relevant config options come from the +[mache](https://github.com/E3SM-Project/mache/) package: + +```cfg +# The parallel section describes options related to running jobs in parallel +[parallel] + +# parallel system of execution: slurm, cobalt or single_node +system = slurm + +# whether to use mpirun or srun to run a task +parallel_executable = srun + +# cores per node on the machine +cores_per_node = 64 + +# account for running diagnostics jobs +account = cli115 + +# available partition(s) (default is the first) +partitions = batch + + +# Config options related to spack environments +[spack] + +# whether to load modules from the spack yaml file before loading the spack +# environment +modules_before = False + +# whether to load modules from the spack yaml file after loading the spack +# environment +modules_after = False + +# whether the machine uses cray compilers +cray_compilers = True +``` + +## Loading and running Polaris on Frontier + +Follow the developer's guide at {ref}`dev-machines` to get set up. There are +currently no plans to support a different deployment strategy (e.g. a shared +environment) for users.
+ diff --git a/docs/users_guide/machines/index.md b/docs/users_guide/machines/index.md index 4b96fb0dc..0e1454759 100644 --- a/docs/users_guide/machines/index.md +++ b/docs/users_guide/machines/index.md @@ -130,6 +130,7 @@ anvil chicoma chrysalis compy +frontier perlmutter ``` diff --git a/docs/users_guide/machines/perlmutter.md b/docs/users_guide/machines/perlmutter.md index e5d1dc1a4..fd751bf18 100644 --- a/docs/users_guide/machines/perlmutter.md +++ b/docs/users_guide/machines/perlmutter.md @@ -6,10 +6,10 @@ interactive login: ```bash # for CPU: -salloc --partition=debug --nodes=1 --time=30:00 -C cpu +salloc --qos=debug --nodes=1 --time=30:00 -C cpu # for GPU: -salloc --partition=debug --nodes=1 --time=30:00 -C gpu +salloc --qos=debug --nodes=1 --time=30:00 -C gpu ``` Compute time: @@ -123,20 +123,11 @@ modules_after = False cray_compilers = True ``` -### Gnu on Perlmutter-CPU +## Loading and running Polaris on Perlmutter -To load the polaris environment and modules, and set appropriate environment -variables: - -```bash -source /global/cfs/cdirs/e3sm/software/polaris/pm-cpu/load_latest_polaris_gnu_mpich.sh -``` - -To build the MPAS model with - -```bash -make [DEBUG=true] [OPENMP=true] [ALBANY=true] gnu-cray -``` +Follow the developer's guide at {ref}`dev-machines` to get set up. There are +currently no plans to support a different deployment strategy (e.g. a shared +environment) for users. ## Jupyter notebook on remote data diff --git a/jigsaw-python b/jigsaw-python new file mode 160000 index 000000000..d9d70e60f --- /dev/null +++ b/jigsaw-python @@ -0,0 +1 @@ +Subproject commit d9d70e60fae9b5686c85113d1d1d4b21ae341789 diff --git a/polaris/machines/anvil.cfg b/polaris/machines/anvil.cfg index 304f40315..43abd9d18 100644 --- a/polaris/machines/anvil.cfg +++ b/polaris/machines/anvil.cfg @@ -15,6 +15,9 @@ polaris_envs = /lcrc/soft/climate/polaris/anvil/base # the compiler set to use for system libraries and MPAS builds compiler = intel +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = intel + # the system MPI library to use for intel compiler mpi_intel = impi diff --git a/polaris/machines/chicoma-cpu.cfg b/polaris/machines/chicoma-cpu.cfg index 18bab1e80..26a71e909 100644 --- a/polaris/machines/chicoma-cpu.cfg +++ b/polaris/machines/chicoma-cpu.cfg @@ -15,6 +15,9 @@ polaris_envs = /usr/projects/e3sm/polaris/chicoma-cpu/conda/base # the compiler set to use for system libraries and MPAS builds compiler = gnu +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = gnu + # the system MPI library to use for gnu compiler mpi_gnu = mpich diff --git a/polaris/machines/chrysalis.cfg b/polaris/machines/chrysalis.cfg index c28f89292..26cbc8f6e 100644 --- a/polaris/machines/chrysalis.cfg +++ b/polaris/machines/chrysalis.cfg @@ -15,6 +15,9 @@ polaris_envs = /lcrc/soft/climate/polaris/chrysalis/base # the compiler set to use for system libraries and MPAS builds compiler = intel +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = intel + # the system MPI library to use for intel compiler mpi_intel = openmpi diff --git a/polaris/machines/compy.cfg b/polaris/machines/compy.cfg index 33f24de0f..0fc702c3e 100644 --- a/polaris/machines/compy.cfg +++ b/polaris/machines/compy.cfg @@ -15,6 +15,9 @@ polaris_envs = /share/apps/E3SM/conda_envs/polaris/conda/base # the compiler set to use for system libraries and MPAS builds compiler = intel +# the compiler to use to build software (e.g.
ESMF and MOAB) with spack +software_compiler = intel + # the system MPI library to use for intel compiler mpi_intel = impi diff --git a/polaris/machines/frontier.cfg b/polaris/machines/frontier.cfg new file mode 100644 index 000000000..0bf3d54a0 --- /dev/null +++ b/polaris/machines/frontier.cfg @@ -0,0 +1,50 @@ +# The paths section describes paths for data and environments +[paths] + +# A shared root directory where polaris data can be found +database_root = /lustre/orion/cli115/world-shared/polaris + +# the path to the base conda environment where polaris environments have +# been created +polaris_envs = /ccs/proj/cli115/software/polaris/frontier/conda/base + + +# Options related to deploying a polaris conda and spack environments +[deploy] + +# the compiler set to use for system libraries and MPAS builds +compiler = gnu + +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = gnu + +# the system MPI library to use for gnu compiler +mpi_gnu = mpich + +# the system MPI library to use for gnugpu compiler +mpi_gnugpu = mpich + +# the system MPI library to use for crayclang compiler +mpi_crayclang = mpich + +# the system MPI library to use for crayclanggpu compiler +mpi_crayclanggpu = mpich + +# the base path for spack environments used by polaris +spack = /ccs/proj/cli115/software/polaris/frontier/spack + +# whether to use the same modules for hdf5, netcdf-c, netcdf-fortran and +# pnetcdf as E3SM (spack modules are used otherwise) +use_e3sm_hdf5_netcdf = True + +# The parallel section describes options related to running jobs in parallel. +# Most options in this section come from mache so here we just add or override +# some defaults +[parallel] + +# cores per node on the machine +cores_per_node = 56 + +# threads per core (set to 1 because hyperthreading requires extra sbatch +# flag --threads-per-core that polaris doesn't yet support) +threads_per_core = 1 diff --git a/polaris/machines/morpheus.cfg b/polaris/machines/morpheus.cfg index 0a3bc51b5..c16c0ec52 100644 --- a/polaris/machines/morpheus.cfg +++ b/polaris/machines/morpheus.cfg @@ -42,6 +42,9 @@ polaris_envs = /home/xylar/data/polaris_envs # the compiler set to use for system libraries and MPAS builds compiler = gnu +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = gnu + # the system MPI library to use for gnu compiler mpi_gnu = openmpi diff --git a/polaris/machines/pm-cpu.cfg b/polaris/machines/pm-cpu.cfg index baa1ac7ed..7e526deaf 100644 --- a/polaris/machines/pm-cpu.cfg +++ b/polaris/machines/pm-cpu.cfg @@ -15,9 +15,18 @@ polaris_envs = /global/common/software/e3sm/polaris/pm-cpu/conda/base # the compiler set to use for system libraries and MPAS builds compiler = gnu +# the compiler to use to build software (e.g. 
ESMF and MOAB) with spack +software_compiler = gnu + # the system MPI library to use for gnu compiler mpi_gnu = mpich +# the system MPI library to use for intel compiler +mpi_intel = mpich + +# the system MPI library to use for nvidia compiler +mpi_nvidia = mpich + # the base path for spack environments used by polaris spack = /global/cfs/cdirs/e3sm/software/polaris/pm-cpu/spack diff --git a/polaris/machines/pm-gpu.cfg b/polaris/machines/pm-gpu.cfg new file mode 100644 index 000000000..6cb1a2c91 --- /dev/null +++ b/polaris/machines/pm-gpu.cfg @@ -0,0 +1,50 @@ +# The paths section describes paths for data and environments +[paths] + +# A shared root directory where polaris data can be found +database_root = /global/cfs/cdirs/e3sm/polaris + +# the path to the base conda environment where polaris environments have +# been created +polaris_envs = /global/common/software/e3sm/polaris/pm-gpu/conda/base + + +# Options related to deploying a polaris conda and spack environments +[deploy] + +# the compiler set to use for system libraries and MPAS builds +compiler = nvidiagpu + +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = gnu + +# the system MPI library to use for gnu compiler +mpi_gnu = mpich + +# the system MPI library to use for gnugpu compiler +mpi_gnugpu = mpich + +# the system MPI library to use for nvidia compiler +mpi_nvidia = mpich + +# the system MPI library to use for nvidiagpu compiler +mpi_nvidiagpu = mpich + +# the base path for spack environments used by polaris +spack = /global/cfs/cdirs/e3sm/software/polaris/pm-gpu/spack + +# whether to use the same modules for hdf5, netcdf-c, netcdf-fortran and +# pnetcdf as E3SM (spack modules are used otherwise) +use_e3sm_hdf5_netcdf = True + +# The parallel section describes options related to running jobs in parallel. 
+# Most options in this section come from mache so here we just add or override +# some defaults +[parallel] + +# cores per node on the machine +cores_per_node = 128 + +# threads per core (set to 1 because hyperthreading appears to cause +# hangs on perlmutter) +threads_per_core = 1 diff --git a/polaris/parallel.py b/polaris/parallel.py index a1bdcdf20..29f46c928 100644 --- a/polaris/parallel.py +++ b/polaris/parallel.py @@ -30,16 +30,27 @@ def get_available_parallel_resources(config): if parallel_system == 'slurm': job_id = os.environ['SLURM_JOB_ID'] node = os.environ['SLURMD_NODENAME'] - args = ['sinfo', '--noheader', '--node', node, '-o', '%X'] - sockets_per_node = _get_subprocess_int(args) - args = ['sinfo', '--noheader', '--node', node, '-o', '%Y'] - cores_per_socket = _get_subprocess_int(args) + args = ['sinfo', '--noheader', '--node', node, '-o', '%C'] + # get allocated, idle, other and total cores + aiot = _get_subprocess_str(args).split('/') + + # we can only use the allocated cores + cores_per_node = int(aiot[0]) + + if cores_per_node == 0: + # no cores show as allocated, so fall back to the total core count + cores_per_node = int(aiot[3]) + + args = ['sinfo', '--noheader', '--node', node, '-o', '%Z'] + slurm_threads_per_core = _get_subprocess_int(args) + + if config.has_option('parallel', 'threads_per_core'): threads_per_core = config.getint('parallel', 'threads_per_core') + # rescale the core count to the requested threads per core + cores_per_node = ((cores_per_node * threads_per_core) // + slurm_threads_per_core) else: - args = ['sinfo', '--noheader', '--node', node, '-o', '%Z'] - threads_per_core = _get_subprocess_int(args) - cores_per_node = sockets_per_node * cores_per_socket * threads_per_core + threads_per_core = slurm_threads_per_core args = ['squeue', '--noheader', '-j', job_id, '-o', '%D'] nodes = _get_subprocess_int(args) cores = cores_per_node * nodes @@ -182,7 +193,12 @@ def get_parallel_command(args, cpus_per_task, ntasks, config): return command_line_args -def _get_subprocess_int(args): +def _get_subprocess_str(args): value = subprocess.check_output(args) - value_int = int(value.decode('utf-8').strip('\n')) + value_str = value.decode('utf-8').strip('\n') + return value_str + + +def _get_subprocess_int(args): + value_int = int(_get_subprocess_str(args)) return value_int diff --git a/polaris/version.py b/polaris/version.py index 7fd229a32..82b0b9b38 100644 --- a/polaris/version.py +++ b/polaris/version.py @@ -1 +1 @@ -__version__ = '0.2.0' +__version__ = '0.3.0-alpha.1' diff --git a/setup.cfg b/setup.cfg index 7fa96df20..f1e48bd2f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,7 +27,7 @@ install_requires = cmocean importlib_resources ipython - jigsawpy==0.3.3 + jigsawpy jupyter lxml matplotlib>=3.6.0,!=3.7.2 diff --git a/utils/matrix/setup_matrix.py b/utils/matrix/setup_matrix.py index 2a659d973..e0c59edc0 100755 --- a/utils/matrix/setup_matrix.py +++ b/utils/matrix/setup_matrix.py @@ -13,9 +13,7 @@ 'anvil': { ('intel', 'impi'): 'intel-mpi', ('intel', 'openmpi'): 'ifort', - ('intel', 'mvapich'): 'ifort', - ('gnu', 'openmpi'): 'gfortran', - ('gnu', 'mvapich'): 'gfortran'}, + ('gnu', 'openmpi'): 'gfortran'}, 'chicoma-cpu': { ('gnu', 'mpich'): 'gnu-cray'}, 'chrysalis': { @@ -23,10 +21,13 @@ ('intel', 'openmpi'): 'ifort', ('gnu', 'openmpi'): 'gfortran'}, 'compy': { - ('intel', 'impi'): 'intel-mpi', - ('gnu', 'openmpi'): 'gfortran'}, + ('intel', 'impi'): 'intel-mpi'}, + 'frontier': { + ('gnu', 'mpich'): 'gnu-cray', + ('crayclang', 'mpich'): 'cray-cray'},
'pm-cpu': { - ('gnu', 'mpich'): 'gnu-cray'}, + ('gnu', 'mpich'): 'gnu-cray', + ('intel', 'mpich'): 'intel-cray'}, 'conda-linux': { ('gfortran', 'mpich'): 'gfortran', ('gfortran', 'openmpi'): 'gfortran'},
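A few standalone sketches follow to clarify the logic this patch introduces. None of them are part of the patch itself, and all values in them are illustrative.

First, the `get_env_setup()` changes in `deploy/bootstrap.py` split the spack and conda environment names for dev builds: the spack name keeps only the release portion of the version, while the conda name keeps the full pre-release version, so an alpha, beta or rc bump creates a new conda environment without forcing a spack redeployment. A minimal sketch of that naming logic, assuming the `packaging` library:

```python
# Sketch (not part of the patch) of the dev env naming in get_env_setup()
from packaging import version

polaris_version = '0.3.0-alpha.1'  # from polaris/version.py in this diff
env_suffix = ''                    # illustrative; may encode extra qualifiers

ver = version.parse(polaris_version)
release_version = '.'.join(str(vr) for vr in ver.release)

# spack envs only change when the release version is bumped...
spack_env = f'dev_polaris_{release_version}{env_suffix}'
# ...while conda envs track the full alpha/beta/rc version
conda_env = f'dev_polaris_{polaris_version}{env_suffix}'

print(spack_env)  # dev_polaris_0.3.0
print(conda_env)  # dev_polaris_0.3.0-alpha.1
```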
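Second, `build_conda_env()` now treats a missing environment as a recreate and only rebuilds JIGSAW from the `jigsaw-python` submodule when the environment is (re)created or `--update_jigsaw` is passed. A condensed sketch of that control flow, with the surrounding deployment steps stubbed out and `needs_jigsaw_build` as a hypothetical helper name:

```python
# Sketch (not part of the patch) of the rebuild conditions in build_conda_env()
import os


def needs_jigsaw_build(env_path, recreate, update_jigsaw, env_type='dev'):
    if not os.path.exists(env_path):
        # a missing conda env always forces a full recreate
        recreate = True
    # JIGSAW is built from source only for dev environments, either on a
    # fresh/recreated env or when explicitly requested with --update_jigsaw
    return env_type == 'dev' and (recreate or update_jigsaw)


print(needs_jigsaw_build('/no/such/env', recreate=False,
                         update_jigsaw=False))  # True (env is missing)
print(needs_jigsaw_build('.', recreate=False,
                         update_jigsaw=False))  # False (env exists, no flag)
print(needs_jigsaw_build('.', recreate=False,
                         update_jigsaw=True))   # True (--update_jigsaw)
```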
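Third, the new `build_spack_soft_env()` builds a separate spack environment for tools like ESMF and MOAB using the dedicated `software_compiler` config option, which can differ from the compiler used for MPAS builds (as in the new `pm-gpu.cfg`). A sketch of that lookup, with an inline config standing in for a machine config file:

```python
# Sketch (not part of the patch) of the software-compiler lookup in
# build_spack_soft_env(); the inline config mimics pm-gpu.cfg from this diff
from configparser import ConfigParser

config = ConfigParser()
config.read_string("""
[deploy]
compiler = nvidiagpu
software_compiler = gnu
mpi_gnu = mpich
mpi_nvidiagpu = mpich
""")

# tools (ESMF, MOAB) are built with the software compiler, not the one
# used for MPAS builds
compiler = config.get('deploy', 'software_compiler')
if not config.has_option('deploy', f'mpi_{compiler}'):
    raise ValueError(f'missing mpi_{compiler} for the software compiler')
mpi = config.get('deploy', f'mpi_{compiler}')

print(compiler, mpi)  # gnu mpich
```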
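Finally, the `polaris/parallel.py` change switches from deriving cores per node from sockets and cores per socket to parsing the allocated/idle/other/total CPU counts that `sinfo -o '%C'` reports, so only the CPUs actually allocated to the job are used. A self-contained sketch of the new arithmetic, using a captured sinfo string instead of a live Slurm call:

```python
# Sketch (not part of the patch) of the core counting now done in
# get_available_parallel_resources(); the sample sinfo output is made up
def cores_from_sinfo(aiot, slurm_threads_per_core, threads_per_core=None):
    # sinfo -o '%C' prints "allocated/idle/other/total" CPUs on the node
    allocated, _idle, _other, total = (int(field)
                                       for field in aiot.split('/'))
    # we can only use the allocated CPUs, falling back to the total if
    # none show as allocated
    cores_per_node = allocated if allocated > 0 else total
    if threads_per_core is not None:
        # rescale when the config asks for fewer threads per core than
        # slurm reports (e.g. to avoid hyperthreading)
        cores_per_node = ((cores_per_node * threads_per_core) //
                          slurm_threads_per_core)
    return cores_per_node


# 128 of 256 logical CPUs allocated, 2 hardware threads per core, and a
# config requesting 1 thread per core -> 64 usable cores
print(cores_from_sinfo('128/128/0/256', slurm_threads_per_core=2,
                       threads_per_core=1))  # 64
```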