diff --git a/mache/cime_machine_config/config_machines.xml b/mache/cime_machine_config/config_machines.xml index ff0c583f..593acc1b 100644 --- a/mache/cime_machine_config/config_machines.xml +++ b/mache/cime_machine_config/config_machines.xml @@ -190,6 +190,7 @@ module + cpe cray-hdf5-parallel cray-netcdf-hdf5parallel cray-parallel-netcdf @@ -218,8 +219,8 @@ PrgEnv-gnu/8.5.0 - gcc/12.2.0 - cray-libsci/23.02.1.1 + gcc-native/12.3 + cray-libsci/23.12.5 @@ -229,35 +230,23 @@ PrgEnv-nvidia - nvidia/22.7 - cray-libsci/23.02.1.1 + nvidia/24.5 + cray-libsci/23.12.5 PrgEnv-aocc - aocc/4.0.0 - cray-libsci/23.02.1.1 + aocc/4.1.0 + cray-libsci/23.12.5 - + craype-accel-host craype/2.7.30 cray-mpich/8.1.28 cray-hdf5-parallel/1.12.2.9 cray-netcdf-hdf5parallel/4.9.0.9 cray-parallel-netcdf/1.12.3.9 - - - - craype-accel-host - craype/2.7.20 - cray-mpich/8.1.25 - cray-hdf5-parallel/1.12.2.3 - cray-netcdf-hdf5parallel/4.9.0.3 - cray-parallel-netcdf/1.12.3.3 - - - cmake/3.24.3 evp-patch @@ -367,6 +356,7 @@ module + cpe cray-hdf5-parallel cray-netcdf-hdf5parallel cray-parallel-netcdf @@ -377,12 +367,14 @@ PrgEnv-nvidia PrgEnv-cray PrgEnv-aocc + gcc-native intel intel-oneapi nvidia aocc cudatoolkit climate-utils + cray-libsci matlab craype-accel-nvidia80 craype-accel-host @@ -392,24 +384,24 @@ - PrgEnv-gnu/8.3.3 - gcc/11.2.0 + PrgEnv-gnu/8.5.0 + gcc-native/12.3 PrgEnv-nvidia - nvidia/22.7 + nvidia/24.5 - cudatoolkit/11.7 + cudatoolkit/12.2 craype-accel-nvidia80 - cudatoolkit/11.7 + cudatoolkit/12.2 craype-accel-nvidia80 - gcc-mixed/11.2.0 + gcc-native-mixed/12.3 @@ -421,12 +413,12 @@ - cray-libsci/23.02.1.1 - craype/2.7.20 - cray-mpich/8.1.25 - cray-hdf5-parallel/1.12.2.3 - cray-netcdf-hdf5parallel/4.9.0.3 - cray-parallel-netcdf/1.12.3.3 + cray-libsci/23.12.5 + craype/2.7.30 + cray-mpich/8.1.28 + cray-hdf5-parallel/1.12.2.9 + cray-netcdf-hdf5parallel/4.9.0.9 + cray-parallel-netcdf/1.12.3.9 cmake/3.24.3 @@ -434,6 +426,7 @@ $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld 0.1 + 
0.20 1 @@ -454,6 +447,9 @@ 1 + + $SHELL{if [ -z "$MOAB_ROOT" ]; then echo /global/cfs/cdirs/e3sm/software/moab/gnugpu ; else echo "$MOAB_ROOT"; fi} + $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/gcc-11.2.0; else echo "$ADIOS2_ROOT"; fi} @@ -588,10 +584,9 @@ /global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch software MPI_Bcast - $SHELL{if [ -z "$Albany_ROOT" ]; then echo /global/common/software/e3sm/mali_tpls/albany-e3sm-serial-release-gcc; else echo "$Albany_ROOT"; fi} - $SHELL{if [ -z "$Trilinos_ROOT" ]; then echo /global/common/software/e3sm/mali_tpls/trilinos-e3sm-serial-release-gcc; else echo "$Trilinos_ROOT"; fi} $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} $ENV{CRAY_PARALLEL_NETCDF_PREFIX} + 4000MB $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/intel-2023.1.0; else echo "$ADIOS2_ROOT"; fi} @@ -599,6 +594,8 @@ $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/gcc-11.2.0; else echo "$ADIOS2_ROOT"; fi} Generic + $SHELL{if [ -z "$Albany_ROOT" ]; then echo /global/common/software/e3sm/albany/2024.03.26/gcc/11.2.0; else echo "$Albany_ROOT"; fi} + $SHELL{if [ -z "$Trilinos_ROOT" ]; then echo /global/common/software/e3sm/trilinos/15.1.1/gcc/11.2.0; else echo "$Trilinos_ROOT"; fi} $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/nvidia-22.7; else echo "$ADIOS2_ROOT"; fi} @@ -614,6 +611,13 @@ $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/aocc-4.0.0; else echo "$ADIOS2_ROOT"; fi} + + $SHELL{if [ -z "$MOAB_ROOT" ]; then echo /global/cfs/cdirs/e3sm/software/moab/intel; else echo "$MOAB_ROOT"; fi} + + + $SHELL{if [ -z "$MOAB_ROOT" ]; then echo /global/cfs/cdirs/e3sm/software/moab/gnu; else echo "$MOAB_ROOT"; fi} + + -1 @@ -680,12 +684,14 @@ PrgEnv-nvidia PrgEnv-cray PrgEnv-aocc + gcc-native 
intel intel-oneapi nvidia aocc cudatoolkit climate-utils + cray-libsci matlab craype-accel-nvidia80 craype-accel-host @@ -695,26 +701,24 @@ - PrgEnv-gnu/8.3.3 - gcc/11.2.0 - + PrgEnv-gnu/8.5.0 + gcc-native/12.3 PrgEnv-nvidia - nvidia/23.9 + nvidia/24.5 - cudatoolkit/11.7 - + cudatoolkit/12.2 craype-accel-nvidia80 - cudatoolkit/11.7 + cudatoolkit/12.2 craype-accel-nvidia80 + gcc-native-mixed/12.3 @@ -726,26 +730,20 @@ - cray-libsci/23.02.1.1 - craype/2.7.20 - cray-mpich/8.1.25 - cray-hdf5-parallel/1.12.2.3 - cray-netcdf-hdf5parallel/4.9.0.3 - cray-parallel-netcdf/1.12.3.3 - + cray-parallel-netcdf/1.12.3.9 cmake/3.24.3 - evp-patch $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld 0.1 + 0.20 1 @@ -766,6 +764,9 @@ 1 + + $SHELL{if [ -z "$MOAB_ROOT" ]; then echo /global/cfs/cdirs/e3sm/software/moab/gnugpu ; else echo "$MOAB_ROOT"; fi} + $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/gcc-11.2.0; else echo "$ADIOS2_ROOT"; fi} @@ -823,6 +824,7 @@ module + cpe cray-hdf5-parallel cray-netcdf-hdf5parallel cray-parallel-netcdf @@ -850,36 +852,35 @@ - PrgEnv-gnu - gcc-native - cray-libsci + PrgEnv-gnu/8.5.0 + gcc-native/13.2 + cray-libsci/24.03.0 - PrgEnv-intel - intel + PrgEnv-intel/8.5.0 + intel/2024.1.0 PrgEnv-nvidia nvidia/24.5 - cray-libsci + cray-libsci/24.03.0 PrgEnv-aocc - aocc/4.0.1 - cray-libsci + aocc/4.1.0 + cray-libsci/24.03.0 craype-accel-host - cray-libsci - craype/2.7.30 - cray-mpich/8.1.28 - cray-hdf5-parallel/1.12.2.9 - cray-netcdf-hdf5parallel/4.9.0.9 - cray-parallel-netcdf/1.12.3.9 + craype/2.7.31.11 + cray-mpich/8.1.29 + cray-hdf5-parallel/1.12.2.11 + cray-netcdf-hdf5parallel/4.9.0.11 + cray-parallel-netcdf/1.12.3.11 cmake/3.24.3 @@ -887,6 +888,7 @@ $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld 0.1 + 0.20 1 @@ -901,12 +903,44 @@ MPI_Bcast $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} $ENV{CRAY_PARALLEL_NETCDF_PREFIX} + 4000MB + + + $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo 
/global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/intel-2023.1.0; else echo "$ADIOS2_ROOT"; fi} + + + $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/gcc-11.2.0; else echo "$ADIOS2_ROOT"; fi} + Generic + $SHELL{if [ -z "$Albany_ROOT" ]; then echo /global/common/software/e3sm/albany/2024.03.26/gcc/11.2.0; else echo "$Albany_ROOT"; fi} + $SHELL{if [ -z "$Trilinos_ROOT" ]; then echo /global/common/software/e3sm/trilinos/15.1.1/gcc/11.2.0; else echo "$Trilinos_ROOT"; fi} + + + $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/nvidia-22.7; else echo "$ADIOS2_ROOT"; fi} + + + $SHELL{if [ -z "$BLAS_ROOT" ]; then echo $NVIDIA_PATH/compilers; else echo "$BLAS_ROOT"; fi} + $SHELL{if [ -z "$LAPACK_ROOT" ]; then echo $NVIDIA_PATH/compilers; else echo "$LAPACK_ROOT"; fi} + NVHPC + + + Intel10_64_dyn + + + $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /global/cfs/cdirs/e3sm/3rdparty/adios2/2.9.1/cray-mpich-8.1.25/aocc-4.0.0; else echo "$ADIOS2_ROOT"; fi} + + $SHELL{if [ -z "$MOAB_ROOT" ]; then echo /global/cfs/cdirs/e3sm/software/moab/intel; else echo "$MOAB_ROOT"; fi} + + + $SHELL{if [ -z "$MOAB_ROOT" ]; then echo /global/cfs/cdirs/e3sm/software/moab/gnu; else echo "$MOAB_ROOT"; fi} + + -1 + Spock. NCCS moderate-security system that contains similar hardware and software as the upcoming Frontier system at ORNL. .*spock.* @@ -1115,306 +1149,11 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss --> - - Crusher. NCCS moderate-security system that contains similar hardware and software as the upcoming Frontier system at ORNL. 
192 AMD EPYC 7A53 64C nodes, 128 hwthreads, 512GB DDR4, 4 MI250X GPUs - .*crusher.* - Linux - crayclang,gnu,amdclang,gnugpu,crayclanggpu,amdclanggpu - mpich - cli115 - /lustre/orion/cli115/world-shared/crusher - .* - /lustre/orion/$PROJECT/proj-shared/$ENV{USER}/e3sm_scratch/crusher - /lustre/orion/cli115/world-shared/e3sm/inputdata - /lustre/orion/cli115/world-shared/e3sm/inputdata/atm/datm7 - $CIME_OUTPUT_ROOT/archive/$CASE - /lustre/orion/cli115/world-shared/e3sm/baselines/crusher/$COMPILER - /lustre/orion/cli115/world-shared/e3sm/tools/cprnc/cprnc - 8 - 1 - slurm - e3sm - 56 - 56 - 8 - 8 - 8 - TRUE - - srun - - -l -K -n {{ total_tasks }} -N {{ num_nodes }} - -c $ENV{OMP_NUM_THREADS} - $ENV{NTASKS_PER_GPU} - $ENV{GPU_BIND_ARGS} - - - - /usr/share/lmod/lmod/init/sh - /usr/share/lmod/lmod/init/csh - /usr/share/lmod/lmod/init/perl - /usr/share/lmod/lmod/init/env_modules_python.py - /usr/share/lmod/lmod/libexec/lmod perl - module - module - /usr/share/lmod/lmod/libexec/lmod python - - - PrgEnv-cray PrgEnv-cray/8.3.3 - cce cce/15.0.1 - - - craype craype/2.7.20 - - - craype-accel-amd-gfx90a - rocm/5.4.0 - - - - PrgEnv-cray PrgEnv-amd/8.3.3 - amd amd/5.4.0 - - - craype-accel-amd-gfx90a - - - - PrgEnv-cray PrgEnv-gnu/8.3.3 - gcc gcc/11.2.0 - - - craype-accel-amd-gfx90a - rocm/5.4.0 - - - cray-python/3.9.13.1 - subversion/1.14.1 - git/2.36.1 - cmake/3.21.3 - zlib/1.2.11 - cray-hdf5-parallel/1.12.2.1 - cray-netcdf-hdf5parallel/4.9.0.1 - cray-parallel-netcdf/1.12.3.1 - - - $CIME_OUTPUT_ROOT/$CASE/run - $CIME_OUTPUT_ROOT/$CASE/bld - 0.1 - 0.25 - 0 - - $ENV{NETCDF_DIR} - $ENV{PNETCDF_DIR} - - - - - $ENV{CRAY_LIBSCI_DIR}/amd/4.0/x86_64/lib:$ENV{LD_LIBRARY_PATH} - - - --ntasks-per-gpu=$SHELL{echo "`./xmlquery --value MAX_MPITASKS_PER_NODE`/8"|bc} - --gpu-bind=closest - romio_cb_read=disable - 0 - - - 10 - 3 - - - 128M - spread - threads - - - - - Crusher. NCCS moderate-security system that contains similar hardware and software as the upcoming Frontier system at ORNL. 
192 AMD EPYC 7A53 64C nodes, 128 hwthreads, 512GB DDR4, 4 MI250X GPUs - .*crusher.* - Linux - crayclang-scream - mpich - CLI115 - /lustre/orion/cli133/proj-shared/$ENV{USER}/e3sm_scratch/crusher - /lustre/orion/cli115/world-shared/e3sm/inputdata - /lustre/orion/cli115/world-shared/e3sm/inputdata/atm/datm7 - $CIME_OUTPUT_ROOT/archive/$CASE - /lustre/orion/cli133/world-shared/e3sm/baselines/$COMPILER - /lustre/orion/cli115/world-shared/e3sm/tools/cprnc/cprnc - 8 - 1 - slurm - e3sm - 56 - 56 - TRUE - - - srun - - -l -K -n {{ total_tasks }} -N {{ num_nodes }} - - - --threads-per-core=1 - -c $ENV{OMP_NUM_THREADS} - -m *:block - - - - - - /usr/share/lmod/lmod/init/sh - /usr/share/lmod/lmod/init/csh - /usr/share/lmod/lmod/init/perl - /usr/share/lmod/lmod/init/env_modules_python.py - /usr/share/lmod/lmod/libexec/lmod perl - module - module - /usr/share/lmod/lmod/libexec/lmod python - - - - PrgEnv-cray PrgEnv-cray/8.3.3 - cce cce/14.0.0 - - - - - PrgEnv-cray PrgEnv-amd/8.3.3 - amd amd/5.1.0 - - - - - PrgEnv-cray PrgEnv-gnu/8.3.3 - - - cray-mpich/8.1.12 - cray-python/3.9.4.2 - subversion/1.14.0 - git/2.31.1 - cmake/3.21.3 - zlib/1.2.11 - cray-libsci/21.08.1.2 - cray-hdf5-parallel/1.12.1.1 - cray-netcdf-hdf5parallel/4.8.1.1 - cray-parallel-netcdf/1.12.1.7 - - - - $CIME_OUTPUT_ROOT/$CASE/run - $CIME_OUTPUT_ROOT/$CASE/bld - 0.1 - - $ENV{NETCDF_DIR} - $ENV{PNETCDF_DIR} - - - - $ENV{CRAY_LIBSCI_DIR}/amd/4.0/x86_64/lib:$ENV{LD_LIBRARY_PATH} - - - - 0 - - - - 128M - spread - threads - - - - - Crusher. NCCS moderate-security system that contains similar hardware and software as the upcoming Frontier system at ORNL. 
192 AMD EPYC 7A53 64C nodes, 128 hwthreads, 512GB DDR4, 4 MI250X GPUs - .*crusher.* - Linux - crayclang-scream - mpich - CLI115 - /lustre/orion/cli133/proj-shared/$ENV{USER}/e3sm_scratch/crusher - /lustre/orion/cli115/world-shared/e3sm/inputdata - /lustre/orion/cli115/world-shared/e3sm/inputdata/atm/datm7 - $CIME_OUTPUT_ROOT/archive/$CASE - /lustre/orion/cli133/world-shared/e3sm/baselines/$COMPILER - /lustre/orion/cli115/world-shared/e3sm/tools/cprnc/cprnc - 8 - 1 - slurm - e3sm - 64 - 8 - TRUE - - - srun - - -l -K -n {{ total_tasks }} -N {{ num_nodes }} - - - --gpus-per-node=8 --gpu-bind=closest - -c $ENV{OMP_NUM_THREADS} - - - - - - - /usr/share/lmod/lmod/init/sh - /usr/share/lmod/lmod/init/csh - /usr/share/lmod/lmod/init/perl - /usr/share/lmod/lmod/init/env_modules_python.py - /usr/share/lmod/lmod/libexec/lmod perl - module - module - /usr/share/lmod/lmod/libexec/lmod python - - - PrgEnv-cray - - craype-accel-amd-gfx90a - rocm/5.1.0 - - cce/14.0.3 - - - cray-python/3.9.4.2 - subversion/1.14.0 - git/2.31.1 - cmake/3.21.3 - zlib/1.2.11 - cray-hdf5-parallel/1.12.2.1 - cray-netcdf-hdf5parallel/4.9.0.1 - cray-parallel-netcdf/1.12.3.1 - - - - $CIME_OUTPUT_ROOT/$CASE/run - $CIME_OUTPUT_ROOT/$CASE/bld - 0.1 - 0 - - $ENV{NETCDF_DIR} - $ENV{PNETCDF_DIR} - 0 - - 1 - romio_cb_read=disable - - - - 128M - spread - threads - - - Frontier. AMD EPYC 7A53 64C nodes, 128 hwthreads, 512GB DDR4, 4 MI250X GPUs. 
.*frontier.* CNL - crayclang-scream + craygnuamdgpu,crayclang-scream mpich cli115 /lustre/orion/proj-shared/cli115 @@ -1451,17 +1190,34 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss module module /usr/share/lmod/lmod/libexec/lmod python + + + + PrgEnv-gnu + cpe/24.07 + libfabric/1.15.2.0 + craype-accel-amd-gfx90a + rocm/6.2.0 + libunwind + cray-python + subversion + git + cmake + cray-hdf5-parallel + cray-netcdf-hdf5parallel + cray-parallel-netcdf + darshan-runtime + - PrgEnv-cray + cpe/22.12 craype-accel-amd-gfx90a rocm/5.4.0 libunwind/1.6.2 - - cce/15.0.1 - craype craype/2.7.20 - cray-mpich cray-mpich/8.1.26 + libfabric/1.15.2.0 + craype/2.7.20 + cray-mpich/8.1.26 cray-python/3.9.13.1 subversion/1.14.1 git/2.36.1 @@ -1495,9 +1251,13 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss threads + + $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /lustre/orion/cli115/world-shared/frontier/3rdparty/adios2/2.10/install/craygnuamdgppu/cpe-24.07/libfabric-1.15.2.0; else echo "$ADIOS2_ROOT"; fi} + $SHELL{if [ -z "$ADIOS2_ROOT" ]; then echo /lustre/orion/cli115/world-shared/frontier/3rdparty/adios2/2.9.1/cray-mpich-8.1.26/crayclang-scream-14.0.0; else echo "$ADIOS2_ROOT"; fi} + @@ -3044,6 +2804,7 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss /lcrc/group/e3sm/soft/improv/pnetcdf/1.12.3/gcc-12.3.0/openmpi-4.1.6 /lcrc/group/e3sm/soft/improv/pnetcdf/1.12.3/gcc-12.3.0/openmpi-4.1.6/bin:/lcrc/group/e3sm/soft/improv/netcdf-fortran/4.6.1b/gcc-12.3.0/openmpi-4.1.6/bin:/lcrc/group/e3sm/soft/improv/netcdf-c/4.9.2b/gcc-12.3.0/openmpi-4.1.6/bin:/lcrc/group/e3sm/soft/improv/openmpi/4.1.6/gcc-12.3.0/bin:/lcrc/group/e3sm/soft/perl/improv/bin:$ENV{PATH} 
$SHELL{lp=/lcrc/group/e3sm/soft/improv/netlib-lapack/3.12.0/gcc-12.3.0:/lcrc/group/e3sm/soft/improv/pnetcdf/1.12.3/gcc-12.3.0/openmpi-4.1.6/lib:/lcrc/group/e3sm/soft/improv/netcdf-fortran/4.6.1b/gcc-12.3.0/openmpi-4.1.6/lib:/lcrc/group/e3sm/soft/improv/netcdf-c/4.9.2b/gcc-12.3.0/openmpi-4.1.6/lib:/opt/pbs/lib:/lcrc/group/e3sm/soft/improv/openmpi/4.1.6/gcc-12.3.0/lib; if [ -z "$LD_LIBRARY_PATH" ]; then echo $lp; else echo "$lp:$LD_LIBRARY_PATH"; fi} + $SHELL{if [ -z "$MOAB_ROOT" ]; then echo /lcrc/soft/climate/moab/improv/gnu; else echo "$MOAB_ROOT"; fi} ^lockedfile @@ -3065,9 +2826,9 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss /usr/workspace/e3sm/ccsm3data/inputdata/atm/datm7 /p/lustre2/$USER/archive/$CASE /p/lustre2/$USER/ccsm_baselines/$COMPILER - /usr/workspace/e3sm/tools/cprnc + /usr/workspace/e3sm/apps/cprnc 8 - lc_slurm + slurm boutte3 -at- llnl.gov 56 56 @@ -3076,8 +2837,16 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss srun + + --mpi=pmi2 + --export=ALL + -n {{ total_tasks }} -N {{ num_nodes }} + -c 1 + --cpu_bind=cores + -m plane={{ tasks_per_node }} + - + /usr/share/lmod/lmod/init/env_modules_python.py /usr/share/lmod/lmod/init/perl /usr/share/lmod/lmod/init/sh @@ -3089,24 +2858,27 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss python/3.9.12 git + subversion + cmake/3.19.2 mkl/2022.1.0 intel-classic/2021.6.0-magic - mvapich2/2.3.7 - cmake/3.19.2 - /usr/workspace/e3sm/install/quartz/modulefiles - hdf5/1.12.2 - netcdf-c/4.9.0 - netcdf-fortran/4.6.0 - parallel-netcdf/1.12.3 - screamML-venv/0.0.1 - subversion + /usr/workspace/e3sm/spack/modules/ruby/linux-rhel8-x86_64/Core + mvapich2/2.3.7-ll7cmqm + hdf5/1.10.7-ewjpbjd + netcdf-c/4.4.1.1-vaxofek + netcdf-fortran/4.4.4-3pzbx2u + parallel-netcdf/1.11.0-tzgdala $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld - /usr/workspace/e3sm/install/quartz/netcdf-fortran/ - 
/usr/tce/packages/parallel-netcdf/parallel-netcdf-1.12.3-mvapich2-2.3.7-intel-classic-2021.6.0 + 128M + FALSE + /usr/workspace/e3sm/spack/libs/linux-rhel8-cascadelake/intel-2021.6.0/hdf5-1.10.7-ewjpbjdhjgjzrzjcvwyjyuulaesbsjhg + /usr/workspace/e3sm/spack/libs/linux-rhel8-cascadelake/intel-2021.6.0/netcdf-c-4.4.1.1-vaxofekwvnvngh7wptmzkwdb7tkzvesn + /usr/workspace/e3sm/spack/libs/linux-rhel8-cascadelake/intel-2021.6.0/netcdf-fortran-4.4.4-3pzbx2unddhladhubaahhhysjmprzqi2 + /usr/workspace/e3sm/spack/libs/linux-rhel8-cascadelake/intel-2021.6.0/parallel-netcdf-1.11.0-tzgdalakmem7tod6cruhqyeackeix5q5 @@ -3121,9 +2893,9 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss /usr/workspace/e3sm/ccsm3data/inputdata/atm/datm7 /p/lustre2/$USER/archive/$CASE /p/lustre2/$USER/ccsm_baselines/$COMPILER - /usr/workspace/e3sm/tools/cprnc + /usr/workspace/e3sm/apps/cprnc 8 - lc_slurm + slurm boutte3 -at- llnl.gov 224 112 @@ -3132,8 +2904,16 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss srun + + --mpi=pmi2 + --export=ALL + -n {{ total_tasks }} -N {{ num_nodes }} + -c 1 + --cpu_bind=cores + -m plane={{ tasks_per_node }} + - + /usr/share/lmod/lmod/init/env_modules_python.py /usr/share/lmod/lmod/init/perl /usr/share/lmod/lmod/init/sh @@ -3145,24 +2925,27 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss python/3.9.12 git + subversion mkl/2022.1.0 intel-classic/2021.6.0-magic - mvapich2/2.3.7 cmake/3.19.2 - /usr/workspace/e3sm/install/quartz/modulefiles - hdf5/1.12.2 - netcdf-c/4.9.0 - netcdf-fortran/4.6.0 - parallel-netcdf/1.12.3 - screamML-venv/0.0.1 - subversion + /usr/workspace/e3sm/spack/modules/dane/linux-rhel8-x86_64/Core + mvapich2/2.3.7-27jao34 + hdf5/1.10.7-766kapa + netcdf-c/4.4.1.1-2uznnlw + netcdf-fortran/4.4.4-itpstyo + parallel-netcdf/1.11.0-26sxm4m $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld - /usr/workspace/e3sm/install/quartz/netcdf-fortran/ - 
/usr/tce/packages/parallel-netcdf/parallel-netcdf-1.12.3-mvapich2-2.3.7-intel-classic-2021.6.0 + 128M + FALSE + /usr/workspace/e3sm/spack/libs/linux-rhel8-sapphirerapids/intel-2021.6.0/hdf5-1.10.7-766kapalbrdntu2pcgdgbhg2ch26gsuv + /usr/workspace/e3sm/spack/libs/linux-rhel8-sapphirerapids/intel-2021.6.0/netcdf-c-4.4.1.1-2uznnlwgiezxute6iyqzqjrpolokeaib + /usr/workspace/e3sm/spack/libs/linux-rhel8-sapphirerapids/intel-2021.6.0/netcdf-fortran-4.4.4-itpstyordbern7vlulmlnt47eeeokzfp + /usr/workspace/e3sm/spack/libs/linux-rhel8-sapphirerapids/intel-2021.6.0/parallel-netcdf-1.11.0-26sxm4mormsglmhi24poix7sugbigkck diff --git a/mache/spack/pm-cpu_gnu_mpich.csh b/mache/spack/pm-cpu_gnu_mpich.csh index 14e3845f..174ef24f 100644 --- a/mache/spack/pm-cpu_gnu_mpich.csh +++ b/mache/spack/pm-cpu_gnu_mpich.csh @@ -1,41 +1,38 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm gcc-native &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm climate-utils &> /dev/null -module rm cray-libsci &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null -module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load 
PrgEnv-gnu/8.5.0 -module load gcc/12.2.0 -module load craype-accel-host -{% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 -{% endif %} -module load craype/2.7.20 -module rm cray-mpich &> /dev/null -module load libfabric/1.15.2.0 -module load cray-mpich/8.1.25 +module load PrgEnv-gnu/8.5.0 \ + gcc-native/12.3 \ + cray-libsci/23.12.5 \ + craype-accel-host \ + craype/2.7.30 \ + libfabric/1.20.1 \ + cray-mpich/8.1.28 \ + cmake/3.24.3 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.3 -module load cray-netcdf-hdf5parallel/4.9.0.3 -module load cray-parallel-netcdf/1.12.3.3 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -55,6 +52,7 @@ setenv HDF5_USE_FILE_LOCKING FALSE ## Not needed # setenv PERL5LIB /global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch setenv FI_CXI_RX_MATCH_MODE software +setenv FI_MR_CACHE_MONITOR kdreg2 setenv MPICH_COLL_SYNC MPI_Bcast setenv GATOR_INITIAL_MB 4000MB setenv BLA_VENDOR Generic diff --git a/mache/spack/pm-cpu_gnu_mpich.sh b/mache/spack/pm-cpu_gnu_mpich.sh index 75fb3870..b824d419 100644 --- a/mache/spack/pm-cpu_gnu_mpich.sh +++ b/mache/spack/pm-cpu_gnu_mpich.sh @@ -1,41 +1,38 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm gcc-native &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm climate-utils &> /dev/null -module rm cray-libsci &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null 
-module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-gnu/8.5.0 -module load gcc/12.2.0 -module load craype-accel-host -{% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 -{% endif %} -module load craype/2.7.20 -module rm cray-mpich &> /dev/null -module load libfabric/1.15.2.0 -module load cray-mpich/8.1.25 +module load PrgEnv-gnu/8.5.0 \ + gcc-native/12.3 \ + cray-libsci/23.12.5 \ + craype-accel-host \ + craype/2.7.30 \ + libfabric/1.20.1 \ + cray-mpich/8.1.28 \ + cmake/3.24.3 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.3 -module load cray-netcdf-hdf5parallel/4.9.0.3 -module load cray-parallel-netcdf/1.12.3.3 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -55,6 +52,7 @@ export HDF5_USE_FILE_LOCKING=FALSE ## Not needed # export PERL5LIB=/global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch export FI_CXI_RX_MATCH_MODE=software +export FI_MR_CACHE_MONITOR=kdreg2 if [ -z "${NERSC_HOST:-}" ]; then # happens when building spack environment diff --git a/mache/spack/pm-cpu_gnu_mpich.yaml b/mache/spack/pm-cpu_gnu_mpich.yaml index 1a8b5d6d..7fd22e6f 100644 --- a/mache/spack/pm-cpu_gnu_mpich.yaml +++ b/mache/spack/pm-cpu_gnu_mpich.yaml @@ -2,9 +2,7 @@ spack: specs: - gcc - cray-mpich -{% if e3sm_lapack %} - 
cray-libsci -{% endif %} {% if e3sm_hdf5_netcdf %} - hdf5 - netcdf-c @@ -16,12 +14,10 @@ spack: unify: when_possible packages: all: - compiler: [gcc@12.2.0] + compiler: [gcc@12.3] providers: - mpi: [cray-mpich@8.1.25] -{% if e3sm_lapack %} - lapack: [cray-libsci@23.02.1.1] -{% endif %} + mpi: [cray-mpich@8.1.28] + lapack: [cray-libsci@23.12.5] bzip2: externals: - spec: bzip2@1.0.6 @@ -101,65 +97,55 @@ spack: buildable: false gcc: externals: - - spec: gcc@12.2.0 + - spec: gcc@12.3 modules: - PrgEnv-gnu/8.5.0 - - gcc/12.2.0 + - gcc-native/12.3 + - cray-libsci/23.12.5 - craype-accel-host - - craype/2.7.20 - - libfabric/1.15.2.0 + - craype/2.7.30 + - libfabric/1.20.1 buildable: false cray-mpich: externals: - - spec: cray-mpich@8.1.25 - prefix: /opt/cray/pe/mpich/8.1.25/ofi/gnu/9.1 - modules: - - libfabric/1.15.2.0 - - cray-mpich/8.1.25 - buildable: false - libfabric: - externals: - - spec: libfabric@1.15.2.0 - prefix: /opt/cray/libfabric/1.15.2.0 + - spec: cray-mpich@8.1.28 modules: - - libfabric/1.15.2.0 + - libfabric/1.20.1 + - cray-mpich/8.1.28 buildable: false -{% if e3sm_lapack %} cray-libsci: externals: - - spec: cray-libsci@23.02.1.1 - prefix: /opt/cray/pe/libsci/23.02.1.1/GNU/9.1/x86_64 + - spec: cray-libsci@23.12.5 modules: - - cray-libsci/23.02.1.1 + - cray-libsci/23.12.5 buildable: false -{% endif %} {% if e3sm_hdf5_netcdf %} hdf5: externals: - - spec: hdf5@1.12.2.3~cxx+fortran+hl~java+mpi+shared - prefix: /opt/cray/pe/hdf5-parallel/1.12.2.3/GNU/9.1 + - spec: hdf5@1.12.2.9~cxx+fortran+hl~java+mpi+shared + prefix: /opt/cray/pe/hdf5-parallel/1.12.2.9/gnu/12.3 buildable: false parallel-netcdf: externals: - - spec: parallel-netcdf@1.12.3.3+cxx+fortran+pic+shared - prefix: /opt/cray/pe/parallel-netcdf/1.12.3.3/GNU/9.1/ + - spec: parallel-netcdf@1.12.3.9+cxx+fortran+pic+shared + prefix: /opt/cray/pe/parallel-netcdf/1.12.3.9/gnu/12.3 buildable: false netcdf-c: externals: - - spec: netcdf-c@4.9.0.3+mpi~parallel-netcdf - prefix: 
/opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/GNU/9.1 + - spec: netcdf-c@4.9.0.9+mpi~parallel-netcdf + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/gnu/12.3 buildable: false netcdf-fortran: externals: - spec: netcdf-fortran@4.5.3 - prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/GNU/9.1 + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/gnu/12.3 buildable: false {% endif %} config: install_missing_compilers: false compilers: - compiler: - spec: gcc@12.2.0 + spec: gcc@12.3 paths: cc: cc cxx: CC @@ -170,10 +156,11 @@ spack: target: x86_64 modules: - PrgEnv-gnu/8.5.0 - - gcc/12.2.0 + - gcc-native/12.3 + - cray-libsci/23.12.5 - craype-accel-host - - craype/2.7.20 - - libfabric/1.15.2.0 + - craype/2.7.30 + - libfabric/1.20.1 environment: prepend_path: PKG_CONFIG_PATH: "/opt/cray/xpmem/2.6.2-2.5_2.33__gd067c3f.shasta/lib64/pkgconfig" diff --git a/mache/spack/pm-cpu_intel_mpich.csh b/mache/spack/pm-cpu_intel_mpich.csh index d6eda3d1..52de4034 100644 --- a/mache/spack/pm-cpu_intel_mpich.csh +++ b/mache/spack/pm-cpu_intel_mpich.csh @@ -1,38 +1,36 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm gcc-native &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm climate-utils &> /dev/null -module rm cray-libsci &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null -module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + 
gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-intel/8.5.0 -module load intel/2023.2.0 -module load craype-accel-host -module load craype/2.7.30 -module load libfabric/1.15.2.0 -module rm cray-mpich &> /dev/null -module load cray-mpich/8.1.28 +module load PrgEnv-intel/8.5.0 \ + intel/2023.2.0 \ + craype-accel-host \ + craype/2.7.30 \ + libfabric/1.20.1 \ + cray-mpich/8.1.28 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.9 -module load cray-netcdf-hdf5parallel/4.9.0.9 -module load cray-parallel-netcdf/1.12.3.9 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -52,6 +50,7 @@ setenv HDF5_USE_FILE_LOCKING FALSE ## Not needed # setenv PERL5LIB /global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch setenv FI_CXI_RX_MATCH_MODE software +setenv FI_MR_CACHE_MONITOR kdreg2 setenv MPICH_COLL_SYNC MPI_Bcast setenv GATOR_INITIAL_MB 4000MB setenv BLA_VENDOR Intel10_64_dyn diff --git a/mache/spack/pm-cpu_intel_mpich.sh b/mache/spack/pm-cpu_intel_mpich.sh index f997eaca..5d4e3b85 100644 --- a/mache/spack/pm-cpu_intel_mpich.sh +++ b/mache/spack/pm-cpu_intel_mpich.sh @@ -1,38 +1,36 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm gcc-native &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm 
climate-utils &> /dev/null -module rm cray-libsci &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null -module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-intel/8.5.0 -module load intel/2023.2.0 -module load craype-accel-host -module load craype/2.7.30 -module load libfabric/1.15.2.0 -module rm cray-mpich &> /dev/null -module load cray-mpich/8.1.28 +module load PrgEnv-intel/8.5.0 \ + intel/2023.2.0 \ + craype-accel-host \ + craype/2.7.30 \ + libfabric/1.20.1 \ + cray-mpich/8.1.28 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.9 -module load cray-netcdf-hdf5parallel/4.9.0.9 -module load cray-parallel-netcdf/1.12.3.9 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -52,12 +50,12 @@ export HDF5_USE_FILE_LOCKING=FALSE ## Not needed # export PERL5LIB=/global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch export FI_CXI_RX_MATCH_MODE=software +export FI_MR_CACHE_MONITOR=kdreg2 if [ -z "${NERSC_HOST:-}" ]; then # happens when building spack environment export NERSC_HOST="perlmutter" fi -export FI_CXI_RX_MATCH_MODE=software export MPICH_COLL_SYNC=MPI_Bcast export GATOR_INITIAL_MB=4000MB export BLA_VENDOR=Intel10_64_dyn diff --git a/mache/spack/pm-cpu_intel_mpich.yaml 
b/mache/spack/pm-cpu_intel_mpich.yaml index 97846bdd..0a2d34fc 100644 --- a/mache/spack/pm-cpu_intel_mpich.yaml +++ b/mache/spack/pm-cpu_intel_mpich.yaml @@ -101,22 +101,14 @@ spack: - intel/2023.2.0 - craype-accel-host - craype/2.7.30 - - libfabric/1.15.2.0 + - libfabric/1.20.1 buildable: false cray-mpich: externals: - spec: cray-mpich@8.1.28 - prefix: /opt/cray/pe/mpich/8.1.28/ofi/intel/19.0 modules: - cray-mpich/8.1.28 - - libfabric/1.15.2.0 - buildable: false - libfabric: - externals: - - spec: libfabric@1.15.2.0 - prefix: /opt/cray/libfabric/1.15.2.0 - modules: - - libfabric/1.15.2.0 + - libfabric/1.20.1 buildable: false {% if e3sm_hdf5_netcdf %} hdf5: @@ -127,7 +119,7 @@ spack: parallel-netcdf: externals: - spec: parallel-netcdf@1.12.3.9+cxx+fortran+pic+shared - prefix: /opt/cray/pe/parallel-netcdf/1.12.3.9/intel/19.0/ + prefix: /opt/cray/pe/parallel-netcdf/1.12.3.9/intel/19.0 buildable: false netcdf-c: externals: @@ -158,7 +150,7 @@ spack: - intel/2023.2.0 - craype-accel-host - craype/2.7.30 - - libfabric/1.15.2.0 + - libfabric/1.20.1 environment: prepend_path: PKG_CONFIG_PATH: "/opt/cray/xpmem/2.6.2-2.5_2.33__gd067c3f.shasta/lib64/pkgconfig" diff --git a/mache/spack/pm-cpu_nvidia_mpich.csh b/mache/spack/pm-cpu_nvidia_mpich.csh index 87ff8220..6a0f0732 100644 --- a/mache/spack/pm-cpu_nvidia_mpich.csh +++ b/mache/spack/pm-cpu_nvidia_mpich.csh @@ -1,42 +1,38 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm gcc-native &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm climate-utils &> /dev/null -module rm cray-libsci &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null 
-module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-nvidia -module load nvidia/22.7 -module load craype-x86-milan -module load libfabric/1.15.2.0 -module load craype-accel-host -module load craype/2.7.20 -module rm cray-mpich &> /dev/null -module load cray-mpich/8.1.25 -{% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 -{% endif %} +module load PrgEnv-nvidia \ + nvidia/24.5 \ + cray-libsci/23.12.5 \ + craype-accel-host \ + craype/2.7.30 \ + libfabric/1.20.1 \ + cray-mpich/8.1.28 \ + cmake/3.24.3 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.3 -module load cray-netcdf-hdf5parallel/4.9.0.3 -module load cray-parallel-netcdf/1.12.3.3 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -56,6 +52,8 @@ setenv HDF5_USE_FILE_LOCKING FALSE ## Not needed # setenv PERL5LIB /global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch setenv MPICH_GPU_SUPPORT_ENABLED 1 +setenv FI_CXI_RX_MATCH_MODE software +setenv FI_MR_CACHE_MONITOR kdreg2 setenv MPICH_COLL_SYNC MPI_Bcast setenv GATOR_INITIAL_MB 4000MB setenv BLA_VENDOR NVHPC diff --git a/mache/spack/pm-cpu_nvidia_mpich.sh b/mache/spack/pm-cpu_nvidia_mpich.sh index f28d4d23..90f1d967 100644 --- a/mache/spack/pm-cpu_nvidia_mpich.sh +++ b/mache/spack/pm-cpu_nvidia_mpich.sh @@ 
-1,42 +1,38 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm gcc-native &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm climate-utils &> /dev/null -module rm cray-libsci &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null -module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-nvidia -module load nvidia/22.7 -module load craype-x86-milan -module load libfabric/1.15.2.0 -module load craype-accel-host -module load craype/2.7.20 -module rm cray-mpich &> /dev/null -module load cray-mpich/8.1.25 -{% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 -{% endif %} +module load PrgEnv-nvidia \ + nvidia/24.5 \ + cray-libsci/23.12.5 \ + craype-accel-host \ + craype/2.7.30 \ + libfabric/1.20.1 \ + cray-mpich/8.1.28 \ + cmake/3.24.3 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.3 -module load cray-netcdf-hdf5parallel/4.9.0.3 -module load cray-parallel-netcdf/1.12.3.3 +module load cray-hdf5-parallel/1.12.2.9 \ + 
cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -61,6 +57,8 @@ if [ -z "${NERSC_HOST:-}" ]; then # happens when building spack environment export NERSC_HOST="perlmutter" fi +export FI_CXI_RX_MATCH_MODE=software +export FI_MR_CACHE_MONITOR=kdreg2 export MPICH_COLL_SYNC=MPI_Bcast export GATOR_INITIAL_MB=4000MB export BLA_VENDOR=NVHPC diff --git a/mache/spack/pm-cpu_nvidia_mpich.yaml b/mache/spack/pm-cpu_nvidia_mpich.yaml index 52b02d73..92789bee 100644 --- a/mache/spack/pm-cpu_nvidia_mpich.yaml +++ b/mache/spack/pm-cpu_nvidia_mpich.yaml @@ -1,9 +1,7 @@ spack: specs: - cray-mpich -{% if e3sm_lapack %} - cray-libsci -{% endif %} {% if e3sm_hdf5_netcdf %} - hdf5 - netcdf-c @@ -15,12 +13,10 @@ spack: unify: when_possible packages: all: - compiler: [nvhpc@22.7] + compiler: [nvhpc@24.5] providers: - mpi: [cray-mpich@8.1.25] -{% if e3sm_lapack %} - lapack: [cray-libsci@23.02.1.1] -{% endif %} + mpi: [cray-mpich@8.1.28] + lapack: [cray-libsci@23.12.5] bzip2: externals: - spec: bzip2@1.0.6 @@ -100,67 +96,59 @@ spack: buildable: false cray-mpich: externals: - - spec: cray-mpich@8.1.25 - prefix: /opt/cray/pe/mpich/8.1.25/ofi/nvidia/20.7 - modules: - - libfabric/1.15.2.0 - - cray-mpich/8.1.25 - buildable: false - libfabric: - externals: - - spec: libfabric@1.15.2.0 - prefix: /opt/cray/libfabric/1.15.2.0 + - spec: cray-mpich@8.1.28 modules: - - libfabric/1.15.2.0 + - libfabric/1.20.1 + - cray-mpich/8.1.28 buildable: false -{% if e3sm_lapack %} cray-libsci: externals: - - spec: cray-libsci@23.02.1.1 - prefix: /opt/cray/pe/libsci/23.02.1.1/NVIDIA/20.7/x86_64 + - spec: cray-libsci@23.12.5 + modules: + - cray-libsci/23.12.5 buildable: false -{% endif %} {% if e3sm_hdf5_netcdf %} hdf5: externals: - - spec: hdf5@1.12.2.3~cxx+fortran+hl~java+mpi+shared - prefix: /opt/cray/pe/hdf5-parallel/1.12.2.3/nvidia/20.7 + - spec: hdf5@1.12.2.9~cxx+fortran+hl~java+mpi+shared + prefix: /opt/cray/pe/hdf5-parallel/1.12.2.9/nvidia/23.3
buildable: false parallel-netcdf: externals: - - spec: parallel-netcdf@1.12.3.3+cxx+fortran+pic+shared - prefix: /opt/cray/pe/parallel-netcdf/1.12.3.3/nvidia/20.7 + - spec: parallel-netcdf@1.12.3.9+cxx+fortran+pic+shared + prefix: /opt/cray/pe/parallel-netcdf/1.12.3.9/nvidia/23.3 buildable: false netcdf-c: externals: - - spec: netcdf-c@4.9.0.3+mpi~parallel-netcdf - prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/nvidia/20.7 + - spec: netcdf-c@4.9.0.9+mpi~parallel-netcdf + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/nvidia/23.3 buildable: false netcdf-fortran: externals: - spec: netcdf-fortran@4.5.3 - prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/nvidia/20.7 + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/nvidia/23.3 buildable: false {% endif %} config: install_missing_compilers: false compilers: - compiler: - spec: nvhpc@22.7 + spec: nvhpc@24.5 paths: - cc: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvc - cxx: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvc++ - f77: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvfortran - fc: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvfortran + cc: /opt/nvidia/hpc_sdk/Linux_x86_64/24.5/compilers/bin/nvc + cxx: /opt/nvidia/hpc_sdk/Linux_x86_64/24.5/compilers/bin/nvc++ + f77: /opt/nvidia/hpc_sdk/Linux_x86_64/24.5/compilers/bin/nvfortran + fc: /opt/nvidia/hpc_sdk/Linux_x86_64/24.5/compilers/bin/nvfortran flags: {} operating_system: sles15 target: any modules: - PrgEnv-nvidia - - nvidia/22.7 - - craype-x86-milan - - libfabric + - nvidia/24.5 + - cray-libsci/23.12.5 - craype-accel-host + - craype/2.7.30 + - libfabric/1.20.1 environment: prepend_path: PKG_CONFIG_PATH: "/opt/cray/xpmem/2.6.2-2.5_2.33__gd067c3f.shasta/lib64/pkgconfig" diff --git a/mache/spack/pm-gpu_gnugpu_mpich.csh b/mache/spack/pm-gpu_gnugpu_mpich.csh index 53e03d55..b13ef5b3 100644 --- a/mache/spack/pm-gpu_gnugpu_mpich.csh +++ b/mache/spack/pm-gpu_gnugpu_mpich.csh @@ -1,41 +1,39 @@ -module rm cray-hdf5-parallel &> /dev/null
-module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm climate-utils &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null -module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-gnu/8.3.3 -module load gcc/11.2.0 -module load craype-x86-milan -module load libfabric/1.15.2.0 -module load cudatoolkit/11.7 -module load craype-accel-nvidia80 -module load craype/2.7.20 -module rm cray-mpich &> /dev/null -module load cray-mpich/8.1.25 -{% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 -{% endif %} +module load PrgEnv-gnu/8.5.0 \ + gcc-native/12.3 \ + cudatoolkit/12.4 \ + craype-accel-nvidia80 \ + cray-libsci/23.12.5 \ + libfabric/1.20.1 \ + craype/2.7.30 \ + cray-mpich/8.1.28 \ + cmake/3.24.3 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.3 -module load cray-netcdf-hdf5parallel/4.9.0.3 -module load cray-parallel-netcdf/1.12.3.3 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} 
{% if e3sm_hdf5_netcdf %} @@ -54,4 +52,6 @@ setenv MPICH_MPIIO_DVS_MAXNODES 1 setenv HDF5_USE_FILE_LOCKING FALSE ## Not needed # setenv PERL5LIB /global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch +setenv MPICH_COLL_SYNC MPI_Bcast +setenv FI_MR_CACHE_MONITOR kdreg2 setenv MPICH_GPU_SUPPORT_ENABLED 1 diff --git a/mache/spack/pm-gpu_gnugpu_mpich.sh b/mache/spack/pm-gpu_gnugpu_mpich.sh index 3ce26746..009ffd41 100644 --- a/mache/spack/pm-gpu_gnugpu_mpich.sh +++ b/mache/spack/pm-gpu_gnugpu_mpich.sh @@ -1,41 +1,39 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm climate-utils &> /dev/null -module rm matlab &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null -module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-gnu/8.3.3 -module load gcc/11.2.0 -module load craype-x86-milan -module load libfabric/1.15.2.0 -module load cudatoolkit/11.7 -module load craype-accel-nvidia80 -module load craype/2.7.20 -module rm cray-mpich &> /dev/null -module load cray-mpich/8.1.25 -{% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 -{% endif %} +module load PrgEnv-gnu/8.5.0 \ + gcc-native/12.3 \ + 
cudatoolkit/12.4 \ + craype-accel-nvidia80 \ + cray-libsci/23.12.5 \ + libfabric/1.20.1 \ + craype/2.7.30 \ + cray-mpich/8.1.28 \ + cmake/3.24.3 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.3 -module load cray-netcdf-hdf5parallel/4.9.0.3 -module load cray-parallel-netcdf/1.12.3.3 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -54,6 +52,8 @@ export MPICH_VERSION_DISPLAY=1 export HDF5_USE_FILE_LOCKING=FALSE ## Not needed # export PERL5LIB=/global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch +export MPICH_COLL_SYNC=MPI_Bcast +export FI_MR_CACHE_MONITOR=kdreg2 export MPICH_GPU_SUPPORT_ENABLED=1 if [ -z "${NERSC_HOST:-}" ]; then diff --git a/mache/spack/pm-gpu_gnugpu_mpich.yaml b/mache/spack/pm-gpu_gnugpu_mpich.yaml index 985ceddb..0f18d918 100644 --- a/mache/spack/pm-gpu_gnugpu_mpich.yaml +++ b/mache/spack/pm-gpu_gnugpu_mpich.yaml @@ -2,9 +2,7 @@ spack: specs: - gcc - cray-mpich -{% if e3sm_lapack %} - cray-libsci -{% endif %} {% if e3sm_hdf5_netcdf %} - hdf5 - netcdf-c @@ -16,12 +14,10 @@ spack: unify: when_possible packages: all: - compiler: [gcc@11.2.0] + compiler: [gcc@12.3] providers: - mpi: [cray-mpich@8.1.24] -{% if e3sm_lapack %} - lapack: [cray-libsci@23.02.1.1] -{% endif %} + mpi: [cray-mpich@8.1.28] + lapack: [cray-libsci@23.12.5] bzip2: externals: - spec: bzip2@1.0.6 @@ -101,78 +97,66 @@ spack: buildable: false gcc: externals: - - spec: gcc@11.2.0 + - spec: gcc@12.3 modules: - - PrgEnv-gnu/8.3.3 - - gcc/11.2.0 - - cudatoolkit/11.7 + - PrgEnv-gnu/8.5.0 + - gcc-native/12.3 - craype-accel-nvidia80 - - craype/2.7.19 - - libfabric/1.15.2.0 + - cray-libsci/23.12.5 + - libfabric/1.20.1 + - craype/2.7.30 + - cudatoolkit/12.4 buildable: false cuda: externals: - - spec: cuda@11.7 - prefix: 
/opt/nvidia/hpc_sdk/Linux_x86_64/22.7/cuda/11.7 + - spec: cuda@12.4 modules: - - PrgEnv-gnu/8.3.3 - - gcc/11.2.0 - - cudatoolkit/11.7 + - PrgEnv-gnu/8.5.0 + - gcc-native/12.3 - craype-accel-nvidia80 - - craype/2.7.19 - - libfabric/1.15.2.0 + - cray-libsci/23.12.5 + - libfabric/1.20.1 + - craype/2.7.30 + - cudatoolkit/12.4 buildable: false cray-mpich: externals: - - spec: cray-mpich@8.1.25 - prefix: /opt/cray/pe/mpich/8.1.25/ofi/gnu/9.1 - modules: - - libfabric/1.15.2.0 - - cray-mpich/8.1.25 + - spec: cray-mpich@8.1.28 + prefix: /opt/cray/pe/mpich/8.1.28/ofi/gnu/12.3 buildable: false - libfabric: - externals: - - spec: libfabric@1.15.2.0 - prefix: /opt/cray/libfabric/1.15.2.0 - modules: - - libfabric/1.15.2.0 - buildable: false -{% if e3sm_lapack %} cray-libsci: externals: - - spec: cray-libsci@23.02.1.1 - prefix: /opt/cray/pe/libsci/23.02.1.1/GNU/9.1/x86_64 + - spec: cray-libsci@23.12.5 modules: - - cray-libsci/23.02.1.1 + - cray-libsci/23.12.5 buildable: false -{% endif %} {% if e3sm_hdf5_netcdf %} hdf5: externals: - - spec: hdf5@1.12.2.3~cxx+fortran+hl~java+mpi+shared - prefix: /opt/cray/pe/hdf5-parallel/1.12.2.3/GNU/9.1 + - spec: hdf5@1.12.2.9~cxx+fortran+hl~java+mpi+shared + prefix: /opt/cray/pe/hdf5-parallel/1.12.2.9/gnu/12.3 buildable: false parallel-netcdf: externals: - - spec: parallel-netcdf@1.12.3.3+cxx+fortran+pic+shared - prefix: /opt/cray/pe/parallel-netcdf/1.12.3.3/GNU/9.1/ + - spec: parallel-netcdf@1.12.3.9+cxx+fortran+pic+shared + prefix: /opt/cray/pe/parallel-netcdf/1.12.3.9/gnu/12.3 buildable: false netcdf-c: externals: - - spec: netcdf-c@4.9.0.3+mpi~parallel-netcdf - prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/GNU/9.1 + - spec: netcdf-c@4.9.0.9+mpi~parallel-netcdf + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/gnu/12.3 buildable: false netcdf-fortran: externals: - spec: netcdf-fortran@4.5.3 - prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/GNU/9.1 + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/gnu/12.3 buildable: false {% endif %} 
config: install_missing_compilers: false compilers: - compiler: - spec: gcc@11.2.0 + spec: gcc@12.3 paths: cc: cc cxx: CC @@ -182,12 +166,13 @@ spack: operating_system: sles15 target: any modules: - - PrgEnv-gnu/8.3.3 - - gcc/11.2.0 - - cudatoolkit/11.7 + - PrgEnv-gnu/8.5.0 + - gcc-native/12.3 - craype-accel-nvidia80 - - craype/2.7.20 - - libfabric/1.15.2.0 + - cray-libsci/23.12.5 + - libfabric/1.20.1 + - craype/2.7.30 + - cudatoolkit/12.4 environment: prepend_path: PKG_CONFIG_PATH: "/opt/cray/xpmem/2.6.2-2.5_2.33__gd067c3f.shasta/lib64/pkgconfig" diff --git a/mache/spack/pm-gpu_nvidiagpu_mpich.csh b/mache/spack/pm-gpu_nvidiagpu_mpich.csh index 5b474a81..4aca5458 100644 --- a/mache/spack/pm-gpu_nvidiagpu_mpich.csh +++ b/mache/spack/pm-gpu_nvidiagpu_mpich.csh @@ -1,42 +1,40 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm climate-utils &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> /dev/null -module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-nvidia -module load nvidia/22.7 -module load craype-x86-milan -module load libfabric/1.15.2.0 -module load 
cudatoolkit/11.7 -module load craype-accel-nvidia80 -module load gcc-mixed/11.2.0 -module load craype/2.7.20 -module rm cray-mpich &> /dev/null -module load cray-mpich/8.1.25 -{% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 -{% endif %} +module load PrgEnv-nvidia \ + nvidia/24.5 \ + cudatoolkit/12.4 \ + craype-accel-nvidia80 \ + gcc-native-mixed/12.3 \ + cray-libsci/23.12.5 \ + libfabric/1.20.1 \ + craype/2.7.30 \ + cray-mpich/8.1.28 \ + cmake/3.24.3 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.3 -module load cray-netcdf-hdf5parallel/4.9.0.3 -module load cray-parallel-netcdf/1.12.3.3 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -53,6 +51,8 @@ setenv MPICH_MPIIO_DVS_MAXNODES 1 # setenv OMP_PROC_BIND spread # setenv OMP_PLACES threads setenv HDF5_USE_FILE_LOCKING FALSE +setenv MPICH_COLL_SYNC MPI_Bcast +setenv FI_MR_CACHE_MONITOR kdreg2 ## Not needed # setenv PERL5LIB /global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch setenv MPICH_GPU_SUPPORT_ENABLED 1 diff --git a/mache/spack/pm-gpu_nvidiagpu_mpich.sh b/mache/spack/pm-gpu_nvidiagpu_mpich.sh index ae949ff8..0b88e212 100644 --- a/mache/spack/pm-gpu_nvidiagpu_mpich.sh +++ b/mache/spack/pm-gpu_nvidiagpu_mpich.sh @@ -1,42 +1,40 @@ -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module rm PrgEnv-gnu &> /dev/null -module rm PrgEnv-intel &> /dev/null -module rm PrgEnv-nvidia &> /dev/null -module rm PrgEnv-cray &> /dev/null -module rm PrgEnv-aocc &> /dev/null -module rm intel &> /dev/null -module rm intel-oneapi &> /dev/null -module rm cudatoolkit &> /dev/null -module rm climate-utils &> /dev/null -module rm matlab &> /dev/null -module rm craype-accel-nvidia80 &> 
/dev/null -module rm craype-accel-host &> /dev/null -module rm perftools-base &> /dev/null -module rm perftools &> /dev/null -module rm darshan &> /dev/null +module rm cpe \ + cray-hdf5-parallel \ + cray-netcdf-hdf5parallel \ + cray-parallel-netcdf \ + PrgEnv-gnu \ + PrgEnv-intel \ + PrgEnv-nvidia \ + PrgEnv-cray \ + PrgEnv-aocc \ + gcc-native \ + intel \ + intel-oneapi \ + cudatoolkit \ + climate-utils \ + cray-libsci \ + matlab \ + craype-accel-nvidia80 \ + craype-accel-host \ + perftools-base \ + perftools \ + darshan \ + cray-mpich &> /dev/null -module load PrgEnv-nvidia -module load nvidia/22.7 -module load craype-x86-milan -module load libfabric/1.15.2.0 -module load cudatoolkit/11.7 -module load craype-accel-nvidia80 -module load gcc-mixed/11.2.0 -module load craype/2.7.20 -module rm cray-mpich &> /dev/null -module load cray-mpich/8.1.25 -{% if e3sm_lapack %} -module load cray-libsci/23.02.1.1 -{% endif %} +module load PrgEnv-nvidia \ + nvidia/24.5 \ + cudatoolkit/12.4 \ + craype-accel-nvidia80 \ + gcc-native-mixed/12.3 \ + cray-libsci/23.12.5 \ + libfabric/1.20.1 \ + craype/2.7.30 \ + cray-mpich/8.1.28 \ + cmake/3.24.3 {% if e3sm_hdf5_netcdf %} -module rm cray-hdf5-parallel &> /dev/null -module rm cray-netcdf-hdf5parallel &> /dev/null -module rm cray-parallel-netcdf &> /dev/null -module load cray-hdf5-parallel/1.12.2.3 -module load cray-netcdf-hdf5parallel/4.9.0.3 -module load cray-parallel-netcdf/1.12.3.3 +module load cray-hdf5-parallel/1.12.2.9 \ + cray-netcdf-hdf5parallel/4.9.0.9 \ + cray-parallel-netcdf/1.12.3.9 {% endif %} {% if e3sm_hdf5_netcdf %} @@ -55,6 +53,8 @@ export MPICH_MPIIO_DVS_MAXNODES=1 export HDF5_USE_FILE_LOCKING=FALSE ## Not needed # export PERL5LIB=/global/cfs/cdirs/e3sm/perl/lib/perl5-only-switch +export MPICH_COLL_SYNC=MPI_Bcast +export FI_MR_CACHE_MONITOR=kdreg2 export MPICH_GPU_SUPPORT_ENABLED=1 if [ -z "${NERSC_HOST:-}" ]; then diff --git a/mache/spack/pm-gpu_nvidiagpu_mpich.yaml b/mache/spack/pm-gpu_nvidiagpu_mpich.yaml index 
5c1338df..863652f5 100644 --- a/mache/spack/pm-gpu_nvidiagpu_mpich.yaml +++ b/mache/spack/pm-gpu_nvidiagpu_mpich.yaml @@ -1,9 +1,7 @@ spack: specs: - cray-mpich -{% if e3sm_lapack %} - cray-libsci -{% endif %} {% if e3sm_hdf5_netcdf %} - hdf5 - netcdf-c @@ -15,12 +13,10 @@ spack: unify: when_possible packages: all: - compiler: [nvhpc@22.7] + compiler: [nvhpc@24.5] providers: - mpi: [cray-mpich@8.1.25] -{% if e3sm_lapack %} - lapack: [cray-libsci@23.02.1.1] -{% endif %} + mpi: [cray-mpich@8.1.28] + lapack: [cray-libsci@23.12.5] bzip2: externals: - spec: bzip2@1.0.6 @@ -100,84 +96,72 @@ spack: buildable: false cuda: externals: - - spec: cuda@11.7 - prefix: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/cuda/11.7 + - spec: cuda@12.4 modules: - PrgEnv-nvidia - - nvidia/22.7 - - cudatoolkit/11.7 + - nvidia/24.5 - craype-accel-nvidia80 - - gcc-mixed/11.2.0 - - craype-x86-milan - - libfabric + - cray-libsci/23.12.5 + - libfabric/1.20.1 + - craype/2.7.30 + - cudatoolkit/12.4 buildable: false cray-mpich: externals: - - spec: cray-mpich@8.1.25 - prefix: /opt/cray/pe/mpich/8.1.25/ofi/nvidia/20.7 - modules: - - libfabric/1.15.2.0 - - cray-mpich/8.1.25 - buildable: false - libfabric: - externals: - - spec: libfabric@1.15.2.0 - prefix: /opt/cray/libfabric/1.15.2.0 + - spec: cray-mpich@8.1.28 modules: - - libfabric/1.15.2.0 + - libfabric/1.20.1 + - cray-mpich/8.1.28 buildable: false -{% if e3sm_lapack %} cray-libsci: externals: - - spec: cray-libsci@23.02.1.1 - prefix: /opt/cray/pe/libsci/23.02.1.1/NVIDIA/20.7/x86_64 + - spec: cray-libsci@23.12.5 modules: - - cray-libsci/23.02.1.1 + - cray-libsci/23.12.5 buildable: false -{% endif %} {% if e3sm_hdf5_netcdf %} hdf5: externals: - - spec: hdf5@1.12.2.3~cxx+fortran+hl~java+mpi+shared - prefix: /opt/cray/pe/hdf5-parallel/1.12.2.3/nvidia/20.7 + - spec: hdf5@1.12.2.9~cxx+fortran+hl~java+mpi+shared + prefix: /opt/cray/pe/hdf5-parallel/1.12.2.9/nvidia/23.3 buildable: false parallel-netcdf: externals: - - spec: 
parallel-netcdf@1.12.3.3+cxx+fortran+pic+shared - prefix: /opt/cray/pe/parallel-netcdf/1.12.3.3/nvidia/20.7 + - spec: parallel-netcdf@1.12.3.9+cxx+fortran+pic+shared + prefix: /opt/cray/pe/parallel-netcdf/1.12.3.9/nvidia/23.3 buildable: false netcdf-c: externals: - - spec: netcdf-c@4.9.0.3+mpi~parallel-netcdf - prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/nvidia/20.7 + - spec: netcdf-c@4.9.0.9+mpi~parallel-netcdf + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/nvidia/23.3 buildable: false netcdf-fortran: externals: - spec: netcdf-fortran@4.5.3 - prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.3/nvidia/20.7 + prefix: /opt/cray/pe/netcdf-hdf5parallel/4.9.0.9/nvidia/23.3 buildable: false {% endif %} config: install_missing_compilers: false compilers: - compiler: - spec: nvhpc@22.7 + spec: nvhpc@24.5 paths: - cc: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvc - cxx: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvc++ - f77: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvfortran - fc: /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvfortran + cc: /opt/nvidia/hpc_sdk/Linux_x86_64/24.5/compilers/bin/nvc + cxx: /opt/nvidia/hpc_sdk/Linux_x86_64/24.5/compilers/bin/nvc++ + f77: /opt/nvidia/hpc_sdk/Linux_x86_64/24.5/compilers/bin/nvfortran + fc: /opt/nvidia/hpc_sdk/Linux_x86_64/24.5/compilers/bin/nvfortran flags: {} operating_system: sles15 target: any modules: - PrgEnv-nvidia - - nvidia/22.7 - - cudatoolkit/11.7 + - nvidia/24.5 - craype-accel-nvidia80 - - gcc-mixed/11.2.0 - - craype-x86-milan - - libfabric + - cray-libsci/23.12.5 + - libfabric/1.20.1 + - craype/2.7.30 + - cudatoolkit/12.4 environment: prepend_path: PKG_CONFIG_PATH: "/opt/cray/xpmem/2.6.2-2.5_2.33__gd067c3f.shasta/lib64/pkgconfig"