Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add machines muller-cpu/muller-gpu as NERSC internal testing machines similar to pm-cpu/pm-gpu #6154

Merged
merged 2 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 15 additions & 16 deletions cime_config/allactive/config_pesall.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
</nthrds>
</pes>
</mach>
<mach name="theta|pm-cpu|alvarez|pm-gpu|muller|jlse">
<mach name="theta|pm-cpu|muller-cpu|alvarez|pm-gpu|muller-gpu|jlse">
<pes compset="any" pesize="any">
<comment>allactive: default, 1 node x MAX_MPITASKS_PER_NODE mpi x 1 omp @ root 0</comment>
<ntasks>
Expand Down Expand Up @@ -263,7 +263,7 @@
<!-- 2000_DATM%JRA_ELM%SPBC_MPASSI_MPASO%DATMFORCED_MOSART_SGLC_SWAV_SIAC_SESP -->
<!-- ATM_GRID is ne30np4.pg2 ICE_GRID is EC30to60E2r2 -->
<grid name="a%ne30np4">
<mach name="pm-cpu|alvarez">
<mach name="pm-cpu|muller-cpu|alvarez">
<pes compset="JRA_ELM.+MPASSI.+MPASO.+MOSART.+SGLC.+SWAV" pesize="any">
<comment>"pm-cpu 4 nodes, 256 partition, 128x1"</comment>
<ntasks>
Expand Down Expand Up @@ -547,7 +547,7 @@
</mach>
</grid>
<grid name="a%ne120np4">
<mach name="pm-cpu|alvarez">
<mach name="pm-cpu|muller-cpu|alvarez">
<pes compset=".*EAM.+ELM.+MPASSI.+MPASO.+MOSART.+SWAV.*" pesize="any">
<comment>ne120-wcycl on 42 nodes 128x1 ~0.7 sypd</comment>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
Expand Down Expand Up @@ -1244,7 +1244,7 @@
</rootpe>
</pes>
</mach>
<mach name="pm-cpu|alvarez">
<mach name="pm-cpu|muller-cpu|alvarez">
<pes compset=".*EAM.+ELM.+MPASSI.+MPASO.+MOSART.+" pesize="any">
<comment> -compset A_WCYCL* -res ne30pg2_oECv3 with MPASO on 7 nodes, 128x1 </comment>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
Expand Down Expand Up @@ -1792,7 +1792,7 @@
</mach>
</grid>
<grid name="a%ne30np4">
<mach name="pm-gpu|muller">
<mach name="pm-gpu|muller-gpu">
<pes compset="any" pesize="any">
<comment>"pm-gpu ne30np4 and ne30np4.pg2 2 nodes, 4x16"</comment>
<ntasks>
Expand All @@ -1815,7 +1815,7 @@
</nthrds>
</pes>
</mach>
<mach name="pm-cpu|alvarez">
<mach name="pm-cpu|muller-cpu|alvarez">
<pes compset="any" pesize="any">
<comment>"pm-cpu ne30np4 and ne30np4.pg2 2 nodes 1 thread, 128x1"</comment>
<ntasks>
Expand Down Expand Up @@ -2127,7 +2127,7 @@
</rootpe>
</pes>
</mach>
<mach name="pm-cpu|alvarez">
<mach name="pm-cpu|muller-cpu|alvarez">
<pes compset=".*EAM.+ELM.+MPASSI.+MPASO.+MOSART.+" pesize="any">
<comment> 8 nodes, 128x1</comment>
<ntasks>
Expand Down Expand Up @@ -2336,19 +2336,18 @@
</nthrds>
</pes>
</mach>
<mach name="pm-gpu|muller">
<mach name="pm-gpu|muller-gpu">
<pes compset="any" pesize="any">
<comment>pm-gpu conus 2 nodes, 4x1 except 16 threads in LND</comment>
<MAX_MPITASKS_PER_NODE>4</MAX_MPITASKS_PER_NODE>
<MAX_TASKS_PER_NODE>16</MAX_TASKS_PER_NODE>
<ntasks>
<ntasks_atm>-4</ntasks_atm>
<ntasks_lnd>-4</ntasks_lnd>
<ntasks_rof>-4</ntasks_rof>
<ntasks_ice>-4</ntasks_ice>
<ntasks_ocn>-4</ntasks_ocn>
<ntasks_glc>-4</ntasks_glc>
<ntasks_wav>-4</ntasks_wav>
<ntasks_cpl>-4</ntasks_cpl>
<ntasks_atm>-2</ntasks_atm>
<ntasks_lnd>-2</ntasks_lnd>
<ntasks_rof>-2</ntasks_rof>
<ntasks_ice>-2</ntasks_ice>
<ntasks_ocn>-2</ntasks_ocn>
<ntasks_cpl>-2</ntasks_cpl>
</ntasks>
<nthrds>
<nthrds_atm>1</nthrds_atm>
Expand Down
2 changes: 1 addition & 1 deletion cime_config/customize/provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ def _get_batch_job_id_for_syslog(case):
"""
mach = case.get_value("MACH")
try:
if mach in ["anvil", "chrysalis", "compy", "cori-haswell", "cori-knl", "pm-cpu", "pm-gpu", "alvarez","frontier","crusher"]:
if mach in ["anvil", "chrysalis", "compy", "pm-cpu", "pm-gpu", "muller-cpu", "muller-gpu", "alvarez","frontier","frontier-scream-gpu","crusher"]:
# Note: Besides, SLURM_JOB_ID, equivalent SLURM_JOBID is also present on some systems (Frontier).
return os.environ["SLURM_JOB_ID"]
elif mach in ["theta"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ set(NOOPT

if (NOT DEBUG)
foreach(ITEM IN LISTS NOOPT)
e3sm_deoptimize_file(${ITEM})
e3sm_deoptimize_file("${ITEM}")
endforeach()
endif()




14 changes: 14 additions & 0 deletions cime_config/machines/Depends.alvarez.intel.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# With -O2, ifx (via the intel-oneapi module) hits an internal compiler error on this file on pm-cpu.
# The workaround below is commented out for now because we use the intel module, which does not show this build issue.
#set(NOOPT
# eam/src/physics/cam/debug_info.F90)

#if (NOT DEBUG)
# foreach(ITEM IN LISTS NOOPT)
# e3sm_add_flags("${ITEM}" "-O0")
# endforeach()
#endif()




31 changes: 31 additions & 0 deletions cime_config/machines/Depends.alvarez.nvidia.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Sources that get the extra -Mnovect flag in non-DEBUG builds.
# The flag avoids an internal compiler error for this file (seen with
# nvidia/21.11) and is still needed with nvidia/22.5.
list(APPEND REDUCE_OPT_LIST
  homme/src/share/derivative_mod_base.F90
)

# DEBUG builds skip the loop entirely, so no flag is added there.
if(NOT DEBUG)
  foreach(REDUCED_SRC IN LISTS REDUCE_OPT_LIST)
    e3sm_add_flags("${REDUCED_SRC}" " -Mnovect")
  endforeach()
endif()

# Files compiled with -O2 in non-DEBUG builds. These were already found to
# benefit from increased optimization in the Intel Depends file.
set(PERFOBJS
  homme/src/share/prim_advection_base.F90
  homme/src/share/vertremap_base.F90
  homme/src/share/edge_mod_base.F90
  homme/src/share/bndry_mod_base.F90
  homme/src/theta-l/share/prim_advance_mod.F90
  homme/src/preqx/share/prim_advance_mod.F90
  homme/src/preqx/share/viscosity_preqx_base.F90
  homme/src/share/viscosity_base.F90
  homme/src/theta-l/share/viscosity_theta.F90
  homme/src/theta-l/share/eos.F90
  eam/src/physics/cam/uwshcu.F90
)

# DEBUG builds skip the loop, so the listed files keep their default flags.
if(NOT DEBUG)
  foreach(PERF_SRC IN LISTS PERFOBJS)
    e3sm_add_flags("${PERF_SRC}" "-O2")
  endforeach()
endif()
13 changes: 13 additions & 0 deletions cime_config/machines/Depends.muller-cpu.gnu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# zm_conv.F90 is de-optimized in non-DEBUG builds: this fixes non-BFB
# behavior of the stealth feature seen on pm-cpu with -O2.
set(NOOPT
  eam/src/physics/cam/zm_conv.F90
)

# DEBUG builds skip the loop, so nothing is de-optimized there.
if(NOT DEBUG)
  foreach(NOOPT_SRC IN LISTS NOOPT)
    e3sm_deoptimize_file("${NOOPT_SRC}")
  endforeach()
endif()




14 changes: 14 additions & 0 deletions cime_config/machines/Depends.muller-cpu.intel.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# With -O2, ifx (via the intel-oneapi module) hits an internal compiler error on this file on pm-cpu.
# The workaround below is commented out for now because we use the intel module, which does not show this build issue.
#set(NOOPT
# eam/src/physics/cam/debug_info.F90)

#if (NOT DEBUG)
# foreach(ITEM IN LISTS NOOPT)
# e3sm_add_flags("${ITEM}" "-O0")
# endforeach()
#endif()




31 changes: 31 additions & 0 deletions cime_config/machines/Depends.muller-cpu.nvidia.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Sources that get the extra -Mnovect flag in non-DEBUG builds.
# The flag avoids an internal compiler error for this file (seen with
# nvidia/21.11) and is still needed with nvidia/22.5.
list(APPEND REDUCE_OPT_LIST
  homme/src/share/derivative_mod_base.F90
)

# DEBUG builds skip the loop entirely, so no flag is added there.
if(NOT DEBUG)
  foreach(REDUCED_SRC IN LISTS REDUCE_OPT_LIST)
    e3sm_add_flags("${REDUCED_SRC}" " -Mnovect")
  endforeach()
endif()

# Files compiled with -O2 in non-DEBUG builds. These were already found to
# benefit from increased optimization in the Intel Depends file.
set(PERFOBJS
  homme/src/share/prim_advection_base.F90
  homme/src/share/vertremap_base.F90
  homme/src/share/edge_mod_base.F90
  homme/src/share/bndry_mod_base.F90
  homme/src/theta-l/share/prim_advance_mod.F90
  homme/src/preqx/share/prim_advance_mod.F90
  homme/src/preqx/share/viscosity_preqx_base.F90
  homme/src/share/viscosity_base.F90
  homme/src/theta-l/share/viscosity_theta.F90
  homme/src/theta-l/share/eos.F90
  eam/src/physics/cam/uwshcu.F90
)

# DEBUG builds skip the loop, so the listed files keep their default flags.
if(NOT DEBUG)
  foreach(PERF_SRC IN LISTS PERFOBJS)
    e3sm_add_flags("${PERF_SRC}" "-O2")
  endforeach()
endif()
18 changes: 18 additions & 0 deletions cime_config/machines/cmake_macros/amdclang_muller-cpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Compiler macros for amdclang on muller-cpu.

# Serial compiler settings (SCC / SCXX / SFC).
set(SFC "flang")
set(SCC "clang")
set(SCXX "clang++")

# Extra preprocessor defines, added only when the component being built is gptl.
if(COMP_NAME STREQUAL gptl)
  string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()

# Release flags: -O2 with debug symbols for all three languages.
string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -g")
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -g")
string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g")
#string(APPEND FFLAGS " -march=znver3")

# Fortran source-form flags.
string(APPEND CMAKE_Fortran_FORMAT_FREE_FLAG " -Mfreeform")
string(APPEND CMAKE_Fortran_FORMAT_FIXED_FLAG " -Mfixed")

# General Fortran flags; threaded builds additionally pass -mp to both the
# Fortran compile line and the executable link line.
string(APPEND CMAKE_Fortran_FLAGS " -Mflushz ")
if(compile_threaded)
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -mp")
  string(APPEND CMAKE_Fortran_FLAGS " -mp")
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ string(APPEND CONFIG_ARGS " --host=cray")
if (COMP_NAME STREQUAL gptl)
string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()
set(E3SM_LINK_WITH_FORTRAN "ON")
string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g")
string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -g")
set(MPICC "cc")
Expand Down
12 changes: 12 additions & 0 deletions cime_config/machines/cmake_macros/gnu_muller-gpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Compiler macros for gnu on muller-gpu.
string(APPEND CONFIG_ARGS " --host=cray")

# Serial compiler settings (SCC / SCXX / SFC).
set(SFC "gfortran")
set(SCXX "g++")
set(SCC "gcc")

# MPI compiler settings (MPICC / MPICXX / MPIFC).
set(MPIFC "ftn")
set(MPICXX "CC")
set(MPICC "cc")

# Release flags: -O2 with debug symbols for C and Fortran.
string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -g")
string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g")

# Extra preprocessor defines, added only when the component being built is gptl.
if(COMP_NAME STREQUAL gptl)
  string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()
32 changes: 32 additions & 0 deletions cime_config/machines/cmake_macros/intel_muller-cpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Compiler macros for intel on muller-cpu.
string(APPEND CONFIG_ARGS " --host=cray")

# Extra preprocessor defines, added only when the component being built is gptl.
if(COMP_NAME STREQUAL gptl)
  string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()

# Serial compiler settings (SCC / SCXX / SFC).
set(SFC "ifx")
set(SCXX "icpx")
set(SCC "icx")

# MPI compiler settings (MPICC / MPICXX / MPIFC).
set(MPIFC "ftn")
set(MPICXX "CC")
set(MPICC "cc")

# ---- C++ flags ----
# Bit of a hack. For whatever reason, the intel version on pm-cpu (both intel
# and intel-oneapi, and both icpc/icpx) does not seem to have the
# -fp-model=source flag (docs still show it), and no reliable way was found to
# test on the compiler ID or version. So the C++ flags are reset to blank here
# and rebuilt by hand, with -fp-model=precise standing in for -fp-model=source.
#message(STATUS "ndk CXXFLAGS=${CXXFLAGS}")
set(CMAKE_CXX_FLAGS " ")
if(compile_threaded)
  string(APPEND CMAKE_CXX_FLAGS " -qopenmp")
endif()
# NOTE(review): both -fp-model=precise and -fp-model=consistent end up in
# CMAKE_CXX_FLAGS, in this order - confirm which one is intended.
string(APPEND CMAKE_CXX_FLAGS
  " -fp-model=precise"      # manually added replacement for -fp-model=source
  " -fp-model=consistent")
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g")
string(APPEND CMAKE_CXX_FLAGS_RELEASE
  " -O2"
  " -g -traceback")
#message(STATUS "ndk CXXFLAGS=${CXXFLAGS}")

# ---- Fortran flags ----
string(APPEND CMAKE_Fortran_FLAGS
  " -fp-model=consistent -fimf-use-svml"
  " -DHAVE_ERF_INTRINSICS")
# string(APPEND FFLAGS " -qno-opt-dynamic-align")
string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -g -traceback")
16 changes: 16 additions & 0 deletions cime_config/machines/cmake_macros/nvidia_muller-cpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Compiler macros for nvidia on muller-cpu.
string(APPEND CONFIG_ARGS " --host=cray")

# Serial compiler settings (SCC / SCXX / SFC); same values as the MPI ones below.
set(SFC "ftn")
set(SCXX "CC")
set(SCC "cc")

# MPI compiler settings (MPICC / MPICXX / MPIFC).
set(MPIFC "ftn")
set(MPICXX "CC")
set(MPICC "cc")

# Release flags: -O2 for C and C++, debug symbols only for Fortran.
string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -g")
string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2")
string(APPEND CMAKE_C_FLAGS_RELEASE " -O2")

# Extra preprocessor defines, added only when the component being built is gptl.
if(COMP_NAME STREQUAL gptl)
  string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()

# Work-around for nvidia: kokkos is not passing "-mp" for a threaded build,
# so Kokkos OpenMP is switched off when the build is threaded.
if(compile_threaded)
  string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_OPENMP=Off")
endif()
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
string(APPEND CONFIG_ARGS " --host=cray")
set(USE_CUDA "TRUE")
string(APPEND CPPDEFS " -DGPU")
string(APPEND CPPDEFS " -DGPU -DMPAS_OPENACC")
if (COMP_NAME STREQUAL gptl)
string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()
string(APPEND CPPDEFS " -DTHRUST_IGNORE_CUB_VERSION_CHECK")
string(APPEND CMAKE_CUDA_FLAGS " -ccbin CC -O2 -arch sm_80 --use_fast_math")
string(APPEND CMAKE_EXE_LINKER_FLAGS " -acc -gpu=cc70,cc60 -Minfo=accel")
set(SCC "cc")
set(SCXX "CC")
set(SFC "ftn")
string(APPEND CMAKE_Fortran_FLAGS " -acc -gpu=cc70,cc60 -Minfo=accel")
22 changes: 17 additions & 5 deletions cime_config/machines/config_batch.xml
Original file line number Diff line number Diff line change
Expand Up @@ -458,19 +458,19 @@
</queues>
</batch_system>

<batch_system MACH="pm-cpu" type="nersc_slurm">
<batch_system MACH="muller-cpu" type="nersc_slurm">
<directives>
<directive> --constraint=cpu</directive>
</directives>
<queues>
<!-- Note: walltime is not the max walltime, but the default - see NERSC docs for Q limits, https://docs.nersc.gov/jobs/policy/ -->
<queue walltimemax="00:30:00" nodemax="4096" default="true">regular</queue>
<queue walltimemax="00:30:00" nodemax="4096" strict="true">preempt</queue>
<queue walltimemax="00:30:00" nodemax="16" default="true">regular</queue>
<queue walltimemax="00:30:00" nodemax="16" strict="true">preempt</queue>
<queue walltimemax="00:30:00" nodemax="8" strict="true">debug</queue>
</queues>
</batch_system>

<batch_system MACH="muller" type="nersc_slurm">
<batch_system MACH="muller-gpu" type="nersc_slurm">
<directives>
<directive> --constraint=gpu</directive>
</directives>
Expand All @@ -496,10 +496,22 @@
<queues>
<queue walltimemax="00:45:00" nodemax="64" default="true">regular</queue>
<queue walltimemax="00:45:00" nodemax="64" strict="true">preempt</queue>
<queue walltimemax="00:15:00" nodemax="8" strict="true">debug</queue>
<queue walltimemax="00:15:00" nodemax="4" strict="true">debug</queue>
</queues>
</batch_system>

<batch_system MACH="pm-cpu" type="nersc_slurm">
<directives>
<directive> --constraint=cpu</directive>
</directives>
<queues>
<!-- Note: walltime is not the max walltime, but the default - see NERSC docs for Q limits, https://docs.nersc.gov/jobs/policy/ -->
<queue walltimemax="00:30:00" nodemax="4096" default="true">regular</queue>
<queue walltimemax="00:30:00" nodemax="4096" strict="true">preempt</queue>
<queue walltimemax="00:30:00" nodemax="8" strict="true">debug</queue>
</queues>
</batch_system>

<batch_system MACH="alvarez" type="nersc_slurm">
<directives>
<directive> --constraint=cpu</directive>
Expand Down
Loading