Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Frontier machine and compiler configuration to fix issues with crayclanggpu and amdclanggpu #6771

Merged
merged 1 commit into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cime_config/machines/Depends.crayclanggpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ list(APPEND NOOPT_FILES
elm/src/data_types/VegetationDataType.F90
elm/src/biogeochem/CNNitrogenFluxType.F90
elm/src/biogeochem/CNCarbonFluxType.F90
mosart/src/wrm/WRM_subw_IO_mod.F90
mosart/src/riverroute/RtmMod.F90
)

# Files added below to mitigate excessive compilation times
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
set(MPICC "cc")
set(MPICXX "mpicxx")
#set(MPICXX "CC")
set(MPIFC "ftn")
set(SCC "cc")
set(SCXX "hipcc")
Expand Down Expand Up @@ -34,7 +33,7 @@ set(HAS_F2008_CONTIGUOUS "TRUE")
# -Wl,--allow-shlib-undefined was added to address rocm 5.4.3 Fortran linker issue:
# /opt/rocm-5.4.3/lib/libhsa-runtime64.so.1: undefined reference to `std::condition_variable::wait(std::unique_lock<std::mutex>&)@GLIBCXX_3.4.30'
# AMD started building with GCC 12.2.0, which brings in a GLIBCXX symbol that isn't in CCE's default GCC toolchain.
#string(APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,--allow-multiple-definition -Wl,--allow-shlib-undefined")
string(APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,--allow-shlib-undefined -Wl,--allow-multiple-definition")

# Switching to O3 for performance benchmarking
# Will revisit any failing tests
Expand Down
16 changes: 11 additions & 5 deletions cime_config/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,7 @@
<cmd_path lang="python">/usr/share/lmod/lmod/libexec/lmod python</cmd_path>
<modules compiler="crayclang.*">
<command name="reset"></command>
<command name="switch">Core Core/24.07</command>
<command name="switch">Core Core/24.00</command>
<command name="switch">PrgEnv-cray PrgEnv-cray/8.3.3</command>
<command name="switch">cce cce/15.0.1</command>
<!-- craype module to address tcmalloc runtime errors at startup -->
Expand All @@ -1082,16 +1082,22 @@
</modules>
<modules compiler="amdclang.*">
<command name="reset"></command>
<command name="switch">Core Core/24.07</command>
<command name="switch">Core Core/24.00</command>
<command name="switch">PrgEnv-cray PrgEnv-amd/8.3.3</command>
<command name="switch">amd amd/5.4.0</command>
</modules>

<!-- Removed to resolve the issue: https://github.com/E3SM-Project/E3SM/issues/6755
May or may not need to be restored to support OpenMP Offload or OpenACC
-->
<!--
<modules compiler="amdclanggpu">
<command name="load">craype-accel-amd-gfx90a</command>
</modules>
-->
<modules compiler="gnu.*">
<command name="reset"></command>
<command name="switch">Core Core/24.07</command>
<command name="switch">Core Core/24.00</command>
<command name="switch">PrgEnv-cray PrgEnv-gnu/8.3.3</command>
<command name="switch">gcc gcc/12.2.0</command>
</modules>
Expand All @@ -1100,9 +1106,9 @@
<command name="load">rocm/5.4.0</command>
</modules>
<modules>
<command name="load">cray-python/3.11.5</command>
<command name="load">cray-python/3.9.13.1</command>
<command name="load">cray-libsci</command>
<command name="load">cmake/3.27.9</command>
<command name="load">cmake/3.21.3</command>
<command name="load">subversion</command>
<command name="load">git</command>
<command name="load">zlib</command>
Expand Down