From 7e5dade02e6b64c54c788ff79ac55f2fc4a237d5 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 15 May 2024 20:24:25 -0600 Subject: [PATCH 1/3] Point to ekat/master rather than a branch --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index 44977d9eab51..fb4babcf3a24 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit 44977d9eab51b812952b6bac7dfcb30aafdf7cb5 +Subproject commit fb4babcf3a24c14e9e94d71df152a7f097805c14 From 7d5fc96bebf0950590793285963f97ea28569100 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 15 May 2024 20:26:32 -0600 Subject: [PATCH 2/3] HOMME: fix usage of deprecated code from Kokkos --- components/homme/src/share/compose/cedr_kokkos.hpp | 2 +- components/homme/src/share/cxx/ExecSpaceDefs.hpp | 5 +---- components/homme/src/share/cxx/kokkos_utils.hpp | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/components/homme/src/share/compose/cedr_kokkos.hpp b/components/homme/src/share/compose/cedr_kokkos.hpp index 891d9f763ecb..42e423e2913a 100644 --- a/components/homme/src/share/compose/cedr_kokkos.hpp +++ b/components/homme/src/share/compose/cedr_kokkos.hpp @@ -123,7 +123,7 @@ struct ExeSpaceUtils { #ifdef COMPOSE_MIMIC_GPU const int max_threads = #ifdef KOKKOS_ENABLE_OPENMP - ExeSpace::concurrency() + ExeSpace().concurrency() #else 1 #endif diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.hpp b/components/homme/src/share/cxx/ExecSpaceDefs.hpp index fbcd314cb2b9..42be0b6f71e6 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.hpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.hpp @@ -179,7 +179,7 @@ static typename std::enable_if::value,int>::type get_num_concurrent_teams (const Kokkos::TeamPolicy& policy) { const int team_size = policy.team_size(); - const int concurrency = ExecSpaceType::concurrency(); + const int concurrency = ExecSpaceType().concurrency(); return (concurrency + team_size - 1) / 
team_size; } @@ -187,9 +187,6 @@ template static typename std::enable_if::value,int>::type get_num_concurrent_teams (const Kokkos::TeamPolicy& policy) { - // const int team_size = policy.team_size() * policy.vector_length(); - // const int concurrency = ExecSpaceType::concurrency(); - // return (concurrency + team_size - 1) / team_size; return policy.league_size(); } diff --git a/components/homme/src/share/cxx/kokkos_utils.hpp b/components/homme/src/share/cxx/kokkos_utils.hpp index 85e2ca31dc52..fa4f7a231a3b 100644 --- a/components/homme/src/share/cxx/kokkos_utils.hpp +++ b/components/homme/src/share/cxx/kokkos_utils.hpp @@ -35,7 +35,7 @@ class _TeamUtilsCommonBase template _TeamUtilsCommonBase(const TeamPolicy& policy) { - _max_threads = ExeSpace::concurrency() / ( OnGpu::value ? 2 : 1); + _max_threads = ExeSpace().concurrency() / ( OnGpu::value ? 2 : 1); const int team_size = policy.team_size(); _num_teams = _max_threads / team_size; _team_size = _max_threads / _num_teams; From cdf7ef1786ebd1107b962b28cd609fb9d688e723 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Thu, 16 May 2024 11:47:12 -0600 Subject: [PATCH 3/3] Add kokkos config option on pm-gpu to disable async cuda malloc --- cime_config/machines/cmake_macros/gnugpu_pm-gpu.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/machines/cmake_macros/gnugpu_pm-gpu.cmake b/cime_config/machines/cmake_macros/gnugpu_pm-gpu.cmake index fd52a2046502..17c8066083bb 100644 --- a/cime_config/machines/cmake_macros/gnugpu_pm-gpu.cmake +++ b/cime_config/machines/cmake_macros/gnugpu_pm-gpu.cmake @@ -6,7 +6,7 @@ if (COMP_NAME STREQUAL gptl) endif() string(APPEND CPPDEFS " -DTHRUST_IGNORE_CUB_VERSION_CHECK") string(APPEND CMAKE_CUDA_FLAGS " -ccbin CC -O2 -arch sm_80 --use_fast_math") -string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=On -DKokkos_ENABLE_CUDA=On -DKokkos_ENABLE_CUDA_LAMBDA=On -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=Off") +string(APPEND KOKKOS_OPTIONS " 
-DKokkos_ARCH_AMPERE80=On -DKokkos_ENABLE_CUDA=On -DKokkos_ENABLE_CUDA_LAMBDA=On -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=Off -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=Off") set(CMAKE_CUDA_ARCHITECTURES "80") string(APPEND CMAKE_C_FLAGS_RELEASE " -O2") string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2")