From d4282db45d84d51ba422b7e30d6df122c2cd0bb1 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 6 Nov 2023 22:26:36 +0000 Subject: [PATCH 01/85] use branch with spot kokkos commit --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index 2ff5853316e..a468d04e442 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit 2ff5853316e15d4e8004c21890329fd257fa7459 +Subproject commit a468d04e442a3a7fa170563afa9a103c61170b10 From 2f256a2225317976d477eda66e273231795268be Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 6 Nov 2023 22:48:59 +0000 Subject: [PATCH 02/85] hardcopying homme sycl branch --- components/homme/CMakeLists.txt | 18 +- components/homme/cmake/HommeMacros.cmake | 10 +- components/homme/src/prim_main.F90 | 7 +- .../homme/src/share/compose/compose.hpp | 2 +- components/homme/src/share/cxx/Config.hpp | 2 +- components/homme/src/share/cxx/ErrorDefs.cpp | 2 + .../src/share/cxx/EulerStepFunctorImpl.hpp | 5 + .../homme/src/share/cxx/ExecSpaceDefs.cpp | 17 ++ .../homme/src/share/cxx/ExecSpaceDefs.hpp | 4 + .../homme/src/share/cxx/Hommexx_Session.cpp | 11 + .../homme/src/share/cxx/SphereOperators.hpp | 215 ++++++++++++++++++ .../src/share/cxx/utilities/BfbUtils.hpp | 2 +- components/homme/src/share/gllfvremap_mod.F90 | 21 +- .../src/test_src/dcmip2016-supercell.F90 | 36 +-- .../src/theta-l_kokkos/config.h.cmake.in | 2 + .../theta-l_kokkos/cxx/CaarFunctorImpl.hpp | 35 ++- .../theta-l_kokkos/cxx/DirkFunctorImpl.hpp | 2 +- .../src/theta-l_kokkos/cxx/LimiterFunctor.hpp | 4 +- .../theta-l_kokkos/cxx/RemapStateProvider.hpp | 85 ++++++- 19 files changed, 427 insertions(+), 53 deletions(-) diff --git a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt index 4486ed47e24..4f3c335f25c 100644 --- a/components/homme/CMakeLists.txt +++ b/components/homme/CMakeLists.txt @@ -206,7 +206,10 @@ IF (HOMME_USE_KOKKOS) STRING (TOUPPER ${HOMMEXX_EXEC_SPACE} 
HOMMEXX_EXEC_SPACE_UPPER) - IF (HOMMEXX_EXEC_SPACE_UPPER STREQUAL "HIP") + #not user afaik + IF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "SYCL") + SET (HOMMEXX_SYCL_SPACE ON) + ELSEIF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "HIP") SET (HOMMEXX_HIP_SPACE ON) ELSEIF (HOMMEXX_EXEC_SPACE_UPPER STREQUAL "CUDA") SET (HOMMEXX_CUDA_SPACE ON) @@ -302,15 +305,18 @@ SET (HOMMEXX_ENABLE_GPU FALSE) IF (HOMME_USE_KOKKOS) - IF (CUDA_BUILD OR HIP_BUILD) + IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD) SET (DEFAULT_VECTOR_SIZE 1) SET (HOMMEXX_ENABLE_GPU TRUE) + + message("OG Set HOMMEXX_ENABLE_GPU to ${HOMMEXX_ENABLE_GPU}") + ELSE () SET (DEFAULT_VECTOR_SIZE 8) ENDIF() SET (HOMMEXX_VECTOR_SIZE ${DEFAULT_VECTOR_SIZE} CACHE STRING - "If AVX or Cuda or HIP don't take priority, use this software vector size.") + "If AVX or Cuda or HIP or SYCL don't take priority, use this software vector size.") IF (CMAKE_BUILD_TYPE_UPPER MATCHES "DEBUG" OR CMAKE_BUILD_TYPE_UPPER MATCHES "RELWITHDEBINFO") SET (HOMMEXX_DEBUG ON) @@ -447,6 +453,7 @@ ENDIF () # If we don't need kokkos we don't need EKAT, and if # Homme is built in EAMxx EKAT is already built +if("${E3SM_KOKKOS_PATH}" STREQUAL "") IF (HOMME_USE_KOKKOS AND HOMME_STANDALONE) # Add ekat's cmake/pkg_build folder to cmake path set (EKAT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../externals/ekat) @@ -458,6 +465,11 @@ IF (HOMME_USE_KOKKOS AND HOMME_STANDALONE) include (EkatBuildKokkos) BuildKokkos() ENDIF () +ELSE () + IF (${HOMME_USE_KOKKOS}) + INCLUDE(Kokkos) + ENDIF () +ENDIF () # This folder contains the CMake macro used to build cxx unit tests # Add unit tests for C++ code diff --git a/components/homme/cmake/HommeMacros.cmake b/components/homme/cmake/HommeMacros.cmake index 8595988bf23..4a42326b9d5 100644 --- a/components/homme/cmake/HommeMacros.cmake +++ b/components/homme/cmake/HommeMacros.cmake @@ -112,7 +112,7 @@ macro(createTestExec execName execType macroNP macroNC ADD_DEFINITIONS(-DHAVE_CONFIG_H) ADD_EXECUTABLE(${execName} ${EXEC_SOURCES}) - 
SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran) + SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE CXX) IF(BUILD_HOMME_WITHOUT_PIOLIBRARY) TARGET_COMPILE_DEFINITIONS(${execName} PUBLIC HOMME_WITHOUT_PIOLIBRARY) ENDIF() @@ -156,7 +156,11 @@ macro(createTestExec execName execType macroNP macroNC ENDIF () IF (HOMME_USE_KOKKOS) + if("${E3SM_KOKKOS_PATH}" STREQUAL "") target_link_libraries(${execName} kokkos) + else() + link_to_kokkos(${execName}) + endif() ENDIF () # Move the module files out of the way so the parallel build @@ -169,8 +173,8 @@ macro(createTestExec execName execType macroNP macroNC TARGET_LINK_LIBRARIES(${execName} -mkl) ELSE() IF (NOT HOMME_FIND_BLASLAPACK) - TARGET_LINK_LIBRARIES(${execName} lapack blas) - ADD_DEPENDENCIES(${execName} blas lapack) + #TARGET_LINK_LIBRARIES(${execName} lapack blas) + #ADD_DEPENDENCIES(${execName} blas lapack) ENDIF() ENDIF() diff --git a/components/homme/src/prim_main.F90 b/components/homme/src/prim_main.F90 index bfbe57e8b31..1d7f48e95a1 100644 --- a/components/homme/src/prim_main.F90 +++ b/components/homme/src/prim_main.F90 @@ -20,7 +20,7 @@ program prim_main use element_mod, only: element_t use common_io_mod, only: output_dir, infilenames use common_movie_mod, only: nextoutputstep - use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf ! _EXTERNAL + use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf,t_disablef, t_enablef ! 
_EXTERNAL use restart_io_mod , only: restartheader_t, writerestart use hybrid_mod, only: hybrid_create #if (defined MODEL_THETA_L && defined ARKODE) @@ -240,6 +240,11 @@ end subroutine finalize_kokkos_f90 nstep = nextoutputstep(tl) do while(tl%nstep= 2) call t_enablef() call t_startf('prim_run') call prim_run_subcycle(elem, hybrid,nets,nete, tstep, .false., tl, hvcoord,1) call t_stopf('prim_run') diff --git a/components/homme/src/share/compose/compose.hpp b/components/homme/src/share/compose/compose.hpp index 01be2635fcf..cd65102610e 100644 --- a/components/homme/src/share/compose/compose.hpp +++ b/components/homme/src/share/compose/compose.hpp @@ -23,7 +23,7 @@ typedef Kokkos::Experimental::HIPSpace ComposeGpuSpace; # endif # if defined KOKKOS_ENABLE_SYCL typedef Kokkos::Experimental::SYCL ComposeGpuExeSpace; -typedef Kokkos::Experimental::SYCL> ComposeGpuSpace; +typedef Kokkos::Experimental::SYCL ComposeGpuSpace; # endif #endif diff --git a/components/homme/src/share/cxx/Config.hpp b/components/homme/src/share/cxx/Config.hpp index 684f9143bea..b204b1dbd04 100644 --- a/components/homme/src/share/cxx/Config.hpp +++ b/components/homme/src/share/cxx/Config.hpp @@ -21,7 +21,7 @@ # endif #endif -#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE +#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE && ! defined HOMMEXX_SYCL_SPACE # define HOMMEXX_DEFAULT_SPACE #endif diff --git a/components/homme/src/share/cxx/ErrorDefs.cpp b/components/homme/src/share/cxx/ErrorDefs.cpp index ccb4631100d..a6eabfa1cf7 100644 --- a/components/homme/src/share/cxx/ErrorDefs.cpp +++ b/components/homme/src/share/cxx/ErrorDefs.cpp @@ -45,7 +45,9 @@ void runtime_abort(const std::string& message, int code) { } else { std::cerr << message << std::endl << "Exiting..." 
<< std::endl; finalize_hommexx_session(); +#ifndef TESTER_NOMPI MPI_Abort(MPI_COMM_WORLD, code); +#endif } } diff --git a/components/homme/src/share/cxx/EulerStepFunctorImpl.hpp b/components/homme/src/share/cxx/EulerStepFunctorImpl.hpp index f3029764dac..f87bb108beb 100644 --- a/components/homme/src/share/cxx/EulerStepFunctorImpl.hpp +++ b/components/homme/src/share/cxx/EulerStepFunctorImpl.hpp @@ -652,7 +652,10 @@ class EulerStepFunctorImpl { minmax_and_biharmonic(); } } + + GPTLstart("tl-at adv-n-limit"); advect_and_limit(); + GPTLstop("tl-at adv-n-limit"); exchange_qdp_dss_var(); } @@ -667,6 +670,7 @@ class EulerStepFunctorImpl { void run_tracer_phase (const KernelVariables& kv) const { compute_qtens(kv); kv.team_barrier(); + if (m_data.limiter_option == 8) { limiter_optim_iter_full(kv); kv.team_barrier(); @@ -674,6 +678,7 @@ class EulerStepFunctorImpl { limiter_clip_and_sum(kv); kv.team_barrier(); } + apply_spheremp(kv); } diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.cpp b/components/homme/src/share/cxx/ExecSpaceDefs.cpp index 784d37b65d2..2ec0ebb6fe0 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.cpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.cpp @@ -21,6 +21,10 @@ #include #endif +#ifdef KOKKOS_ENABLE_SYCL +#include +#endif + namespace Homme { // Since we're initializing from inside a Fortran code and don't have access to @@ -52,7 +56,16 @@ void initialize_kokkos () { // It isn't a big deal if we can't get the device count. 
nd = 1; } +#elif defined(KOKKOS_ENABLE_SYCL) + +//https://developer.codeplay.com/products/computecpp/ce/2.11.0/guides/sycl-for-cuda-developers/migrating-from-cuda-to-sycl + +//to make it build + int nd = 1; + #endif + + #ifdef HOMMEXX_ENABLE_GPU std::stringstream ss; ss << "--kokkos-num-devices=" << nd; @@ -117,6 +130,7 @@ team_num_threads_vectors_for_gpu ( assert(num_warps_total >= max_num_warps); assert(tp.max_threads_usable >= 1 && tp.max_vectors_usable >= 1); +#ifndef KOKKOS_ENABLE_SYCL int num_warps; if (tp.prefer_larger_team) { const int num_warps_usable = @@ -161,6 +175,9 @@ team_num_threads_vectors_for_gpu ( return std::make_pair( num_device_threads / num_vectors, num_vectors ); } +#else + return std::make_pair(4,16); +#endif } } // namespace Parallel diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.hpp b/components/homme/src/share/cxx/ExecSpaceDefs.hpp index 8c18d8bcbb9..d799af38783 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.hpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.hpp @@ -31,6 +31,10 @@ using HommexxGPU = Kokkos::Cuda; using HommexxGPU = Kokkos::Experimental::HIP; #endif +#ifdef KOKKOS_ENABLE_SYCL +using HommexxGPU = Kokkos::Experimental::SYCL; +#endif + #else using HommexxGPU = void; #endif diff --git a/components/homme/src/share/cxx/Hommexx_Session.cpp b/components/homme/src/share/cxx/Hommexx_Session.cpp index c93174d2442..db50ec27d6c 100644 --- a/components/homme/src/share/cxx/Hommexx_Session.cpp +++ b/components/homme/src/share/cxx/Hommexx_Session.cpp @@ -7,8 +7,12 @@ #include "Config.hpp" #include "Hommexx_Session.hpp" #include "ExecSpaceDefs.hpp" +#include "Types.hpp" + +#ifndef TESTER_NOMPI #include "profiling.hpp" #include "mpi/Comm.hpp" +#endif #include "Context.hpp" @@ -75,7 +79,10 @@ void initialize_hommexx_session () // If hommexx session is not currently inited, then init it. 
if (!Session::m_inited) { /* Make certain profiling is only done for code we're working on */ + +#ifndef TESTER_NOMPI profiling_pause(); +#endif /* Set Environment variables to control how many * threads/processors Kokkos uses */ @@ -83,12 +90,16 @@ void initialize_hommexx_session () initialize_kokkos(); } +#ifndef TESTER_NOMPI // Note: at this point, the Comm *should* already be created. const auto& comm = Context::singleton().get(); if (comm.root()) { ExecSpace().print_configuration(std::cout, true); print_homme_config_settings (); } +#else + ExecSpace().print_configuration(std::cout, true); +#endif Session::m_inited = true; } diff --git a/components/homme/src/share/cxx/SphereOperators.hpp b/components/homme/src/share/cxx/SphereOperators.hpp index c227d97ea70..e8571c57f3b 100644 --- a/components/homme/src/share/cxx/SphereOperators.hpp +++ b/components/homme/src/share/cxx/SphereOperators.hpp @@ -244,6 +244,8 @@ class SphereOperators kv.team_barrier(); } + + KOKKOS_INLINE_FUNCTION void divergence_sphere_wk_sl (const KernelVariables &kv, const ExecViewUnmanaged& v, @@ -296,6 +298,102 @@ class SphereOperators } // end of divergence_sphere_wk_sl + + + + +#if 0 + KOKKOS_INLINE_FUNCTION void + divergence_sphere_wk_sl (const KernelVariables &kv, + const ExecViewUnmanaged& v, + const ExecViewUnmanaged< Real [NP][NP]>& div_v) const + { + // Make sure the buffers have been created + assert (vector_buf_sl.size()>0); + + const auto& D_inv = Homme::subview(m_dinv,kv.ie); + const auto& spheremp = Homme::subview(m_spheremp,kv.ie); + const auto& gv_buf = Homme::subview(vector_buf_sl,kv.team_idx,0); + + // copied from strong divergence as is but without metdet + // conversion to contravariant + + double * ggv = &gv_buf(0,0,0); + + const int s1 = &v(1,0,0)-&v(0,0,0); + const int s2 = &v(0,1,0)-&v(0,0,0); + const int s3 = &v(0,0,1)-&v(0,0,0); + + //not sure we can reuse strides above, so using new ones + const int d1 = &D_inv(1,0,0,0)-&D_inv(0,0,0,0); + const int d2 = 
&D_inv(0,1,0,0)-&D_inv(0,0,0,0); + const int d3 = &D_inv(0,0,1,0)-&D_inv(0,0,0,0); + const int d4 = &D_inv(0,0,0,1)-&D_inv(0,0,0,0); + + constexpr int np_squared = NP * NP; + Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, np_squared), + [&](const int loop_idx) { + const int igp = loop_idx / NP; + const int jgp = loop_idx % NP; + + int linind1 = s1 * 0 + s2 * igp + s3 * jgp; + const auto& vv0 = (&v(0,0,0) + linind1); + int linind2 = s1 * 1 + s2 * igp + s3 * jgp; + const auto& vv1 = (&v(0,0,0) + linind2); + + int linind3 = d1 * 0 + d2 * 0 + d3 * igp + d4 * jgp; + int linind4 = d1 * 1 + d2 * 0 + d3 * igp + d4 * jgp; + *(&gv_buf(0,0,0)+linind1) = *(&D_inv(0,0,0,0)+linind3) * (*vv0) + *(&D_inv(0,0,0,0)+linind4) * (*vv1); + + linind3 = d1 * 0 + d2 * 1 + d3 * igp + d4 * jgp; + linind4 = d1 * 1 + d2 * 1 + d3 * igp + d4 * jgp; + *(&gv_buf(0,0,0)+linind2) = *(&D_inv(0,0,0,0)+linind3) * (*vv0) + *(&D_inv(0,0,0,0)+linind4) * (*vv1); + + }); + kv.team_barrier(); + + // in strong div + // kgp = i in strong code, jgp=j, igp=l + // in weak div, n is like j in strong div, + // n(weak)=j(strong)=jgp + // m(weak)=l(strong)=igp + // j(weak)=i(strong)=kgp + constexpr int div_iters = NP * NP; + // keeping indices' names as in F + + //gv_buf strides are as before, s1 s2 s3 + //dvv, div_v, and spheremp should have the same strides + const int f1 = &dvv(1,0)-&dvv(0,0); + const int f2 = &dvv(0,1)-&dvv(0,0); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, div_iters), + [&](const int loop_idx) { + // Note: for this one time, it is better if m strides faster, due to + // the way the views are accessed. 
+ const int mgp = loop_idx % NP; + const int ngp = loop_idx / NP; + Real dd = 0.0; + for (int jgp = 0; jgp < NP; ++jgp) { + int linind1 = s1 * 0 + s2 * ngp + s3 * jgp; + int linind2 = s1 * 1 + s2 * jgp + s3 * mgp; + + int l1 = f1 * ngp + f2 * jgp; + int l2 = f1 * jgp + f2 * mgp; + int l3 = f1 * jgp + f2 * ngp; + + dd -= ( *(&spheremp(0,0)+l1) * *(&gv_buf(0,0,0)+linind1) * *(&dvv(0,0)+l2) + + *(&spheremp(0,0)+l2) * *(&gv_buf(0,0,0)+linind2) * *(&dvv(0,0)+l3)) * + m_scale_factor_inv; + } + int l1 = f1 * ngp + f2 * mgp; + *(&div_v(0,0)+l1) = dd; + }); + kv.team_barrier(); + + } // end of divergence_sphere_wk_sl +#endif + + // Note that divergence_sphere requires scratch space of 3 x NP x NP Reals // This must be called from the device space KOKKOS_INLINE_FUNCTION void @@ -715,6 +813,116 @@ class SphereOperators vorticity_sphere(kv, v, vort, NUM_LEV_REQUEST); } + + + +#if 0 + + template + KOKKOS_INLINE_FUNCTION void + divergence_sphere_wk (const KernelVariables &kv, + // On input, a field whose divergence is sought; on + // output, the view's data are invalid. 
+ const ExecViewUnmanaged& v, + const ExecViewUnmanaged& div_v, + const int NUM_LEV_REQUEST) const + { + assert(NUM_LEV_REQUEST>=0); + assert(NUM_LEV_REQUEST<=NUM_LEV_IN); + assert(NUM_LEV_REQUEST<=NUM_LEV_OUT); + + // Make sure the buffers have been created + assert (vector_buf_ml.size()>0); + + const auto& D_inv = Homme::subview(m_dinv, kv.ie); + const auto& spheremp = Homme::subview(m_spheremp, kv.ie); + constexpr int np_squared = NP * NP; + + const int s1 = &v(1,0,0,0)[0]-&v(0,0,0,0)[0]; + const int s2 = &v(0,1,0,0)[0]-&v(0,0,0,0)[0]; + const int s3 = &v(0,0,1,0)[0]-&v(0,0,0,0)[0]; + const int s4 = &v(0,0,0,1)[0]-&v(0,0,0,0)[0]; + + const int d1 = &D_inv(1,0,0,0)-&D_inv(0,0,0,0); + const int d2 = &D_inv(0,1,0,0)-&D_inv(0,0,0,0); + const int d3 = &D_inv(0,0,1,0)-&D_inv(0,0,0,0); + const int d4 = &D_inv(0,0,0,1)-&D_inv(0,0,0,0); + + Real * const vv = &v(0,0,0,0)[0]; + const Real * const dd = &D_inv(0,0,0,0); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, np_squared), + [&](const int loop_idx) { + const int igp = loop_idx / NP; + const int jgp = loop_idx % NP; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV_REQUEST), [&] (const int& ilev) { + + const int l1 = s1*0 + s2*igp + s3*jgp + s4*ilev; + const int l2 = s1*1 + l1; + const Real v0old = vv[l1]; + const Real v1old = vv[l2]; + + int l3 = d1*0 + d2*0 + d3*igp + d4*jgp; + int l4 = d1*1 + d2*0 + d3*igp + d4*jgp; + + vv[l1] = dd[l3] * v0old + dd[l4] * v1old; + + l3 = d1*0 + d2*1 + d3*igp + d4*jgp; + l4 = d1*1 + d2*1 + d3*igp + d4*jgp; + + vv[l2] = dd[l3] * v0old + dd[l4] * v1old; + + }); + }); + kv.team_barrier(); + + const int f1 = &dvv(1,0)-&dvv(0,0); + const int f2 = &dvv(0,1)-&dvv(0,0); + + const Real * const ss = &spheremp(0,0); + const Real * const ddv = &dvv(0,0); + + const int k1 = &div_v(1,0,0)[0]-&div_v(0,0,0)[0]; + const int k2 = &div_v(0,1,0)[0]-&div_v(0,0,0)[0]; + const int k3 = &div_v(0,0,1)[0]-&div_v(0,0,0)[0]; + + constexpr int div_iters = NP * NP; + 
Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, div_iters), + [&](const int loop_idx) { + // Note: for this one time, it is better if m strides faster, due to + // the way the views are accessed. + const int mgp = loop_idx % NP; + const int ngp = loop_idx / NP; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV_REQUEST), [&] (const int& ilev) { + Real dd = 0.0; + // TODO: move multiplication by scale_factor_inv outside the loop + for (int jgp = 0; jgp < NP; ++jgp) { + // Here, v is the temporary buffer, aliased on the input v. + + const int l1 = s1*0 + s2*ngp + s3*jgp + s4*ilev; + const int l2 = s1*1 + s2*jgp + s3*mgp + s4*ilev; + + const int x1 = f1 * ngp + f2 * jgp; + const int x2 = f1 * jgp + f2 * mgp; + const int x3 = f1 * jgp + f2 * ngp; + + dd -= (ss[x1] * vv[l1] * ddv[x2] + + ss[x2] * vv[l2] * ddv[x3]) * + m_scale_factor_inv; + } + //div_v(ngp, mgp, ilev) = dd; + const int l1 = k1 * ngp + k2 * mgp + k3 * ilev; + *(&div_v(0,0,0)[0]+l1) = dd; + }); + }); + kv.team_barrier(); + + }//end of divergence_sphere_wk + +#else + + + template KOKKOS_INLINE_FUNCTION void divergence_sphere_wk (const KernelVariables &kv, @@ -770,6 +978,13 @@ class SphereOperators }//end of divergence_sphere_wk + +#endif + + + + + template KOKKOS_INLINE_FUNCTION void divergence_sphere_wk (const KernelVariables &kv, diff --git a/components/homme/src/share/cxx/utilities/BfbUtils.hpp b/components/homme/src/share/cxx/utilities/BfbUtils.hpp index e3570874e26..475cd9f2d95 100644 --- a/components/homme/src/share/cxx/utilities/BfbUtils.hpp +++ b/components/homme/src/share/cxx/utilities/BfbUtils.hpp @@ -64,7 +64,7 @@ KOKKOS_INLINE_FUNCTION ScalarType int_pow (ScalarType val, int k) { constexpr int max_shift = 30; if (k<0) { - printf ("k = %d\n",k); + Kokkos::print ("k = %d\n",k); Kokkos::abort("int_pow implemented only for k>=0.\n"); } diff --git a/components/homme/src/share/gllfvremap_mod.F90 b/components/homme/src/share/gllfvremap_mod.F90 index 1e0ee9b8184..13b863c44e4 100644 
--- a/components/homme/src/share/gllfvremap_mod.F90 +++ b/components/homme/src/share/gllfvremap_mod.F90 @@ -989,7 +989,7 @@ subroutine gfr_init_R(np, nphys, w_gg, M_gf, R, tau) end do end do end do - call dgeqrf(np*np, nphys*nphys, R, size(R,1), tau, wrk, np*np*nphys*nphys, info) +! call dgeqrf(np*np, nphys*nphys, R, size(R,1), tau, wrk, np*np*nphys*nphys, info) end subroutine gfr_init_R subroutine gfr_init_interp_matrix(npsrc, interp) @@ -1071,12 +1071,13 @@ subroutine gfr_f2g_remapd_op(gfr, R, tau, f, g) ! g = inv(M_sgsg) M_sgf inv(S) M_ff f wrk = reshape(gfr%w_ff(:nf2), (/nf,nf/))*f(:nf,:nf) if (nf == npi) then - call dtrsm('L', 'U', 'T', 'N', nf2, 1, one, R, size(R,1), wrk, nf2) - call dormqr('L', 'N', nf2, 1, nf2, R, size(R,1), tau, wrk, nf2, wr, np2, info) + +! call dtrsm('l', 'u', 't', 'n', nf2, 1, one, R, size(R,1), wrk, nf2) +! call dormqr('l', 'n', nf2, 1, nf2, R, size(R,1), tau, wrk, nf2, wr, np2, info) g(:npi,:npi) = wrk else - call dtrtrs('U', 'T', 'N', nf2, 1, R, size(R,1), wrk, nf2, info) - call dtrtrs('U', 'N', 'N', nf2, 1, R, size(R,1), wrk, nf2, info) +! call dtrtrs('u', 't', 'n', nf2, 1, R, size(R,1), wrk, nf2, info) +! call dtrtrs('u', 'n', 'n', nf2, 1, R, size(R,1), wrk, nf2, info) g(:npi,:npi) = zero do fj = 1,nf do fi = 1,nf @@ -1620,7 +1621,7 @@ subroutine gfr_pg1_init(gfr) n = np*np - call dpotrf('U', n, gfr%pg1sd%Achol, size(gfr%pg1sd%Achol,1), info) +! call dpotrf('u', n, gfr%pg1sd%Achol, size(gfr%pg1sd%Achol,1), info) if (info /= 0) print *, 'gfr ERROR> dpotrf returned', info do i = 1,n @@ -1631,8 +1632,8 @@ subroutine gfr_pg1_init(gfr) gfr%pg1sd%s = reshape(gfr%w_gg(:np,:np), (/np*np/)) ! Form R's = c - call dtrtrs('U', 'T', 'N', n, 1, gfr%pg1sd%Achol, size(gfr%pg1sd%Achol,1), & - gfr%pg1sd%s, np*np, info) +! call dtrtrs('u', 't', 'n', n, 1, gfr%pg1sd%Achol, size(gfr%pg1sd%Achol,1), & +! 
gfr%pg1sd%s, np*np, info) if (info /= 0) print *, 'gfr ERROR> dtrtrs returned', info gfr%pg1sd%sts = sum(gfr%pg1sd%s*gfr%pg1sd%s) end subroutine gfr_pg1_init @@ -1665,11 +1666,11 @@ subroutine gfr_pg1_solve(gfr, s, g) mass = sum(gfr%w_gg*g) ! Solve R'z = b. - call dtrtrs('U', 'T', 'N', n, 1, s%Achol, size(s%Achol,1), x, np*np, info) +! call dtrtrs('u', 't', 'n', n, 1, s%Achol, size(s%Achol,1), x, np*np, info) ! Assemble z + (d - s'z)/(s's) s. x(:n) = x(:n) + ((mass - sum(s%s(:n)*x(:n)))/s%sts)*s%s(:n) ! Solve R x = z + (d - s'z)/(s's) s. - call dtrtrs('U', 'N', 'N', n, 1, s%Achol, size(s%Achol,1), x, np*np, info) +! call dtrtrs('u', 'n', 'n', n, 1, s%Achol, size(s%Achol,1), x, np*np, info) ! Extract g(I). g = reshape(x(:n), (/np,np/)) diff --git a/components/homme/src/test_src/dcmip2016-supercell.F90 b/components/homme/src/test_src/dcmip2016-supercell.F90 index afd1c2a3914..20489e87564 100644 --- a/components/homme/src/test_src/dcmip2016-supercell.F90 +++ b/components/homme/src/test_src/dcmip2016-supercell.F90 @@ -203,10 +203,10 @@ SUBROUTINE supercell_init() & lwork = 5*nphi ddphibak = ddphi - call DGESVD('A', 'A', & - nphi, nphi, ddphibak, nphi, & - svdps, svdpu, nphi, svdpvt, nphi, & - pwork, lwork, info) +! call DGESVD('A', 'A', & +! nphi, nphi, ddphibak, nphi, & +! svdps, svdpu, nphi, svdpvt, nphi, & +! pwork, lwork, info) if (info .ne. 0) then write(*,*) 'Unable to compute SVD of d/dphi matrix' @@ -215,23 +215,23 @@ SUBROUTINE supercell_init() & do i = 1, nphi if (abs(svdps(i)) .le. 1.0d-12) then - call DSCAL(nphi, 0.0d0, svdpu(1,i), 1) +! call DSCAL(nphi, 0.0d0, svdpu(1,i), 1) else - call DSCAL(nphi, 1.0d0 / svdps(i), svdpu(1,i), 1) +! call DSCAL(nphi, 1.0d0 / svdps(i), svdpu(1,i), 1) end if end do - call DGEMM('T', 'T', & - nphi, nphi, nphi, 1.0d0, svdpvt, nphi, svdpu, nphi, 0.0d0, & - intphi, nphi) +! call DGEMM('T', 'T', & +! nphi, nphi, nphi, 1.0d0, svdpvt, nphi, svdpu, nphi, 0.0d0, & +! intphi, nphi) ! 
Compute the int(dz) operator via pseudoinverse lwork = 5*nz ddzbak = ddz - call DGESVD('A', 'A', & - nz, nz, ddzbak, nz, & - svdzs, svdzu, nz, svdzvt, nz, & - zwork, lwork, info) +! call DGESVD('A', 'A', & +! nz, nz, ddzbak, nz, & +! svdzs, svdzu, nz, svdzvt, nz, & +! zwork, lwork, info) if (info .ne. 0) then write(*,*) 'Unable to compute SVD of d/dz matrix' @@ -240,14 +240,14 @@ SUBROUTINE supercell_init() & do i = 1, nz if (abs(svdzs(i)) .le. 1.0d-12) then - call DSCAL(nz, 0.0d0, svdzu(1,i), 1) +! call DSCAL(nz, 0.0d0, svdzu(1,i), 1) else - call DSCAL(nz, 1.0d0 / svdzs(i), svdzu(1,i), 1) +! call DSCAL(nz, 1.0d0 / svdzs(i), svdzu(1,i), 1) end if end do - call DGEMM('T', 'T', & - nz, nz, nz, 1.0d0, svdzvt, nz, svdzu, nz, 0.0d0, & - intz, nz) +! call DGEMM('T', 'T', & +! nz, nz, nz, 1.0d0, svdzvt, nz, svdzu, nz, 0.0d0, & +! intz, nz) ! Sample the equatorial velocity field and its derivative do k = 1, nz diff --git a/components/homme/src/theta-l_kokkos/config.h.cmake.in b/components/homme/src/theta-l_kokkos/config.h.cmake.in index f5cacd509ff..b36d7d55bc2 100644 --- a/components/homme/src/theta-l_kokkos/config.h.cmake.in +++ b/components/homme/src/theta-l_kokkos/config.h.cmake.in @@ -75,3 +75,5 @@ /* Detect whether COMPOSE passive tracer transport is enabled */ #cmakedefine HOMME_ENABLE_COMPOSE + +#cmakedefine TESTER_NOMPI diff --git a/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp index 38f9dc8573d..febb7eb0a7f 100644 --- a/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp @@ -350,6 +350,8 @@ struct CaarFunctorImpl { Kokkos::parallel_reduce("caar loop pre-boundary exchange", m_policy_pre, *this, nerr); Kokkos::fence(); GPTLstop("caar compute"); + +#ifndef TESTER_NOMPI if (nerr > 0) check_print_abort_on_bad_elems("CaarFunctorImpl::run TagPreExchange", data.n0); @@ -366,10 +368,19 @@ struct CaarFunctorImpl { } 
limiter.run(data.np1); +#endif profiling_pause(); } +#define K1 +#undef K2 +#undef K3 +#undef K4 +#undef K5 +#undef K6 +#undef K7 + KOKKOS_INLINE_FUNCTION void operator()(const TagPreExchange&, const TeamMember &team, int& nerr) const { // In this body, we use '====' to separate sync epochs (delimited by barriers) @@ -377,54 +388,76 @@ struct CaarFunctorImpl { KernelVariables kv(team, m_tu); +#ifdef K1 // =========== EPOCH 1 =========== // compute_div_vdp(kv); +#endif +#ifdef K2 // =========== EPOCH 2 =========== // kv.team_barrier(); - // Computes pi, omega, and phi. const bool ok = compute_scan_quantities(kv); if ( ! ok) nerr = 1; +#endif +#if 0 if (m_rsplit==0 || !m_theta_hydrostatic_mode) { // ============ EPOCH 2.1 =========== // kv.team_barrier(); compute_interface_quantities(kv); } +#endif +#if 0 if (m_rsplit==0) { // ============= EPOCH 2.2 ============ // kv.team_barrier(); compute_vertical_advection(kv); } +#endif +#ifdef K3 // ============= EPOCH 3 ============== // kv.team_barrier(); compute_accumulated_quantities(kv); +#endif +#if 0 // Compute update quantities if (!m_theta_hydrostatic_mode) { compute_w_and_phi_tens (kv); } +#endif +#ifdef K4 compute_dp_and_theta_tens (kv); +#endif +#ifdef K5 // ============= EPOCH 4 =========== // // compute_v_tens reuses some buffers used by compute_dp_and_theta_tens kv.team_barrier(); compute_v_tens (kv); +#endif +#if 0 // Update states if (!m_theta_hydrostatic_mode) { compute_w_and_phi_np1(kv); } +#endif + +#ifdef K6 compute_dp3d_and_theta_np1(kv); +#endif +#ifdef K7 // ============= EPOCH 5 =========== // // v_tens has been computed after last barrier. 
Need to make sure it's done kv.team_barrier(); compute_v_np1(kv); +#endif } KOKKOS_INLINE_FUNCTION diff --git a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp index ace1ba92014..671c46bfc54 100644 --- a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp @@ -382,7 +382,7 @@ struct DirkFunctorImpl { kv.team_barrier(); if (it >= maxiter) { - printf("[DIRK] WARNING! Newton reached max iteration count," + Kokkos::print("[DIRK] WARNING! Newton reached max iteration count," " with deltaerr = %3.17f\n", deltaerr); nerr = 1; } diff --git a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp index bf93be710e9..00585505510 100644 --- a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp @@ -141,7 +141,7 @@ struct LimiterFunctor { [&](const int k,Real& result) { #ifndef HOMMEXX_BFB_TESTING if(diff_as_real(k) < 0){ - printf("WARNING:CAAR: dp3d too small. k=%d, dp3d(k)=%f, dp0=%f \n", + Kokkos::print("WARNING:CAAR: dp3d too small. 
k=%d, dp3d(k)=%f, dp0=%f \n", k+1,dp_as_real(k),dp0_as_real(k)); } #endif @@ -202,7 +202,7 @@ struct LimiterFunctor { for (int ivec=0; ivec 0) { + +std::cout << "INSIDE w phi assignment m_process_nh_vars is true \n"; - if (m_process_nh_vars) { m_delta_w = decltype(m_delta_w) ("w_i increments",elements.num_elems()); m_delta_phinh = decltype(m_delta_phinh) ("phinh_i increments",elements.num_elems()); } +if(m_process_nh_vars){ + std::cout << "2hey m_process_nh_vars is true \n"; +}else +{ + std::cout << "2hey m_process_nh_vars is false \n"; +} + m_hvcoord = Context::singleton().get(); assert (m_hvcoord.m_inited); +if(m_process_nh_vars){ + std::cout << "3hey m_process_nh_vars is true \n"; +}else +{ + std::cout << "3hey m_process_nh_vars is false \n"; +} + m_eos.init(params.theta_hydrostatic_mode,m_hvcoord); m_elem_ops.init(m_hvcoord); - } + + if(m_process_nh_vars){ + std::cout << "4hey m_process_nh_vars is true \n"; +}else +{ + std::cout << "4hey m_process_nh_vars is false \n"; +} + + } int requested_buffer_size (int num_teams) const { - if (!m_process_nh_vars) { + +if(m_process_nh_vars){ + std::cout << "IN REQUESTED hey m_process_nh_vars is true \n"; +}else +{ + std::cout << "IN REQUESTED hey m_process_nh_vars is false \n"; +} + + //if (!m_process_nh_vars) { + if (m_process_nh_vars==0) { return 0; } @@ -81,8 +129,20 @@ struct RemapStateProvider { } void init_buffers(const FunctorsBuffersManager& fbm, int num_teams) { - if (!m_process_nh_vars) { - return; + +if(m_process_nh_vars){ + std::cout << "IN BUFFERS hey m_process_nh_vars is true \n"; +}else +{ + std::cout << "IN BUFFERS hey m_process_nh_vars is false \n"; +} + + + //if (!m_process_nh_vars) { + if (m_process_nh_vars==0) { + + std::cout << "hey we should be returning from init_buffers \n"; + return; } Scalar* mem = reinterpret_cast(fbm.get_memory()); @@ -95,17 +155,20 @@ struct RemapStateProvider { KOKKOS_INLINE_FUNCTION int num_states_remap() const { - return (m_process_nh_vars ? 
5 : 3); + //return (m_process_nh_vars ? 5 : 3); + return ( (m_process_nh_vars>0) ? 5 : 3); } KOKKOS_INLINE_FUNCTION int num_states_preprocess() const { - return (m_process_nh_vars ? 2 : 0); + //return (m_process_nh_vars ? 2 : 0); + return ( (m_process_nh_vars>0) ? 2 : 0); } KOKKOS_INLINE_FUNCTION int num_states_postprocess() const { - return (m_process_nh_vars ? 2 : 0); + //return (m_process_nh_vars ? 2 : 0); + return ((m_process_nh_vars>0) ? 2 : 0); } KOKKOS_INLINE_FUNCTION From 4f24ef4e31b6acf5f7ca80437cfcf0b0f08f987e Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 7 Nov 2023 16:34:31 +0000 Subject: [PATCH 03/85] wip cime changes --- .../machines/cmake_macros/oneapi-ifxgpu.cmake | 9 ++++--- .../oneapi-ifxgpu_sunspot-gen.cmake | 26 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-gen.cmake diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake index bc92818df9b..a44dc2bfa16 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake @@ -14,9 +14,9 @@ if (DEBUG) string(APPEND CFLAGS " -O0 -g") string(APPEND CXXFLAGS " -O0 -g") endif() -string(APPEND CFLAGS " -traceback -fp-model precise -std=gnu99") -string(APPEND CXXFLAGS " -traceback -fp-model precise") -string(APPEND FFLAGS " -traceback -convert big_endian -assume byterecl -assume realloc_lhs -fp-model precise") +string(APPEND CFLAGS " -fp-model precise -std=gnu99") +string(APPEND CXXFLAGS " -fp-model precise") +string(APPEND FFLAGS " -convert big_endian -assume byterecl -assume realloc_lhs -fp-model precise") set(SUPPORTS_CXX "TRUE") string(APPEND CPPDEFS " -DFORTRANUNDERSCORE -DNO_R16 -DCPRINTEL -DHAVE_SLASHPROC -DHIDE_MPI") string(APPEND FC_AUTO_R8 " -r8") @@ -30,3 +30,6 @@ set(MPICXX "mpicxx") set(SCC "icx") set(SCXX "icpx") set(SFC "ifx") + + + diff --git 
a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-gen.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-gen.cmake new file mode 100644 index 00000000000..1f355208ff1 --- /dev/null +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-gen.cmake @@ -0,0 +1,26 @@ + +set(CXX_LINKER "CXX") + +execute_process(COMMAND $ENV{NETCDF_PATH}/bin/nf-config --flibs OUTPUT_VARIABLE SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0 OUTPUT_STRIP_TRAILING_WHITESPACE) + +string(APPEND SLIBS " ${SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0} -Wl,-rpath -Wl,$ENV{NETCDF_PATH}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core") + +execute_process(COMMAND $ENV{NETCDF_PATH}/bin/nc-config --libs OUTPUT_VARIABLE SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0 OUTPUT_STRIP_TRAILING_WHITESPACE) + +string(APPEND SLIBS " ${SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0}") +string(APPEND SLIBS " -fiopenmp -fopenmp-targets=spir64") + +set(NETCDF_PATH "$ENV{NETCDF_PATH}") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") + +set(USE_SYCL "TRUE") + +string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_GEN=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") + +string(APPEND SYCL_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -fsycl -mlong-double-64 -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") + +#string(APPEND SYCL_FLAGS " -\-intel -fsycl") +string(APPEND CXX_LDFLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -fsycl -lsycl -mlong-double-64 -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64") + + + From 18800c6e46b38dbd14be744e60332cf5c4e5f521 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 8 Nov 2023 19:00:33 +0000 Subject: [PATCH 04/85] sync ekat with its branch --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index a468d04e442..0d851fc93b3 
160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit a468d04e442a3a7fa170563afa9a103c61170b10 +Subproject commit 0d851fc93b3a79b716bc32b2c32166f491d44aac From ea9006e9a10f890832f10062f80d582c9b1c9bba Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 8 Nov 2023 19:02:07 +0000 Subject: [PATCH 05/85] config, wip --- .../oneapi-ifxgpu_sunspot-gen.cmake | 4 + cime_config/machines/config_batch.xml | 8 ++ cime_config/machines/config_machines.xml | 116 ++++++++++++++++++ components/eamxx/CMakeLists.txt | 13 +- .../cmake/machine-files/sunspot-gen.cmake | 31 +++++ .../eamxx/src/dynamics/homme/CMakeLists.txt | 3 +- .../eamxx/src/physics/rrtmgp/CMakeLists.txt | 3 + 7 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 components/eamxx/cmake/machine-files/sunspot-gen.cmake diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-gen.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-gen.cmake index 1f355208ff1..9c9eb97add1 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-gen.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-gen.cmake @@ -22,5 +22,9 @@ string(APPEND SYCL_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -fsyc #string(APPEND SYCL_FLAGS " -\-intel -fsycl") string(APPEND CXX_LDFLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -fsycl -lsycl -mlong-double-64 -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64") +SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "") +SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") +SET(CMAKE_FORTRAN_COMPILER "mpifort" CACHE STRING "") + diff --git a/cime_config/machines/config_batch.xml b/cime_config/machines/config_batch.xml index 726714c40f2..632abc2b3e2 100644 --- a/cime_config/machines/config_batch.xml +++ b/cime_config/machines/config_batch.xml @@ -520,6 +520,14 @@ + + /lus/gila/projects/CSC249ADSE15_CNDA/tools/qsub/throttle + + workq + debug + + 
+ --output=slurm.out diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 5f69e3c61f3..31c2134e342 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3048,6 +3048,122 @@ + + + + + + + + + ANL Sunspot Test and Development System (TDS), batch system is pbspro + uan-.* + LINUX + oneapi-ifxgpu + mpich + CSC249ADSE15_CNDA + /gila/CSC249ADSE15_CNDA/performance_archive + .* + /lus/gila/projects/CSC249ADSE15_CNDA/$USER/scratch + /lus/gila/projects/CSC249ADSE15_CNDA/inputdata + /lus/gila/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 + $CIME_OUTPUT_ROOT/archive/$CASE + /lus/gila/projects/CSC249ADSE15_CNDA/baselines/$COMPILER + /lus/gila/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc + 16 + e3sm_developer + 4 + pbspro + e3sm + 12 + 12 + 12 + 12 + FALSE + + mpiexec + + + -np {{ total_tasks }} --label + -ppn {{ tasks_per_node }} + --cpu-bind depth -envall + -d $ENV{OMP_NUM_THREADS} + $ENV{GPU_TILE_COMPACT} + + + + /soft/packaging/lmod/lmod/init/sh + /soft/packaging/lmod/lmod/init/csh + /soft/packaging/lmod/lmod/init/env_modules_python.py + module + module + /soft/packaging/lmod/lmod/libexec/lmod python + + + /soft/modulefiles + spack cmake + /soft/restricted/CNDA/updates/modulefiles + + + oneapi/eng-compiler/2022.12.30.003 + mpich/52.2/icc-all-pmix-gpu + + + + + + + + cray-pals + append-deps/default + libfabric/1.15.2.0 + + + $CIME_OUTPUT_ROOT/$CASE/run + $CIME_OUTPUT_ROOT/$CASE/bld + + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf + + + 1 + + + level_zero:gpu + NO_GPU + 0 + disable + disable + 1 + 4000MB + 0 + /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh + 131072 + 20 + + + verbose,granularity=thread,balanced + 128M + + + -1 + + + + + + + + + + + + + + + + + ANL Sunspot Test and Development System (TDS), batch system is pbspro uan-.* diff --git 
a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 263e8d4b9b6..9c193f68e3f 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -120,6 +120,7 @@ include(CTest) set (EAMXX_ENABLE_GPU FALSE CACHE BOOL "") set (CUDA_BUILD FALSE CACHE BOOL "") #needed for yakl if kokkos vars are not visible there? set (HIP_BUILD FALSE CACHE BOOL "") #needed for yakl if kokkos vars are not visible there? +set (SYCL_BUILD FALSE CACHE BOOL "") #needed for yakl if kokkos vars are not visible there? # Determine if this is a Cuda build. if (Kokkos_ENABLE_CUDA) @@ -129,7 +130,7 @@ if (Kokkos_ENABLE_CUDA) set (CUDA_BUILD TRUE CACHE BOOL "" FORCE) #needed for yakl if kokkos vars are not visible there? endif () -# Determine if this is a Cuda build. +# Determine if this is a HIP build. if (Kokkos_ENABLE_HIP) # Add CUDA as a language for CUDA builds enable_language(HIP) @@ -137,6 +138,13 @@ if (Kokkos_ENABLE_HIP) set (HIP_BUILD TRUE CACHE BOOL "" FORCE) #needed for yakl if kokkos vars are not visible there? endif () +# Determine if this is a sycl build. +if (Kokkos_ENABLE_SYCL) + #enable_language(SYCL) + set (EAMXX_ENABLE_GPU TRUE CACHE BOOL "" FORCE) + set (SYCL_BUILD TRUE CACHE BOOL "" FORCE) #needed for yakl if kokkos vars are not visible there? 
+endif () + if( NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "[Cc]lang" ) set (CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp") endif() @@ -198,7 +206,7 @@ elseif(MACH) endif() set(DEFAULT_SMALL_KERNELS FALSE) -if (Kokkos_ENABLE_HIP) +if (Kokkos_ENABLE_HIP OR Kokkos_ENABLE_SYCL) set(DEFAULT_SMALL_KERNELS TRUE) endif() @@ -435,6 +443,7 @@ print_var(SCREAM_MACHINE) print_var(EAMXX_ENABLE_GPU) print_var(CUDA_BUILD) print_var(HIP_BUILD) +print_var(SYCL_BUILD) print_var(SCREAM_DOUBLE_PRECISION) print_var(SCREAM_MIMIC_GPU) print_var(SCREAM_FPE) diff --git a/components/eamxx/cmake/machine-files/sunspot-gen.cmake b/components/eamxx/cmake/machine-files/sunspot-gen.cmake new file mode 100644 index 00000000000..3e33ac7b461 --- /dev/null +++ b/components/eamxx/cmake/machine-files/sunspot-gen.cmake @@ -0,0 +1,31 @@ +include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) +common_setup() + +include (${EKAT_MACH_FILES_PATH}/kokkos/intel-gen.cmake) +include (${EKAT_MACH_FILES_PATH}/kokkos/sycl.cmake) +include (${EKAT_MACH_FILES_PATH}/mpi/srun.cmake) + +#AB flags from ekat +# -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -qopenmp-simd -Wsycl-strict -fsycl-device-code-split=per_kernel +SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") +SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64") + +#SET(MPICH_DIR "/soft/restricted/CNDA/updates/mpich/52.2/mpich-ofi-all-icc-default-pmix-gpu-drop52/" CACHE STRING "") + +set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING 
"" FORCE) +#set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) + + + +set(NETCDF_PATH "$ENV{NETCDF_PATH}") +set(NETCDF_C_PATH "$ENV{NETCDF_PATH}") +#this one is for rrtmgp +set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") + + + diff --git a/components/eamxx/src/dynamics/homme/CMakeLists.txt b/components/eamxx/src/dynamics/homme/CMakeLists.txt index 2613eadefeb..c818b5a8ae4 100644 --- a/components/eamxx/src/dynamics/homme/CMakeLists.txt +++ b/components/eamxx/src/dynamics/homme/CMakeLists.txt @@ -23,7 +23,8 @@ set(BUILD_HOMME_PREQX_KOKKOS OFF CACHE BOOL "") set(BUILD_HOMME_PESE OFF CACHE BOOL "") set(BUILD_HOMME_SWIM OFF CACHE BOOL "") set(BUILD_HOMME_PRIM OFF CACHE BOOL "") -set(HOMME_ENABLE_COMPOSE ON CACHE BOOL "") +#set(HOMME_ENABLE_COMPOSE ON CACHE BOOL "") +set(HOMME_ENABLE_COMPOSE OFF CACHE BOOL "") set(BUILD_HOMME_TOOL OFF CACHE BOOL "") if (NOT Kokkos_ENABLE_SERIAL) diff --git a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt index fa22062bc32..160cc355e4d 100644 --- a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt +++ b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt @@ -26,6 +26,9 @@ else () string (REPLACE " " ";" YAKL_HIP_FLAGS_LIST ${YAKL_HIP_FLAGS}) endif() + ####### SYCL here + + set (YAKL_SOURCE_DIR ${SCREAM_BASE_DIR}/../../externals/YAKL) add_subdirectory(${YAKL_SOURCE_DIR} ${CMAKE_BINARY_DIR}/externals/YAKL) From b8fe4ace54329ae53d57b5d13a60a44b3ed900c6 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 10 Nov 2023 19:20:12 +0000 Subject: [PATCH 06/85] typo --- components/homme/src/share/cxx/utilities/BfbUtils.hpp | 2 +- components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp | 2 +- 
components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/components/homme/src/share/cxx/utilities/BfbUtils.hpp b/components/homme/src/share/cxx/utilities/BfbUtils.hpp index 475cd9f2d95..7fb4d042f7f 100644 --- a/components/homme/src/share/cxx/utilities/BfbUtils.hpp +++ b/components/homme/src/share/cxx/utilities/BfbUtils.hpp @@ -64,7 +64,7 @@ KOKKOS_INLINE_FUNCTION ScalarType int_pow (ScalarType val, int k) { constexpr int max_shift = 30; if (k<0) { - Kokkos::print ("k = %d\n",k); + Kokkos::printf ("k = %d\n",k); Kokkos::abort("int_pow implemented only for k>=0.\n"); } diff --git a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp index 671c46bfc54..a5cf2aa0111 100644 --- a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp @@ -382,7 +382,7 @@ struct DirkFunctorImpl { kv.team_barrier(); if (it >= maxiter) { - Kokkos::print("[DIRK] WARNING! Newton reached max iteration count," + Kokkos::printf("[DIRK] WARNING! Newton reached max iteration count," " with deltaerr = %3.17f\n", deltaerr); nerr = 1; } diff --git a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp index 00585505510..79906948638 100644 --- a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp @@ -141,7 +141,7 @@ struct LimiterFunctor { [&](const int k,Real& result) { #ifndef HOMMEXX_BFB_TESTING if(diff_as_real(k) < 0){ - Kokkos::print("WARNING:CAAR: dp3d too small. k=%d, dp3d(k)=%f, dp0=%f \n", + Kokkos::printf("WARNING:CAAR: dp3d too small. 
k=%d, dp3d(k)=%f, dp0=%f \n", k+1,dp_as_real(k),dp0_as_real(k)); } #endif @@ -202,7 +202,7 @@ struct LimiterFunctor { for (int ivec=0; ivec Date: Fri, 10 Nov 2023 19:20:34 +0000 Subject: [PATCH 07/85] hardwire team size to 4 --- .../eamxx/src/dynamics/homme/physics_dynamics_remapper.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/components/eamxx/src/dynamics/homme/physics_dynamics_remapper.cpp b/components/eamxx/src/dynamics/homme/physics_dynamics_remapper.cpp index 8b41a9fee7c..2b4a9a741cc 100644 --- a/components/eamxx/src/dynamics/homme/physics_dynamics_remapper.cpp +++ b/components/eamxx/src/dynamics/homme/physics_dynamics_remapper.cpp @@ -418,6 +418,10 @@ do_remap_fwd() const int team_size = std::min(256, std::min(128*m_num_phys_cols,32*(concurrency/this->m_num_fields+31)/32)); #endif +#ifdef KOKKOS_ENABLE_SYCL + const int team_size = 4; +#endif + //should exclude above cases of CUDA and HIP #ifndef EAMXX_ENABLE_GPU const int team_size = (concurrencym_num_fields ? 1 : concurrency/this->m_num_fields); From 0601c69bef4035dbd046397c5a4e7d53e872dc2a Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 10 Nov 2023 19:20:57 +0000 Subject: [PATCH 08/85] kokkos prinf --- .../eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/components/eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp b/components/eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp index a407e83c2ee..97025116383 100644 --- a/components/eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp +++ b/components/eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp @@ -234,10 +234,10 @@ void Functions::shoc_assumed_pdf( const Smask is_nan_Tl1_1 = isnan(Tl1_1) && active_entries; const Smask is_nan_Tl1_2 = isnan(Tl1_2) && active_entries; if (is_nan_Tl1_1.any() || is_nan_Tl1_2.any()) { - printf("WARNING: NaN Detected in Tl1_1 or Tl1_2!\n"); + Kokkos::printf("WARNING: NaN Detected in Tl1_1 or Tl1_2!\n"); for (int i=0; 
i::shoc_assumed_pdf( n_mask++; } } - printf("WARNING: Tl1_1 has %d values <= allowable value. Resetting to minimum value.\n",n_mask); + Kokkos::printf("WARNING: Tl1_1 has %d values <= allowable value. Resetting to minimum value.\n",n_mask); } if( is_small_Tl1_2.any() ) { Tl1_2.set(is_small_Tl1_2,Tl_min); @@ -277,7 +277,7 @@ void Functions::shoc_assumed_pdf( n_mask++; } } - printf("WARNING: Tl1_2 has %d values <= allowable value. Resetting to minimum value.\n",n_mask); + Kokkos::printf("WARNING: Tl1_2 has %d values <= allowable value. Resetting to minimum value.\n",n_mask); } // Compute qs and beta From f425901be69fe269683a01b1016143be25127fa5 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 10 Nov 2023 19:36:25 +0000 Subject: [PATCH 09/85] add sunspot-pvc --- .../oneapi-ifxgpu_sunspot-pvc.cmake | 30 +++++ cime_config/machines/config_batch.xml | 8 ++ cime_config/machines/config_machines.xml | 103 +++++++++++++++++- .../cmake/machine-files/sunspot-pvc.cmake | 32 ++++++ 4 files changed, 170 insertions(+), 3 deletions(-) create mode 100644 cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake create mode 100644 components/eamxx/cmake/machine-files/sunspot-pvc.cmake diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake new file mode 100644 index 00000000000..d62f94c40fe --- /dev/null +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake @@ -0,0 +1,30 @@ + +set(CXX_LINKER "CXX") + +execute_process(COMMAND $ENV{NETCDF_PATH}/bin/nf-config --flibs OUTPUT_VARIABLE SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0 OUTPUT_STRIP_TRAILING_WHITESPACE) + +string(APPEND SLIBS " ${SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0} -Wl,-rpath -Wl,$ENV{NETCDF_PATH}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core") + +execute_process(COMMAND $ENV{NETCDF_PATH}/bin/nc-config --libs OUTPUT_VARIABLE SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0 OUTPUT_STRIP_TRAILING_WHITESPACE) + 
+string(APPEND SLIBS " ${SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0}") +string(APPEND SLIBS " -fiopenmp -fopenmp-targets=spir64") + +set(NETCDF_PATH "$ENV{NETCDF_PATH}") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") + +set(USE_SYCL "TRUE") + +string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_GEN=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") + +string(APPEND SYCL_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -fsycl -mlong-double-64 -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") + +#string(APPEND SYCL_FLAGS " -\-intel -fsycl") +string(APPEND CXX_LDFLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -fsycl -lsycl -mlong-double-64 -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64 -Xsycl-target-backend \"-device 12.60.7\"") + +SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "") +SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") +SET(CMAKE_FORTRAN_COMPILER "mpifort" CACHE STRING "") + + + diff --git a/cime_config/machines/config_batch.xml b/cime_config/machines/config_batch.xml index 632abc2b3e2..95918ce2848 100644 --- a/cime_config/machines/config_batch.xml +++ b/cime_config/machines/config_batch.xml @@ -520,6 +520,14 @@ + + /lus/gila/projects/CSC249ADSE15_CNDA/tools/qsub/throttle + + workq + debug + + + /lus/gila/projects/CSC249ADSE15_CNDA/tools/qsub/throttle diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 31c2134e342..4ceece26afc 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3055,7 +3055,7 @@ - + ANL Sunspot Test and Development System (TDS), batch system is pbspro uan-.* LINUX @@ -3105,8 +3105,8 @@ /soft/restricted/CNDA/updates/modulefiles - oneapi/eng-compiler/2022.12.30.003 - mpich/52.2/icc-all-pmix-gpu + oneapi/eng-compiler/2023.10.15.002 + 
mpich/52.2-256/icc-all-pmix-gpu @@ -3153,6 +3153,103 @@ + + ANL Sunspot Test and Development System (TDS), batch system is pbspro + uan-.* + LINUX + oneapi-ifxgpu + mpich + CSC249ADSE15_CNDA + /gila/CSC249ADSE15_CNDA/performance_archive + .* + /lus/gila/projects/CSC249ADSE15_CNDA/$USER/scratch + /lus/gila/projects/CSC249ADSE15_CNDA/inputdata + /lus/gila/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 + $CIME_OUTPUT_ROOT/archive/$CASE + /lus/gila/projects/CSC249ADSE15_CNDA/baselines/$COMPILER + /lus/gila/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc + 16 + e3sm_developer + 4 + pbspro + e3sm + 12 + 12 + 12 + 12 + FALSE + + mpiexec + + + -np {{ total_tasks }} --label + -ppn {{ tasks_per_node }} + --cpu-bind depth -envall + -d $ENV{OMP_NUM_THREADS} + $ENV{GPU_TILE_COMPACT} + + + + /soft/packaging/lmod/lmod/init/sh + /soft/packaging/lmod/lmod/init/csh + /soft/packaging/lmod/lmod/init/env_modules_python.py + module + module + /soft/packaging/lmod/lmod/libexec/lmod python + + + /soft/modulefiles + spack cmake + /soft/restricted/CNDA/updates/modulefiles + + + oneapi/eng-compiler/2023.10.15.002 + mpich/52.2-256/icc-all-pmix-gpu + + + + + + + + cray-pals + append-deps/default + libfabric/1.15.2.0 + + + $CIME_OUTPUT_ROOT/$CASE/run + $CIME_OUTPUT_ROOT/$CASE/bld + + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf + + + 1 + + + level_zero:gpu + NO_GPU + 0 + disable + disable + 1 + 4000MB + 0 + /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh + 131072 + 20 + + + verbose,granularity=thread,balanced + 128M + + + -1 + + + + + diff --git a/components/eamxx/cmake/machine-files/sunspot-pvc.cmake b/components/eamxx/cmake/machine-files/sunspot-pvc.cmake new file mode 100644 index 00000000000..874b73e34eb --- /dev/null +++ b/components/eamxx/cmake/machine-files/sunspot-pvc.cmake @@ -0,0 +1,32 @@ +include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) +common_setup() + +include 
(${EKAT_MACH_FILES_PATH}/kokkos/intel-pvc.cmake) +# kokkos sycl is on in the above file +#include (${EKAT_MACH_FILES_PATH}/kokkos/sycl.cmake) +include (${EKAT_MACH_FILES_PATH}/mpi/srun.cmake) + +#AB flags from ekat +# -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -qopenmp-simd -Wsycl-strict -fsycl-device-code-split=per_kernel +SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") +SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"") + +#SET(MPICH_DIR "/soft/restricted/CNDA/updates/mpich/52.2/mpich-ofi-all-icc-default-pmix-gpu-drop52/" CACHE STRING "") + +set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "" FORCE) +#set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) + + + +set(NETCDF_PATH "$ENV{NETCDF_PATH}") +set(NETCDF_C_PATH "$ENV{NETCDF_PATH}") +#this one is for rrtmgp +set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") + + + From a8a9599d6dd1a39b3fdfd21a617fe4f456407725 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 10 Nov 2023 20:42:37 +0000 Subject: [PATCH 10/85] update ekat, ekat/kokkos --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index 
0d851fc93b3..3b1a7f9fee7 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit 0d851fc93b3a79b716bc32b2c32166f491d44aac +Subproject commit 3b1a7f9fee7848006e5aa53ae0fd334701d8f5a7 From 09602cd3689deda5fb2fc3e8fe161aab2b7f1286 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 10 Nov 2023 20:43:37 +0000 Subject: [PATCH 11/85] load certain cmake and python --- cime_config/machines/config_machines.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 4ceece26afc..b31e9748010 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3101,7 +3101,7 @@ /soft/modulefiles - spack cmake + spack cmake/3.24.2 python/3.9.13-gcc-11.2.0-76jlbxs /soft/restricted/CNDA/updates/modulefiles @@ -3199,7 +3199,7 @@ /soft/modulefiles - spack cmake + spack cmake/3.24.2 python/3.9.13-gcc-11.2.0-76jlbxs /soft/restricted/CNDA/updates/modulefiles From 80bf0d53655bf8913c43f69574c4ea278cb611bc Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 12 Dec 2023 02:14:29 +0000 Subject: [PATCH 12/85] build issue fixed thatnks to Andrew and Daniel A --- .../eamxx/src/control/atmosphere_surface_coupling_exporter.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eamxx/src/control/atmosphere_surface_coupling_exporter.hpp b/components/eamxx/src/control/atmosphere_surface_coupling_exporter.hpp index 18db413ca32..9707c137f2b 100644 --- a/components/eamxx/src/control/atmosphere_surface_coupling_exporter.hpp +++ b/components/eamxx/src/control/atmosphere_surface_coupling_exporter.hpp @@ -23,7 +23,7 @@ namespace scream */ // enum to track how exported fields will be set. 
-enum ExportType { +enum ExportType:int { FROM_MODEL = 0, // Variable will be derived from atmosphere model state FROM_FILE = 1, // Variable will be set given data from a file CONSTANT = 2 // Set variable to a constant value From e2792ba614102685c15f98368f58508df7479f0e Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 29 Jan 2024 21:51:53 +0000 Subject: [PATCH 13/85] bad conflict resolution fixed --- components/homme/cmake/HommeMacros.cmake | 8 -------- 1 file changed, 8 deletions(-) diff --git a/components/homme/cmake/HommeMacros.cmake b/components/homme/cmake/HommeMacros.cmake index 532bd762950..1a49c27e852 100644 --- a/components/homme/cmake/HommeMacros.cmake +++ b/components/homme/cmake/HommeMacros.cmake @@ -156,15 +156,7 @@ macro(createTestExec execName execType macroNP macroNC ENDIF () IF (HOMME_USE_KOKKOS) -<<<<<<< HEAD - if("${E3SM_KOKKOS_PATH}" STREQUAL "") - target_link_libraries(${execName} kokkos) - else() - link_to_kokkos(${execName}) - endif() -======= target_link_libraries(${execName} Kokkos::kokkos) ->>>>>>> origin/master ENDIF () # Move the module files out of the way so the parallel build From 735bac660641d9e22c764429e9e5155f9aff818c Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 30 Jan 2024 20:16:09 +0000 Subject: [PATCH 14/85] changes for aurora --- cime_config/machines/config_machines.xml | 9 ++++----- components/eamxx/src/physics/rrtmgp/CMakeLists.txt | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index b0803f499ed..826397f2891 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3503,19 +3503,18 @@ /soft/modulefiles /soft/restricted/CNDA/updates/modulefiles - spack-pe-gcc cmake + spack-pe-gcc/0.4-rc1 cmake/3.26.4-gcc-testing - oneapi/eng-compiler/2023.05.15.007 + oneapi/release/2023.12.15.001 spack-pe-gcc cmake - gcc/10.3.0 + gcc/11.2.0 - cray-pals + cray-pals/1.3.3 
libfabric/1.15.2.0 - cray-libpals/1.3.2 $CIME_OUTPUT_ROOT/$CASE/run diff --git a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt index 0f0b3eff6af..b5403587ca7 100644 --- a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt +++ b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt @@ -127,12 +127,12 @@ yakl_process_target(scream_rrtmgp_yakl) # NOTE: cannot use 'PUBLIC' in target_link_libraries, # since yakl_process_target already used it # with the "plain" signature -find_library(NETCDF_C netcdf HINTS ${NetCDF_C_PATH}/lib) +find_library(NETCDF_C netcdf HINTS $ENV{NETCDF_C_PATH}/lib) target_link_libraries(scream_rrtmgp_yakl ${NETCDF_C} rrtmgp scream_share) target_include_directories(scream_rrtmgp_yakl PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(scream_rrtmgp_yakl SYSTEM PUBLIC - ${NetCDF_C_PATH}/include + ${NETCDF_C_PATH}/include ${EAM_RRTMGP_DIR}/external) ################################## From f5dfd58bf13c4812eeec1ab157cd6cf58e7cbe6f Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 30 Jan 2024 20:16:32 +0000 Subject: [PATCH 15/85] cache file --- .../eamxx/cmake/machine-files/aurora.cmake | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 components/eamxx/cmake/machine-files/aurora.cmake diff --git a/components/eamxx/cmake/machine-files/aurora.cmake b/components/eamxx/cmake/machine-files/aurora.cmake new file mode 100644 index 00000000000..874b73e34eb --- /dev/null +++ b/components/eamxx/cmake/machine-files/aurora.cmake @@ -0,0 +1,32 @@ +include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) +common_setup() + +include (${EKAT_MACH_FILES_PATH}/kokkos/intel-pvc.cmake) +# kokkos sycl is on in the above file +#include (${EKAT_MACH_FILES_PATH}/kokkos/sycl.cmake) +include (${EKAT_MACH_FILES_PATH}/mpi/srun.cmake) + +#AB flags from ekat +# -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -qopenmp-simd -Wsycl-strict -fsycl-device-code-split=per_kernel +SET(SYCL_COMPILE_FLAGS 
"-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") +SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"") + +#SET(MPICH_DIR "/soft/restricted/CNDA/updates/mpich/52.2/mpich-ofi-all-icc-default-pmix-gpu-drop52/" CACHE STRING "") + +set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "" FORCE) +#set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) + + + +set(NETCDF_PATH "$ENV{NETCDF_PATH}") +set(NETCDF_C_PATH "$ENV{NETCDF_PATH}") +#this one is for rrtmgp +set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") + + + From 69b07b8a4fdeade85bedb3218e99b83947b6889a Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 9 Feb 2024 19:06:26 +0000 Subject: [PATCH 16/85] wip --- .../cmake_macros/oneapi-ifxgpu_aurora.cmake | 8 +++- cime_config/machines/config_machines.xml | 7 ++-- .../eamxx/src/control/atmosphere_driver.cpp | 32 ++++++++++++++++ .../eamxx/src/mct_coupling/atm_comp_mct.F90 | 38 +++++++++++++++++-- .../mct_coupling/scream_cxx_f90_interface.cpp | 9 +++++ .../atm_process/atmosphere_process_group.cpp | 10 +++++ .../homme/src/share/cxx/prim_driver.cpp | 10 ++++- 7 files changed, 104 insertions(+), 10 deletions(-) diff --git 
a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake index 47d513408c2..f72f2f6b0e2 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake @@ -1,7 +1,13 @@ -string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_core") +string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=16") if (compile_threaded) string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64") endif() string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") string(APPEND SYCL_FLAGS " -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") + +set(SCREAM_MPI_ON_DEVICE OFF CACHE STRING "") + + + + diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 826397f2891..70306fbc7bf 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3503,6 +3503,7 @@ /soft/modulefiles /soft/restricted/CNDA/updates/modulefiles + cray-python/3.9.13.1 spack-pe-gcc/0.4-rc1 cmake/3.26.4-gcc-testing @@ -3523,8 +3524,8 @@ /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 - 
/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} - /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} + /opt/cray/pe/gcc-libs:/opt/cray/pe/python/3.9.13.1/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} + /opt/cray/pe/python/3.9.13.1/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 @@ -3532,7 +3533,7 @@ level_zero:gpu - NO_GPU + 0 disable disable diff --git a/components/eamxx/src/control/atmosphere_driver.cpp b/components/eamxx/src/control/atmosphere_driver.cpp index b9e0862c5a1..8268caf9653 100644 --- a/components/eamxx/src/control/atmosphere_driver.cpp +++ b/components/eamxx/src/control/atmosphere_driver.cpp @@ -209,25 +209,40 @@ setup_intensive_observation_period () void AtmosphereDriver::create_atm_processes() { + + std::cout << "OG cinit 1 \n" << std::flush; + m_atm_logger->info("[EAMxx] create_atm_processes 
..."); + std::cout << "OG cinit 2 \n" << std::flush; start_timer("EAMxx::init"); + std::cout << "OG cinit 3 \n" << std::flush; start_timer("EAMxx::create_atm_processes"); + std::cout << "OG cinit 4 \n" << std::flush; // At this point, must have comm and params set. check_ad_status(s_comm_set | s_params_set); + std::cout << "OG cinit 5 \n" << std::flush; // Create the group of processes. This will recursively create the processes // tree, storing also the information regarding parallel execution (if needed). // See AtmosphereProcessGroup class documentation for more details. auto& atm_proc_params = m_atm_params.sublist("atmosphere_processes"); + std::cout << "OG cinit 6 \n" << std::flush; atm_proc_params.rename("EAMxx"); + std::cout << "OG cinit 7 \n" << std::flush; atm_proc_params.set("Logger",m_atm_logger); + std::cout << "OG cinit 8 \n" << std::flush; m_atm_process_group = std::make_shared(m_atm_comm,atm_proc_params); + std::cout << "OG cinit 9 \n" << std::flush; m_ad_status |= s_procs_created; + std::cout << "OG cinit 10 \n" << std::flush; stop_timer("EAMxx::create_atm_processes"); + std::cout << "OG cinit 11 \n" << std::flush; stop_timer("EAMxx::init"); + std::cout << "OG cinit 12 \n" << std::flush; m_atm_logger->info("[EAMxx] create_atm_processes ... 
done!"); + std::cout << "OG cinit 13 \n" << std::flush; } void AtmosphereDriver::create_grids() @@ -1492,15 +1507,25 @@ initialize_constant_field(const FieldIdentifier& fid, void AtmosphereDriver::initialize_atm_procs () { + std::cout << "OG init 1 \n" << std::flush; m_atm_logger->info("[EAMxx] initialize_atm_procs ..."); start_timer("EAMxx::init"); start_timer("EAMxx::initialize_atm_procs"); + std::cout << "OG init 2 \n" << std::flush; // Initialize memory buffer for all atm processes + std::cout << "OG hhhinit 3 \n" << std::flush; m_memory_buffer = std::make_shared(); + std::cout << "OG init 4 \n" << std::flush; + + + m_memory_buffer->request_bytes(m_atm_process_group->requested_buffer_size_in_bytes()); + std::cout << "OG init 5 \n" << std::flush; m_memory_buffer->allocate(); + std::cout << "OG init 6 \n" << std::flush; m_atm_process_group->init_buffers(*m_memory_buffer); + std::cout << "OG init 7 \n" << std::flush; const bool restarted_run = m_case_t0 < m_run_t0; @@ -1509,19 +1534,24 @@ void AtmosphereDriver::initialize_atm_procs () setup_surface_coupling_processes(); } + std::cout << "OG init 8 \n" << std::flush; // Initialize the processes m_atm_process_group->initialize(m_current_ts, restarted_run ? 
RunType::Restarted : RunType::Initial); + std::cout << "OG init 9 \n" << std::flush; // Create and add energy and mass conservation check to appropriate atm procs setup_column_conservation_checks(); + std::cout << "OG init 10 \n" << std::flush; // If user requests it, we set up NaN checks for all computed fields after each atm proc run if (m_atm_params.sublist("driver_options").get("check_all_computed_fields_for_nans",true)) { m_atm_process_group->add_postcondition_nan_checks(); } + std::cout << "OG init 11 \n" << std::flush; // Add additional column data fields to pre/postcondition checks (if they exist) add_additional_column_data_to_property_checks(); + std::cout << "OG init 12 \n" << std::flush; if (fvphyshack) { // [CGLL ICs in pg2] See related notes in atmosphere_dynamics.cpp. @@ -1530,12 +1560,14 @@ void AtmosphereDriver::initialize_atm_procs () m_field_mgrs.erase(gn); } + std::cout << "OG init 13 \n" << std::flush; m_ad_status |= s_procs_inited; stop_timer("EAMxx::initialize_atm_procs"); stop_timer("EAMxx::init"); m_atm_logger->info("[EAMxx] initialize_atm_procs ... done!"); + std::cout << "OG init 14 \n" << std::flush; report_res_dep_memory_footprint (); } diff --git a/components/eamxx/src/mct_coupling/atm_comp_mct.F90 b/components/eamxx/src/mct_coupling/atm_comp_mct.F90 index 34bbbedcc5c..2471280135f 100644 --- a/components/eamxx/src/mct_coupling/atm_comp_mct.F90 +++ b/components/eamxx/src/mct_coupling/atm_comp_mct.F90 @@ -35,8 +35,8 @@ module atm_comp_mct integer :: mpicom_atm ! mpi communicator integer(IN) :: my_task ! my task in mpi communicator mpicom integer :: inst_index ! number of current instance (ie. 1) - character(len=16) :: inst_name ! fullname of current instance (ie. "lnd_0001") - character(len=16) :: inst_suffix = "" ! char string associated with instance (ie. "_0001" or "") + character(len=16) :: inst_name ! fullname of current instance (ie. "lnd_0001") + character(len=16) :: inst_suffix = "" ! char string associated with instance (ie.
"_0001" or "") integer(IN) :: ATM_ID ! mct comp id integer(IN),parameter :: master_task=0 ! task number of master task @@ -97,6 +97,8 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) !------------------------------------------------------------------------------- +print *,'OG a 1' + ! Grab some data from the cdata structure (coming from the coupler) call seq_cdata_setptrs(cdata, & id=ATM_ID, & @@ -104,38 +106,51 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) gsMap=gsmap_atm, & dom=dom_atm, & infodata=infodata) +print *, 'OG a 2' call seq_infodata_getData(infodata, atm_phase=phase, start_type=run_type, & username=username, case_name=caseid, hostname=hostname) - call seq_infodata_PutData(infodata, atm_aero=.true.) +print *, 'OG a 3' + call seq_infodata_PutData(infodata, atm_aero=.true.) +print *, 'OG a 4' call seq_infodata_PutData(infodata, atm_prognostic=.true.) +print *, 'OG a 5' if (phase > 1) RETURN +print *, 'OG a 6' ! Determine instance information inst_name = seq_comm_name(ATM_ID) inst_index = seq_comm_inst(ATM_ID) inst_suffix = seq_comm_suffix(ATM_ID) +print *, 'OG a 7' ! Determine communicator group call mpi_comm_rank(mpicom_atm, my_task, ierr) +print *, 'OG a 8' !---------------------------------------------------------------------------- !
Init atm.log !---------------------------------------------------------------------------- - +print *, 'OG a 9' if (my_task == master_task) then +print *, 'OG a 10' atm_log_unit = shr_file_getUnit() call shr_file_setIO ('atm_modelio.nml'//trim(inst_suffix),atm_log_unit) inquire(unit=atm_log_unit,name=atm_log_fname) endif +print *, 'OG a 11' call mpi_bcast(atm_log_unit,1,MPI_INTEGER,master_task,mpicom_atm,mpi_ierr) +print *, 'OG a 12' if (ierr /= 0) then +print *, 'OG a 13' print *,'[eamxx] ERROR broadcasting atm.log unit' call mpi_abort(mpicom_atm,ierr,mpi_ierr) end if +print *, 'OG a 14' call mpi_bcast(atm_log_fname,256,MPI_CHARACTER,master_task,mpicom_atm,ierr) +print *, 'OG a 15' if (ierr /= 0) then print *,'[eamxx] ERROR broadcasting atm.log file name' call mpi_abort(mpicom_atm,ierr,mpi_ierr) @@ -146,29 +161,40 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) !---------------------------------------------------------------------------- ! Init the AD +print *, 'OG a 16' call seq_timemgr_EClockGetData(EClock, calendar=calendar, & curr_ymd=cur_ymd, curr_tod=cur_tod, & start_ymd=case_start_ymd, start_tod=case_start_tod) +print *, 'OG a 17' call string_f2c(yaml_fname,yaml_fname_c) +print *, 'OG a 18' call string_f2c(calendar,calendar_c) +print *, 'OG a 19' call string_f2c(trim(atm_log_fname),atm_log_fname_c) +print *, 'OG a 20' call scream_create_atm_instance (mpicom_atm, ATM_ID, yaml_fname_c, atm_log_fname_c, & INT(cur_ymd,kind=C_INT), INT(cur_tod,kind=C_INT), & INT(case_start_ymd,kind=C_INT), INT(case_start_tod,kind=C_INT), & calendar_c) +print *, 'OG a 21' ! Init MCT gsMap call atm_Set_gsMap_mct (mpicom_atm, ATM_ID, gsMap_atm) +print *, 'OG a 22' lsize = mct_gsMap_lsize(gsMap_atm, mpicom_atm) +print *, 'OG a 23' ! Init MCT domain structure call atm_domain_mct (lsize, gsMap_atm, dom_atm) +print *, 'OG a 24' ! 
Init import/export mct attribute vectors call mct_aVect_init(x2a, rList=seq_flds_x2a_fields, lsize=lsize) +print *, 'OG a 25' call mct_aVect_init(a2x, rList=seq_flds_a2x_fields, lsize=lsize) +print *, 'OG a 26' ! Complete AD initialization based on run type if (trim(run_type) == trim(seq_infodata_start_type_start)) then restarted_run = .false. @@ -179,8 +205,10 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) call mpi_abort(mpicom_atm,ierr,mpi_ierr) endif +print *, 'OG a 27' ! Init surface coupling stuff in the AD call scream_set_cpl_indices (x2a, a2x) +print *, 'OG a 28' call scream_setup_surface_coupling (c_loc(import_field_names), c_loc(import_cpl_indices), & c_loc(x2a%rAttr), c_loc(import_vector_components), & @@ -191,11 +219,13 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) c_loc(export_constant_multiple), c_loc(do_export_during_init), & num_cpl_exports, num_scream_exports, export_field_size) +print *, 'OG a 29' call string_f2c(trim(caseid),caseid_c) call string_f2c(trim(username),username_c) call string_f2c(trim(hostname),hostname_c) call scream_init_atm (caseid_c,hostname_c,username_c) +print *, 'OG a 30' end subroutine atm_init_mct !=============================================================================== diff --git a/components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp b/components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp index 0bdf90eeb71..83bf5ba8741 100644 --- a/components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp +++ b/components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp @@ -210,19 +210,28 @@ void scream_init_atm (const char* caseid, using namespace scream::control; fpe_guard_wrapper([&](){ + + std::cout << "OG s 1 \n" << std::flush; + // Get the ad, then complete initialization auto& ad = get_ad_nonconst(); + std::cout << "OG s 2 \n" << std::flush; // Set provenance info in the driver (will be added to the output files) ad.set_provenance_data (caseid,hostname,username); 
+ std::cout << "OG s 3 \n" << std::flush; // Init all fields, atm processes, and output streams ad.initialize_fields (); + std::cout << "OG s 4 \n" << std::flush; ad.initialize_atm_procs (); + std::cout << "OG s 5 \n" << std::flush; // Do this before init-ing the output managers, // so the fields are valid if outputing at t=0 ad.reset_accumulated_fields(); + std::cout << "OG s 6 \n" << std::flush; ad.initialize_output_managers (); + std::cout << "OG s 7 \n" << std::flush; }); } diff --git a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp index 9d5ff488929..7985c9dc2dc 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp @@ -365,7 +365,17 @@ void AtmosphereProcessGroup::add_additional_data_fields_to_property_checks (cons } void AtmosphereProcessGroup::initialize_impl (const RunType run_type) { + + int mmm = 0; + for (auto& atm_proc : m_atm_processes) { + + mmm++; + std::cout << "process is "<< mmm << "\n" << std::flush; + std::cout << "process name is "<< atm_proc->name() << "\n"<< std::flush; + + m_atm_logger->flush(); + atm_proc->initialize(timestamp(),run_type); #ifdef SCREAM_HAS_MEMORY_USAGE long long my_mem_usage = get_mem_usage(MB); diff --git a/components/homme/src/share/cxx/prim_driver.cpp b/components/homme/src/share/cxx/prim_driver.cpp index b0f41a68ce7..b795ece613f 100644 --- a/components/homme/src/share/cxx/prim_driver.cpp +++ b/components/homme/src/share/cxx/prim_driver.cpp @@ -62,7 +62,10 @@ void prim_run_subcycle_c (const Real& dt, int& nstep, int& nm1, int& n0, int& np { GPTLstart("tl-sc prim_run_subcycle_c"); - auto& context = Context::singleton(); + std::cout << "OG--------------- In SUBCYCLE?\n"; + + + auto& context = Context::singleton(); // Get simulation params SimulationParams& params = context.get(); @@ -160,7 +163,10 @@ void prim_run_subcycle_c (const 
Real& dt, int& nstep, int& nm1, int& n0, int& np //////////////////////////////////////////////////////////////////////// update_q(tl.np1_qdp,tl.np1); } else { // independent_time_steps - prim_step_flexible(dt, compute_diagnostics); + + std::cout << "OG --------------- In FLEXIBLE?\n"; + + prim_step_flexible(dt, compute_diagnostics); } if (compute_diagnostics) { From 094798fda0b3870d34c662d1286fbce83b4fbd0c Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 9 Feb 2024 19:49:19 +0000 Subject: [PATCH 17/85] point to newest ekat changes --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index 3b1a7f9fee7..5b785f02cb2 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit 3b1a7f9fee7848006e5aa53ae0fd334701d8f5a7 +Subproject commit 5b785f02cb29f5226aad9b175f86c79a31b32037 From 8daf07470460174c025519a82987aed2ca42c4b0 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 13 Feb 2024 23:34:05 +0000 Subject: [PATCH 18/85] add auroracpu machine --- .../machines/cmake_macros/oneapi-ifx.cmake | 4 +- .../cmake_macros/oneapi-ifx_auroracpu.cmake | 19 ++++ cime_config/machines/config_batch.xml | 11 +- cime_config/machines/config_machines.xml | 101 ++++++++++++++++++ .../eamxx/cmake/machine-files/auroracpu.cmake | 29 +++++ 5 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 cime_config/machines/cmake_macros/oneapi-ifx_auroracpu.cmake create mode 100644 components/eamxx/cmake/machine-files/auroracpu.cmake diff --git a/cime_config/machines/cmake_macros/oneapi-ifx.cmake b/cime_config/machines/cmake_macros/oneapi-ifx.cmake index e98a65d32a6..e9a0f838b1f 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifx.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifx.cmake @@ -23,4 +23,6 @@ set(MPICXX "mpicxx") set(SCC "icx") set(SCXX "icpx") set(SFC "ifx") -set(E3SM_LINK_WITH_FORTRAN "TRUE") + + +#set(E3SM_LINK_WITH_FORTRAN "TRUE") diff --git 
a/cime_config/machines/cmake_macros/oneapi-ifx_auroracpu.cmake b/cime_config/machines/cmake_macros/oneapi-ifx_auroracpu.cmake new file mode 100644 index 00000000000..bd6ec8ed913 --- /dev/null +++ b/cime_config/machines/cmake_macros/oneapi-ifx_auroracpu.cmake @@ -0,0 +1,19 @@ + +string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_core") +if (compile_threaded) + string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64") +endif() + +string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") + +#set(SCREAM_MPI_ON_DEVICE OFF CACHE STRING "") + + + + + + + + + + diff --git a/cime_config/machines/config_batch.xml b/cime_config/machines/config_batch.xml index 86d6673de60..d1347eaa0df 100644 --- a/cime_config/machines/config_batch.xml +++ b/cime_config/machines/config_batch.xml @@ -575,7 +575,16 @@ workq - + + + /lus/gecko/projects/CSC249ADSE15_CNDA/tools/qsub/throttle + + EarlyAppAccess + workq-route + workq + + + --output=slurm.out diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 70306fbc7bf..2a1c0da8f2d 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3565,6 +3565,107 @@ + + + + ALCF Aurora, 10624 nodes, 2x52c SPR, 6x2s PVC, 2x512GB DDR5, 2x64GB CPU-HBM, 6x128GB GPU-HBM, Slingshot 11, PBSPro + aurora-uan-.* + LINUX + oneapi-ifx + mpich + CSC249ADSE15_CNDA + /lus/gecko/projects/CSC249ADSE15_CNDA/performance_archive + .* + /lus/gecko/projects/CSC249ADSE15_CNDA/$USER/scratch + /lus/gecko/projects/CSC249ADSE15_CNDA/inputdata + /lus/gecko/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 + $CIME_OUTPUT_ROOT/archive/$CASE + /lus/gecko/projects/CSC249ADSE15_CNDA/baselines/$COMPILER + /lus/gecko/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc + 16 + e3sm_developer + 4 + pbspro + e3sm + 208 + 104 + FALSE + + mpiexec + + + -np {{ total_tasks }} --label + -ppn {{ 
tasks_per_node }} + --cpu-bind $ENV{RANKS_BIND} -envall + -d $ENV{OMP_NUM_THREADS} + $ENV{GPU_TILE_COMPACT} + + + + /lus/gecko/projects/CSC249ADSE15_CNDA/modules/lmod.sh + /soft/sunspot_migrate/soft/packaging/lmod/lmod/init/csh + /soft/sunspot_migrate/soft/packaging/lmod/lmod/init/env_modules_python.py + module + module + /soft/sunspot_migrate/soft/packaging/lmod/lmod/libexec/lmod python + + + /soft/modulefiles + /soft/restricted/CNDA/updates/modulefiles + cray-python/3.9.13.1 + spack-pe-gcc/0.4-rc1 cmake/3.26.4-gcc-testing + + + oneapi/release/2023.12.15.001 + + + cray-pals/1.3.3 + libfabric/1.15.2.0 + + + $CIME_OUTPUT_ROOT/$CASE/run + $CIME_OUTPUT_ROOT/$CASE/bld + + /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 + /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 + /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 + /opt/cray/pe/gcc-libs:/opt/cray/pe/python/3.9.13.1/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} + /opt/cray/pe/python/3.9.13.1/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} + list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 + + + 1 + + + 0 + DISABLED + 131072 + 20 + 0 + + + + verbose,granularity=thread,balanced + 128M + + + threads + 128M + + + -1 + + + + + + + + + + + + PNL cluster, OS is Linux, batch 
system is SLURM sooty diff --git a/components/eamxx/cmake/machine-files/auroracpu.cmake b/components/eamxx/cmake/machine-files/auroracpu.cmake new file mode 100644 index 00000000000..839c4c09814 --- /dev/null +++ b/components/eamxx/cmake/machine-files/auroracpu.cmake @@ -0,0 +1,29 @@ +include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) +common_setup() + +include (${EKAT_MACH_FILES_PATH}/kokkos/serial.cmake) +# kokkos sycl is on in the above file +#include (${EKAT_MACH_FILES_PATH}/kokkos/sycl.cmake) +include (${EKAT_MACH_FILES_PATH}/mpi/srun.cmake) + +#AB flags from ekat +# -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -qopenmp-simd -Wsycl-strict -fsycl-device-code-split=per_kernel + +#SET(MPICH_DIR "/soft/restricted/CNDA/updates/mpich/52.2/mpich-ofi-all-icc-default-pmix-gpu-drop52/" CACHE STRING "") + +set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -mlong-double-64 -DNDEBUG -fortlib" CACHE STRING "" FORCE) +#set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) + + +set(NETCDF_PATH "$ENV{NETCDF_PATH}") +set(NETCDF_C_PATH "$ENV{NETCDF_PATH}") +#this one is for rrtmgp +set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") + + + From 1c44b208b230646f7d35fd1ab82e32df80dc4829 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 13 Feb 2024 23:34:24 +0000 Subject: [PATCH 19/85] adding iostream --- components/eamxx/src/share/util/scream_utils.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/eamxx/src/share/util/scream_utils.hpp 
b/components/eamxx/src/share/util/scream_utils.hpp index 4dddebf75aa..1989ce07e0b 100644 --- a/components/eamxx/src/share/util/scream_utils.hpp +++ b/components/eamxx/src/share/util/scream_utils.hpp @@ -12,6 +12,8 @@ #include #include +#include <iostream> + namespace scream { enum MemoryUnits { From ca14fc6271f28bf0b70f844abe2c02898d67ed3b Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 13 Feb 2024 23:38:48 +0000 Subject: [PATCH 20/85] updated ekat branch --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index 5b785f02cb2..d252d191a18 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit 5b785f02cb29f5226aad9b175f86c79a31b32037 +Subproject commit d252d191a184aa73cd7666cf986bea94eb3f3688 From 1caf5dc228e0180deccc05372cf9d2db6bcadcbb Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 21 Feb 2024 19:16:16 +0000 Subject: [PATCH 21/85] bring changes from standalone homme sycl branch --- .../homme/src/share/cxx/ExecSpaceDefs.hpp | 6 + .../homme/src/share/cxx/prim_driver.cpp | 10 +- .../src/share/cxx/utilities/BfbUtils.hpp | 4 + .../theta-l_kokkos/cxx/CaarFunctorImpl.hpp | 162 +++++++++++++++++- .../theta-l_kokkos/cxx/DirkFunctorImpl.hpp | 6 +- .../cxx/HyperviscosityFunctorImpl.cpp | 9 +- .../cxx/HyperviscosityFunctorImpl.hpp | 2 +- .../src/theta-l_kokkos/cxx/LimiterFunctor.hpp | 14 +- .../theta-l_kokkos/cxx/RemapStateProvider.hpp | 70 ++++++-- .../cxx/cxx_f90_interface_theta.cpp | 18 +- 10 files changed, 263 insertions(+), 38 deletions(-) diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.hpp b/components/homme/src/share/cxx/ExecSpaceDefs.hpp index d799af38783..6c0da08d7f0 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.hpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.hpp @@ -65,6 +65,12 @@ using Hommexx_Serial = void; # define HOMMEXX_STATIC static #endif + +// a hack to have a cpu build without rebuilding kokkos +//#define HOMMEXX_SERIAL_SPACE + + + //
Selecting the execution space. If no specific request, use Kokkos default // exec space #ifdef HOMMEXX_ENABLE_GPU diff --git a/components/homme/src/share/cxx/prim_driver.cpp b/components/homme/src/share/cxx/prim_driver.cpp index b795ece613f..b0f41a68ce7 100644 --- a/components/homme/src/share/cxx/prim_driver.cpp +++ b/components/homme/src/share/cxx/prim_driver.cpp @@ -62,10 +62,7 @@ void prim_run_subcycle_c (const Real& dt, int& nstep, int& nm1, int& n0, int& np { GPTLstart("tl-sc prim_run_subcycle_c"); - std::cout << "OG--------------- In SUBCYCLE?\n"; - - - auto& context = Context::singleton(); + auto& context = Context::singleton(); // Get simulation params SimulationParams& params = context.get(); @@ -163,10 +160,7 @@ void prim_run_subcycle_c (const Real& dt, int& nstep, int& nm1, int& n0, int& np //////////////////////////////////////////////////////////////////////// update_q(tl.np1_qdp,tl.np1); } else { // independent_time_steps - - std::cout << "OG --------------- In FLEXIBLE?\n"; - - prim_step_flexible(dt, compute_diagnostics); + prim_step_flexible(dt, compute_diagnostics); } if (compute_diagnostics) { diff --git a/components/homme/src/share/cxx/utilities/BfbUtils.hpp b/components/homme/src/share/cxx/utilities/BfbUtils.hpp index 7fb4d042f7f..2d85109e2a2 100644 --- a/components/homme/src/share/cxx/utilities/BfbUtils.hpp +++ b/components/homme/src/share/cxx/utilities/BfbUtils.hpp @@ -64,7 +64,11 @@ KOKKOS_INLINE_FUNCTION ScalarType int_pow (ScalarType val, int k) { constexpr int max_shift = 30; if (k<0) { +#ifdef KOKKOS_ENABLE_SYCL Kokkos::printf ("k = %d\n",k); +#else + printf ("k = %d\n",k); +#endif Kokkos::abort("int_pow implemented only for k>=0.\n"); } diff --git a/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp index febb7eb0a7f..4a861d5c747 100644 --- a/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp 
@@ -41,7 +41,7 @@ struct CaarFunctorImpl { struct Buffers { static constexpr int num_3d_scalar_mid_buf = 10; - static constexpr int num_3d_vector_mid_buf = 5; + static constexpr int num_3d_vector_mid_buf = 6; //<-- for vvdp variable static constexpr int num_3d_scalar_int_buf = 6; static constexpr int num_3d_vector_int_buf = 3; @@ -76,6 +76,9 @@ struct CaarFunctorImpl { ExecViewUnmanaged phi_tens; }; + ExecViewUnmanaged vvdp; + //ExecViewUnmanaged vv_tens; + using deriv_type = ReferenceElement::deriv_type; RKStageData m_data; @@ -106,6 +109,10 @@ struct CaarFunctorImpl { struct TagPreExchange {}; struct TagPostExchange {}; +#ifdef TESTER_NOMPI + struct TagPreExchangeTest {}; +#endif + // Policies #ifndef NDEBUG template @@ -117,6 +124,10 @@ struct CaarFunctorImpl { TeamPolicyType m_policy_pre; +#ifdef TESTER_NOMPI + TeamPolicyType m_policy_pre_test; +#endif + Kokkos::RangePolicy m_policy_post; TeamUtils m_tu; @@ -138,6 +149,9 @@ struct CaarFunctorImpl { , m_deriv(ref_FE.get_deriv()) , m_sphere_ops(sphere_ops) , m_policy_pre (Homme::get_default_team_policy(m_num_elems)) +#ifdef TESTER_NOMPI + , m_policy_pre_test (Homme::get_default_team_policy(m_num_elems)) +#endif , m_policy_post (0,m_num_elems*NP*NP) , m_tu(m_policy_pre) { @@ -155,6 +169,9 @@ struct CaarFunctorImpl { , m_theta_advection_form(params.theta_adv_form) , m_pgrad_correction(params.pgrad_correction) , m_policy_pre (Homme::get_default_team_policy(m_num_elems)) +#ifdef TESTER_NOMPI + , m_policy_pre_test (Homme::get_default_team_policy(m_num_elems)) +#endif , m_policy_post (0,num_elems*NP*NP) , m_tu(m_policy_pre) {} @@ -256,6 +273,10 @@ struct CaarFunctorImpl { m_buffers.vdp = decltype(m_buffers.vdp )(mem,nslots); mem += m_buffers.vdp.size(); + + vvdp = decltype(vvdp )(mem,nslots); + mem += vvdp.size(); + m_buffers.v_tens = decltype(m_buffers.v_tens )(mem,nslots); mem += m_buffers.v_tens.size(); @@ -349,6 +370,10 @@ struct CaarFunctorImpl { int nerr; Kokkos::parallel_reduce("caar loop pre-boundary exchange", 
m_policy_pre, *this, nerr); Kokkos::fence(); +#ifdef TESTER_NOMPI + Kokkos::parallel_for("caar loop pre-boundary test", m_policy_pre_test, *this); + Kokkos::fence(); +#endif GPTLstop("caar compute"); #ifndef TESTER_NOMPI @@ -373,13 +398,43 @@ struct CaarFunctorImpl { profiling_pause(); } +#ifdef TESTER_NOMPI + KOKKOS_INLINE_FUNCTION + void operator()(const TagPreExchangeTest&, const TeamMember& team) const { + KernelVariables kv(team, m_tu); + test_dp_tendency(kv); + } +#endif + + +#ifndef TESTER_NOMPI +#define K1 +#define K2 +#define K2a +#define K2b +#define K3 +#define K3b +#define K4 +#define K5 +#define K5a +#define K6 +#define K7 + +#else + #define K1 #undef K2 +#undef K2a +#undef K2b #undef K3 +#undef K3b #undef K4 #undef K5 +#undef K5a #undef K6 #undef K7 +#endif + KOKKOS_INLINE_FUNCTION void operator()(const TagPreExchange&, const TeamMember &team, int& nerr) const { @@ -401,7 +456,7 @@ struct CaarFunctorImpl { if ( ! ok) nerr = 1; #endif -#if 0 +#ifdef K2a if (m_rsplit==0 || !m_theta_hydrostatic_mode) { // ============ EPOCH 2.1 =========== // kv.team_barrier(); @@ -409,7 +464,7 @@ struct CaarFunctorImpl { } #endif -#if 0 +#ifdef K2b if (m_rsplit==0) { // ============= EPOCH 2.2 ============ // kv.team_barrier(); @@ -423,7 +478,7 @@ struct CaarFunctorImpl { compute_accumulated_quantities(kv); #endif -#if 0 +#ifdef K3b // Compute update quantities if (!m_theta_hydrostatic_mode) { compute_w_and_phi_tens (kv); @@ -441,7 +496,7 @@ struct CaarFunctorImpl { compute_v_tens (kv); #endif -#if 0 +#ifdef K5a // Update states if (!m_theta_hydrostatic_mode) { compute_w_and_phi_np1(kv); @@ -532,25 +587,122 @@ struct CaarFunctorImpl { const int igp = idx / NP; const int jgp = idx % NP; +//ORIGINAL = subviews + call to div +//do not use vvdp in the !ORIGINAL version +//because it makes caar_ut fail. udp field is probably used in other functors, +//reverting to vvdp array will be easy if needed in c1_ut tests.
+ +#define ORIGINAL +//#undef ORIGINAL + auto u = Homme::subview(m_state.m_v,kv.ie,m_data.n0,0,igp,jgp); auto v = Homme::subview(m_state.m_v,kv.ie,m_data.n0,1,igp,jgp); auto dp3d = Homme::subview(m_state.m_dp3d,kv.ie,m_data.n0,igp,jgp); auto udp = Homme::subview(m_buffers.vdp,kv.team_idx,0,igp,jgp); auto vdp = Homme::subview(m_buffers.vdp,kv.team_idx,1,igp,jgp); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV), [&] (const int& ilev) { udp(ilev) = u(ilev)*dp3d(ilev); vdp(ilev) = v(ilev)*dp3d(ilev); + + //version without subviews + //m_buffers.vdp(kv.team_idx,0,igp,jgp,ilev) = m_state.m_dp3d(kv.ie,m_data.n0,igp,jgp,ilev)* + // m_state.m_v(kv.ie,m_data.n0,0,igp,jgp,ilev); + //m_buffers.vdp(kv.team_idx,1,igp,jgp,ilev) = m_state.m_dp3d(kv.ie,m_data.n0,igp,jgp,ilev)* + // m_state.m_v(kv.ie,m_data.n0,1,igp,jgp,ilev); + + //version with vvdp instead of udp + //vvdp(kv.team_idx,0,igp,jgp,ilev) = m_state.m_dp3d(kv.ie,m_data.n0,igp,jgp,ilev)* + // m_state.m_v(kv.ie,m_data.n0,0,igp,jgp,ilev); + //vvdp(kv.team_idx,1,igp,jgp,ilev) = m_state.m_dp3d(kv.ie,m_data.n0,igp,jgp,ilev)* + // m_state.m_v(kv.ie,m_data.n0,1,igp,jgp,ilev); }); }); kv.team_barrier(); // Compute div(vdp) +#ifdef ORIGINAL m_sphere_ops.divergence_sphere(kv, Homme::subview(m_buffers.vdp, kv.team_idx), Homme::subview(m_buffers.div_vdp, kv.team_idx)); +#else + + const Real aa = 1.0, bb=0.0; + + //example of calling _cm + //m_sphere_ops.divergence_sphere_cm(kv, + // Homme::subview(vvdp, kv.team_idx), + // Homme::subview(m_buffers.div_vdp, kv.team_idx), + // aa, bb, NUM_LEV); + +//inlined version of divergence_sphere_cm + const auto& D_inv = Homme::subview(m_sphere_ops.m_dinv, kv.ie); + const auto& metdet = Homme::subview(m_sphere_ops.m_metdet, kv.ie); + ExecViewUnmanaged gv_buf( + Homme::subview(m_sphere_ops.vector_buf_ml,kv.team_idx, 0).data()); + constexpr int np_squared = NP * NP; + Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, np_squared), + [&](const int loop_idx) { + const int igp = 
loop_idx / NP; + const int jgp = loop_idx % NP; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV), [&] (const int& ilev) { + //const auto& v0 = vvdp(kv.team_idx,0, igp, jgp, ilev); + //const auto& v1 = vvdp(kv.team_idx,1, igp, jgp, ilev); + + const auto& v0 = m_buffers.vdp(kv.team_idx,0, igp, jgp, ilev); + const auto& v1 = m_buffers.vdp(kv.team_idx,1, igp, jgp, ilev); + + gv_buf(0,igp,jgp,ilev) = (D_inv(0,0,igp,jgp) * v0 + D_inv(1,0,igp,jgp) * v1) * metdet(igp,jgp); + gv_buf(1,igp,jgp,ilev) = (D_inv(0,1,igp,jgp) * v0 + D_inv(1,1,igp,jgp) * v1) * metdet(igp,jgp); + }); + }); + kv.team_barrier(); + // j, l, i -> i, j, k + constexpr int div_iters = NP * NP; + Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, div_iters), + [&](const int loop_idx) { + const int igp = loop_idx / NP; + const int jgp = loop_idx % NP; + Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV), [&] (const int& ilev) { + Scalar dudx, dvdy; + for (int kgp = 0; kgp < NP; ++kgp) { + dudx += m_sphere_ops.dvv(jgp, kgp) * gv_buf(0, igp, kgp, ilev); + dvdy += m_sphere_ops.dvv(igp, kgp) * gv_buf(1, kgp, jgp, ilev); + } + combine((dudx + dvdy) * (1.0 / metdet(igp, jgp) * m_sphere_ops.m_scale_factor_inv), + m_buffers.div_vdp(kv.team_idx,igp, jgp, ilev), aa, bb); + }); + }); + kv.team_barrier(); + +#endif } + +#ifdef TESTER_NOMPI +// a kernel only for perf c1 test, to put div(vdp) into dp tendency +// to print it on host for verification + KOKKOS_INLINE_FUNCTION + void test_dp_tendency(KernelVariables &kv) const { + // Compute vdp + Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, NP * NP), + [&](const int idx) { + const int igp = idx / NP; + const int jgp = idx % NP; + + auto div_vdp = Homme::subview(m_buffers.div_vdp,kv.team_idx,igp,jgp); + auto dp_np1 = Homme::subview(m_state.m_dp3d,kv.ie,m_data.np1,igp,jgp); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team,NUM_LEV), + [&](const int ilev) { + dp_np1(ilev) += div_vdp(ilev); + }); + }); + } +#endif + + 
KOKKOS_INLINE_FUNCTION bool compute_scan_quantities (KernelVariables &kv) const { bool ok = true; diff --git a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp index a5cf2aa0111..44a8af7fb70 100644 --- a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp @@ -382,8 +382,12 @@ struct DirkFunctorImpl { kv.team_barrier(); if (it >= maxiter) { +#ifdef KOKKOS_ENABLE_SYCL Kokkos::printf("[DIRK] WARNING! Newton reached max iteration count," - " with deltaerr = %3.17f\n", deltaerr); +#else + printf("[DIRK] WARNING! Newton reached max iteration count," +#endif + " with deltaerr = %3.17f\n", deltaerr); nerr = 1; } diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp index 046e6f9956d..24750a570a9 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp @@ -118,9 +118,14 @@ void HyperviscosityFunctorImpl::init_params(const SimulationParams& params) m_eos.init(params.theta_hydrostatic_mode,m_hvcoord); #ifdef HOMMEXX_BFB_TESTING - m_process_nh_vars = true; + m_process_nh_vars = 1; #else - m_process_nh_vars = !params.theta_hydrostatic_mode; + //m_process_nh_vars = !params.theta_hydrostatic_mode; + if (params.theta_hydrostatic_mode){ + m_process_nh_vars = 0; + }else{ + m_process_nh_vars = 1; + } #endif } diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp index a55ecbb365f..993d525422f 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp @@ -397,7 +397,7 @@ class HyperviscosityFunctorImpl Buffers m_buffers; HybridVCoord m_hvcoord; 
- bool m_process_nh_vars; + int m_process_nh_vars; // Policies Kokkos::TeamPolicy m_policy_update_states; diff --git a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp index 79906948638..8513cb39d30 100644 --- a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp @@ -141,8 +141,12 @@ struct LimiterFunctor { [&](const int k,Real& result) { #ifndef HOMMEXX_BFB_TESTING if(diff_as_real(k) < 0){ +#ifdef KOKKOS_ENABLE_SYCL Kokkos::printf("WARNING:CAAR: dp3d too small. k=%d, dp3d(k)=%f, dp0=%f \n", - k+1,dp_as_real(k),dp0_as_real(k)); +#else + printf("WARNING:CAAR: dp3d too small. k=%d, dp3d(k)=%f, dp0=%f \n", +#endif + k+1,dp_as_real(k),dp0_as_real(k)); } #endif result = result<=diff_as_real(k) ? result : diff_as_real(k); @@ -202,8 +206,12 @@ struct LimiterFunctor { for (int ivec=0; ivec>>>>>>>>>>> m_process_nh_vars " << m_process_nh_vars << " \n"; + std::cout << ">>>>>>>>>>>> m_process_nh_vars_bool " << m_process_nh_vars_bool << " \n"; +if(m_process_nh_vars){ std::cout << "hey m_process_nh_vars is true \n"; -}else -{ +}else{ std::cout << "hey m_process_nh_vars is false \n"; } +if(m_process_nh_vars_bool){ + std::cout << "hey m_process_nh_vars_bool is true \n"; +}else{ + std::cout << "hey m_process_nh_vars_bool is false \n"; +} + +////////////////////////// put abort if bool assignment failed + + +//if(params.theta_hydrostatic_mode && m_process_nh_vars_bool) +//Kokkos::abort("BOOL assignment failed, (params.theta_hydrostatic_mode && m_process_nh_vars_bool) == TRUE.\n"); - if (m_process_nh_vars > 0) { + + + + + if (m_process_nh_vars) { std::cout << "INSIDE w phi assignment m_process_nh_vars is true \n"; @@ -116,8 +156,8 @@ if(m_process_nh_vars){ std::cout << "IN REQUESTED hey m_process_nh_vars is false \n"; } - //if (!m_process_nh_vars) { - if (m_process_nh_vars==0) { + if (!m_process_nh_vars) { + //if (m_process_nh_vars==0) { 
return 0; } @@ -137,9 +177,7 @@ if(m_process_nh_vars){ std::cout << "IN BUFFERS hey m_process_nh_vars is false \n"; } - - //if (!m_process_nh_vars) { - if (m_process_nh_vars==0) { + if (!m_process_nh_vars) { std::cout << "hey we should be returning from init_buffers \n"; return; @@ -156,19 +194,19 @@ if(m_process_nh_vars){ KOKKOS_INLINE_FUNCTION int num_states_remap() const { //return (m_process_nh_vars ? 5 : 3); - return ( (m_process_nh_vars>0) ? 5 : 3); + return ( (m_process_nh_vars) ? 5 : 3); } KOKKOS_INLINE_FUNCTION int num_states_preprocess() const { //return (m_process_nh_vars ? 2 : 0); - return ( (m_process_nh_vars>0) ? 2 : 0); + return ( (m_process_nh_vars) ? 2 : 0); } KOKKOS_INLINE_FUNCTION int num_states_postprocess() const { //return (m_process_nh_vars ? 2 : 0); - return ((m_process_nh_vars>0) ? 2 : 0); + return ((m_process_nh_vars) ? 2 : 0); } KOKKOS_INLINE_FUNCTION diff --git a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp index ec4e2cbe632..f0f59205bde 100644 --- a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp @@ -49,7 +49,15 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, const double& scale_factor, const double& laplacian_rigid_factor, const int& nsplit, const bool& pgrad_correction, const double& dp3d_thresh, const double& vtheta_thresh, const int& internal_diagnostics_level) { - // Check that the simulation options are supported. This helps us in the future, since we + +if(theta_hydrostatic_mode){ + std::cout << " HEEEEEEEEEEEtheta_hydrostatic_mode =TRUE \n"; +}else +{ + std::cout << " HEEEEEEEEEEEtheta_hydrostatic_mode =FALSE \n"; +} + + // Check that the simulation options are supported. This helps us in the future, since we // are currently 'assuming' some option have/not have certain values. 
As we support for more // options in the C++ build, we will remove some checks Errors::check_option("init_simulation_params_c","vert_remap_q_alg",remap_alg,{1,3,10}); @@ -115,7 +123,13 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, params.moisture = (moisture ? MoistDry::MOIST : MoistDry::DRY); params.use_cpstar = use_cpstar; params.transport_alg = transport_alg; - params.theta_hydrostatic_mode = theta_hydrostatic_mode; + +if(theta_hydrostatic_mode){ + params.theta_hydrostatic_mode = true; +}else{ + params.theta_hydrostatic_mode = false; +} + //params.theta_hydrostatic_mode = theta_hydrostatic_mode; params.dcmip16_mu = dcmip16_mu; params.nsplit = nsplit; params.scale_factor = scale_factor; From 6bca905574b241eee51a01d8e9b4f478b69e4d1c Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 23 Feb 2024 03:02:32 +0000 Subject: [PATCH 22/85] partial fix, adds escaped quotation marks --- components/eamxx/src/physics/rrtmgp/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt index b5403587ca7..8fbdd1435a8 100644 --- a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt +++ b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt @@ -28,6 +28,12 @@ else () endif() ####### SYCL here + if (SYCL_BUILD) + set(YAKL_ARCH "SYCL") + set(YAKL_SYCL_FLAGS "-DYAKL_ARCH_SYCL -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") + string (REPLACE " " ";" YAKL_SYCL_FLAGS_LIST ${YAKL_SYCL_FLAGS}) + endif() + set (YAKL_SOURCE_DIR ${SCREAM_BASE_DIR}/../../externals/YAKL) From 9f954b82cb05cee5fe390443852df7cb7fcb50ab Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 7 Mar 2024 16:38:48 +0000 Subject: [PATCH 23/85] debug statements --- .../atm_process/atmosphere_process_group.cpp | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git 
a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp index 7985c9dc2dc..cf69e569697 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp @@ -8,6 +8,14 @@ #include + +#include "share/scream_session.hpp" +#include "mct_coupling/ScreamContext.hpp" +#include "control/atmosphere_driver.hpp" +#include +#include "physics/share/physics_constants.hpp" + + namespace scream { AtmosphereProcessGroup:: @@ -399,12 +407,42 @@ void AtmosphereProcessGroup::run_sequential (const double dt) { auto ts = timestamp(); ts += dt; + + + auto& c = scream::ScreamContext::singleton(); + auto ad = c.getNonConst(); + const auto gn = "Physics"; + //const auto gn = "Physics GLL"; + const auto& phys_grid = ad.get_grids_manager()->get_grid(gn); + //auto area = phys_grid->get_geometry_data("area").get_view(); + const auto fm = ad.get_field_mgr(gn); + const int ncols = fm->get_grid()->get_num_local_dofs(); + const int nlevs = fm->get_grid()->get_num_vertical_levels(); + + //fm->get_field("T_mid").sync_to_host(); + auto ff = fm->get_field("T_mid").get_view(); + +#if 0 + //const auto vv = ff(1,1); + for (int ii = 0; ii < ncols; ii++) + for (int jj = 0; jj < nlevs; jj++){ + const auto vv = ff(ii,jj); +m_atm_logger->info("OG T field ("+std::to_string(ii)+","+std::to_string(jj)+") = "+std::to_string(vv)); +std::cout << "OG T field (" <name() << " dt="<set_update_time_stamps(do_update); // Run the process atm_proc->run(dt); From f61ce743938684035ccf7c9ae60afc07c0020013 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 7 Mar 2024 16:39:45 +0000 Subject: [PATCH 24/85] -g flags --- cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake index 
9d08ca6c630..857f194bf72 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake @@ -4,9 +4,9 @@ if (compile_threaded) string(APPEND CMAKE_CXX_FLAGS " -qopenmp") string(APPEND CMAKE_EXE_LINKER_FLAGS " -qopenmp") endif() -string(APPEND CMAKE_C_FLAGS_RELEASE " -O2") -string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2") -string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2") +string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g") +string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -g") +string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -g") string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") From 263da75856f61b96052a6ebe585c6cb93c79a5c4 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 7 Mar 2024 16:40:03 +0000 Subject: [PATCH 25/85] debug printf --- components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp b/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp index 47d543ded02..3e8cc6642ec 100644 --- a/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp +++ b/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp @@ -256,6 +256,8 @@ class P3Microphysics : public AtmosphereProcess // Unlike above, these fluxes do not need to be accumulated // since the conservation checks are run after each // Microphysics step. 
+ + Kokkos::printf("OG -- before compute_mass_and_energy_fluxes"); if (compute_mass_and_energy_fluxes) { vapor_flux(icol) = 0.0; water_flux(icol) = precip_liq_surf_flux(icol)+precip_ice_surf_flux(icol); From 706bc26662f866722015898f13f8c2661c82d4ba Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sun, 17 Mar 2024 22:40:21 +0000 Subject: [PATCH 26/85] modify sunspot config, change yakl files --- .../oneapi-ifxgpu_sunspot-pvc.cmake | 35 +++++++------------ cime_config/machines/config_machines.xml | 14 +++++--- .../eamxx/src/physics/rrtmgp/CMakeLists.txt | 2 +- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake index d62f94c40fe..2719498f760 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake @@ -1,30 +1,21 @@ -set(CXX_LINKER "CXX") +string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=16") +if (compile_threaded) + string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64") +endif() -execute_process(COMMAND $ENV{NETCDF_PATH}/bin/nf-config --flibs OUTPUT_VARIABLE SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0 OUTPUT_STRIP_TRAILING_WHITESPACE) +if (DEBUG) +#undefined reference to `__msan.... +#https://community.intel.com/t5/Intel-Fortran-Compiler/Linking-errors-when-using-memory-sanitizer-in-fortran-project/m-p/1521476 +#When you compile with -check uninit (or -check all) you also need to link with that compiler option. 
+# string(APPEND CMAKE_EXE_LINKER_FLAGS " -check uninit") +endif() -string(APPEND SLIBS " ${SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0} -Wl,-rpath -Wl,$ENV{NETCDF_PATH}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core") +string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") +string(APPEND SYCL_FLAGS " -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") -execute_process(COMMAND $ENV{NETCDF_PATH}/bin/nc-config --libs OUTPUT_VARIABLE SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0 OUTPUT_STRIP_TRAILING_WHITESPACE) +set(SCREAM_MPI_ON_DEVICE OFF CACHE STRING "") -string(APPEND SLIBS " ${SHELL_CMD_OUTPUT_BUILD_INTERNAL_IGNORE0}") -string(APPEND SLIBS " -fiopenmp -fopenmp-targets=spir64") - -set(NETCDF_PATH "$ENV{NETCDF_PATH}") -set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") - -set(USE_SYCL "TRUE") - -string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_GEN=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") - -string(APPEND SYCL_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -fsycl -mlong-double-64 -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") - -#string(APPEND SYCL_FLAGS " -\-intel -fsycl") -string(APPEND CXX_LDFLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -fsycl -lsycl -mlong-double-64 -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64 -Xsycl-target-backend \"-device 12.60.7\"") - -SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "") -SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") -SET(CMAKE_FORTRAN_COMPILER "mpifort" CACHE STRING "") diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 2a1c0da8f2d..c312835a09e 100644 --- a/cime_config/machines/config_machines.xml +++ 
b/cime_config/machines/config_machines.xml @@ -3182,8 +3182,9 @@ /soft/restricted/CNDA/updates/modulefiles - oneapi/eng-compiler/2023.10.15.002 - mpich/52.2-256/icc-all-pmix-gpu + + oneapi/release/2023.12.15.001 + mpich/52.2-1024/icc-all-pmix-gpu @@ -3199,8 +3200,10 @@ $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld - /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf - /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf + list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 1 @@ -3297,7 +3300,8 @@ $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld - /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf diff --git a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt index 8fbdd1435a8..a385a1783bc 100644 --- a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt +++ b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt @@ -30,7 +30,7 @@ else () ####### SYCL here if (SYCL_BUILD) set(YAKL_ARCH "SYCL") - set(YAKL_SYCL_FLAGS "-DYAKL_ARCH_SYCL -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") + set(YAKL_SYCL_FLAGS "-DYAKL_ARCH_SYCL -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64") string (REPLACE " " ";" YAKL_SYCL_FLAGS_LIST ${YAKL_SYCL_FLAGS}) endif() From cc10432d118cfa09fe3f9d53fe0a89acfdbf0532 Mon Sep 17 
00:00:00 2001 From: Oksana Guba Date: Mon, 22 Apr 2024 21:51:23 +0000 Subject: [PATCH 27/85] updates for sunspot sw stack --- .../machines/cmake_macros/oneapi-ifxgpu.cmake | 6 +-- cime_config/machines/config_machines.xml | 26 ++++++------- .../cmake/machine-files/sunspot-pvc.cmake | 38 ++++++++++++++----- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake index 857f194bf72..0ee9c4706ed 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake @@ -5,14 +5,14 @@ if (compile_threaded) string(APPEND CMAKE_EXE_LINKER_FLAGS " -qopenmp") endif() string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g") -string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -g") +string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -g -fpscomp logicals") string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -g") -string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") +string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -fpscomp logicals -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_C_FLAGS " -fp-model precise -std=gnu99") string(APPEND CMAKE_CXX_FLAGS " -fp-model precise") -string(APPEND CMAKE_Fortran_FLAGS " -traceback -convert big_endian -assume byterecl -assume realloc_lhs -fp-model precise") +string(APPEND CMAKE_Fortran_FLAGS " -fpscomp logicals -traceback -convert big_endian -assume byterecl -assume realloc_lhs -fp-model precise") string(APPEND CPPDEFS " -DFORTRANUNDERSCORE -DNO_R16 -DCPRINTEL -DHAVE_SLASHPROC -DHIDE_MPI") string(APPEND CMAKE_Fortran_FORMAT_FIXED_FLAG " -fixed -132") string(APPEND CMAKE_Fortran_FORMAT_FREE_FLAG " -free") diff --git a/cime_config/machines/config_machines.xml 
b/cime_config/machines/config_machines.xml index c312835a09e..d0a6caa5409 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3169,31 +3169,27 @@ - /soft/packaging/lmod/lmod/init/sh - /soft/packaging/lmod/lmod/init/csh - /soft/packaging/lmod/lmod/init/env_modules_python.py + /usr/share/lmod/lmod/init/sh + /usr/share/lmod/lmod/init/csh + /usr/share/lmod/lmod/init/env_modules_python.py module module - /soft/packaging/lmod/lmod/libexec/lmod python + /usr/share/lmod/lmod/libexec/lmod python - - /soft/modulefiles - spack cmake/3.24.2 python/3.9.13-gcc-11.2.0-76jlbxs - /soft/restricted/CNDA/updates/modulefiles + + + spack-pe-gcc/0.6.1-23.275.2 cmake python/3.10.10 + - - oneapi/release/2023.12.15.001 - mpich/52.2-1024/icc-all-pmix-gpu - - - + oneapi/eng-compiler/2023.12.15.002 + mpich/icc-all-pmix-gpu/52.2 cray-pals - append-deps/default + libfabric/1.15.2.0 diff --git a/components/eamxx/cmake/machine-files/sunspot-pvc.cmake b/components/eamxx/cmake/machine-files/sunspot-pvc.cmake index 874b73e34eb..d7e2d262b01 100644 --- a/components/eamxx/cmake/machine-files/sunspot-pvc.cmake +++ b/components/eamxx/cmake/machine-files/sunspot-pvc.cmake @@ -1,3 +1,13 @@ +cmake_minimum_required(VERSION 3.18) + +#cmake_policy(SET CMP0057 NEW) +#cmake_policy(SET CMP0074 NEW) +#cmake_policy(SET CMP0079 NEW) # Remove once scorpio in a better state + +#set(CMAKE_CXX_STANDARD 17) + +#project(aaa C CXX Fortran) + include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) common_setup() @@ -8,25 +18,33 @@ include (${EKAT_MACH_FILES_PATH}/mpi/srun.cmake) #AB flags from ekat # -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -qopenmp-simd -Wsycl-strict -fsycl-device-code-split=per_kernel -SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") -SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend 
\"-device 12.60.7\"") +SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda" CACHE STRING "") +SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"" CACHE STRING "") + + +message("HEY SYCL_COMPILE_FLAGS is ${SYCL_COMPILE_FLAGS}") +message("HEY SYCL_LINK_FLAGS is ${SYCL_LINK_FLAGS}") #SET(MPICH_DIR "/soft/restricted/CNDA/updates/mpich/52.2/mpich-ofi-all-icc-default-pmix-gpu-drop52/" CACHE STRING "") -set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "" FORCE) -set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "" FORCE) -set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) -set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "" FORCE) +set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG -std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda" CACHE STRING "") +set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "") +set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "") +set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG -fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\" -fortlib" CACHE STRING "") #set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) -set(NETCDF_PATH "$ENV{NETCDF_PATH}") -set(NETCDF_C_PATH "$ENV{NETCDF_PATH}") 
+set(NETCDF_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_DIR "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") #this one is for rrtmgp set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}") -set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") +set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}" CACHE STRING "") + + +set(PNETCDF_DIR "$ENV{PNETCDF_PATH}" CACHE STRING "") From 3457fbef525ce86abdd0b07db02cd2c09d0f6780 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 6 May 2024 15:30:48 +0000 Subject: [PATCH 28/85] add a flag to kokkos, redo cmake file for sunspot --- .../oneapi-ifxgpu_sunspot-pvc.cmake | 2 +- .../eamxx/cmake/machine-files/sunspot-pvc.cmake | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake index 2719498f760..91f65665a14 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake @@ -11,7 +11,7 @@ if (DEBUG) # string(APPEND CMAKE_EXE_LINKER_FLAGS " -check uninit") endif() -string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") +string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off -DCMAKE_CXX_FLAGS='-fsycl-device-code-split=per_kernel'") string(APPEND SYCL_FLAGS " -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") set(SCREAM_MPI_ON_DEVICE OFF CACHE STRING "") diff --git a/components/eamxx/cmake/machine-files/sunspot-pvc.cmake 
b/components/eamxx/cmake/machine-files/sunspot-pvc.cmake index d7e2d262b01..ee984e7a586 100644 --- a/components/eamxx/cmake/machine-files/sunspot-pvc.cmake +++ b/components/eamxx/cmake/machine-files/sunspot-pvc.cmake @@ -18,23 +18,20 @@ include (${EKAT_MACH_FILES_PATH}/mpi/srun.cmake) #AB flags from ekat # -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -qopenmp-simd -Wsycl-strict -fsycl-device-code-split=per_kernel -SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda" CACHE STRING "") -SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"" CACHE STRING "") - - -message("HEY SYCL_COMPILE_FLAGS is ${SYCL_COMPILE_FLAGS}") -message("HEY SYCL_LINK_FLAGS is ${SYCL_LINK_FLAGS}") +SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") +SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"") #SET(MPICH_DIR "/soft/restricted/CNDA/updates/mpich/52.2/mpich-ofi-all-icc-default-pmix-gpu-drop52/" CACHE STRING "") -set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG -std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda" CACHE STRING "") -set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "") -set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "") -set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG -fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\" -fortlib" CACHE STRING "") +set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions 
-mlong-double-64 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "" FORCE) #set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) + set(NETCDF_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") set(NETCDF_DIR "$ENV{NETCDF_PATH}" CACHE STRING "") set(NETCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") From 741b87b2238b8f9092692d81e3ad4fde135e1902 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 25 Apr 2024 13:49:16 -0500 Subject: [PATCH 29/85] bool->int changes, but not linking --- .../homme/src/share/cxx/GllFvRemapImpl.cpp | 2 +- .../homme/src/share/cxx/SimulationParams.hpp | 4 +- .../src/theta-l_kokkos/cxx/CamForcing.cpp | 2 +- .../src/theta-l_kokkos/cxx/ForcingFunctor.hpp | 4 +- .../cxx/cxx_f90_interface_theta.cpp | 24 +++++------- .../src/theta-l_kokkos/prim_driver_mod.F90 | 37 ++++++++++++------- .../src/theta-l_kokkos/theta_f2c_mod.F90 | 12 +++--- 7 files changed, 44 insertions(+), 41 deletions(-) diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.cpp b/components/homme/src/share/cxx/GllFvRemapImpl.cpp index 6148f69cfa9..ea1a52f5efd 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.cpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.cpp @@ -142,7 +142,7 @@ ::init_data (const int nf, const int nf_max, const bool theta_hydrostatic_mode, " nf must be > 1.", Errors::err_not_implemented); auto& sp = Context::singleton().get(); - m_data.use_moisture = sp.moisture == MoistDry::MOIST; + m_data.use_moisture = sp.use_moisture; // Only in the unit test gllfvremap_ut does 
theta_hydrostatic_mode not already // == sp.theta_hydrostatic_mode. m_data.theta_hydrostatic_mode = sp.theta_hydrostatic_mode = theta_hydrostatic_mode; diff --git a/components/homme/src/share/cxx/SimulationParams.hpp b/components/homme/src/share/cxx/SimulationParams.hpp index b435911da2e..4f36962b16c 100644 --- a/components/homme/src/share/cxx/SimulationParams.hpp +++ b/components/homme/src/share/cxx/SimulationParams.hpp @@ -23,7 +23,7 @@ struct SimulationParams void print(std::ostream& out = std::cout); TimeStepType time_step_type; - MoistDry moisture; + bool use_moisture; RemapAlg remap_alg; TestCase test_case; ForcingAlg ftype = ForcingAlg::FORCING_OFF; @@ -77,7 +77,7 @@ inline void SimulationParams::print (std::ostream& out) { out << "\n************** CXX SimulationParams **********************\n\n"; out << " time_step_type: " << etoi(time_step_type) << "\n"; - out << " moisture: " << (moisture==MoistDry::DRY ? "dry" : "moist") << "\n"; + out << " use_moisture: " << (use_moisture ? "moist" : "dry") << "\n"; out << " remap_alg: " << etoi(remap_alg) << "\n"; out << " test case: " << etoi(test_case) << "\n"; out << " ftype: " << etoi(ftype) << "\n"; diff --git a/components/homme/src/theta-l_kokkos/cxx/CamForcing.cpp b/components/homme/src/theta-l_kokkos/cxx/CamForcing.cpp index 02b999db16e..bd7cee3e7c0 100644 --- a/components/homme/src/theta-l_kokkos/cxx/CamForcing.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/CamForcing.cpp @@ -33,7 +33,7 @@ static void apply_cam_forcing_tracers(const Real dt, ForcingFunctor& ff, if ( p.ftype == ForcingAlg::FORCING_2) adjustment = true; #endif - ff.tracers_forcing(dt, tl.n0, tl.n0_qdp, adjustment, p.moisture); + ff.tracers_forcing(dt, tl.n0, tl.n0_qdp, adjustment, p.use_moisture); GPTLstop("ApplyCAMForcing_tracers"); } diff --git a/components/homme/src/theta-l_kokkos/cxx/ForcingFunctor.hpp b/components/homme/src/theta-l_kokkos/cxx/ForcingFunctor.hpp index 80993d1d0f1..f9b106c3640 100644 --- 
a/components/homme/src/theta-l_kokkos/cxx/ForcingFunctor.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/ForcingFunctor.hpp @@ -237,7 +237,7 @@ class ForcingFunctor }); } - void tracers_forcing (const Real dt, const int np1, const int np1_qdp, const bool adjustment, const MoistDry moisture) { + void tracers_forcing (const Real dt, const int np1, const int np1_qdp, const bool adjustment, const bool use_moisture) { // The Functor needs to be fully setup to use this function assert (is_setup); @@ -246,7 +246,7 @@ class ForcingFunctor m_np1_qdp = np1_qdp; m_adjustment = adjustment; - m_moist = (moisture==MoistDry::MOIST); + m_moist = use_moisture; Kokkos::parallel_for("temperature, NH perturb press, FQps",m_policy_tracers_pre,*this); Kokkos::fence(); diff --git a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp index f0f59205bde..139da761d67 100644 --- a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp @@ -43,10 +43,10 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, const Real& nu, const Real& nu_p, const Real& nu_q, const Real& nu_s, const Real& nu_div, const Real& nu_top, const int& hypervis_order, const int& hypervis_subcycle, const int& hypervis_subcycle_tom, const double& hypervis_scaling, const double& dcmip16_mu, - const int& ftype, const int& theta_adv_form, const bool& prescribed_wind, const bool& moisture, const bool& disable_diagnostics, - const bool& use_cpstar, const int& transport_alg, const bool& theta_hydrostatic_mode, const char** test_case, + const int& ftype, const int& theta_adv_form, const int& prescribed_wind, const int& use_moisture, const int& disable_diagnostics, + const int& use_cpstar, const int& transport_alg, const int& theta_hydrostatic_mode, const char** test_case, const int& dt_remap_factor, const int& dt_tracer_factor, - 
const double& scale_factor, const double& laplacian_rigid_factor, const int& nsplit, const bool& pgrad_correction, + const double& scale_factor, const double& laplacian_rigid_factor, const int& nsplit, const int& pgrad_correction, const double& dp3d_thresh, const double& vtheta_thresh, const int& internal_diagnostics_level) { @@ -119,22 +119,16 @@ if(theta_hydrostatic_mode){ params.hypervis_subcycle = hypervis_subcycle; params.hypervis_subcycle_tom = hypervis_subcycle_tom; params.hypervis_scaling = hypervis_scaling; - params.disable_diagnostics = disable_diagnostics; - params.moisture = (moisture ? MoistDry::MOIST : MoistDry::DRY); - params.use_cpstar = use_cpstar; + params.disable_diagnostics = (bool)disable_diagnostics; + params.use_moisture = (bool)use_moisture; + params.use_cpstar = (bool)use_cpstar; params.transport_alg = transport_alg; - -if(theta_hydrostatic_mode){ - params.theta_hydrostatic_mode = true; -}else{ - params.theta_hydrostatic_mode = false; -} - //params.theta_hydrostatic_mode = theta_hydrostatic_mode; + params.theta_hydrostatic_mode = (bool)theta_hydrostatic_mode; params.dcmip16_mu = dcmip16_mu; params.nsplit = nsplit; params.scale_factor = scale_factor; params.laplacian_rigid_factor = laplacian_rigid_factor; - params.pgrad_correction = pgrad_correction; + params.pgrad_correction = (bool)pgrad_correction; params.dp3d_thresh = dp3d_thresh; params.vtheta_thresh = vtheta_thresh; params.internal_diagnostics_level = internal_diagnostics_level; @@ -318,7 +312,7 @@ void init_elements_c (const int& num_elems) c.create_ref(e.m_forcing); } -void init_functors_c (const bool& allocate_buffer) +void init_functors_c (const int& allocate_buffer) { auto& c = Context::singleton(); diff --git a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 index 96b42314453..a613045e7b7 100644 --- a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 +++ 
b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 @@ -64,7 +64,7 @@ subroutine prim_init2(elem, hybrid, nets, nete, tl, hvcoord) end subroutine prim_init2 subroutine prim_create_c_data_structures (tl, hvcoord, mp) - use iso_c_binding, only : c_loc, c_ptr, c_bool, C_NULL_CHAR + use iso_c_binding, only : c_loc, c_ptr, C_NULL_CHAR use theta_f2c_mod, only : init_reference_element_c, init_simulation_params_c, & init_time_level_c, init_hvcoord_c, init_elements_c use time_mod, only : TimeLevel_t, nsplit @@ -73,7 +73,7 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) nu, nu_p, nu_q, nu_s, nu_div, nu_top, vert_remap_q_alg, & hypervis_order, hypervis_subcycle, hypervis_subcycle_tom,& hypervis_scaling, & - ftype, prescribed_wind, moisture, disable_diagnostics, & + ftype, prescribed_wind, use_moisture, disable_diagnostics, & use_cpstar, transport_alg, theta_hydrostatic_mode, & dcmip16_mu, theta_advect_form, test_case, & MAX_STRING_LEN, dt_remap_factor, dt_tracer_factor, & @@ -93,6 +93,8 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) type (c_ptr) :: hybrid_am_ptr, hybrid_ai_ptr, hybrid_bm_ptr, hybrid_bi_ptr character(len=MAX_STRING_LEN), target :: test_name + integer :: disable_diagnostics_int, theta_hydrostatic_mode_int, use_moisture_int + ! Initialize the C++ reference element structure (i.e., pseudo-spectral deriv matrix and ref element mass matrix) dvv = deriv1%dvv elem_mp = mp @@ -100,22 +102,28 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) ! 
Fill the simulation params structures in C++ test_name = TRIM(test_case) // C_NULL_CHAR + + if (disable_diagnostics) disable_diagnostics_int=1 + if (.not.disable_diagnostics) disable_diagnostics_int=0 + if (use_moisture) use_moisture_int=1 + if (.not.use_moisture) use_moisture_int=0 + call init_simulation_params_c (vert_remap_q_alg, limiter_option, rsplit, qsplit, tstep_type, & qsize, statefreq, nu, nu_p, nu_q, nu_s, nu_div, nu_top, & hypervis_order, hypervis_subcycle, hypervis_subcycle_tom, & hypervis_scaling, & dcmip16_mu, ftype, theta_advect_form, & - LOGICAL(prescribed_wind==1,c_bool), & - LOGICAL(moisture/="dry",c_bool), & - LOGICAL(disable_diagnostics,c_bool), & - LOGICAL(use_cpstar==1,c_bool), & + prescribed_wind, & + use_moisture_int, & + disable_diagnostics_int, & + use_cpstar, & transport_alg, & - LOGICAL(theta_hydrostatic_mode,c_bool), & + theta_hydrostatic_mode_int, & c_loc(test_name), & dt_remap_factor, dt_tracer_factor, & scale_factor, laplacian_rigid_factor, & nsplit, & - LOGICAL(pgrad_correction==1,c_bool), & + pgrad_correction, & dp3d_thresh, vtheta_thresh, internal_diagnostics_level) ! Initialize time level structure in C++ @@ -343,22 +351,23 @@ subroutine prim_init_elements_views (elem) end subroutine prim_init_elements_views subroutine prim_init_kokkos_functors (allocate_buffer) - use iso_c_binding, only : c_bool use theta_f2c_mod, only : init_functors_c, init_boundary_exchanges_c - ! ! Optional Input ! - logical(kind=c_bool), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally + logical, intent(in), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally + + integer :: allocate_buffer_int ! Initialize the C++ functors in the C++ context ! If no argument allocate_buffer is present, ! 
let Homme internally allocate buffers + allocate_buffer_int=1 if (present(allocate_buffer)) then - call init_functors_c (logical(allocate_buffer,c_bool)) - else - call init_functors_c (logical(.true.,c_bool)) + if (allocate_buffer) allocate_buffer_int=1 + if (.not.allocate_buffer) allocate_buffer_int=0 endif + call init_functors_c (allocate_buffer_int) ! Initialize boundary exchange structure in C++ call init_boundary_exchanges_c () diff --git a/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 b/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 index 7a4c0424807..ba39bb03c22 100644 --- a/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 +++ b/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 @@ -11,14 +11,14 @@ subroutine init_simulation_params_c (remap_alg, limiter_option, rsplit, qsplit, qsize, state_frequency, nu, nu_p, nu_q, nu_s, nu_div, nu_top, & hypervis_order, hypervis_subcycle, hypervis_subcycle_tom, & hypervis_scaling, & - dcmip16_mu, ftype, theta_adv_form, prescribed_wind, moisture, & + dcmip16_mu, ftype, theta_adv_form, prescribed_wind, use_moisture, & disable_diagnostics, use_cpstar, transport_alg, & theta_hydrostatic_mode, test_case_name, dt_remap_factor, & dt_tracer_factor, scale_factor, laplacian_rigid_factor, & nsplit, pgrad_correction, dp3d_thresh, vtheta_thresh, & internal_diagnostics_level) bind(c) - use iso_c_binding, only: c_int, c_bool, c_double, c_ptr + use iso_c_binding, only: c_int, c_double, c_ptr ! ! Inputs ! 
@@ -29,8 +29,8 @@ subroutine init_simulation_params_c (remap_alg, limiter_option, rsplit, qsplit, scale_factor, laplacian_rigid_factor, dp3d_thresh, vtheta_thresh integer(kind=c_int), intent(in) :: hypervis_order, hypervis_subcycle, hypervis_subcycle_tom integer(kind=c_int), intent(in) :: ftype, theta_adv_form - logical(kind=c_bool), intent(in) :: prescribed_wind, moisture, disable_diagnostics, use_cpstar - logical(kind=c_bool), intent(in) :: theta_hydrostatic_mode, pgrad_correction + integer(kind=c_int), intent(in) :: prescribed_wind, use_moisture, disable_diagnostics, use_cpstar + integer(kind=c_int), intent(in) :: theta_hydrostatic_mode, pgrad_correction type(c_ptr), intent(in) :: test_case_name end subroutine init_simulation_params_c @@ -138,11 +138,11 @@ end subroutine init_reference_element_c ! Create C++ functors subroutine init_functors_c (allocate_buffer) bind(c) - use iso_c_binding, only: c_bool + use iso_c_binding, only: c_int ! ! Inputs ! - logical(kind=c_bool), intent(in) :: allocate_buffer + integer(kind=c_int), intent(in) :: allocate_buffer end subroutine init_functors_c ! 
Initialize C++ boundary exchange structures From 0c4da33d48492e59fed4faa515156c254617ceaa Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 26 Apr 2024 23:04:44 -0500 Subject: [PATCH 30/85] fixing bug, all cxx vs f pass --- .../src/theta-l_kokkos/prim_driver_mod.F90 | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 index a613045e7b7..262ba19f4b7 100644 --- a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 +++ b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 @@ -107,6 +107,8 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) if (.not.disable_diagnostics) disable_diagnostics_int=0 if (use_moisture) use_moisture_int=1 if (.not.use_moisture) use_moisture_int=0 + if(theta_hydrostatic_mode) theta_hydrostatic_mode_int=1 + if(.not.theta_hydrostatic_mode) theta_hydrostatic_mode_int=0 call init_simulation_params_c (vert_remap_q_alg, limiter_option, rsplit, qsplit, tstep_type, & qsize, statefreq, nu, nu_p, nu_q, nu_s, nu_div, nu_top, & @@ -351,23 +353,22 @@ subroutine prim_init_elements_views (elem) end subroutine prim_init_elements_views subroutine prim_init_kokkos_functors (allocate_buffer) + use iso_c_binding, only : c_int use theta_f2c_mod, only : init_functors_c, init_boundary_exchanges_c ! ! Optional Input ! - logical, intent(in), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally - - integer :: allocate_buffer_int - + integer, intent(in), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally + integer(kind=c_int) :: dummy ! Initialize the C++ functors in the C++ context ! If no argument allocate_buffer is present, ! 
let Homme internally allocate buffers - allocate_buffer_int=1 if (present(allocate_buffer)) then - if (allocate_buffer) allocate_buffer_int=1 - if (.not.allocate_buffer) allocate_buffer_int=0 + call init_functors_c (allocate_buffer) + else + dummy=1; + call init_functors_c (dummy) endif - call init_functors_c (allocate_buffer_int) ! Initialize boundary exchange structure in C++ call init_boundary_exchanges_c () From 2caefc04bfabf08753548294799970c83d62accc Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 9 May 2024 20:26:36 +0000 Subject: [PATCH 31/85] make ad consistent with bool->int changes --- .../eamxx/src/dynamics/homme/eamxx_homme_process_interface.cpp | 2 +- .../eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/components/eamxx/src/dynamics/homme/eamxx_homme_process_interface.cpp b/components/eamxx/src/dynamics/homme/eamxx_homme_process_interface.cpp index c535829fbfa..31f2363f4bf 100644 --- a/components/eamxx/src/dynamics/homme/eamxx_homme_process_interface.cpp +++ b/components/eamxx/src/dynamics/homme/eamxx_homme_process_interface.cpp @@ -814,7 +814,7 @@ void HommeDynamics::init_homme_views () { std::stringstream msg; msg << "\n************** HOMMEXX SimulationParams **********************\n\n"; msg << " time_step_type: " << Homme::etoi(params.time_step_type) << "\n"; - msg << " moisture: " << (params.moisture==Homme::MoistDry::DRY ? "dry" : "moist") << "\n"; + msg << " moisture: " << (params.use_moisture ? 
"moist" : "dry") << "\n"; msg << " remap_alg: " << Homme::etoi(params.remap_alg) << "\n"; msg << " test case: " << Homme::etoi(params.test_case) << "\n"; msg << " ftype: " << Homme::etoi(params.ftype) << "\n"; diff --git a/components/eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 b/components/eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 index eefcd65e8d7..3ce903b611d 100644 --- a/components/eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 +++ b/components/eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 @@ -192,7 +192,7 @@ subroutine prim_init_model_f90 () bind(c) elem, hybrid, hvcoord, deriv, tl ! Local variable - logical(kind=c_bool), parameter :: allocate_buffer = .false. + integer, parameter :: allocate_buffer = 0 if (.not. is_data_structures_inited) then call abortmp ("Error! 'prim_init_data_structures_f90' has not been called yet.\n") From 9d9eef22efed35778a91dcc597c2f382ec90f3b6 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 11 May 2024 18:43:40 +0000 Subject: [PATCH 32/85] add sunsporspu --- .../cmake_macros/oneapi-ifx_sunspotcpu.cmake | 19 ++++ cime_config/machines/config_batch.xml | 8 ++ cime_config/machines/config_machines.xml | 94 +++++++++++++++++++ .../cmake/machine-files/sunspotcpu.cmake | 33 +++++++ 4 files changed, 154 insertions(+) create mode 100644 cime_config/machines/cmake_macros/oneapi-ifx_sunspotcpu.cmake create mode 100644 components/eamxx/cmake/machine-files/sunspotcpu.cmake diff --git a/cime_config/machines/cmake_macros/oneapi-ifx_sunspotcpu.cmake b/cime_config/machines/cmake_macros/oneapi-ifx_sunspotcpu.cmake new file mode 100644 index 00000000000..bd6ec8ed913 --- /dev/null +++ b/cime_config/machines/cmake_macros/oneapi-ifx_sunspotcpu.cmake @@ -0,0 +1,19 @@ + +string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_core") +if (compile_threaded) + string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64") +endif() + +string(APPEND KOKKOS_OPTIONS " 
-DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") + +#set(SCREAM_MPI_ON_DEVICE OFF CACHE STRING "") + + + + + + + + + + diff --git a/cime_config/machines/config_batch.xml b/cime_config/machines/config_batch.xml index d1347eaa0df..5adae921564 100644 --- a/cime_config/machines/config_batch.xml +++ b/cime_config/machines/config_batch.xml @@ -551,6 +551,14 @@ + + /lus/gila/projects/CSC249ADSE15_CNDA/tools/qsub/throttle + + workq + debug + + + /lus/gila/projects/CSC249ADSE15_CNDA/tools/qsub/throttle diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index d0a6caa5409..cb43c4a3684 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3228,6 +3228,100 @@ + + ANL Sunspot Test and Development System (TDS), batch system is pbspro + uan-.* + LINUX + oneapi-ifx + mpich + CSC249ADSE15_CNDA + /gila/CSC249ADSE15_CNDA/performance_archive + .* + /lus/gila/projects/CSC249ADSE15_CNDA/$USER/scratch + /lus/gila/projects/CSC249ADSE15_CNDA/inputdata + /lus/gila/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 + $CIME_OUTPUT_ROOT/archive/$CASE + /lus/gila/projects/CSC249ADSE15_CNDA/baselines/$COMPILER + /lus/gila/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc + 16 + e3sm_developer + 4 + pbspro + e3sm + 208 + 104 + FALSE + + mpiexec + + + -np {{ total_tasks }} --label + -ppn {{ tasks_per_node }} + --cpu-bind depth -envall + -d $ENV{OMP_NUM_THREADS} + + + + + /usr/share/lmod/lmod/init/sh + /usr/share/lmod/lmod/init/csh + /usr/share/lmod/lmod/init/env_modules_python.py + module + module + /usr/share/lmod/lmod/libexec/lmod python + + + + spack-pe-gcc/0.6.1-23.275.2 cmake python/3.10.10 + + + + oneapi/eng-compiler/2023.12.15.002 + mpich/icc-all-pmix-gpu/52.2 + + + + + cray-pals + + libfabric/1.15.2.0 + + + $CIME_OUTPUT_ROOT/$CASE/run + $CIME_OUTPUT_ROOT/$CASE/bld + + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf + 
/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf + list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 + + + 1 + + + DISABLED + + + + + + 131072 + 20 + 0 + + + verbose,granularity=thread,balanced + 128M + + + -1 + + + + + + + ANL Sunspot Test and Development System (TDS), batch system is pbspro diff --git a/components/eamxx/cmake/machine-files/sunspotcpu.cmake b/components/eamxx/cmake/machine-files/sunspotcpu.cmake new file mode 100644 index 00000000000..02b05de9720 --- /dev/null +++ b/components/eamxx/cmake/machine-files/sunspotcpu.cmake @@ -0,0 +1,33 @@ +include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) +common_setup() + +include (${EKAT_MACH_FILES_PATH}/kokkos/serial.cmake) +# kokkos sycl is on in the above file +#include (${EKAT_MACH_FILES_PATH}/kokkos/sycl.cmake) +include (${EKAT_MACH_FILES_PATH}/mpi/srun.cmake) + +#AB flags from ekat +# -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -qopenmp-simd -Wsycl-strict -fsycl-device-code-split=per_kernel + +#SET(MPICH_DIR "/soft/restricted/CNDA/updates/mpich/52.2/mpich-ofi-all-icc-default-pmix-gpu-drop52/" CACHE STRING "") + +set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -mlong-double-64 -DNDEBUG -fortlib" CACHE STRING "" FORCE) +#set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) + + + +set(NETCDF_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_DIR 
"$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +#this one is for rrtmgp +set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(PNETCDF_PATH "$ENV{PNETCDF_PATH}" CACHE STRING "") + + +set(PNETCDF_DIR "$ENV{PNETCDF_PATH}" CACHE STRING "") + + From 96b62a99b2704359181d1b8364ec76a436f86aa0 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 11 May 2024 18:44:47 +0000 Subject: [PATCH 33/85] remove -g for now --- cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake index 0ee9c4706ed..c798c53ee8b 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake @@ -4,9 +4,11 @@ if (compile_threaded) string(APPEND CMAKE_CXX_FLAGS " -qopenmp") string(APPEND CMAKE_EXE_LINKER_FLAGS " -qopenmp") endif() -string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g") -string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -g -fpscomp logicals") -string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -g") + +#adding -g here leads to linker internal errors +string(APPEND CMAKE_C_FLAGS_RELEASE " -O2") +string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -fpscomp logicals") +string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2") string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -fpscomp logicals -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") From 563321577e2bdef2d29cc8d09ac66c5a59c60764 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 11 May 2024 18:45:49 +0000 Subject: [PATCH 34/85] switch theta_hy_mode to int for now --- components/homme/src/share/cxx/SimulationParams.hpp | 2 +- .../theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp | 12 +++++++++++- 
2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/components/homme/src/share/cxx/SimulationParams.hpp b/components/homme/src/share/cxx/SimulationParams.hpp index 4f36962b16c..923f25129c4 100644 --- a/components/homme/src/share/cxx/SimulationParams.hpp +++ b/components/homme/src/share/cxx/SimulationParams.hpp @@ -42,7 +42,7 @@ struct SimulationParams bool disable_diagnostics; int transport_alg; bool use_cpstar; - bool theta_hydrostatic_mode; // Only for theta model + int theta_hydrostatic_mode; // Only for theta model double dcmip16_mu; // Only for theta model double nu; diff --git a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp index 139da761d67..97588045644 100644 --- a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp @@ -50,6 +50,9 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, const double& dp3d_thresh, const double& vtheta_thresh, const int& internal_diagnostics_level) { + std::cout << "In transfer routine theta_hydrostatic_mode =" << theta_hydrostatic_mode << "\n"; + + if(theta_hydrostatic_mode){ std::cout << " HEEEEEEEEEEEtheta_hydrostatic_mode =TRUE \n"; }else @@ -101,6 +104,13 @@ if(theta_hydrostatic_mode){ params.theta_adv_form = AdvectionForm::NonConservative; } +// if (theta_hydrostatic_mode==0) { +// params.theta_hydrostatic_mode = false; +// } else { +// params.theta_hydrostatic_mode = true; +// } + + params.limiter_option = limiter_option; params.rsplit = rsplit; params.qsplit = qsplit; @@ -123,7 +133,7 @@ if(theta_hydrostatic_mode){ params.use_moisture = (bool)use_moisture; params.use_cpstar = (bool)use_cpstar; params.transport_alg = transport_alg; - params.theta_hydrostatic_mode = (bool)theta_hydrostatic_mode; + params.theta_hydrostatic_mode = theta_hydrostatic_mode; params.dcmip16_mu = dcmip16_mu; params.nsplit = 
nsplit; params.scale_factor = scale_factor; From d38b8e1e955f94e13424e825c755d826c7dc7022 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 13 May 2024 16:55:02 +0000 Subject: [PATCH 35/85] debug statements --- .../eamxx/src/control/atmosphere_driver.cpp | 17 ++++++++++ .../physics/p3/eamxx_p3_process_interface.hpp | 31 +++++++++++++++++-- .../rrtmgp/eamxx_rrtmgp_process_interface.cpp | 13 ++++++++ .../atm_process/atmosphere_process_group.cpp | 26 ++++++++++++++-- 4 files changed, 82 insertions(+), 5 deletions(-) diff --git a/components/eamxx/src/control/atmosphere_driver.cpp b/components/eamxx/src/control/atmosphere_driver.cpp index 8268caf9653..467cb415bb6 100644 --- a/components/eamxx/src/control/atmosphere_driver.cpp +++ b/components/eamxx/src/control/atmosphere_driver.cpp @@ -1611,14 +1611,22 @@ initialize (const ekat::Comm& atm_comm, void AtmosphereDriver::run (const int dt) { start_timer("EAMxx::run"); + std::cout << "IN DRIVER 1 \n"; + + // Make sure the end of the time step is after the current start_time EKAT_REQUIRE_MSG (dt>0, "Error! Input time step must be positive.\n"); + + std::cout << "IN DRIVER 2 \n"; + // Print current timestamp information m_atm_logger->log(ekat::logger::LogLevel::info, "Atmosphere step = " + std::to_string(m_current_ts.get_num_steps()) + "\n" + " model start-of-step time = " + m_current_ts.get_date_string() + " " + m_current_ts.get_time_string() + "\n"); + + std::cout << "IN DRIVER 3 \n"; // Reset accum fields to 0 // Note: at the 1st timestep this is redundant, since we did it at init, // to ensure t=0 INSTANT output was correct. However, it's not a @@ -1626,10 +1634,12 @@ void AtmosphereDriver::run (const int dt) { // nano-opt of removing the call for the 1st timestep. reset_accumulated_fields(); + std::cout << "IN DRIVER 4 \n" << std::flush; // The class AtmosphereProcessGroup will take care of dispatching arguments to // the individual processes, which will be called in the correct order. 
m_atm_process_group->run(dt); + std::cout << "IN DRIVER 5 \n"<< std::flush; // Some accumulated fields need to be divided by dt at the end of the atm step for (auto fm_it : m_field_mgrs) { const auto& fm = fm_it.second; @@ -1643,15 +1653,22 @@ void AtmosphereDriver::run (const int dt) { } } + std::cout << "IN DRIVER 6 \n"<debug("[EAMxx::run] running output managers..."); for (auto& out_mgr : m_output_managers) { out_mgr.run(m_current_ts); } +#endif + #ifdef SCREAM_HAS_MEMORY_USAGE long long my_mem_usage = get_mem_usage(MB); long long max_mem_usage; diff --git a/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp b/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp index 3e8cc6642ec..373d2efe7e3 100644 --- a/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp +++ b/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp @@ -203,8 +203,17 @@ class P3Microphysics : public AtmosphereProcess struct p3_postamble { p3_postamble() = default; // Functor for Kokkos loop to pre-process every run step + + //Kokkos::printf("OG postamble start"); + KOKKOS_INLINE_FUNCTION void operator()(const int icol) const { + +//Kokkos::printf("OG postamble P################3\n"); + +#if 1 +#if 1 + for (int ipack=0;ipack(get_field_out("T_mid"),m_grid,100.0, 500.0,false); @@ -446,6 +449,13 @@ void RRTMGPRadiation::run_impl (const double dt) { using PC = scream::physics::Constants; using CO = scream::ColumnOps; + + std::cout << "RRTMGP IMPL 1 ------------------------ \n"; + std::cout << std::flush ; + + +#if 0 + // get a host copy of lat/lon auto h_lat = m_lat.get_view(); auto h_lon = m_lon.get_view(); @@ -1108,6 +1118,9 @@ void RRTMGPRadiation::run_impl (const double dt) { }); } +#endif + + } // ========================================================================================= diff --git a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp index 
cf69e569697..b05982e4cf4 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp @@ -392,6 +392,7 @@ void AtmosphereProcessGroup::initialize_impl (const RunType run_type) { m_atm_logger->debug("[EAMxx::initialize::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); #endif } + std::cout << "process GROUP is done\n" << std::flush; } void AtmosphereProcessGroup::run_impl (const double dt) { @@ -400,6 +401,9 @@ void AtmosphereProcessGroup::run_impl (const double dt) { } else { run_parallel(dt); } + + std::cout << "process GROUP RUN is done\n" << std::flush; + } void AtmosphereProcessGroup::run_sequential (const double dt) { @@ -419,11 +423,10 @@ void AtmosphereProcessGroup::run_sequential (const double dt) { const int ncols = fm->get_grid()->get_num_local_dofs(); const int nlevs = fm->get_grid()->get_num_vertical_levels(); - //fm->get_field("T_mid").sync_to_host(); + fm->get_field("T_mid").sync_to_host(); auto ff = fm->get_field("T_mid").get_view(); #if 0 - //const auto vv = ff(1,1); for (int ii = 0; ii < ncols; ii++) for (int jj = 0; jj < nlevs; jj++){ const auto vv = ff(ii,jj); @@ -443,9 +446,28 @@ std::cout << "OG T field (" <name() << " dt="<get_field("T_mid").sync_to_host(); + auto ff = fm->get_field("T_mid").get_view(); + +#if 0 + for (int ii = 0; ii < 5; ii++) + for (int jj = 0; jj < nlevs; jj++){ + const auto vv = ff(ii,jj); +m_atm_logger->info("OG T field ("+std::to_string(ii)+","+std::to_string(jj)+") = "+std::to_string(vv)); +std::cout << "OG T field (" <name() <<"\n"<set_update_time_stamps(do_update); // Run the process atm_proc->run(dt); + +std::cout << "OG proc AFTER RUN " << atm_proc->name() <<"\n"< Date: Wed, 15 May 2024 16:08:04 +0000 Subject: [PATCH 36/85] fixes after merges --- cime_config/machines/config_machines.xml | 82 ++---------------------- 1 file changed, 5 insertions(+), 77 deletions(-) diff --git 
a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 23d72218b23..ec65b527772 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3454,7 +3454,7 @@ 131072 20 - + verbose,granularity=thread,balanced 128M @@ -3643,7 +3643,7 @@ 20 0 - + verbose,granularity=thread,balanced 128M @@ -3744,7 +3744,7 @@ 131072 20 - + verbose,granularity=thread,balanced 128M @@ -3754,78 +3754,6 @@ - - - - - - - - - - - - -======= - /usr/share/lmod/8.3.1/init/python - /usr/share/lmod/8.3.1/init/sh - /usr/share/lmod/8.3.1/init/csh - /usr/share/lmod/lmod/libexec/lmod python - module - module - - - cmake/3.23.2 - craype-x86-rome - - - PrgEnv-gnu/8.3.3 - - - gcc/12.2.0 gcc/11.2.0 - cudatoolkit-standalone/11.4.4 - - - PrgEnv-nvhpc/8.3.3 - - - cudatoolkit-standalone/11.4.4 - craype-accel-nvidia80 - - - craype-network-ofi - libfabric/1.15.2.0 - cray-libsci/23.02.1.1 - cray-hdf5-parallel/1.12.2.3 - cray-netcdf-hdf5parallel/4.9.0.3 - cray-parallel-netcdf/1.12.3.3 - - - $CIME_OUTPUT_ROOT/$CASE/run - $CIME_OUTPUT_ROOT/$CASE/bld - - $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} - $ENV{CRAY_PARALLEL_NETCDF_PREFIX} - 0 - host - - - - 1 - nvidia80 - /grand/E3SMinput/soft/qsub/set_affinity_gpu_polaris.sh - - - /opt/cray/pe/gcc/11.2.0/snos/lib64/libstdc++.so - - - 128M - spread - threads - - - ->>>>>>> origin/master ANL Sunspot Test and Development System (TDS), batch system is pbspro uan-.* @@ -4132,11 +4060,11 @@ 0 - + verbose,granularity=thread,balanced 128M - + threads 128M From 4a9430ca6880befaa3b5aa77025a9c3e9be094a2 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 16 May 2024 15:37:28 +0000 Subject: [PATCH 37/85] cpu build changes after merge --- components/eamxx/CMakeLists.txt | 84 +++++++++---------- .../cmake/machine-files/sunspotcpu.cmake | 1 + 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 9cd2109c848..3d59c1010a3 100644 
--- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -549,25 +549,25 @@ if (SCREAM_DOUBLE_PRECISION) endif() endif() -print_var(SCREAM_MACHINE) -print_var(EAMXX_ENABLE_GPU) -print_var(CUDA_BUILD) -print_var(HIP_BUILD) -print_var(SYCL_BUILD) -print_var(SCREAM_DOUBLE_PRECISION) -print_var(SCREAM_MIMIC_GPU) -print_var(SCREAM_FPE) -print_var(SCREAM_NUM_VERTICAL_LEV) -print_var(SCREAM_PACK_SIZE) -print_var(SCREAM_SMALL_PACK_SIZE) -print_var(SCREAM_POSSIBLY_NO_PACK_SIZE) -print_var(SCREAM_LINK_FLAGS) -print_var(SCREAM_FPMODEL) -print_var(SCREAM_LIB_ONLY) -print_var(SCREAM_TPL_LIBRARIES) -print_var(SCREAM_TEST_MAX_THREADS) -print_var(SCREAM_TEST_THREAD_INC) -print_var(SCREAM_TEST_MAX_RANKS) +#print_var(SCREAM_MACHINE) +#print_var(EAMXX_ENABLE_GPU) +#print_var(CUDA_BUILD) +#print_var(HIP_BUILD) +#print_var(SYCL_BUILD) +#print_var(SCREAM_DOUBLE_PRECISION) +#print_var(SCREAM_MIMIC_GPU) +#print_var(SCREAM_FPE) +#print_var(SCREAM_NUM_VERTICAL_LEV) +#print_var(SCREAM_PACK_SIZE) +#print_var(SCREAM_SMALL_PACK_SIZE) +#print_var(SCREAM_POSSIBLY_NO_PACK_SIZE) +#print_var(SCREAM_LINK_FLAGS) +#print_var(SCREAM_FPMODEL) +#print_var(SCREAM_LIB_ONLY) +#print_var(SCREAM_TPL_LIBRARIES) +#print_var(SCREAM_TEST_MAX_THREADS) +#print_var(SCREAM_TEST_THREAD_INC) +#print_var(SCREAM_TEST_MAX_RANKS) # This must be done using add_definitions because it is used to determine # whether to include scream_config.h. 
@@ -621,29 +621,29 @@ message ("* Summary of EAMxx config settings *") message ("**************************************************") # Shortcut function, to print a variable -function (print_var var) - message ("${var}: ${${var}}") -endfunction () - -print_var(EAMXX_ENABLE_GPU) -print_var(CUDA_BUILD) -print_var(HIP_BUILD) -print_var(SCREAM_MACHINE) -print_var(SCREAM_DYNAMICS_DYCORE) -print_var(SCREAM_DOUBLE_PRECISION) -print_var(SCREAM_MIMIC_GPU) -print_var(SCREAM_FPE) -print_var(SCREAM_NUM_VERTICAL_LEV) -print_var(SCREAM_PACK_SIZE) -print_var(SCREAM_SMALL_PACK_SIZE) -print_var(SCREAM_POSSIBLY_NO_PACK_SIZE) -print_var(SCREAM_LINK_FLAGS) -print_var(SCREAM_FPMODEL) -print_var(SCREAM_LIB_ONLY) -print_var(SCREAM_TPL_LIBRARIES) -print_var(SCREAM_TEST_MAX_THREADS) -print_var(SCREAM_TEST_THREAD_INC) -print_var(SCREAM_TEST_MAX_RANKS) +#function (print_var var) +# message ("${var}: ${${var}}") +#endfunction () + +#print_var(EAMXX_ENABLE_GPU) +#print_var(CUDA_BUILD) +#print_var(HIP_BUILD) +#print_var(SCREAM_MACHINE) +#print_var(SCREAM_DYNAMICS_DYCORE) +#print_var(SCREAM_DOUBLE_PRECISION) +#print_var(SCREAM_MIMIC_GPU) +#print_var(SCREAM_FPE) +#print_var(SCREAM_NUM_VERTICAL_LEV) +#print_var(SCREAM_PACK_SIZE) +#print_var(SCREAM_SMALL_PACK_SIZE) +#print_var(SCREAM_POSSIBLY_NO_PACK_SIZE) +#print_var(SCREAM_LINK_FLAGS) +#print_var(SCREAM_FPMODEL) +#print_var(SCREAM_LIB_ONLY) +#print_var(SCREAM_TPL_LIBRARIES) +#print_var(SCREAM_TEST_MAX_THREADS) +#print_var(SCREAM_TEST_THREAD_INC) +#print_var(SCREAM_TEST_MAX_RANKS) message ("**************************************************") diff --git a/components/eamxx/cmake/machine-files/sunspotcpu.cmake b/components/eamxx/cmake/machine-files/sunspotcpu.cmake index 02b05de9720..7b186e58c42 100644 --- a/components/eamxx/cmake/machine-files/sunspotcpu.cmake +++ b/components/eamxx/cmake/machine-files/sunspotcpu.cmake @@ -22,6 +22,7 @@ set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -mlong-double-64 -DNDEBUG set(NETCDF_PATH 
"$ENV{NETCDF_PATH}" CACHE STRING "") set(NETCDF_DIR "$ENV{NETCDF_PATH}" CACHE STRING "") set(NETCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_C "$ENV{NETCDF_PATH}" CACHE STRING "") #this one is for rrtmgp set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") From 652575cfc6edafd7c87b84596b6247b35bde1111 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 18 May 2024 23:43:53 +0000 Subject: [PATCH 38/85] fix for use_moisture --- components/eamxx/src/dynamics/homme/eamxx_homme_iop.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eamxx/src/dynamics/homme/eamxx_homme_iop.cpp b/components/eamxx/src/dynamics/homme/eamxx_homme_iop.cpp index 9e04b4a0092..fe9f65bad5e 100644 --- a/components/eamxx/src/dynamics/homme/eamxx_homme_iop.cpp +++ b/components/eamxx/src/dynamics/homme/eamxx_homme_iop.cpp @@ -277,7 +277,7 @@ apply_iop_forcing(const Real dt) ElementOps elem_ops; elem_ops.init(hvcoord); - const bool use_moisture = (params.moisture == Homme::MoistDry::MOIST); + const bool use_moisture = params.use_moisture; // Load data from IOP files, if necessary m_iop->read_iop_file_data(timestamp()); From ee9fd25afb45409403e60bef578438fb3ff52fac Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 18 May 2024 23:45:29 +0000 Subject: [PATCH 39/85] cmake changes for cpu and sunspotcpu and debug prints --- .../machines/cmake_macros/oneapi-ifx.cmake | 12 +++++----- .../cmake/machine-files/sunspotcpu.cmake | 22 +++++++++---------- .../atm_process/atmosphere_process_group.cpp | 13 +++++------ 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifx.cmake b/cime_config/machines/cmake_macros/oneapi-ifx.cmake index e9a0f838b1f..5782a126eca 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifx.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifx.cmake @@ -4,15 +4,15 @@ if (compile_threaded) string(APPEND CMAKE_CXX_FLAGS " 
-qopenmp") string(APPEND CMAKE_EXE_LINKER_FLAGS " -qopenmp") endif() -string(APPEND CMAKE_C_FLAGS_RELEASE " -O2") -string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2") -string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2") +string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -gline-tables-only -g") +string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -gline-tables-only -g") +string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -gline-tables-only -g") string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") -string(APPEND CMAKE_C_FLAGS " -fp-model precise -std=gnu99") -string(APPEND CMAKE_CXX_FLAGS " -fp-model precise") -string(APPEND CMAKE_Fortran_FLAGS " -traceback -convert big_endian -assume byterecl -assume realloc_lhs -fp-model precise") +string(APPEND CMAKE_C_FLAGS " -fp-model precise -std=gnu99 -gline-tables-only -g") +string(APPEND CMAKE_CXX_FLAGS " -fp-model precise -gline-tables-only -g") +string(APPEND CMAKE_Fortran_FLAGS " -traceback -convert big_endian -assume byterecl -assume realloc_lhs -fp-model precise -gline-tables-only -g") string(APPEND CPPDEFS " -DFORTRANUNDERSCORE -DNO_R16 -DCPRINTEL -DHAVE_SLASHPROC -DHIDE_MPI") string(APPEND CMAKE_Fortran_FORMAT_FIXED_FLAG " -fixed -132") string(APPEND CMAKE_Fortran_FORMAT_FREE_FLAG " -free") diff --git a/components/eamxx/cmake/machine-files/sunspotcpu.cmake b/components/eamxx/cmake/machine-files/sunspotcpu.cmake index 7b186e58c42..ff7773daae1 100644 --- a/components/eamxx/cmake/machine-files/sunspotcpu.cmake +++ b/components/eamxx/cmake/machine-files/sunspotcpu.cmake @@ -11,24 +11,24 @@ include (${EKAT_MACH_FILES_PATH}/mpi/srun.cmake) #SET(MPICH_DIR "/soft/restricted/CNDA/updates/mpich/52.2/mpich-ofi-all-icc-default-pmix-gpu-drop52/" CACHE STRING "") -set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG" CACHE STRING "" FORCE) 
-set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "" FORCE) -set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_CXX_FLAGS " -\-intel -Xclang -fsycl-allow-virtual-functions -mlong-double-64 -O3 -DNDEBUG -gline-tables-only -g" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g -gline-tables-only" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS "-O3 -DNDEBUG -gline-tables-only -g" CACHE STRING "" FORCE) set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -mlong-double-64 -DNDEBUG -fortlib" CACHE STRING "" FORCE) #set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) -set(NETCDF_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_DIR "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_C "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(NETCDF_DIR "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(NETCDF_C_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(NETCDF_C "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") #this one is for rrtmgp -set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(PNETCDF_PATH "$ENV{PNETCDF_PATH}" CACHE STRING "") +set(NetCDF_C_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(PNETCDF_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf" CACHE STRING 
"") -set(PNETCDF_DIR "$ENV{PNETCDF_PATH}" CACHE STRING "") +set(PNETCDF_DIR "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf" CACHE STRING "") diff --git a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp index b05982e4cf4..d13f166d6b8 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp @@ -401,9 +401,7 @@ void AtmosphereProcessGroup::run_impl (const double dt) { } else { run_parallel(dt); } - - std::cout << "process GROUP RUN is done\n" << std::flush; - + std::cout << "process GROUP RUN is done\n" << std::flush; } void AtmosphereProcessGroup::run_sequential (const double dt) { @@ -411,9 +409,7 @@ void AtmosphereProcessGroup::run_sequential (const double dt) { auto ts = timestamp(); ts += dt; - - - auto& c = scream::ScreamContext::singleton(); + auto& c = scream::ScreamContext::singleton(); auto ad = c.getNonConst(); const auto gn = "Physics"; //const auto gn = "Physics GLL"; @@ -436,7 +432,6 @@ std::cout << "OG T field (" <name() <<"\n"<debug("[EAMxx::run_sequential::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); #endif + +std::cout << "OG AFTER mem usage " << atm_proc->name() <<"\n"< Date: Mon, 20 May 2024 17:52:40 +0000 Subject: [PATCH 40/85] fix LD path for sunspot --- cime_config/machines/config_machines.xml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index ec65b527772..4f3910c6fb2 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3627,7 +3627,9 @@ /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf 
/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf - list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf/lib:$ENV{LD_LIBRARY_PATH} + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf/bin:$ENV{PATH} + list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 1 From 891700f8779c31a44f26fbd862ce11d7840188ac Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 22 May 2024 16:43:18 +0000 Subject: [PATCH 41/85] update aurora builds, cpu built, gpu wip --- cime_config/machines/config_machines.xml | 10 +++++----- .../eamxx/cmake/machine-files/aurora.cmake | 14 +++++++------ .../eamxx/cmake/machine-files/auroracpu.cmake | 20 ++++++++++++++----- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 4f3910c6fb2..1d99b26bd10 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3920,9 +3920,9 @@ /soft/modulefiles - /soft/restricted/CNDA/updates/modulefiles - cray-python/3.9.13.1 - spack-pe-gcc/0.4-rc1 cmake/3.26.4-gcc-testing + /soft/restricted/CNDA/updates/modulefiles + spack-pe-gcc/0.6.1-23.275.2 cmake + python/3.10.10 oneapi/release/2023.12.15.001 @@ -4030,8 +4030,8 @@ /soft/modulefiles /soft/restricted/CNDA/updates/modulefiles - cray-python/3.9.13.1 - spack-pe-gcc/0.4-rc1 cmake/3.26.4-gcc-testing + spack-pe-gcc/0.6.1-23.275.2 cmake + python/3.10.10 oneapi/release/2023.12.15.001 diff --git a/components/eamxx/cmake/machine-files/aurora.cmake b/components/eamxx/cmake/machine-files/aurora.cmake index 874b73e34eb..cdebb0500a6 100644 --- 
a/components/eamxx/cmake/machine-files/aurora.cmake +++ b/components/eamxx/cmake/machine-files/aurora.cmake @@ -21,12 +21,14 @@ set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-func -set(NETCDF_PATH "$ENV{NETCDF_PATH}") -set(NETCDF_C_PATH "$ENV{NETCDF_PATH}") +#this is needed for cime builds! +set(NETCDF_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(NETCDF_DIR "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(NETCDF_C_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") +set(NETCDF_C "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") #this one is for rrtmgp -set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}") -set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") - +set(NetCDF_C_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(PNETCDF_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007") diff --git a/components/eamxx/cmake/machine-files/auroracpu.cmake b/components/eamxx/cmake/machine-files/auroracpu.cmake index 839c4c09814..1d8f246f63f 100644 --- a/components/eamxx/cmake/machine-files/auroracpu.cmake +++ b/components/eamxx/cmake/machine-files/auroracpu.cmake @@ -18,12 +18,22 @@ set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -mlong-double-64 -DNDEBUG #set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) -set(NETCDF_PATH "$ENV{NETCDF_PATH}") -set(NETCDF_C_PATH "$ENV{NETCDF_PATH}") + +# +# 
/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 +# /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 +# /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 + + +#this is needed for cime builds! +set(NETCDF_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(NETCDF_DIR "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(NETCDF_C_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") +set(NETCDF_C "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") #this one is for rrtmgp -set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}") -set(PNETCDF_PATH "$ENV{PNETCDF_PATH}") +set(NetCDF_C_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(PNETCDF_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007") From 64a5b920ff75a6c724499878af64e28d4aeac06f Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 17 Jun 2024 21:33:42 +0000 Subject: [PATCH 42/85] committing wip clone that ran on cpu and gpu --- .../machines/cmake_macros/oneapi-ifxgpu.cmake | 6 +++--- cime_config/machines/config_machines.xml | 15 +++++++++------ .../cmake/machine-files/sunspot-pvc.cmake | 18 ++++++++++-------- .../eamxx/src/control/atmosphere_driver.cpp | 2 +- .../atm_process/atmosphere_process_group.cpp | 5 +++-- .../homme/src/share/cxx/SimulationParams.hpp | 2 +- .../cxx/cxx_f90_interface_theta.cpp | 8 +++++++- 7 files changed, 34 insertions(+), 22 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake 
b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake index c798c53ee8b..a4dc8fc1214 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake @@ -6,9 +6,9 @@ if (compile_threaded) endif() #adding -g here leads to linker internal errors -string(APPEND CMAKE_C_FLAGS_RELEASE " -O2") -string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -fpscomp logicals") -string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2") +string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g -gline-tables-only") +string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -fpscomp logicals -g -gline-tables-only") +string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -g -gline-tables-only") string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -fpscomp logicals -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 1d99b26bd10..2ae5c37aaa5 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3415,12 +3415,12 @@ - spack-pe-gcc/0.6.1-23.275.2 cmake python/3.10.10 + spack-pe-gcc/0.7.0-24.086.0 cmake python/3.10.11 - oneapi/eng-compiler/2023.12.15.002 - mpich/icc-all-pmix-gpu/52.2 + oneapi/eng-compiler/2024.04.15.002 + mpich/icc-all-pmix-gpu/20231026 @@ -3436,12 +3436,15 @@ /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf/lib:$ENV{LD_LIBRARY_PATH} + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf/bin:$ENV{PATH} 
list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 1 + 1 level_zero:gpu NO_GPU 0 @@ -3606,12 +3609,12 @@ - spack-pe-gcc/0.6.1-23.275.2 cmake python/3.10.10 + spack-pe-gcc/0.7.0-24.086.0 cmake python/3.10.11 - oneapi/eng-compiler/2023.12.15.002 - mpich/icc-all-pmix-gpu/52.2 + oneapi/eng-compiler/2024.04.15.002 + mpich/icc-all-pmix-gpu/20231026 diff --git a/components/eamxx/cmake/machine-files/sunspot-pvc.cmake b/components/eamxx/cmake/machine-files/sunspot-pvc.cmake index ee984e7a586..f36dc8f473c 100644 --- a/components/eamxx/cmake/machine-files/sunspot-pvc.cmake +++ b/components/eamxx/cmake/machine-files/sunspot-pvc.cmake @@ -31,17 +31,19 @@ set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-func - -set(NETCDF_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_DIR "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") +set(NETCDF_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(NETCDF_DIR "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(NETCDF_C_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(NETCDF_C "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") #this one is for rrtmgp -set(NetCDF_C_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(NETCDF_FORTRAN_PATH "$ENV{NETCDF_PATH}" CACHE STRING "") -set(PNETCDF_PATH "$ENV{PNETCDF_PATH}" CACHE STRING "") +set(NetCDF_C_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +set(PNETCDF_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf" CACHE STRING "") 
+ + +set(PNETCDF_DIR "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf" CACHE STRING "") -set(PNETCDF_DIR "$ENV{PNETCDF_PATH}" CACHE STRING "") diff --git a/components/eamxx/src/control/atmosphere_driver.cpp b/components/eamxx/src/control/atmosphere_driver.cpp index f76b48e270a..d50469a5e9b 100644 --- a/components/eamxx/src/control/atmosphere_driver.cpp +++ b/components/eamxx/src/control/atmosphere_driver.cpp @@ -103,7 +103,7 @@ AtmosphereDriver(const ekat::Comm& atm_comm, AtmosphereDriver::~AtmosphereDriver () { - finalize(); +// finalize(); } void AtmosphereDriver:: diff --git a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp index d13f166d6b8..a975855a14b 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp @@ -439,7 +439,8 @@ std::cout << "OG T field (" <name() << " dt="<name() << " dt="<name() << std::flush; fm->get_field("T_mid").sync_to_host(); @@ -467,7 +468,7 @@ std::cout << "OG proc AFTER RUN " << atm_proc->name() <<"\n"<debug("[EAMxx::run_sequential::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); +// m_atm_logger->debug("[EAMxx::run_sequential::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); #endif std::cout << "OG AFTER mem usage " << atm_proc->name() <<"\n"<(); auto& ff = c.create_if_not_there(); auto& diag = c.create_if_not_there (elems.num_elems(),tracers.num_tracers(), - params.theta_hydrostatic_mode); + (bool)params.theta_hydrostatic_mode); auto& vrm = c.create_if_not_there(elems.num_elems()); auto& fbm = c.create_if_not_there(); From b03d38904244ffb5bf216401f237971e3ba24c87 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 20 Jun 2024 00:30:31 +0000 Subject: [PATCH 43/85] clean up --- .../eamxx/src/control/atmosphere_driver.cpp | 50 ++----------------- 
.../atm_process/atmosphere_process_group.cpp | 46 +++++++++-------- 2 files changed, 29 insertions(+), 67 deletions(-) diff --git a/components/eamxx/src/control/atmosphere_driver.cpp b/components/eamxx/src/control/atmosphere_driver.cpp index d50469a5e9b..c5aa4869914 100644 --- a/components/eamxx/src/control/atmosphere_driver.cpp +++ b/components/eamxx/src/control/atmosphere_driver.cpp @@ -103,7 +103,8 @@ AtmosphereDriver(const ekat::Comm& atm_comm, AtmosphereDriver::~AtmosphereDriver () { -// finalize(); +// std::cout << "OG ------------------ I AM CALLING FINALIZE \n" << std::flush; + finalize(); } void AtmosphereDriver:: @@ -209,39 +210,25 @@ setup_iop () void AtmosphereDriver::create_atm_processes() { - std::cout << "OG cinit 1 \n" << std::flush; - m_atm_logger->info("[EAMxx] create_atm_processes ..."); - std::cout << "OG cinit 2 \n" << std::flush; start_timer("EAMxx::init"); - std::cout << "OG cinit 3 \n" << std::flush; start_timer("EAMxx::create_atm_processes"); - std::cout << "OG cinit 4 \n" << std::flush; // At this point, must have comm and params set. check_ad_status(s_comm_set | s_params_set); - std::cout << "OG cinit 5 \n" << std::flush; // Create the group of processes. This will recursively create the processes // tree, storing also the information regarding parallel execution (if needed). // See AtmosphereProcessGroup class documentation for more details. 
auto& atm_proc_params = m_atm_params.sublist("atmosphere_processes"); - std::cout << "OG cinit 6 \n" << std::flush; atm_proc_params.rename("EAMxx"); - std::cout << "OG cinit 7 \n" << std::flush; atm_proc_params.set("Logger",m_atm_logger); - std::cout << "OG cinit 8 \n" << std::flush; m_atm_process_group = std::make_shared(m_atm_comm,atm_proc_params); - std::cout << "OG cinit 9 \n" << std::flush; m_ad_status |= s_procs_created; - std::cout << "OG cinit 10 \n" << std::flush; stop_timer("EAMxx::create_atm_processes"); - std::cout << "OG cinit 11 \n" << std::flush; stop_timer("EAMxx::init"); - std::cout << "OG cinit 12 \n" << std::flush; m_atm_logger->info("[EAMxx] create_atm_processes ... done!"); - std::cout << "OG cinit 13 \n" << std::flush; } void AtmosphereDriver::create_grids() @@ -1524,25 +1511,16 @@ initialize_constant_field(const FieldIdentifier& fid, void AtmosphereDriver::initialize_atm_procs () { - std::cout << "OG init 1 \n" << std::flush; m_atm_logger->info("[EAMxx] initialize_atm_procs ..."); start_timer("EAMxx::init"); start_timer("EAMxx::initialize_atm_procs"); - std::cout << "OG init 2 \n" << std::flush; // Initialize memory buffer for all atm processes - std::cout << "OG hhhinit 3 \n" << std::flush; m_memory_buffer = std::make_shared(); - std::cout << "OG init 4 \n" << std::flush; - - m_memory_buffer->request_bytes(m_atm_process_group->requested_buffer_size_in_bytes()); - std::cout << "OG init 5 \n" << std::flush; m_memory_buffer->allocate(); - std::cout << "OG init 6 \n" << std::flush; m_atm_process_group->init_buffers(*m_memory_buffer); - std::cout << "OG init 7 \n" << std::flush; const bool restarted_run = m_case_t0 < m_run_t0; @@ -1551,24 +1529,19 @@ void AtmosphereDriver::initialize_atm_procs () setup_surface_coupling_processes(); } - std::cout << "OG init 8 \n" << std::flush; // Initialize the processes m_atm_process_group->initialize(m_current_ts, restarted_run ? 
RunType::Restarted : RunType::Initial); - std::cout << "OG init 9 \n" << std::flush; // Create and add energy and mass conservation check to appropriate atm procs setup_column_conservation_checks(); - std::cout << "OG init 10 \n" << std::flush; // If user requests it, we set up NaN checks for all computed fields after each atm proc run if (m_atm_params.sublist("driver_options").get("check_all_computed_fields_for_nans",true)) { m_atm_process_group->add_postcondition_nan_checks(); } - std::cout << "OG init 11 \n" << std::flush; // Add additional column data fields to pre/postcondition checks (if they exist) add_additional_column_data_to_property_checks(); - std::cout << "OG init 12 \n" << std::flush; if (fvphyshack) { // [CGLL ICs in pg2] See related notes in atmosphere_dynamics.cpp. @@ -1577,14 +1550,12 @@ void AtmosphereDriver::initialize_atm_procs () m_field_mgrs.erase(gn); } - std::cout << "OG init 13 \n" << std::flush; m_ad_status |= s_procs_inited; stop_timer("EAMxx::initialize_atm_procs"); stop_timer("EAMxx::init"); m_atm_logger->info("[EAMxx] initialize_atm_procs ... done!"); - std::cout << "OG init 14 \n" << std::flush; report_res_dep_memory_footprint (); } @@ -1622,30 +1593,21 @@ initialize (const ekat::Comm& atm_comm, void AtmosphereDriver::run (const int dt) { start_timer("EAMxx::run"); - std::cout << "IN DRIVER 1 \n"; - - // Make sure the end of the time step is after the current start_time EKAT_REQUIRE_MSG (dt>0, "Error! Input time step must be positive.\n"); - - std::cout << "IN DRIVER 2 \n"; - // Print current timestamp information m_atm_logger->log(ekat::logger::LogLevel::info, "Atmosphere step = " + std::to_string(m_current_ts.get_num_steps()) + "\n" + " model start-of-step time = " + m_current_ts.get_date_string() + " " + m_current_ts.get_time_string() + "\n"); - - std::cout << "IN DRIVER 3 \n"; // Reset accum fields to 0 // Note: at the 1st timestep this is redundant, since we did it at init, // to ensure t=0 INSTANT output was correct. 
However, it's not a // very expensive operation, so it's not worth the effort of the // nano-opt of removing the call for the 1st timestep. reset_accumulated_fields(); - - std::cout << "IN DRIVER 4 \n" << std::flush; + // Tell the output managers that we're starting a timestep. This is usually // a no-op, but some diags *may* require to do something. E.g., a diag that // computes tendency of an arbitrary quantity may want to store a copy of @@ -1660,7 +1622,6 @@ void AtmosphereDriver::run (const int dt) { // the individual processes, which will be called in the correct order. m_atm_process_group->run(dt); - std::cout << "IN DRIVER 5 \n"<< std::flush; // Some accumulated fields need to be divided by dt at the end of the atm step for (auto fm_it : m_field_mgrs) { const auto& fm = fm_it.second; @@ -1674,20 +1635,15 @@ void AtmosphereDriver::run (const int dt) { } } - std::cout << "IN DRIVER 6 \n"<debug("[EAMxx::run] running output managers..."); for (auto& out_mgr : m_output_managers) { out_mgr.run(m_current_ts); } - #endif #ifdef SCREAM_HAS_MEMORY_USAGE diff --git a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp index a975855a14b..bfef71ef3d5 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp @@ -374,15 +374,19 @@ void AtmosphereProcessGroup::add_additional_data_fields_to_property_checks (cons void AtmosphereProcessGroup::initialize_impl (const RunType run_type) { - int mmm = 0; +#undef D1 +#ifdef D1 + int mmm = 0; +#endif for (auto& atm_proc : m_atm_processes) { - mmm++; - std::cout << "process is "<< mmm << "\n" << std::flush; - std::cout << "process name is "<< atm_proc->name() << "\n"<< std::flush; - - m_atm_logger->flush(); +#ifdef D1 + mmm++; + std::cout << "process is "<< mmm << "\n" << std::flush; + std::cout << "process name is "<< atm_proc->name() << "\n"<< 
std::flush; + m_atm_logger->flush(); +#endif atm_proc->initialize(timestamp(),run_type); #ifdef SCREAM_HAS_MEMORY_USAGE @@ -392,7 +396,7 @@ void AtmosphereProcessGroup::initialize_impl (const RunType run_type) { m_atm_logger->debug("[EAMxx::initialize::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); #endif } - std::cout << "process GROUP is done\n" << std::flush; +// std::cout << "process GROUP is done\n" << std::flush; } void AtmosphereProcessGroup::run_impl (const double dt) { @@ -401,7 +405,7 @@ void AtmosphereProcessGroup::run_impl (const double dt) { } else { run_parallel(dt); } - std::cout << "process GROUP RUN is done\n" << std::flush; +// std::cout << "process GROUP RUN is done\n" << std::flush; } void AtmosphereProcessGroup::run_sequential (const double dt) { @@ -409,6 +413,8 @@ void AtmosphereProcessGroup::run_sequential (const double dt) { auto ts = timestamp(); ts += dt; +#undef D2 +#ifdef D2 auto& c = scream::ScreamContext::singleton(); auto ad = c.getNonConst(); const auto gn = "Physics"; @@ -421,8 +427,9 @@ void AtmosphereProcessGroup::run_sequential (const double dt) { fm->get_field("T_mid").sync_to_host(); auto ff = fm->get_field("T_mid").get_view(); +#endif -#if 0 +#ifdef D2 for (int ii = 0; ii < ncols; ii++) for (int jj = 0; jj < nlevs; jj++){ const auto vv = ff(ii,jj); @@ -439,16 +446,15 @@ std::cout << "OG T field (" <name() << " dt="<name() << std::flush; - +//std::cout << "OG proc begin ------------------------ " << atm_proc->name() << std::flush; +#ifdef D2 fm->get_field("T_mid").sync_to_host(); auto ff = fm->get_field("T_mid").get_view(); - -#if 0 +#endif +#ifdef D2 for (int ii = 0; ii < 5; ii++) - for (int jj = 0; jj < nlevs; jj++){ + for (int jj = 0; jj < 3; jj++){ const auto vv = ff(ii,jj); m_atm_logger->info("OG T field ("+std::to_string(ii)+","+std::to_string(jj)+") = "+std::to_string(vv)); std::cout << "OG T field (" <name() <<"\n"<name() <<"\n"<set_update_time_stamps(do_update); // Run the process 
atm_proc->run(dt); -std::cout << "OG proc AFTER RUN " << atm_proc->name() <<"\n"<name() <<"\n"<debug("[EAMxx::run_sequential::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); + m_atm_logger->debug("[EAMxx::run_sequential::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); #endif -std::cout << "OG AFTER mem usage " << atm_proc->name() <<"\n"<name() <<"\n"< Date: Sun, 14 Jul 2024 16:17:32 +0000 Subject: [PATCH 44/85] fix another boolean instance --- components/homme/src/share/control_mod.F90 | 1 + components/homme/src/share/cxx/GllFvRemap.cpp | 4 ++-- components/homme/src/share/cxx/GllFvRemap.hpp | 4 ++-- components/homme/src/share/cxx/GllFvRemapImpl.cpp | 6 +++--- components/homme/src/share/cxx/GllFvRemapImpl.hpp | 5 +++-- components/homme/src/share/gllfvremap_mod.F90 | 14 ++++++++------ components/homme/src/share/namelist_mod.F90 | 6 ++++++ .../src/theta-l_kokkos/cxx/EquationOfState.hpp | 4 ++-- 8 files changed, 27 insertions(+), 17 deletions(-) diff --git a/components/homme/src/share/control_mod.F90 b/components/homme/src/share/control_mod.F90 index 0e9494f5a6c..9c3c599b232 100644 --- a/components/homme/src/share/control_mod.F90 +++ b/components/homme/src/share/control_mod.F90 @@ -43,6 +43,7 @@ module control_mod ! flag used by preqx, theta-l and theta-c models ! should be renamed to "hydrostatic_mode" logical, public :: theta_hydrostatic_mode + integer, public :: theta_hydrostatic_mode_integer integer, public :: tstep_type= 5 ! 
preqx timestepping options diff --git a/components/homme/src/share/cxx/GllFvRemap.cpp b/components/homme/src/share/cxx/GllFvRemap.cpp index e36dbc14d74..a8f564958d4 100644 --- a/components/homme/src/share/cxx/GllFvRemap.cpp +++ b/components/homme/src/share/cxx/GllFvRemap.cpp @@ -16,7 +16,7 @@ namespace Homme { void init_gllfvremap_c (int nelemd, int np, int nf, int nf_max, - bool theta_hydrostatic_mode, + int theta_hydrostatic_mode, CF90Ptr fv_metdet, CF90Ptr g2f_remapd, CF90Ptr f2g_remapd, CF90Ptr D_f, CF90Ptr Dinv_f) { auto& c = Context::singleton(); @@ -52,7 +52,7 @@ void GllFvRemap::init_boundary_exchanges () { } void GllFvRemap -::init_data (const int nf, const int nf_max, bool theta_hydrostatic_mode, +::init_data (const int nf, const int nf_max, const int theta_hydrostatic_mode, const Real* fv_metdet, const Real* g2f_remapd, const Real* f2g_remapd, const Real* D_f, const Real* Dinv_f) { m_impl->init_data(nf, nf_max, theta_hydrostatic_mode, fv_metdet, diff --git a/components/homme/src/share/cxx/GllFvRemap.hpp b/components/homme/src/share/cxx/GllFvRemap.hpp index 07e4bf58a90..2adff0aeaa9 100644 --- a/components/homme/src/share/cxx/GllFvRemap.hpp +++ b/components/homme/src/share/cxx/GllFvRemap.hpp @@ -40,7 +40,7 @@ class GllFvRemap { typedef Phys2T::const_type CPhys2T; typedef Phys3T::const_type CPhys3T; - void init_data(const int nf, const int nf_max, bool theta_hydrostatic_mode, + void init_data(const int nf, const int nf_max, const int theta_hydrostatic_mode, const Real* fv_metdet, const Real* g2f_remapd, const Real* f2g_remapd, const Real* D_f, const Real* Dinv_f); @@ -81,7 +81,7 @@ class GllFvRemap { extern "C" void init_gllfvremap_c(int nelemd, int np, int nf, int nf_max, - const bool theta_hydrostatic_mode, + const int theta_hydrostatic_mode, CF90Ptr fv_metdet, CF90Ptr g2f_remapd, CF90Ptr f2g_remapd, CF90Ptr D_f, CF90Ptr Dinv_f); diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.cpp b/components/homme/src/share/cxx/GllFvRemapImpl.cpp index 
ea1a52f5efd..d4ab5c89f51 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.cpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.cpp @@ -131,7 +131,7 @@ void GllFvRemapImpl::init_boundary_exchanges () { template using FV = Kokkos::View; void GllFvRemapImpl -::init_data (const int nf, const int nf_max, const bool theta_hydrostatic_mode, +::init_data (const int nf, const int nf_max, const int theta_hydrostatic_mode, const Real* fv_metdet_r, const Real* g2f_remapd_r, const Real* f2g_remapd_r, const Real* D_f_r, const Real* Dinv_f_r) { using Kokkos::create_mirror_view; @@ -395,7 +395,7 @@ ::run_dyn_to_fv_phys (const int timeidx, const Phys1T& ps, const Phys1T& phis, c const auto hvcoord = m_hvcoord; const bool use_moisture = m_data.use_moisture; - const bool theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; + const int theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; const bool want_dp_fv_out = dp_fv_out_ptr != nullptr; VPhys2T dp_fv_out; @@ -605,7 +605,7 @@ run_fv_phys_to_dyn (const int timeidx, const CPhys2T& Ts, const CPhys3T& uvs, const auto fT = m_forcing.m_ft; const auto hvcoord = m_hvcoord; const auto dp3d = m_state.m_dp3d; - const bool theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; + const int theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; EquationOfState eos; eos.init(theta_hydrostatic_mode, hvcoord); ElementOps ops; ops.init(hvcoord); const auto tu_ne = m_tu_ne; diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.hpp b/components/homme/src/share/cxx/GllFvRemapImpl.hpp index 11738b2bf45..7388fddb123 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.hpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.hpp @@ -60,7 +60,8 @@ struct GllFvRemapImpl { struct Data { int nelemd, qsize, nf2, n_dss_fld; - bool use_moisture, theta_hydrostatic_mode; + bool use_moisture; + int theta_hydrostatic_mode; static constexpr int nbuf1 = 2, nbuf2 = 1; Buf1 buf1[nbuf1]; @@ -107,7 +108,7 @@ struct GllFvRemapImpl { void 
init_buffers(const FunctorsBuffersManager& fbm); void init_boundary_exchanges(); - void init_data(const int nf, const int nf_max, const bool theta_hydrostatic_mode, + void init_data(const int nf, const int nf_max, const int theta_hydrostatic_mode, const Real* fv_metdet_r, const Real* g2f_remapd_r, const Real* f2g_remapd_r, const Real* D_f_r, const Real* Dinv_f_r); diff --git a/components/homme/src/share/gllfvremap_mod.F90 b/components/homme/src/share/gllfvremap_mod.F90 index 48351259f5c..1628d128602 100644 --- a/components/homme/src/share/gllfvremap_mod.F90 +++ b/components/homme/src/share/gllfvremap_mod.F90 @@ -265,22 +265,24 @@ end subroutine gfr_init subroutine gfr_init_hxx() bind(c) #if KOKKOS_TARGET - use control_mod, only: theta_hydrostatic_mode - use iso_c_binding, only: c_bool + use control_mod, only: theta_hydrostatic_mode_integer + use iso_c_binding, only: c_int interface - subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode, & + subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode_integer, & fv_metdet, g2f_remapd, f2g_remapd, D_f, Dinv_f) bind(c) use iso_c_binding, only: c_bool, c_int, c_double integer (c_int), value, intent(in) :: nelemd, np, nf, nf_max - logical (c_bool), value, intent(in) :: theta_hydrostatic_mode + !logical (c_bool), value, intent(in) :: theta_hydrostatic_mode + integer (c_int), value, intent(in) :: theta_hydrostatic_mode_integer real (c_double), dimension(nf*nf,nelemd), intent(in) :: fv_metdet real (c_double), dimension(np,np,nf_max*nf_max), intent(in) :: g2f_remapd real (c_double), dimension(nf_max*nf_max,np,np), intent(in) :: f2g_remapd real (c_double), dimension(nf*nf,2,2,nelemd), intent(in) :: D_f, Dinv_f end subroutine init_gllfvremap_c end interface - logical (c_bool) :: thm - thm = theta_hydrostatic_mode + integer (c_int) :: thm + !logical (c_bool) :: thm + thm = theta_hydrostatic_mode_integer call init_gllfvremap_c(nelemd, np, gfr%nphys, nphys_max, thm, & gfr%fv_metdet, 
gfr%g2f_remapd, gfr%f2g_remapd, gfr%D_f, gfr%Dinv_f) #endif diff --git a/components/homme/src/share/namelist_mod.F90 b/components/homme/src/share/namelist_mod.F90 index 1d47090182b..8dcceca6652 100644 --- a/components/homme/src/share/namelist_mod.F90 +++ b/components/homme/src/share/namelist_mod.F90 @@ -41,6 +41,7 @@ module namelist_mod runtype, & integration, & ! integration method theta_hydrostatic_mode, & + theta_hydrostatic_mode_integer, & transport_alg , & ! SE Eulerian, classical SL, cell-integrated SL semi_lagrange_cdr_alg, & ! see control_mod for semi_lagrange_* descriptions semi_lagrange_cdr_check, & @@ -452,8 +453,10 @@ subroutine readnl(par) planar_slice = .false. theta_hydrostatic_mode = .true. ! for preqx, this must be .true. + theta_hydrostatic_mode_integer = 1 ! for preqx, this must be .true. #if ( defined MODEL_THETA_C || defined MODEL_THETA_L ) theta_hydrostatic_mode = .false. ! default NH + theta_hydrostatic_mode_integer = 0 ! default NH #endif @@ -850,7 +853,10 @@ subroutine readnl(par) call MPI_bcast(case_planar_bubble,1,MPIlogical_t,par%root,par%comm,ierr) #endif +if(theta_hydrostatic_mode) theta_hydrostatic_mode_integer = 1 +if(.not. 
theta_hydrostatic_mode) theta_hydrostatic_mode_integer = 0 call MPI_bcast(theta_hydrostatic_mode ,1,MPIlogical_t,par%root,par%comm,ierr) + call MPI_bcast(theta_hydrostatic_mode_integer ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(transport_alg ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(semi_lagrange_cdr_alg ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(semi_lagrange_cdr_check ,1,MPIlogical_t,par%root,par%comm,ierr) diff --git a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp index dd97720f1be..99732ee640a 100644 --- a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp @@ -23,7 +23,7 @@ class EquationOfState { EquationOfState () = default; - void init (const bool theta_hydrostatic_mode, + void init (const int theta_hydrostatic_mode, const HybridVCoord& hvcoord) { m_theta_hydrostatic_mode = theta_hydrostatic_mode; m_hvcoord = hvcoord; @@ -250,7 +250,7 @@ class EquationOfState { public: - bool m_theta_hydrostatic_mode; + int m_theta_hydrostatic_mode; HybridVCoord m_hvcoord; }; From 8b9716523ecec4ba919160feaf775d32a23134e9 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sun, 14 Jul 2024 21:50:59 +0000 Subject: [PATCH 45/85] add mpi options --- .../oneapi-ifxgpu_sunspot-pvc.cmake | 2 +- cime_config/machines/config_machines.xml | 64 ++++++++++++++----- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake index 91f65665a14..7f3d9ab5d21 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_sunspot-pvc.cmake @@ -14,7 +14,7 @@ endif() string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On 
-DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off -DCMAKE_CXX_FLAGS='-fsycl-device-code-split=per_kernel'") string(APPEND SYCL_FLAGS " -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") -set(SCREAM_MPI_ON_DEVICE OFF CACHE STRING "") +set(SCREAM_MPI_ON_DEVICE ON CACHE STRING "") diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 2ae5c37aaa5..6e90e5fc6d8 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3400,7 +3400,7 @@ -np {{ total_tasks }} --label -ppn {{ tasks_per_node }} - --cpu-bind depth -envall + --cpu-bind=list:0-7:8-15:16-23:24-31:32-39:40-47:52-59:60-67:68-75:76-83:84-91:92-99 -envall -d $ENV{OMP_NUM_THREADS} $ENV{GPU_TILE_COMPACT} @@ -3436,27 +3436,61 @@ /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/pnetcdf - /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf/lib:$ENV{LD_LIBRARY_PATH} - /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf/bin:$ENV{PATH} - list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf/lib:$ENV{LD_LIBRARY_PATH} + /lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf/bin:$ENV{PATH} + + list:0-7:8-15:16-23:24-31:32-39:40-47:52-59:60-67:68-75:76-83:84-91:92-99 1 - + + + + + - 1 - level_zero:gpu - NO_GPU - 0 - disable + 1 + + + + 1 + recursive_doubling + + + + + 1 + 1 + + disable disable - 1 - 4000MB + + + 0 + + 4000MB 0 - /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh - 131072 + + /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh + + 131072 20 - + memhooks + 
warn + + verbose,granularity=thread,balanced 128M From 226bf95586c93590e434ef445a76408865190ab9 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 16 Jul 2024 21:50:27 +0000 Subject: [PATCH 46/85] turn mmfxx off --- components/eamxx/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 3d59c1010a3..802da033885 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -205,7 +205,7 @@ set(NetCDF_Fortran_PATH ${DEFAULT_NetCDF_Fortran_PATH} CACHE FILEPATH "Path to n set(NetCDF_C_PATH ${DEFAULT_NetCDF_C_PATH} CACHE FILEPATH "Path to netcdf C installation") set(SCREAM_MACHINE ${DEFAULT_SCREAM_MACHINE} CACHE STRING "The CIME/SCREAM name for the current machine") option(SCREAM_MPI_ON_DEVICE "Whether to use device pointers for MPI calls" ON) -option(SCREAM_ENABLE_MAM "Whether to enable MAM aerosol support" ON) +option(SCREAM_ENABLE_MAM "Whether to enable MAM aerosol support" OFF) set(SCREAM_SMALL_KERNELS ${DEFAULT_SMALL_KERNELS} CACHE STRING "Use small, non-monolothic kokkos kernels") if (NOT SCREAM_SMALL_KERNELS) set(EKAT_DISABLE_WORKSPACE_SHARING TRUE CACHE STRING "") From b3fe5d3d407e93118a9aa9625ea56b6c22494cc5 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 16 Jul 2024 21:58:53 +0000 Subject: [PATCH 47/85] update ekat --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index 215d83184dd..7b8e5d883b3 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit 215d83184dddb09891c6de466d9a392b43b283fb +Subproject commit 7b8e5d883b3fef5d9209050f0f65d685de5a86de From cf0e1fbd31d8a9e1e75c4d5b1edfe6ace98bec36 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 16 Aug 2024 16:11:39 +0000 Subject: [PATCH 48/85] testmod files for sunspot --- .../testmods_dirs/scream/sunspot_run/shell_commands | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 
components/eamxx/cime_config/testdefs/testmods_dirs/scream/sunspot_run/shell_commands diff --git a/components/eamxx/cime_config/testdefs/testmods_dirs/scream/sunspot_run/shell_commands b/components/eamxx/cime_config/testdefs/testmods_dirs/scream/sunspot_run/shell_commands new file mode 100644 index 00000000000..6ae48c59d4a --- /dev/null +++ b/components/eamxx/cime_config/testdefs/testmods_dirs/scream/sunspot_run/shell_commands @@ -0,0 +1,6 @@ + +$CIMEROOT/../components/eamxx/scripts/atmchange transport_alg=0 -b +$CIMEROOT/../components/eamxx/scripts/atmchange hypervis_subcycle_q=1 -b +$CIMEROOT/../components/eamxx/scripts/atmchange dt_tracer_factor=2 -b +$CIMEROOT/../components/eamxx/scripts/atmchange tstep_type=9 -b +$CIMEROOT/../components/eamxx/scripts/atmchange theta_hydrostatic_mode=False -b From ad78e435a36f27d04fa1095587f7b593b3d1450a Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 16 Aug 2024 17:11:42 +0000 Subject: [PATCH 49/85] flag to avoid default -fast-math for gpu --- cime_config/machines/cmake_macros/oneapi-ifx.cmake | 2 +- cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifx.cmake b/cime_config/machines/cmake_macros/oneapi-ifx.cmake index 5782a126eca..e590456e9f3 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifx.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifx.cmake @@ -6,7 +6,7 @@ if (compile_threaded) endif() string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -gline-tables-only -g") string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -gline-tables-only -g") -string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -gline-tables-only -g") +string(APPEND CMAKE_CXX_FLAGS_RELEASE " -fp-model precise -O2 -gline-tables-only -g") string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") 
diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake index a4dc8fc1214..faf8748217a 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu.cmake @@ -8,7 +8,7 @@ endif() #adding -g here leads to linker internal errors string(APPEND CMAKE_C_FLAGS_RELEASE " -O2 -g -gline-tables-only") string(APPEND CMAKE_Fortran_FLAGS_RELEASE " -O2 -fpscomp logicals -g -gline-tables-only") -string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O2 -g -gline-tables-only") +string(APPEND CMAKE_CXX_FLAGS_RELEASE " -fp-model precise -O2 -g -gline-tables-only") string(APPEND CMAKE_Fortran_FLAGS_DEBUG " -O0 -g -fpscomp logicals -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created") string(APPEND CMAKE_C_FLAGS_DEBUG " -O0 -g") string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -g") From 91df3960c71e37f2e6ef8c3c8e7a5817f08dff4f Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 17 Aug 2024 20:27:11 +0000 Subject: [PATCH 50/85] start to remove testing setup for caar kernels --- components/homme/src/share/cxx/ErrorDefs.cpp | 2 - .../homme/src/share/cxx/Hommexx_Session.cpp | 11 - .../theta-l_kokkos/cxx/CaarFunctorImpl.hpp | 197 +----------------- 3 files changed, 10 insertions(+), 200 deletions(-) diff --git a/components/homme/src/share/cxx/ErrorDefs.cpp b/components/homme/src/share/cxx/ErrorDefs.cpp index a6eabfa1cf7..ccb4631100d 100644 --- a/components/homme/src/share/cxx/ErrorDefs.cpp +++ b/components/homme/src/share/cxx/ErrorDefs.cpp @@ -45,9 +45,7 @@ void runtime_abort(const std::string& message, int code) { } else { std::cerr << message << std::endl << "Exiting..." 
<< std::endl; finalize_hommexx_session(); -#ifndef TESTER_NOMPI MPI_Abort(MPI_COMM_WORLD, code); -#endif } } diff --git a/components/homme/src/share/cxx/Hommexx_Session.cpp b/components/homme/src/share/cxx/Hommexx_Session.cpp index db50ec27d6c..c93174d2442 100644 --- a/components/homme/src/share/cxx/Hommexx_Session.cpp +++ b/components/homme/src/share/cxx/Hommexx_Session.cpp @@ -7,12 +7,8 @@ #include "Config.hpp" #include "Hommexx_Session.hpp" #include "ExecSpaceDefs.hpp" -#include "Types.hpp" - -#ifndef TESTER_NOMPI #include "profiling.hpp" #include "mpi/Comm.hpp" -#endif #include "Context.hpp" @@ -79,10 +75,7 @@ void initialize_hommexx_session () // If hommexx session is not currently inited, then init it. if (!Session::m_inited) { /* Make certain profiling is only done for code we're working on */ - -#ifndef TESTER_NOMPI profiling_pause(); -#endif /* Set Environment variables to control how many * threads/processors Kokkos uses */ @@ -90,16 +83,12 @@ void initialize_hommexx_session () initialize_kokkos(); } -#ifndef TESTER_NOMPI // Note: at this point, the Comm *should* already be created. 
const auto& comm = Context::singleton().get(); if (comm.root()) { ExecSpace().print_configuration(std::cout, true); print_homme_config_settings (); } -#else - ExecSpace().print_configuration(std::cout, true); -#endif Session::m_inited = true; } diff --git a/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp index 4a861d5c747..9c31422b519 100644 --- a/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp @@ -41,7 +41,7 @@ struct CaarFunctorImpl { struct Buffers { static constexpr int num_3d_scalar_mid_buf = 10; - static constexpr int num_3d_vector_mid_buf = 6; //<-- for vvdp variable + static constexpr int num_3d_vector_mid_buf = 5; static constexpr int num_3d_scalar_int_buf = 6; static constexpr int num_3d_vector_int_buf = 3; @@ -76,9 +76,6 @@ struct CaarFunctorImpl { ExecViewUnmanaged phi_tens; }; - ExecViewUnmanaged vvdp; - //ExecViewUnmanaged vv_tens; - using deriv_type = ReferenceElement::deriv_type; RKStageData m_data; @@ -109,10 +106,6 @@ struct CaarFunctorImpl { struct TagPreExchange {}; struct TagPostExchange {}; -#ifdef TESTER_NOMPI - struct TagPreExchangeTest {}; -#endif - // Policies #ifndef NDEBUG template @@ -124,10 +117,6 @@ struct CaarFunctorImpl { TeamPolicyType m_policy_pre; -#ifdef TESTER_NOMPI - TeamPolicyType m_policy_pre_test; -#endif - Kokkos::RangePolicy m_policy_post; TeamUtils m_tu; @@ -149,9 +138,6 @@ struct CaarFunctorImpl { , m_deriv(ref_FE.get_deriv()) , m_sphere_ops(sphere_ops) , m_policy_pre (Homme::get_default_team_policy(m_num_elems)) -#ifdef TESTER_NOMPI - , m_policy_pre_test (Homme::get_default_team_policy(m_num_elems)) -#endif , m_policy_post (0,m_num_elems*NP*NP) , m_tu(m_policy_pre) { @@ -169,9 +155,6 @@ struct CaarFunctorImpl { , m_theta_advection_form(params.theta_adv_form) , m_pgrad_correction(params.pgrad_correction) , m_policy_pre (Homme::get_default_team_policy(m_num_elems)) 
-#ifdef TESTER_NOMPI - , m_policy_pre_test (Homme::get_default_team_policy(m_num_elems)) -#endif , m_policy_post (0,num_elems*NP*NP) , m_tu(m_policy_pre) {} @@ -273,10 +256,6 @@ struct CaarFunctorImpl { m_buffers.vdp = decltype(m_buffers.vdp )(mem,nslots); mem += m_buffers.vdp.size(); - - vvdp = decltype(vvdp )(mem,nslots); - mem += vvdp.size(); - m_buffers.v_tens = decltype(m_buffers.v_tens )(mem,nslots); mem += m_buffers.v_tens.size(); @@ -370,16 +349,13 @@ struct CaarFunctorImpl { int nerr; Kokkos::parallel_reduce("caar loop pre-boundary exchange", m_policy_pre, *this, nerr); Kokkos::fence(); -#ifdef TESTER_NOMPI - Kokkos::parallel_for("caar loop pre-boundary test", m_policy_pre_test, *this); - Kokkos::fence(); -#endif GPTLstop("caar compute"); - -#ifndef TESTER_NOMPI if (nerr > 0) check_print_abort_on_bad_elems("CaarFunctorImpl::run TagPreExchange", data.n0); + + + GPTLstart("caar_bexchV"); m_bes[data.np1]->exchange(m_geometry.m_rspheremp); Kokkos::fence(); @@ -393,49 +369,10 @@ struct CaarFunctorImpl { } limiter.run(data.np1); -#endif profiling_pause(); } -#ifdef TESTER_NOMPI - KOKKOS_INLINE_FUNCTION - void operator()(const TagPreExchangeTest&, const TeamMember& team) const { - KernelVariables kv(team, m_tu); - test_dp_tendency(kv); - } -#endif - - -#ifndef TESTER_NOMPI -#define K1 -#define K2 -#define K2a -#define K2b -#define K3 -#define K3b -#define K4 -#define K5 -#define K5a -#define K6 -#define K7 - -#else - -#define K1 -#undef K2 -#undef K2a -#undef K2b -#undef K3 -#undef K3b -#undef K4 -#undef K5 -#undef K5a -#undef K6 -#undef K7 -#endif - - KOKKOS_INLINE_FUNCTION void operator()(const TagPreExchange&, const TeamMember &team, int& nerr) const { // In this body, we use '====' to separate sync epochs (delimited by barriers) @@ -443,76 +380,59 @@ struct CaarFunctorImpl { KernelVariables kv(team, m_tu); -#ifdef K1 + // Kokkos::printf("OG before div_vdp\n"); + // =========== EPOCH 1 =========== // compute_div_vdp(kv); -#endif -#ifdef K2 // =========== EPOCH 
2 =========== // kv.team_barrier(); + +// Kokkos::printf("OG before div_vdp\n"); // Computes pi, omega, and phi. const bool ok = compute_scan_quantities(kv); if ( ! ok) nerr = 1; -#endif -#ifdef K2a if (m_rsplit==0 || !m_theta_hydrostatic_mode) { // ============ EPOCH 2.1 =========== // kv.team_barrier(); compute_interface_quantities(kv); +// Kokkos::printf("OG nonhydro \n"); } -#endif -#ifdef K2b if (m_rsplit==0) { // ============= EPOCH 2.2 ============ // kv.team_barrier(); compute_vertical_advection(kv); } -#endif -#ifdef K3 +// Kokkos::printf("OG before accum \n"); // ============= EPOCH 3 ============== // kv.team_barrier(); compute_accumulated_quantities(kv); -#endif -#ifdef K3b // Compute update quantities if (!m_theta_hydrostatic_mode) { compute_w_and_phi_tens (kv); } -#endif -#ifdef K4 compute_dp_and_theta_tens (kv); -#endif -#ifdef K5 // ============= EPOCH 4 =========== // // compute_v_tens reuses some buffers used by compute_dp_and_theta_tens kv.team_barrier(); compute_v_tens (kv); -#endif -#ifdef K5a // Update states if (!m_theta_hydrostatic_mode) { compute_w_and_phi_np1(kv); } -#endif - -#ifdef K6 compute_dp3d_and_theta_np1(kv); -#endif -#ifdef K7 // ============= EPOCH 5 =========== // // v_tens has been computed after last barrier. Need to make sure it's done kv.team_barrier(); compute_v_np1(kv); -#endif } KOKKOS_INLINE_FUNCTION @@ -587,122 +507,25 @@ struct CaarFunctorImpl { const int igp = idx / NP; const int jgp = idx % NP; -//ORIGINAL = subviews + call to div -//do not use vvdp in the !ORIGINAL version -//because it makes caar_ut fail. udp field is probbaly used in other functors, -//reverting to vvdp array will be easy if needed in c1_ut tests. 
- -#define ORIGINAL -//#undef ORIGINAL - auto u = Homme::subview(m_state.m_v,kv.ie,m_data.n0,0,igp,jgp); auto v = Homme::subview(m_state.m_v,kv.ie,m_data.n0,1,igp,jgp); auto dp3d = Homme::subview(m_state.m_dp3d,kv.ie,m_data.n0,igp,jgp); auto udp = Homme::subview(m_buffers.vdp,kv.team_idx,0,igp,jgp); auto vdp = Homme::subview(m_buffers.vdp,kv.team_idx,1,igp,jgp); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV), [&] (const int& ilev) { udp(ilev) = u(ilev)*dp3d(ilev); vdp(ilev) = v(ilev)*dp3d(ilev); - - //version without subviews - //m_buffers.vdp(kv.team_idx,0,igp,jgp,ilev) = m_state.m_dp3d(kv.ie,m_data.n0,igp,jgp,ilev)* - // m_state.m_v(kv.ie,m_data.n0,0,igp,jgp,ilev); - //m_buffers.vdp(kv.team_idx,1,igp,jgp,ilev) = m_state.m_dp3d(kv.ie,m_data.n0,igp,jgp,ilev)* - // m_state.m_v(kv.ie,m_data.n0,1,igp,jgp,ilev); - - //version with vvdp instead of udp - //vvdp(kv.team_idx,0,igp,jgp,ilev) = m_state.m_dp3d(kv.ie,m_data.n0,igp,jgp,ilev)* - // m_state.m_v(kv.ie,m_data.n0,0,igp,jgp,ilev); - //vvdp(kv.team_idx,1,igp,jgp,ilev) = m_state.m_dp3d(kv.ie,m_data.n0,igp,jgp,ilev)* - // m_state.m_v(kv.ie,m_data.n0,1,igp,jgp,ilev); }); }); kv.team_barrier(); // Compute div(vdp) -#ifdef ORIGINAL m_sphere_ops.divergence_sphere(kv, Homme::subview(m_buffers.vdp, kv.team_idx), Homme::subview(m_buffers.div_vdp, kv.team_idx)); -#else - - const Real aa = 1.0, bb=0.0; - - //example of calling _cm - //m_sphere_ops.divergence_sphere_cm(kv, - // Homme::subview(vvdp, kv.team_idx), - // Homme::subview(m_buffers.div_vdp, kv.team_idx), - // aa, bb, NUM_LEV); - -//inlined version of divergence_sphere_cm - const auto& D_inv = Homme::subview(m_sphere_ops.m_dinv, kv.ie); - const auto& metdet = Homme::subview(m_sphere_ops.m_metdet, kv.ie); - ExecViewUnmanaged gv_buf( - Homme::subview(m_sphere_ops.vector_buf_ml,kv.team_idx, 0).data()); - constexpr int np_squared = NP * NP; - Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, np_squared), - [&](const int loop_idx) { - const int igp = 
loop_idx / NP; - const int jgp = loop_idx % NP; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV), [&] (const int& ilev) { - //const auto& v0 = vvdp(kv.team_idx,0, igp, jgp, ilev); - //const auto& v1 = vvdp(kv.team_idx,1, igp, jgp, ilev); - - const auto& v0 = m_buffers.vdp(kv.team_idx,0, igp, jgp, ilev); - const auto& v1 = m_buffers.vdp(kv.team_idx,1, igp, jgp, ilev); - - gv_buf(0,igp,jgp,ilev) = (D_inv(0,0,igp,jgp) * v0 + D_inv(1,0,igp,jgp) * v1) * metdet(igp,jgp); - gv_buf(1,igp,jgp,ilev) = (D_inv(0,1,igp,jgp) * v0 + D_inv(1,1,igp,jgp) * v1) * metdet(igp,jgp); - }); - }); - kv.team_barrier(); - // j, l, i -> i, j, k - constexpr int div_iters = NP * NP; - Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, div_iters), - [&](const int loop_idx) { - const int igp = loop_idx / NP; - const int jgp = loop_idx % NP; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV), [&] (const int& ilev) { - Scalar dudx, dvdy; - for (int kgp = 0; kgp < NP; ++kgp) { - dudx += m_sphere_ops.dvv(jgp, kgp) * gv_buf(0, igp, kgp, ilev); - dvdy += m_sphere_ops.dvv(igp, kgp) * gv_buf(1, kgp, jgp, ilev); - } - combine((dudx + dvdy) * (1.0 / metdet(igp, jgp) * m_sphere_ops.m_scale_factor_inv), - m_buffers.div_vdp(kv.team_idx,igp, jgp, ilev), aa, bb); - }); - }); - kv.team_barrier(); - -#endif } - -#ifdef TESTER_NOMPI -// a kernel only for perf c1 test, to put div(vdp) into dp tendency -// to print it on host for verification - KOKKOS_INLINE_FUNCTION - void test_dp_tendency(KernelVariables &kv) const { - // Compute vdp - Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, NP * NP), - [&](const int idx) { - const int igp = idx / NP; - const int jgp = idx % NP; - - auto div_vdp = Homme::subview(m_buffers.div_vdp,kv.team_idx,igp,jgp); - auto dp_np1 = Homme::subview(m_state.m_dp3d,kv.ie,m_data.np1,igp,jgp); - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team,NUM_LEV), - [&](const int ilev) { - dp_np1(ilev) += div_vdp(ilev); - }); - }); - } -#endif - - 
KOKKOS_INLINE_FUNCTION bool compute_scan_quantities (KernelVariables &kv) const { bool ok = true; From 5c908e36d2022ff2fab4623519d008be85d492cb Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 17 Aug 2024 20:57:30 +0000 Subject: [PATCH 51/85] cleanup --- components/eamxx/CMakeLists.txt | 44 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index c77294d5720..5e0d8a378b4 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -638,28 +638,28 @@ message ("* Summary of EAMxx config settings *") message ("**************************************************") # Shortcut function, to print a variable -#function (print_var var) -# message ("${var}: ${${var}}") -#endfunction () +function (print_var var) + message ("${var}: ${${var}}") +endfunction () -#print_var(EAMXX_ENABLE_GPU) -#print_var(CUDA_BUILD) -#print_var(HIP_BUILD) -#print_var(SCREAM_MACHINE) -#print_var(SCREAM_DYNAMICS_DYCORE) -#print_var(SCREAM_DOUBLE_PRECISION) -#print_var(SCREAM_MIMIC_GPU) -#print_var(SCREAM_FPE) -#print_var(SCREAM_NUM_VERTICAL_LEV) -#print_var(SCREAM_PACK_SIZE) -#print_var(SCREAM_SMALL_PACK_SIZE) -#print_var(SCREAM_POSSIBLY_NO_PACK_SIZE) -#print_var(SCREAM_LINK_FLAGS) -#print_var(SCREAM_FPMODEL) -#print_var(SCREAM_LIB_ONLY) -#print_var(SCREAM_TPL_LIBRARIES) -#print_var(SCREAM_TEST_MAX_THREADS) -#print_var(SCREAM_TEST_THREAD_INC) -#print_var(SCREAM_TEST_MAX_RANKS) +print_var(EAMXX_ENABLE_GPU) +print_var(CUDA_BUILD) +print_var(HIP_BUILD) +print_var(SCREAM_MACHINE) +print_var(SCREAM_DYNAMICS_DYCORE) +print_var(SCREAM_DOUBLE_PRECISION) +print_var(SCREAM_MIMIC_GPU) +print_var(SCREAM_FPE) +print_var(SCREAM_NUM_VERTICAL_LEV) +print_var(SCREAM_PACK_SIZE) +print_var(SCREAM_SMALL_PACK_SIZE) +print_var(SCREAM_POSSIBLY_NO_PACK_SIZE) +print_var(SCREAM_LINK_FLAGS) +print_var(SCREAM_FPMODEL) +print_var(SCREAM_LIB_ONLY) +print_var(SCREAM_TPL_LIBRARIES) 
+print_var(SCREAM_TEST_MAX_THREADS) +print_var(SCREAM_TEST_THREAD_INC) +print_var(SCREAM_TEST_MAX_RANKS) message ("**************************************************") From ba7b030c71cb64cf9d4f0eaf432467a682123f68 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 17 Aug 2024 20:58:22 +0000 Subject: [PATCH 52/85] fix for prev cleanup --- components/eamxx/CMakeLists.txt | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 5e0d8a378b4..926f0773de2 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -566,26 +566,6 @@ if (SCREAM_DOUBLE_PRECISION) endif() endif() -#print_var(SCREAM_MACHINE) -#print_var(EAMXX_ENABLE_GPU) -#print_var(CUDA_BUILD) -#print_var(HIP_BUILD) -#print_var(SYCL_BUILD) -#print_var(SCREAM_DOUBLE_PRECISION) -#print_var(SCREAM_MIMIC_GPU) -#print_var(SCREAM_FPE) -#print_var(SCREAM_NUM_VERTICAL_LEV) -#print_var(SCREAM_PACK_SIZE) -#print_var(SCREAM_SMALL_PACK_SIZE) -#print_var(SCREAM_POSSIBLY_NO_PACK_SIZE) -#print_var(SCREAM_LINK_FLAGS) -#print_var(SCREAM_FPMODEL) -#print_var(SCREAM_LIB_ONLY) -#print_var(SCREAM_TPL_LIBRARIES) -#print_var(SCREAM_TEST_MAX_THREADS) -#print_var(SCREAM_TEST_THREAD_INC) -#print_var(SCREAM_TEST_MAX_RANKS) - # This must be done using add_definitions because it is used to determine # whether to include scream_config.h. 
add_definitions(-DSCREAM_CONFIG_IS_CMAKE) From 237612a3e6f1476ef0ae6049021c02d155ec780e Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 17 Aug 2024 20:59:32 +0000 Subject: [PATCH 53/85] fix qsplit for sunspot EUL tests --- .../testdefs/testmods_dirs/scream/sunspot_run/shell_commands | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eamxx/cime_config/testdefs/testmods_dirs/scream/sunspot_run/shell_commands b/components/eamxx/cime_config/testdefs/testmods_dirs/scream/sunspot_run/shell_commands index 6ae48c59d4a..6ca33b486f9 100644 --- a/components/eamxx/cime_config/testdefs/testmods_dirs/scream/sunspot_run/shell_commands +++ b/components/eamxx/cime_config/testdefs/testmods_dirs/scream/sunspot_run/shell_commands @@ -1,6 +1,6 @@ $CIMEROOT/../components/eamxx/scripts/atmchange transport_alg=0 -b $CIMEROOT/../components/eamxx/scripts/atmchange hypervis_subcycle_q=1 -b -$CIMEROOT/../components/eamxx/scripts/atmchange dt_tracer_factor=2 -b +$CIMEROOT/../components/eamxx/scripts/atmchange dt_tracer_factor=1 -b $CIMEROOT/../components/eamxx/scripts/atmchange tstep_type=9 -b $CIMEROOT/../components/eamxx/scripts/atmchange theta_hydrostatic_mode=False -b From 2403aa387913561cc081c1f67a12c05ca172705f Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 17 Aug 2024 21:12:52 +0000 Subject: [PATCH 54/85] clean prints --- .../eamxx/src/mct_coupling/atm_comp_mct.F90 | 38 ++----------------- 1 file changed, 4 insertions(+), 34 deletions(-) diff --git a/components/eamxx/src/mct_coupling/atm_comp_mct.F90 b/components/eamxx/src/mct_coupling/atm_comp_mct.F90 index 2471280135f..34bbbedcc5c 100644 --- a/components/eamxx/src/mct_coupling/atm_comp_mct.F90 +++ b/components/eamxx/src/mct_coupling/atm_comp_mct.F90 @@ -35,8 +35,8 @@ module atm_comp_mct integer :: mpicom_atm ! mpi communicator integer(IN) :: my_task ! my task in mpi communicator mpicom integer :: inst_index ! number of current instance (ie. 1) - character(len=16) :: inst_name ! 
fullname of current instance (ie. "lnd_0001') - character(len=16) :: inst_suffix = "" ! char string associated with instance (ie. "_0001" or "') + character(len=16) :: inst_name ! fullname of current instance (ie. "lnd_0001") + character(len=16) :: inst_suffix = "" ! char string associated with instance (ie. "_0001" or "") integer(IN) :: ATM_ID ! mct comp id integer(IN),parameter :: master_task=0 ! task number of master task @@ -97,8 +97,6 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) !------------------------------------------------------------------------------- -print *,'OG a 1' - ! Grab some data from the cdata structure (coming from the coupler) call seq_cdata_setptrs(cdata, & id=ATM_ID, & @@ -106,51 +104,38 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) gsMap=gsmap_atm, & dom=dom_atm, & infodata=infodata) -print *, 'OG a 2' call seq_infodata_getData(infodata, atm_phase=phase, start_type=run_type, & username=username, case_name=caseid, hostname=hostname) -print *, 'OG a 3' - call seq_infodata_PutData(infodata, atm_aero=.true.) -print *, 'OG a 4' + call seq_infodata_PutData(infodata, atm_aero=.true.) call seq_infodata_PutData(infodata, atm_prognostic=.true.) -print *, 'OG a 5' if (phase > 1) RETURN -print *, 'OG a 6' ! Determine instance information inst_name = seq_comm_name(ATM_ID) inst_index = seq_comm_inst(ATM_ID) inst_suffix = seq_comm_suffix(ATM_ID) -print *, 'OG a 7' ! Determine communicator group call mpi_comm_rank(mpicom_atm, my_task, ierr) -print *, 'OG a 8' !---------------------------------------------------------------------------- ! 
Init atm.log !---------------------------------------------------------------------------- -print *, 'OG a 9' + if (my_task == master_task) then -print *, 'OG a 10' atm_log_unit = shr_file_getUnit() call shr_file_setIO ('atm_modelio.nml'//trim(inst_suffix),atm_log_unit) inquire(unit=atm_log_unit,name=atm_log_fname) endif -print *, 'OG a 11' call mpi_bcast(atm_log_unit,1,MPI_INTEGER,master_task,mpicom_atm,mpi_ierr) -print *, 'OG a 12' if (ierr /= 0) then -print *, 'OG a 13' print *,'[eamxx] ERROR broadcasting atm.log unit' call mpi_abort(mpicom_atm,ierr,mpi_ierr) end if -print *, 'OG a 14' call mpi_bcast(atm_log_fname,256,MPI_CHARACTER,master_task,mpicom_atm,ierr) -print *, 'OG a 15' if (ierr /= 0) then print *,'[eamxx] ERROR broadcasting atm.log file name' call mpi_abort(mpicom_atm,ierr,mpi_ierr) @@ -161,40 +146,29 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) !---------------------------------------------------------------------------- ! Init the AD -print *, 'OG a 16' call seq_timemgr_EClockGetData(EClock, calendar=calendar, & curr_ymd=cur_ymd, curr_tod=cur_tod, & start_ymd=case_start_ymd, start_tod=case_start_tod) -print *, 'OG a 17' call string_f2c(yaml_fname,yaml_fname_c) -print *, 'OG a 18' call string_f2c(calendar,calendar_c) -print *, 'OG a 19' call string_f2c(trim(atm_log_fname),atm_log_fname_c) -print *, 'OG a 20' call scream_create_atm_instance (mpicom_atm, ATM_ID, yaml_fname_c, atm_log_fname_c, & INT(cur_ymd,kind=C_INT), INT(cur_tod,kind=C_INT), & INT(case_start_ymd,kind=C_INT), INT(case_start_tod,kind=C_INT), & calendar_c) -print *, 'OG a 21' ! Init MCT gsMap call atm_Set_gsMap_mct (mpicom_atm, ATM_ID, gsMap_atm) -print *, 'OG a 22' lsize = mct_gsMap_lsize(gsMap_atm, mpicom_atm) -print *, 'OG a 23' ! Init MCT domain structure call atm_domain_mct (lsize, gsMap_atm, dom_atm) -print *, 'OG a 24' ! 
Init import/export mct attribute vectors call mct_aVect_init(x2a, rList=seq_flds_x2a_fields, lsize=lsize) -print *, 'OG a 25' call mct_aVect_init(a2x, rList=seq_flds_a2x_fields, lsize=lsize) -print *, 'OG a 26' ! Complete AD initialization based on run type if (trim(run_type) == trim(seq_infodata_start_type_start)) then restarted_run = .false. @@ -205,10 +179,8 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) call mpi_abort(mpicom_atm,ierr,mpi_ierr) endif -print *, 'OG a 27' ! Init surface coupling stuff in the AD call scream_set_cpl_indices (x2a, a2x) -print *, 'OG a 28' call scream_setup_surface_coupling (c_loc(import_field_names), c_loc(import_cpl_indices), & c_loc(x2a%rAttr), c_loc(import_vector_components), & @@ -219,13 +191,11 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename ) c_loc(export_constant_multiple), c_loc(do_export_during_init), & num_cpl_exports, num_scream_exports, export_field_size) -print *, 'OG a 29' call string_f2c(trim(caseid),caseid_c) call string_f2c(trim(username),username_c) call string_f2c(trim(hostname),hostname_c) call scream_init_atm (caseid_c,hostname_c,username_c) -print *, 'OG a 30' end subroutine atm_init_mct !=============================================================================== From f5680ced9a193ce8046ed42af5533da1453db0a6 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 17 Aug 2024 21:21:12 +0000 Subject: [PATCH 55/85] clean AD messages --- .../eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp b/components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp index 83bf5ba8741..0bdf90eeb71 100644 --- a/components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp +++ b/components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp @@ -210,28 +210,19 @@ void scream_init_atm (const char* caseid, using namespace scream::control; fpe_guard_wrapper([&](){ - - 
std::cout << "OG s 1 \n" << std::flush; - // Get the ad, then complete initialization auto& ad = get_ad_nonconst(); - std::cout << "OG s 2 \n" << std::flush; // Set provenance info in the driver (will be added to the output files) ad.set_provenance_data (caseid,hostname,username); - std::cout << "OG s 3 \n" << std::flush; // Init all fields, atm processes, and output streams ad.initialize_fields (); - std::cout << "OG s 4 \n" << std::flush; ad.initialize_atm_procs (); - std::cout << "OG s 5 \n" << std::flush; // Do this before init-ing the output managers, // so the fields are valid if outputing at t=0 ad.reset_accumulated_fields(); - std::cout << "OG s 6 \n" << std::flush; ad.initialize_output_managers (); - std::cout << "OG s 7 \n" << std::flush; }); } From 9d0726abcbd50876695461d256ef7630fc2c0adb Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sat, 17 Aug 2024 21:23:37 +0000 Subject: [PATCH 56/85] p3 messages cleanup --- .../physics/p3/eamxx_p3_process_interface.hpp | 27 +------------------ 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp b/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp index 373d2efe7e3..e8abf2b322b 100644 --- a/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp +++ b/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp @@ -203,17 +203,8 @@ class P3Microphysics : public AtmosphereProcess struct p3_postamble { p3_postamble() = default; // Functor for Kokkos loop to pre-process every run step - - //Kokkos::printf("OG postamble start"); - KOKKOS_INLINE_FUNCTION void operator()(const int icol) const { - -//Kokkos::printf("OG postamble P################3\n"); - -#if 1 -#if 1 - for (int ipack=0;ipack Date: Sun, 18 Aug 2024 19:36:28 +0000 Subject: [PATCH 57/85] clean ad file --- components/eamxx/src/control/atmosphere_driver.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git 
a/components/eamxx/src/control/atmosphere_driver.cpp b/components/eamxx/src/control/atmosphere_driver.cpp index d8715f871f8..d1edf8f0dcb 100644 --- a/components/eamxx/src/control/atmosphere_driver.cpp +++ b/components/eamxx/src/control/atmosphere_driver.cpp @@ -103,7 +103,6 @@ AtmosphereDriver(const ekat::Comm& atm_comm, AtmosphereDriver::~AtmosphereDriver () { -// std::cout << "OG ------------------ I AM CALLING FINALIZE \n" << std::flush; finalize(); } @@ -206,7 +205,6 @@ setup_iop () void AtmosphereDriver::create_atm_processes() { - m_atm_logger->info("[EAMxx] create_atm_processes ..."); start_timer("EAMxx::init"); start_timer("EAMxx::create_atm_processes"); @@ -1515,7 +1513,6 @@ void AtmosphereDriver::initialize_atm_procs () // Initialize memory buffer for all atm processes m_memory_buffer = std::make_shared(); - m_memory_buffer->request_bytes(m_atm_process_group->requested_buffer_size_in_bytes()); m_memory_buffer->allocate(); m_atm_process_group->init_buffers(*m_memory_buffer); @@ -1605,7 +1602,7 @@ void AtmosphereDriver::run (const int dt) { // very expensive operation, so it's not worth the effort of the // nano-opt of removing the call for the 1st timestep. reset_accumulated_fields(); - + // Tell the output managers that we're starting a timestep. This is usually // a no-op, but some diags *may* require to do something. 
E.g., a diag that // computes tendency of an arbitrary quantity may want to store a copy of @@ -1636,13 +1633,11 @@ void AtmosphereDriver::run (const int dt) { // Update current time stamps m_current_ts += dt; -#if 1 // Update output streams m_atm_logger->debug("[EAMxx::run] running output managers..."); for (auto& out_mgr : m_output_managers) { out_mgr.run(m_current_ts); } -#endif #ifdef SCREAM_HAS_MEMORY_USAGE long long my_mem_usage = get_mem_usage(MB); From b8ff0b2f5b7a8ce56d48e34984cd85b8a443c6a2 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sun, 18 Aug 2024 19:37:01 +0000 Subject: [PATCH 58/85] add fpmodel flag to rrtmgp --- components/eamxx/src/physics/rrtmgp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt index e1b7b094b47..9ef44c78de7 100644 --- a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt +++ b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt @@ -75,7 +75,7 @@ else () ####### SYCL here if (SYCL_BUILD) set(YAKL_ARCH "SYCL") - set(YAKL_SYCL_FLAGS "-DYAKL_ARCH_SYCL -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64") + set(YAKL_SYCL_FLAGS " -fp-model precise -DYAKL_ARCH_SYCL -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64") string (REPLACE " " ";" YAKL_SYCL_FLAGS_LIST ${YAKL_SYCL_FLAGS}) endif() From 2d487f8fbba07c038103a29a9187da162bd6ebd7 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sun, 18 Aug 2024 19:37:36 +0000 Subject: [PATCH 59/85] turn rrtmgp back on --- .../rrtmgp/eamxx_rrtmgp_process_interface.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp index bafe8648465..1ff3f27b2fc 100644 --- a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp +++ 
b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp @@ -637,9 +637,6 @@ void RRTMGPRadiation::initialize_impl(const RunType /* run_type */) { VALIDATE_KOKKOS(rrtmgp::cloud_optics_lw, rrtmgp::cloud_optics_lw_k); #endif - - std::cout << "After RRTMGP initialize ------------------------ \n"; - // Set property checks for fields in this process add_invariant_check(get_field_out("T_mid"),m_grid,100.0, 500.0,false); @@ -661,13 +658,6 @@ void RRTMGPRadiation::run_impl (const double dt) { using PC = scream::physics::Constants; using CO = scream::ColumnOps; - - std::cout << "RRTMGP IMPL 1 ------------------------ \n"; - std::cout << std::flush ; - - -#if 0 - // get a host copy of lat/lon auto h_lat = m_lat.get_view(); auto h_lon = m_lon.get_view(); @@ -1773,9 +1763,6 @@ void RRTMGPRadiation::run_impl (const double dt) { }); } -#endif - - } // ========================================================================================= From 2afcd0e18dce155c54d4d3f846712ec1336818fe Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sun, 18 Aug 2024 19:38:13 +0000 Subject: [PATCH 60/85] fix printfs --- .../p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/eamxx/src/physics/p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp b/components/eamxx/src/physics/p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp index 9a728b6c57e..5bd6aff72da 100644 --- a/components/eamxx/src/physics/p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp +++ b/components/eamxx/src/physics/p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp @@ -91,7 +91,7 @@ struct UnitWrap::UnitTest::TestP3SubgridVarianceScaling Spack c_scaling = Functions::subgrid_variance_scaling(relvars,1.0); if ( std::abs(c_scaling[0] - 1) > tol ){ - printf("subgrid_variance_scaling should be 1 for expon=1, but is %e. " + Kokkos::printf("subgrid_variance_scaling should be 1 for expon=1, but is %e. 
" "Diff = %e, Tol = %e\n",c_scaling[0],c_scaling[0]-1, tol); errors++;} } @@ -109,7 +109,7 @@ struct UnitWrap::UnitTest::TestP3SubgridVarianceScaling Real fact = std::tgamma(5.0); //factorial(n) = gamma(n+1) if ( std::abs(c_scaling[0] - fact) > tol ){ - printf("subgrid_variance_scaling should be factorial(expon) when relvar=1. " + Kokkos::printf("subgrid_variance_scaling should be factorial(expon) when relvar=1. " "For expon=4, should be %f but is=%f\n Diff = %e, Tol = %e\n", fact,c_scaling[0], c_scaling[0] - fact, tol); errors++;} @@ -142,7 +142,7 @@ struct UnitWrap::UnitTest::TestP3SubgridVarianceScaling const Real max_tol = tol*cond_num; if ( std::abs(targ - c_scaling[0]) > max_tol * targ ){ - printf("When expon=3, subgrid_variance_scaling doesn't match analytic expectation. " + Kokkos::printf("When expon=3, subgrid_variance_scaling doesn't match analytic expectation. " "Val = %e, expected = %e, rel diff = %e, tol = %e\n", c_scaling[0],targ, (targ-c_scaling[0]), max_tol*targ ); errors++; From 57819a49891a634afed5bd65ad45458aef05e3a3 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 19 Aug 2024 21:46:17 +0000 Subject: [PATCH 61/85] fix for testing po spot --- components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp b/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp index 466467919fc..a8864ca9934 100644 --- a/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp +++ b/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp @@ -46,9 +46,9 @@ template ; -using real1dk = typename interface_t::view_t; -using real2dk = typename interface_t::view_t; -using real3dk = typename interface_t::view_t; +using real1dk = typename interface_t::template view_t; +using real2dk = typename interface_t::template view_t; +using real3dk = typename interface_t::template view_t; using MDRP = typename conv::MDRP; static bool all_close(real2dk 
&arr1, real2dk &arr2, double tolerance) From 9a55245f502492e6242ef29bfd5d528bf76864f7 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 26 Aug 2024 22:47:15 +0000 Subject: [PATCH 62/85] switch to kokkos radiation --- components/eamxx/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 8f3b9e2aa42..ac04ee6f54d 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -218,8 +218,8 @@ endif() # #cmakedefine RRTMGP_EXPENSIVE_CHECKS option (SCREAM_RRTMGP_DEBUG "Turn on extra debug checks in RRTMGP" ${SCREAM_DEBUG}) -option(SCREAM_RRTMGP_ENABLE_YAKL "Use YAKL under rrtmgp" TRUE) -option(SCREAM_RRTMGP_ENABLE_KOKKOS "Use Kokkos under rrtmgp" FALSE) +option(SCREAM_RRTMGP_ENABLE_YAKL "Use YAKL under rrtmgp" FALSE) +option(SCREAM_RRTMGP_ENABLE_KOKKOS "Use Kokkos under rrtmgp" TRUE) if (SCREAM_RRTMGP_ENABLE_YAKL) add_definitions("-DRRTMGP_ENABLE_YAKL") endif() From c3397060d27b8d57131a7611edfe74b72b583da7 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 26 Aug 2024 22:47:56 +0000 Subject: [PATCH 63/85] clean --- components/eamxx/src/control/atmosphere_driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eamxx/src/control/atmosphere_driver.cpp b/components/eamxx/src/control/atmosphere_driver.cpp index d1edf8f0dcb..c3da1bacd3f 100644 --- a/components/eamxx/src/control/atmosphere_driver.cpp +++ b/components/eamxx/src/control/atmosphere_driver.cpp @@ -1602,7 +1602,7 @@ void AtmosphereDriver::run (const int dt) { // very expensive operation, so it's not worth the effort of the // nano-opt of removing the call for the 1st timestep. reset_accumulated_fields(); - + // Tell the output managers that we're starting a timestep. This is usually // a no-op, but some diags *may* require to do something. 
E.g., a diag that // computes tendency of an arbitrary quantity may want to store a copy of From 23fef644629323178ec8add0cbb3bb964e11bd77 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 26 Aug 2024 22:50:48 +0000 Subject: [PATCH 64/85] remove debug --- components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp b/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp index e8abf2b322b..c1f4f853bd1 100644 --- a/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp +++ b/components/eamxx/src/physics/p3/eamxx_p3_process_interface.hpp @@ -245,7 +245,6 @@ class P3Microphysics : public AtmosphereProcess diag_eff_radius_qr(icol,ipack) *= 1e6; } // for ipack - //Kokkos::printf("OG postamble 4\n"); // Microphysics can be subcycled together during a single physics timestep, // therefore we must accumulate these fluxes precip_liq_surf_mass(icol) += precip_liq_surf_flux(icol) * PC::RHO_H2O * m_dt; @@ -264,7 +263,6 @@ class P3Microphysics : public AtmosphereProcess heat_flux(icol) = 0.0; } } // operator() - // Local variables int m_ncol, m_npack; double m_dt; From ea26296eaa1e4ecf09b4d581a6ccb8138694062c Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 26 Aug 2024 22:52:42 +0000 Subject: [PATCH 65/85] clean formatting --- .../p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/components/eamxx/src/physics/p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp b/components/eamxx/src/physics/p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp index 5bd6aff72da..5e0cf401bec 100644 --- a/components/eamxx/src/physics/p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp +++ b/components/eamxx/src/physics/p3/tests/p3_subgrid_variance_scaling_unit_tests.cpp @@ -91,9 +91,9 @@ struct UnitWrap::UnitTest::TestP3SubgridVarianceScaling Spack c_scaling = 
Functions::subgrid_variance_scaling(relvars,1.0); if ( std::abs(c_scaling[0] - 1) > tol ){ - Kokkos::printf("subgrid_variance_scaling should be 1 for expon=1, but is %e. " + Kokkos::printf("subgrid_variance_scaling should be 1 for expon=1, but is %e. " "Diff = %e, Tol = %e\n",c_scaling[0],c_scaling[0]-1, tol); - errors++;} + errors++;} } //----------------------------------------------------------------- @@ -109,7 +109,7 @@ struct UnitWrap::UnitTest::TestP3SubgridVarianceScaling Real fact = std::tgamma(5.0); //factorial(n) = gamma(n+1) if ( std::abs(c_scaling[0] - fact) > tol ){ - Kokkos::printf("subgrid_variance_scaling should be factorial(expon) when relvar=1. " + Kokkos::printf("subgrid_variance_scaling should be factorial(expon) when relvar=1. " "For expon=4, should be %f but is=%f\n Diff = %e, Tol = %e\n", fact,c_scaling[0], c_scaling[0] - fact, tol); errors++;} @@ -142,7 +142,7 @@ struct UnitWrap::UnitTest::TestP3SubgridVarianceScaling const Real max_tol = tol*cond_num; if ( std::abs(targ - c_scaling[0]) > max_tol * targ ){ - Kokkos::printf("When expon=3, subgrid_variance_scaling doesn't match analytic expectation. " + Kokkos::printf("When expon=3, subgrid_variance_scaling doesn't match analytic expectation. 
" "Val = %e, expected = %e, rel diff = %e, tol = %e\n", c_scaling[0],targ, (targ-c_scaling[0]), max_tol*targ ); errors++; From 63b03940d8f74361eb708380b0e0c5394edbfbf9 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 26 Aug 2024 22:56:44 +0000 Subject: [PATCH 66/85] clean formatting --- .../eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp b/components/eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp index ab61e4239e2..19d897c63d9 100644 --- a/components/eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp +++ b/components/eamxx/src/physics/shoc/impl/shoc_assumed_pdf_impl.hpp @@ -153,10 +153,10 @@ void Functions::shoc_assumed_pdf( const Smask is_nan_Tl1_1 = isnan(Tl1_1) && active_entries; const Smask is_nan_Tl1_2 = isnan(Tl1_2) && active_entries; if (is_nan_Tl1_1.any() || is_nan_Tl1_2.any()) { - Kokkos::printf("WARNING: NaN Detected in Tl1_1 or Tl1_2!\n"); + Kokkos::printf("WARNING: NaN Detected in Tl1_1 or Tl1_2!\n"); for (int i=0; i Date: Mon, 26 Aug 2024 23:00:45 +0000 Subject: [PATCH 67/85] remove debug in atm_proc_group --- .../atm_process/atmosphere_process_group.cpp | 76 ------------------- 1 file changed, 76 deletions(-) diff --git a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp index bfef71ef3d5..9d5ff488929 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process_group.cpp @@ -8,14 +8,6 @@ #include - -#include "share/scream_session.hpp" -#include "mct_coupling/ScreamContext.hpp" -#include "control/atmosphere_driver.hpp" -#include -#include "physics/share/physics_constants.hpp" - - namespace scream { AtmosphereProcessGroup:: @@ -373,21 +365,7 @@ void AtmosphereProcessGroup::add_additional_data_fields_to_property_checks (cons } void 
AtmosphereProcessGroup::initialize_impl (const RunType run_type) { - -#undef D1 -#ifdef D1 - int mmm = 0; -#endif - for (auto& atm_proc : m_atm_processes) { - -#ifdef D1 - mmm++; - std::cout << "process is "<< mmm << "\n" << std::flush; - std::cout << "process name is "<< atm_proc->name() << "\n"<< std::flush; - m_atm_logger->flush(); -#endif - atm_proc->initialize(timestamp(),run_type); #ifdef SCREAM_HAS_MEMORY_USAGE long long my_mem_usage = get_mem_usage(MB); @@ -396,7 +374,6 @@ void AtmosphereProcessGroup::initialize_impl (const RunType run_type) { m_atm_logger->debug("[EAMxx::initialize::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); #endif } -// std::cout << "process GROUP is done\n" << std::flush; } void AtmosphereProcessGroup::run_impl (const double dt) { @@ -405,7 +382,6 @@ void AtmosphereProcessGroup::run_impl (const double dt) { } else { run_parallel(dt); } -// std::cout << "process GROUP RUN is done\n" << std::flush; } void AtmosphereProcessGroup::run_sequential (const double dt) { @@ -413,74 +389,22 @@ void AtmosphereProcessGroup::run_sequential (const double dt) { auto ts = timestamp(); ts += dt; -#undef D2 -#ifdef D2 - auto& c = scream::ScreamContext::singleton(); - auto ad = c.getNonConst(); - const auto gn = "Physics"; - //const auto gn = "Physics GLL"; - const auto& phys_grid = ad.get_grids_manager()->get_grid(gn); - //auto area = phys_grid->get_geometry_data("area").get_view(); - const auto fm = ad.get_field_mgr(gn); - const int ncols = fm->get_grid()->get_num_local_dofs(); - const int nlevs = fm->get_grid()->get_num_vertical_levels(); - - fm->get_field("T_mid").sync_to_host(); - auto ff = fm->get_field("T_mid").get_view(); -#endif - -#ifdef D2 - for (int ii = 0; ii < ncols; ii++) - for (int jj = 0; jj < nlevs; jj++){ - const auto vv = ff(ii,jj); -m_atm_logger->info("OG T field ("+std::to_string(ii)+","+std::to_string(jj)+") = "+std::to_string(vv)); -std::cout << "OG T field (" <name() << std::flush; - -#ifdef D2 - 
fm->get_field("T_mid").sync_to_host(); - auto ff = fm->get_field("T_mid").get_view(); -#endif -#ifdef D2 - for (int ii = 0; ii < 5; ii++) - for (int jj = 0; jj < 3; jj++){ - const auto vv = ff(ii,jj); -m_atm_logger->info("OG T field ("+std::to_string(ii)+","+std::to_string(jj)+") = "+std::to_string(vv)); -std::cout << "OG T field (" <name() <<"\n"<set_update_time_stamps(do_update); // Run the process atm_proc->run(dt); - -//std::cout << "OG proc AFTER RUN " << atm_proc->name() <<"\n"<debug("[EAMxx::run_sequential::"+atm_proc->name()+"] memory usage: " + std::to_string(max_mem_usage) + "MB"); #endif - -//std::cout << "OG AFTER mem usage " << atm_proc->name() <<"\n"< Date: Mon, 26 Aug 2024 23:06:56 +0000 Subject: [PATCH 68/85] clean up --- components/eamxx/src/share/util/scream_utils.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/components/eamxx/src/share/util/scream_utils.hpp b/components/eamxx/src/share/util/scream_utils.hpp index a5a41774b7b..dbb315fc4b9 100644 --- a/components/eamxx/src/share/util/scream_utils.hpp +++ b/components/eamxx/src/share/util/scream_utils.hpp @@ -13,8 +13,6 @@ #include #include -#include - namespace scream { enum MemoryUnits { From 9deaf95a2064666e809fcd40553ceef42d2273ac Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 6 Sep 2024 18:04:40 +0000 Subject: [PATCH 69/85] sync homme files with homme branch --- components/homme/CMakeLists.txt | 13 +- components/homme/cmake/HommeMacros.cmake | 12 +- components/homme/src/prim_main.F90 | 4 +- .../homme/src/share/cxx/ExecSpaceDefs.cpp | 2 +- .../homme/src/share/cxx/ExecSpaceDefs.hpp | 32 ++- .../homme/src/share/cxx/SimulationParams.hpp | 4 +- .../homme/src/share/cxx/SphereOperators.hpp | 215 ------------------ .../src/share/cxx/utilities/BfbUtils.hpp | 4 - components/homme/src/share/gllfvremap_mod.F90 | 25 +- components/homme/src/share/namelist_mod.F90 | 2 +- components/homme/src/share/viscosity_base.F90 | 40 ++-- .../src/test_src/dcmip2016-supercell.F90 | 36 +-- 
.../src/theta-l_kokkos/config.h.cmake.in | 2 - .../theta-l_kokkos/cxx/CaarFunctorImpl.hpp | 8 - .../theta-l_kokkos/cxx/DirkFunctorImpl.hpp | 6 +- .../theta-l_kokkos/cxx/EquationOfState.hpp | 2 +- .../cxx/HyperviscosityFunctorImpl.cpp | 7 +- .../src/theta-l_kokkos/cxx/LimiterFunctor.hpp | 14 +- .../theta-l_kokkos/cxx/RemapStateProvider.hpp | 117 +--------- .../cxx/cxx_f90_interface_theta.cpp | 29 +-- 20 files changed, 107 insertions(+), 467 deletions(-) diff --git a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt index 92f36725118..6fe81180ab5 100644 --- a/components/homme/CMakeLists.txt +++ b/components/homme/CMakeLists.txt @@ -206,7 +206,6 @@ IF (HOMME_USE_KOKKOS) STRING (TOUPPER ${HOMMEXX_EXEC_SPACE} HOMMEXX_EXEC_SPACE_UPPER) - #not user afaik IF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "SYCL") SET (HOMMEXX_SYCL_SPACE ON) ELSEIF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "HIP") @@ -453,11 +452,8 @@ if(HOMME_BUILD_EXECS AND NOT BUILD_HOMME_WITHOUT_PIOLIBRARY) ENDIF () ENDIF () -# If we don't need kokkos we don't need EKAT, and if -# Homme is built in EAMxx EKAT is already built -if("${E3SM_KOKKOS_PATH}" STREQUAL "") -IF (HOMME_USE_KOKKOS AND HOMME_STANDALONE) - # Add ekat's cmake/pkg_build folder to cmake path +IF (HOMME_USE_KOKKOS) + # Add ekat's cmake scripts folders to cmake path set (EKAT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../externals/ekat) set (EKAT_CMAKE_PATH ${EKAT_SOURCE_DIR}/cmake) list(APPEND CMAKE_MODULE_PATH @@ -473,11 +469,6 @@ IF (HOMME_USE_KOKKOS AND HOMME_STANDALONE) include (EkatBuildKokkos) endif() ENDIF () -ELSE () - IF (${HOMME_USE_KOKKOS}) - INCLUDE(Kokkos) - ENDIF () -ENDIF () # This folder contains the CMake macro used to build cxx unit tests # Add unit tests for C++ code diff --git a/components/homme/cmake/HommeMacros.cmake b/components/homme/cmake/HommeMacros.cmake index 1a49c27e852..5610947cb29 100644 --- a/components/homme/cmake/HommeMacros.cmake +++ b/components/homme/cmake/HommeMacros.cmake @@ -112,7 +112,13 @@ 
macro(createTestExec execName execType macroNP macroNC ADD_DEFINITIONS(-DHAVE_CONFIG_H) ADD_EXECUTABLE(${execName} ${EXEC_SOURCES}) - SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE CXX) + + if(SUNSPOT_MACHINE) + SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE CXX) + else() + SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran) + endif() + IF(BUILD_HOMME_WITHOUT_PIOLIBRARY) TARGET_COMPILE_DEFINITIONS(${execName} PUBLIC HOMME_WITHOUT_PIOLIBRARY) ENDIF() @@ -169,8 +175,8 @@ macro(createTestExec execName execType macroNP macroNC TARGET_LINK_LIBRARIES(${execName} -mkl) ELSE() IF (NOT HOMME_FIND_BLASLAPACK) - #TARGET_LINK_LIBRARIES(${execName} lapack blas) - #ADD_DEPENDENCIES(${execName} blas lapack) + TARGET_LINK_LIBRARIES(${execName} lapack blas) + ADD_DEPENDENCIES(${execName} blas lapack) ENDIF() ENDIF() diff --git a/components/homme/src/prim_main.F90 b/components/homme/src/prim_main.F90 index 1d7f48e95a1..d6901151d36 100644 --- a/components/homme/src/prim_main.F90 +++ b/components/homme/src/prim_main.F90 @@ -20,7 +20,7 @@ program prim_main use element_mod, only: element_t use common_io_mod, only: output_dir, infilenames use common_movie_mod, only: nextoutputstep - use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf,t_disablef, t_enablef ! _EXTERNAL + use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf, t_disablef, t_enablef ! 
_EXTERNAL use restart_io_mod , only: restartheader_t, writerestart use hybrid_mod, only: hybrid_create #if (defined MODEL_THETA_L && defined ARKODE) @@ -241,7 +241,7 @@ end subroutine finalize_kokkos_f90 nstep = nextoutputstep(tl) do while(tl%nstep= 2) call t_enablef() diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.cpp b/components/homme/src/share/cxx/ExecSpaceDefs.cpp index 3e337b15f38..c9ca8a0ecd9 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.cpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.cpp @@ -177,7 +177,7 @@ team_num_threads_vectors_for_gpu ( } #else return std::make_pair(4,16); -#endif +#endif } } // namespace Parallel diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.hpp b/components/homme/src/share/cxx/ExecSpaceDefs.hpp index efb457a6317..82f5e803801 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.hpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.hpp @@ -8,6 +8,9 @@ #define HOMMEXX_EXEC_SPACE_DEFS_HPP #include +#ifdef HOMMEXX_BFB_TESTING +#include +#endif #include @@ -65,12 +68,6 @@ using Hommexx_Serial = void; # define HOMMEXX_STATIC static #endif - -// a hack to have a cpu build without rebuilding kokkos -//#define HOMMEXX_SERIAL_SPACE - - - // Selecting the execution space. If no specific request, use Kokkos default // exec space #ifdef HOMMEXX_ENABLE_GPU @@ -353,7 +350,7 @@ struct Dispatch { }); // Broadcast result to all threads by doing sum of one thread's // non-0 value and the rest of the 0s. 
- Kokkos::Impl::CudaTeamMember::vector_reduce( + Kokkos::TeamPolicy::member_type::vector_reduce( Kokkos::Sum(local_tmp)); result = local_tmp; #else @@ -381,6 +378,21 @@ struct Dispatch { lambda, result); } +#ifdef HOMMEXX_BFB_TESTING + // Template for getting the type of the second argument to a lambda + private: + template struct arg2; + + template + struct arg2 + { + using type = typename std::remove_reference< + typename std::tuple_element<1,std::tuple>::type + >::type; + }; + public: +#endif + template static KOKKOS_FORCEINLINE_FUNCTION void parallel_scan ( @@ -393,11 +405,7 @@ struct Dispatch { // serialize parallel scans. // Detect the value type - using value_type = - typename Kokkos::Impl::FunctorAnalysis - < Kokkos::Impl::FunctorPatternInterface::SCAN - , void - , Lambda >::value_type ; + using value_type = typename arg2::type; // All threads init result. value_type accumulator = Kokkos::reduction_identity::sum(); diff --git a/components/homme/src/share/cxx/SimulationParams.hpp b/components/homme/src/share/cxx/SimulationParams.hpp index 63d7a109f44..4f36962b16c 100644 --- a/components/homme/src/share/cxx/SimulationParams.hpp +++ b/components/homme/src/share/cxx/SimulationParams.hpp @@ -42,7 +42,7 @@ struct SimulationParams bool disable_diagnostics; int transport_alg; bool use_cpstar; - int theta_hydrostatic_mode; // Only for theta model + bool theta_hydrostatic_mode; // Only for theta model double dcmip16_mu; // Only for theta model double nu; @@ -105,7 +105,7 @@ inline void SimulationParams::print (std::ostream& out) { out << " use_cpstar: " << (use_cpstar ? "yes" : "no") << "\n"; out << " transport_alg: " << transport_alg << "\n"; out << " disable_diagnostics: " << (disable_diagnostics ? "yes" : "no") << "\n"; - out << " theta_hydrostatic_mode: " << ( (bool)theta_hydrostatic_mode ? "yes" : "no") << "\n"; + out << " theta_hydrostatic_mode: " << (theta_hydrostatic_mode ? "yes" : "no") << "\n"; out << " prescribed_wind: " << (prescribed_wind ? 
"yes" : "no") << "\n"; out << " nsplit: " << nsplit << "\n"; out << " scale_factor: " << scale_factor << "\n"; diff --git a/components/homme/src/share/cxx/SphereOperators.hpp b/components/homme/src/share/cxx/SphereOperators.hpp index e8571c57f3b..c227d97ea70 100644 --- a/components/homme/src/share/cxx/SphereOperators.hpp +++ b/components/homme/src/share/cxx/SphereOperators.hpp @@ -244,8 +244,6 @@ class SphereOperators kv.team_barrier(); } - - KOKKOS_INLINE_FUNCTION void divergence_sphere_wk_sl (const KernelVariables &kv, const ExecViewUnmanaged& v, @@ -298,102 +296,6 @@ class SphereOperators } // end of divergence_sphere_wk_sl - - - - -#if 0 - KOKKOS_INLINE_FUNCTION void - divergence_sphere_wk_sl (const KernelVariables &kv, - const ExecViewUnmanaged& v, - const ExecViewUnmanaged< Real [NP][NP]>& div_v) const - { - // Make sure the buffers have been created - assert (vector_buf_sl.size()>0); - - const auto& D_inv = Homme::subview(m_dinv,kv.ie); - const auto& spheremp = Homme::subview(m_spheremp,kv.ie); - const auto& gv_buf = Homme::subview(vector_buf_sl,kv.team_idx,0); - - // copied from strong divergence as is but without metdet - // conversion to contravariant - - double * ggv = &gv_buf(0,0,0); - - const int s1 = &v(1,0,0)-&v(0,0,0); - const int s2 = &v(0,1,0)-&v(0,0,0); - const int s3 = &v(0,0,1)-&v(0,0,0); - - //not sure we can reuse strides above, so using new ones - const int d1 = &D_inv(1,0,0,0)-&D_inv(0,0,0,0); - const int d2 = &D_inv(0,1,0,0)-&D_inv(0,0,0,0); - const int d3 = &D_inv(0,0,1,0)-&D_inv(0,0,0,0); - const int d4 = &D_inv(0,0,0,1)-&D_inv(0,0,0,0); - - constexpr int np_squared = NP * NP; - Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, np_squared), - [&](const int loop_idx) { - const int igp = loop_idx / NP; - const int jgp = loop_idx % NP; - - int linind1 = s1 * 0 + s2 * igp + s3 * jgp; - const auto& vv0 = (&v(0,0,0) + linind1); - int linind2 = s1 * 1 + s2 * igp + s3 * jgp; - const auto& vv1 = (&v(0,0,0) + linind2); - - int linind3 = d1 * 0 
+ d2 * 0 + d3 * igp + d4 * jgp; - int linind4 = d1 * 1 + d2 * 0 + d3 * igp + d4 * jgp; - *(&gv_buf(0,0,0)+linind1) = *(&D_inv(0,0,0,0)+linind3) * (*vv0) + *(&D_inv(0,0,0,0)+linind4) * (*vv1); - - linind3 = d1 * 0 + d2 * 1 + d3 * igp + d4 * jgp; - linind4 = d1 * 1 + d2 * 1 + d3 * igp + d4 * jgp; - *(&gv_buf(0,0,0)+linind2) = *(&D_inv(0,0,0,0)+linind3) * (*vv0) + *(&D_inv(0,0,0,0)+linind4) * (*vv1); - - }); - kv.team_barrier(); - - // in strong div - // kgp = i in strong code, jgp=j, igp=l - // in weak div, n is like j in strong div, - // n(weak)=j(strong)=jgp - // m(weak)=l(strong)=igp - // j(weak)=i(strong)=kgp - constexpr int div_iters = NP * NP; - // keeping indices' names as in F - - //gv_buf strides are as before, s1 s2 s3 - //dvv, div_v, and spheremp should have the same strides - const int f1 = &dvv(1,0)-&dvv(0,0); - const int f2 = &dvv(0,1)-&dvv(0,0); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, div_iters), - [&](const int loop_idx) { - // Note: for this one time, it is better if m strides faster, due to - // the way the views are accessed. 
- const int mgp = loop_idx % NP; - const int ngp = loop_idx / NP; - Real dd = 0.0; - for (int jgp = 0; jgp < NP; ++jgp) { - int linind1 = s1 * 0 + s2 * ngp + s3 * jgp; - int linind2 = s1 * 1 + s2 * jgp + s3 * mgp; - - int l1 = f1 * ngp + f2 * jgp; - int l2 = f1 * jgp + f2 * mgp; - int l3 = f1 * jgp + f2 * ngp; - - dd -= ( *(&spheremp(0,0)+l1) * *(&gv_buf(0,0,0)+linind1) * *(&dvv(0,0)+l2) + - *(&spheremp(0,0)+l2) * *(&gv_buf(0,0,0)+linind2) * *(&dvv(0,0)+l3)) * - m_scale_factor_inv; - } - int l1 = f1 * ngp + f2 * mgp; - *(&div_v(0,0)+l1) = dd; - }); - kv.team_barrier(); - - } // end of divergence_sphere_wk_sl -#endif - - // Note that divergence_sphere requires scratch space of 3 x NP x NP Reals // This must be called from the device space KOKKOS_INLINE_FUNCTION void @@ -813,116 +715,6 @@ class SphereOperators vorticity_sphere(kv, v, vort, NUM_LEV_REQUEST); } - - - -#if 0 - - template - KOKKOS_INLINE_FUNCTION void - divergence_sphere_wk (const KernelVariables &kv, - // On input, a field whose divergence is sought; on - // output, the view's data are invalid. 
- const ExecViewUnmanaged& v, - const ExecViewUnmanaged& div_v, - const int NUM_LEV_REQUEST) const - { - assert(NUM_LEV_REQUEST>=0); - assert(NUM_LEV_REQUEST<=NUM_LEV_IN); - assert(NUM_LEV_REQUEST<=NUM_LEV_OUT); - - // Make sure the buffers have been created - assert (vector_buf_ml.size()>0); - - const auto& D_inv = Homme::subview(m_dinv, kv.ie); - const auto& spheremp = Homme::subview(m_spheremp, kv.ie); - constexpr int np_squared = NP * NP; - - const int s1 = &v(1,0,0,0)[0]-&v(0,0,0,0)[0]; - const int s2 = &v(0,1,0,0)[0]-&v(0,0,0,0)[0]; - const int s3 = &v(0,0,1,0)[0]-&v(0,0,0,0)[0]; - const int s4 = &v(0,0,0,1)[0]-&v(0,0,0,0)[0]; - - const int d1 = &D_inv(1,0,0,0)-&D_inv(0,0,0,0); - const int d2 = &D_inv(0,1,0,0)-&D_inv(0,0,0,0); - const int d3 = &D_inv(0,0,1,0)-&D_inv(0,0,0,0); - const int d4 = &D_inv(0,0,0,1)-&D_inv(0,0,0,0); - - Real * const vv = &v(0,0,0,0)[0]; - const Real * const dd = &D_inv(0,0,0,0); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, np_squared), - [&](const int loop_idx) { - const int igp = loop_idx / NP; - const int jgp = loop_idx % NP; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV_REQUEST), [&] (const int& ilev) { - - const int l1 = s1*0 + s2*igp + s3*jgp + s4*ilev; - const int l2 = s1*1 + l1; - const Real v0old = vv[l1]; - const Real v1old = vv[l2]; - - int l3 = d1*0 + d2*0 + d3*igp + d4*jgp; - int l4 = d1*1 + d2*0 + d3*igp + d4*jgp; - - vv[l1] = dd[l3] * v0old + dd[l4] * v1old; - - l3 = d1*0 + d2*1 + d3*igp + d4*jgp; - l4 = d1*1 + d2*1 + d3*igp + d4*jgp; - - vv[l2] = dd[l3] * v0old + dd[l4] * v1old; - - }); - }); - kv.team_barrier(); - - const int f1 = &dvv(1,0)-&dvv(0,0); - const int f2 = &dvv(0,1)-&dvv(0,0); - - const Real * const ss = &spheremp(0,0); - const Real * const ddv = &dvv(0,0); - - const int k1 = &div_v(1,0,0)[0]-&div_v(0,0,0)[0]; - const int k2 = &div_v(0,1,0)[0]-&div_v(0,0,0)[0]; - const int k3 = &div_v(0,0,1)[0]-&div_v(0,0,0)[0]; - - constexpr int div_iters = NP * NP; - 
Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team, div_iters), - [&](const int loop_idx) { - // Note: for this one time, it is better if m strides faster, due to - // the way the views are accessed. - const int mgp = loop_idx % NP; - const int ngp = loop_idx / NP; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team, NUM_LEV_REQUEST), [&] (const int& ilev) { - Real dd = 0.0; - // TODO: move multiplication by scale_factor_inv outside the loop - for (int jgp = 0; jgp < NP; ++jgp) { - // Here, v is the temporary buffer, aliased on the input v. - - const int l1 = s1*0 + s2*ngp + s3*jgp + s4*ilev; - const int l2 = s1*1 + s2*jgp + s3*mgp + s4*ilev; - - const int x1 = f1 * ngp + f2 * jgp; - const int x2 = f1 * jgp + f2 * mgp; - const int x3 = f1 * jgp + f2 * ngp; - - dd -= (ss[x1] * vv[l1] * ddv[x2] + - ss[x2] * vv[l2] * ddv[x3]) * - m_scale_factor_inv; - } - //div_v(ngp, mgp, ilev) = dd; - const int l1 = k1 * ngp + k2 * mgp + k3 * ilev; - *(&div_v(0,0,0)[0]+l1) = dd; - }); - }); - kv.team_barrier(); - - }//end of divergence_sphere_wk - -#else - - - template KOKKOS_INLINE_FUNCTION void divergence_sphere_wk (const KernelVariables &kv, @@ -978,13 +770,6 @@ class SphereOperators }//end of divergence_sphere_wk - -#endif - - - - - template KOKKOS_INLINE_FUNCTION void divergence_sphere_wk (const KernelVariables &kv, diff --git a/components/homme/src/share/cxx/utilities/BfbUtils.hpp b/components/homme/src/share/cxx/utilities/BfbUtils.hpp index 2d85109e2a2..7fb4d042f7f 100644 --- a/components/homme/src/share/cxx/utilities/BfbUtils.hpp +++ b/components/homme/src/share/cxx/utilities/BfbUtils.hpp @@ -64,11 +64,7 @@ KOKKOS_INLINE_FUNCTION ScalarType int_pow (ScalarType val, int k) { constexpr int max_shift = 30; if (k<0) { -#ifdef KOKKOS_ENABLE_SYCL Kokkos::printf ("k = %d\n",k); -#else - printf ("k = %d\n",k); -#endif Kokkos::abort("int_pow implemented only for k>=0.\n"); } diff --git a/components/homme/src/share/gllfvremap_mod.F90 
b/components/homme/src/share/gllfvremap_mod.F90 index 1628d128602..e927f04aba0 100644 --- a/components/homme/src/share/gllfvremap_mod.F90 +++ b/components/homme/src/share/gllfvremap_mod.F90 @@ -270,9 +270,8 @@ subroutine gfr_init_hxx() bind(c) interface subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode_integer, & fv_metdet, g2f_remapd, f2g_remapd, D_f, Dinv_f) bind(c) - use iso_c_binding, only: c_bool, c_int, c_double + use iso_c_binding, only: c_int, c_double integer (c_int), value, intent(in) :: nelemd, np, nf, nf_max - !logical (c_bool), value, intent(in) :: theta_hydrostatic_mode integer (c_int), value, intent(in) :: theta_hydrostatic_mode_integer real (c_double), dimension(nf*nf,nelemd), intent(in) :: fv_metdet real (c_double), dimension(np,np,nf_max*nf_max), intent(in) :: g2f_remapd @@ -281,7 +280,6 @@ subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode_inte end subroutine init_gllfvremap_c end interface integer (c_int) :: thm - !logical (c_bool) :: thm thm = theta_hydrostatic_mode_integer call init_gllfvremap_c(nelemd, np, gfr%nphys, nphys_max, thm, & gfr%fv_metdet, gfr%g2f_remapd, gfr%f2g_remapd, gfr%D_f, gfr%Dinv_f) @@ -995,7 +993,7 @@ subroutine gfr_init_R(np, nphys, w_gg, M_gf, R, tau) end do end do end do -! call dgeqrf(np*np, nphys*nphys, R, size(R,1), tau, wrk, np*np*nphys*nphys, info) + call dgeqrf(np*np, nphys*nphys, R, size(R,1), tau, wrk, np*np*nphys*nphys, info) end subroutine gfr_init_R subroutine gfr_init_interp_matrix(npsrc, interp) @@ -1077,13 +1075,12 @@ subroutine gfr_f2g_remapd_op(gfr, R, tau, f, g) ! g = inv(M_sgsg) M_sgf inv(S) M_ff f wrk = reshape(gfr%w_ff(:nf2), (/nf,nf/))*f(:nf,:nf) if (nf == npi) then - -! call dtrsm('l', 'u', 't', 'n', nf2, 1, one, R, size(R,1), wrk, nf2) -! 
call dormqr('l', 'n', nf2, 1, nf2, R, size(R,1), tau, wrk, nf2, wr, np2, info) + call dtrsm('L', 'U', 'T', 'N', nf2, 1, one, R, size(R,1), wrk, nf2) + call dormqr('L', 'N', nf2, 1, nf2, R, size(R,1), tau, wrk, nf2, wr, np2, info) g(:npi,:npi) = wrk else -! call dtrtrs('u', 't', 'n', nf2, 1, R, size(R,1), wrk, nf2, info) -! call dtrtrs('u', 'n', 'n', nf2, 1, R, size(R,1), wrk, nf2, info) + call dtrtrs('U', 'T', 'N', nf2, 1, R, size(R,1), wrk, nf2, info) + call dtrtrs('U', 'N', 'N', nf2, 1, R, size(R,1), wrk, nf2, info) g(:npi,:npi) = zero do fj = 1,nf do fi = 1,nf @@ -1652,7 +1649,7 @@ subroutine gfr_pg1_init(gfr) n = np*np -! call dpotrf('u', n, gfr%pg1sd%Achol, size(gfr%pg1sd%Achol,1), info) + call dpotrf('U', n, gfr%pg1sd%Achol, size(gfr%pg1sd%Achol,1), info) if (info /= 0) print *, 'gfr ERROR> dpotrf returned', info do i = 1,n @@ -1663,8 +1660,8 @@ subroutine gfr_pg1_init(gfr) gfr%pg1sd%s = reshape(gfr%w_gg(:np,:np), (/np*np/)) ! Form R's = c -! call dtrtrs('u', 't', 'n', n, 1, gfr%pg1sd%Achol, size(gfr%pg1sd%Achol,1), & -! gfr%pg1sd%s, np*np, info) + call dtrtrs('U', 'T', 'N', n, 1, gfr%pg1sd%Achol, size(gfr%pg1sd%Achol,1), & + gfr%pg1sd%s, np*np, info) if (info /= 0) print *, 'gfr ERROR> dtrtrs returned', info gfr%pg1sd%sts = sum(gfr%pg1sd%s*gfr%pg1sd%s) end subroutine gfr_pg1_init @@ -1697,11 +1694,11 @@ subroutine gfr_pg1_solve(gfr, s, g) mass = sum(gfr%w_gg*g) ! Solve R'z = b. -! call dtrtrs('u', 't', 'n', n, 1, s%Achol, size(s%Achol,1), x, np*np, info) + call dtrtrs('U', 'T', 'N', n, 1, s%Achol, size(s%Achol,1), x, np*np, info) ! Assemble z + (d - s'z)/(s's) s. x(:n) = x(:n) + ((mass - sum(s%s(:n)*x(:n)))/s%sts)*s%s(:n) ! Solve R x = z + (d - s'z)/(s's) s. -! call dtrtrs('u', 'n', 'n', n, 1, s%Achol, size(s%Achol,1), x, np*np, info) + call dtrtrs('U', 'N', 'N', n, 1, s%Achol, size(s%Achol,1), x, np*np, info) ! Extract g(I). 
g = reshape(x(:n), (/np,np/)) diff --git a/components/homme/src/share/namelist_mod.F90 b/components/homme/src/share/namelist_mod.F90 index 8dcceca6652..a3edaa07e23 100644 --- a/components/homme/src/share/namelist_mod.F90 +++ b/components/homme/src/share/namelist_mod.F90 @@ -41,7 +41,7 @@ module namelist_mod runtype, & integration, & ! integration method theta_hydrostatic_mode, & - theta_hydrostatic_mode_integer, & + theta_hydrostatic_mode_integer, & transport_alg , & ! SE Eulerian, classical SL, cell-integrated SL semi_lagrange_cdr_alg, & ! see control_mod for semi_lagrange_* descriptions semi_lagrange_cdr_check, & diff --git a/components/homme/src/share/viscosity_base.F90 b/components/homme/src/share/viscosity_base.F90 index 968e3e63c01..9c42f57158f 100644 --- a/components/homme/src/share/viscosity_base.F90 +++ b/components/homme/src/share/viscosity_base.F90 @@ -582,6 +582,8 @@ subroutine smooth_phis(phis,elem,hybrid,deriv,nets,nete,minf,numcycle,p2filt,xgl real (kind=real_kind), dimension(nets:nete) :: pmin,pmax real (kind=real_kind) :: phis4(np) integer :: nt,ie,ic,i,j + integer :: minmax_halo =-1 ! -1 = disabled. + ! 0 = recompute each time if (p2filt>=1 .and. np/=4) then call abortmp('ERROR: topo smoothing p2 filter option only supported with np==4') @@ -593,34 +595,42 @@ subroutine smooth_phis(phis,elem,hybrid,deriv,nets,nete,minf,numcycle,p2filt,xgl ! compute local element neighbor min/max do ie=nets,nete - pstens(:,:,ie)=minval(phis(:,:,ie)) - call edgeVpack(edgebuf,pstens(:,:,ie),1,0,ie) + pmin(ie)=minval(phis(:,:,ie)) + pmax(ie)=maxval(phis(:,:,ie)) enddo - call t_startf('smooth_phis_bexchV1') + do ic=1,minmax_halo ! 
take the min/max over three element halo + do ie=nets,nete + pstens(:,:,ie)=pmin(ie) + call edgeVpack(edgebuf,pstens(:,:,ie),1,0,ie) + enddo call bndry_exchangeV(hybrid,edgebuf) - call t_stopf('smooth_phis_bexchV1') - do ie=nets,nete call edgeVunpackMin(edgebuf, pstens(:,:,ie), 1, 0, ie) pmin(ie)=minval(pstens(:,:,ie)) enddo + do ie=nets,nete - pstens(:,:,ie)=maxval(phis(:,:,ie)) + pstens(:,:,ie)=pmax(ie) call edgeVpack(edgebuf,pstens(:,:,ie),1,0,ie) enddo - - call t_startf('smooth_phis_bexchV2') call bndry_exchangeV(hybrid,edgebuf) - call t_stopf('smooth_phis_bexchV2') - do ie=nets,nete call edgeVunpackMax(edgebuf, pstens(:,:,ie), 1, 0, ie) pmax(ie)=maxval(pstens(:,:,ie)) enddo + enddo + do ic=1,numcycle + ! recompute halo each step? + !if (minmax_halo==0) then + ! do ie=nets,nete + ! pmin(ie)=minval(phis(:,:,ie)) + ! pmax(ie)=maxval(phis(:,:,ie)) + ! enddo + !endif if (p2filt>=1) then ! apply p2 filter before laplace do ie=nets,nete @@ -652,16 +662,16 @@ subroutine smooth_phis(phis,elem,hybrid,deriv,nets,nete,minf,numcycle,p2filt,xgl smooth_phis_nudt*pstens(:,:,ie)/elem(ie)%spheremp(:,:) -#if 0 + if (minmax_halo>=0) then ! remove new extrema. could use conservative reconstruction from advection ! but no reason to conserve mean PHI. do i=1,np do j=1,np - if (phis(i,j,ie)>mx) phis(i,j,ie)=pmax(ie) - if (phis(i,j,ie)pmax(ie)) phis(i,j,ie)=pmax(ie) + if (phis(i,j,ie) 0) check_print_abort_on_bad_elems("CaarFunctorImpl::run TagPreExchange", data.n0); - - - GPTLstart("caar_bexchV"); m_bes[data.np1]->exchange(m_geometry.m_rspheremp); Kokkos::fence(); @@ -380,15 +377,12 @@ struct CaarFunctorImpl { KernelVariables kv(team, m_tu); - // Kokkos::printf("OG before div_vdp\n"); - // =========== EPOCH 1 =========== // compute_div_vdp(kv); // =========== EPOCH 2 =========== // kv.team_barrier(); -// Kokkos::printf("OG before div_vdp\n"); // Computes pi, omega, and phi. const bool ok = compute_scan_quantities(kv); if ( ! 
ok) nerr = 1; @@ -397,7 +391,6 @@ struct CaarFunctorImpl { // ============ EPOCH 2.1 =========== // kv.team_barrier(); compute_interface_quantities(kv); -// Kokkos::printf("OG nonhydro \n"); } if (m_rsplit==0) { @@ -406,7 +399,6 @@ struct CaarFunctorImpl { compute_vertical_advection(kv); } -// Kokkos::printf("OG before accum \n"); // ============= EPOCH 3 ============== // kv.team_barrier(); compute_accumulated_quantities(kv); diff --git a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp index 44a8af7fb70..d1676907972 100644 --- a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp @@ -382,12 +382,8 @@ struct DirkFunctorImpl { kv.team_barrier(); if (it >= maxiter) { -#ifdef KOKKOS_ENABLE_SYCL Kokkos::printf("[DIRK] WARNING! Newton reached max iteration count," -#else - printf("[DIRK] WARNING! Newton reached max iteration count," -#endif - " with deltaerr = %3.17f\n", deltaerr); + " with deltaerr = %3.17f\n", deltaerr); nerr = 1; } diff --git a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp index 99732ee640a..a50a28d58f5 100644 --- a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp @@ -250,7 +250,7 @@ class EquationOfState { public: - int m_theta_hydrostatic_mode; + int m_theta_hydrostatic_mode; HybridVCoord m_hvcoord; }; diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp index 24750a570a9..ecde17b5b8d 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp @@ -120,12 +120,7 @@ void HyperviscosityFunctorImpl::init_params(const SimulationParams& params) #ifdef 
HOMMEXX_BFB_TESTING m_process_nh_vars = 1; #else - //m_process_nh_vars = !params.theta_hydrostatic_mode; - if (params.theta_hydrostatic_mode){ - m_process_nh_vars = 0; - }else{ - m_process_nh_vars = 1; - } + m_process_nh_vars = static_cast (not params.theta_hydrostatic_mode); #endif } diff --git a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp index 3e3f8c15d25..7914c0a60e3 100644 --- a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp @@ -141,12 +141,8 @@ struct LimiterFunctor { [&](const int k,Real& result) { #ifndef HOMMEXX_BFB_TESTING if(diff_as_real(k) < 0){ -#ifdef KOKKOS_ENABLE_SYCL Kokkos::printf("WARNING:CAAR: dp3d too small. k=%d, dp3d(k)=%f, dp0=%f \n", -#else - printf("WARNING:CAAR: dp3d too small. k=%d, dp3d(k)=%f, dp0=%f \n", -#endif - k+1,dp_as_real(k),dp0_as_real(k)); + k+1,dp_as_real(k),dp0_as_real(k)); } #endif result = result<=diff_as_real(k) ? 
result : diff_as_real(k); @@ -206,12 +202,8 @@ struct LimiterFunctor { for (int ivec=0; ivec>>>>>>>>>>> m_process_nh_vars " << m_process_nh_vars << " \n"; - std::cout << ">>>>>>>>>>>> m_process_nh_vars_bool " << m_process_nh_vars_bool << " \n"; -if(m_process_nh_vars){ - std::cout << "hey m_process_nh_vars is true \n"; -}else{ - std::cout << "hey m_process_nh_vars is false \n"; -} - -if(m_process_nh_vars_bool){ - std::cout << "hey m_process_nh_vars_bool is true \n"; -}else{ - std::cout << "hey m_process_nh_vars_bool is false \n"; -} - -////////////////////////// put abort if bool assignment failed - - -//if(params.theta_hydrostatic_mode && m_process_nh_vars_bool) -//Kokkos::abort("BOOL assignment failed, (params.theta_hydrostatic_mode && m_process_nh_vars_bool) == TRUE.\n"); - - - - - if (m_process_nh_vars) { - -std::cout << "INSIDE w phi assignment m_process_nh_vars is true \n"; - m_delta_w = decltype(m_delta_w) ("w_i increments",elements.num_elems()); m_delta_phinh = decltype(m_delta_phinh) ("phinh_i increments",elements.num_elems()); } -if(m_process_nh_vars){ - std::cout << "2hey m_process_nh_vars is true \n"; -}else -{ - std::cout << "2hey m_process_nh_vars is false \n"; -} - m_hvcoord = Context::singleton().get(); assert (m_hvcoord.m_inited); -if(m_process_nh_vars){ - std::cout << "3hey m_process_nh_vars is true \n"; -}else -{ - std::cout << "3hey m_process_nh_vars is false \n"; -} - m_eos.init(params.theta_hydrostatic_mode,m_hvcoord); m_elem_ops.init(m_hvcoord); - - if(m_process_nh_vars){ - std::cout << "4hey m_process_nh_vars is true \n"; -}else -{ - std::cout << "4hey m_process_nh_vars is false \n"; -} - - } + } int requested_buffer_size (int num_teams) const { - -if(m_process_nh_vars){ - std::cout << "IN REQUESTED hey m_process_nh_vars is true \n"; -}else -{ - std::cout << "IN REQUESTED hey m_process_nh_vars is false \n"; -} - if (!m_process_nh_vars) { - //if (m_process_nh_vars==0) { return 0; } @@ -169,18 +81,8 @@ if(m_process_nh_vars){ } void 
init_buffers(const FunctorsBuffersManager& fbm, int num_teams) { - -if(m_process_nh_vars){ - std::cout << "IN BUFFERS hey m_process_nh_vars is true \n"; -}else -{ - std::cout << "IN BUFFERS hey m_process_nh_vars is false \n"; -} - if (!m_process_nh_vars) { - - std::cout << "hey we should be returning from init_buffers \n"; - return; + return; } Scalar* mem = reinterpret_cast(fbm.get_memory()); @@ -193,20 +95,17 @@ if(m_process_nh_vars){ KOKKOS_INLINE_FUNCTION int num_states_remap() const { - //return (m_process_nh_vars ? 5 : 3); - return ( (m_process_nh_vars) ? 5 : 3); + return (m_process_nh_vars ? 5 : 3); } KOKKOS_INLINE_FUNCTION int num_states_preprocess() const { - //return (m_process_nh_vars ? 2 : 0); - return ( (m_process_nh_vars) ? 2 : 0); + return (m_process_nh_vars ? 2 : 0); } KOKKOS_INLINE_FUNCTION int num_states_postprocess() const { - //return (m_process_nh_vars ? 2 : 0); - return ((m_process_nh_vars) ? 2 : 0); + return (m_process_nh_vars ? 2 : 0); } KOKKOS_INLINE_FUNCTION diff --git a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp index de51e52a793..40c4ae64dc9 100644 --- a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp @@ -50,17 +50,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, const double& dp3d_thresh, const double& vtheta_thresh, const int& internal_diagnostics_level) { - std::cout << "In transfer routine theta_hydrostatic_mode =" << theta_hydrostatic_mode << "\n"; - - -if(theta_hydrostatic_mode){ - std::cout << " HEEEEEEEEEEEtheta_hydrostatic_mode =TRUE \n"; -}else -{ - std::cout << " HEEEEEEEEEEEtheta_hydrostatic_mode =FALSE \n"; -} - - // Check that the simulation options are supported. This helps us in the future, since we + // Check that the simulation options are supported. 
This helps us in the future, since we // are currently 'assuming' some option have/not have certain values. As we support for more // options in the C++ build, we will remove some checks Errors::check_option("init_simulation_params_c","vert_remap_q_alg",remap_alg,{1,3,10}); @@ -80,7 +70,6 @@ if(theta_hydrostatic_mode){ Errors::check_option("init_simulation_params_c","vtheta_thresh",vtheta_thresh,0.0,Errors::ComparisonOp::GT); Errors::check_option("init_simulation_params_c","nu_div",nu_div,0.0,Errors::ComparisonOp::GT); Errors::check_option("init_simulation_params_c","theta_advection_form",theta_adv_form,{0,1}); - Errors::check_option("init_simulation_params_c","theta_hydrostatic_mode",theta_hydrostatic_mode,{0,1}); #ifndef SCREAM Errors::check_option("init_simulation_params_c","nsplit",nsplit,1,Errors::ComparisonOp::GE); #else @@ -105,13 +94,6 @@ if(theta_hydrostatic_mode){ params.theta_adv_form = AdvectionForm::NonConservative; } -// if (theta_hydrostatic_mode==0) { -// params.theta_hydrostatic_mode = false; -// } else { -// params.theta_hydrostatic_mode = true; -// } - - params.limiter_option = limiter_option; params.rsplit = rsplit; params.qsplit = qsplit; @@ -134,7 +116,7 @@ if(theta_hydrostatic_mode){ params.use_moisture = (bool)use_moisture; params.use_cpstar = (bool)use_cpstar; params.transport_alg = transport_alg; - params.theta_hydrostatic_mode = theta_hydrostatic_mode; + params.theta_hydrostatic_mode = (bool)theta_hydrostatic_mode; params.dcmip16_mu = dcmip16_mu; params.nsplit = nsplit; params.scale_factor = scale_factor; @@ -144,11 +126,6 @@ if(theta_hydrostatic_mode){ params.vtheta_thresh = vtheta_thresh; params.internal_diagnostics_level = internal_diagnostics_level; - - std::cout << "In transfer routine AFTER ASSIGNMENT params.theta_hydrostatic_mode =" << params.theta_hydrostatic_mode << "\n"; - - - if (time_step_type==5) { //5 stage, 3rd order, explicit params.time_step_type = TimeStepType::ttype5; @@ -383,7 +360,7 @@ void init_functors_c (const int& 
allocate_buffer) auto& hvf = c.create_if_not_there(); auto& ff = c.create_if_not_there(); auto& diag = c.create_if_not_there (elems.num_elems(),tracers.num_tracers(), - (bool)params.theta_hydrostatic_mode); + params.theta_hydrostatic_mode); auto& vrm = c.create_if_not_there(elems.num_elems()); auto& fbm = c.create_if_not_there(); From 0de86a2b3118400f1b5bf782dde4f751e5408624 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sun, 8 Sep 2024 20:38:52 +0000 Subject: [PATCH 70/85] partially working switch to flare on aurora --- cime_config/machines/config_batch.xml | 4 +- cime_config/machines/config_machines.xml | 48 +++++++++---------- .../eamxx/cmake/machine-files/aurora.cmake | 14 +++--- .../eamxx/cmake/machine-files/auroracpu.cmake | 22 +++------ 4 files changed, 40 insertions(+), 48 deletions(-) diff --git a/cime_config/machines/config_batch.xml b/cime_config/machines/config_batch.xml index b0a4e8ee692..3abac928ac2 100644 --- a/cime_config/machines/config_batch.xml +++ b/cime_config/machines/config_batch.xml @@ -586,7 +586,7 @@ - /lus/gecko/projects/CSC249ADSE15_CNDA/tools/qsub/throttle + /lus/flare/projects/CSC249ADSE15_CNDA/tools/qsub/throttle EarlyAppAccess workq-route @@ -595,7 +595,7 @@ - /lus/gecko/projects/CSC249ADSE15_CNDA/tools/qsub/throttle + /lus/flare/projects/CSC249ADSE15_CNDA/tools/qsub/throttle EarlyAppAccess workq-route diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index a77c1671b08..dc26bafc0a5 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3753,14 +3753,14 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors oneapi-ifx,oneapi-ifxgpu,gnu mpich CSC249ADSE15_CNDA - /lus/gecko/projects/CSC249ADSE15_CNDA/performance_archive + /lus/flare/projects/CSC249ADSE15_CNDA/performance_archive .* - /lus/gecko/projects/CSC249ADSE15_CNDA/$USER/scratch - /lus/gecko/projects/CSC249ADSE15_CNDA/inputdata - 
/lus/gecko/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 + /lus/flare/projects/CSC249ADSE15_CNDA/$USER/scratch + /lus/flare/projects/CSC249ADSE15_CNDA/inputdata + /lus/flare/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 $CIME_OUTPUT_ROOT/archive/$CASE - /lus/gecko/projects/CSC249ADSE15_CNDA/baselines/$COMPILER - /lus/gecko/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc + /lus/flare/projects/CSC249ADSE15_CNDA/baselines/$COMPILER + /lus/flare/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc 16 e3sm_developer 4 @@ -3783,7 +3783,7 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors - /lus/gecko/projects/CSC249ADSE15_CNDA/modules/lmod.sh + /lus/flare/projects/CSC249ADSE15_CNDA/modules/lmod.sh /soft/sunspot_migrate/soft/packaging/lmod/lmod/init/csh /soft/sunspot_migrate/soft/packaging/lmod/lmod/init/env_modules_python.py module @@ -3811,11 +3811,11 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld - /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 - /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 - /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 - /opt/cray/pe/gcc-libs:/opt/cray/pe/python/3.9.13.1/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} - /opt/cray/pe/python/3.9.13.1/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} + 
/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 + /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 + /lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 + /opt/cray/pe/gcc-libs:/opt/cray/pe/python/3.9.13.1/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} + /opt/cray/pe/python/3.9.13.1/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 @@ -3864,14 +3864,14 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors oneapi-ifx mpich CSC249ADSE15_CNDA - /lus/gecko/projects/CSC249ADSE15_CNDA/performance_archive + /lus/flare/projects/CSC249ADSE15_CNDA/performance_archive .* - /lus/gecko/projects/CSC249ADSE15_CNDA/$USER/scratch - /lus/gecko/projects/CSC249ADSE15_CNDA/inputdata - /lus/gecko/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 + /lus/flare/projects/CSC249ADSE15_CNDA/$USER/scratch + /lus/flare/projects/CSC249ADSE15_CNDA/inputdata + /lus/flare/projects/CSC249ADSE15_CNDA/inputdata/atm/datm7 $CIME_OUTPUT_ROOT/archive/$CASE - /lus/gecko/projects/CSC249ADSE15_CNDA/baselines/$COMPILER - /lus/gecko/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc + /lus/flare/projects/CSC249ADSE15_CNDA/baselines/$COMPILER + /lus/flare/projects/CSC249ADSE15_CNDA/tools/cprnc/cprnc 16 
e3sm_developer 4 @@ -3892,7 +3892,7 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors - /lus/gecko/projects/CSC249ADSE15_CNDA/modules/lmod.sh + /lus/flare/projects/CSC249ADSE15_CNDA/modules/lmod.sh /soft/sunspot_migrate/soft/packaging/lmod/lmod/init/csh /soft/sunspot_migrate/soft/packaging/lmod/lmod/init/env_modules_python.py module @@ -3916,11 +3916,11 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld - /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 - /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 - /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 - /opt/cray/pe/gcc-libs:/opt/cray/pe/python/3.9.13.1/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} - /opt/cray/pe/python/3.9.13.1/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} + /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 + /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 + /lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 + 
/opt/cray/pe/gcc-libs:/opt/cray/pe/python/3.9.13.1/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} + /opt/cray/pe/python/3.9.13.1/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 diff --git a/components/eamxx/cmake/machine-files/aurora.cmake b/components/eamxx/cmake/machine-files/aurora.cmake index cdebb0500a6..c06c82dcbcb 100644 --- a/components/eamxx/cmake/machine-files/aurora.cmake +++ b/components/eamxx/cmake/machine-files/aurora.cmake @@ -22,13 +22,13 @@ set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-func #this is needed for cime builds! 
-set(NETCDF_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(NETCDF_DIR "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(NETCDF_C_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") -set(NETCDF_C "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") +set(NETCDF_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(NETCDF_DIR "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(NETCDF_C_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") +set(NETCDF_C "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") #this one is for rrtmgp -set(NetCDF_C_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007" CACHE STRING "") -set(NETCDF_FORTRAN_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(PNETCDF_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007") +set(NetCDF_C_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(PNETCDF_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007") diff --git a/components/eamxx/cmake/machine-files/auroracpu.cmake b/components/eamxx/cmake/machine-files/auroracpu.cmake index 1d8f246f63f..d620dcb3a0d 100644 --- a/components/eamxx/cmake/machine-files/auroracpu.cmake +++ b/components/eamxx/cmake/machine-files/auroracpu.cmake @@ -17,23 +17,15 @@ set(CMAKE_C_FLAGS "-O3 -DNDEBUG" CACHE STRING "" FORCE) 
set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -mlong-double-64 -DNDEBUG -fortlib" CACHE STRING "" FORCE) #set(CMAKE_EXE_LINKER_FLAGS " -Wl,-\-defsym,main=MAIN_\_ -lifcore -\-intel -Xclang -fsycl-allow-virtual-functions -lsycl -mlong-double-64 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib -L${MPICH_DIR}/lib" CACHE STRING "" FORCE) - - -# -# /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 -# /lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 -# /lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 - - #this is needed for cime builds! -set(NETCDF_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(NETCDF_DIR "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(NETCDF_C_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") -set(NETCDF_C "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") +set(NETCDF_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(NETCDF_DIR "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(NETCDF_C_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") +set(NETCDF_C "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") #this one is for rrtmgp -set(NetCDF_C_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007" CACHE STRING "") -set(NETCDF_FORTRAN_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(PNETCDF_PATH "/lus/gecko/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007") +set(NetCDF_C_PATH 
"/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") +set(PNETCDF_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007") From 9ddec8c232a53c5f53ab4ea4761d1cf5a8238b9c Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Sun, 8 Sep 2024 21:57:10 +0000 Subject: [PATCH 71/85] attempt to use new oneapi --- cime_config/machines/config_machines.xml | 16 ++++++---------- .../eamxx/cmake/machine-files/aurora.cmake | 14 +++++++------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index dc26bafc0a5..981a1b3161b 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3797,11 +3797,7 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors python/3.10.10 - oneapi/release/2023.12.15.001 - - - spack-pe-gcc cmake - gcc/11.2.0 + oneapi/eng-compiler/2024.04.15.002 cray-pals/1.3.3 @@ -3811,11 +3807,11 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors $CIME_OUTPUT_ROOT/$CASE/run $CIME_OUTPUT_ROOT/$CASE/bld - /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007 - /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007 - /lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007 - /opt/cray/pe/gcc-libs:/opt/cray/pe/python/3.9.13.1/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/lib:$ENV{LD_LIBRARY_PATH} - 
/opt/cray/pe/python/3.9.13.1/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007/bin:$ENV{PATH} + /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002 + /lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002 + /lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002 + /opt/cray/pe/gcc-libs:/opt/cray/pe/python/3.9.13.1/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002/lib:$ENV{LD_LIBRARY_PATH} + /opt/cray/pe/python/3.9.13.1/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002/bin:$ENV{PATH} list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203 diff --git a/components/eamxx/cmake/machine-files/aurora.cmake b/components/eamxx/cmake/machine-files/aurora.cmake index c06c82dcbcb..c0a62e3cd20 100644 --- a/components/eamxx/cmake/machine-files/aurora.cmake +++ b/components/eamxx/cmake/machine-files/aurora.cmake @@ -22,13 +22,13 @@ set(CMAKE_EXE_LINKER_FLAGS " -lifcore -\-intel -Xclang -fsycl-allow-virtual-func #this is needed for cime builds! 
-set(NETCDF_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(NETCDF_DIR "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(NETCDF_C_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") -set(NETCDF_C "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007") +set(NETCDF_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002") +set(NETCDF_DIR "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002") +set(NETCDF_C_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002") +set(NETCDF_C "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002") #this one is for rrtmgp -set(NetCDF_C_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2023.05.15.007" CACHE STRING "") -set(NETCDF_FORTRAN_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2023.05.15.007") -set(PNETCDF_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2023.05.15.007") +set(NetCDF_C_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002" CACHE STRING "") +set(NETCDF_FORTRAN_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002") +set(PNETCDF_PATH "/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002") From 55cfe5485f7c1d0c79630665a4c547c293db7f55 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 10 Sep 2024 19:27:22 +0000 Subject: [PATCH 72/85] sync with homme branch --- components/homme/cmake/HommeMacros.cmake | 7 ------ .../homme/src/preqx_kokkos/cxx/CamForcing.cpp | 6 ++--- .../cxx/cxx_f90_interface_preqx.cpp | 4 +-- .../src/preqx_kokkos/cxx/prim_advance_exp.cpp | 
2 +- .../homme/src/share/compose/cedr_kokkos.hpp | 2 +- .../src/share/compose/compose_slmm_siqk.cpp | 2 ++ components/homme/src/share/control_mod.F90 | 1 - components/homme/src/share/cxx/GllFvRemap.cpp | 6 ++--- components/homme/src/share/cxx/GllFvRemap.hpp | 2 +- .../homme/src/share/cxx/GllFvRemapImpl.cpp | 6 ++--- .../homme/src/share/cxx/GllFvRemapImpl.hpp | 5 ++-- .../homme/src/share/cxx/HommexxEnums.hpp | 5 ---- components/homme/src/share/gllfvremap_mod.F90 | 9 ++++--- components/homme/src/share/namelist_mod.F90 | 6 ----- .../theta-l_kokkos/cxx/EquationOfState.hpp | 4 +-- .../cxx/HyperviscosityFunctorImpl.cpp | 2 +- .../cxx/HyperviscosityFunctorImpl.hpp | 2 +- .../src/theta-l_kokkos/prim_driver_mod.F90 | 25 +++++++++---------- components/homme/test_execs/CMakeLists.txt | 3 +++ .../test_execs/share_kokkos_ut/CMakeLists.txt | 4 +-- .../thetal_kokkos_ut/CMakeLists.txt | 3 +++ .../thetal_kokkos_ut/forcing_ut.cpp | 8 +++--- .../thetal_kokkos_ut/gllfvremap_ut.cpp | 4 +-- 23 files changed, 53 insertions(+), 65 deletions(-) diff --git a/components/homme/cmake/HommeMacros.cmake b/components/homme/cmake/HommeMacros.cmake index 5610947cb29..b553a8668eb 100644 --- a/components/homme/cmake/HommeMacros.cmake +++ b/components/homme/cmake/HommeMacros.cmake @@ -112,13 +112,6 @@ macro(createTestExec execName execType macroNP macroNC ADD_DEFINITIONS(-DHAVE_CONFIG_H) ADD_EXECUTABLE(${execName} ${EXEC_SOURCES}) - - if(SUNSPOT_MACHINE) - SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE CXX) - else() - SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran) - endif() - IF(BUILD_HOMME_WITHOUT_PIOLIBRARY) TARGET_COMPILE_DEFINITIONS(${execName} PUBLIC HOMME_WITHOUT_PIOLIBRARY) ENDIF() diff --git a/components/homme/src/preqx_kokkos/cxx/CamForcing.cpp b/components/homme/src/preqx_kokkos/cxx/CamForcing.cpp index 2b1e6514389..36ca5f4a95f 100644 --- a/components/homme/src/preqx_kokkos/cxx/CamForcing.cpp +++ b/components/homme/src/preqx_kokkos/cxx/CamForcing.cpp @@ 
-51,7 +51,7 @@ void state_forcing( void tracer_forcing( const ExecViewUnmanaged &f_q, const HybridVCoord &hvcoord, const TimeLevel &tl, const int &num_q, - const MoistDry &moisture, const double &dt, + const bool &use_moisture, const double &dt, const ExecViewManaged &ps_v, const ExecViewManaged< Scalar * [Q_NUM_TIME_LEVELS][QSIZE_D][NP][NP][NUM_LEV]> &qdp, @@ -61,7 +61,7 @@ void tracer_forcing( const int np1 = tl.n0; const int np1_qdp = tl.n0_qdp; - if (moisture == MoistDry::MOIST) { + if (use_moisture) { // Remove the m_fq_ps_v buffer since it's not actually needed. // Instead apply the forcing to m_ps_v directly // Bonus - one less parallel reduce in dry cases! @@ -161,7 +161,7 @@ void apply_cam_forcing(const Real &dt) { tracers.fq = decltype(tracers.fq)("fq", elems.num_elems(),tracers.num_tracers()); } tracer_forcing(tracers.fq, hvcoord, tl, tracers.num_tracers(), - sim_params.moisture, dt, elems.m_state.m_ps_v, tracers.qdp, tracers.Q); + sim_params.use_moisture, dt, elems.m_state.m_ps_v, tracers.qdp, tracers.Q); GPTLstop("ApplyCAMForcing"); } diff --git a/components/homme/src/preqx_kokkos/cxx/cxx_f90_interface_preqx.cpp b/components/homme/src/preqx_kokkos/cxx/cxx_f90_interface_preqx.cpp index c75143a9836..b433a48c2ab 100644 --- a/components/homme/src/preqx_kokkos/cxx/cxx_f90_interface_preqx.cpp +++ b/components/homme/src/preqx_kokkos/cxx/cxx_f90_interface_preqx.cpp @@ -37,7 +37,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, const int& time_step_type, const int& qsize, const int& state_frequency, const Real& nu, const Real& nu_p, const Real& nu_q, const Real& nu_s, const Real& nu_div, const Real& nu_top, const int& hypervis_order, const int& hypervis_subcycle, const double& hypervis_scaling, - const int& ftype, const bool& prescribed_wind, const bool& moisture, const bool& disable_diagnostics, + const int& ftype, const bool& prescribed_wind, const bool& use_moisture, const bool& disable_diagnostics, const bool& use_cpstar, 
const int& transport_alg, const int& dt_remap_factor, const int& dt_tracer_factor, const double& scale_factor, const double& laplacian_rigid_factor) @@ -90,7 +90,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, params.hypervis_subcycle = hypervis_subcycle; params.hypervis_scaling = hypervis_scaling; params.disable_diagnostics = disable_diagnostics; - params.moisture = (moisture ? MoistDry::MOIST : MoistDry::DRY); + params.use_moisture = use_moisture; params.use_cpstar = use_cpstar; params.transport_alg = transport_alg; // SphereOperators parameters; preqx supports only the sphere. diff --git a/components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp b/components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp index f7c7600aab8..58e58f0160b 100644 --- a/components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp +++ b/components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp @@ -34,7 +34,7 @@ void prim_advance_exp (TimeLevel& tl, const Real dt, const bool compute_diagnost // Determine the tracers time level tl.n0_qdp= -1; - if (params.moisture == MoistDry::MOIST) { + if (params.use_moisture) { tl.update_tracers_levels(params.qsplit); } diff --git a/components/homme/src/share/compose/cedr_kokkos.hpp b/components/homme/src/share/compose/cedr_kokkos.hpp index 42e423e2913..758f4148a9a 100644 --- a/components/homme/src/share/compose/cedr_kokkos.hpp +++ b/components/homme/src/share/compose/cedr_kokkos.hpp @@ -18,7 +18,7 @@ typedef Kokkos::Experimental::HIPSpace CedrGpuSpace; # endif # if defined KOKKOS_ENABLE_SYCL typedef Kokkos::Experimental::SYCL CedrGpuExeSpace; -typedef Kokkos::Experimental::SYCL> CedrGpuSpace; +typedef Kokkos::Experimental::SYCL CedrGpuSpace; # endif #endif diff --git a/components/homme/src/share/compose/compose_slmm_siqk.cpp b/components/homme/src/share/compose/compose_slmm_siqk.cpp index 628c023090c..56564b0b8ca 100644 --- a/components/homme/src/share/compose/compose_slmm_siqk.cpp +++ 
b/components/homme/src/share/compose/compose_slmm_siqk.cpp @@ -60,8 +60,10 @@ class TestSphereToRefKernel { // tol is on dx, not (a,b), so adjust slightly. if ( ! info.success || err > 1e4*tol_) { jinfo.nfails++; +#ifndef KOKKOS_ENABLE_SYCL printf("calc_sphere_to_ref ei %d i %d j %d: nits %d re %1.1e\n", ei, i, j, info.n_iterations, err); +#endif } jinfo.sum_nits += info.n_iterations; jinfo.max_nits = max(jinfo.max_nits, info.n_iterations); diff --git a/components/homme/src/share/control_mod.F90 b/components/homme/src/share/control_mod.F90 index 9c3c599b232..0e9494f5a6c 100644 --- a/components/homme/src/share/control_mod.F90 +++ b/components/homme/src/share/control_mod.F90 @@ -43,7 +43,6 @@ module control_mod ! flag used by preqx, theta-l and theta-c models ! should be renamed to "hydrostatic_mode" logical, public :: theta_hydrostatic_mode - integer, public :: theta_hydrostatic_mode_integer integer, public :: tstep_type= 5 ! preqx timestepping options diff --git a/components/homme/src/share/cxx/GllFvRemap.cpp b/components/homme/src/share/cxx/GllFvRemap.cpp index a8f564958d4..7b0400427f3 100644 --- a/components/homme/src/share/cxx/GllFvRemap.cpp +++ b/components/homme/src/share/cxx/GllFvRemap.cpp @@ -21,8 +21,8 @@ void init_gllfvremap_c (int nelemd, int np, int nf, int nf_max, CF90Ptr f2g_remapd, CF90Ptr D_f, CF90Ptr Dinv_f) { auto& c = Context::singleton(); auto& g = c.get(); - g.init_data(nf, nf_max, theta_hydrostatic_mode, fv_metdet, g2f_remapd, - f2g_remapd, D_f, Dinv_f); + const bool thm = static_cast(theta_hydrostatic_mode); + g.init_data(nf, nf_max, thm, fv_metdet, g2f_remapd, f2g_remapd, D_f, Dinv_f); } GllFvRemap::GllFvRemap () { @@ -52,7 +52,7 @@ void GllFvRemap::init_boundary_exchanges () { } void GllFvRemap -::init_data (const int nf, const int nf_max, const int theta_hydrostatic_mode, +::init_data (const int nf, const int nf_max, const bool theta_hydrostatic_mode, const Real* fv_metdet, const Real* g2f_remapd, const Real* f2g_remapd, const Real* D_f, 
const Real* Dinv_f) { m_impl->init_data(nf, nf_max, theta_hydrostatic_mode, fv_metdet, diff --git a/components/homme/src/share/cxx/GllFvRemap.hpp b/components/homme/src/share/cxx/GllFvRemap.hpp index 2adff0aeaa9..7ebf5a82b71 100644 --- a/components/homme/src/share/cxx/GllFvRemap.hpp +++ b/components/homme/src/share/cxx/GllFvRemap.hpp @@ -40,7 +40,7 @@ class GllFvRemap { typedef Phys2T::const_type CPhys2T; typedef Phys3T::const_type CPhys3T; - void init_data(const int nf, const int nf_max, const int theta_hydrostatic_mode, + void init_data(const int nf, const int nf_max, const bool theta_hydrostatic_mode, const Real* fv_metdet, const Real* g2f_remapd, const Real* f2g_remapd, const Real* D_f, const Real* Dinv_f); diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.cpp b/components/homme/src/share/cxx/GllFvRemapImpl.cpp index d4ab5c89f51..ea1a52f5efd 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.cpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.cpp @@ -131,7 +131,7 @@ void GllFvRemapImpl::init_boundary_exchanges () { template using FV = Kokkos::View; void GllFvRemapImpl -::init_data (const int nf, const int nf_max, const int theta_hydrostatic_mode, +::init_data (const int nf, const int nf_max, const bool theta_hydrostatic_mode, const Real* fv_metdet_r, const Real* g2f_remapd_r, const Real* f2g_remapd_r, const Real* D_f_r, const Real* Dinv_f_r) { using Kokkos::create_mirror_view; @@ -395,7 +395,7 @@ ::run_dyn_to_fv_phys (const int timeidx, const Phys1T& ps, const Phys1T& phis, c const auto hvcoord = m_hvcoord; const bool use_moisture = m_data.use_moisture; - const int theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; + const bool theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; const bool want_dp_fv_out = dp_fv_out_ptr != nullptr; VPhys2T dp_fv_out; @@ -605,7 +605,7 @@ run_fv_phys_to_dyn (const int timeidx, const CPhys2T& Ts, const CPhys3T& uvs, const auto fT = m_forcing.m_ft; const auto hvcoord = m_hvcoord; const auto dp3d = 
m_state.m_dp3d; - const int theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; + const bool theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; EquationOfState eos; eos.init(theta_hydrostatic_mode, hvcoord); ElementOps ops; ops.init(hvcoord); const auto tu_ne = m_tu_ne; diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.hpp b/components/homme/src/share/cxx/GllFvRemapImpl.hpp index 7388fddb123..11738b2bf45 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.hpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.hpp @@ -60,8 +60,7 @@ struct GllFvRemapImpl { struct Data { int nelemd, qsize, nf2, n_dss_fld; - bool use_moisture; - int theta_hydrostatic_mode; + bool use_moisture, theta_hydrostatic_mode; static constexpr int nbuf1 = 2, nbuf2 = 1; Buf1 buf1[nbuf1]; @@ -108,7 +107,7 @@ struct GllFvRemapImpl { void init_buffers(const FunctorsBuffersManager& fbm); void init_boundary_exchanges(); - void init_data(const int nf, const int nf_max, const int theta_hydrostatic_mode, + void init_data(const int nf, const int nf_max, const bool theta_hydrostatic_mode, const Real* fv_metdet_r, const Real* g2f_remapd_r, const Real* f2g_remapd_r, const Real* D_f_r, const Real* Dinv_f_r); diff --git a/components/homme/src/share/cxx/HommexxEnums.hpp b/components/homme/src/share/cxx/HommexxEnums.hpp index 59c8f3c9652..06abbf35adb 100644 --- a/components/homme/src/share/cxx/HommexxEnums.hpp +++ b/components/homme/src/share/cxx/HommexxEnums.hpp @@ -47,11 +47,6 @@ enum class ForcingAlg : int { FORCING_2 = 2, // TODO: Rename FORCING_1 and FORCING_2 to something more descriptive }; -enum class MoistDry { - MOIST, - DRY -}; - enum class AdvectionForm { Conservative, NonConservative diff --git a/components/homme/src/share/gllfvremap_mod.F90 b/components/homme/src/share/gllfvremap_mod.F90 index e927f04aba0..a5f9b3033c9 100644 --- a/components/homme/src/share/gllfvremap_mod.F90 +++ b/components/homme/src/share/gllfvremap_mod.F90 @@ -265,14 +265,14 @@ end subroutine gfr_init 
subroutine gfr_init_hxx() bind(c) #if KOKKOS_TARGET - use control_mod, only: theta_hydrostatic_mode_integer + use control_mod, only: theta_hydrostatic_mode use iso_c_binding, only: c_int interface - subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode_integer, & + subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode, & fv_metdet, g2f_remapd, f2g_remapd, D_f, Dinv_f) bind(c) use iso_c_binding, only: c_int, c_double integer (c_int), value, intent(in) :: nelemd, np, nf, nf_max - integer (c_int), value, intent(in) :: theta_hydrostatic_mode_integer + integer (c_int), value, intent(in) :: theta_hydrostatic_mode real (c_double), dimension(nf*nf,nelemd), intent(in) :: fv_metdet real (c_double), dimension(np,np,nf_max*nf_max), intent(in) :: g2f_remapd real (c_double), dimension(nf_max*nf_max,np,np), intent(in) :: f2g_remapd @@ -280,7 +280,8 @@ subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode_inte end subroutine init_gllfvremap_c end interface integer (c_int) :: thm - thm = theta_hydrostatic_mode_integer + thm = 0 + if (theta_hydrostatic_mode) thm = 1 call init_gllfvremap_c(nelemd, np, gfr%nphys, nphys_max, thm, & gfr%fv_metdet, gfr%g2f_remapd, gfr%f2g_remapd, gfr%D_f, gfr%Dinv_f) #endif diff --git a/components/homme/src/share/namelist_mod.F90 b/components/homme/src/share/namelist_mod.F90 index a3edaa07e23..1d47090182b 100644 --- a/components/homme/src/share/namelist_mod.F90 +++ b/components/homme/src/share/namelist_mod.F90 @@ -41,7 +41,6 @@ module namelist_mod runtype, & integration, & ! integration method theta_hydrostatic_mode, & - theta_hydrostatic_mode_integer, & transport_alg , & ! SE Eulerian, classical SL, cell-integrated SL semi_lagrange_cdr_alg, & ! see control_mod for semi_lagrange_* descriptions semi_lagrange_cdr_check, & @@ -453,10 +452,8 @@ subroutine readnl(par) planar_slice = .false. theta_hydrostatic_mode = .true. ! for preqx, this must be .true. - theta_hydrostatic_mode_integer = 1 ! 
for preqx, this must be .true. #if ( defined MODEL_THETA_C || defined MODEL_THETA_L ) theta_hydrostatic_mode = .false. ! default NH - theta_hydrostatic_mode_integer = 0 ! default NH #endif @@ -853,10 +850,7 @@ subroutine readnl(par) call MPI_bcast(case_planar_bubble,1,MPIlogical_t,par%root,par%comm,ierr) #endif -if(theta_hydrostatic_mode) theta_hydrostatic_mode_integer = 1 -if(.not. theta_hydrostatic_mode) theta_hydrostatic_mode_integer = 0 call MPI_bcast(theta_hydrostatic_mode ,1,MPIlogical_t,par%root,par%comm,ierr) - call MPI_bcast(theta_hydrostatic_mode_integer ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(transport_alg ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(semi_lagrange_cdr_alg ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(semi_lagrange_cdr_check ,1,MPIlogical_t,par%root,par%comm,ierr) diff --git a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp index a50a28d58f5..dd97720f1be 100644 --- a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp @@ -23,7 +23,7 @@ class EquationOfState { EquationOfState () = default; - void init (const int theta_hydrostatic_mode, + void init (const bool theta_hydrostatic_mode, const HybridVCoord& hvcoord) { m_theta_hydrostatic_mode = theta_hydrostatic_mode; m_hvcoord = hvcoord; @@ -250,7 +250,7 @@ class EquationOfState { public: - int m_theta_hydrostatic_mode; + bool m_theta_hydrostatic_mode; HybridVCoord m_hvcoord; }; diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp index ecde17b5b8d..d160e114475 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp @@ -120,7 +120,7 @@ void HyperviscosityFunctorImpl::init_params(const SimulationParams& params) 
#ifdef HOMMEXX_BFB_TESTING m_process_nh_vars = 1; #else - m_process_nh_vars = static_cast (not params.theta_hydrostatic_mode); + m_process_nh_vars = not params.theta_hydrostatic_mode; #endif } diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp index 993d525422f..a55ecbb365f 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp @@ -397,7 +397,7 @@ class HyperviscosityFunctorImpl Buffers m_buffers; HybridVCoord m_hvcoord; - int m_process_nh_vars; + bool m_process_nh_vars; // Policies Kokkos::TeamPolicy m_policy_update_states; diff --git a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 index 262ba19f4b7..eae8544ca86 100644 --- a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 +++ b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 @@ -103,12 +103,12 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) ! Fill the simulation params structures in C++ test_name = TRIM(test_case) // C_NULL_CHAR - if (disable_diagnostics) disable_diagnostics_int=1 - if (.not.disable_diagnostics) disable_diagnostics_int=0 - if (use_moisture) use_moisture_int=1 - if (.not.use_moisture) use_moisture_int=0 - if(theta_hydrostatic_mode) theta_hydrostatic_mode_int=1 - if(.not.theta_hydrostatic_mode) theta_hydrostatic_mode_int=0 + disable_diagnostics_int = 0 + if (disable_diagnostics) disable_diagnostics_int = 1 + use_moisture_int = 0 + if (use_moisture) use_moisture_int = 1 + theta_hydrostatic_mode_int = 0 + if (theta_hydrostatic_mode) theta_hydrostatic_mode_int = 1 call init_simulation_params_c (vert_remap_q_alg, limiter_option, rsplit, qsplit, tstep_type, & qsize, statefreq, nu, nu_p, nu_q, nu_s, nu_div, nu_top, & @@ -358,17 +358,16 @@ subroutine prim_init_kokkos_functors (allocate_buffer) ! ! 
Optional Input ! - integer, intent(in), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally - integer(kind=c_int) :: dummy + logical, intent(in), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally + integer(kind=c_int) :: ab ! Initialize the C++ functors in the C++ context ! If no argument allocate_buffer is present, ! let Homme internally allocate buffers + ab = 1 if (present(allocate_buffer)) then - call init_functors_c (allocate_buffer) - else - dummy=1; - call init_functors_c (dummy) - endif + if (.not. allocate_buffer) ab = 0 + end if + call init_functors_c (ab) ! Initialize boundary exchange structure in C++ call init_boundary_exchanges_c () diff --git a/components/homme/test_execs/CMakeLists.txt b/components/homme/test_execs/CMakeLists.txt index a3113921b02..a007a5532b6 100644 --- a/components/homme/test_execs/CMakeLists.txt +++ b/components/homme/test_execs/CMakeLists.txt @@ -142,8 +142,11 @@ ADD_CUSTOM_TARGET(test-execs) ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} "--output-on-failure") +if(NOT BUILD_HOMME_WITHOUT_PIOLIBRARY) # Force cprnc to be built when make check is run ADD_DEPENDENCIES(check cprnc) +endif() + # Create a target for making the reference data ADD_CUSTOM_TARGET(baseline diff --git a/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt b/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt index 3fbeff9f6f2..bc788462ce6 100644 --- a/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt +++ b/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt @@ -10,7 +10,7 @@ SET(UTILS_TIMING_DIRS ${UTILS_TIMING_SRC_DIR} ${UTILS_TIMING_BIN_DIR}) # Note: need CUDA_BUILD and HOMMEXX_BFB_TESTING here, since the share # unit tests do not include a config.h file SET (COMMON_DEFINITIONS NP=4 NC=4) -IF (CUDA_BUILD OR HIP_BUILD) +IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD) SET(COMMON_DEFINITIONS ${COMMON_DEFINITIONS} HOMMEXX_ENABLE_GPU_F90) ENDIF() IF 
(HOMMEXX_BFB_TESTING) @@ -158,7 +158,7 @@ ELSE() SET (NUM_CPUS 1) ENDIF() cxx_unit_test (sphere_op_ut "${SPHERE_OP_UT_F90_SRCS}" "${SPHERE_OP_UT_CXX_SRCS}" "${SPHERE_OP_UT_INCLUDE_DIRS}" "${CONFIG_DEFINES}" ${NUM_CPUS}) -endif () +endif () #BFB ### Limiters unit test ### diff --git a/components/homme/test_execs/thetal_kokkos_ut/CMakeLists.txt b/components/homme/test_execs/thetal_kokkos_ut/CMakeLists.txt index 205635e918c..e8bf5e20bd0 100644 --- a/components/homme/test_execs/thetal_kokkos_ut/CMakeLists.txt +++ b/components/homme/test_execs/thetal_kokkos_ut/CMakeLists.txt @@ -11,6 +11,8 @@ SET(UTILS_TIMING_BIN_DIR ${HOMME_BINARY_DIR}/utils/cime/CIME/non_py/src/timing) THETAL_KOKKOS_SETUP() # This is needed to compile the lib and test executables with the correct options +#these vars shared between all targets, so changing one var +#for one test only won't work, config is built once and for the last test SET(THIS_CONFIG_IN ${HOMME_SOURCE_DIR}/src/theta-l_kokkos/config.h.cmake.in) SET(THIS_CONFIG_HC ${CMAKE_CURRENT_BINARY_DIR}/config.h.c) SET(THIS_CONFIG_H ${CMAKE_CURRENT_BINARY_DIR}/config.h) @@ -18,6 +20,7 @@ SET (NUM_POINTS 4) SET (NUM_PLEV 12) SET (QSIZE_D 4) SET (PIO_INTERP TRUE) + HommeConfigFile (${THIS_CONFIG_IN} ${THIS_CONFIG_HC} ${THIS_CONFIG_H} ) ADD_LIBRARY(thetal_kokkos_ut_lib diff --git a/components/homme/test_execs/thetal_kokkos_ut/forcing_ut.cpp b/components/homme/test_execs/thetal_kokkos_ut/forcing_ut.cpp index 5e4c51c7ca1..fb301166f42 100644 --- a/components/homme/test_execs/thetal_kokkos_ut/forcing_ut.cpp +++ b/components/homme/test_execs/thetal_kokkos_ut/forcing_ut.cpp @@ -160,8 +160,8 @@ TEST_CASE("forcing", "forcing") { std::cout << "Testing tracers forcing.\n"; for (const bool hydrostatic : {true,false}) { std::cout << " -> hydrostatic mode: " << (hydrostatic ? "true" : "false") << "\n"; - for (const MoistDry moisture : {MoistDry::DRY,MoistDry::MOIST}) { - std::cout << " -> moisture: " << (moisture==MoistDry::MOIST ? 
"moist" : "dry") << "\n"; + for (const bool use_moisture: {false,true}) { + std::cout << " -> moisture: " << (use_moisture ? "moist" : "dry") << "\n"; for (const bool adjustment : {true,false}) { std::cout << " -> adjustment: " << (adjustment ? "true" : "false") << "\n"; @@ -200,8 +200,8 @@ TEST_CASE("forcing", "forcing") { ff.init_buffers(fbm); // Run tracers forcing (cxx and f90) - ff.tracers_forcing(dt,np1,np1_qdp,adjustment,moisture); - tracers_forcing_f90(dt,np1+1,np1_qdp+1,hydrostatic,moisture==MoistDry::MOIST,adjustment); + ff.tracers_forcing(dt,np1,np1_qdp,adjustment,use_moisture); + tracers_forcing_f90(dt,np1+1,np1_qdp+1,hydrostatic,use_moisture,adjustment); // Compare answers Kokkos::deep_copy(h_dp,state.m_dp3d); diff --git a/components/homme/test_execs/thetal_kokkos_ut/gllfvremap_ut.cpp b/components/homme/test_execs/thetal_kokkos_ut/gllfvremap_ut.cpp index 0f14b0c3e55..cf9db941ea1 100644 --- a/components/homme/test_execs/thetal_kokkos_ut/gllfvremap_ut.cpp +++ b/components/homme/test_execs/thetal_kokkos_ut/gllfvremap_ut.cpp @@ -183,7 +183,7 @@ struct Session { p.qsize = qsize; p.hypervis_scaling = 0; p.transport_alg = 0; - p.moisture = MoistDry::MOIST; + p.use_moisture = true; p.theta_hydrostatic_mode = false; p.scale_factor = is_sphere ? PhysicalConstants::rearth0 : 1; p.laplacian_rigid_factor = is_sphere ? 
1/p.scale_factor : 0; @@ -725,7 +725,7 @@ static void test_get_temperature (Session& s) { const auto& sp = c.get(); EquationOfState eos; eos.init(theta_hydrostatic_mode, s.h); ElementOps ops; ops.init(s.h); - const bool use_moisture = sp.moisture == MoistDry::MOIST; + const bool use_moisture = sp.use_moisture; const auto state = c.get(); const auto tracers = c.get(); const auto dp3d = state.m_dp3d; From 7b3460900ee33d293f24e173f0ccc00fcea0c233 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 11 Sep 2024 15:35:38 +0000 Subject: [PATCH 73/85] make consistent change --- .../eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 b/components/eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 index 3ce903b611d..aa6e537baa4 100644 --- a/components/eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 +++ b/components/eamxx/src/dynamics/homme/interface/homme_driver_mod.F90 @@ -192,7 +192,7 @@ subroutine prim_init_model_f90 () bind(c) elem, hybrid, hvcoord, deriv, tl ! Local variable - integer, parameter :: allocate_buffer = 0 + logical, parameter :: allocate_buffer = 0 if (.not. is_data_structures_inited) then call abortmp ("Error! 
'prim_init_data_structures_f90' has not been called yet.\n") From 08a386020a7b9664e79e88fda406a64fd0797c1e Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 17 Sep 2024 14:43:34 +0000 Subject: [PATCH 74/85] sync with homme branch --- .../homme/src/share/cxx/ExecSpaceDefs.cpp | 2 +- .../share/cxx/utilities/scream_tridiag.hpp | 21 +++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.cpp b/components/homme/src/share/cxx/ExecSpaceDefs.cpp index c9ca8a0ecd9..4f3d97135fe 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.cpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.cpp @@ -176,7 +176,7 @@ team_num_threads_vectors_for_gpu ( num_vectors ); } #else - return std::make_pair(4,16); + return std::make_pair(16,8); #endif } diff --git a/components/homme/src/share/cxx/utilities/scream_tridiag.hpp b/components/homme/src/share/cxx/utilities/scream_tridiag.hpp index e18bbc4e7e2..26221db3955 100644 --- a/components/homme/src/share/cxx/utilities/scream_tridiag.hpp +++ b/components/homme/src/share/cxx/utilities/scream_tridiag.hpp @@ -128,6 +128,10 @@ int get_thread_id_within_team_gpu (const TeamMember& team) { // Can't use team.team_rank() here because vector direction also uses physical // threads but TeamMember types don't expose that information. 
return blockDim.x * threadIdx.y + threadIdx.x; +#elif defined(__SYCL_DEVICE_ONLY__) + auto item = team.item(); + return static_cast(item.get_local_range(1) * item.get_local_id(0) + + item.get_local_id(1)); #else assert(0); return -1; @@ -138,6 +142,9 @@ template KOKKOS_FORCEINLINE_FUNCTION int get_team_nthr_gpu (const TeamMember& team) { #if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__ return blockDim.x * blockDim.y; +#elif defined __SYCL_DEVICE_ONLY__ + auto item = team.item(); + return static_cast(item.get_local_range(0) * item.get_local_range(1)); #else assert(0); return -1; @@ -161,6 +168,16 @@ KOKKOS_FORCEINLINE_FUNCTION int get_team_nthr (const Kokkos::Impl::HIPTeamMember& team) { return get_team_nthr_gpu(team); } #endif // KOKKOS_ENABLE_HIP + +#ifdef KOKKOS_ENABLE_SYCL +KOKKOS_FORCEINLINE_FUNCTION +int get_thread_id_within_team (const Kokkos::Impl::SYCLTeamMember& team) +{ return get_thread_id_within_team_gpu(team); } +KOKKOS_FORCEINLINE_FUNCTION +int get_team_nthr (const Kokkos::Impl::SYCLTeamMember& team) +{ return get_team_nthr_gpu(team); } +#endif // KOKKOS_ENABLE_SYCL + template KOKKOS_INLINE_FUNCTION const T& min (const T& a, const T& b) { return a < b ? 
a : b; } @@ -634,7 +651,7 @@ void bfb (const TeamMember& team, const auto f = [&] (const int& j) { impl::bfb_thomas_solve(dl, d, du, Kokkos::subview(X , Kokkos::ALL(), j)); }; - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nrhs), f); + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nrhs), f); } template @@ -664,7 +681,7 @@ void bfb (const TeamMember& team, subview(du, ALL(), j), subview(X , ALL(), j)); }; - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nrhs), f); + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nrhs), f); } } // namespace tridiag From 0a4d14754de4ef5a5702c89dd802c3860404a5ea Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 4 Oct 2024 18:11:46 +0000 Subject: [PATCH 75/85] update ekat (which has updated kokkos) --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index 237edd071c6..ea985c76836 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit 237edd071c6c6e7f92edaa598582ce961c6b69ef +Subproject commit ea985c76836d2ef9d433756654f821a64a7d57bf From 49bd4313135f2262e49698e3673f2a198506f1d4 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 4 Oct 2024 18:12:07 +0000 Subject: [PATCH 76/85] update aurora modules --- cime_config/machines/config_machines.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 981a1b3161b..c5894916e8f 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3793,15 +3793,15 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors /soft/modulefiles /soft/restricted/CNDA/updates/modulefiles - spack-pe-gcc/0.6.1-23.275.2 cmake - python/3.10.10 + spack-pe-gcc/0.7.0-24.086.0 cmake + python/3.10.11 - oneapi/eng-compiler/2024.04.15.002 + oneapi/eng-compiler/2024.07.30.002 mpich/icc-all-pmix-gpu/20240717 - cray-pals/1.3.3 - 
libfabric/1.15.2.0 + cray-pals/1.4.0 + libfabric/1.20.1 $CIME_OUTPUT_ROOT/$CASE/run From 3afe043133525835bb693b4e45d49102c0597043 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 9 Oct 2024 19:20:19 +0000 Subject: [PATCH 77/85] add no-vni --- cime_config/machines/config_machines.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index c5894916e8f..376b2b8adda 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3777,7 +3777,7 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors -np {{ total_tasks }} --label -ppn {{ tasks_per_node }} - --cpu-bind $ENV{RANKS_BIND} -envall + --no-vni --cpu-bind $ENV{RANKS_BIND} -envall -d $ENV{OMP_NUM_THREADS} $ENV{GPU_TILE_COMPACT} From 900845259322ddd87fa5749403720c8b6af74551 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 10 Oct 2024 17:43:38 +0000 Subject: [PATCH 78/85] fixes after merge --- cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake | 1 + cime_config/machines/config_machines.xml | 2 -- components/eamxx/cmake/machine-files/aurora.cmake | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake index 1ee009aa515..d5a9a6494a2 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake @@ -3,6 +3,7 @@ string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_c if (compile_threaded) string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64") endif() +string(APPEND KOKKOS_OPTIONS " -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SERIAL=On -DKokkos_ARCH_INTEL_PVC=On -DKokkos_ENABLE_SYCL=On -DKokkos_ENABLE_EXPLICIT_INSTANTIATION=Off") string(APPEND SYCL_FLAGS " -\-intel -fsycl 
-fsycl-targets=spir64_gen -mlong-double-64 -Xsycl-target-backend \"-device 12.60.7\"") set(SCREAM_MPI_ON_DEVICE OFF CACHE STRING "") diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 7b279ae2362..083253a62e6 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3503,7 +3503,6 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh 131072 20 - $ENV{KOKKOS_ROOT} 1 0:4,1:4,2:4,3:4:4:4,5:4,6:4,7:4 @@ -3611,7 +3610,6 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh 131072 20 - $ENV{KOKKOS_ROOT} 1 0:4,1:4,2:4,3:4:4:4,5:4,6:4,7:4 diff --git a/components/eamxx/cmake/machine-files/aurora.cmake b/components/eamxx/cmake/machine-files/aurora.cmake index a8cfb611c25..e6a32fc72c0 100644 --- a/components/eamxx/cmake/machine-files/aurora.cmake +++ b/components/eamxx/cmake/machine-files/aurora.cmake @@ -1,7 +1,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/common.cmake) common_setup() -<<<<<<< HEAD include (${EKAT_MACH_FILES_PATH}/kokkos/intel-pvc.cmake) include (${EKAT_MACH_FILES_PATH}/mpi/other.cmake) set(EKAT_MPIRUN_EXE "mpiexec" CACHE STRING "" FORCE) From 13d2546ff5c2e2268cdf053f4f6c9d27da1f23a1 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 22 Oct 2024 20:03:32 +0000 Subject: [PATCH 79/85] update ekat with updated kokkos --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index ea985c76836..c86becf4dfa 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit ea985c76836d2ef9d433756654f821a64a7d57bf +Subproject commit c86becf4dfac6b4e3c0f2e5becd051495ccf8f26 From 340ce70f79af5ca39ca9fa276631ab8dab404aa6 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 23 Oct 2024 18:22:49 +0000 Subject: [PATCH 80/85] swithc to 12 ranks/node after merge --- 
cime_config/machines/config_machines.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 083253a62e6..0ff979c8f9c 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3548,9 +3548,9 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors pbspro e3sm 208 - 104 + 12 104 - 48 + 12 FALSE mpiexec From 30ddcdd1037de40b2a8f066044873b03fb32d6dd Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 23 Oct 2024 18:23:20 +0000 Subject: [PATCH 81/85] Revert "Merge Pull Request #3028 from E3SM-Project/scream/jgfouca/reduce_rrtmgp_interf_allocs" This reverts commit 9b1b4c7c5ca36923c47ca5cdb6ab6152149a88f7, reversing changes made to d9814845218b70d81d29aac152a5a7fbded48626. --- components/eam/src/physics/rrtmgp/external | 2 +- .../rrtmgp/scream_rrtmgp_interface.hpp | 178 ++++++------------ 2 files changed, 57 insertions(+), 123 deletions(-) diff --git a/components/eam/src/physics/rrtmgp/external b/components/eam/src/physics/rrtmgp/external index e64b99cce24..8ff525eeed1 160000 --- a/components/eam/src/physics/rrtmgp/external +++ b/components/eam/src/physics/rrtmgp/external @@ -1 +1 @@ -Subproject commit e64b99cce24eb31bb6f317bddb6f0ffbdfaf8bb7 +Subproject commit 8ff525eeed1d87a2ca6f251c4d16b46222c5554d diff --git a/components/eamxx/src/physics/rrtmgp/scream_rrtmgp_interface.hpp b/components/eamxx/src/physics/rrtmgp/scream_rrtmgp_interface.hpp index a420c0c1dd4..216722b3766 100644 --- a/components/eamxx/src/physics/rrtmgp/scream_rrtmgp_interface.hpp +++ b/components/eamxx/src/physics/rrtmgp/scream_rrtmgp_interface.hpp @@ -565,44 +565,6 @@ static void rrtmgp_sw( int ngpt = k_dist.get_ngpt(); int ngas = gas_concs.get_num_gases(); - // Allocate temporaries from pool - const int size1 = nday; - const int size2 = nday*nlay; // 4 - const int size3 = nday*(nlay+1); // 5 - const int size4 = ncol*nlay; - 
const int size5 = nbnd*nday; //2 - const int size6 = nday*ngpt; - const int size7 = nday*(nlay+1)*nbnd; // 3 - const int size8 = ncol*nlay*(k_dist.get_ngas()+1); - - RealT* data = pool_t::template alloc_raw(size1 + size2*4 + size3*5 + size4 + size5*2 + size6 + size7*3 + size8), *dcurr = data; - - auto mu0_day = view_t (dcurr, nday); dcurr += size1; - - auto p_lay_day = view_t (dcurr, nday, nlay); dcurr += size2; - auto t_lay_day = view_t (dcurr, nday, nlay); dcurr += size2; - auto vmr_day = view_t (dcurr, nday, nlay); dcurr += size2; - auto t_lay_limited = view_t (dcurr, nday, nlay); dcurr += size2; - - auto p_lev_day = view_t (dcurr, nday, nlay+1); dcurr += size3; - auto t_lev_day = view_t (dcurr, nday, nlay+1); dcurr += size3; - auto flux_up_day = view_t (dcurr, nday, nlay+1); dcurr += size3; - auto flux_dn_day = view_t (dcurr, nday, nlay+1); dcurr += size3; - auto flux_dn_dir_day = view_t (dcurr, nday, nlay+1); dcurr += size3; - - auto vmr = view_t (dcurr, ncol, nlay); dcurr += size4; - - auto sfc_alb_dir_T = view_t (dcurr, nbnd, nday); dcurr += size5; - auto sfc_alb_dif_T = view_t (dcurr, nbnd, nday); dcurr += size5; - - auto toa_flux = view_t (dcurr, nday, ngpt); dcurr += size6; - - auto bnd_flux_up_day = view_t(dcurr, nday, nlay+1, nbnd); dcurr += size7; - auto bnd_flux_dn_day = view_t(dcurr, nday, nlay+1, nbnd); dcurr += size7; - auto bnd_flux_dn_dir_day = view_t(dcurr, nday, nlay+1, nbnd); dcurr += size7; - - auto col_gas = view_t(dcurr, ncol, nlay, k_dist.get_ngas()+1); dcurr += size8; - // Associate local pointers for fluxes auto &flux_up = fluxes.flux_up; auto &flux_dn = fluxes.flux_dn; @@ -661,15 +623,20 @@ static void rrtmgp_sw( } // Subset mu0 + auto mu0_day = view_t("mu0_day", nday); Kokkos::parallel_for(nday, KOKKOS_LAMBDA(int iday) { mu0_day(iday) = mu0(dayIndices(iday)); }); // subset state variables + auto p_lay_day = view_t("p_lay_day", nday, nlay); + auto t_lay_day = view_t("t_lay_day", nday, nlay); Kokkos::parallel_for(MDRP::template 
get<2>({nlay,nday}), KOKKOS_LAMBDA(int ilay, int iday) { p_lay_day(iday,ilay) = p_lay(dayIndices(iday),ilay); t_lay_day(iday,ilay) = t_lay(dayIndices(iday),ilay); }); + auto p_lev_day = view_t("p_lev_day", nday, nlay+1); + auto t_lev_day = view_t("t_lev_day", nday, nlay+1); Kokkos::parallel_for(MDRP::template get<2>({nlay+1,nday}), KOKKOS_LAMBDA(int ilev, int iday) { p_lev_day(iday,ilev) = p_lev(dayIndices(iday),ilev); t_lev_day(iday,ilev) = t_lev(dayIndices(iday),ilev); @@ -680,6 +647,8 @@ static void rrtmgp_sw( gas_concs_t gas_concs_day; gas_concs_day.init(gas_names, nday, nlay); for (int igas = 0; igas < ngas; igas++) { + auto vmr_day = view_t("vmr_day", nday, nlay); + auto vmr = view_t("vmr" , ncol, nlay); gas_concs.get_vmr(gas_names[igas], vmr); Kokkos::parallel_for(MDRP::template get<2>({nlay,nday}), KOKKOS_LAMBDA(int ilay, int iday) { vmr_day(iday,ilay) = vmr(dayIndices(iday),ilay); @@ -711,12 +680,20 @@ static void rrtmgp_sw( // RRTMGP assumes surface albedos have a screwy dimension ordering // for some strange reason, so we need to transpose these; also do // daytime subsetting in the same kernel + view_t sfc_alb_dir_T("sfc_alb_dir", nbnd, nday); + view_t sfc_alb_dif_T("sfc_alb_dif", nbnd, nday); Kokkos::parallel_for(MDRP::template get<2>({nbnd,nday}), KOKKOS_LAMBDA(int ibnd, int icol) { sfc_alb_dir_T(ibnd,icol) = sfc_alb_dir(dayIndices(icol),ibnd); sfc_alb_dif_T(ibnd,icol) = sfc_alb_dif(dayIndices(icol),ibnd); }); // Temporaries we need for daytime-only fluxes + auto flux_up_day = view_t("flux_up_day", nday, nlay+1); + auto flux_dn_day = view_t("flux_dn_day", nday, nlay+1); + auto flux_dn_dir_day = view_t("flux_dn_dir_day", nday, nlay+1); + auto bnd_flux_up_day = view_t("bnd_flux_up_day", nday, nlay+1, nbnd); + auto bnd_flux_dn_day = view_t("bnd_flux_dn_day", nday, nlay+1, nbnd); + auto bnd_flux_dn_dir_day = view_t("bnd_flux_dn_dir_day", nday, nlay+1, nbnd); fluxes_t fluxes_day; fluxes_day.flux_up = flux_up_day; fluxes_day.flux_dn = flux_dn_day; @@ 
-736,14 +713,18 @@ static void rrtmgp_sw( } // Limit temperatures for gas optics look-up tables + auto t_lay_limited = view_t("t_lay_limited", nday, nlay); limit_to_bounds_k(t_lay_day, k_dist_sw_k.get_temp_min(), k_dist_sw_k.get_temp_max(), t_lay_limited); // Do gas optics + view_t toa_flux("toa_flux", nday, ngpt); bool top_at_1 = false; Kokkos::parallel_reduce(1, KOKKOS_LAMBDA(int, bool& val) { val |= p_lay(0, 0) < p_lay(0, nlay-1); }, Kokkos::LOr(top_at_1)); + oview_t col_gas("col_gas", std::make_pair(0, ncol-1), std::make_pair(0, nlay-1), std::make_pair(-1, k_dist.get_ngas()-1)); + k_dist.gas_optics(nday, nlay, top_at_1, p_lay_day, p_lev_day, t_lay_limited, gas_concs_day, col_gas, optics, toa_flux); if (extra_clnsky_diag) { k_dist.gas_optics(nday, nlay, top_at_1, p_lay_day, p_lev_day, t_lay_limited, gas_concs_day, col_gas, optics_no_aerosols, toa_flux); @@ -822,8 +803,6 @@ static void rrtmgp_sw( clnsky_flux_dn_dir(icol,ilev) = flux_dn_dir_day(iday,ilev); }); } - - pool_t::dealloc(data, dcurr - data); } /* @@ -840,24 +819,6 @@ static void rrtmgp_lw( { // Problem size int nbnd = k_dist.get_nband(); - int constexpr max_gauss_pts = 4; - - const int size1 = ncol; - const int size2 = nbnd*ncol; - const int size3 = max_gauss_pts*max_gauss_pts; - const int size4 = ncol*nlay; - const int size5 = ncol*(nlay+1); - const int size6 = ncol*nlay*(k_dist.get_ngas()+1); - - RealT* data = pool_t::template alloc_raw(size1 + size2 + size3*2 + size4 + size5 + size6), *dcurr = data; - - view_t t_sfc (dcurr, ncol); dcurr += size1; - view_t emis_sfc (dcurr, nbnd,ncol); dcurr += size2; - view_t gauss_Ds (dcurr, max_gauss_pts,max_gauss_pts); dcurr += size3; - view_t gauss_wts (dcurr, max_gauss_pts,max_gauss_pts); dcurr += size3; - view_t t_lay_limited(dcurr, ncol, nlay); dcurr += size4; - view_t t_lev_limited(dcurr, ncol, nlay+1); dcurr += size5; - view_t col_gas (dcurr, std::make_pair(0, ncol-1), std::make_pair(0, nlay-1), std::make_pair(-1, k_dist.get_ngas()-1)); dcurr += size6; // 
Associate local pointers for fluxes auto &flux_up = fluxes.flux_up; @@ -902,6 +863,8 @@ static void rrtmgp_lw( // Boundary conditions source_func_t lw_sources; lw_sources.alloc(ncol, nlay, k_dist); + view_t t_sfc ("t_sfc" ,ncol); + view_t emis_sfc("emis_sfc",nbnd,ncol); bool top_at_1 = false; Kokkos::parallel_reduce(1, KOKKOS_LAMBDA(int, bool& val) { @@ -919,31 +882,32 @@ static void rrtmgp_lw( // Weights and angle secants for first order (k=1) Gaussian quadrature. // Values from Table 2, Clough et al, 1992, doi:10.1029/92JD01419 // after Abramowitz & Stegun 1972, page 921 - RealT gauss_Ds_host_raw[max_gauss_pts][max_gauss_pts] = { - {1.66, 1.18350343, 1.09719858, 1.06056257}, - {0., 2.81649655, 1.69338507, 1.38282560}, - {0., 0., 4.70941630, 2.40148179}, - {0., 0., 0., 7.15513024} - }; - hview_t gauss_Ds_host (&gauss_Ds_host_raw[0][0], max_gauss_pts, max_gauss_pts); - - RealT gauss_wts_host_raw[max_gauss_pts][max_gauss_pts] = { - {0.5, 0.3180413817, 0.2009319137, 0.1355069134}, - {0., 0.1819586183, 0.2292411064, 0.2034645680}, - {0., 0., 0.0698269799, 0.1298475476}, - {0., 0., 0., 0.0311809710} - }; - - hview_t gauss_wts_host(&gauss_wts_host_raw[0][0],max_gauss_pts,max_gauss_pts); - + int constexpr max_gauss_pts = 4; + hview_t gauss_Ds_host ("gauss_Ds" ,max_gauss_pts,max_gauss_pts); + gauss_Ds_host(0,0) = 1.66 ; gauss_Ds_host(1,0) = 0.; gauss_Ds_host(2,0) = 0.; gauss_Ds_host(3,0) = 0.; + gauss_Ds_host(0,1) = 1.18350343; gauss_Ds_host(1,1) = 2.81649655; gauss_Ds_host(2,1) = 0.; gauss_Ds_host(3,1) = 0.; + gauss_Ds_host(0,2) = 1.09719858; gauss_Ds_host(1,2) = 1.69338507; gauss_Ds_host(2,2) = 4.70941630; gauss_Ds_host(3,2) = 0.; + gauss_Ds_host(0,3) = 1.06056257; gauss_Ds_host(1,3) = 1.38282560; gauss_Ds_host(2,3) = 2.40148179; gauss_Ds_host(3,3) = 7.15513024; + + hview_t gauss_wts_host("gauss_wts",max_gauss_pts,max_gauss_pts); + gauss_wts_host(0,0) = 0.5 ; gauss_wts_host(1,0) = 0. ; gauss_wts_host(2,0) = 0. ; gauss_wts_host(3,0) = 0. 
; + gauss_wts_host(0,1) = 0.3180413817; gauss_wts_host(1,1) = 0.1819586183; gauss_wts_host(2,1) = 0. ; gauss_wts_host(3,1) = 0. ; + gauss_wts_host(0,2) = 0.2009319137; gauss_wts_host(1,2) = 0.2292411064; gauss_wts_host(2,2) = 0.0698269799; gauss_wts_host(3,2) = 0. ; + gauss_wts_host(0,3) = 0.1355069134; gauss_wts_host(1,3) = 0.2034645680; gauss_wts_host(2,3) = 0.1298475476; gauss_wts_host(3,3) = 0.0311809710; + + view_t gauss_Ds ("gauss_Ds" ,max_gauss_pts,max_gauss_pts); + view_t gauss_wts("gauss_wts",max_gauss_pts,max_gauss_pts); Kokkos::deep_copy(gauss_Ds, gauss_Ds_host); Kokkos::deep_copy(gauss_wts, gauss_wts_host); // Limit temperatures for gas optics look-up tables + auto t_lay_limited = view_t("t_lay_limited", ncol, nlay); + auto t_lev_limited = view_t("t_lev_limited", ncol, nlay+1); limit_to_bounds_k(t_lay, k_dist_lw_k.get_temp_min(), k_dist_lw_k.get_temp_max(), t_lay_limited); limit_to_bounds_k(t_lev, k_dist_lw_k.get_temp_min(), k_dist_lw_k.get_temp_max(), t_lev_limited); // Do gas optics + oview_t col_gas("col_gas", std::make_pair(0, ncol-1), std::make_pair(0, nlay-1), std::make_pair(-1, k_dist.get_ngas()-1)); k_dist.gas_optics(ncol, nlay, top_at_1, p_lay, p_lev, t_lay_limited, t_sfc, gas_concs, col_gas, optics, lw_sources, view_t(), t_lev_limited); if (extra_clnsky_diag) { k_dist.gas_optics(ncol, nlay, top_at_1, p_lay, p_lev, t_lay_limited, t_sfc, gas_concs, col_gas, optics_no_aerosols, lw_sources, view_t(), t_lev_limited); @@ -977,22 +941,23 @@ static void rrtmgp_lw( // Compute clean-sky fluxes rte_lw(max_gauss_pts, gauss_Ds, gauss_wts, optics_no_aerosols, top_at_1, lw_sources, emis_sfc, clnsky_fluxes); } - - pool_t::dealloc(data, dcurr - data); } /* * Return a subcolumn mask consistent with a specified overlap assumption */ -static void get_subcolumn_mask(const int ncol, const int nlay, const int ngpt, const real2dk &cldf, const int overlap_option, int1dk &seeds, int3dk& subcolumn_mask) +static int3dk get_subcolumn_mask(const int ncol, const int nlay, 
const int ngpt, const real2dk &cldf, const int overlap_option, int1dk &seeds) { + // Routine will return subcolumn mask with values of 0 indicating no cloud, 1 indicating cloud + int3dk subcolumn_mask = int3dk("subcolumn_mask", ncol, nlay, ngpt); + // Subcolumn generators are a means for producing a variable x(i,j,k), where // // c(i,j,k) = 1 for x(i,j,k) > 1 - cldf(i,j) // c(i,j,k) = 0 for x(i,j,k) <= 1 - cldf(i,j) // // I am going to call this "cldx" to be just slightly less ambiguous - auto cldx = pool_t::template alloc(ncol, nlay, ngpt); + auto cldx = view_t("cldx", ncol, nlay, ngpt); // Apply overlap assumption to set cldx if (overlap_option == 0) { // Dummy mask, always cloudy @@ -1052,9 +1017,6 @@ static void get_subcolumn_mask(const int ncol, const int nlay, const int ngpt, c subcolumn_mask(icol,ilay,igpt) = 0; } }); - - pool_t::dealloc(cldx); - return subcolumn_mask; } @@ -1067,7 +1029,7 @@ static void compute_cloud_area( { // Subcolumn binary cld mask; if any layers with pressure between pmin and pmax are cloudy // then 2d subcol mask is 1, otherwise it is 0 - auto subcol_mask = pool_t::template alloc(ncol, ngpt); + auto subcol_mask = view_t("subcol_mask", ncol, ngpt); Kokkos::parallel_for(MDRP::template get<3>({ngpt, nlay, ncol}), KOKKOS_LAMBDA(int igpt, int ilay, int icol) { // NOTE: using plev would need to assume level ordering (top to bottom or bottom to top), but // using play/pmid does not @@ -1084,8 +1046,6 @@ static void compute_cloud_area( cld_area(icol) += subcol_mask(icol,igpt) * ngpt_inv; } }); - - pool_t::dealloc(subcol_mask); } /* @@ -1118,7 +1078,7 @@ static void compute_aerocom_cloudtop( Kokkos::deep_copy(eff_radius_qi_at_cldtop, 0.0); // Initialize the 1D "clear fraction" as 1 (totally clear) - auto aerocom_clr = pool_t::template alloc(ncol); + auto aerocom_clr = view_t("aerocom_clr", ncol); Kokkos::deep_copy(aerocom_clr, 1.0); // Get gravity acceleration constant from constants @@ -1191,8 +1151,6 @@ static void compute_aerocom_cloudtop( 
// (their products) cldfrac_tot_at_cldtop(icol) = 1.0 - aerocom_clr(icol); }); - - pool_t::dealloc(aerocom_clr); } /* @@ -1295,17 +1253,14 @@ static optical_props2_t get_cloud_optics_sw( cloud_optics.set_ice_roughness(2); // Limit effective radii to be within bounds of lookup table - auto rel_limited = pool_t::template alloc(ncol, nlay); - auto rei_limited = pool_t::template alloc(ncol, nlay); + auto rel_limited = view_t("rel_limited", ncol, nlay); + auto rei_limited = view_t("rei_limited", ncol, nlay); limit_to_bounds_k(rel, cloud_optics.radliq_lwr, cloud_optics.radliq_upr, rel_limited); limit_to_bounds_k(rei, cloud_optics.radice_lwr, cloud_optics.radice_upr, rei_limited); // Calculate cloud optics cloud_optics.cloud_optics(ncol, nlay, lwp, iwp, rel_limited, rei_limited, clouds); - pool_t::dealloc(rel_limited); - pool_t::dealloc(rei_limited); - // Return optics return clouds; } @@ -1324,17 +1279,14 @@ static optical_props1_t get_cloud_optics_lw( cloud_optics.set_ice_roughness(2); // Limit effective radii to be within bounds of lookup table - auto rel_limited = pool_t::alloc(ncol, nlay); - auto rei_limited = pool_t::alloc(ncol, nlay); + auto rel_limited = view_t("rel_limited", ncol, nlay); + auto rei_limited = view_t("rei_limited", ncol, nlay); limit_to_bounds_k(rel, cloud_optics.radliq_lwr, cloud_optics.radliq_upr, rel_limited); limit_to_bounds_k(rei, cloud_optics.radice_lwr, cloud_optics.radice_upr, rei_limited); // Calculate cloud optics cloud_optics.cloud_optics(ncol, nlay, lwp, iwp, rel_limited, rei_limited, clouds); - pool_t::dealloc(rel_limited); - pool_t::dealloc(rei_limited); - // Return optics return clouds; } @@ -1346,10 +1298,6 @@ static optical_props2_t get_subsampled_clouds( optical_props2_t subsampled_optics; subsampled_optics.init(kdist.get_band_lims_wavenumber(), kdist.get_band_lims_gpoint(), "subsampled_optics"); subsampled_optics.alloc_2str(ncol, nlay); - - // Subcolumn mask with values of 0 indicating no cloud, 1 indicating cloud - auto cldmask 
= pool_t::alloc(ncol, nlay, ngpt); - // Check that we do not have clouds with no optical properties; this would get corrected // when we assign optical props, but we want to use a "radiative cloud fraction" // for the subcolumn sampling too because otherwise we can get vertically-contiguous cloud @@ -1357,7 +1305,7 @@ static optical_props2_t get_subsampled_clouds( // the vertical correlation of cloudy layers. I.e., cloudy layers might look maximally overlapped // even when separated by layers with no cloud properties, when in fact those layers should be // randomly overlapped. - auto cldfrac_rad = pool_t::alloc(ncol, nlay); + auto cldfrac_rad = view_t("cldfrac_rad", ncol, nlay); Kokkos::parallel_for(MDRP::template get<3>({nbnd,nlay,ncol}), KOKKOS_LAMBDA (int ibnd, int ilay, int icol) { if (cloud_optics.tau(icol,ilay,ibnd) > 0) { cldfrac_rad(icol,ilay) = cld(icol,ilay); @@ -1371,11 +1319,11 @@ static optical_props2_t get_subsampled_clouds( int overlap = 1; // Get unique seeds for each column that are reproducible across different MPI rank layouts; // use decimal part of pressure for this, consistent with the implementation in EAM - auto seeds = pool_t::alloc(ncol); + auto seeds = view_t("seeds", ncol); Kokkos::parallel_for(ncol, KOKKOS_LAMBDA(int icol) { seeds(icol) = 1e9 * (p_lay(icol,nlay-1) - int(p_lay(icol,nlay-1))); }); - get_subcolumn_mask(ncol, nlay, ngpt, cldfrac_rad, overlap, seeds, cldmask); + auto cldmask = get_subcolumn_mask(ncol, nlay, ngpt, cldfrac_rad, overlap, seeds); // Assign optical properties to subcolumns (note this implements MCICA) auto gpoint_bands = kdist.get_gpoint_bands(); Kokkos::parallel_for(MDRP::template get<3>({ngpt,nlay,ncol}), KOKKOS_LAMBDA(int igpt, int ilay, int icol) { @@ -1390,11 +1338,6 @@ static optical_props2_t get_subsampled_clouds( subsampled_optics.g (icol,ilay,igpt) = 0; } }); - - pool_t::dealloc(cldmask); - pool_t::dealloc(cldfrac_rad); - pool_t::dealloc(seeds); - return subsampled_optics; } @@ -1406,10 +1349,6 @@ static 
optical_props1_t get_subsampled_clouds( optical_props1_t subsampled_optics; subsampled_optics.init(kdist.get_band_lims_wavenumber(), kdist.get_band_lims_gpoint(), "subsampled_optics"); subsampled_optics.alloc_1scl(ncol, nlay); - - // Subcolumn mask with values of 0 indicating no cloud, 1 indicating cloud - auto cldmask = pool_t::alloc(ncol, nlay, ngpt); - // Check that we do not have clouds with no optical properties; this would get corrected // when we assign optical props, but we want to use a "radiative cloud fraction" // for the subcolumn sampling too because otherwise we can get vertically-contiguous cloud @@ -1417,7 +1356,7 @@ static optical_props1_t get_subsampled_clouds( // the vertical correlation of cloudy layers. I.e., cloudy layers might look maximally overlapped // even when separated by layers with no cloud properties, when in fact those layers should be // randomly overlapped. - auto cldfrac_rad = pool_t::alloc(ncol, nlay); + auto cldfrac_rad = view_t("cldfrac_rad", ncol, nlay); Kokkos::parallel_for(MDRP::template get<3>({nbnd,nlay,ncol}), KOKKOS_LAMBDA (int ibnd, int ilay, int icol) { if (cloud_optics.tau(icol,ilay,ibnd) > 0) { cldfrac_rad(icol,ilay) = cld(icol,ilay); @@ -1428,11 +1367,11 @@ static optical_props1_t get_subsampled_clouds( // Get unique seeds for each column that are reproducible across different MPI rank layouts; // use decimal part of pressure for this, consistent with the implementation in EAM; use different // seed values for longwave and shortwave - auto seeds = pool_t::alloc(ncol); + auto seeds = view_t("seeds", ncol); Kokkos::parallel_for(ncol, KOKKOS_LAMBDA(int icol) { seeds(icol) = 1e9 * (p_lay(icol,nlay-2) - int(p_lay(icol,nlay-2))); }); - get_subcolumn_mask(ncol, nlay, ngpt, cldfrac_rad, overlap, seeds, cldmask); + auto cldmask = get_subcolumn_mask(ncol, nlay, ngpt, cldfrac_rad, overlap, seeds); // Assign optical properties to subcolumns (note this implements MCICA) auto gpoint_bands = kdist.get_gpoint_bands(); 
Kokkos::parallel_for(MDRP::template get<3>({ngpt,nlay,ncol}), KOKKOS_LAMBDA(int igpt, int ilay, int icol) { @@ -1443,11 +1382,6 @@ static optical_props1_t get_subsampled_clouds( subsampled_optics.tau(icol,ilay,igpt) = 0; } }); - - pool_t::dealloc(cldmask); - pool_t::dealloc(cldfrac_rad); - pool_t::dealloc(seeds); - return subsampled_optics; } From 75e0fd932ed63c4e7f37e1cabfcf7d641fe70b38 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Wed, 23 Oct 2024 19:56:46 +0000 Subject: [PATCH 82/85] update kokkos --- externals/ekat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/ekat b/externals/ekat index c86becf4dfa..17adc61faae 160000 --- a/externals/ekat +++ b/externals/ekat @@ -1 +1 @@ -Subproject commit c86becf4dfac6b4e3c0f2e5becd051495ccf8f26 +Subproject commit 17adc61faae0ebb6de19fe389596e3dd3622d2d2 From c7e8ad158e2821d7804a50daad4d44bf7ab4bca6 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Mon, 4 Nov 2024 23:26:24 +0000 Subject: [PATCH 83/85] remove streaming option that is underutilizing gpusfor 12 ranks --- cime_config/machines/config_machines.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index 0ff979c8f9c..f8bda3b422e 100644 --- a/cime_config/machines/config_machines.xml +++ b/cime_config/machines/config_machines.xml @@ -3611,7 +3611,7 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors 131072 20 1 - 0:4,1:4,2:4,3:4:4:4,5:4,6:4,7:4 + 0 From 5c2c7143bd4de56970b4c97c73ab89c1b9e2d7c9 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 7 Nov 2024 19:27:39 +0000 Subject: [PATCH 84/85] AB env in aurora --- cime_config/machines/config_machines.xml | 67 ++++++++++++++++++++---- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/cime_config/machines/config_machines.xml b/cime_config/machines/config_machines.xml index f8bda3b422e..2d36ae36b28 100644 --- a/cime_config/machines/config_machines.xml +++ 
b/cime_config/machines/config_machines.xml @@ -3598,21 +3598,65 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors 1 + + + - level_zero:gpu - - 0 - disable - disable - 1 - 4000MB - 0 - /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh + + 1 + 0 + + + + 1 + 1 + 1 + 131072 20 - 1 + cxi + disabled + 8388608 + + 240 + 240 + + disable + disable + + level_zero:gpu + 1 + + 4000MB + 0 + + /soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh + + + 0 DISABLED @@ -3621,14 +3665,17 @@ export FI_CXI_DEFAULT_CQ_SIZE=131072 # try avoiding F90 MPI_BCAST errors 0 + verbose,granularity=thread,balanced 128M + threads 128M + -1 From 0b43211eec9d06880334983a09d6713ecf3f6244 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Fri, 8 Nov 2024 04:39:36 +0000 Subject: [PATCH 85/85] fixing ne256 build (works for ne30 too) for \failed to convert GOTPCREL\ --- cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake index d5a9a6494a2..c6afa7c2329 100644 --- a/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake +++ b/cime_config/machines/cmake_macros/oneapi-ifxgpu_aurora.cmake @@ -1,5 +1,5 @@ -string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=16") +string(APPEND CMAKE_EXE_LINKER_FLAGS " -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -fsycl-device-code-split=per_kernel -fsycl-max-parallel-link-jobs=16 -Wl,--no-relax") if (compile_threaded) string(APPEND CMAKE_EXE_LINKER_FLAGS " -fiopenmp -fopenmp-targets=spir64") endif()