Merge 'CONRAD/add_pgrad_correction_to_theta-l_kokkos' (PR #5299)

Add pgrad_correction to theta-l_kokkos. A few changes: -- Change test using thetahs1.nl namelist to use pgrad_correction and test BFB -- Add cxx_log() callable from F90 to compute log on device for BFB -- Moved some computations around in CaarFunctorImpl to match F90 ordering (some needed for BFB now that pgrad_correction exists) [nonBFB] for HOMMEBFB only due to new test option for theta-fhs1.
E3SM-Project · Nov 29, 2022 · 0b6c1ba · 0b6c1ba
2 parents fb7d271 + f8b34e1
commit 0b6c1ba
Show file tree

Hide file tree

Showing 10 changed files with 144 additions and 25 deletions.
diff --git a/components/homme/src/share/cxx/SimulationParams.hpp b/components/homme/src/share/cxx/SimulationParams.hpp
@@ -59,6 +59,7 @@ struct SimulationParams
   int       nsplit = 0;
   int       nsplit_iteration;
   double    rearth; //propagated then to Geometry and SphereOps
+  bool      pgrad_correction;
 
   // Use this member to check whether the struct has been initialized
   bool      params_set = false;

diff --git a/components/homme/src/share/cxx/utilities/BfbUtils.cpp b/components/homme/src/share/cxx/utilities/BfbUtils.cpp
@@ -55,3 +55,20 @@ void tridiag_diagdom_bfb_a1x1 (int n, void* dl, void* d, void* du, void* x, int
 }
 
 } // extern C
+
+// Allows F90 to call c++ log() on device for BFB testing
+extern "C" {
+double cxx_log(double input)
+{
+#ifdef HOMMEXX_ENABLE_GPU
+  double result;
+  Kokkos::RangePolicy<Homme::ExecSpace> policy(0,1);
+  Kokkos::parallel_reduce(policy, KOKKOS_LAMBDA(const int&, double& value) {
+    value = log(input);
+  }, result);
+  return result;
+#else
+  return log(input);
+#endif
+}
+} // extern C
diff --git a/components/homme/src/share/cxx/utilities/VectorUtils.hpp b/components/homme/src/share/cxx/utilities/VectorUtils.hpp
@@ -60,6 +60,21 @@ VECTOR_SIMD_LOOP
   return vp;
 }
 
+template <typename SpT, int l>
+KOKKOS_INLINE_FUNCTION
+Vector<VectorTag<SIMD<double, SpT>, l> >
+log (const Vector<VectorTag<SIMD<double,SpT>,l>>& v)
+{
+  using VectorType = Vector<VectorTag<SIMD<double,SpT>,l>>;
+  VectorType vp;
+VECTOR_SIMD_LOOP
+  for (int i = 0; i < VectorType::vector_length; ++i) {
+    vp[i] = std::log(v[i]);
+  }
+
+  return vp;
+}
+
 } // namespace KokkosKernels
 } // namespace Batched
 } // namespace Experimental

diff --git a/components/homme/src/share/cxx/utilities/bfb_mod.F90 b/components/homme/src/share/cxx/utilities/bfb_mod.F90
@@ -29,6 +29,18 @@ subroutine tridiag_diagdom_bfb_a1x1(n, dl, d, du, x, real_size) bind(c)
      end subroutine tridiag_diagdom_bfb_a1x1
   end interface
 
+  interface
+    function cxx_log(input) bind(C)
+      use iso_c_binding, only: c_double
+
+      !arguments:
+      real(kind=c_double), value, intent(in) :: input
+
+      ! return
+      real(kind=c_double)            :: cxx_log
+    end function cxx_log
+  end interface
+
   interface bfb_pow
     module procedure bfb_pow_0d
     module procedure bfb_pow_1d

diff --git a/components/homme/src/theta-l/share/prim_advance_mod.F90 b/components/homme/src/theta-l/share/prim_advance_mod.F90
@@ -50,6 +50,10 @@ module prim_advance_mod
     use, intrinsic :: iso_c_binding
 #endif
 
+#ifdef HOMMEXX_BFB_TESTING
+  use bfb_mod,        only: cxx_log
+#endif
+
   implicit none
   private
   save
@@ -1428,7 +1432,19 @@ subroutine compute_andor_apply_rhs(np1,nm1,n0,dt2,elem,hvcoord,hybrid,&
 
         if (pgrad_correction==1) then
            T0 = TREF-tref_lapse_rate*TREF*Cp/g     ! = 97  
+#ifdef HOMMEXX_BFB_TESTING
+           ! For BFB testing, calculate log(exner) using cxx_log()
+           ! and then call gradient sphere.
+           do j=1,np
+             do i=1,np
+               temp(i,j,k) = cxx_log(exner(i,j,k))
+             end do
+           end do
+
+           vtemp(:,:,:,k)=gradient_sphere(temp(:,:,k),deriv,elem(ie)%Dinv)
+#else
            vtemp(:,:,:,k)=gradient_sphere(log(exner(:,:,k)),deriv,elem(ie)%Dinv)
+#endif
            mgrad(:,:,1,k)=mgrad(:,:,1,k) + Cp*T0*(vtemp(:,:,1,k)-gradexner(:,:,1,k)/exner(:,:,k))
            mgrad(:,:,2,k)=mgrad(:,:,2,k) + Cp*T0*(vtemp(:,:,2,k)-gradexner(:,:,2,k)/exner(:,:,k))
         endif

diff --git a/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp
@@ -39,7 +39,7 @@ struct CaarFunctorImpl {
 
   struct Buffers {
     static constexpr int num_3d_scalar_mid_buf = 10;
-    static constexpr int num_3d_vector_mid_buf =  3;
+    static constexpr int num_3d_vector_mid_buf =  5;
     static constexpr int num_3d_scalar_int_buf =  6;
     static constexpr int num_3d_vector_int_buf =  3;
 
@@ -53,6 +53,8 @@ struct CaarFunctorImpl {
     ExecViewUnmanaged<Scalar*    [NP][NP][NUM_LEV]  >   omega_p;
     ExecViewUnmanaged<Scalar*    [NP][NP][NUM_LEV]  >   vort;
 
+    ExecViewUnmanaged<Scalar* [2][NP][NP][NUM_LEV]  >   grad_exner;
+    ExecViewUnmanaged<Scalar* [2][NP][NP][NUM_LEV]  >   mgrad;
     ExecViewUnmanaged<Scalar* [2][NP][NP][NUM_LEV]  >   grad_tmp;
     ExecViewUnmanaged<Scalar* [2][NP][NP][NUM_LEV]  >   vdp;
 
@@ -87,6 +89,7 @@ struct CaarFunctorImpl {
   const int           m_rsplit;
   const bool          m_theta_hydrostatic_mode;
   const AdvectionForm m_theta_advection_form;
+  const bool          m_pgrad_correction;
 
   HybridVCoord          m_hvcoord;
   ElementsState         m_state;
@@ -127,6 +130,7 @@ struct CaarFunctorImpl {
       , m_rsplit(params.rsplit)
       , m_theta_hydrostatic_mode(params.theta_hydrostatic_mode)
       , m_theta_advection_form(params.theta_adv_form)
+      , m_pgrad_correction(params.pgrad_correction)
       , m_hvcoord(hvcoord)
       , m_state(elements.m_state)
       , m_derived(elements.m_derived)
@@ -150,6 +154,7 @@ struct CaarFunctorImpl {
       , m_rsplit(params.rsplit)
       , m_theta_hydrostatic_mode(params.theta_hydrostatic_mode)
       , m_theta_advection_form(params.theta_adv_form)
+      , m_pgrad_correction(params.pgrad_correction)
       , m_policy_pre (Homme::get_default_team_policy<ExecSpace,TagPreExchange>(m_num_elems))
       , m_policy_post (0,num_elems*NP*NP)
       , m_policy_dp3d_lim (Homme::get_default_team_policy<ExecSpace,TagDp3dLimiter>(m_num_elems))
@@ -245,6 +250,10 @@ struct CaarFunctorImpl {
     mem += m_buffers.dp_tens.size();
 
     // Midpoints vectors
+    m_buffers.grad_exner = decltype(m_buffers.grad_exner)(mem,nslots);
+    mem += m_buffers.grad_exner.size();
+    m_buffers.mgrad = decltype(m_buffers.mgrad)(mem,nslots);
+    mem += m_buffers.mgrad.size();
     m_buffers.grad_tmp = decltype(m_buffers.grad_tmp)(mem,nslots);
     mem += m_buffers.grad_tmp.size();
 
@@ -1183,13 +1192,16 @@ struct CaarFunctorImpl {
     //  - Compute gradKE = grad(v*v/2)
     //  - Compute gradExner = grad(exner)
     //  - Compute mgrad = average[dpnh_dp_i*gradphinh_i]
+    //              + cp*T0*(grad(log(exner))-grad(exner)/exner) (pgrad_correction, if applicable)
     //  - Compute v_tens =
-    //           scale1*(-v_vadv + v2*(fcor+vort)-gradKE -mgrad  -cp*vtheta*gradExner - wvor)
-    //           scale1*(-v_vadv - v1*(fcor+vort)-gradKE -mgrad  -cp*vtheta*gradExner - wvor)
+    //           scale1*(-v_vadv + v2*(fcor+vort)-gradKE -mgrad  -cp*vtheta*gradExner - (mgrad + wvor))
+    //           scale1*(-v_vadv - v1*(fcor+vort)-gradKE -mgrad  -cp*vtheta*gradExner - (mgrad + wvor))
 
-    auto v_tens = Homme::subview(m_buffers.v_tens,kv.team_idx);
     auto vort  = Homme::subview(m_buffers.vort,kv.team_idx);
     auto wvor = Homme::subview(m_buffers.vdp,kv.team_idx);
+    auto grad_exner = Homme::subview(m_buffers.grad_exner,kv.team_idx);
+    auto mgrad = Homme::subview(m_buffers.mgrad,kv.team_idx);
+    auto grad_tmp = Homme::subview(m_buffers.grad_tmp,kv.team_idx);
 
     // Compute vorticity(v)
     m_sphere_ops.vorticity_sphere(kv, Homme::subview(m_state.m_v,kv.ie,m_data.n0),
@@ -1219,7 +1231,7 @@ struct CaarFunctorImpl {
       });
       kv.team_barrier();
 
-      // Compute grad(average(w^2/2))
+      // Compute grad(average(w^2/2)). Store in wvor.
       m_sphere_ops.gradient_sphere(kv, Homme::subview(m_buffers.temp,kv.team_idx),
                                        wvor);
 
@@ -1229,6 +1241,24 @@ struct CaarFunctorImpl {
       kv.team_barrier();
     }
 
+    // Compute grad(exner)
+    // Note: exner = (pi/p0)^k, therefore grad(exner) = k*(exner/pi)*grad(pi).
+    //       So you *could* avoid computing this grad, at the price of some arithmetic op.
+    m_sphere_ops.gradient_sphere(kv, Homme::subview(m_buffers.exner,kv.team_idx),
+                                     grad_exner);
+
+    // If pgrad_correction=1, we need the gradient sphere
+    // for log(exner) below. Store into m_buffers.grad_tmp.
+    if (m_pgrad_correction) {
+      const auto exner = Homme::subview(m_buffers.exner,kv.team_idx);
+      const auto log_exner = [&exner](const int igp, const int jgp, const int ilev)->Scalar {
+        return log(exner(igp, jgp, ilev));
+      };
+      m_sphere_ops.gradient_sphere(kv, log_exner,
+                                       grad_tmp);
+    }
+    kv.team_barrier();
+
     // Scalar w_vor,mgrad,w_gradw,gradw2;
     Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team,NP*NP),
                          [&](const int idx) {
@@ -1237,6 +1267,7 @@ struct CaarFunctorImpl {
 
       auto wvor_x = Homme::subview(wvor,0,igp,jgp);
       auto wvor_y = Homme::subview(wvor,1,igp,jgp);
+
       if (!m_theta_hydrostatic_mode) {
         // Compute wvor = grad(average(w^2/2)) - average(w*grad(w))
         // Note: vtens is already storing grad(avg(w^2/2))
@@ -1255,14 +1286,25 @@ struct CaarFunctorImpl {
                           w_gradw_x, wvor_x, -1.0);
         ColumnOps::compute_midpoint_values<CombineMode::ScaleAdd>(kv,
                           w_gradw_y, wvor_y, -1.0);
+      } else {
+        // wvor is not used if theta_hydrostatic_mode=1. Set to zero
+        // here to avoid adding in uninitialized values into v_tens.
+        Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team,NUM_LEV),
+                             [&](const int ilev) {
+          wvor_x(ilev) = 0;
+          wvor_y(ilev) = 0;
+        });
       }
 
-      // Compute average(dpnh_dp_i*grad(phinh_i)), and add to wvor
+      auto mgrad_x = Homme::subview(mgrad,0,igp,jgp);
+      auto mgrad_y = Homme::subview(mgrad,1,igp,jgp);
+
+      // Compute mgrad = average(dpnh_dp_i*grad(phinh_i))
       const auto phinh_i_x = Homme::subview(m_buffers.grad_phinh_i,kv.team_idx,0,igp,jgp);
       const auto phinh_i_y = Homme::subview(m_buffers.grad_phinh_i,kv.team_idx,1,igp,jgp);
       if (m_theta_hydrostatic_mode) {
-        ColumnOps::compute_midpoint_values(kv,phinh_i_x,wvor_x);
-        ColumnOps::compute_midpoint_values(kv,phinh_i_y,wvor_y);
+        ColumnOps::compute_midpoint_values(kv,phinh_i_x,mgrad_x);
+        ColumnOps::compute_midpoint_values(kv,phinh_i_y,mgrad_y);
       } else {
         const auto dpnh_dp_i = Homme::subview(m_buffers.dpnh_dp_i,kv.team_idx,igp,jgp);
         const auto prod_x = [&phinh_i_x,&dpnh_dp_i](const int ilev)->Scalar {
@@ -1272,9 +1314,29 @@ struct CaarFunctorImpl {
           return phinh_i_y(ilev)*dpnh_dp_i(ilev);
         };
 
-        ColumnOps::compute_midpoint_values<CombineMode::Add>(kv,prod_x,wvor_x);
-        ColumnOps::compute_midpoint_values<CombineMode::Add>(kv,prod_y,wvor_y);
+        ColumnOps::compute_midpoint_values(kv,prod_x,mgrad_x);
+        ColumnOps::compute_midpoint_values(kv,prod_y,mgrad_y);
       }
+      kv.team_barrier();
+
+      // Apply pgrad_correction: mgrad += cp*T0*(grad(log(exner))-grad(exner)/exner) (if applicable)
+      if (m_pgrad_correction) {
+        using namespace PhysicalConstants;
+        constexpr Real T0 = Tref - Tref_lapse_rate*Tref*cp/g;
+
+        const auto grad_tmp_i_x   = Homme::subview(grad_tmp,0,igp,jgp);
+        const auto grad_tmp_i_y   = Homme::subview(grad_tmp,1,igp,jgp);
+        const auto grad_exner_i_x = Homme::subview(grad_exner,0,igp,jgp);
+        const auto grad_exner_i_y = Homme::subview(grad_exner,1,igp,jgp);
+        const auto exner_i        = Homme::subview(m_buffers.exner,kv.team_idx,igp,jgp);
+
+        Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team,NUM_LEV),
+                             [&](const int ilev) {
+          mgrad_x(ilev) += cp*T0*(grad_tmp_i_x(ilev) - grad_exner_i_x(ilev)/exner_i(ilev));
+          mgrad_y(ilev) += cp*T0*(grad_tmp_i_y(ilev) - grad_exner_i_y(ilev)/exner_i(ilev));
+        });
+      }
+      kv.team_barrier();
 
       // Compute KE. Also, add fcor to vort
       auto u  = Homme::subview(m_state.m_v,kv.ie,m_data.n0,0,igp,jgp);
@@ -1303,13 +1365,6 @@ struct CaarFunctorImpl {
                                        Homme::subview(m_buffers.v_tens,kv.team_idx));
     }
 
-    // Note: exner = (pi/p0)^k, therefore grad(exner) = k*(exner/pi)*grad(pi).
-    //       So you *could* avoid computing this grad, at the price of some arithmetic op.
-    auto grad_exner = Homme::subview(m_buffers.grad_tmp,kv.team_idx);
-    m_sphere_ops.gradient_sphere(kv, Homme::subview(m_buffers.exner,kv.team_idx),
-                                     grad_exner);
-    kv.team_barrier();
-
     Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team,NP*NP),
                          [&](const int idx) {
       const int igp = idx / NP;
@@ -1332,9 +1387,8 @@ struct CaarFunctorImpl {
         u_tens(ilev) += cp_vtheta*grad_exner(0,igp,jgp,ilev);
         v_tens(ilev) += cp_vtheta*grad_exner(1,igp,jgp,ilev);
 
-        // avg(dpnh_dp*gradphinh) + wvor (added up in wvor)
-        u_tens(ilev) += wvor(0,igp,jgp,ilev);
-        v_tens(ilev) += wvor(1,igp,jgp,ilev);
+        u_tens(ilev) += (mgrad(0,igp,jgp,ilev) + wvor(0,igp,jgp,ilev));
+        v_tens(ilev) += (mgrad(1,igp,jgp,ilev) + wvor(1,igp,jgp,ilev));
 
         // Add +/- v_j*(vort+fcor), where v_j=v for u_tens and v_j=u for v_tens
         u_tens(ilev) -= m_state.m_v(kv.ie,m_data.n0,1,igp,jgp,ilev)*vort(ilev);

diff --git a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp
@@ -45,7 +45,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option,
                                const int& ftype, const int& theta_adv_form, const bool& prescribed_wind, const bool& moisture, const bool& disable_diagnostics,
                                const bool& use_cpstar, const int& transport_alg, const bool& theta_hydrostatic_mode, const char** test_case,
                                const int& dt_remap_factor, const int& dt_tracer_factor,
-                               const double& rearth, const int& nsplit)
+                               const double& rearth, const int& nsplit, const bool& pgrad_correction)
 {
   // Check that the simulation options are supported. This helps us in the future, since we
   // are currently 'assuming' some option have/not have certain values. As we support for more
@@ -113,6 +113,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option,
   params.dcmip16_mu                    = dcmip16_mu;
   params.nsplit                        = nsplit;
   params.rearth                        = rearth;
+  params.pgrad_correction              = pgrad_correction;
 
   if (time_step_type==5) {
     //5 stage, 3rd order, explicit

diff --git a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90
@@ -74,7 +74,8 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp)
                               ftype, prescribed_wind, moisture, disable_diagnostics,   &
                               use_cpstar, transport_alg, theta_hydrostatic_mode,       &
                               dcmip16_mu, theta_advect_form, test_case,                &
-                              MAX_STRING_LEN, dt_remap_factor, dt_tracer_factor
+                              MAX_STRING_LEN, dt_remap_factor, dt_tracer_factor,       &
+                              pgrad_correction
     !
     ! Input(s)
     !
@@ -108,7 +109,8 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp)
                                    transport_alg,                                                 &
                                    LOGICAL(theta_hydrostatic_mode,c_bool),                        &
                                    c_loc(test_name),                                              &
-                                   dt_remap_factor, dt_tracer_factor, rearth, nsplit)
+                                   dt_remap_factor, dt_tracer_factor, rearth, nsplit,             &
+                                   LOGICAL(pgrad_correction==1,c_bool))
 
     ! Initialize time level structure in C++
     call init_time_level_c(tl%nm1, tl%n0, tl%np1, tl%nstep, tl%nstep0)

diff --git a/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 b/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90
@@ -14,7 +14,7 @@ subroutine init_simulation_params_c (remap_alg, limiter_option, rsplit, qsplit,
                                        dcmip16_mu, ftype, theta_adv_form, prescribed_wind, moisture, &
                                        disable_diagnostics, use_cpstar, transport_alg,               &
                                        theta_hydrostatic_mode, test_case_name, dt_remap_factor,      &
-                                       dt_tracer_factor, rearth, nsplit) bind(c)
+                                       dt_tracer_factor, rearth, nsplit, pgrad_correction) bind(c)
 
     use iso_c_binding, only: c_int, c_bool, c_double, c_ptr
     !
@@ -28,7 +28,7 @@ subroutine init_simulation_params_c (remap_alg, limiter_option, rsplit, qsplit,
     integer(kind=c_int),  intent(in) :: hypervis_order, hypervis_subcycle, hypervis_subcycle_tom
     integer(kind=c_int),  intent(in) :: ftype, theta_adv_form
     logical(kind=c_bool), intent(in) :: prescribed_wind, moisture, disable_diagnostics, use_cpstar
-    logical(kind=c_bool), intent(in) :: theta_hydrostatic_mode
+    logical(kind=c_bool), intent(in) :: theta_hydrostatic_mode, pgrad_correction
     type(c_ptr), intent(in) :: test_case_name
   end subroutine init_simulation_params_c
 

diff --git a/components/homme/test/reg_test/namelists/thetahs1.nl b/components/homme/test/reg_test/namelists/thetahs1.nl
@@ -37,6 +37,7 @@ theta_hydrostatic_mode = false
 theta_advect_form = 1
 tstep_type        = 10
 moisture          = 'notdry'
+pgrad_correction  = 1
 /
 &solver_nl
 precon_method = "identity"