Skip to content

Commit

Permalink
Merge 'CONRAD/add_pgrad_correction_to_theta-l_kokkos' (PR #5299)
Browse files Browse the repository at this point in the history
Add pgrad_correction to theta-l_kokkos. A few changes:
-- Change test using thetahs1.nl namelist to use pgrad_correction and test BFB
-- Add cxx_log() callable from F90 to compute log on device for BFB
-- Moved some computations around in CaarFunctorImpl to match F90
ordering (some needed for BFB now that pgrad_correction exists)

[nonBFB] for HOMMEBFB only due to new test option for theta-fhs1.
  • Loading branch information
oksanaguba committed Nov 29, 2022
2 parents fb7d271 + f8b34e1 commit 0b6c1ba
Show file tree
Hide file tree
Showing 10 changed files with 144 additions and 25 deletions.
1 change: 1 addition & 0 deletions components/homme/src/share/cxx/SimulationParams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ struct SimulationParams
int nsplit = 0;
int nsplit_iteration;
double rearth; //propagated then to Geometry and SphereOps
bool pgrad_correction;

// Use this member to check whether the struct has been initialized
bool params_set = false;
Expand Down
17 changes: 17 additions & 0 deletions components/homme/src/share/cxx/utilities/BfbUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,20 @@ void tridiag_diagdom_bfb_a1x1 (int n, void* dl, void* d, void* du, void* x, int
}

} // extern C

// Allows F90 to call c++ log() on device for BFB testing
extern "C" {
double cxx_log(double input)
{
#ifdef HOMMEXX_ENABLE_GPU
double result;
Kokkos::RangePolicy<Homme::ExecSpace> policy(0,1);
Kokkos::parallel_reduce(policy, KOKKOS_LAMBDA(const int&, double& value) {
value = log(input);
}, result);
return result;
#else
return log(input);
#endif
}
} // extern C
15 changes: 15 additions & 0 deletions components/homme/src/share/cxx/utilities/VectorUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,21 @@ VECTOR_SIMD_LOOP
return vp;
}

template <typename SpT, int l>
KOKKOS_INLINE_FUNCTION
Vector<VectorTag<SIMD<double, SpT>, l> >
log (const Vector<VectorTag<SIMD<double,SpT>,l>>& v)
{
using VectorType = Vector<VectorTag<SIMD<double,SpT>,l>>;
VectorType vp;
VECTOR_SIMD_LOOP
for (int i = 0; i < VectorType::vector_length; ++i) {
vp[i] = std::log(v[i]);
}

return vp;
}

} // namespace KokkosKernels
} // namespace Batched
} // namespace Experimental
Expand Down
12 changes: 12 additions & 0 deletions components/homme/src/share/cxx/utilities/bfb_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@ subroutine tridiag_diagdom_bfb_a1x1(n, dl, d, du, x, real_size) bind(c)
end subroutine tridiag_diagdom_bfb_a1x1
end interface

interface
function cxx_log(input) bind(C)
use iso_c_binding, only: c_double

!arguments:
real(kind=c_double), value, intent(in) :: input

! return
real(kind=c_double) :: cxx_log
end function cxx_log
end interface

interface bfb_pow
module procedure bfb_pow_0d
module procedure bfb_pow_1d
Expand Down
16 changes: 16 additions & 0 deletions components/homme/src/theta-l/share/prim_advance_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ module prim_advance_mod
use, intrinsic :: iso_c_binding
#endif

#ifdef HOMMEXX_BFB_TESTING
use bfb_mod, only: cxx_log
#endif

implicit none
private
save
Expand Down Expand Up @@ -1428,7 +1432,19 @@ subroutine compute_andor_apply_rhs(np1,nm1,n0,dt2,elem,hvcoord,hybrid,&

if (pgrad_correction==1) then
T0 = TREF-tref_lapse_rate*TREF*Cp/g ! = 97
#ifdef HOMMEXX_BFB_TESTING
! For BFB testing, calculate log(exner) using cxx_log()
! and then call gradient sphere.
do j=1,np
do i=1,np
temp(i,j,k) = cxx_log(exner(i,j,k))
end do
end do

vtemp(:,:,:,k)=gradient_sphere(temp(:,:,k),deriv,elem(ie)%Dinv)
#else
vtemp(:,:,:,k)=gradient_sphere(log(exner(:,:,k)),deriv,elem(ie)%Dinv)
#endif
mgrad(:,:,1,k)=mgrad(:,:,1,k) + Cp*T0*(vtemp(:,:,1,k)-gradexner(:,:,1,k)/exner(:,:,k))
mgrad(:,:,2,k)=mgrad(:,:,2,k) + Cp*T0*(vtemp(:,:,2,k)-gradexner(:,:,2,k)/exner(:,:,k))
endif
Expand Down
94 changes: 74 additions & 20 deletions components/homme/src/theta-l_kokkos/cxx/CaarFunctorImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct CaarFunctorImpl {

struct Buffers {
static constexpr int num_3d_scalar_mid_buf = 10;
static constexpr int num_3d_vector_mid_buf = 3;
static constexpr int num_3d_vector_mid_buf = 5;
static constexpr int num_3d_scalar_int_buf = 6;
static constexpr int num_3d_vector_int_buf = 3;

Expand All @@ -53,6 +53,8 @@ struct CaarFunctorImpl {
ExecViewUnmanaged<Scalar* [NP][NP][NUM_LEV] > omega_p;
ExecViewUnmanaged<Scalar* [NP][NP][NUM_LEV] > vort;

ExecViewUnmanaged<Scalar* [2][NP][NP][NUM_LEV] > grad_exner;
ExecViewUnmanaged<Scalar* [2][NP][NP][NUM_LEV] > mgrad;
ExecViewUnmanaged<Scalar* [2][NP][NP][NUM_LEV] > grad_tmp;
ExecViewUnmanaged<Scalar* [2][NP][NP][NUM_LEV] > vdp;

Expand Down Expand Up @@ -87,6 +89,7 @@ struct CaarFunctorImpl {
const int m_rsplit;
const bool m_theta_hydrostatic_mode;
const AdvectionForm m_theta_advection_form;
const bool m_pgrad_correction;

HybridVCoord m_hvcoord;
ElementsState m_state;
Expand Down Expand Up @@ -127,6 +130,7 @@ struct CaarFunctorImpl {
, m_rsplit(params.rsplit)
, m_theta_hydrostatic_mode(params.theta_hydrostatic_mode)
, m_theta_advection_form(params.theta_adv_form)
, m_pgrad_correction(params.pgrad_correction)
, m_hvcoord(hvcoord)
, m_state(elements.m_state)
, m_derived(elements.m_derived)
Expand All @@ -150,6 +154,7 @@ struct CaarFunctorImpl {
, m_rsplit(params.rsplit)
, m_theta_hydrostatic_mode(params.theta_hydrostatic_mode)
, m_theta_advection_form(params.theta_adv_form)
, m_pgrad_correction(params.pgrad_correction)
, m_policy_pre (Homme::get_default_team_policy<ExecSpace,TagPreExchange>(m_num_elems))
, m_policy_post (0,num_elems*NP*NP)
, m_policy_dp3d_lim (Homme::get_default_team_policy<ExecSpace,TagDp3dLimiter>(m_num_elems))
Expand Down Expand Up @@ -245,6 +250,10 @@ struct CaarFunctorImpl {
mem += m_buffers.dp_tens.size();

// Midpoints vectors
m_buffers.grad_exner = decltype(m_buffers.grad_exner)(mem,nslots);
mem += m_buffers.grad_exner.size();
m_buffers.mgrad = decltype(m_buffers.mgrad)(mem,nslots);
mem += m_buffers.mgrad.size();
m_buffers.grad_tmp = decltype(m_buffers.grad_tmp)(mem,nslots);
mem += m_buffers.grad_tmp.size();

Expand Down Expand Up @@ -1183,13 +1192,16 @@ struct CaarFunctorImpl {
// - Compute gradKE = grad(v*v/2)
// - Compute gradExner = grad(exner)
// - Compute mgrad = average[dpnh_dp_i*gradphinh_i]
// + cp*T0*(grad(log(exner))-grad(exner)/exner) (pgrad_correction, if applicable)
// - Compute v_tens =
// scale1*(-v_vadv + v2*(fcor+vort)-gradKE -mgrad -cp*vtheta*gradExner - wvor)
// scale1*(-v_vadv - v1*(fcor+vort)-gradKE -mgrad -cp*vtheta*gradExner - wvor)
// scale1*(-v_vadv + v2*(fcor+vort)-gradKE -mgrad -cp*vtheta*gradExner - (mgrad + wvor))
// scale1*(-v_vadv - v1*(fcor+vort)-gradKE -mgrad -cp*vtheta*gradExner - (mgrad + wvor))

auto v_tens = Homme::subview(m_buffers.v_tens,kv.team_idx);
auto vort = Homme::subview(m_buffers.vort,kv.team_idx);
auto wvor = Homme::subview(m_buffers.vdp,kv.team_idx);
auto grad_exner = Homme::subview(m_buffers.grad_exner,kv.team_idx);
auto mgrad = Homme::subview(m_buffers.mgrad,kv.team_idx);
auto grad_tmp = Homme::subview(m_buffers.grad_tmp,kv.team_idx);

// Compute vorticity(v)
m_sphere_ops.vorticity_sphere(kv, Homme::subview(m_state.m_v,kv.ie,m_data.n0),
Expand Down Expand Up @@ -1219,7 +1231,7 @@ struct CaarFunctorImpl {
});
kv.team_barrier();

// Compute grad(average(w^2/2))
// Compute grad(average(w^2/2)). Store in wvor.
m_sphere_ops.gradient_sphere(kv, Homme::subview(m_buffers.temp,kv.team_idx),
wvor);

Expand All @@ -1229,6 +1241,24 @@ struct CaarFunctorImpl {
kv.team_barrier();
}

// Compute grad(exner)
// Note: exner = (pi/p0)^k, therefore grad(exner) = k*(exner/pi)*grad(pi).
// So you *could* avoid computing this grad, at the price of some arithmetic op.
m_sphere_ops.gradient_sphere(kv, Homme::subview(m_buffers.exner,kv.team_idx),
grad_exner);

// If pgrad_correction=1, we need the gradient sphere
// for log(exner) below. Store into m_buffers.grad_tmp.
if (m_pgrad_correction) {
const auto exner = Homme::subview(m_buffers.exner,kv.team_idx);
const auto log_exner = [&exner](const int igp, const int jgp, const int ilev)->Scalar {
return log(exner(igp, jgp, ilev));
};
m_sphere_ops.gradient_sphere(kv, log_exner,
grad_tmp);
}
kv.team_barrier();

// Scalar w_vor,mgrad,w_gradw,gradw2;
Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team,NP*NP),
[&](const int idx) {
Expand All @@ -1237,6 +1267,7 @@ struct CaarFunctorImpl {

auto wvor_x = Homme::subview(wvor,0,igp,jgp);
auto wvor_y = Homme::subview(wvor,1,igp,jgp);

if (!m_theta_hydrostatic_mode) {
// Compute wvor = grad(average(w^2/2)) - average(w*grad(w))
// Note: vtens is already storing grad(avg(w^2/2))
Expand All @@ -1255,14 +1286,25 @@ struct CaarFunctorImpl {
w_gradw_x, wvor_x, -1.0);
ColumnOps::compute_midpoint_values<CombineMode::ScaleAdd>(kv,
w_gradw_y, wvor_y, -1.0);
} else {
// wvor is not used if theta_hydrostatic_mode=1. Set to zero
// here to avoid adding in uninitialized values into v_tens.
Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team,NUM_LEV),
[&](const int ilev) {
wvor_x(ilev) = 0;
wvor_y(ilev) = 0;
});
}

// Compute average(dpnh_dp_i*grad(phinh_i)), and add to wvor
auto mgrad_x = Homme::subview(mgrad,0,igp,jgp);
auto mgrad_y = Homme::subview(mgrad,1,igp,jgp);

// Compute mgrad = average(dpnh_dp_i*grad(phinh_i))
const auto phinh_i_x = Homme::subview(m_buffers.grad_phinh_i,kv.team_idx,0,igp,jgp);
const auto phinh_i_y = Homme::subview(m_buffers.grad_phinh_i,kv.team_idx,1,igp,jgp);
if (m_theta_hydrostatic_mode) {
ColumnOps::compute_midpoint_values(kv,phinh_i_x,wvor_x);
ColumnOps::compute_midpoint_values(kv,phinh_i_y,wvor_y);
ColumnOps::compute_midpoint_values(kv,phinh_i_x,mgrad_x);
ColumnOps::compute_midpoint_values(kv,phinh_i_y,mgrad_y);
} else {
const auto dpnh_dp_i = Homme::subview(m_buffers.dpnh_dp_i,kv.team_idx,igp,jgp);
const auto prod_x = [&phinh_i_x,&dpnh_dp_i](const int ilev)->Scalar {
Expand All @@ -1272,9 +1314,29 @@ struct CaarFunctorImpl {
return phinh_i_y(ilev)*dpnh_dp_i(ilev);
};

ColumnOps::compute_midpoint_values<CombineMode::Add>(kv,prod_x,wvor_x);
ColumnOps::compute_midpoint_values<CombineMode::Add>(kv,prod_y,wvor_y);
ColumnOps::compute_midpoint_values(kv,prod_x,mgrad_x);
ColumnOps::compute_midpoint_values(kv,prod_y,mgrad_y);
}
kv.team_barrier();

// Apply pgrad_correction: mgrad += cp*T0*(grad(log(exner))-grad(exner)/exner) (if applicable)
if (m_pgrad_correction) {
using namespace PhysicalConstants;
constexpr Real T0 = Tref - Tref_lapse_rate*Tref*cp/g;

const auto grad_tmp_i_x = Homme::subview(grad_tmp,0,igp,jgp);
const auto grad_tmp_i_y = Homme::subview(grad_tmp,1,igp,jgp);
const auto grad_exner_i_x = Homme::subview(grad_exner,0,igp,jgp);
const auto grad_exner_i_y = Homme::subview(grad_exner,1,igp,jgp);
const auto exner_i = Homme::subview(m_buffers.exner,kv.team_idx,igp,jgp);

Kokkos::parallel_for(Kokkos::ThreadVectorRange(kv.team,NUM_LEV),
[&](const int ilev) {
mgrad_x(ilev) += cp*T0*(grad_tmp_i_x(ilev) - grad_exner_i_x(ilev)/exner_i(ilev));
mgrad_y(ilev) += cp*T0*(grad_tmp_i_y(ilev) - grad_exner_i_y(ilev)/exner_i(ilev));
});
}
kv.team_barrier();

// Compute KE. Also, add fcor to vort
auto u = Homme::subview(m_state.m_v,kv.ie,m_data.n0,0,igp,jgp);
Expand Down Expand Up @@ -1303,13 +1365,6 @@ struct CaarFunctorImpl {
Homme::subview(m_buffers.v_tens,kv.team_idx));
}

// Note: exner = (pi/p0)^k, therefore grad(exner) = k*(exner/pi)*grad(pi).
// So you *could* avoid computing this grad, at the price of some arithmetic op.
auto grad_exner = Homme::subview(m_buffers.grad_tmp,kv.team_idx);
m_sphere_ops.gradient_sphere(kv, Homme::subview(m_buffers.exner,kv.team_idx),
grad_exner);
kv.team_barrier();

Kokkos::parallel_for(Kokkos::TeamThreadRange(kv.team,NP*NP),
[&](const int idx) {
const int igp = idx / NP;
Expand All @@ -1332,9 +1387,8 @@ struct CaarFunctorImpl {
u_tens(ilev) += cp_vtheta*grad_exner(0,igp,jgp,ilev);
v_tens(ilev) += cp_vtheta*grad_exner(1,igp,jgp,ilev);

// avg(dpnh_dp*gradphinh) + wvor (added up in wvor)
u_tens(ilev) += wvor(0,igp,jgp,ilev);
v_tens(ilev) += wvor(1,igp,jgp,ilev);
u_tens(ilev) += (mgrad(0,igp,jgp,ilev) + wvor(0,igp,jgp,ilev));
v_tens(ilev) += (mgrad(1,igp,jgp,ilev) + wvor(1,igp,jgp,ilev));

// Add +/- v_j*(vort+fcor), where v_j=v for u_tens and v_j=u for v_tens
u_tens(ilev) -= m_state.m_v(kv.ie,m_data.n0,1,igp,jgp,ilev)*vort(ilev);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option,
const int& ftype, const int& theta_adv_form, const bool& prescribed_wind, const bool& moisture, const bool& disable_diagnostics,
const bool& use_cpstar, const int& transport_alg, const bool& theta_hydrostatic_mode, const char** test_case,
const int& dt_remap_factor, const int& dt_tracer_factor,
const double& rearth, const int& nsplit)
const double& rearth, const int& nsplit, const bool& pgrad_correction)
{
// Check that the simulation options are supported. This helps us in the future, since we
// are currently 'assuming' some option have/not have certain values. As we support for more
Expand Down Expand Up @@ -113,6 +113,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option,
params.dcmip16_mu = dcmip16_mu;
params.nsplit = nsplit;
params.rearth = rearth;
params.pgrad_correction = pgrad_correction;

if (time_step_type==5) {
//5 stage, 3rd order, explicit
Expand Down
6 changes: 4 additions & 2 deletions components/homme/src/theta-l_kokkos/prim_driver_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp)
ftype, prescribed_wind, moisture, disable_diagnostics, &
use_cpstar, transport_alg, theta_hydrostatic_mode, &
dcmip16_mu, theta_advect_form, test_case, &
MAX_STRING_LEN, dt_remap_factor, dt_tracer_factor
MAX_STRING_LEN, dt_remap_factor, dt_tracer_factor, &
pgrad_correction
!
! Input(s)
!
Expand Down Expand Up @@ -108,7 +109,8 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp)
transport_alg, &
LOGICAL(theta_hydrostatic_mode,c_bool), &
c_loc(test_name), &
dt_remap_factor, dt_tracer_factor, rearth, nsplit)
dt_remap_factor, dt_tracer_factor, rearth, nsplit, &
LOGICAL(pgrad_correction==1,c_bool))

! Initialize time level structure in C++
call init_time_level_c(tl%nm1, tl%n0, tl%np1, tl%nstep, tl%nstep0)
Expand Down
4 changes: 2 additions & 2 deletions components/homme/src/theta-l_kokkos/theta_f2c_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ subroutine init_simulation_params_c (remap_alg, limiter_option, rsplit, qsplit,
dcmip16_mu, ftype, theta_adv_form, prescribed_wind, moisture, &
disable_diagnostics, use_cpstar, transport_alg, &
theta_hydrostatic_mode, test_case_name, dt_remap_factor, &
dt_tracer_factor, rearth, nsplit) bind(c)
dt_tracer_factor, rearth, nsplit, pgrad_correction) bind(c)

use iso_c_binding, only: c_int, c_bool, c_double, c_ptr
!
Expand All @@ -28,7 +28,7 @@ subroutine init_simulation_params_c (remap_alg, limiter_option, rsplit, qsplit,
integer(kind=c_int), intent(in) :: hypervis_order, hypervis_subcycle, hypervis_subcycle_tom
integer(kind=c_int), intent(in) :: ftype, theta_adv_form
logical(kind=c_bool), intent(in) :: prescribed_wind, moisture, disable_diagnostics, use_cpstar
logical(kind=c_bool), intent(in) :: theta_hydrostatic_mode
logical(kind=c_bool), intent(in) :: theta_hydrostatic_mode, pgrad_correction
type(c_ptr), intent(in) :: test_case_name
end subroutine init_simulation_params_c

Expand Down
1 change: 1 addition & 0 deletions components/homme/test/reg_test/namelists/thetahs1.nl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ theta_hydrostatic_mode = false
theta_advect_form = 1
tstep_type = 10
moisture = 'notdry'
pgrad_correction = 1
/
&solver_nl
precon_method = "identity"
Expand Down

0 comments on commit 0b6c1ba

Please sign in to comment.