Skip to content

Commit

Permalink
Lots of work getting RRTMGP+Kokkos to perform
Browse files Browse the repository at this point in the history
Change list:
1) Changes default RRTMGP backend to Kokkos
2) Adds new testmods for selecting RRTMGP backend
3) All kernels in rrtmgp interface can now be timed
4) Detranspose dimensions in kernels
5) Use a faster approach for getting random cldx
6) Update rrtmgp submodule
  • Loading branch information
jgfouca committed Jan 8, 2025
1 parent d782204 commit a8779ef
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 189 deletions.
4 changes: 2 additions & 2 deletions components/eamxx/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,8 @@ endif()
# #cmakedefine RRTMGP_EXPENSIVE_CHECKS
option (SCREAM_RRTMGP_DEBUG "Turn on extra debug checks in RRTMGP" ${SCREAM_DEBUG})

option(SCREAM_RRTMGP_ENABLE_YAKL "Use YAKL under rrtmgp" TRUE)
option(SCREAM_RRTMGP_ENABLE_KOKKOS "Use Kokkos under rrtmgp" FALSE)
option(SCREAM_RRTMGP_ENABLE_YAKL "Use YAKL under rrtmgp" FALSE)
option(SCREAM_RRTMGP_ENABLE_KOKKOS "Use Kokkos under rrtmgp" TRUE)
if (SCREAM_RRTMGP_ENABLE_YAKL)
add_definitions("-DRRTMGP_ENABLE_YAKL")
endif()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Testmod: select the Kokkos backend for RRTMGP.
# Appends two entries to SCREAM_CMAKE_OPTIONS: the YAKL option is switched Off
# and the Kokkos option is switched On.
./xmlchange --append SCREAM_CMAKE_OPTIONS='SCREAM_RRTMGP_ENABLE_YAKL Off'
./xmlchange --append SCREAM_CMAKE_OPTIONS='SCREAM_RRTMGP_ENABLE_KOKKOS On'

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Testmod: select the YAKL backend for RRTMGP.
# Appends two entries to SCREAM_CMAKE_OPTIONS: the YAKL option is switched On
# and the Kokkos option is switched Off.
./xmlchange --append SCREAM_CMAKE_OPTIONS='SCREAM_RRTMGP_ENABLE_YAKL On'
./xmlchange --append SCREAM_CMAKE_OPTIONS='SCREAM_RRTMGP_ENABLE_KOKKOS Off'
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
ulrreal2dk d_dz = ulrreal2dk(m_buffer.d_dz.data(), m_col_chunk_size, m_nlay);
auto d_mu0 = m_buffer.cosine_zenith;
#ifdef RRTMGP_ENABLE_YAKL
TIMED_INLINE_KERNEL(init_views,
// Create YAKL arrays. RRTMGP expects YAKL arrays with styleFortran, i.e., data has ncol
// as the fastest index. For this reason we must copy the data.
auto subview_1d = [&](const real1d v) -> real1d {
Expand Down Expand Up @@ -976,9 +977,11 @@ void RRTMGPRadiation::run_impl (const double dt) {
auto cld_tau_lw_bnd = subview_3d(m_buffer.cld_tau_lw_bnd);
auto cld_tau_sw_gpt = subview_3d(m_buffer.cld_tau_sw_gpt);
auto cld_tau_lw_gpt = subview_3d(m_buffer.cld_tau_lw_gpt);
);
#endif
#ifdef RRTMGP_ENABLE_KOKKOS
ConvertToRrtmgpSubview conv = {beg, ncol};
TIMED_INLINE_KERNEL(init_views,

// Note, ncol will not necessarily be m_col_chunk_size because the number of cols
// will not always be evenly divided by m_col_chunk_size. In most cases, the
Expand Down Expand Up @@ -1039,6 +1042,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
auto cld_tau_lw_bnd_k = conv.subview3d(m_buffer.cld_tau_lw_bnd_k);
auto cld_tau_sw_gpt_k = conv.subview3d(m_buffer.cld_tau_sw_gpt_k);
auto cld_tau_lw_gpt_k = conv.subview3d(m_buffer.cld_tau_lw_gpt_k);
);
#endif

// Set gas concs to "view" only the first ncol columns
Expand Down Expand Up @@ -1072,6 +1076,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
Kokkos::deep_copy(d_mu0,h_mu0);

const auto policy = ekat::ExeSpaceUtils<ExeSpace>::get_default_team_policy(ncol, m_nlay);
TIMED_KERNEL(
Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) {
const int i = team.league_rank();
const int icol = i+beg;
Expand Down Expand Up @@ -1215,6 +1220,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
}
#endif
});
);
}
Kokkos::fence();
#ifdef RRTMGP_ENABLE_KOKKOS
Expand Down Expand Up @@ -1362,25 +1368,30 @@ void RRTMGPRadiation::run_impl (const double dt) {
// Compute band-by-band surface_albedos. This is needed since
// the AD passes broadband albedos, but rrtmgp require band-by-band.
#ifdef RRTMGP_ENABLE_YAKL
TIMED_KERNEL(
rrtmgp::compute_band_by_band_surface_albedos(
ncol, nswbands,
sfc_alb_dir_vis, sfc_alb_dir_nir,
sfc_alb_dif_vis, sfc_alb_dif_nir,
sfc_alb_dir, sfc_alb_dif);
);
#endif
#ifdef RRTMGP_ENABLE_KOKKOS
TIMED_KERNEL(
interface_t::compute_band_by_band_surface_albedos(
ncol, nswbands,
sfc_alb_dir_vis_k, sfc_alb_dir_nir_k,
sfc_alb_dif_vis_k, sfc_alb_dif_nir_k,
sfc_alb_dir_k, sfc_alb_dif_k);
);
COMPARE_ALL_WRAP(std::vector<real2d>({sfc_alb_dir, sfc_alb_dif}),
std::vector<real2dk>({sfc_alb_dir_k, sfc_alb_dif_k}));
#endif
// Compute cloud optical properties here?

// Run RRTMGP driver
#ifdef RRTMGP_ENABLE_YAKL
TIMED_KERNEL(
rrtmgp::rrtmgp_main(
ncol, m_nlay,
p_lay, t_lay, p_lev, t_lev,
Expand All @@ -1401,8 +1412,10 @@ void RRTMGPRadiation::run_impl (const double dt) {
eccf, m_atm_logger,
m_extra_clnclrsky_diag, m_extra_clnsky_diag
);
);
#endif
#ifdef RRTMGP_ENABLE_KOKKOS
TIMED_KERNEL(
interface_t::rrtmgp_main(
ncol, m_nlay,
p_lay_k, t_lay_k, p_lev_k, t_lev_k,
Expand All @@ -1423,6 +1436,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
eccf, m_atm_logger,
m_extra_clnclrsky_diag, m_extra_clnsky_diag
);
);
COMPARE_ALL_WRAP(std::vector<real2d>({
sw_flux_up, sw_flux_dn, sw_flux_dn_dir, lw_flux_up, lw_flux_dn,
sw_clnclrsky_flux_up, sw_clnclrsky_flux_dn, sw_clnclrsky_flux_dn_dir,
Expand All @@ -1445,6 +1459,7 @@ void RRTMGPRadiation::run_impl (const double dt) {

// Update heating tendency
#ifdef RRTMGP_ENABLE_YAKL
TIMED_INLINE_KERNEL(heating_tendency,
auto sw_heating = m_buffer.sw_heating;
auto lw_heating = m_buffer.lw_heating;
rrtmgp::compute_heating_rate(
Expand All @@ -1466,8 +1481,10 @@ void RRTMGPRadiation::run_impl (const double dt) {
});
}
Kokkos::fence();
);
#endif
#ifdef RRTMGP_ENABLE_KOKKOS
TIMED_INLINE_KERNEL(heating_tendency,
auto sw_heating_k = m_buffer.sw_heating_k;
auto lw_heating_k = m_buffer.lw_heating_k;
rrtmgp::compute_heating_rate(
Expand All @@ -1489,6 +1506,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
});
}
Kokkos::fence();
);
COMPARE_ALL_WRAP(std::vector<real2d>({sw_heating, lw_heating}),
std::vector<real2dk>({sw_heating_k, lw_heating_k}));
#endif
Expand All @@ -1497,6 +1515,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
#ifdef RRTMGP_ENABLE_YAKL
const int kbot = nlay+1;

TIMED_KERNEL(
// Compute diffuse flux as difference between total and direct
Kokkos::parallel_for(Kokkos::RangePolicy<ExeSpace>(0,nswbands*(nlay+1)*ncol),
KOKKOS_LAMBDA (const int idx) {
Expand All @@ -1513,10 +1532,12 @@ void RRTMGPRadiation::run_impl (const double dt) {
sfc_flux_dir_vis, sfc_flux_dir_nir,
sfc_flux_dif_vis, sfc_flux_dif_nir
);
);
#endif
#ifdef RRTMGP_ENABLE_KOKKOS
const int kbot_k = nlay;

TIMED_KERNEL(
// Compute diffuse flux as difference between total and direct
Kokkos::parallel_for(Kokkos::RangePolicy<ExeSpace>(0,nswbands*(nlay+1)*ncol),
KOKKOS_LAMBDA (const int idx) {
Expand All @@ -1533,12 +1554,14 @@ void RRTMGPRadiation::run_impl (const double dt) {
sfc_flux_dir_vis_k, sfc_flux_dir_nir_k,
sfc_flux_dif_vis_k, sfc_flux_dif_nir_k
);
);
COMPARE_ALL_WRAP(std::vector<real1d>({sfc_flux_dir_vis, sfc_flux_dir_nir, sfc_flux_dif_vis, sfc_flux_dif_nir}),
std::vector<real1dk>({sfc_flux_dir_vis_k, sfc_flux_dir_nir_k, sfc_flux_dif_vis_k, sfc_flux_dif_nir_k}));
#endif

// Compute diagnostic total cloud area (vertically-projected cloud cover)
#ifdef RRTMGP_ENABLE_YAKL
TIMED_KERNEL(
real1d cldlow ("cldlow", d_cldlow.data() + m_col_chunk_beg[ic], ncol);
real1d cldmed ("cldmed", d_cldmed.data() + m_col_chunk_beg[ic], ncol);
real1d cldhgh ("cldhgh", d_cldhgh.data() + m_col_chunk_beg[ic], ncol);
Expand All @@ -1553,8 +1576,10 @@ void RRTMGPRadiation::run_impl (const double dt) {
rrtmgp::compute_cloud_area(ncol, nlay, nlwgpts, 400e2, 700e2, p_lay, cld_tau_lw_gpt, cldmed);
rrtmgp::compute_cloud_area(ncol, nlay, nlwgpts, 0, 400e2, p_lay, cld_tau_lw_gpt, cldhgh);
rrtmgp::compute_cloud_area(ncol, nlay, nlwgpts, 0, std::numeric_limits<Real>::max(), p_lay, cld_tau_lw_gpt, cldtot);
);
#endif
#ifdef RRTMGP_ENABLE_KOKKOS
TIMED_KERNEL(
real1dk cldlow_k (d_cldlow.data() + m_col_chunk_beg[ic], ncol);
real1dk cldmed_k (d_cldmed.data() + m_col_chunk_beg[ic], ncol);
real1dk cldhgh_k (d_cldhgh.data() + m_col_chunk_beg[ic], ncol);
Expand All @@ -1569,12 +1594,15 @@ void RRTMGPRadiation::run_impl (const double dt) {
interface_t::compute_cloud_area(ncol, nlay, nlwgpts, 400e2, 700e2, p_lay_k, cld_tau_lw_gpt_k, cldmed_k);
interface_t::compute_cloud_area(ncol, nlay, nlwgpts, 0, 400e2, p_lay_k, cld_tau_lw_gpt_k, cldhgh_k);
interface_t::compute_cloud_area(ncol, nlay, nlwgpts, 0, std::numeric_limits<Real>::max(), p_lay_k, cld_tau_lw_gpt_k, cldtot_k);
);
COMPARE_ALL_WRAP(std::vector<real1d>({cldlow, cldmed, cldhgh, cldtot}),
std::vector<real1dk>({cldlow_k, cldmed_k, cldhgh_k, cldtot_k}));
#endif

// Compute cloud-top diagnostics following AeroCOM recommendation
#ifdef RRTMGP_ENABLE_YAKL
TIMED_INLINE_KERNEL(cloud_top,

// Get visible 0.67 micron band for COSP
auto idx_067 = rrtmgp::get_wavelength_index_sw(0.67e-6);
// Get IR 10.5 micron band for COSP
Expand All @@ -1595,8 +1623,10 @@ void RRTMGPRadiation::run_impl (const double dt) {
nc, T_mid_at_cldtop, p_mid_at_cldtop, cldfrac_ice_at_cldtop,
cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop,
eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop);
);
#endif
#ifdef RRTMGP_ENABLE_KOKKOS
TIMED_INLINE_KERNEL(cloud_top,
// Get visible 0.67 micron band for COSP
auto idx_067_k = interface_t::get_wavelength_index_sw_k(0.67e-6);
// Get IR 10.5 micron band for COSP
Expand All @@ -1616,6 +1646,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
nc_k, T_mid_at_cldtop_k, p_mid_at_cldtop_k, cldfrac_ice_at_cldtop_k,
cldfrac_liq_at_cldtop_k, cldfrac_tot_at_cldtop_k, cdnc_at_cldtop_k,
eff_radius_qc_at_cldtop_k, eff_radius_qi_at_cldtop_k);
);
COMPARE_ALL_WRAP(std::vector<real1d>({
T_mid_at_cldtop, p_mid_at_cldtop, cldfrac_ice_at_cldtop,
cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop,
Expand All @@ -1629,6 +1660,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
// Copy output data back to FieldManager
const auto policy = ekat::ExeSpaceUtils<ExeSpace>::get_default_team_policy(ncol, m_nlay);
#ifdef RRTMGP_ENABLE_YAKL
TIMED_KERNEL(
Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) {
const int i = team.league_rank();
const int icol = i + beg;
Expand Down Expand Up @@ -1671,8 +1703,10 @@ void RRTMGPRadiation::run_impl (const double dt) {
d_sunlit(icol) = 0.0;
}
});
);
#endif
#ifdef RRTMGP_ENABLE_KOKKOS
TIMED_KERNEL(
Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) {
const int i = team.league_rank();
const int icol = i + beg;
Expand Down Expand Up @@ -1714,6 +1748,7 @@ void RRTMGPRadiation::run_impl (const double dt) {
d_sunlit(icol) = 0.0;
}
});
);
#ifdef RRTMGP_ENABLE_YAKL
// Sync back to gas_concs_k
real3dk temp(gas_concs_k, std::make_pair(0, ncol), Kokkos::ALL, Kokkos::ALL);
Expand Down
2 changes: 1 addition & 1 deletion components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ static void dummy_clouds(
// put them in 2/3 of the columns since that's roughly the total cloudiness of earth.
// Set sane values for liquid and ice water path.
// NOTE: these "sane" values are in g/m2!
Kokkos::parallel_for( MDRP::template get<2>({nlay,ncol}) , KOKKOS_LAMBDA (int ilay, int icol) {
Kokkos::parallel_for( MDRP::template get<2>({ncol, nlay}) , KOKKOS_LAMBDA (int icol, int ilay) {
cloud_mask(icol,ilay) = p_lay(icol,ilay) > 100. * 100. && p_lay(icol,ilay) < 900. * 100. && ((icol+1)%3) != 0;
// Ice and liquid will overlap in a few layers
lwp(icol,ilay) = conv::merge(10., 0., cloud_mask(icol,ilay) && t_lay(icol,ilay) > 263.);
Expand Down
2 changes: 1 addition & 1 deletion components/eamxx/src/physics/rrtmgp/rrtmgp_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void compute_heating_rate (
using MDRP = typename conv::MDRP<typename View1::array_layout>;
auto ncol = flux_up.extent(0);
auto nlay = flux_up.extent(1)-1;
Kokkos::parallel_for(MDRP::template get<2>({nlay,ncol}), KOKKOS_LAMBDA(int ilay, int icol) {
Kokkos::parallel_for(MDRP::template get<2>({ncol, nlay}), KOKKOS_LAMBDA(int icol, int ilay) {
heating_rate(icol,ilay) = (
flux_up(icol,ilay+1) - flux_up(icol,ilay) -
flux_dn(icol,ilay+1) + flux_dn(icol,ilay)
Expand Down
Loading

0 comments on commit a8779ef

Please sign in to comment.