Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Homme(xx)/SL: Enhanced trajectory method. #6874

Merged
merged 10 commits into from
Jan 14, 2025
Merged
4 changes: 3 additions & 1 deletion cime_config/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@
"SMS.ne4pg2_oQU480.F2010.eam-thetanh_ftype2",
"SMS.ne4pg2_oQU480.F2010.eam-thetanh_ftype4",
"SMS.ne4pg2_oQU480.F2010.eam-thetahy_sl",
"ERS.ne4pg2_oQU480.F2010.eam-thetahy_sl_nsubstep2",
"ERS.ne4pg2_oQU480.F2010.eam-thetahy_ftype2",
"ERS.ne4pg2_oQU480.F2010.eam-thetanh_ftype2",
)
Expand Down Expand Up @@ -722,7 +723,8 @@
"ERS_Ln90.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-small_kernels--scream-output-preset-5",
"ERP_Ln22.conusx4v1pg2_r05_oECv3.F2010-SCREAMv1-noAero.scream-bfbhash--scream-output-preset-6",
"ERS_Ln22.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-L128--scream-output-preset-4",
"REP_Ld5.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-L128--scream-output-preset-6"
"REP_Ld5.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-L128--scream-output-preset-6",
"ERS_Ln90.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-L128--scream-sl_nsubstep2",
)
},

Expand Down
30 changes: 30 additions & 0 deletions components/eam/bld/namelist_files/namelist_definition.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6809,6 +6809,36 @@ example, to apply hyperviscosity to moisture, the first tracer, set the value to
Default: (set by dycore)
</entry>

<entry id="semi_lagrange_halo" type="integer" category="se"
group="ctl_nl" valid_values="">
Number of element haloes to include in trajectory search. -1 triggers an
automatic calculation of max(1, dt_tracer_factor/3). This is based on the
advective CFL condition that governs the dynamics time step.
Default: -1 (set by dycore)
</entry>

<entry id="semi_lagrange_trajectory_nsubstep" type="integer" category="se"
group="ctl_nl" valid_values="">
Number of substeps to take in computing semi-Lagrangian transport
trajectories. 0 triggers the original algorithm; 1 or larger triggers the
enhanced-trajectory algorithm.
Default: 0 (set by dycore)
</entry>

<entry id="semi_lagrange_trajectory_nvelocity" type="integer" category="se"
group="ctl_nl" valid_values="">
Number of velocity snapshots to store for use when computing the enhanced
trajectories. -1 triggers an automatic calculation. 0, 1, 2 all become 2, the
minimum.
Default: -1 (set by dycore)
</entry>

<entry id="semi_lagrange_diagnostics" type="integer" category="se"
group="ctl_nl" valid_values="">
Optional diagnostic output from transport module.
Default: 0 (set by dycore)
</entry>

<!-- Physics grid -->

<entry id="se_fv_phys_remap_alg" type="integer" category="se"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
semi_lagrange_diagnostics = 1
semi_lagrange_trajectory_nsubstep = 2
semi_lagrange_trajectory_nvelocity = 3
1 change: 1 addition & 0 deletions components/eamxx/cime_config/namelist_defaults_scream.xml
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,7 @@ be lost if SCREAM_HACK_XML is not enabled.
<dt_tracer_factor constraints="ge 1" hgrid=".*pg2">6</dt_tracer_factor>
<hypervis_subcycle_q hgrid=".*pg2">6</hypervis_subcycle_q>
<transport_alg hgrid=".*pg2">12</transport_alg>
<semi_lagrange_trajectory_nsubstep>0</semi_lagrange_trajectory_nsubstep>
<!-- Other settings that we'll trigger based on pg2 for convenience -->
<se_ftype valid_values="0,2" hgrid=".*pg2">2</se_ftype>
<mesh_file type="file">none</mesh_file>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ATMCHANGE=$CIMEROOT/../components/eamxx/scripts/atmchange

$ATMCHANGE semi_lagrange_trajectory_nsubstep=2 -b
5 changes: 4 additions & 1 deletion components/homme/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,10 @@ IF (HOMME_USE_KOKKOS)
IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD)
SET (DEFAULT_VECTOR_SIZE 1)
SET (HOMMEXX_ENABLE_GPU TRUE)
SET (HOMMEXX_ENABLE_GPU_F90 TRUE)
SET (HOMMEXX_ENABLE_GPU_F90 TRUE)
IF (SYCL_BUILD)
SET (DISABLE_TIMERS_IN_FIRST_STEP TRUE)
ENDIF()
ELSE ()
SET (DEFAULT_VECTOR_SIZE 8)
ENDIF()
Expand Down
8 changes: 6 additions & 2 deletions components/homme/cmake/machineFiles/perlmutter-gnu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ SET(HDF5_DIR $ENV{CRAY_HDF5_PARALLEL_PREFIX} CACHE FILEPATH "")
SET (NetCDF_C_PATH $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "")
SET (NetCDF_Fortran_PATH $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "")

SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")
SET(BUILD_HOMME_WITHOUT_PIOLIBRARY FALSE CACHE BOOL "")

SET(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

Expand All @@ -31,6 +31,7 @@ SET(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "")
SET(Kokkos_ENABLE_CUDA ON CACHE BOOL "")
SET(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "")
SET(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "")
SET(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC OFF CACHE BOOL "")
#SET(Kokkos_ARCH_ZEN2 ON CACHE BOOL "") # works, and perf same if both AMPERE80 and ZEN2 are on
#SET(Kokkos_ENABLE_CUDA_UVM ON CACHE BOOL "")
SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")
Expand All @@ -42,7 +43,10 @@ SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")
SET(CMAKE_C_COMPILER "cc" CACHE STRING "")
SET(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "")
SET(CMAKE_CXX_COMPILER "CC" CACHE STRING "")
# Note: need to set MPICH_CXX env variable and perhaps NVCC_WRAPPER_DEFAULT_COMPILER
# Note: Setting the MPICH_CXX env variable (and perhaps
# NVCC_WRAPPER_DEFAULT_COMPILER) is no longer needed. Ignore the warning about
# nvcc_wrapper during configuration.
SET(CUDA_BUILD TRUE CACHE STRING "")

SET(CXXLIB_SUPPORTED_CACHE FALSE CACHE BOOL "")

Expand Down
21 changes: 14 additions & 7 deletions components/homme/src/preqx/prim_advection_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ module prim_advection_mod

use dimensions_mod, only : nlev, qsize, nelemd
use kinds, only : real_kind
use parallel_mod, only : parallel_t
use parallel_mod, only : parallel_t, abortmp
use derivative_mod, only : derivative_t
use element_mod, only : element_t
use hybvcoord_mod, only : hvcoord_t
use time_mod, only : TimeLevel_t
use hybrid_mod, only : hybrid_t
use control_mod, only : transport_alg
use sl_advection, only : prim_advec_tracers_remap_ALE, sl_init1
use prim_advection_base, only: prim_advec_init1_rk2, prim_advec_tracers_remap_rk2,&
prim_advec_init2

Expand All @@ -35,12 +34,20 @@ subroutine Prim_Advec_Init1(par, elem)
type (element_t) :: elem(:)

call prim_advec_init1_rk2(par, elem)
call sl_init1(par,elem)

end subroutine Prim_Advec_Init1

subroutine Prim_Advec_Tracers_observe_velocity(elem, tl, n, nets, nete)
  ! Intentional no-op. This routine is used only by semi-Lagrangian (SL)
  ! transport, which preqx does not support, so none of the arguments are
  ! read or modified here.
  type (element_t),   intent(inout) :: elem(:)
  type (TimeLevel_t), intent(in)    :: tl
  integer,            intent(in)    :: n, nets, nete
end subroutine Prim_Advec_Tracers_observe_velocity

subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
implicit none
type (element_t) , intent(inout) :: elem(:)
type (derivative_t) , intent(in ) :: deriv
Expand All @@ -54,8 +61,8 @@ subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , hybrid , dt , tl

if (transport_alg == 0) then
call Prim_Advec_Tracers_remap_rk2( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
else
call Prim_Advec_Tracers_remap_ALE( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
else
call abortmp('Semi-Lagrangian transport is not supported in preqx.')
end if
end subroutine Prim_Advec_Tracers_remap

Expand Down
12 changes: 12 additions & 0 deletions components/homme/src/preqx_acc/prim_advection_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ module prim_advection_mod
logical, private :: first_time = .true.

public :: Prim_Advec_Tracers_remap
public :: Prim_Advec_Tracers_observe_velocity
public :: prim_advec_init1
public :: prim_advec_init2

Expand Down Expand Up @@ -302,6 +303,17 @@ subroutine prim_advec_init2(elem,hvcoord,hybrid)
!$omp barrier
end subroutine prim_advec_init2

subroutine Prim_Advec_Tracers_observe_velocity(elem, tl, n, nets, nete)
  ! Empty placeholder: the body performs no work and leaves every argument
  ! untouched. Only SL transport uses this routine, and SL transport is not
  ! supported in preqx.
  type (element_t),   intent(inout) :: elem(:)
  type (TimeLevel_t), intent(in)    :: tl
  integer,            intent(in)    :: n, nets, nete
end subroutine Prim_Advec_Tracers_observe_velocity

subroutine advance_hypervis_scalar( elem , hvcoord , hybrid , deriv , nt , nt_qdp , nets , nete , dt2 )
! hyperviscosity operator for forward-in-time scheme
! take one timestep of:
Expand Down
2 changes: 1 addition & 1 deletion components/homme/src/preqx_kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ MACRO(PREQX_KOKKOS_SETUP)
${TEST_SRC_DIR}/dcmip12_wrapper.F90
${TEST_SRC_DIR}/dcmip16_wrapper.F90
${TEST_SRC_DIR}/dcmip2012_test1_2_3.F90
${TEST_SRC_DIR}/dcmip2012_test1_conv.F90
${TEST_SRC_DIR}/dcmip2012_test1_conv_mod.F90
${TEST_SRC_DIR}/dcmip2012_test4.F90
${TEST_SRC_DIR}/dcmip2012_test5.F90
${TEST_SRC_DIR}/dcmip2016-baroclinic.F90
Expand Down
10 changes: 10 additions & 0 deletions components/homme/src/preqx_kokkos/prim_advection_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ subroutine Prim_Advec_Init1(par, elem)

end subroutine Prim_Advec_Init1

subroutine Prim_Advec_Tracers_observe_velocity(elem, tl, n, nets, nete)
  ! Deliberately does nothing. SL transport is the only user of this
  ! routine, and it is not supported in preqx; the arguments are accepted
  ! but never referenced.
  type (element_t),   intent(inout) :: elem(:)
  type (TimeLevel_t), intent(in)    :: tl
  integer,            intent(in)    :: n, nets, nete
end subroutine Prim_Advec_Tracers_observe_velocity

subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
implicit none
Expand Down
19 changes: 14 additions & 5 deletions components/homme/src/prim_main.F90
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ end subroutine finalize_kokkos_f90
character (len=20) :: numtrac_char

logical :: dir_e ! boolean existence of directory where output netcdf goes
logical :: call_enablef

! =====================================================
! Begin executable code set distributed memory world...
Expand Down Expand Up @@ -228,7 +229,20 @@ end subroutine finalize_kokkos_f90

if(par%masterproc) print *,"Entering main timestepping loop"
call t_startf('prim_main_loop')
call_enablef = .false.
do while(tl%nstep < nEndStep)
#ifdef DISABLE_TIMERS_IN_FIRST_STEP
! Certain compilers, e.g., for Intel GPU, do just-in-time compilation. Turn
! off timers in the first step to avoid counting that cost.
if (tl%nstep == 0) then
call t_disablef()
call_enablef = .true.
elseif (call_enablef) then
call t_enablef()
call_enablef = .false.
end if
#endif

#if (defined HORIZ_OPENMP)
!$OMP PARALLEL NUM_THREADS(hthreads), DEFAULT(SHARED), PRIVATE(ithr,nets,nete,hybrid)
call omp_set_num_threads(vthreads)
Expand All @@ -240,11 +254,6 @@ end subroutine finalize_kokkos_f90

nstep = nextoutputstep(tl)
do while(tl%nstep<nstep)

if(tl%nstep < 2) then
call t_disablef()
endif
if(tl%nstep >= 2) call t_enablef()
call t_startf('prim_run')
call prim_run_subcycle(elem, hybrid,nets,nete, tstep, .false., tl, hvcoord,1)
call t_stopf('prim_run')
Expand Down
2 changes: 2 additions & 0 deletions components/homme/src/share/compose/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ add_library (${COMPOSE_LIBRARY}
compose_slmm_islmpi_pack.cpp
compose_slmm_islmpi_q.cpp
compose_slmm_islmpi_qextrema.cpp
compose_slmm_islmpi_interpolate.cpp
compose_slmm_islmpi_step.cpp
compose_cedr_sl_run_global.cpp
compose_cedr_sl_run_local.cpp
compose_cedr_sl_run_check.cpp
compose_cedr_qlt.cpp
compose_cedr_caas.cpp
compose_slmm_islmpi_calc_trajectory.cpp
cedr_util.cpp
cedr_mpi.cpp
cedr_local.cpp
Expand Down
Loading
Loading