diff --git a/Docs/sphinx_doc/Inputs.rst b/Docs/sphinx_doc/Inputs.rst
index 9768977c1..cc190297e 100644
--- a/Docs/sphinx_doc/Inputs.rst
+++ b/Docs/sphinx_doc/Inputs.rst
@@ -1165,41 +1165,41 @@ In addition, there is a run-time option to project the initial velocity field to
 List of Parameters
 ------------------
 
-+----------------------------------+-------------------+--------------------+------------------+
-| Parameter                        | Definition        | Acceptable         | Default          |
-|                                  |                   | Values             |                  |
-+==================================+===================+====================+==================+
-| **erf.init_type**                | Initialization    | “custom”,          | “*custom*”       |
-|                                  | type              | “ideal”,           |                  |
-|                                  |                   | "real",            |                  |
-|                                  |                   |"input_sounding"    |                  |
-+----------------------------------+-------------------+--------------------+------------------+
-| **erf.input_sounding_file**      | Path to WRF-style | String             | "input_sounding" |
-|                                  | input sounding    |                    |                  |
-|                                  | file              |                    |                  |
-+----------------------------------+-------------------+--------------------+------------------+
-| **erf.init_sounding_ideal**      | Perform           | true or false      | false            |
-|                                  | initialization    |                    |                  |
-|                                  | like WRF's        |                    |                  |
-|                                  | ideal.exe         |                    |                  |
-+----------------------------------+-------------------+--------------------+------------------+
-| **erf.use_real_bcs**             | If init_type is   | true or false      | true if          |
-|                                  | real or metgrid,  |                    | if init_type     |
-|                                  | do we want to use |                    | is real or       |
-|                                  | these bcs?        |                    | metgrid;         |
-|                                  |                   |                    | else false       |
-+----------------------------------+-------------------+--------------------+------------------+
-| **erf.nc_init_file**             | NetCDF file with  | String             | NONE             |
-|                                  | initial mesoscale |                    |                  |
-|                                  | data              |                    |                  |
-+----------------------------------+-------------------+--------------------+------------------+
-| **erf.nc_bdy_file**              | NetCDF file with  | String             | NONE             |
-|                                  | mesoscale data at |                    |                  |
-|                                  | lateral boundaries|                    |                  |
-+----------------------------------+-------------------+--------------------+------------------+
-| **erf.project_initial_velocity** | project initial   | Integer            | 1                |
-|                                  | velocity?         |                    |                  |
-+----------------------------------+-------------------+--------------------+------------------+
++----------------------------------+-------------------+--------------------+-----------------------+
+| Parameter                        | Definition        | Acceptable         | Default               |
+|                                  |                   | Values             |                       |
++==================================+===================+====================+=======================+
+| **erf.init_type**                | Initialization    | “custom”,          | “*custom*”            |
+|                                  | type              | “ideal”,           |                       |
+|                                  |                   | "real",            |                       |
+|                                  |                   |"input_sounding"    |                       |
++----------------------------------+-------------------+--------------------+-----------------------+
+| **erf.input_sounding_file**      | Path to WRF-style | String             | "input_sounding"      |
+|                                  | input sounding    |                    |                       |
+|                                  | file              |                    |                       |
++----------------------------------+-------------------+--------------------+-----------------------+
+| **erf.init_sounding_ideal**      | Perform           | true or false      | false                 |
+|                                  | initialization    |                    |                       |
+|                                  | like WRF's        |                    |                       |
+|                                  | ideal.exe         |                    |                       |
++----------------------------------+-------------------+--------------------+-----------------------+
+| **erf.use_real_bcs**             | If init_type is   | true or false      | true if               |
+|                                  | real or metgrid,  |                    | init_type is          |
+|                                  | do we want to use |                    | real or               |
+|                                  | these bcs?        |                    | metgrid;              |
+|                                  |                   |                    | else false            |
++----------------------------------+-------------------+--------------------+-----------------------+
+| **erf.nc_init_file**             | NetCDF file with  | String             | NONE                  |
+|                                  | initial mesoscale |                    |                       |
+|                                  | data              |                    |                       |
++----------------------------------+-------------------+--------------------+-----------------------+
+| **erf.nc_bdy_file**              | NetCDF file with  | String             | NONE                  |
+|                                  | mesoscale data at |                    |                       |
+|                                  | lateral boundaries|                    |                       |
++----------------------------------+-------------------+--------------------+-----------------------+
+| **erf.project_initial_velocity** | Project initial   | true or false      | true if anelastic;    |
+|                                  | velocity?         |                    | false if compressible |
++----------------------------------+-------------------+--------------------+-----------------------+
 
 Notes
 -----------------
@@ -1227,6 +1227,10 @@ integrating the hydrostatic equation from the surface.
 
 If **erf.init_type = custom** or **erf.init_type = input_sounding**, ``erf.nc_init_file`` and
 ``erf.nc_bdy_file`` do not need to be set.
+Note that the **erf.project_initial_velocity** option is available for all **init_type** options. If using the anelastic
+formulation, the initial velocity field is always projected regardless of the input; if using the compressible
+formulation the default is false, but that can be overridden.
+
 
 Map Scale Factors
 =================
diff --git a/Source/DataStructs/ERF_DataStruct.H b/Source/DataStructs/ERF_DataStruct.H
index c1cbec3d8..354d2d749 100644
--- a/Source/DataStructs/ERF_DataStruct.H
+++ b/Source/DataStructs/ERF_DataStruct.H
@@ -193,11 +193,6 @@ struct SolverChoice {
         }
 
         // *******************************************************************************
-        // Should we project the initial velocity field to make it divergence-free?
-        pp.query("project_initial_velocity", project_initial_velocity);
-
-        // *******************************************************************************
-
         bool any_anelastic = false;
         for (int i = 0; i <= max_level; ++i) {
             if (anelastic[i] == 1) any_anelastic = true;
@@ -209,10 +204,14 @@ struct SolverChoice {
             project_initial_velocity = true;
             buoyancy_type = 2; // uses Tprime
         } else {
+            pp.query("project_initial_velocity", project_initial_velocity);
+
             constant_density = false; // We default to false but allow the user to set it
             pp.query("constant_density", constant_density);
         }
 
+        // *******************************************************************************
+
         pp.query("project_every_stage", project_every_stage);
         pp.query("ncorr", ncorr);
         pp.query("poisson_abstol", poisson_abstol);
diff --git a/Source/ERF.H b/Source/ERF.H
index a914b926f..781fae659 100644
--- a/Source/ERF.H
+++ b/Source/ERF.H
@@ -164,10 +164,12 @@ public:
 #endif
 
     // Project the velocities to be divergence-free -- this is only relevant if anelastic == 1
-    void project_velocities (int lev, amrex::Real dt, amrex::Vector<amrex::MultiFab>& vars, amrex::MultiFab& p);
+    void project_velocities (int lev, amrex::Real dt, amrex::Vector<amrex::MultiFab>& vars,
+                             amrex::MultiFab& Omega, amrex::MultiFab& p);
 
     // Project the velocities to be divergence-free with a thin body
-    void project_velocities_tb (int lev, amrex::Real dt, amrex::Vector<amrex::MultiFab>& vars, amrex::MultiFab& p);
+    void project_velocities_tb (int lev, amrex::Real dt, amrex::Vector<amrex::MultiFab>& vars,
+                                amrex::MultiFab& Omega, amrex::MultiFab& p);
 
     // Define the projection bc's based on the domain bc types
     amrex::Array<amrex::LinOpBCType,AMREX_SPACEDIM>
@@ -731,6 +733,9 @@ private:
     amrex::Vector<amrex::MultiFab> rW_old;
     amrex::Vector<amrex::MultiFab> rW_new;
 
+    // Normal momentum on z-face
+    amrex::Vector<amrex::MultiFab> Omega;
+
     // amrex::Vector<amrex::MultiFab> xmom_crse_rhs;
    // amrex::Vector<amrex::MultiFab>
ymom_crse_rhs; amrex::Vector zmom_crse_rhs; diff --git a/Source/ERF.cpp b/Source/ERF.cpp index 08f123b8a..3fa57b44d 100644 --- a/Source/ERF.cpp +++ b/Source/ERF.cpp @@ -197,6 +197,8 @@ ERF::ERF_shared () rV_old.resize(nlevs_max); rW_old.resize(nlevs_max); + Omega.resize(nlevs_max); + // xmom_crse_rhs.resize(nlevs_max); // ymom_crse_rhs.resize(nlevs_max); zmom_crse_rhs.resize(nlevs_max); @@ -856,6 +858,11 @@ ERF::InitData_post () } } + // + // If we are starting from scratch, we have the option to project the initial velocity field + // regardless of how we initialized. + // pp_inc is used as scratch space here; we zero it out after the projection + // if (restart_chkfile == "") { // Note -- this projection is only defined for no terrain @@ -864,7 +871,7 @@ ERF::InitData_post () Real dummy_dt = 1.0; for (int lev = 0; lev <= finest_level; ++lev) { - project_velocities(lev, dummy_dt, vars_new[lev], pp_inc[lev]); + project_velocities(lev, dummy_dt, vars_new[lev], Omega[lev], pp_inc[lev]); pp_inc[lev].setVal(0.); } } diff --git a/Source/ERF_make_new_arrays.cpp b/Source/ERF_make_new_arrays.cpp index d1a856c19..b9d018ea0 100644 --- a/Source/ERF_make_new_arrays.cpp +++ b/Source/ERF_make_new_arrays.cpp @@ -137,6 +137,11 @@ ERF::init_stuff (int lev, const BoxArray& ba, const DistributionMapping& dm, pp_inc[lev].setVal(0.0); } + // We keep Omega as a persistent variable now in order to use the projected Omega + // directly rather than calculating it again from (u,v,w) + Omega[lev].define(convert(ba, IntVect(0,0,1)), dm, 1, 1); + Omega[lev].setVal(5.6e23); + // ******************************************************************************************** // These are just used for scratch in the time integrator but we might as well define them here // ******************************************************************************************** diff --git a/Source/ERF_make_new_level.cpp b/Source/ERF_make_new_level.cpp index dbe285ec8..07840783a 100644 --- a/Source/ERF_make_new_level.cpp +++ b/Source/ERF_make_new_level.cpp @@ -448,6 +448,8 @@ ERF::ClearLevel (int lev) rW_new[lev].clear(); rW_old[lev].clear(); + Omega[lev].clear(); + if (lev > 0) { zmom_crse_rhs[lev].clear(); } diff --git a/Source/TimeIntegration/ERF_TI_no_substep_fun.H b/Source/TimeIntegration/ERF_TI_no_substep_fun.H index 27a07bab3..c9ba95680 100644 --- a/Source/TimeIntegration/ERF_TI_no_substep_fun.H +++ b/Source/TimeIntegration/ERF_TI_no_substep_fun.H @@ -138,9 +138,9 @@ bool have_tb = (thin_xforce[0] || thin_yforce[0] || thin_zforce[0]); if (solverChoice.project_every_stage || (nrk==1)) { if (have_tb) { - project_velocities_tb(level, slow_dt, S_sum, pp_inc[level]); + project_velocities_tb(level, slow_dt, S_sum, Omega[level], pp_inc[level]); } else { - project_velocities(level, slow_dt, S_sum, pp_inc[level]); + project_velocities(level, slow_dt, S_sum, Omega[level], pp_inc[level]); } } } diff --git a/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H b/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H index 1e29bea7f..5dacd6b97 100644 --- a/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H +++ b/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H @@ -133,7 +133,7 @@ erf_slow_rhs_pre(level, finest_level, nrk, slow_dt, S_rhs, S_old, S_data, S_prim, S_scratch, xvel_new, yvel_new, zvel_new, - z_t_rk[level], Omega, cc_src, xmom_src, ymom_src, zmom_src, + z_t_rk[level], Omega[level], cc_src, xmom_src, ymom_src, zmom_src, (level > 0) ? 
&zmom_crse_rhs[level] : nullptr, Tau11_lev[level].get(), Tau22_lev[level].get(), Tau33_lev[level].get(), Tau12_lev[level].get(), Tau13_lev[level].get(), Tau21_lev[level].get(), Tau23_lev[level].get(), Tau31_lev[level].get(), @@ -242,7 +242,7 @@ erf_slow_rhs_pre(level, finest_level, nrk, slow_dt, S_rhs, S_old, S_data, S_prim, S_scratch, xvel_new, yvel_new, zvel_new, - z_t_rk[level], Omega, cc_src, xmom_src, ymom_src, zmom_src, + z_t_rk[level], Omega[level], cc_src, xmom_src, ymom_src, zmom_src, (level > 0) ? &zmom_crse_rhs[level] : nullptr, Tau11_lev[level].get(), Tau22_lev[level].get(), Tau33_lev[level].get(), Tau12_lev[level].get(), Tau13_lev[level].get(), Tau21_lev[level].get(), Tau23_lev[level].get(), Tau31_lev[level].get(), @@ -448,7 +448,7 @@ erf_slow_rhs_pre(level, finest_level, nrk, slow_dt, S_rhs, S_old, S_data, S_prim, S_scratch, xvel_new, yvel_new, zvel_new, - z_t_rk[level], Omega, cc_src, xmom_src, ymom_src, zmom_src, + z_t_rk[level], Omega[level], cc_src, xmom_src, ymom_src, zmom_src, (level > 0) ? &zmom_crse_rhs[level] : nullptr, Tau11_lev[level].get(), Tau22_lev[level].get(), Tau33_lev[level].get(), Tau12_lev[level].get(), Tau13_lev[level].get(), Tau21_lev[level].get(), Tau23_lev[level].get(), Tau31_lev[level].get(), diff --git a/Source/TimeIntegration/ERF_TI_substep_fun.H b/Source/TimeIntegration/ERF_TI_substep_fun.H index bddb14930..a93c6dfd5 100644 --- a/Source/TimeIntegration/ERF_TI_substep_fun.H +++ b/Source/TimeIntegration/ERF_TI_substep_fun.H @@ -102,7 +102,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk, S_slow_rhs, S_old, S_stage, S_prim, pi_stage, fast_coeffs, S_data, S_scratch, fine_geom, solverChoice.gravity, solverChoice.use_lagged_delta_rt, - Omega, z_t_rk[level], z_t_pert.get(), + Omega[level], z_t_rk[level], z_t_pert.get(), z_phys_nd[level], z_phys_nd_new[level], z_phys_nd_src[level], detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level], dtau, beta_s, inv_fac, @@ -114,7 +114,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk, S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs, S_data, S_scratch, fine_geom, solverChoice.gravity, solverChoice.use_lagged_delta_rt, - Omega, z_t_rk[level], z_t_pert.get(), + Omega[level], z_t_rk[level], z_t_pert.get(), z_phys_nd[level], z_phys_nd_new[level], z_phys_nd_src[level], detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level], dtau, beta_s, inv_fac, @@ -133,7 +133,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk, // and S_data is the new-time solution to be defined here erf_fast_rhs_T(fast_step, nrk, level, finest_level, S_slow_rhs, S_old, S_stage, S_prim, pi_stage, fast_coeffs, - S_data, S_scratch, fine_geom, solverChoice.gravity, Omega, + S_data, S_scratch, fine_geom, solverChoice.gravity, Omega[level], z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac, mapfac_m[level], mapfac_u[level], mapfac_v[level], fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping); @@ -142,7 +142,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk, // and as the new-time solution to be defined here erf_fast_rhs_T(fast_step, nrk, level, finest_level, S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs, - S_data, S_scratch, fine_geom, solverChoice.gravity, Omega, + S_data, S_scratch, fine_geom, solverChoice.gravity, Omega[level], z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac, mapfac_m[level], mapfac_u[level], mapfac_v[level], fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping); diff --git 
a/Source/TimeIntegration/ERF_advance_dycore.cpp b/Source/TimeIntegration/ERF_advance_dycore.cpp index e211ff464..a04c46a62 100644 --- a/Source/TimeIntegration/ERF_advance_dycore.cpp +++ b/Source/TimeIntegration/ERF_advance_dycore.cpp @@ -183,8 +183,6 @@ void ERF::advance_dycore(int level, } // l_use_diff } // profile - MultiFab Omega (state_old[IntVars::zmom].boxArray(),dm,1,1); - #include "ERF_TI_utils.H" // Additional SFS quantities, calculated once per timestep diff --git a/Source/TimeIntegration/ERF_slow_rhs_pre.cpp b/Source/TimeIntegration/ERF_slow_rhs_pre.cpp index 78e0b6519..973fe18f9 100644 --- a/Source/TimeIntegration/ERF_slow_rhs_pre.cpp +++ b/Source/TimeIntegration/ERF_slow_rhs_pre.cpp @@ -297,10 +297,11 @@ void erf_slow_rhs_pre (int level, int finest_level, const Array4< Real>& omega_arr = Omega.array(mfi); Array4 z_t; - if (z_t_mf) + if (z_t_mf) { z_t = z_t_mf->array(mfi); - else + } else { z_t = Array4{}; + } const Array4& rho_u_rhs = S_rhs[IntVars::xmom].array(mfi); const Array4& rho_v_rhs = S_rhs[IntVars::ymom].array(mfi); @@ -366,8 +367,16 @@ void erf_slow_rhs_pre (int level, int finest_level, { BL_PROFILE("slow_rhs_making_omega"); Box gbxo = surroundingNodes(bx,2); gbxo.grow(IntVect(1,1,1)); + // // Now create Omega with momentum (not velocity) with z_t subtracted if moving terrain - if (l_use_terrain) { + // ONLY if not doing anelastic + terrain -- in that case Omega will be defined coming + // out of the projection + // + if (!l_use_terrain) { + ParallelFor(gbxo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { + omega_arr(i,j,k) = rho_w(i,j,k); + }); + } else if (!l_anelastic) { Box gbxo_lo = gbxo; gbxo_lo.setBig(2,domain.smallEnd(2)); int lo_z_face = domain.smallEnd(2); @@ -384,7 +393,7 @@ void erf_slow_rhs_pre (int level, int finest_level, }); } - if (z_t) { + if (z_t) { // Note we never do anelastic with moving terrain Box gbxo_mid = gbxo; gbxo_mid.setSmall(2,1); gbxo_mid.setBig(2,gbxo.bigEnd(2)-1); ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { // We define rho on the z-face the same way as in MomentumToVelocity/VelocityToMomentum @@ -404,10 +413,6 @@ void erf_slow_rhs_pre (int level, int finest_level, omega_arr(i,j,k) = OmegaFromW(i,j,k,rho_w(i,j,k),rho_u,rho_v,z_nd,dxInv); }); } - } else { - ParallelFor(gbxo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - omega_arr(i,j,k) = rho_w(i,j,k); - }); } } // end profile diff --git a/Source/TimeIntegration/MASK/ERF_TI_fast_headers.H b/Source/TimeIntegration/MASK/ERF_TI_fast_headers.H new file mode 100644 index 000000000..e53aa50a6 --- /dev/null +++ b/Source/TimeIntegration/MASK/ERF_TI_fast_headers.H @@ -0,0 +1,131 @@ +#ifndef ERF_FAST_INTEGRATION_H_ +#define ERF_FAST_INTEGRATION_H_ + +#include +#include +#include +#include "ERF_DataStruct.H" +#include "ERF_IndexDefines.H" +#include +#include + +#include +#include + +#ifdef ERF_USE_EB +#include +#endif + +/** + * Function for computing the fast RHS with no terrain + * + */ +void erf_fast_rhs_N (int step, int nrk, int level, int finest_level, + amrex::Vector& S_slow_rhs, + const amrex::Vector& S_prev, + amrex::Vector& S_stage_data, + const amrex::MultiFab& S_stage_prim, + const amrex::MultiFab& pi_stage, + const amrex::MultiFab& fast_coeffs, + amrex::Vector& S_data, + amrex::Vector& S_scratch, + const amrex::Geometry geom, + const amrex::Real gravity, + const amrex::Real dtau, const amrex::Real beta_s, + const amrex::Real facinv, + std::unique_ptr& mapfac_m, + std::unique_ptr& mapfac_u, + std::unique_ptr& mapfac_v, + 
ERFFillPatcher* FPr_u, + ERFFillPatcher* FPr_v, + ERFFillPatcher* FPr_w, + amrex::YAFluxRegister* fr_as_crse, + amrex::YAFluxRegister* fr_as_fine, + bool l_use_moisture, bool l_reflux, + bool l_implicit_substepping); + +/** + * Function for computing the fast RHS with fixed terrain + * + */ +void erf_fast_rhs_T (int step, int nrk, int level, int finest_level, + amrex::Vector& S_slow_rhs, + const amrex::Vector& S_prev, + amrex::Vector& S_stage_data, + const amrex::MultiFab& S_stage_prim, + const amrex::MultiFab& pi_stage, + const amrex::MultiFab& fast_coeffs, + amrex::Vector& S_data, + amrex::Vector& S_scratch, + const amrex::Geometry geom, + const amrex::Real gravity, + amrex::MultiFab& Omega, + std::unique_ptr& z_phys_nd, + std::unique_ptr& detJ_cc, + const amrex::Real dtau, const amrex::Real beta_s, + const amrex::Real facinv, + std::unique_ptr& mapfac_m, + std::unique_ptr& mapfac_u, + std::unique_ptr& mapfac_v, + amrex::YAFluxRegister* fr_as_crse, + amrex::YAFluxRegister* fr_as_fine, + bool l_use_moisture, bool l_reflux, + bool l_implicit_substepping); + +/** + * Function for computing the fast RHS with moving terrain + * + */ +void erf_fast_rhs_MT (int step, int nrk, int level, int finest_level, + amrex::Vector& S_slow_rhs, + const amrex::Vector& S_prev, + amrex::Vector& S_stg_data, + const amrex::MultiFab& S_stg_prim, + const amrex::MultiFab& pi_stage, + const amrex::MultiFab& fast_coeffs, + amrex::Vector& S_data, + amrex::Vector& S_scratch, + const amrex::Geometry geom, + const amrex::Real gravity, + const bool use_lagged_delta_rt, + amrex::MultiFab& Omega, + std::unique_ptr& z_t_rk, + const amrex::MultiFab* z_t_pert, + std::unique_ptr& z_phys_nd_old, + std::unique_ptr& z_phys_nd_new, + std::unique_ptr& z_phys_nd_stg, + std::unique_ptr& detJ_cc_old, + std::unique_ptr& detJ_cc_new, + std::unique_ptr& detJ_cc_stg, + const amrex::Real dtau, const amrex::Real beta_s, + const amrex::Real facinv, + std::unique_ptr& mapfac_m, + std::unique_ptr& mapfac_u, + std::unique_ptr& mapfac_v, + amrex::YAFluxRegister* fr_as_crse, + amrex::YAFluxRegister* fr_as_fine, + bool l_use_moisture, bool l_reflux, + bool l_implicit_substepping); + +/** + * Function for computing the coefficients for the tridiagonal solver used in the fast + * integrator (the acoustic substepping). 
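+ * Components 0-4 of fast_coeffs are consumed in erf_fast_rhs_* as the lower (A),
+ * pre-inverted diagonal (B) and upper (C) bands of that tridiagonal system, plus the
+ * P and Q coefficients that multiply the (rho theta) perturbations; see the make_alias
+ * MultiFabs coeff_A_mf, inv_coeff_B_mf, coeff_C_mf, coeff_P_mf and coeff_Q_mf in
+ * ERF_fast_rhs_N.cpp.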
+ */ +void make_fast_coeffs (int level, + amrex::MultiFab& fast_coeffs, + amrex::Vector& S_stage_data, + const amrex::MultiFab& S_stage_prim, + const amrex::MultiFab& pi_stage, + const amrex::Geometry geom, + const bool use_moisture, + const bool use_terrain, + const amrex::Real gravity, + const amrex::Real c_p, + std::unique_ptr& detJ_cc, + const amrex::MultiFab* r0, + const amrex::MultiFab* pi0, + const amrex::Real dtau, + const amrex::Real beta_s, + amrex::GpuArray &phys_bc_type); + +#endif diff --git a/Source/TimeIntegration/MASK/ERF_TI_fast_rhs_fun.H b/Source/TimeIntegration/MASK/ERF_TI_fast_rhs_fun.H new file mode 100644 index 000000000..71c44dbbf --- /dev/null +++ b/Source/TimeIntegration/MASK/ERF_TI_fast_rhs_fun.H @@ -0,0 +1,196 @@ +/** + * Wrapper for calling the routine that creates the fast RHS + */ +auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk, + Vector& S_slow_rhs, + const Vector& S_old, + Vector& S_stage, + Vector& S_data, + Vector& S_scratch, + const Real dtau, + const Real inv_fac, + const Real old_substep_time, + const Real new_substep_time) + { + BL_PROFILE("fast_rhs_fun"); + if (verbose) amrex::Print() << "Calling fast rhs at level " << level << " with dt = " << dtau << std::endl; + + // Define beta_s here so that it is consistent between where we make the fast coefficients + // and where we use them + // Per p2902 of Klemp-Skamarock-Dudhia-2007 + // beta_s = -1.0 : fully explicit + // beta_s = 1.0 : fully implicit + Real beta_s; + if (solverChoice.substepping_type[level] == SubsteppingType::Implicit) + { + beta_s = 0.1; + } else { // Fully explicit + beta_s = -1.0; + } + + // ************************************************************************* + // Set up flux registers if using two_way coupling + // ************************************************************************* + const bool l_reflux = (solverChoice.coupling_type == CouplingType::TwoWay); + + YAFluxRegister* fr_as_crse = nullptr; + YAFluxRegister* fr_as_fine = nullptr; + if (l_reflux) { + if (level < finest_level) { + fr_as_crse = getAdvFluxReg(level+1); + } + if (level > 0) { + fr_as_fine = getAdvFluxReg(level); + } + } + + // Moving terrain + std::unique_ptr z_t_pert; + if ( solverChoice.use_terrain && (solverChoice.terrain_type == TerrainType::Moving) ) + { + // Make "old" fast geom -- store in z_phys_nd for convenience + if (verbose) Print() << "Making geometry at start of substep time: " << old_substep_time << std::endl; + prob->init_custom_terrain(fine_geom,*z_phys_nd[level],old_substep_time); + init_terrain_grid (level,fine_geom,*z_phys_nd[level], zlevels_stag[level], phys_bc_type); + make_J (fine_geom,*z_phys_nd[level], *detJ_cc[level]); + + // Make "new" fast geom + if (verbose) Print() << "Making geometry for end of substep time :" << new_substep_time << std::endl; + prob->init_custom_terrain(fine_geom,*z_phys_nd_new[level],new_substep_time); + init_terrain_grid (level,fine_geom,*z_phys_nd_new[level], zlevels_stag[level], phys_bc_type); + make_J (fine_geom,*z_phys_nd_new[level], *detJ_cc_new[level]); + make_areas (fine_geom,*z_phys_nd_new[level], *ax_new[level], *ay_new[level], *az_new[level]); + + Real inv_dt = 1./dtau; + + z_t_pert = std::make_unique(S_data[IntVars::zmom].boxArray(), S_data[IntVars::zmom].DistributionMap(), 1, 1); + + for (MFIter mfi(*z_t_rk[level],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box gbx = mfi.growntilebox(IntVect(1,1,0)); + + const Array4& z_t_arr = z_t_rk[level]->array(mfi); + const Array4& zp_t_arr = z_t_pert->array(mfi); + + const Array4& 
z_nd_new_arr = z_phys_nd_new[level]->const_array(mfi); + const Array4& z_nd_old_arr = z_phys_nd[level]->const_array(mfi); + + // Loop over horizontal plane + amrex::ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + // Evaluate between RK stages assuming the geometry is linear between old and new time + zp_t_arr(i,j,k) = 0.25 * inv_dt * (z_nd_new_arr(i+1,j+1,k) - z_nd_old_arr(i+1,j+1,k) + +z_nd_new_arr(i ,j+1,k) - z_nd_old_arr( i,j+1,k) + +z_nd_new_arr(i+1,j ,k) - z_nd_old_arr(i+1,j ,k) + +z_nd_new_arr(i ,j ,k) - z_nd_old_arr(i ,j ,k)); + // Convert to perturbation: z"_t(t) = z_t(t) - z_t^{RK} + zp_t_arr(i,j,k) -= z_t_arr(i,j,k); + }); + } // mfi + + // We have to call this each step since it depends on the substep time now + // Note we pass in the *old* detJ here + make_fast_coeffs(level, fast_coeffs, S_stage, S_prim, pi_stage, fine_geom, + l_use_moisture, solverChoice.use_terrain, solverChoice.gravity, solverChoice.c_p, + detJ_cc[level], r0, pi0, dtau, beta_s, phys_bc_type); + + if (fast_step == 0) { + // If this is the first substep we pass in S_old as the previous step's solution + erf_fast_rhs_MT(fast_step, nrk, level, finest_level, + S_slow_rhs, S_old, S_stage, S_prim, pi_stage, fast_coeffs, + S_data, S_scratch, fine_geom, + solverChoice.gravity, solverChoice.use_lagged_delta_rt, + Omega, z_t_rk[level], z_t_pert.get(), + z_phys_nd[level], z_phys_nd_new[level], z_phys_nd_src[level], + detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level], + dtau, beta_s, inv_fac, + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping); + } else { + // If this is not the first substep we pass in S_data as the previous step's solution + erf_fast_rhs_MT(fast_step, nrk, level, finest_level, + S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs, + S_data, S_scratch, fine_geom, + solverChoice.gravity, solverChoice.use_lagged_delta_rt, + Omega, z_t_rk[level], z_t_pert.get(), + z_phys_nd[level], z_phys_nd_new[level], z_phys_nd_src[level], + detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level], + dtau, beta_s, inv_fac, + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping); + } + } else if (solverChoice.use_terrain && solverChoice.terrain_type == TerrainType::Static) { + if (fast_step == 0) { + + // If this is the first substep we make the coefficients since they are based only on stage data + make_fast_coeffs(level, fast_coeffs, S_stage, S_prim, pi_stage, fine_geom, + l_use_moisture, solverChoice.use_terrain, solverChoice.gravity, solverChoice.c_p, + detJ_cc[level], r0, pi0, dtau, beta_s, phys_bc_type); + + // If this is the first substep we pass in S_old as the previous step's solution + // and S_data is the new-time solution to be defined here + erf_fast_rhs_T(fast_step, nrk, level, finest_level, + S_slow_rhs, S_old, S_stage, S_prim, pi_stage, fast_coeffs, + S_data, S_scratch, fine_geom, solverChoice.gravity, Omega, + z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac, + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping); + } else { + // If this is not the first substep we pass in S_data as both the previous step's solution + // and as the new-time solution to be defined here + erf_fast_rhs_T(fast_step, nrk, level, finest_level, + S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs, + S_data, S_scratch, fine_geom, solverChoice.gravity, Omega, 
+ z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac, + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping); + } + } else { + if (fast_step == 0) { + + // If this is the first substep we make the coefficients since they are based only on stage data + make_fast_coeffs(level, fast_coeffs, S_stage, S_prim, pi_stage, fine_geom, + l_use_moisture, solverChoice.use_terrain, solverChoice.gravity, solverChoice.c_p, + detJ_cc[level], r0, pi0, dtau, beta_s, phys_bc_type); + + // If this is the first substep we pass in S_old as the previous step's solution + // and S_data is the new-time solution to be defined here + erf_fast_rhs_N(fast_step, nrk, level, finest_level, + S_slow_rhs, S_old, S_stage, S_prim, pi_stage, fast_coeffs, + S_data, S_scratch, fine_geom, solverChoice.gravity, + dtau, beta_s, inv_fac, + mapfac_m[level], mapfac_u[level], mapfac_v[level], + (level > 0) ? &FPr_u[level-1] : nullptr, + (level > 0) ? &FPr_v[level-1] : nullptr, + (level > 0) ? &FPr_w[level-1] : nullptr, + fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping); + } else { + // If this is not the first substep we pass in S_data as both the previous step's solution + // and as the new-time solution to be defined here + erf_fast_rhs_N(fast_step, nrk, level, finest_level, + S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs, + S_data, S_scratch, fine_geom, solverChoice.gravity, + dtau, beta_s, inv_fac, + mapfac_m[level], mapfac_u[level], mapfac_v[level], + (level > 0) ? &FPr_u[level-1] : nullptr, + (level > 0) ? &FPr_v[level-1] : nullptr, + (level > 0) ? &FPr_w[level-1] : nullptr, + fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping); + } + } + + // Even if we update all the conserved variables we don't need + // to fillpatch the slow ones every acoustic substep + + // NOTE: Numerical diffusion is tested on in FillPatchIntermediate and dictates the size of the + // box over which VelocityToMomentum is computed. V2M requires one more ghost cell be + // filled for rho than velocity. This logical condition ensures we fill enough ghost cells + // when use_NumDiff is true. 
+ int ng_cons = S_data[IntVars::cons].nGrowVect().max() - 1; + int ng_vel = S_data[IntVars::xmom].nGrowVect().max(); + if (!solverChoice.use_NumDiff) { + ng_cons = 1; + ng_vel = 1; + } + apply_bcs(S_data, new_substep_time, ng_cons, ng_vel, fast_only=true, vel_and_mom_synced=false); + }; diff --git a/Source/TimeIntegration/MASK/ERF_fast_rhs_N.cpp b/Source/TimeIntegration/MASK/ERF_fast_rhs_N.cpp new file mode 100644 index 000000000..a8079f36c --- /dev/null +++ b/Source/TimeIntegration/MASK/ERF_fast_rhs_N.cpp @@ -0,0 +1,638 @@ + +#include + +using namespace amrex; + +/** + * Function for computing the fast RHS with no terrain + * + * @param[in ] step which fast time step within each Runge-Kutta step + * @param[in ] nrk which Runge-Kutta step + * @param[in ] level level of resolution + * @param[in ] finest_level finest level of resolution + * @param[in ] S_slow_rhs slow RHS computed in erf_slow_rhs_pre + * @param[in ] S_prev if step == 0, this is S_old, else the previous fast solution + * @param[in ] S_stage_data solution at previous RK stage + * @param[in ] S_stage_prim primitive variables at previous RK stage + * @param[in ] pi_stage Exner function at previous RK stage + * @param[in ] fast_coeffs coefficients for the tridiagonal solve used in the fast integrator + * @param[ out] S_data current solution + * @param[in ] S_scratch scratch space + * @param[in ] geom container for geometric information + * @param[in ] gravity magnitude of gravity + * @param[in ] dtau fast time step + * @param[in ] beta_s Coefficient which determines how implicit vs explicit the solve is + * @param[in ] facinv inverse factor for time-averaging the momenta + * @param[in ] mapfac_m map factor at cell centers + * @param[in ] mapfac_u map factor at x-faces + * @param[in ] mapfac_v map factor at y-faces + * @param[inout] fr_as_crse YAFluxRegister at level l at level l / l+1 interface + * @param[inout] fr_as_fine YAFluxRegister at level l at level l-1 / l interface + * @param[in ] l_use_moisture + * @param[in ] l_reflux should we add fluxes to the FluxRegisters? + * @param[in ] l_implicit_substepping + */ + +void erf_fast_rhs_N (int step, int nrk, + int level, int finest_level, + Vector& S_slow_rhs, // the slow RHS already computed + const Vector& S_prev, // if step == 0, this is S_old, else the previous solution + Vector& S_stage_data, // S_stage = S^n, S^* or S^** + const MultiFab & S_stage_prim, // Primitive version of S_stage_data[IntVars::cons] + const MultiFab & pi_stage, // Exner function evaluated at last stage + const MultiFab &fast_coeffs, // Coeffs for tridiagonal solve + Vector& S_data, // S_sum = most recent full solution + Vector& S_scratch, // S_sum_old at most recent fast timestep for (rho theta) + const Geometry geom, + const Real gravity, + const Real dtau, const Real beta_s, + const Real facinv, + std::unique_ptr& mapfac_m, + std::unique_ptr& mapfac_u, + std::unique_ptr& mapfac_v, + ERFFillPatcher* FPr_u, + ERFFillPatcher* FPr_v, + ERFFillPatcher* FPr_w, + YAFluxRegister* fr_as_crse, + YAFluxRegister* fr_as_fine, + bool l_use_moisture, + bool l_reflux, + bool l_implicit_substepping) +{ + // + // NOTE: for step > 0, S_data and S_prev point to the same MultiFab data!! 
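+    //       This aliasing is why the conserved-variable update at the end of this routine
+    //       uses "+=" for step > 0 rather than re-reading prev_cons.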
+ // + + BL_PROFILE_REGION("erf_fast_rhs_N()"); + + Real beta_1 = 0.5 * (1.0 - beta_s); // multiplies explicit terms + Real beta_2 = 0.5 * (1.0 + beta_s); // multiplies implicit terms + + // How much do we project forward the (rho theta) that is used in the horizontal momentum equations + Real beta_d = 0.1; + + const Real* dx = geom.CellSize(); + const GpuArray dxInv = geom.InvCellSizeArray(); + + const auto dom_lo = lbound(geom.Domain()); + const auto dom_hi = ubound(geom.Domain()); + + Real dxi = dxInv[0]; + Real dyi = dxInv[1]; + Real dzi = dxInv[2]; + + const auto& ba = S_stage_data[IntVars::cons].boxArray(); + const auto& dm = S_stage_data[IntVars::cons].DistributionMap(); + + MultiFab Delta_rho_theta( ba , dm, 1, 1); + MultiFab Delta_rho_w (convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0)); + + MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1); + MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1); + MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1); + MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1); + MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1); + + // ************************************************************************* + // Set gravity as a vector + const Array grav{0.0, 0.0, -gravity}; + const GpuArray grav_gpu{grav[0], grav[1], grav[2]}; + + // This will hold theta extrapolated forward in time + MultiFab extrap(S_data[IntVars::cons].boxArray(),S_data[IntVars::cons].DistributionMap(),1,1); + + // This will hold the update for (rho) and (rho theta) + MultiFab temp_rhs(S_stage_data[IntVars::zmom].boxArray(),S_stage_data[IntVars::zmom].DistributionMap(),2,0); + + // This will hold the new x- and y-momenta temporarily (so that we don't overwrite values we need when tiling) + MultiFab temp_cur_xmom(S_stage_data[IntVars::xmom].boxArray(),S_stage_data[IntVars::xmom].DistributionMap(),1,0); + MultiFab temp_cur_ymom(S_stage_data[IntVars::ymom].boxArray(),S_stage_data[IntVars::ymom].DistributionMap(),1,0); + + // We assume that in the first step (nrk == 0) we are only doing one substep. 
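+    // (i.e. when nrk == 0 the only admissible fast step is step == 0, which is what the
+    //  assertion below enforces)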
+ AMREX_ALWAYS_ASSERT(nrk > 0 || step == 0); + + iMultiFab* mask_u; if (level > 0) mask_u = FPr_u->GetMask(); + iMultiFab* mask_v; if (level > 0) mask_v = FPr_v->GetMask(); + iMultiFab* mask_w; if (level > 0) mask_w = FPr_w->GetMask(); + + // ************************************************************************* + // First set up some arrays we'll need + // ************************************************************************* + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Array4& prev_cons = S_prev[IntVars::cons].const_array(mfi); + const Array4& prev_zmom = S_prev[IntVars::zmom].const_array(mfi); + + const Array4& stage_cons = S_stage_data[IntVars::cons].const_array(mfi); + const Array4& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi); + + const Array4& prev_drho_w = Delta_rho_w.array(mfi); + const Array4& prev_drho_theta = Delta_rho_theta.array(mfi); + const Array4& lagged_delta_rt = S_scratch[IntVars::cons].array(mfi); + const Array4& theta_extrap = extrap.array(mfi); + + Box gbx = mfi.growntilebox(1); + ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + prev_drho_theta(i,j,k) = prev_cons(i,j,k,RhoTheta_comp) - stage_cons(i,j,k,RhoTheta_comp); + + if (step == 0) { + theta_extrap(i,j,k) = prev_drho_theta(i,j,k); + } else { + theta_extrap(i,j,k) = prev_drho_theta(i,j,k) + beta_d * + ( prev_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,RhoTheta_comp) ); + } + + // We define lagged_delta_rt for our next step as the current delta_rt + // (after using it above to extrapolate theta for this step) + lagged_delta_rt(i,j,k,RhoTheta_comp) = prev_drho_theta(i,j,k); + }); + + // NOTE: We must do this here because for step > 0, prev_zmom and cur_zmom both point to the same data, + // so by the time we would use prev_zmom to define zflux, it would have already been over-written. 
+ Box gtbz = mfi.nodaltilebox(2); + gtbz.grow(IntVect(1,1,0)); + ParallelFor(gtbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { + prev_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k); + }); + } // mfi + + // ************************************************************************* + // Define updates in the current RK stage + // ************************************************************************* + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box tbx = mfi.nodaltilebox(0); + Box tby = mfi.nodaltilebox(1); + + const Array4 & stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi); + const Array4 & stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi); + const Array4 & prim = S_stage_prim.const_array(mfi); + + const Array4& slow_rhs_rho_u = S_slow_rhs[IntVars::xmom].const_array(mfi); + const Array4& slow_rhs_rho_v = S_slow_rhs[IntVars::ymom].const_array(mfi); + + const Array4& temp_cur_xmom_arr = temp_cur_xmom.array(mfi); + const Array4& temp_cur_ymom_arr = temp_cur_ymom.array(mfi); + + const Array4& prev_xmom = S_prev[IntVars::xmom].const_array(mfi); + const Array4& prev_ymom = S_prev[IntVars::ymom].const_array(mfi); + + // These store the advection momenta which we will use to update the slow variables + const Array4< Real>& avg_xmom = S_scratch[IntVars::xmom].array(mfi); + const Array4< Real>& avg_ymom = S_scratch[IntVars::ymom].array(mfi); + + const Array4& pi_stage_ca = pi_stage.const_array(mfi); + + const Array4& theta_extrap = extrap.array(mfi); + + // Map factors + const Array4& mf_u = mapfac_u->const_array(mfi); + const Array4& mf_v = mapfac_v->const_array(mfi); + + // Coarse/fine masking + const Array4 mask_u_arr = (level > 0) ? mask_u->const_array(mfi) : Array4{}; + const Array4 mask_v_arr = (level > 0) ? mask_v->const_array(mfi) : Array4{}; + + int mask_u_set = (level > 0) ? FPr_u->GetSetMaskVal() : 0; + int mask_v_set = (level > 0) ? FPr_v->GetSetMaskVal() : 0; + + // ********************************************************************* + // Define updates in the RHS of {x, y, z}-momentum equations + // ********************************************************************* + if (nrk == 0 and step == 0) { + ParallelFor(tbx, tby, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + temp_cur_xmom_arr(i,j,k) = prev_xmom(i,j,k) + dtau * slow_rhs_rho_u(i,j,k); + avg_xmom(i,j,k) += facinv * (temp_cur_xmom_arr(i,j,k) - stage_xmom(i,j,k)); + }, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + temp_cur_ymom_arr(i,j,k) = prev_ymom(i,j,k) + dtau * slow_rhs_rho_v(i,j,k); + avg_ymom(i,j,k) += facinv * (temp_cur_ymom_arr(i,j,k) - stage_ymom(i,j,k)); + }); + } else { + ParallelFor(tbx, tby, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Add (negative) gradient of (rho theta) multiplied by lagged "pi" + Real gpx = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k))*dxi; + gpx *= mf_u(i,j,0); + + if (l_use_moisture) { + Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i-1,j,k,PrimQ1_comp) + +prim(i,j,k,PrimQ2_comp) + prim(i-1,j,k,PrimQ2_comp) ); + gpx /= (1.0 + q); + } + + Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i,j,k,0)); + + Real fast_rhs_rho_u; + if (level > 0) { + fast_rhs_rho_u = (i != dom_lo.x && i != dom_hi.x+1 && mask_u_arr(i,j,k) == mask_u_set) ? 
+ -Gamma * R_d * pi_c * (theta_extrap(i-1,j,k) - theta_extrap(i-2,j,k))*dxi : + -Gamma * R_d * pi_c * gpx; + + // fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx; + } else { + fast_rhs_rho_u = -Gamma * R_d * pi_c * gpx; + } + + temp_cur_xmom_arr(i,j,k) = prev_xmom(i,j,k) + dtau * (fast_rhs_rho_u + slow_rhs_rho_u(i,j,k)); + + avg_xmom(i,j,k) += facinv * (temp_cur_xmom_arr(i,j,k) - stage_xmom(i,j,k)); + }, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Add (negative) gradient of (rho theta) multiplied by lagged "pi" + Real gpy = (theta_extrap(i,j,k) - theta_extrap(i,j-1,k))*dyi; + gpy *= mf_v(i,j,0); + + if (l_use_moisture) { + Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i,j-1,k,PrimQ1_comp) + +prim(i,j,k,PrimQ2_comp) + prim(i,j-1,k,PrimQ2_comp) ); + gpy /= (1.0 + q); + } + + Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j,k,0)); + + Real fast_rhs_rho_v; + if (level > 0) { + fast_rhs_rho_v = (j != dom_lo.y && j != dom_hi.y+1 && mask_v_arr(i,j,k) == mask_v_set) ? 0.0 : -Gamma * R_d * pi_c * gpy; + // fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy; + } else { + fast_rhs_rho_v = -Gamma * R_d * pi_c * gpy; + } + + temp_cur_ymom_arr(i,j,k) = prev_ymom(i,j,k) + dtau * (fast_rhs_rho_v + slow_rhs_rho_v(i,j,k)); + + avg_ymom(i,j,k) += facinv * (temp_cur_ymom_arr(i,j,k) - stage_ymom(i,j,k)); + + }); + } // nrk > 0 and/or step > 0 + } //mfi + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + { + std::array flux; + for ( MFIter mfi(S_stage_data[IntVars::cons],TileNoZ()); mfi.isValid(); ++mfi) + { + Box bx = mfi.tilebox(); + Box tbz = surroundingNodes(bx,2); + + Box vbx = mfi.validbox(); + const auto& vbx_hi = ubound(vbx); + + const Array4& stage_xmom = S_stage_data[IntVars::xmom].const_array(mfi); + const Array4& stage_ymom = S_stage_data[IntVars::ymom].const_array(mfi); + const Array4& stage_zmom = S_stage_data[IntVars::zmom].const_array(mfi); + const Array4 & prim = S_stage_prim.const_array(mfi); + + const Array4& prev_drho_theta = Delta_rho_theta.array(mfi); + + const Array4& prev_cons = S_prev[IntVars::cons].const_array(mfi); + const Array4& stage_cons = S_stage_data[IntVars::cons].const_array(mfi); + + const Array4& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi); + const Array4& slow_rhs_rho_w = S_slow_rhs[IntVars::zmom].const_array(mfi); + + const Array4& prev_zmom = S_prev[IntVars::zmom].const_array(mfi); + const Array4< Real>& cur_zmom = S_data[IntVars::zmom].array(mfi); + + const Array4& temp_cur_xmom_arr = temp_cur_xmom.array(mfi); + const Array4& temp_cur_ymom_arr = temp_cur_ymom.array(mfi); + + // These store the advection momenta which we will use to update the slow variables + const Array4< Real>& avg_zmom = S_scratch[IntVars::zmom].array(mfi); + + // Map factors + const Array4& mf_m = mapfac_m->const_array(mfi); + const Array4& mf_u = mapfac_u->const_array(mfi); + const Array4& mf_v = mapfac_v->const_array(mfi); + + FArrayBox RHS_fab; + RHS_fab.resize(tbz,1, The_Async_Arena()); + + FArrayBox soln_fab; + soln_fab.resize(tbz,1, The_Async_Arena()); + + auto const& RHS_a = RHS_fab.array(); + auto const& soln_a = soln_fab.array(); + + auto const& temp_rhs_arr = temp_rhs.array(mfi); + + auto const& coeffA_a = coeff_A_mf.array(mfi); + auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi); + auto const& coeffC_a = coeff_C_mf.array(mfi); + auto const& coeffP_a = coeff_P_mf.array(mfi); + auto const& coeffQ_a = coeff_Q_mf.array(mfi); + + // ************************************************************************* + // Define flux arrays for use in 
advection + // ************************************************************************* + for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) { + flux[dir].resize(surroundingNodes(bx,dir),2); + flux[dir].setVal(0.); + } + const GpuArray, AMREX_SPACEDIM> + flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}}; + + // ********************************************************************* + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { + Real xflux_lo = (temp_cur_xmom_arr(i ,j,k) - stage_xmom(i ,j,k)) / mf_u(i ,j,0); + Real xflux_hi = (temp_cur_xmom_arr(i+1,j,k) - stage_xmom(i+1,j,k)) / mf_u(i+1,j,0); + Real yflux_lo = (temp_cur_ymom_arr(i,j ,k) - stage_ymom(i,j ,k)) / mf_v(i,j ,0); + Real yflux_hi = (temp_cur_ymom_arr(i,j+1,k) - stage_ymom(i,j+1,k)) / mf_v(i,j+1,0); + + Real mfsq = mf_m(i,j,0) * mf_m(i,j,0); + + temp_rhs_arr(i,j,k,Rho_comp ) = ( xflux_hi - xflux_lo ) * dxi * mfsq + + ( yflux_hi - yflux_lo ) * dyi * mfsq; + temp_rhs_arr(i,j,k,RhoTheta_comp) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) - + xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq + + ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) - + yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5; + + (flx_arr[0])(i,j,k,0) = xflux_lo; + (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0)); + + (flx_arr[1])(i,j,k,0) = yflux_lo; + (flx_arr[1])(i,j,k,1) = (flx_arr[0])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0)); + + if (i == vbx_hi.x) { + (flx_arr[0])(i+1,j,k,0) = xflux_hi; + (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0)); + } + if (j == vbx_hi.y) { + (flx_arr[1])(i,j+1,k,0) = yflux_hi; + (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0)); + } + }); + + Box bx_shrunk_in_k = bx; + int klo = tbz.smallEnd(2); + int khi = tbz.bigEnd(2); + bx_shrunk_in_k.setSmall(2,klo+1); + bx_shrunk_in_k.setBig(2,khi-1); + + // Note that the notes use "g" to mean the magnitude of gravity, so it is positive + // We set grav_gpu[2] to be the vector component which is negative + // We define halfg to match the notes (which is why we take the absolute value) + Real halfg = std::abs(0.5 * grav_gpu[2]); + + // ********************************************************************* + // fast_loop_on_shrunk + // ********************************************************************* + //Note we don't act on the bottom or top boundaries of the domain + ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + Real coeff_P = coeffP_a(i,j,k); + Real coeff_Q = coeffQ_a(i,j,k); + + if (l_use_moisture) { + Real q = 0.5 * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp) + +prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) ); + coeff_P /= (1.0 + q); + coeff_Q /= (1.0 + q); + } + + Real theta_t_lo = 0.5 * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) ); + Real theta_t_mid = 0.5 * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) ); + Real theta_t_hi = 0.5 * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) ); + + Real Omega_kp1 = prev_zmom(i,j,k+1) - stage_zmom(i,j,k+1); + Real Omega_k = prev_zmom(i,j,k ) - stage_zmom(i,j,k ); + Real Omega_km1 = prev_zmom(i,j,k-1) - stage_zmom(i,j,k-1); + + // line 2 last two terms (order dtau) + Real old_drho_k = prev_cons(i,j,k ,Rho_comp) - stage_cons(i,j,k ,Rho_comp); + Real old_drho_km1 = prev_cons(i,j,k-1,Rho_comp) - stage_cons(i,j,k-1,Rho_comp); + Real R0_tmp = coeff_P * 
prev_drho_theta(i,j,k) + coeff_Q * prev_drho_theta(i,j,k-1) + - halfg * ( old_drho_k + old_drho_km1 ); + + // lines 3-5 residuals (order dtau^2) 1.0 <-> beta_2 + Real R1_tmp = halfg * (-slow_rhs_cons(i,j,k ,Rho_comp) + -slow_rhs_cons(i,j,k-1,Rho_comp) + +temp_rhs_arr(i,j,k,0) + temp_rhs_arr(i,j,k-1) ) + + ( coeff_P * (slow_rhs_cons(i,j,k ,RhoTheta_comp) - temp_rhs_arr(i,j,k ,RhoTheta_comp)) + + coeff_Q * (slow_rhs_cons(i,j,k-1,RhoTheta_comp) - temp_rhs_arr(i,j,k-1,RhoTheta_comp)) ); + + // lines 6&7 consolidated (reuse Omega & metrics) (order dtau^2) + R1_tmp += beta_1 * dzi * ( (Omega_kp1 - Omega_km1) * halfg + -(Omega_kp1*theta_t_hi - Omega_k *theta_t_mid) * coeff_P + -(Omega_k *theta_t_mid - Omega_km1*theta_t_lo ) * coeff_Q ); + + // line 1 + RHS_a(i,j,k) = Omega_k + dtau * (slow_rhs_rho_w(i,j,k) + R0_tmp + dtau * beta_2 * R1_tmp); + + }); // bx_shrunk_in_k + + Box b2d = tbz; // Copy constructor + b2d.setRange(2,0); + + auto const lo = lbound(bx); + auto const hi = ubound(bx); + + ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int) + { + // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs + RHS_a (i,j,lo.z ) = prev_zmom(i,j,lo.z ) - stage_zmom(i,j,lo.z) + + dtau * slow_rhs_rho_w(i,j,lo.z); + + // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs + RHS_a (i,j,hi.z+1) = prev_zmom(i,j,hi.z+1) - stage_zmom(i,j,hi.z+1) + + dtau * slow_rhs_rho_w(i,j,hi.z+1); + }); // b2d + +#ifdef AMREX_USE_GPU + if (l_implicit_substepping) { + + ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int) + { + // w = specified Dirichlet value at k = lo.z + soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z); + cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z); + + for (int k = lo.z+1; k <= hi.z+1; k++) { + soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k); + } + + cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1); + + for (int k = hi.z; k >= lo.z; k--) { + soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1); + cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k); + } + }); // b2d + + } else { // explicit substepping (beta_1 = 1; beta_2 = 0) + + ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int) + { + for (int k = lo.z; k <= hi.z+1; k++) { + soln_a(i,j,k) = RHS_a(i,j,k); + cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k); + } + }); // b2d + } // end of explicit substepping +#else + if (l_implicit_substepping) { + + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z); + } + } + for (int k = lo.z+1; k <= hi.z+1; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k); + } + } + } + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1); + } + } + for (int k = hi.z; k >= lo.z; --k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1); + cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k); + } + } + } + } else { // explicit substepping (beta_1 = 1; beta_2 = 0) + + for (int k = lo.z; k <= hi.z+1; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + 
AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + + soln_a(i,j,k) = RHS_a(i,j,k); + + cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k); + } + } + } + + } // end of explicit substepping +#endif + + // ************************************************************************** + // Define updates in the RHS of rho and (rho theta) + // ************************************************************************** + const Array4& prev_drho_w = Delta_rho_w.array(mfi); + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * prev_drho_w(i,j,k ); + Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * prev_drho_w(i,j,k+1); + + avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0)); + (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0)); + (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1)); + + if (k == vbx_hi.z) { + avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0)); + (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0)); + (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1)); + } + + temp_rhs_arr(i,j,k,Rho_comp ) += dzi * ( zflux_hi - zflux_lo ); + temp_rhs_arr(i,j,k,RhoTheta_comp) += 0.5 * dzi * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) + - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ); + }); + + // We only add to the flux registers in the final RK step + if (l_reflux && nrk == 2) { + int strt_comp_reflux = 0; + // For now we don't reflux (rho theta) because it seems to create issues at c/f boundaries + int num_comp_reflux = 1; + if (level < finest_level) { + fr_as_crse->CrseAdd(mfi, + {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}}, + dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device); + } + if (level > 0) { + fr_as_fine->FineAdd(mfi, + {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}}, + dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device); + } + + // This is necessary here so we don't go on to the next FArrayBox without + // having finished copying the fluxes into the FluxRegisters (since the fluxes + // are stored in temporary FArrayBox's) + Gpu::streamSynchronize(); + + } // two-way coupling + } // mfi + } // OMP + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(S_stage_data[IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + + const Array4< Real>& cur_cons = S_data[IntVars::cons].array(mfi); + const Array4& prev_cons = S_prev[IntVars::cons].const_array(mfi); + auto const& temp_rhs_arr = temp_rhs.const_array(mfi); + auto const& slow_rhs_cons = S_slow_rhs[IntVars::cons].const_array(mfi); + + if (step == 0) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + cur_cons(i,j,k,Rho_comp) = prev_cons(i,j,k,Rho_comp) + + dtau * (slow_rhs_cons(i,j,k,Rho_comp) - temp_rhs_arr(i,j,k,Rho_comp)); + cur_cons(i,j,k,RhoTheta_comp) = prev_cons(i,j,k,RhoTheta_comp) + + dtau * (slow_rhs_cons(i,j,k,RhoTheta_comp) - temp_rhs_arr(i,j,k,RhoTheta_comp)); + }); + } else { + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + // + // We didn't need to set cur_cons = prev_cons above because they point to the same data for step > 0 + // + cur_cons(i,j,k,Rho_comp) += dtau * (slow_rhs_cons(i,j,k,Rho_comp) - temp_rhs_arr(i,j,k,Rho_comp)); + cur_cons(i,j,k,RhoTheta_comp) += dtau * (slow_rhs_cons(i,j,k,RhoTheta_comp) - 
temp_rhs_arr(i,j,k,RhoTheta_comp));
+            });
+        } // step = 0
+
+        const Array4<Real>& cur_xmom = S_data[IntVars::xmom].array(mfi);
+        const Array4<Real>& cur_ymom = S_data[IntVars::ymom].array(mfi);
+
+        const Array4<const Real>& temp_cur_xmom_arr = temp_cur_xmom.const_array(mfi);
+        const Array4<const Real>& temp_cur_ymom_arr = temp_cur_ymom.const_array(mfi);
+
+        Box tbx = surroundingNodes(bx,0);
+        Box tby = surroundingNodes(bx,1);
+
+        ParallelFor(tbx, tby,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k)
+        {
+            cur_xmom(i,j,k) = temp_cur_xmom_arr(i,j,k);
+        },
+        [=] AMREX_GPU_DEVICE (int i, int j, int k)
+        {
+            cur_ymom(i,j,k) = temp_cur_ymom_arr(i,j,k);
+        });
+
+    } // mfi
+}
diff --git a/Source/Utils/ERF_PoissonSolve.cpp b/Source/Utils/ERF_PoissonSolve.cpp
index 0f731fe38..6ded377e7 100644
--- a/Source/Utils/ERF_PoissonSolve.cpp
+++ b/Source/Utils/ERF_PoissonSolve.cpp
@@ -45,7 +45,7 @@ bool ERF::projection_has_dirichlet (Array bcs) const
  * Project the single-level velocity field to enforce incompressibility
  * Note that the level may or may not be level 0.
  */
-void ERF::project_velocities (int lev, Real l_dt, Vector<MultiFab>& mom_mf, MultiFab& pmf)
+void ERF::project_velocities (int lev, Real l_dt, Vector<MultiFab>& mom_mf, MultiFab& /*Omega*/, MultiFab& pmf)
 {
     BL_PROFILE("ERF::project_velocities()");
diff --git a/Source/Utils/ERF_PoissonSolve_tb.cpp b/Source/Utils/ERF_PoissonSolve_tb.cpp
index 74139c6e4..165dfcc69 100644
--- a/Source/Utils/ERF_PoissonSolve_tb.cpp
+++ b/Source/Utils/ERF_PoissonSolve_tb.cpp
@@ -9,7 +9,7 @@ using namespace amrex;
  * Project the single-level velocity field to enforce incompressibility with a
  * thin body
  */
-void ERF::project_velocities_tb (int lev, Real l_dt, Vector<MultiFab>& vmf, MultiFab& pmf)
+void ERF::project_velocities_tb (int lev, Real l_dt, Vector<MultiFab>& vmf, MultiFab& /*Omega*/, MultiFab& pmf)
 {
     BL_PROFILE("ERF::project_velocities_tb()");
     AMREX_ALWAYS_ASSERT(!solverChoice.use_terrain);
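
For reference, the implicit branch of the acoustic substepping added above is a standard tridiagonal (Thomas) sweep
in k: a forward elimination that re-uses a pre-inverted diagonal, followed by back substitution. The stand-alone
sketch below is illustrative only -- the names thomas_solve, a, b, c and rhs are hypothetical and not part of ERF or
AMReX -- but it shows the same forward/backward pattern that erf_fast_rhs_N applies column by column, with the
diagonal inverted once up front in the spirit of make_fast_coeffs computing inv_coeff_B before the substeps.

#include <cstdio>
#include <vector>

// Solve a[k]*x[k-1] + b[k]*x[k] + c[k]*x[k+1] = rhs[k]  (a[0] and c[n-1] are unused).
// inv_b plays the role of inv_coeff_B: the inverse of the eliminated diagonal,
// computed once and then reused for every right-hand side.
std::vector<double> thomas_solve (const std::vector<double>& a,
                                  const std::vector<double>& b,
                                  const std::vector<double>& c,
                                  const std::vector<double>& rhs)
{
    const int n = static_cast<int>(rhs.size());
    std::vector<double> inv_b(n), x(n);

    // Pre-invert the eliminated diagonal (the analogue of make_fast_coeffs)
    inv_b[0] = 1.0 / b[0];
    for (int k = 1; k < n; ++k) {
        inv_b[k] = 1.0 / (b[k] - a[k] * c[k-1] * inv_b[k-1]);
    }

    // Forward elimination (the analogue of the k = lo.z+1 ... hi.z+1 loop)
    x[0] = rhs[0] * inv_b[0];
    for (int k = 1; k < n; ++k) {
        x[k] = (rhs[k] - a[k] * x[k-1]) * inv_b[k];
    }

    // Back substitution (the analogue of the k = hi.z ... lo.z loop)
    for (int k = n-2; k >= 0; --k) {
        x[k] -= (c[k] * inv_b[k]) * x[k+1];
    }
    return x;
}

int main ()
{
    // Small constant-coefficient system; the exact solution is x = (1, 1, 1, 1).
    std::vector<double> a  { 0.0, -1.0, -1.0, -1.0};
    std::vector<double> b  { 2.0,  2.0,  2.0,  2.0};
    std::vector<double> c  {-1.0, -1.0, -1.0,  0.0};
    std::vector<double> rhs{ 1.0,  0.0,  0.0,  1.0};

    for (double xk : thomas_solve(a, b, c, rhs)) { std::printf("%g\n", xk); }
    return 0;
}

Pre-computing inv_b once means each subsequent right-hand side costs only a multiply-add sweep up the column and one
down, with no divisions in the inner loops, which is presumably why the coefficients are carried in the fast_coeffs
MultiFab rather than re-derived inside every acoustic substep.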