From 0f73f1762ee2b97b24a2f618199af7c9974aa705 Mon Sep 17 00:00:00 2001
From: Ann Almgren
Date: Tue, 14 Nov 2023 14:46:30 -0800
Subject: [PATCH] fix computation of avg_zmom (#1295)

* fix computation of avg_zmom

* fix white spaces

* add refluxing contribs from fast integrator
---
 Docs/sphinx_doc/MeshRefinement.rst            |  27 ++---
 Source/Advection/AdvectionSrcForScalars.H     |  20 ++--
 Source/Advection/AdvectionSrcForState.cpp     |  20 ++--
 Source/TimeIntegration/ERF_MRI.H              |   8 +-
 Source/TimeIntegration/ERF_fast_rhs_MT.cpp    | 108 ++++++++++++------
 Source/TimeIntegration/ERF_fast_rhs_N.cpp     |  84 ++++++++++----
 Source/TimeIntegration/ERF_fast_rhs_T.cpp     |  93 ++++++++++-----
 .../TimeIntegration/ERF_make_fast_coeffs.cpp  |   1 -
 Source/TimeIntegration/TI_fast_rhs_fun.H      |  48 +++++---
 Source/TimeIntegration/TI_headers.H           |  21 +++-
 Source/TimeIntegration/TI_slow_rhs_fun.H      |   2 +-
 11 files changed, 291 insertions(+), 141 deletions(-)

diff --git a/Docs/sphinx_doc/MeshRefinement.rst b/Docs/sphinx_doc/MeshRefinement.rst
index 2a0ae0a5d..8c2ca8bef 100644
--- a/Docs/sphinx_doc/MeshRefinement.rst
+++ b/Docs/sphinx_doc/MeshRefinement.rst
@@ -124,7 +124,7 @@ computed by dividing the variable named rhotheta by the variable named density.
 Coupling Types
 --------------

-ERF supports one-way, two-way, and "mixed" coupling; this is a run-time input
+ERF supports one-way, two-way, and "mixed" coupling between levels; this is a run-time input

 ::

@@ -136,8 +136,9 @@ for the time advance of the fine solution. For cell-centered quantities, and
 face-based normal momenta on the coarse-fine interface, the coarse data is conservatively interpolated
 to the fine mesh. The interpolated data is utilized to specify ghost cell data (outside of the valid fine region)
 as well as specified and relaxation data inside the lateral boundaries
-of the fine region. More specifically, a user may specify the total width of the interior
-Dirichlet and relaxation region with ``erf.cf_width = `` (yellow + blue)
+of the fine region. More specifically, similarly to how the lateral boundaries are treated,
+a user may specify the total width of the interior Dirichlet and relaxation region with
+``erf.cf_width = `` (yellow + blue)
 and analogously the width of the interior Dirichlet region may be specified with ``erf.cf_set_width = ``
 (yellow).

@@ -173,7 +174,9 @@ where :math:`G` is the RHS of the evolution equations, :math:`\psi^{\prime}` is
 relaxation, :math:`\psi^{FP}` is the fine data obtained from spatial and temporal interpolation of the coarse
 data, and :math:`n` is the minimum number of grid points from a lateral boundary. The specified and relaxation
 regions are applied to all dycore variables :math:`\left[\rho \; \rho\Theta \; U\; V\; W \right]`
-on the fine mesh. Finally, we note that time dependent Dirichlet data, provided via an external boundary file,
+on the fine mesh.
+
+Finally, we note that time dependent Dirichlet data, provided via an external boundary file,
 may be enforced on the lateral boundary conditions of the domain (coarsest mesh). For such cases, the
 relaxation region width at the domain edges may be specified with ``erf.wrfbdy_width = `` (yellow + blue)
 while the interior Dirichlet region may be specified with ``erf.wrfbdy_set_width = ``
 (yellow).

@@ -187,17 +190,11 @@ the fine mesh communicates data back to the coarse mesh in two ways:
 - A "reflux" operation is performed for all cell-centered data.
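
Both the coupling type and the widths of the specified/relaxation regions discussed in this
documentation hunk are run-time inputs. A minimal illustration follows; ``erf.cf_width`` and
``erf.cf_set_width`` are named in the text above, while the ``erf.coupling_type`` name and the
value strings shown are assumptions that should be checked against the ERF inputs reference:

::

    erf.coupling_type = "TwoWay"   # assumed input name; one-way, two-way, or mixed coupling
    erf.cf_width      = 5          # total width of specified + relaxation region (yellow + blue)
    erf.cf_set_width  = 1          # width of the interior Dirichlet region (yellow)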
-Because the normal momentum at the fine level is itself interpolated from the coarse level, the -difference between fluxes -- along the coarse-fine interfaces -- used to update the coarse data and fluxes -used to update the fine data is due to the difference in the averaging of the advected quantity to the face -where the flux is defined. - -We note that both coupling schemes are conservative for mass because the fluxes for the continuity -equation are the momenta themselves, which are interpolated on faces at the coarse-fine interface. Other advected -quantities which are advanced in conservation form will lose conservation with one-way coupling. -Two-way coupling is conservative for these scalars as long as the refluxing operation is included with the -averaging down. - We define "mixed" coupling as using the two-way coupling algorithm for all cell-centered quantities except for :math:`\rho` and :math:`\rho \theta.` +We note that all three coupling schemes are conservative for mass because the fluxes for the continuity +equation are the momenta themselves, which are interpolated on faces at the coarse-fine interface. Other advected +quantities which are advanced in conservation form will lose conservation with one-way coupling. +Two-way coupling ensures conservation of the advective contribution to all scalar updates but +does not account for loss of conservation due to diffusive or source terms. diff --git a/Source/Advection/AdvectionSrcForScalars.H b/Source/Advection/AdvectionSrcForScalars.H index 5b99c5cbf..c09f15c81 100644 --- a/Source/Advection/AdvectionSrcForScalars.H +++ b/Source/Advection/AdvectionSrcForScalars.H @@ -72,28 +72,28 @@ AdvectionSrcForScalarsVert(const amrex::Box& bx, switch(vert_adv_type) { case AdvType::Centered_2nd: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, - flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + flx_arr, cell_prim, + avg_xmom, avg_ymom, avg_zmom); break; case AdvType::Upwind_3rd: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, - flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + flx_arr, cell_prim, + avg_xmom, avg_ymom, avg_zmom); break; case AdvType::Centered_4th: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, - flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + flx_arr, cell_prim, + avg_xmom, avg_ymom, avg_zmom); break; case AdvType::Upwind_5th: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, - flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + flx_arr, cell_prim, + avg_xmom, avg_ymom, avg_zmom); break; case AdvType::Centered_6th: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, - flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + flx_arr, cell_prim, + avg_xmom, avg_ymom, avg_zmom); break; default: AMREX_ASSERT_WITH_MESSAGE(false, "Unknown vertical advection scheme!"); diff --git a/Source/Advection/AdvectionSrcForState.cpp b/Source/Advection/AdvectionSrcForState.cpp index 286a4fecd..015107e59 100644 --- a/Source/Advection/AdvectionSrcForState.cpp +++ b/Source/Advection/AdvectionSrcForState.cpp @@ -176,43 +176,43 @@ AdvectionSrcForScalars (const Box& bx, const int icomp, const int ncomp, switch(horiz_adv_type) { case AdvType::Centered_2nd: AdvectionSrcForScalarsVert(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom, vert_adv_type); + avg_xmom, avg_ymom, avg_zmom, vert_adv_type); break; case AdvType::Upwind_3rd: AdvectionSrcForScalarsVert(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom, vert_adv_type); + avg_xmom, avg_ymom, avg_zmom, vert_adv_type); break; case AdvType::Centered_4th: 
AdvectionSrcForScalarsVert(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom, vert_adv_type); + avg_xmom, avg_ymom, avg_zmom, vert_adv_type); break; case AdvType::Upwind_5th: AdvectionSrcForScalarsVert(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom, vert_adv_type); + avg_xmom, avg_ymom, avg_zmom, vert_adv_type); break; case AdvType::Centered_6th: AdvectionSrcForScalarsVert(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom, vert_adv_type); + avg_xmom, avg_ymom, avg_zmom, vert_adv_type); break; case AdvType::Weno_3: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + avg_xmom, avg_ymom, avg_zmom); break; case AdvType::Weno_5: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + avg_xmom, avg_ymom, avg_zmom); break; case AdvType::Weno_3Z: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + avg_xmom, avg_ymom, avg_zmom); break; case AdvType::Weno_3MZQ: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + avg_xmom, avg_ymom, avg_zmom); break; case AdvType::Weno_5Z: AdvectionSrcForScalarsWrapper(bx, ncomp, icomp, flx_arr, cell_prim, - avg_xmom, avg_ymom, avg_zmom); + avg_xmom, avg_ymom, avg_zmom); break; default: AMREX_ASSERT_WITH_MESSAGE(false, "Unknown advection scheme!"); diff --git a/Source/TimeIntegration/ERF_MRI.H b/Source/TimeIntegration/ERF_MRI.H index fc75fed67..38e00811f 100644 --- a/Source/TimeIntegration/ERF_MRI.H +++ b/Source/TimeIntegration/ERF_MRI.H @@ -18,8 +18,8 @@ private: std::function slow_rhs_pre; std::function slow_rhs_inc; std::function slow_rhs_post; - std::function fast_rhs; + std::function fast_rhs; /** * \brief Integrator timestep size (Real) @@ -133,7 +133,7 @@ public: slow_rhs_post = F; } - void set_fast_rhs (std::function F) { @@ -281,7 +281,7 @@ public: // ******************************************************************************* for (int ks = 0; ks < nsubsteps; ++ks) { - fast_rhs(ks, nsubsteps, *F_slow, S_old, S_new, *S_sum, *S_scratch, dtau, inv_fac, + fast_rhs(ks, nsubsteps, nrk, *F_slow, S_old, S_new, *S_sum, *S_scratch, dtau, inv_fac, time + ks*dtau, time + (ks+1) * dtau); } // ks diff --git a/Source/TimeIntegration/ERF_fast_rhs_MT.cpp b/Source/TimeIntegration/ERF_fast_rhs_MT.cpp index afb303590..9b41d7426 100644 --- a/Source/TimeIntegration/ERF_fast_rhs_MT.cpp +++ b/Source/TimeIntegration/ERF_fast_rhs_MT.cpp @@ -15,37 +15,43 @@ using namespace amrex; /** * Function for computing the fast RHS with moving terrain * - * @param[in] step which fast time step - * @param[in] level level of resolution - * @param[in] S_slow_rhs slow RHS computed in erf_slow_rhs_pre - * @param[in] S_prev previous solution - * @param[in] S_stg_data solution at previous RK stage - * @param[in] S_stg_prim primitive variables at previous RK stage - * @param[in] pi_stage Exner function at previous RK stage - * @param[in] fast_coeffs coefficients for the tridiagonal solve used in the fast integrator - * @param[out] S_data current solution - * @param[in] S_scratch scratch space - * @param[in] geom container for geometric information - * @param[in] gravity Magnitude of gravity - * @param[in] use_lagged_delta_rt define lagged_delta_rt for our next step - * @param[in] Omega component of the momentum normal to the z-coordinate surface - * @param[in] z_t_rk rate of change of grid height -- only relevant for moving terrain - * 
@param[in] z_t_pert rate of change of grid height -- interpolated between RK stages - * @param[in] z_phys_nd_old height coordinate at nodes at old time - * @param[in] z_phys_nd_new height coordinate at nodes at new time - * @param[in] z_phys_nd_stg height coordinate at nodes at previous stage - * @param[in] detJ_cc_old Jacobian of the metric transformation at old time - * @param[in] detJ_cc_new Jacobian of the metric transformation at new time - * @param[in] detJ_cc_stg Jacobian of the metric transformation at previous stage - * @param[in] dtau fast time step - * @param[in] beta_s Coefficient which determines how implicit vs explicit the solve is - * @param[in] facinv inverse factor for time-averaging the momenta - * @param[in] mapfac_m map factor at cell centers - * @param[in] mapfac_u map factor at x-faces - * @param[in] mapfac_v map factor at y-faces + * @param[in] step which fast time step within each Runge-Kutta step + * @param[in] nrk which Runge-Kutta step + * @param[in] level level of resolution + * @param[in] finest_level finest level of resolution + * @param[in] S_slow_rhs slow RHS computed in erf_slow_rhs_pre + * @param[in] S_prev previous solution + * @param[in] S_stg_data solution at previous RK stage + * @param[in] S_stg_prim primitive variables at previous RK stage + * @param[in] pi_stage Exner function at previous RK stage + * @param[in] fast_coeffs coefficients for the tridiagonal solve used in the fast integrator + * @param[out] S_data current solution + * @param[in] S_scratch scratch space + * @param[in] geom container for geometric information + * @param[in] gravity Magnitude of gravity + * @param[in] use_lagged_delta_rt define lagged_delta_rt for our next step + * @param[in] Omega component of the momentum normal to the z-coordinate surface + * @param[in] z_t_rk rate of change of grid height -- only relevant for moving terrain + * @param[in] z_t_pert rate of change of grid height -- interpolated between RK stages + * @param[in] z_phys_nd_old height coordinate at nodes at old time + * @param[in] z_phys_nd_new height coordinate at nodes at new time + * @param[in] z_phys_nd_stg height coordinate at nodes at previous stage + * @param[in] detJ_cc_old Jacobian of the metric transformation at old time + * @param[in] detJ_cc_new Jacobian of the metric transformation at new time + * @param[in] detJ_cc_stg Jacobian of the metric transformation at previous stage + * @param[in] dtau fast time step + * @param[in] beta_s Coefficient which determines how implicit vs explicit the solve is + * @param[in] facinv inverse factor for time-averaging the momenta + * @param[in] mapfac_m map factor at cell centers + * @param[in] mapfac_u map factor at x-faces + * @param[in] mapfac_v map factor at y-faces + * @param[inout] fr_as_crse YAFluxRegister at level l at level l / l+1 interface + * @param[inout] fr_as_fine YAFluxRegister at level l at level l-1 / l interface + * @param[in] l_reflux should we add fluxes to the FluxRegisters? 
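+ *
+ * Note: when l_reflux is true, the vertical fluxes of rho and (rho theta) accumulated
+ * by the fast integrator are added to fr_as_crse / fr_as_fine, but only on the final
+ * RK stage (nrk == 2).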
*/ -void erf_fast_rhs_MT (int step, int /*level*/, +void erf_fast_rhs_MT (int step, int nrk, + int level, int finest_level, Vector& S_slow_rhs, // the slow RHS already computed const Vector& S_prev, // if step == 0, this is S_old, else the previous solution Vector& S_stg_data, // at last RK stg: S^n, S^* or S^** @@ -68,9 +74,12 @@ void erf_fast_rhs_MT (int step, int /*level*/, std::unique_ptr& detJ_cc_stg, // at last RK stg const Real dtau, const Real beta_s, const Real facinv, - std::unique_ptr& /*mapfac_m*/, + std::unique_ptr& mapfac_m, std::unique_ptr& mapfac_u, - std::unique_ptr& mapfac_v) + std::unique_ptr& mapfac_v, + YAFluxRegister* fr_as_crse, + YAFluxRegister* fr_as_fine, + bool l_reflux) { BL_PROFILE_REGION("erf_fast_rhs_MT()"); @@ -80,6 +89,7 @@ void erf_fast_rhs_MT (int step, int /*level*/, // How much do we project forward the (rho theta) that is used in the horizontal momentum equations Real beta_d = 0.1; + const Real* dx = geom.CellSize(); const GpuArray dxInv = geom.InvCellSizeArray(); Real dxi = dxInv[0]; @@ -111,6 +121,8 @@ void erf_fast_rhs_MT (int step, int /*level*/, FArrayBox RHS_fab; FArrayBox soln_fab; + std::array flux; + // NOTE: we leave tiling off here for efficiency -- to make this loop work with tiling // will require additional changes for ( MFIter mfi(S_stg_data[IntVar::cons],false); mfi.isValid(); ++mfi) @@ -165,6 +177,7 @@ void erf_fast_rhs_MT (int step, int /*level*/, const Array4& theta_extrap = extrap.array(mfi); // Map factors + const Array4& mf_m = mapfac_m->const_array(mfi); const Array4& mf_u = mapfac_u->const_array(mfi); const Array4& mf_v = mapfac_v->const_array(mfi); @@ -293,6 +306,16 @@ void erf_fast_rhs_MT (int step, int /*level*/, }); } // end profile + // ************************************************************************* + // Define flux arrays for use in advection + // ************************************************************************* + for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) { + flux[dir].resize(amrex::surroundingNodes(bx,dir),2); + flux[dir].setVal(0.); + } + const GpuArray, AMREX_SPACEDIM> + flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}}; + // ********************************************************************* { BL_PROFILE("fast_T_making_rho_rhs"); @@ -553,7 +576,8 @@ void erf_fast_rhs_MT (int step, int /*level*/, // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0 // so we don't update avg_zmom at k=vbx_hi.z+1 - avg_zmom(i,j,k) += facinv*zflux_lo; + avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0)); + (flx_arr[2])(i,j,k,0) = facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0)); // Note that the factor of (1/J) in the fast source term is canceled // when we multiply old and new by detJ_old and detJ_new , respectively @@ -572,9 +596,27 @@ void erf_fast_rhs_MT (int step, int /*level*/, Real temp_rth = detJ_old(i,j,k) * cur_cons(i,j,k,1) + dtau * ( slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta ); cur_cons(i,j,k,1) = temp_rth / detJ_new(i,j,k); + (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1)); }); } // end profile + + // We only add to the flux registers in the final RK step + if (l_reflux && nrk == 2) { + int strt_comp_reflux = 0; + int num_comp_reflux = 2; + if (level < finest_level) { + fr_as_crse->CrseAdd(mfi, + {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}}, + dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, amrex::RunOn::Device); + } + if (level > 0) { + fr_as_fine->FineAdd(mfi, + 
{{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}}, + dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, amrex::RunOn::Device); + } + } // two-way coupling + } // mfi - } + } // OMP } diff --git a/Source/TimeIntegration/ERF_fast_rhs_N.cpp b/Source/TimeIntegration/ERF_fast_rhs_N.cpp index 98cde76a9..4f27f325b 100644 --- a/Source/TimeIntegration/ERF_fast_rhs_N.cpp +++ b/Source/TimeIntegration/ERF_fast_rhs_N.cpp @@ -14,27 +14,33 @@ using namespace amrex; /** * Function for computing the fast RHS with no terrain * - * @param[in] step which fast time step - * @param[in] level level of resolution - * @param[in] S_slow_rhs slow RHS computed in erf_slow_rhs_pre - * @param[in] S_prev previous solution - * @param[in] S_stage_data solution at previous RK stage - * @param[in] S_stage_prim primitive variables at previous RK stage - * @param[in] pi_stage Exner function at previous RK stage - * @param[in] fast_coeffs coefficients for the tridiagonal solve used in the fast integrator - * @param[out] S_data current solution - * @param[in] S_scratch scratch space - * @param[in] geom container for geometric information - * @param[in] gravity magnitude of gravity - * @param[in] dtau fast time step - * @param[in] beta_s Coefficient which determines how implicit vs explicit the solve is - * @param[in] facinv inverse factor for time-averaging the momenta - * @param[in] mapfac_m map factor at cell centers - * @param[in] mapfac_u map factor at x-faces - * @param[in] mapfac_v map factor at y-faces + * @param[in] step which fast time step within each Runge-Kutta step + * @param[in] nrk which Runge-Kutta step + * @param[in] level level of resolution + * @param[in] finest_level finest level of resolution + * @param[in] S_slow_rhs slow RHS computed in erf_slow_rhs_pre + * @param[in] S_prev previous solution + * @param[in] S_stage_data solution at previous RK stage + * @param[in] S_stage_prim primitive variables at previous RK stage + * @param[in] pi_stage Exner function at previous RK stage + * @param[in] fast_coeffs coefficients for the tridiagonal solve used in the fast integrator + * @param[out] S_data current solution + * @param[in] S_scratch scratch space + * @param[in] geom container for geometric information + * @param[in] gravity magnitude of gravity + * @param[in] dtau fast time step + * @param[in] beta_s Coefficient which determines how implicit vs explicit the solve is + * @param[in] facinv inverse factor for time-averaging the momenta + * @param[in] mapfac_m map factor at cell centers + * @param[in] mapfac_u map factor at x-faces + * @param[in] mapfac_v map factor at y-faces + * @param[inout] fr_as_crse YAFluxRegister at level l at level l / l+1 interface + * @param[inout] fr_as_fine YAFluxRegister at level l at level l-1 / l interface + * @param[in] l_reflux should we add fluxes to the FluxRegisters? 
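+ *
+ * Note: as in erf_fast_rhs_MT, contributions are added to the flux registers only when
+ * l_reflux is true and only on the last RK stage (nrk == 2).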
*/ -void erf_fast_rhs_N (int step, int /*level*/, +void erf_fast_rhs_N (int step, int nrk, + int level, int finest_level, Vector& S_slow_rhs, // the slow RHS already computed const Vector& S_prev, // if step == 0, this is S_old, else the previous solution Vector& S_stage_data, // S_bar = S^n, S^* or S^** @@ -49,7 +55,10 @@ void erf_fast_rhs_N (int step, int /*level*/, const Real facinv, std::unique_ptr& mapfac_m, std::unique_ptr& mapfac_u, - std::unique_ptr& mapfac_v) + std::unique_ptr& mapfac_v, + YAFluxRegister* fr_as_crse, + YAFluxRegister* fr_as_fine, + bool l_reflux) { BL_PROFILE_REGION("erf_fast_rhs_N()"); @@ -59,7 +68,7 @@ void erf_fast_rhs_N (int step, int /*level*/, // How much do we project forward the (rho theta) that is used in the horizontal momentum equations Real beta_d = 0.1; - const Box domain(geom.Domain()); + const Real* dx = geom.CellSize(); const GpuArray dxInv = geom.InvCellSizeArray(); Real dxi = dxInv[0]; @@ -265,6 +274,8 @@ void erf_fast_rhs_N (int step, int /*level*/, #ifdef _OPENMP #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) #endif + { + std::array flux; for ( MFIter mfi(S_stage_data[IntVar::cons],TileNoZ()); mfi.isValid(); ++mfi) { Box bx = mfi.tilebox(); @@ -317,6 +328,16 @@ void erf_fast_rhs_N (int step, int /*level*/, auto const& coeffP_a = coeff_P_mf.array(mfi); auto const& coeffQ_a = coeff_Q_mf.array(mfi); + // ************************************************************************* + // Define flux arrays for use in advection + // ************************************************************************* + for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) { + flux[dir].resize(amrex::surroundingNodes(bx,dir),2); + flux[dir].setVal(0.); + } + const GpuArray, AMREX_SPACEDIM> + flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}}; + // ********************************************************************* { BL_PROFILE("making_rho_rhs"); @@ -484,7 +505,8 @@ void erf_fast_rhs_N (int step, int /*level*/, Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * old_drho_w(i,j,k ); Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * old_drho_w(i,j,k+1); - avg_zmom(i,j,k) += facinv*zflux_lo; + avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0)); + (flx_arr[2])(i,j,k,0) = facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0)); // Note that in the solve we effectively impose soln_a(i,j,vbx_hi.z+1)=0 // so we don't update avg_zmom at k=vbx_hi.z+1 @@ -492,9 +514,27 @@ void erf_fast_rhs_N (int step, int /*level*/, temp_rhs_arr(i,j,k,Rho_comp ) += dzi * ( zflux_hi - zflux_lo ); temp_rhs_arr(i,j,k,RhoTheta_comp) += 0.5 * dzi * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ); + (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1)); }); } // end profile + + // We only add to the flux registers in the final RK step + if (l_reflux && nrk == 2) { + int strt_comp_reflux = 0; + int num_comp_reflux = 2; + if (level < finest_level) { + fr_as_crse->CrseAdd(mfi, + {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}}, + dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, amrex::RunOn::Device); + } + if (level > 0) { + fr_as_fine->FineAdd(mfi, + {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}}, + dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, amrex::RunOn::Device); + } + } // two-way coupling } // mfi + } // OMP #ifdef _OPENMP #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) diff --git a/Source/TimeIntegration/ERF_fast_rhs_T.cpp 
b/Source/TimeIntegration/ERF_fast_rhs_T.cpp index 8cdd9be40..40aea842a 100644 --- a/Source/TimeIntegration/ERF_fast_rhs_T.cpp +++ b/Source/TimeIntegration/ERF_fast_rhs_T.cpp @@ -15,30 +15,36 @@ using namespace amrex; /** * Function for computing the fast RHS with fixed terrain * - * @param[in] step which fast time step - * @param[in] level level of resolution - * @param[in] S_slow_rhs slow RHS computed in erf_slow_rhs_pre - * @param[in] S_prev previous solution - * @param[in] S_stage_data solution at previous RK stage - * @param[in] S_stage_prim primitive variables at previous RK stage - * @param[in] pi_stage Exner function at previous RK stage - * @param[in] fast_coeffs coefficients for the tridiagonal solve used in the fast integrator - * @param[out] S_data current solution - * @param[in] S_scratch scratch space - * @param[in] geom container for geometric information - * @param[in] gravity magnitude of gravity - * @param[in] Omega component of the momentum normal to the z-coordinate surface - * @param[in] z_phys_nd height coordinate at nodes - * @param[in] detJ_cc Jacobian of the metric transformation - * @param[in] dtau fast time step - * @param[in] beta_s Coefficient which determines how implicit vs explicit the solve is - * @param[in] facinv inverse factor for time-averaging the momenta - * @param[in] mapfac_m map factor at cell centers - * @param[in] mapfac_u map factor at x-faces - * @param[in] mapfac_v map factor at y-faces + * @param[in] step which fast time step within each Runge-Kutta step + * @param[in] nrk which Runge-Kutta step + * @param[in] level level of resolution + * @param[in] finest_level finest level of resolution + * @param[in] S_slow_rhs slow RHS computed in erf_slow_rhs_pre + * @param[in] S_prev previous solution + * @param[in] S_stage_data solution at previous RK stage + * @param[in] S_stage_prim primitive variables at previous RK stage + * @param[in] pi_stage Exner function at previous RK stage + * @param[in] fast_coeffs coefficients for the tridiagonal solve used in the fast integrator + * @param[out] S_data current solution + * @param[in] S_scratch scratch space + * @param[in] geom container for geometric information + * @param[in] gravity magnitude of gravity + * @param[in] Omega component of the momentum normal to the z-coordinate surface + * @param[in] z_phys_nd height coordinate at nodes + * @param[in] detJ_cc Jacobian of the metric transformation + * @param[in] dtau fast time step + * @param[in] beta_s Coefficient which determines how implicit vs explicit the solve is + * @param[in] facinv inverse factor for time-averaging the momenta + * @param[in] mapfac_m map factor at cell centers + * @param[in] mapfac_u map factor at x-faces + * @param[in] mapfac_v map factor at y-faces + * @param[inout] fr_as_crse YAFluxRegister at level l at level l / l+1 interface + * @param[inout] fr_as_fine YAFluxRegister at level l at level l-1 / l interface + * @param[in] l_reflux should we add fluxes to the FluxRegisters? 
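+ *
+ * Note: flux-register contributions (CrseAdd / FineAdd) are made only if l_reflux is
+ * true and this is the final RK stage (nrk == 2).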
*/ -void erf_fast_rhs_T (int step, int /*level*/, +void erf_fast_rhs_T (int step, int nrk, + int level, int finest_level, Vector& S_slow_rhs, // the slow RHS already computed const Vector& S_prev, // if step == 0, this is S_old, else the previous solution Vector& S_stage_data, // S_bar = S^n, S^* or S^** @@ -56,7 +62,10 @@ void erf_fast_rhs_T (int step, int /*level*/, const Real facinv, std::unique_ptr& mapfac_m, std::unique_ptr& mapfac_u, - std::unique_ptr& mapfac_v) + std::unique_ptr& mapfac_v, + YAFluxRegister* fr_as_crse, + YAFluxRegister* fr_as_fine, + bool l_reflux) { BL_PROFILE_REGION("erf_fast_rhs_T()"); @@ -66,7 +75,7 @@ void erf_fast_rhs_T (int step, int /*level*/, // How much do we project forward the (rho theta) that is used in the horizontal momentum equations Real beta_d = 0.1; - const Box domain(geom.Domain()); + const Real* dx = geom.CellSize(); const GpuArray dxInv = geom.InvCellSizeArray(); Real dxi = dxInv[0]; @@ -306,6 +315,8 @@ void erf_fast_rhs_T (int step, int /*level*/, #ifdef _OPENMP #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) #endif + { + std::array flux; for ( MFIter mfi(S_stage_data[IntVar::cons],TileNoZ()); mfi.isValid(); ++mfi) { Box bx = mfi.tilebox(); @@ -369,6 +380,16 @@ void erf_fast_rhs_T (int step, int /*level*/, auto const& coeffP_a = coeff_P_mf.array(mfi); auto const& coeffQ_a = coeff_Q_mf.array(mfi); + // ************************************************************************* + // Define flux arrays for use in advection + // ************************************************************************* + for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) { + flux[dir].resize(amrex::surroundingNodes(bx,dir),2); + flux[dir].setVal(0.); + } + const GpuArray, AMREX_SPACEDIM> + flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}}; + // ********************************************************************* { BL_PROFILE("fast_T_making_rho_rhs"); @@ -584,15 +605,35 @@ void erf_fast_rhs_T (int step, int /*level*/, // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0 // so we don't update avg_zmom at k=vbx_hi.z+1 - avg_zmom(i,j,k) += facinv*zflux_lo; + avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0)); + (flx_arr[2])(i,j,k,0) = facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0)); Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k); cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho); Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 * - ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k); + ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) - + zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k); cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta); + (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1)); }); } // end profile + + // We only add to the flux registers in the final RK step + if (l_reflux && nrk == 2) { + int strt_comp_reflux = 0; + int num_comp_reflux = 2; + if (level < finest_level) { + fr_as_crse->CrseAdd(mfi, + {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}}, + dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, amrex::RunOn::Device); + } + if (level > 0) { + fr_as_fine->FineAdd(mfi, + {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}}, + dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, amrex::RunOn::Device); + } + } // two-way coupling } // mfi + } // OMP } diff --git 
a/Source/TimeIntegration/ERF_make_fast_coeffs.cpp b/Source/TimeIntegration/ERF_make_fast_coeffs.cpp index c728636b6..311868e38 100644 --- a/Source/TimeIntegration/ERF_make_fast_coeffs.cpp +++ b/Source/TimeIntegration/ERF_make_fast_coeffs.cpp @@ -44,7 +44,6 @@ void make_fast_coeffs (int /*level*/, Real c_v = c_p - R_d; - const Box domain(geom.Domain()); const GpuArray dxInv = geom.InvCellSizeArray(); Real dzi = dxInv[2]; diff --git a/Source/TimeIntegration/TI_fast_rhs_fun.H b/Source/TimeIntegration/TI_fast_rhs_fun.H index f27ea13ad..f208ea0de 100644 --- a/Source/TimeIntegration/TI_fast_rhs_fun.H +++ b/Source/TimeIntegration/TI_fast_rhs_fun.H @@ -1,7 +1,7 @@ /** * Wrapper for calling the routine that creates the fast RHS */ -auto fast_rhs_fun = [&](int fast_step, int n_sub, +auto fast_rhs_fun = [&](int fast_step, int n_sub, int nrk, Vector& S_slow_rhs, const Vector& S_old, Vector& S_stage, @@ -22,6 +22,22 @@ auto fast_rhs_fun = [&](int fast_step, int n_sub, // beta_s = 1.0 : fully implicit Real beta_s = 0.1; + // ************************************************************************* + // Set up flux registers if using two_way coupling + // ************************************************************************* + const bool l_reflux = (solverChoice.coupling_type == CouplingType::TwoWay); + + YAFluxRegister* fr_as_crse = nullptr; + YAFluxRegister* fr_as_fine = nullptr; + if (l_reflux) { + if (level < finest_level) { + fr_as_crse = getAdvFluxReg(level+1); + } + if (level > 0) { + fr_as_fine = getAdvFluxReg(level); + } + } + // Moving terrain std::unique_ptr z_t_pert; if ( solverChoice.use_terrain && (solverChoice.terrain_type == TerrainType::Moving) ) @@ -73,7 +89,7 @@ auto fast_rhs_fun = [&](int fast_step, int n_sub, if (fast_step == 0) { // If this is the first substep we pass in S_old as the previous step's solution - erf_fast_rhs_MT(fast_step, level, + erf_fast_rhs_MT(fast_step, nrk, level, finest_level, S_slow_rhs, S_old, S_stage, S_prim, pi_stage, fast_coeffs, S_data, S_scratch, fine_geom, solverChoice.gravity, solverChoice.use_lagged_delta_rt, @@ -81,10 +97,11 @@ auto fast_rhs_fun = [&](int fast_step, int n_sub, z_phys_nd[level], z_phys_nd_new[level], z_phys_nd_src[level], detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level], dtau, beta_s, inv_fac, - mapfac_m[level], mapfac_u[level], mapfac_v[level]); + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_reflux); } else { // If this is not the first substep we pass in S_data as the previous step's solution - erf_fast_rhs_MT(fast_step, level, + erf_fast_rhs_MT(fast_step, nrk, level, finest_level, S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs, S_data, S_scratch, fine_geom, solverChoice.gravity, solverChoice.use_lagged_delta_rt, @@ -92,7 +109,8 @@ auto fast_rhs_fun = [&](int fast_step, int n_sub, z_phys_nd[level], z_phys_nd_new[level], z_phys_nd_src[level], detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level], dtau, beta_s, inv_fac, - mapfac_m[level], mapfac_u[level], mapfac_v[level]); + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_reflux); } } else if (solverChoice.use_terrain && solverChoice.terrain_type == TerrainType::Static) { if (fast_step == 0) { @@ -103,18 +121,20 @@ auto fast_rhs_fun = [&](int fast_step, int n_sub, detJ_cc[level], r0, pi0, dtau, beta_s); // If this is the first substep we pass in S_old as the previous step's solution - erf_fast_rhs_T(fast_step, level, + erf_fast_rhs_T(fast_step, nrk, level, finest_level, S_slow_rhs, S_old, S_stage, 
S_prim, pi_stage, fast_coeffs, S_data, S_scratch, fine_geom, solverChoice.gravity, Omega, z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac, - mapfac_m[level], mapfac_u[level], mapfac_v[level]); + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_reflux); } else { // If this is not the first substep we pass in S_data as the previous step's solution - erf_fast_rhs_T(fast_step, level, + erf_fast_rhs_T(fast_step, nrk, level, finest_level, S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs, S_data, S_scratch, fine_geom, solverChoice.gravity, Omega, z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac, - mapfac_m[level], mapfac_u[level], mapfac_v[level]); + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_reflux); } } else { if (fast_step == 0) { @@ -125,18 +145,20 @@ auto fast_rhs_fun = [&](int fast_step, int n_sub, detJ_cc[level], r0, pi0, dtau, beta_s); // If this is the first substep we pass in S_old as the previous step's solution - erf_fast_rhs_N(fast_step, level, + erf_fast_rhs_N(fast_step, nrk, level, finest_level, S_slow_rhs, S_old, S_stage, S_prim, pi_stage, fast_coeffs, S_data, S_scratch, fine_geom, solverChoice.gravity, dtau, beta_s, inv_fac, - mapfac_m[level], mapfac_u[level], mapfac_v[level]); + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_reflux); } else { // If this is not the first substep we pass in S_data as the previous step's solution - erf_fast_rhs_N(fast_step, level, + erf_fast_rhs_N(fast_step, nrk, level, finest_level, S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs, S_data, S_scratch, fine_geom, solverChoice.gravity, dtau, beta_s, inv_fac, - mapfac_m[level], mapfac_u[level], mapfac_v[level]); + mapfac_m[level], mapfac_u[level], mapfac_v[level], + fr_as_crse, fr_as_fine, l_reflux); } } diff --git a/Source/TimeIntegration/TI_headers.H b/Source/TimeIntegration/TI_headers.H index f3591df28..f4840eecf 100644 --- a/Source/TimeIntegration/TI_headers.H +++ b/Source/TimeIntegration/TI_headers.H @@ -111,7 +111,7 @@ void erf_slow_rhs_post (int level, int finest_level, int nrk, * Function for computing the fast RHS with no terrain * */ -void erf_fast_rhs_N (int step, int level, +void erf_fast_rhs_N (int step, int nrk, int level, int finest_level, amrex::Vector& S_slow_rhs, const amrex::Vector& S_prev, amrex::Vector& S_stage_data, @@ -126,13 +126,16 @@ void erf_fast_rhs_N (int step, int level, const amrex::Real facinv, std::unique_ptr& mapfac_m, std::unique_ptr& mapfac_u, - std::unique_ptr& mapfac_v); + std::unique_ptr& mapfac_v, + amrex::YAFluxRegister* fr_as_crse, + amrex::YAFluxRegister* fr_as_fine, + bool l_reflux); /** * Function for computing the fast RHS with fixed terrain * */ -void erf_fast_rhs_T (int step, int level, +void erf_fast_rhs_T (int step, int nrk, int level, int finest_level, amrex::Vector& S_slow_rhs, const amrex::Vector& S_prev, amrex::Vector& S_stage_data, @@ -150,13 +153,16 @@ void erf_fast_rhs_T (int step, int level, const amrex::Real facinv, std::unique_ptr& mapfac_m, std::unique_ptr& mapfac_u, - std::unique_ptr& mapfac_v); + std::unique_ptr& mapfac_v, + amrex::YAFluxRegister* fr_as_crse, + amrex::YAFluxRegister* fr_as_fine, + bool l_reflux); /** * Function for computing the fast RHS with moving terrain * */ -void erf_fast_rhs_MT (int step, int level, +void erf_fast_rhs_MT (int step, int nrk, int level, int finest_level, amrex::Vector& S_slow_rhs, const amrex::Vector& S_prev, amrex::Vector& S_stg_data, @@ -181,7 +187,10 @@ void 
erf_fast_rhs_MT (int step, int level, const amrex::Real facinv, std::unique_ptr& mapfac_m, std::unique_ptr& mapfac_u, - std::unique_ptr& mapfac_v); + std::unique_ptr& mapfac_v, + amrex::YAFluxRegister* fr_as_crse, + amrex::YAFluxRegister* fr_as_fine, + bool l_reflux); /** * Function for computing the coefficients for the tridiagonal solver used in the fast diff --git a/Source/TimeIntegration/TI_slow_rhs_fun.H b/Source/TimeIntegration/TI_slow_rhs_fun.H index 249587df1..2d26fb1a6 100644 --- a/Source/TimeIntegration/TI_slow_rhs_fun.H +++ b/Source/TimeIntegration/TI_slow_rhs_fun.H @@ -293,7 +293,7 @@ #endif // ************************************************************************* - // Set up flux registers if using two_way coupling + // Set up flux registers if using mixed or two_way coupling // ************************************************************************* YAFluxRegister* fr_as_crse = nullptr; YAFluxRegister* fr_as_fine = nullptr;
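
The refluxing pattern added to erf_fast_rhs_MT, erf_fast_rhs_N, and erf_fast_rhs_T above is identical
in all three variants; the following sketch condenses it into a single free-standing helper. The helper
name add_fast_fluxes_to_registers and its consolidated argument list are invented for illustration only,
but the CrseAdd / FineAdd calls mirror the ones the patch adds. Note that the fast integrator fills only
the vertical (z) flux components; the x- and y-flux FArrayBoxes remain zero and the corresponding
contributions come from the slow RHS.

#include <array>
#include <AMReX_FArrayBox.H>
#include <AMReX_MFIter.H>
#include <AMReX_GpuControl.H>
#include <AMReX_YAFluxRegister.H>

// Illustrative helper (not part of the patch): add the time-averaged fast fluxes of
// rho (component 0) and rho*theta (component 1) to the coarse/fine flux registers.
void add_fast_fluxes_to_registers (const amrex::MFIter& mfi,
                                   std::array<amrex::FArrayBox,AMREX_SPACEDIM>& flux,
                                   const amrex::Real* dx, amrex::Real dtau,
                                   int nrk, bool l_reflux, int level, int finest_level,
                                   amrex::YAFluxRegister* fr_as_crse,
                                   amrex::YAFluxRegister* fr_as_fine)
{
    // Fluxes are accumulated over the acoustic substeps but added to the registers
    // once, on the final RK stage, and only when two-way coupling is active.
    if (!l_reflux || nrk != 2) { return; }

    const int strt_comp_reflux = 0;  // rho
    const int num_comp_reflux  = 2;  // rho and rho*theta

    if (level < finest_level) {
        fr_as_crse->CrseAdd(mfi,
            {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
            dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux,
            amrex::RunOn::Device);
    }
    if (level > 0) {
        fr_as_fine->FineAdd(mfi,
            {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
            dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux,
            amrex::RunOn::Device);
    }
}

In the patch itself this logic appears inline at the end of the MFIter loop in each of the three
fast-RHS files, with flux[2] filled component-by-component inside the same ParallelFor that updates
avg_zmom.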