diff --git a/CMake/BuildERFExe.cmake b/CMake/BuildERFExe.cmake index 884e4da91..bcb15eff8 100644 --- a/CMake/BuildERFExe.cmake +++ b/CMake/BuildERFExe.cmake @@ -42,14 +42,15 @@ function(build_erf_lib erf_lib_name) ${SRC_DIR}/EB/ERF_eb_cylinder.cpp ${SRC_DIR}/EB/ERF_eb_regular.cpp ${SRC_DIR}/EB/ERF_initEB.cpp - ${SRC_DIR}/EB/ERF_writeEBsurface.cpp) + ${SRC_DIR}/EB/ERF_writeEBsurface.cpp + ${SRC_DIR}/LinearSolvers/ERF_solve_with_EB_mlmg.cpp) target_include_directories(${erf_lib_name} PUBLIC $) target_compile_definitions(${erf_lib_name} PUBLIC ERF_USE_EB) endif() if(ERF_ENABLE_FFT) target_sources(${erf_lib_name} PRIVATE - ${SRC_DIR}/Utils/ERF_solve_with_fft.cpp) + ${SRC_DIR}/LinearSolvers/ERF_solve_with_fft.cpp) target_compile_definitions(${erf_lib_name} PUBLIC ERF_USE_FFT) endif() @@ -102,10 +103,6 @@ function(build_erf_lib erf_lib_name) ) endif() - if(ERF_ENABLE_HDF5) - target_compile_definitions(${erf_lib_name} PUBLIC ERF_USE_HDF5) - endif() - target_sources(${erf_lib_name} PRIVATE ${SRC_DIR}/ERF_Derive.cpp @@ -189,10 +186,12 @@ function(build_erf_lib erf_lib_name) ${SRC_DIR}/Utils/ERF_AverageDown.cpp ${SRC_DIR}/Utils/ERF_ChopGrids.cpp ${SRC_DIR}/Utils/ERF_MomentumToVelocity.cpp - ${SRC_DIR}/Utils/ERF_PoissonSolve.cpp - ${SRC_DIR}/Utils/ERF_PoissonSolve_tb.cpp - ${SRC_DIR}/Utils/ERF_solve_with_gmres.cpp - ${SRC_DIR}/Utils/ERF_solve_with_mlmg.cpp + ${SRC_DIR}/LinearSolvers/ERF_PoissonSolve.cpp + ${SRC_DIR}/LinearSolvers/ERF_PoissonSolve_tb.cpp + ${SRC_DIR}/LinearSolvers/ERF_compute_divergence.cpp + ${SRC_DIR}/LinearSolvers/ERF_solve_with_gmres.cpp + ${SRC_DIR}/LinearSolvers/ERF_solve_with_mlmg.cpp + ${SRC_DIR}/LinearSolvers/ERF_TerrainPoisson.cpp ${SRC_DIR}/Utils/ERF_TerrainMetrics.cpp ${SRC_DIR}/Utils/ERF_VelocityToMomentum.cpp ${SRC_DIR}/Utils/ERF_InteriorGhostCells.cpp @@ -244,6 +243,7 @@ endif() target_include_directories(${erf_lib_name} PUBLIC $) target_include_directories(${erf_lib_name} PUBLIC $) target_include_directories(${erf_lib_name} PUBLIC $) + 
target_include_directories(${erf_lib_name} PUBLIC $) target_include_directories(${erf_lib_name} PUBLIC $) target_include_directories(${erf_lib_name} PUBLIC $) target_include_directories(${erf_lib_name} PUBLIC $) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8671db185..515237115 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -245,7 +245,7 @@ target_link_libraries(erf_api PUBLIC erf_srclib) add_library(${PROJECT_NAME}::erf_api ALIAS erf_srclib) # Collect all headers and make them installable with the target -set(ERF_INCLUDES "Source/ERF.H;Source/ERF_Constants.H;Source/WindFarmParametrization/SimpleActuatorDisk/ERF_SimpleAD.H;Source/WindFarmParametrization/EWP/ERF_EWP.H;Source/WindFarmParametrization/Null/ERF_NullWindFarm.H;Source/WindFarmParametrization/ERF_WindFarm.H;Source/WindFarmParametrization/Fitch/ERF_Fitch.H;Source/BoundaryConditions/ERF_PhysBCFunct.H;Source/BoundaryConditions/ERF_MOSTAverage.H;Source/BoundaryConditions/ERF_MOSTRoughness.H;Source/BoundaryConditions/ERF_ABLMost.H;Source/BoundaryConditions/ERF_FillPatcher.H;Source/BoundaryConditions/ERF_MOSTStress.H;Source/BoundaryConditions/ERF_TimeInterpolatedData.H;Source/Utils/ERF_Interpolation.H;Source/Utils/ERF_TileNoZ.H;Source/Utils/ERF_PlaneAverage.H;Source/Utils/ERF_Interpolation_WENO.H;Source/Utils/ERF_DirectionSelector.H;Source/Utils/ERF_ParFunctions.H;Source/Utils/ERF_Wstar.H;Source/Utils/ERF_Microphysics_Utils.H;Source/Utils/ERF_Sat_methods.H;Source/Utils/ERF_Interpolation_1D.H;Source/Utils/ERF_Interpolation_UPW.H;Source/Utils/ERF_TerrainMetrics.H;Source/Utils/ERF_Interpolation_WENO_Z.H;Source/Utils/ERF_Thetav.H;Source/Utils/ERF_Water_vapor_saturation.H;Source/Utils/ERF_Utils.H;Source/Utils/ERF_Orbit.H;Source/Utils/ERF_EOS.H;Source/Utils/ERF_HSE_utils.H;Source/EB/ERF_TerrainIF.H;Source/EB/ERF_FlowerIF.H;Source/EB/ERF_eb_if.H;Source/Particles/ERFPC.H;Source/Particles/ERF_ParticleData.H;Source/Prob/ERF_init_density_hse_dry.H;Source/Prob/ERF_init_rayleigh_damping.H;Source/Prob/ERF_init_consta
nt_density_hse.H;Source/ERF_prob_common.H;Source/ERF_Derive.H;Source/Radiation/ERF_Mam4_constituents.H;Source/Radiation/ERF_Mam4_aero.H;Source/Radiation/ERF_Optics.H;Source/Radiation/ERF_Modal_aero_wateruptake.H;Source/Radiation/ERF_Cloud_rad_props.H;Source/Radiation/ERF_Phys_prop.H;Source/Radiation/ERF_Radiation.H;Source/Radiation/ERF_Albedo.H;Source/Radiation/ERF_Parameterizations.H;Source/Radiation/ERF_Rad_constants.H;Source/Radiation/ERF_Aero_rad_props.H;Source/Radiation/ERF_m2005_effradius.H;Source/Radiation/ERF_Linear_interpolate.H;Source/Radiation/ERF_Slingo.H;Source/Radiation/ERF_Rrtmgp.H;Source/Radiation/ERF_Ebert_curry.H;Source/SourceTerms/ERF_NumericalDiffusion.H;Source/SourceTerms/ERF_Src_headers.H;Source/IO/ERF_SampleData.H;Source/IO/ERF_NCInterface.H;Source/IO/ERF_NCWpsFile.H;Source/IO/ERF_NCPlotFile.H;Source/IO/ERF_ReadBndryPlanes.H;Source/IO/ERF_WriteBndryPlanes.H;Source/PBL/ERF_MYNNStruct.H;Source/PBL/ERF_PBLModels.H;Source/PBL/ERF_PBLHeight.H;Source/TimeIntegration/ERF_TI_substep_fun.H;Source/TimeIntegration/ERF_TI_slow_headers.H;Source/TimeIntegration/ERF_TI_slow_rhs_fun.H;Source/TimeIntegration/ERF_TI_fast_headers.H;Source/TimeIntegration/ERF_TI_utils.H;Source/TimeIntegration/ERF_MRI.H;Source/TimeIntegration/ERF_TI_no_substep_fun.H;Source/LandSurfaceModel/Null/ERF_NullSurf.H;Source/LandSurfaceModel/ERF_LandSurface.H;Source/LandSurfaceModel/MM5/ERF_MM5.H;Source/LandSurfaceModel/SLM/ERF_SLM.H;Source/ERF_IndexDefines.H;Source/Advection/ERF_AdvectionSrcForMom_N.H;Source/Advection/ERF_AdvectionSrcForScalars.H;Source/Advection/ERF_AdvectionSrcForMom_T.H;Source/Advection/ERF_Advection.H;Source/MultiBlock/ERF_MultiBlockContainer.H;Source/Initialization/ERF_Metgrid_utils.H;Source/Diffusion/ERF_EddyViscosity.H;Source/Diffusion/ERF_Diffusion.H;Source/Microphysics/Null/ERF_NullMoistLagrangian.H;Source/Microphysics/Null/ERF_NullMoist.H;Source/Microphysics/ERF_Microphysics.H;Source/Microphysics/ERF_LagrangianMicrophysics.H;Source/Microphysics/ERF_EulerianMicro
physics.H;Source/Microphysics/Kessler/ERF_Kessler.H;Source/Microphysics/SAM/ERF_SAM.H;Source/DataStructs/ERF_InputSpongeData.H;Source/DataStructs/ERF_TurbPertStruct.H;Source/DataStructs/ERF_SpongeStruct.H;Source/DataStructs/ERF_AdvStruct.H;Source/DataStructs/ERF_DataStruct.H;Source/DataStructs/ERF_InputSoundingData.H;Source/DataStructs/ERF_DiffStruct.H;Source/DataStructs/ERF_TurbStruct.H") +set(ERF_INCLUDES "Source/ERF.H;Source/ERF_Constants.H;Source/WindFarmParametrization/SimpleActuatorDisk/ERF_SimpleAD.H;Source/WindFarmParametrization/EWP/ERF_EWP.H;Source/WindFarmParametrization/Null/ERF_NullWindFarm.H;Source/WindFarmParametrization/ERF_WindFarm.H;Source/WindFarmParametrization/Fitch/ERF_Fitch.H;Source/BoundaryConditions/ERF_PhysBCFunct.H;Source/BoundaryConditions/ERF_MOSTAverage.H;Source/BoundaryConditions/ERF_MOSTRoughness.H;Source/BoundaryConditions/ERF_ABLMost.H;Source/BoundaryConditions/ERF_FillPatcher.H;Source/BoundaryConditions/ERF_MOSTStress.H;Source/BoundaryConditions/ERF_TimeInterpolatedData.H;Source/Utils/ERF_Interpolation.H;Source/Utils/ERF_TileNoZ.H;Source/Utils/ERF_PlaneAverage.H;Source/Utils/ERF_Interpolation_WENO.H;Source/Utils/ERF_DirectionSelector.H;Source/Utils/ERF_ParFunctions.H;Source/Utils/ERF_Wstar.H;Source/Utils/ERF_Microphysics_Utils.H;Source/Utils/ERF_Sat_methods.H;Source/Utils/ERF_Interpolation_1D.H;Source/Utils/ERF_Interpolation_UPW.H;Source/Utils/ERF_TerrainMetrics.H;Source/Utils/ERF_Interpolation_WENO_Z.H;Source/Utils/ERF_Thetav.H;Source/Utils/ERF_Water_vapor_saturation.H;Source/Utils/ERF_Utils.H;Source/Utils/ERF_Orbit.H;Source/Utils/ERF_EOS.H;Source/Utils/ERF_HSE_utils.H;Source/EB/ERF_TerrainIF.H;Source/EB/ERF_FlowerIF.H;Source/EB/ERF_eb_if.H;Source/Particles/ERFPC.H;Source/Particles/ERF_ParticleData.H;Source/Prob/ERF_init_density_hse_dry.H;Source/Prob/ERF_init_rayleigh_damping.H;Source/Prob/ERF_init_constant_density_hse.H;Source/ERF_prob_common.H;Source/ERF_Derive.H;Source/Radiation/ERF_Mam4_constituents.H;Source/Radiation/ERF_Mam4
_aero.H;Source/Radiation/ERF_Optics.H;Source/Radiation/ERF_Modal_aero_wateruptake.H;Source/Radiation/ERF_Cloud_rad_props.H;Source/Radiation/ERF_Phys_prop.H;Source/Radiation/ERF_Radiation.H;Source/Radiation/ERF_Albedo.H;Source/Radiation/ERF_Parameterizations.H;Source/Radiation/ERF_Rad_constants.H;Source/Radiation/ERF_Aero_rad_props.H;Source/Radiation/ERF_m2005_effradius.H;Source/Radiation/ERF_Linear_interpolate.H;Source/Radiation/ERF_Slingo.H;Source/Radiation/ERF_Rrtmgp.H;Source/Radiation/ERF_Ebert_curry.H;Source/SourceTerms/ERF_NumericalDiffusion.H;Source/SourceTerms/ERF_Src_headers.H;Source/IO/ERF_SampleData.H;Source/IO/ERF_NCInterface.H;Source/IO/ERF_NCWpsFile.H;Source/IO/ERF_NCPlotFile.H;Source/IO/ERF_ReadBndryPlanes.H;Source/IO/ERF_WriteBndryPlanes.H;Source/PBL/ERF_MYNNStruct.H;Source/PBL/ERF_PBLModels.H;Source/PBL/ERF_PBLHeight.H;Source/TimeIntegration/ERF_TI_substep_fun.H;Source/TimeIntegration/ERF_TI_slow_headers.H;Source/TimeIntegration/ERF_TI_slow_rhs_fun.H;Source/TimeIntegration/ERF_TI_fast_headers.H;Source/TimeIntegration/ERF_TI_utils.H;Source/TimeIntegration/ERF_MRI.H;Source/TimeIntegration/ERF_TI_no_substep_fun.H;Source/LandSurfaceModel/Null/ERF_NullSurf.H;Source/LandSurfaceModel/ERF_LandSurface.H;Source/LandSurfaceModel/MM5/ERF_MM5.H;Source/LandSurfaceModel/SLM/ERF_SLM.H;Source/ERF_IndexDefines.H;Source/Advection/ERF_AdvectionSrcForMom_N.H;Source/Advection/ERF_AdvectionSrcForScalars.H;Source/Advection/ERF_AdvectionSrcForMom_T.H;Source/Advection/ERF_Advection.H;Source/MultiBlock/ERF_MultiBlockContainer.H;Source/Initialization/ERF_Metgrid_utils.H;Source/Diffusion/ERF_EddyViscosity.H;Source/Diffusion/ERF_Diffusion.H;Source/Microphysics/Null/ERF_NullMoistLagrangian.H;Source/Microphysics/Null/ERF_NullMoist.H;Source/Microphysics/ERF_Microphysics.H;Source/Microphysics/ERF_LagrangianMicrophysics.H;Source/Microphysics/ERF_EulerianMicrophysics.H;Source/Microphysics/Kessler/ERF_Kessler.H;Source/Microphysics/SAM/ERF_SAM.H;Source/DataStructs/ERF_InputSpongeData.H;S
ource/DataStructs/ERF_TurbPertStruct.H;Source/DataStructs/ERF_SpongeStruct.H;Source/DataStructs/ERF_AdvStruct.H;Source/DataStructs/ERF_DataStruct.H;Source/DataStructs/ERF_InputSoundingData.H;Source/DataStructs/ERF_DiffStruct.H;Source/DataStructs/ERF_TurbStruct.H;Source/LinearSolvers/ERF_TerrainPoisson.H;Source/LinearSolvers/ERF_FFT_TerrainPrecond.H") set_target_properties( erf_srclib PROPERTIES PUBLIC_HEADER "${ERF_INCLUDES}") diff --git a/Docs/sphinx_doc/LinearSolvers.rst b/Docs/sphinx_doc/LinearSolvers.rst index 9ac62c56c..6b6a01f2a 100644 --- a/Docs/sphinx_doc/LinearSolvers.rst +++ b/Docs/sphinx_doc/LinearSolvers.rst @@ -7,6 +7,18 @@ Linear Solvers ============== -Evolving the anelastic equation set requires solution of a Poisson equation in which we solve for the update to the perturbational pressure at cell centers. AMReX provides several solver options: geometric multigrid, Fast Fourier Transforms (FFTs) and preconditioned GMRES. For simulations with no terrain or grid stretching, one of the FFT options is generally the fastest solver, followed by multigrid. We note that the multigrid solver has the option to ``ignore'' a coordinate direction if the domain is only one cell wide in that direction; this allows for efficient solution of effectively 2D problems. Multigrid can also be used when the union of grids at a level is not in itself rectangular; the FFT solvers do not work in that general case. +Evolving the anelastic equation set requires solution of a Poisson equation in which we solve for the update to the perturbational pressure at cell centers. +ERF uses several solver options available through AMReX: geometric multigrid, Fast Fourier Transforms (FFTs) and preconditioned GMRES. +For simulations with no terrain or grid stretching, one of the FFT options is generally the fastest solver, +followed by multigrid. 
We note that the multigrid solver has the option to ``ignore`` a coordinate direction +if the domain is only one cell wide in that direction; this allows for efficient solution of effectively 2D problems. +Multigrid can also be used when the union of grids at a level is not in itself rectangular; the FFT solvers do not work in that general case. -For simulations using grid stretching in the vertical but flat terrain, we must use the hybrid FFT solver in which we perform 2D transforms only in the lateral directions and couple the solution in the vertical direction with a tridiagonal solve. In both these cases we use a 7-point stencil. To solve the Poisson equation on terrain-fitted coordinates with general terrain, we rely on the preconditioned GMRES solver since the stencil effectively has variable coefficients and requires 19 points. +For simulations using grid stretching in the vertical but flat terrain, we must use the hybrid FFT solver in which +we perform 2D transforms only in the lateral directions and couple the solution in the vertical direction with a tridiagonal solve. +In both these cases we use a 7-point stencil. +To solve the Poisson equation on terrain-fitted coordinates with general terrain, +we rely on the FFT-preconditioned GMRES solver since the stencil effectively has variable coefficients and requires 19 points. + + .. note:: + **Currently only doubly periodic lateral boundary conditions are supported by the hybrid FFT, and therefore by the GMRES solver. 
More general boundary conditions are a work in progress.** diff --git a/Docs/sphinx_doc/Plotfiles.rst b/Docs/sphinx_doc/Plotfiles.rst index 18b3cb3ea..322e813a9 100644 --- a/Docs/sphinx_doc/Plotfiles.rst +++ b/Docs/sphinx_doc/Plotfiles.rst @@ -34,9 +34,8 @@ List of Parameters | Parameter | Definition | Acceptable | Default | | | | Values | | +=============================+==================+=======================+============+ -| **erf.plotfile_type** | AMReX, NETCDF | "amrex" or | "amrex" | -| | or HDF5 | "netcdf / "NetCDF" or | | -| | | "hdf5" / "HDF5" | | +| **erf.plotfile_type** | AMReX or NETCDF | "amrex" or | "amrex" | +| | | "netcdf" / "NetCDF"   | | +-----------------------------+------------------+-----------------------+------------+ | **erf.plot_file_1** | prefix for | String | “*plt_1_*” | | | plotfiles | | | @@ -104,8 +103,7 @@ Examples of Usage means that native plot files (actually directories) starting with the prefix “*plt_run*” will be generated every 10 level-0 time steps. If using amrex format, that directory names will be *plt_run00000*, *plt_run00010*, - *plt_run00020*, etc. If using HDF5 format, the names will have ".h5" appended; if using NetCDF format, the names will have ".nc" appended. + *plt_run00020*, etc. If using NetCDF format, the names will have ".nc" appended. In addition, while the amrex plotfiles will contain data at all of the refinement levels, NetCDF files are separated by level. 
diff --git a/Docs/sphinx_doc/building.rst b/Docs/sphinx_doc/building.rst index fa47d8c35..4f4cf1d94 100644 --- a/Docs/sphinx_doc/building.rst +++ b/Docs/sphinx_doc/building.rst @@ -92,8 +92,6 @@ or if using tcsh, +--------------------+------------------------------+------------------+-------------+ | USE_NETCDF | Whether to enable NETCDF | TRUE / FALSE | FALSE | +--------------------+------------------------------+------------------+-------------+ - | USE_HDF5 | Whether to enable HDF5 | TRUE / FALSE | FALSE | - +--------------------+------------------------------+------------------+-------------+ | USE_PARTICLES | Whether to enable particles | TRUE / FALSE | FALSE | +--------------------+------------------------------+------------------+-------------+ | USE_WARM_NO_PRECIP | Whether to use warm moisture | TRUE / FALSE | FALSE | @@ -183,8 +181,6 @@ Analogous to GNU Make, the list of cmake directives is as follows: +---------------------------+------------------------------+------------------+-------------+ | ERF_ENABLE_NETCDF | Whether to enable NETCDF | TRUE / FALSE | FALSE | +---------------------------+------------------------------+------------------+-------------+ - | ERF_ENABLE_HDF5 | Whether to enable HDF5 | TRUE / FALSE | FALSE | - +---------------------------+------------------------------+------------------+-------------+ | ERF_ENABLE_PARTICLES | Whether to enable particles | TRUE / FALSE | FALSE | +---------------------------+------------------------------+------------------+-------------+ | ERF_ENABLE_WARM_NO_PRECIP | Whether to use warm moisture | TRUE / FALSE | FALSE | @@ -205,20 +201,10 @@ Analogous to GNU Make, the list of cmake directives is as follows: Mac with CMake ~~~~~~~~~~~~~~ Tested with macOS 12.7 (Monterey) using cmake (3.27.8), open-mpi (5.0.0), and -pkg-config (0.29.2) installed with the homebrew package manager. HDF5 and +pkg-config (0.29.2) installed with the homebrew package manager. NetCDF will be compiled from source. 
The instructions below should be version agnostic. -HDF5 (tested with v1.14.3) - -#. Download latest source package from `hdfgroup.org`_ -#. Extract source code ``tar xzf hdf5-.tar.gz`` -#. Create build directory ``cd hdf5- && mkdir build && cd build`` -#. Configure for your system ``../configure --prefix=/usr/local --enable-parallel`` -#. Build ``make -j8`` and ``sudo make install`` - -.. _hdfgroup.org: https://www.hdfgroup.org/download-hdf5/source-code/ - NetCDF (tested with v4.9.2) #. Download latest source package from `ucar.edu`_ @@ -250,7 +236,6 @@ ERF (tested with commit ``40e64ed35ebc080ad61d08aea828330dfbdbc162``) -DERF_ENABLE_FCOMPARE:BOOL=ON \ -DERF_ENABLE_DOCUMENTATION:BOOL=OFF \ -DERF_ENABLE_NETCDF:BOOL=ON \ - -DERF_ENABLE_HDF5:BOOL=ON \ -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=ON \ .. && make -j8 @@ -272,7 +257,6 @@ If you will be using NetCDF, we suggest you add the following four lines to you module load cray-hdf5-parallel/1.12.2.9 module load cray-netcdf-hdf5parallel - export HDF5_DIR=/opt/cray/pe/hdf5-parallel/1.12.2.9 export NETCDF_DIR=/opt/cray/pe/netcdf-hdf5parallel/4.9.0.9 Then build ERF as, for example (specify your own path to the AMReX submodule in ``ERF/Submodules/AMReX``): diff --git a/Exec/DryRegTests/EkmanSpiral/GNUmakefile b/Exec/DryRegTests/EkmanSpiral/GNUmakefile index 23a64fb3e..5ac3ad752 100644 --- a/Exec/DryRegTests/EkmanSpiral/GNUmakefile +++ b/Exec/DryRegTests/EkmanSpiral/GNUmakefile @@ -25,7 +25,6 @@ TEST = TRUE USE_ASSERTION = TRUE USE_NETCDF = TRUE -USE_HDF5 = TRUE # GNU Make Bpack := ./Make.package diff --git a/Exec/DryRegTests/WPS_Test/GNUmakefile b/Exec/DryRegTests/WPS_Test/GNUmakefile index 83fd3d453..e346e04d3 100644 --- a/Exec/DryRegTests/WPS_Test/GNUmakefile +++ b/Exec/DryRegTests/WPS_Test/GNUmakefile @@ -25,7 +25,6 @@ TEST = TRUE USE_ASSERTION = TRUE USE_NETCDF = TRUE -#USE_HDF5 = TRUE # GNU Make Bpack := ./Make.package diff --git a/Exec/LLJ/GNUmakefile b/Exec/LLJ/GNUmakefile index 4a773e7b1..5c501f580 100644 --- 
a/Exec/LLJ/GNUmakefile +++ b/Exec/LLJ/GNUmakefile @@ -25,7 +25,6 @@ TEST = TRUE USE_ASSERTION = TRUE USE_NETCDF = TRUE -#USE_HDF5 = TRUE # GNU Make Bpack := ./Make.package diff --git a/Exec/Make.ERF b/Exec/Make.ERF index 779e68b93..d90ccc172 100644 --- a/Exec/Make.ERF +++ b/Exec/Make.ERF @@ -33,6 +33,9 @@ include $(ERF_INIT_DIR)/Make.package ERF_DATA_DIR = $(ERF_SOURCE_DIR)/DataStructs include $(ERF_DATA_DIR)/Make.package +ERF_SOLVERS_DIR = $(ERF_SOURCE_DIR)/LinearSolvers +include $(ERF_SOLVERS_DIR)/Make.package + ERF_UTIL_DIR = $(ERF_SOURCE_DIR)/Utils include $(ERF_UTIL_DIR)/Make.package @@ -57,6 +60,9 @@ INCLUDE_LOCATIONS += $(ERF_DATA_DIR) VPATH_LOCATIONS += $(ERF_UTIL_DIR) INCLUDE_LOCATIONS += $(ERF_UTIL_DIR) +VPATH_LOCATIONS += $(ERF_SOLVERS_DIR) +INCLUDE_LOCATIONS += $(ERF_SOLVERS_DIR) + ifeq ($(USE_MULTIBLOCK),TRUE) VPATH_LOCATIONS += $(ERF_MULTIBLOCK_DIR) INCLUDE_LOCATIONS += $(ERF_MULTIBLOCK_DIR) @@ -119,10 +125,6 @@ ifeq ($(USE_FFT),TRUE) AMReXdirs += FFT endif -ifeq ($(USE_HDF5),TRUE) -AMReXdirs += Extern/HDF5 -endif - AMReXpack += $(foreach dir, $(AMReXdirs), $(AMREX_HOME)/Src/$(dir)/Make.package) include $(AMReXpack) @@ -261,11 +263,6 @@ ifeq ($(USE_NETCDF), TRUE) endif endif -ifeq ($(USE_HDF5), TRUE) - DEFINES += -DERF_USE_HDF5 - DEFINES += -DAMREX_USE_HDF5 -endif - ifeq ($(USE_TERRAIN_VELOCITY), TRUE) DEFINES += -DERF_USE_TERRAIN_VELOCITY endif diff --git a/Source/ERF.H b/Source/ERF.H index aa8c7a8fa..777625041 100644 --- a/Source/ERF.H +++ b/Source/ERF.H @@ -89,7 +89,7 @@ AMREX_ENUM(StateInterpType, * Enum of possible plotfile types */ AMREX_ENUM(PlotFileType, - None, Amrex, Netcdf, HDF5 + None, Amrex, Netcdf ); @@ -163,6 +163,10 @@ public: void WriteMyEBSurface (); #endif + // Compute the divergence of the momenta + void compute_divergence (int lev, amrex::MultiFab& rhs, amrex::Vector& mom_mf, + amrex::Geometry const& geom_at_lev); + // Project the velocities to be divergence-free -- this is only relevant if anelastic == 1 void project_velocities 
(int lev, amrex::Real dt, amrex::Vector& vars, amrex::MultiFab& p); @@ -182,7 +186,7 @@ public: #endif void solve_with_gmres (int lev, amrex::Vector& rhs, amrex::Vector& p, - amrex::Vector>& fluxes); + amrex::Array& fluxes); void solve_with_mlmg (int lev, amrex::Vector& rhs, amrex::Vector& p, amrex::Vector>& fluxes); diff --git a/Source/IO/ERF_Plotfile.cpp b/Source/IO/ERF_Plotfile.cpp index c53f2c89c..a06152928 100644 --- a/Source/IO/ERF_Plotfile.cpp +++ b/Source/IO/ERF_Plotfile.cpp @@ -1389,14 +1389,6 @@ ERF::WritePlotFile (int which, PlotFileType plotfile_type, Vector p #ifdef ERF_USE_PARTICLES particleData.writePlotFile(plotfilename); #endif -#ifdef ERF_USE_HDF5 - } else if (plotfile_type == PlotFileType::HDF5) { - Print() << "Writing plotfile " << plotfilename+"d01.h5" << "\n"; - WriteMultiLevelPlotfileHDF5(plotfilename, finest_level+1, - GetVecOfConstPtrs(mf), - varnames, - Geom(), t_new[0], istep, refRatio()); -#endif #ifdef ERF_USE_NETCDF } else if (plotfile_type == PlotFileType::Netcdf) { int lev = 0; diff --git a/Source/LinearSolvers/ERF_FFT_TerrainPrecond.H b/Source/LinearSolvers/ERF_FFT_TerrainPrecond.H new file mode 100644 index 000000000..941fe3f29 --- /dev/null +++ b/Source/LinearSolvers/ERF_FFT_TerrainPrecond.H @@ -0,0 +1,262 @@ +#ifndef ERF_FFT_TERRAIN_PRECOND_H_ +#define ERF_FFT_TERRAIN_PRECOND_H_ + +#include +#include +#include + +namespace amrex::FFT +{ + +/** + * \brief 3D Preconditioner for terrain problems with periodic boundaries in the first two + * dimensions and Neumann in the last dimension. 
+ */ +template +class PoissonTerrainPrecond +{ +public: + using T = typename MF::value_type; + + template ,int> = 0> + explicit PoissonTerrainPrecond (Geometry const& geom) + : m_geom(geom), m_r2c(geom.Domain(), Info().setBatchMode(true)) + { + AMREX_ALWAYS_ASSERT(geom.isPeriodic(0) && geom.isPeriodic(1)); + } + + void solve (MF& soln, MF const& rhs, MF const& height); + + template + void solve_doit (MF& soln, MF const& rhs, MF const& height, DZ const& dz); // has to be public for cuda + +private: + Geometry m_geom; + R2C m_r2c; +}; + +template +void PoissonTerrainPrecond::solve (MF& soln, MF const& rhs, MF const& height) +{ + auto delz = T(m_geom.CellSize(AMREX_SPACEDIM-1)); + solve_doit(soln, rhs, height, fft_poisson_detail::DZ{delz}); +} + +template +template +void PoissonTerrainPrecond::solve_doit (MF& soln, MF const& rhs, MF const& height, DZ const& dz) +{ + BL_PROFILE("FFT::PoissonTerrainPrecond::solve"); + +#if (AMREX_SPACEDIM < 3) + amrex::ignore_unused(soln, rhs, dz); +#else + auto facx = T(2)*Math::pi()/T(m_geom.ProbLength(0)); + auto facy = T(2)*Math::pi()/T(m_geom.ProbLength(1)); + + auto dx =T(m_geom.CellSize(0)); + auto dy = T(m_geom.CellSize(1)); + + auto dxinv = T(m_geom.InvCellSize(0)); + auto dyinv = T(m_geom.InvCellSize(1)); + + auto scale = T(1.0)/(T(m_geom.Domain().length(0)) * + T(m_geom.Domain().length(1))); + auto ny = m_geom.Domain().length(1); + auto nz = m_geom.Domain().length(2); + + Box cdomain = m_geom.Domain(); + cdomain.setBig(0,cdomain.length(0)/2); + auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), + {AMREX_D_DECL(true,true,false)}); + DistributionMapping dm = detail::make_iota_distromap(cba.size()); + FabArray > > spmf(cba, dm, 1, 0); + + m_r2c.forward(rhs, spmf); + + for (MFIter mfi(spmf); mfi.isValid(); ++mfi) + { + auto const& spectral = spmf.array(mfi); + auto const& box = mfi.validbox(); + auto const& xybox = amrex::makeSlab(box, 2, 0); + + auto const zp = height.const_array(mfi); + +#ifdef AMREX_USE_GPU + 
// xxxxx TODO: We need to explore how to optimize this + // function. Maybe we can use cusparse. Maybe we should make + // z-direction to be the unit stride direction. + + FArrayBox tridiag_workspace(box,4); + auto const& ald = tridiag_workspace.array(0); + auto const& bd = tridiag_workspace.array(1); + auto const& cud = tridiag_workspace.array(2); + auto const& scratch = tridiag_workspace.array(3); + + amrex::ParallelFor(xybox, [=] AMREX_GPU_DEVICE (int i, int j, int) + { + T a = facx*i; + T b = (j < ny/2) ? facy*j : facy*(ny-j); + + T k2 = T(2)*(std::cos(a*dx)-T(1))/(dx*dx) + + T(2)*(std::cos(b*dy)-T(1))/(dy*dy); + + // Tridiagonal solve with homogeneous Neumann + for(int k=0; k < nz; k++) { + Real hzeta_inv_on_cc = 4.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k ) + zp(i+1,j,k ) + zp(i,j+1,k ) + zp(i+1,j+1,k )) ); + if(k==0) { + + Real hzeta_inv_on_zhi = 8.0 / ( (zp(i,j,k+2) + zp(i+1,j,k+2) + zp(i,j+1,k+2) + zp(i+1,j+1,k+2)) + -(zp(i,j,k ) + zp(i+1,j,k ) + zp(i,j+1,k ) + zp(i+1,j+1,k )) ); + Real h_xi_on_zhi = 0.5 * (zp(i+1,j+1,k+1) + zp(i+1,j,k+1) - zp(i,j+1,k+1) - zp(i,j,k+1)) * dxinv; + Real h_eta_on_zhi = 0.5 * (zp(i+1,j+1,k+1) + zp(i,j+1,k+1) - zp(i+1,j,k+1) - zp(i,j,k+1)) * dyinv; + + ald(i,j,k) = 0.; + cud(i,j,k) = hzeta_inv_on_cc * (1.0 + h_xi_on_zhi*h_xi_on_zhi + h_eta_on_zhi*h_eta_on_zhi) * hzeta_inv_on_zhi; + bd(i,j,k) = k2 - ald(i,j,k) - cud(i,j,k); + + } else if (k == nz-1) { + + Real hzeta_inv_on_zlo = 8.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k-1) + zp(i+1,j,k-1) + zp(i,j+1,k-1) + zp(i+1,j+1,k-1)) ); + Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k ) + zp(i+1,j,k ) - zp(i,j+1,k ) - zp(i,j,k )) * dxinv; + Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k ) + zp(i,j+1,k ) - zp(i+1,j,k ) - zp(i,j,k )) * dyinv; + ald(i,j,k) = hzeta_inv_on_cc * (1.0 + h_xi_on_zlo*h_xi_on_zlo + h_eta_on_zlo*h_eta_on_zlo) * hzeta_inv_on_zlo; + cud(i,j,k) = 0.; + bd(i,j,k) = k2 - ald(i,j,k) - cud(i,j,k); + if (i 
== 0 && j == 0) { + bd(i,j,k) *= 2.0; + } + } else { + Real hzeta_inv_on_zlo = 8.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k-1) + zp(i+1,j,k-1) + zp(i,j+1,k-1) + zp(i+1,j+1,k-1)) ); + Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k ) + zp(i+1,j,k ) - zp(i,j+1,k ) - zp(i,j,k )) * dxinv; + Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k ) + zp(i,j+1,k ) - zp(i+1,j,k ) - zp(i,j,k )) * dyinv; + + Real hzeta_inv_on_zhi = 8.0 / ( (zp(i,j,k+2) + zp(i+1,j,k+2) + zp(i,j+1,k+2) + zp(i+1,j+1,k+2)) + -(zp(i,j,k ) + zp(i+1,j,k ) + zp(i,j+1,k ) + zp(i+1,j+1,k )) ); + Real h_xi_on_zhi = 0.5 * (zp(i+1,j+1,k+1) + zp(i+1,j,k+1) - zp(i,j+1,k+1) - zp(i,j,k+1)) * dxinv; + Real h_eta_on_zhi = 0.5 * (zp(i+1,j+1,k+1) + zp(i,j+1,k+1) - zp(i+1,j,k+1) - zp(i,j,k+1)) * dyinv; + + ald(i,j,k) = hzeta_inv_on_cc * (1.0 + h_xi_on_zlo*h_xi_on_zlo + h_eta_on_zlo*h_eta_on_zlo) * hzeta_inv_on_zlo; + cud(i,j,k) = hzeta_inv_on_cc * (1.0 + h_xi_on_zhi*h_xi_on_zhi + h_eta_on_zhi*h_eta_on_zhi) * hzeta_inv_on_zhi; + bd(i,j,k) = k2 - ald(i,j,k) - cud(i,j,k); + + } + } + + scratch(i,j,0) = cud(i,j,0)/bd(i,j,0); + spectral(i,j,0) = spectral(i,j,0)/bd(i,j,0); + + for (int k = 1; k < nz; k++) { + if (k < nz-1) { + scratch(i,j,k) = cud(i,j,k) / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1)); + } + spectral(i,j,k) = (spectral(i,j,k) - ald(i,j,k) * spectral(i,j,k - 1)) + / (bd(i,j,k) - ald(i,j,k) * scratch(i,j,k-1)); + } + + for (int k = nz - 2; k >= 0; k--) { + spectral(i,j,k) -= scratch(i,j,k) * spectral(i,j,k + 1); + } + + for (int k = 0; k < nz; ++k) { + spectral(i,j,k) *= scale; + } + }); + Gpu::streamSynchronize(); + +#else + + Gpu::DeviceVector> ald(nz); + Gpu::DeviceVector> bd(nz); + Gpu::DeviceVector> cud(nz); + Gpu::DeviceVector> scratch(nz); + + amrex::LoopOnCpu(xybox, [&] (int i, int j, int) + { + T a = facx*i; + T b = (j < ny/2) ? 
facy*j : facy*(ny-j); + + T k2 = T(2)*(std::cos(a*dx)-T(1))/(dx*dx) + + T(2)*(std::cos(b*dy)-T(1))/(dy*dy); + + // Tridiagonal solve with homogeneous Neumann + for(int k=0; k < nz; k++) { + + Real hzeta_inv_on_cc = 4.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k ) + zp(i+1,j,k ) + zp(i,j+1,k ) + zp(i+1,j+1,k )) ); + + if(k==0) { + + Real hzeta_inv_on_zhi = 8.0 / ( (zp(i,j,k+2) + zp(i+1,j,k+2) + zp(i,j+1,k+2) + zp(i+1,j+1,k+2)) + -(zp(i,j,k ) + zp(i+1,j,k ) + zp(i,j+1,k ) + zp(i+1,j+1,k )) ); + Real h_xi_on_zhi = 0.5 * (zp(i+1,j+1,k+1) + zp(i+1,j,k+1) - zp(i,j+1,k+1) - zp(i,j,k+1)) * dxinv; + Real h_eta_on_zhi = 0.5 * (zp(i+1,j+1,k+1) + zp(i,j+1,k+1) - zp(i+1,j,k+1) - zp(i,j,k+1)) * dyinv; + + ald[k] = 0.; + cud[k] = hzeta_inv_on_cc * (1.0 + h_xi_on_zhi*h_xi_on_zhi + h_eta_on_zhi*h_eta_on_zhi) * hzeta_inv_on_zhi; + bd[k] = k2 -ald[k]-cud[k]; + + } else if (k == nz-1) { + + Real hzeta_inv_on_zlo = 8.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k-1) + zp(i+1,j,k-1) + zp(i,j+1,k-1) + zp(i+1,j+1,k-1)) ); + Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k ) + zp(i+1,j,k ) - zp(i,j+1,k ) - zp(i,j,k )) * dxinv; + Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k ) + zp(i,j+1,k ) - zp(i+1,j,k ) - zp(i,j,k )) * dyinv; + + ald[k] = hzeta_inv_on_cc * (1.0 + h_xi_on_zlo*h_xi_on_zlo + h_eta_on_zlo*h_eta_on_zlo) * hzeta_inv_on_zlo; + cud[k] = 0.; + bd[k] = k2 -ald[k]-cud[k]; + + if (i == 0 && j == 0) { + bd[k] *= 2.0; + } + } else { + + Real hzeta_inv_on_zlo = 8.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k-1) + zp(i+1,j,k-1) + zp(i,j+1,k-1) + zp(i+1,j+1,k-1)) ); + Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k ) + zp(i+1,j,k ) - zp(i,j+1,k ) - zp(i,j,k )) * dxinv; + Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k ) + zp(i,j+1,k ) - zp(i+1,j,k ) - zp(i,j,k )) * dyinv; + + Real hzeta_inv_on_zhi = 8.0 / ( (zp(i,j,k+2) + zp(i+1,j,k+2) + zp(i,j+1,k+2) + zp(i+1,j+1,k+2)) + -(zp(i,j,k ) + zp(i+1,j,k ) + 
zp(i,j+1,k ) + zp(i+1,j+1,k )) ); + Real h_xi_on_zhi = 0.5 * (zp(i+1,j+1,k+1) + zp(i+1,j,k+1) - zp(i,j+1,k+1) - zp(i,j,k+1)) * dxinv; + Real h_eta_on_zhi = 0.5 * (zp(i+1,j+1,k+1) + zp(i,j+1,k+1) - zp(i+1,j,k+1) - zp(i,j,k+1)) * dyinv; + + ald[k] = hzeta_inv_on_cc * (1.0 + h_xi_on_zlo*h_xi_on_zlo + h_eta_on_zlo*h_eta_on_zlo) * hzeta_inv_on_zlo; + cud[k] = hzeta_inv_on_cc * (1.0 + h_xi_on_zhi*h_xi_on_zhi + h_eta_on_zhi*h_eta_on_zhi) * hzeta_inv_on_zhi; + bd[k] = k2 - ald[k] - cud[k]; + } + } + + scratch[0] = cud[0]/bd[0]; + spectral(i,j,0) = spectral(i,j,0)/bd[0]; + + for (int k = 1; k < nz; k++) { + if (k < nz-1) { + scratch[k] = cud[k] / (bd[k] - ald[k] * scratch[k-1]); + } + spectral(i,j,k) = (spectral(i,j,k) - ald[k] * spectral(i,j,k - 1)) + / (bd[k] - ald[k] * scratch[k-1]); + } + + for (int k = nz - 2; k >= 0; k--) { + spectral(i,j,k) -= scratch[k] * spectral(i,j,k + 1); + } + + for (int k = 0; k < nz; ++k) { + spectral(i,j,k) *= scale; + } + }); +#endif + } + + m_r2c.backward(spmf, soln); +#endif +} + +} + +#endif diff --git a/Source/Utils/ERF_PoissonSolve.cpp b/Source/LinearSolvers/ERF_PoissonSolve.cpp similarity index 61% rename from Source/Utils/ERF_PoissonSolve.cpp rename to Source/LinearSolvers/ERF_PoissonSolve.cpp index 2a98ee4a7..2218380c4 100644 --- a/Source/Utils/ERF_PoissonSolve.cpp +++ b/Source/LinearSolvers/ERF_PoissonSolve.cpp @@ -11,14 +11,13 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult { BL_PROFILE("ERF::project_velocities()"); - bool l_use_terrain = SolverChoice::terrain_type != TerrainType::None; - bool use_gmres = (l_use_terrain && !SolverChoice::terrain_is_flat); - #ifndef ERF_USE_EB auto const dom_lo = lbound(geom[lev].Domain()); auto const dom_hi = ubound(geom[lev].Domain()); #endif + bool l_use_terrain = SolverChoice::terrain_type != TerrainType::None; + // Make sure the solver only sees the levels over which we are solving Vector ba_tmp; ba_tmp.push_back(mom_mf[Vars::cons].boxArray()); Vector dm_tmp; 
dm_tmp.push_back(mom_mf[Vars::cons].DistributionMap()); @@ -28,7 +27,6 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult Vector rhs; Vector phi; - Vector > fluxes; #ifdef ERF_USE_EB rhs.resize(1); rhs[0].define(ba_tmp[0], dm_tmp[0], 1, 0, MFInfo(), Factory(lev)); @@ -38,34 +36,14 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult phi.resize(1); phi[0].define(ba_tmp[0], dm_tmp[0], 1, 1); #endif - fluxes.resize(1); - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { -#ifdef ERF_USE_EB - fluxes[0][idim].define(convert(ba_tmp[0], IntVect::TheDimensionVector(idim)), dm_tmp[0], 1, 0, MFInfo(), Factory(lev)); -#else - fluxes[0][idim].define(convert(ba_tmp[0], IntVect::TheDimensionVector(idim)), dm_tmp[0], 1, 0); -#endif - } - auto dxInv = geom[lev].InvCellSizeArray(); - Array rho0_u_const; - rho0_u_const[0] = &mom_mf[IntVars::xmom]; - rho0_u_const[1] = &mom_mf[IntVars::ymom]; - rho0_u_const[2] = &mom_mf[IntVars::zmom]; - - Real abstol = solverChoice.poisson_abstol; - + // // **************************************************************************** - // Compute divergence which will form RHS - // Note that we replace "rho0w" with the contravariant momentum, Omega + // Now convert the rho0w MultiFab to hold Omega rather than rhow // **************************************************************************** -#ifdef ERF_USE_EB - bool already_on_centroids = true; - EB_computeDivergence(rhs[0], rho0_u_const, geom_tmp[0], already_on_centroids); -#else - if (l_use_terrain) - { + // + if (l_use_terrain && !SolverChoice::terrain_is_flat) { for ( MFIter mfi(rhs[0],TilingIfNotGPU()); mfi.isValid(); ++mfi) { const Array4& rho0u_arr = mom_mf[IntVars::xmom].const_array(mfi); @@ -86,77 +64,125 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult } }); } // mfi - // - // Note we compute the divergence after we convert rho0W --> Omega - // - for ( MFIter mfi(rhs[0],TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Box 
bx = mfi.tilebox(); - const Array4& rho0u_arr = mom_mf[IntVars::xmom].const_array(mfi); - const Array4& rho0v_arr = mom_mf[IntVars::ymom].const_array(mfi); - const Array4& rho0w_arr = mom_mf[IntVars::zmom].const_array(mfi); - const Array4& rhs_arr = rhs[0].array(mfi); - - Real* stretched_dz_d_ptr = stretched_dz_d[lev].data(); - ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - Real dz = stretched_dz_d_ptr[k]; - rhs_arr(i,j,k) = (rho0u_arr(i+1,j,k) - rho0u_arr(i,j,k)) * dxInv[0] - +(rho0v_arr(i,j+1,k) - rho0v_arr(i,j,k)) * dxInv[1] - +(rho0w_arr(i,j,k+1) - rho0w_arr(i,j,k)) / dz; - }); - } // mfi - - } else { - - computeDivergence(rhs[0], rho0_u_const, geom_tmp[0]); - } -#endif + + // **************************************************************************** + // Compute divergence which will form RHS + // Note that we replace "rho0w" with the contravariant momentum, Omega + // **************************************************************************** + compute_divergence(lev, rhs[0], mom_mf, geom_tmp[0]); Real rhsnorm = rhs[0].norm0(); if (mg_verbose > 0) { - Print() << "Max norm of divergence before at level " << lev << " : " << rhsnorm << std::endl; + Print() << "Max norm of divergence before solve at level " << lev << " : " << rhsnorm << std::endl; } + // **************************************************************************** + // + // No need to build the solver if RHS == 0 + // + if (rhsnorm <= solverChoice.poisson_abstol) return; + // **************************************************************************** + + // **************************************************************************** // Initialize phi to 0 // (It is essential that we do this in order to fill the corners; these are never // used but the Saxpy requires the values to be initialized.) 
+ // **************************************************************************** phi[0].setVal(0.0); - if (rhsnorm <= abstol) return; - Real start_step = static_cast(ParallelDescriptor::second()); + // **************************************************************************** + // Allocate fluxes + // **************************************************************************** + Vector > fluxes; + fluxes.resize(1); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { +#ifdef ERF_USE_EB + fluxes[0][idim].define(convert(ba_tmp[0], IntVect::TheDimensionVector(idim)), dm_tmp[0], 1, 0, MFInfo(), Factory(lev)); +#else + fluxes[0][idim].define(convert(ba_tmp[0], IntVect::TheDimensionVector(idim)), dm_tmp[0], 1, 0); +#endif + } + // **************************************************************************** // Choose the solver and solve // **************************************************************************** + + // **************************************************************************** + // EB + // **************************************************************************** #ifdef ERF_USE_EB solve_with_EB_mlmg(lev, rhs, phi, fluxes); #else -#ifdef ERF_USE_FFT - if (use_fft) { - - solve_with_fft(lev, rhs[0], phi[0], fluxes[0]); - } else +#ifdef ERF_USE_FFT + bool boxes_make_rectangle = (geom_tmp[0].Domain().numPts() == ba_tmp[0].numPts()); #endif - if (use_gmres) - { - solve_with_gmres(lev, rhs, phi, fluxes); - - } else { + // **************************************************************************** + // No terrain or grid stretching + // **************************************************************************** + if (!l_use_terrain) { +#ifdef ERF_USE_FFT + if (use_fft) { + if (boxes_make_rectangle) { + solve_with_fft(lev, rhs[0], phi[0], fluxes[0]); + } else { + amrex::Warning("FFT won't work unless the boxArray covers the domain: defaulting to MLMG"); + solve_with_mlmg(lev, rhs, phi, fluxes); + } + } else { + solve_with_mlmg(lev, rhs, phi, fluxes); 
+ } +#else + if (use_fft) { + amrex::Warning("use_fft can't be used unless you build with USE_FFT = TRUE; defaulting to MLMG"); + } solve_with_mlmg(lev, rhs, phi, fluxes); - } #endif + } // No terrain or grid stretching // **************************************************************************** - // Subtract dt grad(phi) from the momenta (rho0u, rho0v, Omega) + // Grid stretching (flat terrain) // **************************************************************************** - MultiFab::Add(mom_mf[IntVars::xmom],fluxes[0][0],0,0,1,0); - MultiFab::Add(mom_mf[IntVars::ymom],fluxes[0][1],0,0,1,0); - MultiFab::Add(mom_mf[IntVars::zmom],fluxes[0][2],0,0,1,0); + else if (l_use_terrain && SolverChoice::terrain_is_flat) { +#ifndef ERF_USE_FFT + amrex::Abort("Rebuild with USE_FFT = TRUE so you can use the FFT solver"); +#else + if (!boxes_make_rectangle) { + amrex::Abort("FFT won't work unless the boxArray covers the domain"); + } else { + if (!use_fft) { + amrex::Warning("Using FFT even though you didn't set use_fft = 1; it's the best choice"); + } + solve_with_fft(lev, rhs[0], phi[0], fluxes[0]); + } +#endif + } // grid stretching + + // **************************************************************************** + // General terrain + // **************************************************************************** + else if (l_use_terrain && !SolverChoice::terrain_is_flat) { +#ifdef ERF_USE_FFT + if (use_fft) + { + amrex::Warning("FFT solver does not work for general terrain: switching to GMRES"); + } + if (!boxes_make_rectangle) { + amrex::Abort("FFT preconditioner for GMRES won't work unless the boxArray covers the domain"); + } else { + solve_with_gmres(lev, rhs, phi, fluxes[0]); + } +#else + amrex::Abort("Rebuild with USE_FFT = TRUE so you can use the FFT preconditioner for GMRES"); +#endif + } // general terrain + +#endif // not EB // **************************************************************************** // Print time in solve @@ -166,52 +192,36 @@ void 
ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult amrex::Print() << "Time in solve " << end_step - start_step << std::endl; } + // **************************************************************************** + // Subtract dt grad(phi) from the momenta (rho0u, rho0v, Omega) + // **************************************************************************** + MultiFab::Add(mom_mf[IntVars::xmom],fluxes[0][0],0,0,1,0); + MultiFab::Add(mom_mf[IntVars::ymom],fluxes[0][1],0,0,1,0); + MultiFab::Add(mom_mf[IntVars::zmom],fluxes[0][2],0,0,1,0); + // // This call is only to verify the divergence after the solve // It is important we do this before computing the rho0w_arr from Omega back to rho0w // - // // **************************************************************************** // THIS IS SIMPLY VERIFYING THE DIVERGENCE AFTER THE SOLVE // **************************************************************************** // if (mg_verbose > 0) { - if (l_use_terrain) - { - // - // Note we compute the divergence before we convert Omega back to rho0W - // - for ( MFIter mfi(rhs[0],TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Box bx = mfi.tilebox(); - const Array4& rho0u_arr = mom_mf[IntVars::xmom].const_array(mfi); - const Array4& rho0v_arr = mom_mf[IntVars::ymom].const_array(mfi); - const Array4& rho0w_arr = mom_mf[IntVars::zmom].const_array(mfi); - const Array4& rhs_arr = rhs[0].array(mfi); - - Real* stretched_dz_d_ptr = stretched_dz_d[lev].data(); - ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - Real dz = stretched_dz_d_ptr[k]; - rhs_arr(i,j,k) = (rho0u_arr(i+1,j,k) - rho0u_arr(i,j,k)) * dxInv[0] - +(rho0v_arr(i,j+1,k) - rho0v_arr(i,j,k)) * dxInv[1] - +(rho0w_arr(i,j,k+1) - rho0w_arr(i,j,k)) / dz; - }); - } // mfi + compute_divergence(lev, rhs[0], mom_mf, geom_tmp[0]); - } else { - computeDivergence(rhs[0], rho0_u_const, geom_tmp[0]); - } + amrex::Print() << "Max norm of divergence after solve at level " << lev << " : " << rhs[0].norm0() 
<< std::endl; - amrex::Print() << "Max norm of divergence after solve at level " << lev << " : " << rhs[0].norm0() << std::endl; } // mg_verbose + // // **************************************************************************** // Now convert the rho0w MultiFab back to holding (rho0w) rather than Omega // **************************************************************************** // - if (l_use_terrain) + if (l_use_terrain && !solverChoice.terrain_is_flat) { for (MFIter mfi(mom_mf[Vars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi) { diff --git a/Source/Utils/ERF_PoissonSolve_tb.cpp b/Source/LinearSolvers/ERF_PoissonSolve_tb.cpp similarity index 100% rename from Source/Utils/ERF_PoissonSolve_tb.cpp rename to Source/LinearSolvers/ERF_PoissonSolve_tb.cpp diff --git a/Source/LinearSolvers/ERF_TerrainPoisson.H b/Source/LinearSolvers/ERF_TerrainPoisson.H new file mode 100644 index 000000000..40fdc95a1 --- /dev/null +++ b/Source/LinearSolvers/ERF_TerrainPoisson.H @@ -0,0 +1,60 @@ +#ifndef ERF_TERRAIN_POISSON_H_ +#define ERF_TERRAIN_POISSON_H_ + +#include "ERF_TerrainPoisson_3D_K.H" +#ifdef ERF_USE_FFT +#include "ERF_FFT_TerrainPrecond.H" +#endif + +#include +#include + +class TerrainPoisson +{ +public: + + using RT = amrex::Real; + + TerrainPoisson (amrex::Geometry const& geom, amrex::BoxArray const& ba, + amrex::DistributionMapping const& dm, + amrex::MultiFab const* z_phys_nd); + + void apply(amrex::MultiFab& lhs, amrex::MultiFab const& rhs); + + void usePrecond(bool precond_flag); + + void getFluxes(amrex::MultiFab& phi, amrex::Array& fluxes); + + void assign(amrex::MultiFab& lhs, amrex::MultiFab const& rhs); + + void scale(amrex::MultiFab& lhs, amrex::Real fac); + + amrex::Real dotProduct(amrex::MultiFab const& v1, amrex::MultiFab const& v2); + + void increment(amrex::MultiFab& lhs, amrex::MultiFab const& rhs, amrex::Real a); + + void linComb(amrex::MultiFab& lhs, amrex::Real a, amrex::MultiFab const& rhs_a, + amrex::Real b, amrex::MultiFab const& rhs_b); + + 
amrex::MultiFab makeVecRHS(); + + amrex::MultiFab makeVecLHS(); + + amrex::Real norm2(amrex::MultiFab const& v); + + void precond(amrex::MultiFab& lhs, amrex::MultiFab const& rhs); + + void setToZero(amrex::MultiFab& v); + +private: + bool m_use_precond = false; + amrex::Geometry m_geom; + amrex::BoxArray m_grids; + amrex::DistributionMapping m_dmap; + const amrex::MultiFab* m_zphys; +#ifdef ERF_USE_FFT + std::unique_ptr> m_2D_fft_precond; +#endif +}; + +#endif diff --git a/Source/LinearSolvers/ERF_TerrainPoisson.cpp b/Source/LinearSolvers/ERF_TerrainPoisson.cpp new file mode 100644 index 000000000..7e4b354fe --- /dev/null +++ b/Source/LinearSolvers/ERF_TerrainPoisson.cpp @@ -0,0 +1,204 @@ +#include "ERF_TerrainPoisson.H" + +using namespace amrex; + +TerrainPoisson::TerrainPoisson (Geometry const& geom, BoxArray const& ba, + DistributionMapping const& dm, + MultiFab const* z_phys_nd) + : m_geom(geom), + m_grids(ba), + m_dmap(dm), + m_zphys(z_phys_nd) +{ +#ifdef ERF_USE_FFT + if (!m_2D_fft_precond) { + m_2D_fft_precond = std::make_unique>(geom); + } +#endif +} + +void TerrainPoisson::usePrecond(bool use_precond_in) +{ + m_use_precond = use_precond_in; +} + +void TerrainPoisson::apply(MultiFab& lhs, MultiFab const& rhs) +{ + AMREX_ASSERT(rhs.nGrowVect().allGT(0)); + + auto domlo = lbound(m_geom.Domain()); + auto domhi = ubound(m_geom.Domain()); + + MultiFab& xx = const_cast(rhs); + + auto const& dxinv = m_geom.InvCellSizeArray(); + + auto const& y = lhs.arrays(); + auto const& zpa = m_zphys->const_arrays(); + + // Impose periodic and internal boundary conditions + xx.FillBoundary(m_geom.periodicity()); + + if (!m_geom.isPeriodic(0)) { + for (MFIter mfi(xx,true); mfi.isValid(); ++mfi) + { + Box bx = mfi.tilebox(); + const Array4& x_arr = xx.array(mfi); + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (i == domlo.x) { + x_arr(i-1,j,k) = x_arr(i,j,k); + } else if (i == domhi.x) { // OUTFLOW + x_arr(i+1,j,k) = -x_arr(i,j,k); + } + }); + } + } + if 
(!m_geom.isPeriodic(1)) { + for (MFIter mfi(xx,true); mfi.isValid(); ++mfi) + { + Box bx = mfi.tilebox(); + Box bx2(bx); bx2.grow(0,1); + const Array4& x_arr = xx.array(mfi); + ParallelFor(bx2, [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (j == domlo.y) { + x_arr(i,j-1,k) = x_arr(i,j,k); + } else if (j == domhi.y) { + x_arr(i,j+1,k) = x_arr(i,j,k); + } + }); + } // mfi + } + + for (MFIter mfi(xx,true); mfi.isValid(); ++mfi) + { + Box bx = mfi.tilebox(); + Box bx2(bx); bx2.grow(0,1); + Box bx3(bx2); bx3.grow(1,1); + const Array4& x_arr = xx.array(mfi); + ParallelFor(bx3, [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (k == domlo.z) { + x_arr(i,j,k-1) = x_arr(i,j,k); + } else if (k == domhi.z) { + x_arr(i,j,k+1) = x_arr(i,j,k); + } + }); + } // mfi + + auto const& xc = xx.const_arrays(); + ParallelFor(rhs, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + terrpoisson_adotx(i,j,k,y[b], xc[b], zpa[b], dxinv[0], dxinv[1]); + }); +} + +void TerrainPoisson::getFluxes(MultiFab& phi, + Array& fluxes) +{ + auto const& dxinv = m_geom.InvCellSizeArray(); + + auto domlo = lbound(m_geom.Domain()); + auto domhi = ubound(m_geom.Domain()); + + auto const& x = phi.const_arrays(); + auto const& zpa = m_zphys->const_arrays(); + + phi.FillBoundary(m_geom.periodicity()); + + auto const& fx = fluxes[0].arrays(); + ParallelFor(fluxes[0], [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + if (i == domlo.x) { + fx[b](i,j,k) = 0.0; + } else if (i == domhi.x+1) { + fx[b](i,j,k) = 0.0; + } else { + fx[b](i,j,k) = terrpoisson_flux_x(i,j,k,x[b],zpa[b],dxinv[0]); + } + }); + + auto const& fy = fluxes[1].arrays(); + ParallelFor(fluxes[1], [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + if (j == domlo.y) { + fy[b](i,j,k) = 0.0; + } else if (j == domhi.y+1) { + fy[b](i,j,k) = 0.0; + } else { + fy[b](i,j,k) = terrpoisson_flux_y(i,j,k,x[b],zpa[b],dxinv[1]); + } + }); + + auto const& fz = fluxes[2].arrays(); + ParallelFor(fluxes[2], [=] AMREX_GPU_DEVICE (int b, int i, int 
j, int k) + { + if (k == domlo.z) { + fz[b](i,j,k) = 0.0; + } else if (k == domhi.z+1) { + fz[b](i,j,k) = 0.0; + } else { + fz[b](i,j,k) = terrpoisson_flux_z(i,j,k,x[b],zpa[b],dxinv[0],dxinv[1]); + } + }); +} + +void TerrainPoisson::assign(MultiFab& lhs, MultiFab const& rhs) +{ + MultiFab::Copy(lhs, rhs, 0, 0, 1, 0); +} + +void TerrainPoisson::scale(MultiFab& lhs, Real fac) +{ + lhs.mult(fac); +} + +Real TerrainPoisson::dotProduct(MultiFab const& v1, MultiFab const& v2) +{ + return MultiFab::Dot(v1, 0, v2, 0, 1, 0); +} + +void TerrainPoisson::increment(MultiFab& lhs, MultiFab const& rhs, Real a) +{ + MultiFab::Saxpy(lhs, a, rhs, 0, 0, 1, 0); +} + +void TerrainPoisson::linComb(MultiFab& lhs, Real a, MultiFab const& rhs_a, + Real b, MultiFab const& rhs_b) +{ + MultiFab::LinComb(lhs, a, rhs_a, 0, b, rhs_b, 0, 0, 1, 0); +} + + +MultiFab TerrainPoisson::makeVecRHS() +{ + return MultiFab(m_grids, m_dmap, 1, 0); +} + +MultiFab TerrainPoisson::makeVecLHS() +{ + return MultiFab(m_grids, m_dmap, 1, 1); +} + +Real TerrainPoisson::norm2(MultiFab const& v) +{ + return v.norm2(); +} + +void TerrainPoisson::precond(MultiFab& lhs, MultiFab const& rhs) +{ +#ifdef ERF_USE_FFT + if (m_use_precond) { + m_2D_fft_precond->solve(lhs, rhs, *m_zphys); + } else +#endif + { + MultiFab::Copy(lhs, rhs, 0, 0, 1, 0); + } +} + +void TerrainPoisson::setToZero(MultiFab& v) +{ + v.setVal(0); +} diff --git a/Source/LinearSolvers/ERF_TerrainPoisson_3D_K.H b/Source/LinearSolvers/ERF_TerrainPoisson_3D_K.H new file mode 100644 index 000000000..707860d02 --- /dev/null +++ b/Source/LinearSolvers/ERF_TerrainPoisson_3D_K.H @@ -0,0 +1,366 @@ +#ifndef ERF_TERRAINPOISSON_3D_K_H_ +#define ERF_TERRAINPOISSON_3D_K_H_ + +#include + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +T terrpoisson_flux_x (int i, int j, int k, + amrex::Array4 const& sol, + amrex::Array4 const& zp, + T dxinv) noexcept +{ + using amrex::Real; + + Real h_xi, h_zeta; + + // On x-face + Real px_lo = (sol(i,j,k) - sol(i-1,j,k)) * dxinv; + 
+ // On y-edges + Real pz_lo_md_hi = Real(0.5) * ( sol(i,j,k+1) + sol(i-1,j,k+1) + -sol(i,j,k ) - sol(i-1,j,k ) ); + h_xi = Real(0.25) * ( zp(i,j ,k+1) - zp(i-2,j ,k+1) + +zp(i,j+1,k+1) - zp(i-2,j+1,k+1) ) * dxinv; + h_zeta = Real(0.25) * ( zp(i,j ,k+2) - zp(i ,j ,k ) + +zp(i,j+1,k+2) - zp(i ,j+1,k ) ); + pz_lo_md_hi *= h_xi / h_zeta; + + // On y-edges + Real pz_lo_md_lo = Real(0.5) * ( sol(i,j,k ) + sol(i-1,j,k ) + -sol(i,j,k-1) - sol(i-1,j,k-1) ); + + h_xi = Real(0.25) * ( zp(i,j ,k ) - zp(i-2,j ,k ) + +zp(i,j+1,k ) - zp(i-2,j+1,k ) ) * dxinv; + h_zeta = Real(0.25) * ( zp(i,j ,k+1) - zp(i ,j ,k-1) + +zp(i,j+1,k+1) - zp(i ,j+1,k-1) ); + pz_lo_md_lo *= h_xi / h_zeta; + + // On x-face + px_lo -= Real(0.5) * ( pz_lo_md_hi + pz_lo_md_lo ); + + return -px_lo; +} +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +T terrpoisson_flux_y (int i, int j, int k, + amrex::Array4 const& sol, + amrex::Array4 const& zp, + T dyinv) noexcept +{ + using amrex::Real; + + Real h_eta, h_zeta; + + Real py_lo = (sol(i,j,k) - sol(i,j-1,k)) * dyinv; + + // On x-edges + Real pz_md_lo_hi = Real(0.5) * ( sol(i,j,k+1) + sol(i,j-1,k+1) + -sol(i,j,k ) - sol(i,j-1,k ) ); + h_eta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j-2,k+1) + +zp(i+1,j,k+1) - zp(i+1,j-2,k+1) ) * dyinv; + h_zeta = Real(0.25) * ( zp(i ,j,k+2) - zp(i ,j ,k ) + +zp(i+1,j,k+2) - zp(i+1,j ,k ) ); + pz_md_lo_hi *= h_eta/ h_zeta; + + // On x-edges + Real pz_md_lo_lo = Real(0.5) * ( sol(i,j,k ) + sol(i,j-1,k ) + -sol(i,j,k-1) - sol(i,j-1,k-1) ); + + h_eta = Real(0.25) * ( zp(i ,j,k ) - zp(i ,j-2,k ) + +zp(i+1,j,k ) - zp(i+1,j-2,k ) ) * dyinv; + h_zeta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j ,k-1) + +zp(i+1,j,k+1) - zp(i+1,j ,k-1) ); + pz_md_lo_lo *= h_eta/ h_zeta; + + // On y-face + py_lo -= Real(0.5) * ( pz_md_lo_hi + pz_md_lo_lo ); + + return -py_lo; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +T terrpoisson_flux_z (int i, int j, int k, + amrex::Array4 const& sol, + amrex::Array4 const& zp, + T dxinv, T dyinv) noexcept +{ 
+ using amrex::Real; + + Real h_xi, h_eta, h_zeta; + + // On z-face + Real pz_lo = (sol(i,j,k ) - sol(i,j,k-1)); + Real hzeta_inv_on_zlo = 8.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k-1) + zp(i+1,j,k-1) + zp(i,j+1,k-1) + zp(i+1,j+1,k-1)) ); + pz_lo *= hzeta_inv_on_zlo; + + // On corners + Real px_hi_md_lo = Real(0.5) * ( sol(i+1,j ,k ) - sol(i ,j ,k ) + +sol(i+1,j ,k-1) - sol(i ,j ,k-1)) * dxinv; + Real px_lo_md_lo = Real(0.5) * ( sol(i ,j ,k ) - sol(i-1,j ,k ) + +sol(i ,j ,k-1) - sol(i-1,j ,k-1)) * dxinv; + Real py_md_hi_lo = Real(0.5) * ( sol(i ,j+1,k ) - sol(i ,j ,k ) + +sol(i ,j+1,k-1) - sol(i ,j ,k-1)) * dyinv; + Real py_md_lo_lo = Real(0.5) * ( sol(i ,j ,k ) - sol(i ,j-1,k ) + +sol(i ,j ,k-1) - sol(i ,j-1,k-1)) * dyinv; + + // On y-edges + Real pz_hi_md_lo = Real(0.5) * ( sol(i+1,j,k ) + sol(i,j,k ) + -sol(i+1,j,k-1) - sol(i,j,k-1) ); + h_xi = Real(0.25) * ( zp(i+1,j ,k ) - zp(i-1,j ,k ) + +zp(i+1,j+1,k ) - zp(i-1,j+1,k ) ) * dxinv; + h_zeta = Real(0.25) * ( zp(i+1,j ,k+1) - zp(i+1,j ,k-1) + +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); + pz_hi_md_lo *= h_xi / h_zeta; + + // On y-edges + Real pz_lo_md_lo = Real(0.5) * ( sol(i,j,k ) + sol(i-1,j,k ) + -sol(i,j,k-1) - sol(i-1,j,k-1) ); + h_xi = Real(0.25) * ( zp(i,j ,k ) - zp(i-2,j ,k ) + +zp(i,j+1,k ) - zp(i-2,j+1,k ) ) * dxinv; + h_zeta = Real(0.25) * ( zp(i,j ,k+1) - zp(i ,j ,k-1) + +zp(i,j+1,k+1) - zp(i ,j+1,k-1) ); + pz_lo_md_lo *= h_xi / h_zeta; + + // On x-edges + Real pz_md_hi_lo = Real(0.5) * ( sol(i,j+1,k ) + sol(i,j,k ) + -sol(i,j+1,k-1) - sol(i,j,k-1) ); + h_eta = Real(0.25) * ( zp(i ,j+1,k ) - zp(i ,j-1,k) + +zp(i+1,j+1,k ) - zp(i+1,j-1,k) ) * dyinv; + h_zeta = Real(0.25) * ( zp(i ,j+1,k+1) - zp(i ,j+1,k-1) + +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); + pz_md_hi_lo *= h_eta/ h_zeta; + + // On x-edges + Real pz_md_lo_lo = Real(0.5) * ( sol(i,j,k ) + sol(i,j-1,k ) + -sol(i,j,k-1) - sol(i,j-1,k-1) ); + h_eta = Real(0.25) * ( zp(i ,j,k ) - zp(i ,j-2,k ) + +zp(i+1,j,k ) - 
zp(i+1,j-2,k ) ) * dyinv; + h_zeta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j ,k-1) + +zp(i+1,j,k+1) - zp(i+1,j ,k-1) ); + pz_md_lo_lo *= h_eta/ h_zeta; + + // On z-face + Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i+1,j,k) - zp(i,j+1,k) - zp(i,j,k)) * dxinv; + Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i,j+1,k) - zp(i+1,j,k) - zp(i,j,k)) * dyinv; + + pz_lo -= 0.5 * h_xi_on_zlo * ( (px_hi_md_lo + px_lo_md_lo) - (pz_hi_md_lo + pz_lo_md_lo) ); + pz_lo -= 0.5 * h_eta_on_zlo * ( (py_md_hi_lo + py_md_lo_lo) - (pz_md_hi_lo + pz_md_lo_lo) ); + + return -pz_lo; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, + amrex::Array4 const& x, + amrex::Array4 const& zp, + T dxinv, T dyinv) noexcept +{ + using amrex::Real; + Real h_xi, h_eta, h_zeta; + +#if 1 + // ********************************************************* + // Hi x-face + // ********************************************************* + // On x-face + Real px_hi = (x(i+1,j,k) - x(i,j,k)) * dxinv; + + // On y-edges + Real pz_hi_md_hi = Real(0.25) * ( x(i+1,j,k+1) + x(i,j,k+1) + -x(i+1,j,k ) - x(i,j,k ) ); + h_xi = Real(0.25) * ( zp(i+1,j ,k+1) - zp(i-1,j ,k+1) + +zp(i+1,j+1,k+1) - zp(i-1,j+1,k+1) ) * dxinv; + h_zeta = Real(0.25) * ( zp(i+1,j ,k+2) - zp(i+1,j ,k ) + +zp(i+1,j+1,k+2) - zp(i+1,j+1,k ) ); + pz_hi_md_hi *= h_xi / h_zeta; + + // On y-edges + Real pz_hi_md_lo = Real(0.5) * ( x(i+1,j,k ) + x(i,j,k ) + -x(i+1,j,k-1) - x(i,j,k-1) ); + h_xi = Real(0.25) * ( zp(i+1,j ,k ) - zp(i-1,j ,k ) + +zp(i+1,j+1,k ) - zp(i-1,j+1,k ) ) * dxinv; + h_zeta = Real(0.25) * ( zp(i+1,j ,k+1) - zp(i+1,j ,k-1) + +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); + pz_hi_md_lo *= h_xi / h_zeta; + + // On x-face + px_hi -= Real(0.5) * ( pz_hi_md_hi + pz_hi_md_lo ); + + // ********************************************************* + // Lo x-face + // ********************************************************* // On x-face + Real px_lo = (x(i,j,k) - x(i-1,j,k)) * dxinv; + + // 
On y-edges + Real pz_lo_md_hi = Real(0.5) * ( x(i,j,k+1) + x(i-1,j,k+1) + -x(i,j,k ) - x(i-1,j,k ) ); + h_xi = Real(0.25) * ( zp(i,j ,k+1) - zp(i-2,j ,k+1) + +zp(i,j+1,k+1) - zp(i-2,j+1,k+1) ) * dxinv; + h_zeta = Real(0.25) * ( zp(i,j ,k+2) - zp(i ,j ,k ) + +zp(i,j+1,k+2) - zp(i ,j+1,k ) ); + pz_lo_md_hi *= h_xi / h_zeta; + + // On y-edges + Real pz_lo_md_lo = Real(0.5) * ( x(i,j,k ) + x(i-1,j,k ) + -x(i,j,k-1) - x(i-1,j,k-1) ); + + h_xi = Real(0.25) * ( zp(i,j ,k ) - zp(i-2,j ,k ) + +zp(i,j+1,k ) - zp(i-2,j+1,k ) ) * dxinv; + h_zeta = Real(0.25) * ( zp(i,j ,k+1) - zp(i ,j ,k-1) + +zp(i,j+1,k+1) - zp(i ,j+1,k-1) ); + pz_lo_md_lo *= h_xi / h_zeta; + + // On x-face + px_lo -= Real(0.5) * ( pz_lo_md_hi + pz_lo_md_lo ); + + // ********************************************************* + // Hi y-face + // ********************************************************* + // On y-face + Real py_hi = (x(i,j+1,k) - x(i,j,k)) * dyinv; + + // On x-edges + Real pz_md_hi_hi = Real(0.25) * ( x(i,j+1,k+1) + x(i,j,k+1) + -x(i,j+1,k ) - x(i,j,k ) ); + h_eta = Real(0.25) * ( zp(i ,j+1,k+1) - zp(i ,j-1,k+1) + +zp(i+1,j+1,k+1) - zp(i+1,j-1,k+1) ) * dyinv; + + h_zeta = Real(0.25) * ( zp(i ,j+1,k+2) - zp(i ,j+1,k ) + +zp(i+1,j+1,k+2) - zp(i+1,j+1,k ) ); + pz_md_hi_hi *= h_eta/ h_zeta; + + // On x-edges + Real pz_md_hi_lo = Real(0.5) * ( x(i,j+1,k ) + x(i,j,k ) + -x(i,j+1,k-1) - x(i,j,k-1) ); + + h_eta = Real(0.25) * ( zp(i ,j+1,k ) - zp(i ,j-1,k) + +zp(i+1,j+1,k ) - zp(i+1,j-1,k) ) * dyinv; + + h_zeta = Real(0.25) * ( zp(i ,j+1,k+1) - zp(i ,j+1,k-1) + +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); + pz_md_hi_lo *= h_eta/ h_zeta; + + // On y-face + py_hi -= Real(0.5) * ( pz_md_hi_hi + pz_md_hi_lo ); + + // ********************************************************* + // Lo y-face + // ********************************************************* + // On y-face + Real py_lo = (x(i,j,k) - x(i,j-1,k)) * dyinv; + + // On x-edges + Real pz_md_lo_hi = Real(0.5) * ( x(i,j,k+1) + x(i,j-1,k+1) + -x(i,j,k ) - x(i,j-1,k 
) ); + + h_eta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j-2,k+1) + +zp(i+1,j,k+1) - zp(i+1,j-2,k+1) ) * dyinv; + + h_zeta = Real(0.25) * ( zp(i ,j,k+2) - zp(i ,j ,k ) + +zp(i+1,j,k+2) - zp(i+1,j ,k ) ); + pz_md_lo_hi *= h_eta/ h_zeta; + + // On x-edges + Real pz_md_lo_lo = Real(0.5) * ( x(i,j,k ) + x(i,j-1,k ) + -x(i,j,k-1) - x(i,j-1,k-1) ); + + h_eta = Real(0.25) * ( zp(i ,j,k ) - zp(i ,j-2,k ) + +zp(i+1,j,k ) - zp(i+1,j-2,k ) ) * dyinv; + + h_zeta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j ,k-1) + +zp(i+1,j,k+1) - zp(i+1,j ,k-1) ); + pz_md_lo_lo *= h_eta/ h_zeta; + + // On y-face + py_lo -= Real(0.5) * ( pz_md_lo_hi + pz_md_lo_lo ); + + // ********************************************************* + // Hi z-face + // ********************************************************* + // On z-face + Real pz_hi = x(i,j,k+1) - x(i,j,k ); + + // On corners + Real px_hi_md_hi = Real(0.5) * ( x(i+1,j,k+1) - x(i ,j ,k+1) + +x(i+1,j,k ) - x(i ,j ,k )) * dxinv; + Real px_lo_md_hi = Real(0.5) * ( x(i ,j,k+1) - x(i-1,j ,k+1) + +x(i ,j,k ) - x(i-1,j ,k )) * dxinv; + Real py_md_hi_hi = Real(0.5) * ( x(i,j+1,k+1) - x(i ,j ,k+1) + +x(i,j+1,k ) - x(i ,j ,k )) * dyinv; + Real py_md_lo_hi = Real(0.5) * ( x(i,j ,k+1) - x(i ,j-1,k+1) + +x(i,j ,k ) - x(i ,j-1,k )) * dyinv; + + // On z-face + Real h_xi_on_zhi = 0.5 * ( zp(i+1,j+1,k+1) + zp(i+1,j,k+1) - zp(i,j+1,k+1) - zp(i,j,k+1) ) * dxinv; + Real h_eta_on_zhi = 0.5 * ( zp(i+1,j+1,k+1) + zp(i,j+1,k+1) - zp(i+1,j,k+1) - zp(i,j,k+1) ) * dyinv; + + Real hzeta_inv_on_zhi = 8.0 / ( (zp(i,j,k+2) + zp(i+1,j,k+2) + zp(i,j+1,k+2) + zp(i+1,j+1,k+2)) + -(zp(i,j,k ) + zp(i+1,j,k ) + zp(i,j+1,k ) + zp(i+1,j+1,k )) ); + // pz_hi *= hzeta_inv_on_zhi * (1.0 + h_xi_on_zhi*h_xi_on_zhi + h_eta_on_zhi*h_eta_on_zhi); + pz_hi *= hzeta_inv_on_zhi; + + pz_hi -= 0.5 * h_xi_on_zhi * ( (px_hi_md_hi + px_lo_md_hi) - (pz_hi_md_hi + pz_lo_md_hi) ); + pz_hi -= 0.5 * h_eta_on_zhi * ( (py_md_hi_hi + py_md_lo_hi) - (pz_md_hi_hi + pz_md_lo_hi) ); + + // 
********************************************************* + // Lo z-face + // ********************************************************* + // On z-face + Real pz_lo = x(i,j,k ) - x(i,j,k-1); + + // On corners + Real px_hi_md_lo = Real(0.5) * ( x(i+1,j ,k ) - x(i ,j ,k ) + +x(i+1,j ,k-1) - x(i ,j ,k-1)) * dxinv; + Real px_lo_md_lo = Real(0.5) * ( x(i ,j ,k ) - x(i-1,j ,k ) + +x(i ,j ,k-1) - x(i-1,j ,k-1)) * dxinv; + Real py_md_hi_lo = Real(0.5) * ( x(i ,j+1,k ) - x(i ,j ,k ) + +x(i ,j+1,k-1) - x(i ,j ,k-1)) * dyinv; + Real py_md_lo_lo = Real(0.5) * ( x(i ,j ,k ) - x(i ,j-1,k ) + +x(i ,j ,k-1) - x(i ,j-1,k-1)) * dyinv; + + // On z-face + Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i+1,j,k) - zp(i,j+1,k) - zp(i,j,k)) * dxinv; + Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i,j+1,k) - zp(i+1,j,k) - zp(i,j,k)) * dyinv; + + Real hzeta_inv_on_zlo = 8.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k-1) + zp(i+1,j,k-1) + zp(i,j+1,k-1) + zp(i+1,j+1,k-1)) ); + // pz_lo *= hzeta_inv_on_zlo * (1.0 + h_xi_on_zlo*h_xi_on_zlo + h_eta_on_zlo*h_eta_on_zlo); + pz_lo *= hzeta_inv_on_zlo; + + pz_lo -= 0.5 * h_xi_on_zlo * ( (px_hi_md_lo + px_lo_md_lo) - (pz_hi_md_lo + pz_lo_md_lo) ); + pz_lo -= 0.5 * h_eta_on_zlo * ( (py_md_hi_lo + py_md_lo_lo) - (pz_md_hi_lo + pz_md_lo_lo) ); + +#else + // + // This option uses calls to the flux routines so there is + // some duplicated computation + // This option should give the same answer as above + // + Real px_lo = -terrpoisson_flux_x(i ,j,k,x,zp,dxinv); + Real px_hi = -terrpoisson_flux_x(i+1,j,k,x,zp,dxinv); + Real py_lo = -terrpoisson_flux_y(i,j ,k,x,zp,dxinv); + Real py_hi = -terrpoisson_flux_y(i,j+1,k,x,zp,dyinv); + Real pz_lo = -terrpoisson_flux_z(i,j,k ,x,zp,dxinv,dyinv); + Real pz_hi = -terrpoisson_flux_z(i,j,k+1,x,zp,dxinv,dyinv); +#endif + + // ********************************************************* + // Adotx + // ********************************************************* + Real invdJ = 4.0 / ( 
zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1) + -zp(i,j,k ) - zp(i+1,j,k ) - zp(i,j+1,k ) - zp(i+1,j+1,k ) ); + + Real ax_lo = .5 * (zp(i ,j,k+1) + zp(i ,j+1,k+1) - zp(i ,j,k) - zp(i ,j+1,k)); + Real ax_hi = .5 * (zp(i+1,j,k+1) + zp(i+1,j+1,k+1) - zp(i+1,j,k) - zp(i+1,j+1,k)); + Real ay_lo = .5 * (zp(i,j ,k+1) + zp(i+1,j ,k+1) - zp(i,j ,k) - zp(i+1,j ,k)); + Real ay_hi = .5 * (zp(i,j+1,k+1) + zp(i+1,j+1,k+1) - zp(i,j+1,k) - zp(i+1,j+1,k)); + + y(i,j,k) = ( (ax_hi*px_hi - ax_lo*px_lo) * dxinv + +(ay_hi*py_hi - ay_lo*py_lo) * dyinv + +( pz_hi - pz_lo) ) * invdJ; +} +#endif diff --git a/Source/LinearSolvers/ERF_compute_divergence.cpp b/Source/LinearSolvers/ERF_compute_divergence.cpp new file mode 100644 index 000000000..6a96dd8c6 --- /dev/null +++ b/Source/LinearSolvers/ERF_compute_divergence.cpp @@ -0,0 +1,82 @@ +#include "ERF.H" +#include "ERF_Utils.H" + +using namespace amrex; + +/** + * Compute the divergence of the single-level momentum field, which forms the RHS of the Poisson solve + * Note that the level may or may not be level 0. 
+ */ +void ERF::compute_divergence (int lev, MultiFab& rhs, Vector& mom_mf, Geometry const& geom_at_lev) +{ + BL_PROFILE("ERF::compute_divergence()"); + + bool l_use_terrain = (solverChoice.terrain_type != TerrainType::None); + + auto dxInv = geom[lev].InvCellSizeArray(); + + Array rho0_u_const; + rho0_u_const[0] = &mom_mf[IntVars::xmom]; + rho0_u_const[1] = &mom_mf[IntVars::ymom]; + rho0_u_const[2] = &mom_mf[IntVars::zmom]; + + // **************************************************************************** + // Compute divergence which will form RHS + // Note that we replace "rho0w" with the contravariant momentum, Omega + // **************************************************************************** +#ifdef ERF_USE_EB + bool already_on_centroids = true; + EB_computeDivergence(rhs, rho0_u_const, geom_at_lev, already_on_centroids); +#else + if (l_use_terrain && SolverChoice::terrain_is_flat) + { + for ( MFIter mfi(rhs,TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box bx = mfi.tilebox(); + const Array4& rho0u_arr = mom_mf[IntVars::xmom].const_array(mfi); + const Array4& rho0v_arr = mom_mf[IntVars::ymom].const_array(mfi); + const Array4& rho0w_arr = mom_mf[IntVars::zmom].const_array(mfi); + const Array4& rhs_arr = rhs.array(mfi); + + Real* stretched_dz_d_ptr = stretched_dz_d[lev].data(); + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { + Real dz = stretched_dz_d_ptr[k]; + rhs_arr(i,j,k) = (rho0u_arr(i+1,j,k) - rho0u_arr(i,j,k)) * dxInv[0] + +(rho0v_arr(i,j+1,k) - rho0v_arr(i,j,k)) * dxInv[1] + +(rho0w_arr(i,j,k+1) - rho0w_arr(i,j,k)) / dz; + }); + } // mfi + } + else if (l_use_terrain) // terrain is not flat + { + // + // Note we compute the divergence using "rho0w" == Omega + // + for ( MFIter mfi(rhs,TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box bx = mfi.tilebox(); + const Array4& rho0u_arr = mom_mf[IntVars::xmom].array(mfi); + const Array4& rho0v_arr = mom_mf[IntVars::ymom].array(mfi); + const Array4& rho0w_arr = 
mom_mf[IntVars::zmom].array(mfi); + const Array4& rhs_arr = rhs.array(mfi); + + const Array4& ax_arr = ax[lev]->const_array(mfi); + const Array4& ay_arr = ay[lev]->const_array(mfi); + const Array4& az_arr = az[lev]->const_array(mfi); + const Array4& dJ_arr = detJ_cc[lev]->const_array(mfi); + + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + rhs_arr(i,j,k) = ((ax_arr(i+1,j,k)*rho0u_arr(i+1,j,k) - ax_arr(i,j,k)*rho0u_arr(i,j,k)) * dxInv[0] + +(ay_arr(i,j+1,k)*rho0v_arr(i,j+1,k) - ay_arr(i,j,k)*rho0v_arr(i,j,k)) * dxInv[1] + +(az_arr(i,j,k+1)*rho0w_arr(i,j,k+1) - az_arr(i,j,k)*rho0w_arr(i,j,k)) * dxInv[2]) / dJ_arr(i,j,k); + }); + } // mfi + + } + else // no terrain + { + computeDivergence(rhs, rho0_u_const, geom_at_lev); + } +#endif +} diff --git a/Source/Utils/ERF_solve_with_EB_mlmg.cpp b/Source/LinearSolvers/ERF_solve_with_EB_mlmg.cpp similarity index 100% rename from Source/Utils/ERF_solve_with_EB_mlmg.cpp rename to Source/LinearSolvers/ERF_solve_with_EB_mlmg.cpp diff --git a/Source/Utils/ERF_solve_with_fft.cpp b/Source/LinearSolvers/ERF_solve_with_fft.cpp similarity index 62% rename from Source/Utils/ERF_solve_with_fft.cpp rename to Source/LinearSolvers/ERF_solve_with_fft.cpp index 26bc23341..291cacad4 100644 --- a/Source/Utils/ERF_solve_with_fft.cpp +++ b/Source/LinearSolvers/ERF_solve_with_fft.cpp @@ -50,7 +50,9 @@ void ERF::solve_with_fft (int lev, MultiFab& rhs, MultiFab& phi, Array 0 + AMREX_ALWAYS_ASSERT(lev == 0); bool l_use_terrain = SolverChoice::terrain_type != TerrainType::None; @@ -110,93 +112,6 @@ void ERF::solve_with_fft (int lev, MultiFab& rhs, MultiFab& phi, Array const& pp_arr = phi.array(mfi); - Box const& bx = mfi.tilebox(); - auto const bx_lo = lbound(bx); - auto const bx_hi = ubound(bx); - if (bx_lo.x == dom_lo.x) { - auto bc_type = domain_bc_type[Orientation(0,Orientation::low)]; - if (bc_type == "Outflow" || bc_type == "Open") { - ParallelFor(makeSlab(bx,0,dom_lo.x), [=] AMREX_GPU_DEVICE (int i, int j, int k) 
noexcept - { - pp_arr(i-1,j,k) = -pp_arr(i,j,k); - }); - } else { - ParallelFor(makeSlab(bx,0,dom_lo.x), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i-1,j,k) = pp_arr(i,j,k); - }); - } - } - if (bx_lo.y == dom_lo.y) { - auto bc_type = domain_bc_type[Orientation(1,Orientation::low)]; - if (bc_type == "Outflow" || bc_type == "Open") { - ParallelFor(makeSlab(bx,1,dom_lo.y), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i,j-1,k) = -pp_arr(i,j,k); - }); - } else { - ParallelFor(makeSlab(bx,1,dom_lo.y), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i,j-1,k) = pp_arr(i,j,k); - }); - } - } - if (bx_lo.z == dom_lo.z) { - ParallelFor(makeSlab(bx,2,dom_lo.z), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i,j,k-1) = pp_arr(i,j,k); - }); - } - if (bx_hi.x == dom_hi.x) { - auto bc_type = domain_bc_type[Orientation(0,Orientation::high)]; - if (bc_type == "Outflow" || bc_type == "Open") { - ParallelFor(makeSlab(bx,0,dom_hi.x), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i+1,j,k) = -pp_arr(i,j,k); - }); - } else { - ParallelFor(makeSlab(bx,0,dom_hi.x), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i+1,j,k) = pp_arr(i,j,k); - }); - } - } - if (bx_hi.y == dom_hi.y) { - auto bc_type = domain_bc_type[Orientation(1,Orientation::high)]; - if (bc_type == "Outflow" || bc_type == "Open") { - ParallelFor(makeSlab(bx,1,dom_hi.y), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i,j+1,k) = -pp_arr(i,j,k); - }); - } else { - ParallelFor(makeSlab(bx,1,dom_hi.y), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i,j+1,k) = pp_arr(i,j,k); - }); - } - } - if (bx_hi.z == dom_hi.z) { - ParallelFor(makeSlab(bx,2,dom_hi.z), [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - pp_arr(i,j,k+1) = pp_arr(i,j,k); - }); - } - } // mfi - - // Now overwrite with periodic fill outside domain and fine-fine fill inside - 
phi.FillBoundary(geom[lev].periodicity()); - #ifdef _OPENMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif diff --git a/Source/LinearSolvers/ERF_solve_with_gmres.cpp b/Source/LinearSolvers/ERF_solve_with_gmres.cpp new file mode 100644 index 000000000..d4ef6214e --- /dev/null +++ b/Source/LinearSolvers/ERF_solve_with_gmres.cpp @@ -0,0 +1,32 @@ +#include "ERF.H" +#include "ERF_Utils.H" +#include "ERF_TerrainPoisson.H" + +#include + +using namespace amrex; + +/** + * Solve the Poisson equation using GMRES + */ +void ERF::solve_with_gmres (int lev, Vector& rhs, Vector& phi, Array& fluxes) +{ + BL_PROFILE("ERF::solve_with_gmres()"); + + Real reltol = solverChoice.poisson_reltol; + Real abstol = solverChoice.poisson_abstol; + + amrex::GMRES gmsolver; + + TerrainPoisson tp(geom[lev], rhs[lev].boxArray(), rhs[lev].DistributionMap(), z_phys_nd[lev].get()); + + gmsolver.define(tp); + + gmsolver.setVerbose(mg_verbose); + + tp.usePrecond(true); + + gmsolver.solve(phi[0], rhs[0], reltol, abstol); + + tp.getFluxes(phi[0], fluxes); +} diff --git a/Source/Utils/ERF_solve_with_mlmg.cpp b/Source/LinearSolvers/ERF_solve_with_mlmg.cpp similarity index 100% rename from Source/Utils/ERF_solve_with_mlmg.cpp rename to Source/LinearSolvers/ERF_solve_with_mlmg.cpp diff --git a/Source/LinearSolvers/Make.package b/Source/LinearSolvers/Make.package new file mode 100644 index 000000000..7f25cdfca --- /dev/null +++ b/Source/LinearSolvers/Make.package @@ -0,0 +1,18 @@ +CEXE_headers += ERF_TerrainPoisson_3D_K.H +CEXE_headers += ERF_TerrainPoisson.H +CEXE_sources += ERF_TerrainPoisson.cpp + +CEXE_sources += ERF_compute_divergence.cpp +CEXE_sources += ERF_PoissonSolve.cpp +CEXE_sources += ERF_PoissonSolve_tb.cpp +CEXE_sources += ERF_solve_with_fft.cpp +CEXE_sources += ERF_solve_with_gmres.cpp +CEXE_sources += ERF_solve_with_mlmg.cpp + +ifeq ($(USE_FFT), TRUE) +CEXE_headers += ERF_FFT_TerrainPrecond.H +endif + +ifeq ($(USE_EB), TRUE) +CEXE_sources += ERF_solve_with_EB_mlmg.cpp +endif diff 
--git a/Source/Utils/ERF_solve_with_gmres.cpp b/Source/Utils/ERF_solve_with_gmres.cpp deleted file mode 100644 index fe5423408..000000000 --- a/Source/Utils/ERF_solve_with_gmres.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "ERF.H" -#include "ERF_Utils.H" - -#include -#include -#include - -using namespace amrex; - -/** - * Solve the Poisson equation using GMRES - */ -void ERF::solve_with_gmres (int /*lev*/, Vector& /*rhs*/, Vector& /*phi*/, - Vector>& /*fluxes*/) -{ -#if 0 - BL_PROFILE("ERF::solve_with_gmres()"); - - auto const dom_lo = lbound(geom[lev].Domain()); - auto const dom_hi = ubound(geom[lev].Domain()); - - LPInfo info; - // Allow a hidden direction if the domain is one cell wide in any lateral direction - if (dom_lo.x == dom_hi.x) { - info.setHiddenDirection(0); - } else if (dom_lo.y == dom_hi.y) { - info.setHiddenDirection(1); - } - - // Make sure the solver only sees the levels over which we are solving - Vector ba_tmp; ba_tmp.push_back(rhs[lev].boxArray()); - Vector dm_tmp; dm_tmp.push_back(rhs[lev].DistributionMap()); - Vector geom_tmp; geom_tmp.push_back(geom[lev]); - - auto bclo = get_projection_bc(Orientation::low); - auto bchi = get_projection_bc(Orientation::high); - - // amrex::Print() << "BCLO " << bclo[0] << " " << bclo[1] << " " << bclo[2] << std::endl; - // amrex::Print() << "BCHI " << bchi[0] << " " << bchi[1] << " " << bchi[2] << std::endl; - - Real reltol = solverChoice.poisson_reltol; - Real abstol = solverChoice.poisson_abstol; - - MLTerrainPoisson terrpoisson(geom_tmp, ba_tmp, dm_tmp, info); - terrpoisson.setDomainBC(bclo, bchi); - terrpoisson.setMaxOrder(2); - - terrpoisson.setZPhysNd(lev, *z_phys_nd[lev]); - - if (lev > 0) { - terrpoisson.setCoarseFineBC(nullptr, ref_ratio[lev-1], LinOpBCType::Neumann); - } - terrpoisson.setLevelBC(lev, &phi[lev]); - - MLMG mlmg(terrpoisson); - GMRESMLMG gmsolver(mlmg); - gmsolver.usePrecond(false); - gmsolver.setVerbose(mg_verbose); - gmsolver.solve(phi[0], rhs[0], reltol, abstol); - - Vector 
phi_vec; phi_vec.resize(1); - phi_vec[0] = &phi[0]; - terrpoisson.getFluxes(GetVecOfArrOfPtrs(fluxes), phi_vec); -#endif -} diff --git a/Source/Utils/Make.package b/Source/Utils/Make.package index 8667c9a3d..9f752364f 100644 --- a/Source/Utils/Make.package +++ b/Source/Utils/Make.package @@ -11,7 +11,8 @@ CEXE_headers += ERF_TileNoZ.H CEXE_headers += ERF_Utils.H CEXE_headers += ERF_TerrainPoisson_3D_K.H -CEXE_headers += ERF_MLTerrainPoisson.H +CEXE_headers += ERF_TerrainPoisson.H +CEXE_sources += ERF_TerrainPoisson.cpp CEXE_headers += ERF_ParFunctions.H @@ -26,13 +27,3 @@ CEXE_sources += ERF_VelocityToMomentum.cpp CEXE_sources += ERF_InteriorGhostCells.cpp CEXE_sources += ERF_TerrainMetrics.cpp CEXE_sources += ERF_Time_Avg_Vel.cpp - -CEXE_sources += ERF_PoissonSolve.cpp -CEXE_sources += ERF_PoissonSolve_tb.cpp -CEXE_sources += ERF_solve_with_fft.cpp -CEXE_sources += ERF_solve_with_gmres.cpp -CEXE_sources += ERF_solve_with_mlmg.cpp - -ifeq ($(USE_EB), TRUE) -CEXE_sources += ERF_solve_with_EB_mlmg.cpp -endif diff --git a/Submodules/AMReX b/Submodules/AMReX index 456c93c7d..f7f3ee9f5 160000 --- a/Submodules/AMReX +++ b/Submodules/AMReX @@ -1 +1 @@ -Subproject commit 456c93c7d9512f1cdffac0574973d7df41417898 +Subproject commit f7f3ee9f57170e227af1fee1c6b95e5b41a45af8