From 4813b17c27b736103df03ff5e67e46363e528d81 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Fri, 19 Jan 2024 16:35:43 -0500
Subject: [PATCH 01/17] *Use Halley method iterations in cuberoot function

  Modified the cuberoot function to do 3 iterations with Halley's method
starting with a first guess that balances the errors at the two ends of the
range of the iterations, before a final iteration with Newton's method that
polishes the root and gives a solution that is accurate to machine precision.
Following on performance testing of the previous version, all convergence
testing has been removed and the same number of iterations are applied
regardless of the input value.  This changes answers at roundoff for code that
uses the cuberoot function, so ideally this PR would be dealt with before the
cuberoot becomes widely used.
---
 src/framework/MOM_intrinsic_functions.F90 | 70 +++++++----------------
 1 file changed, 20 insertions(+), 50 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 808bc408a8..8d8cdde39f 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -45,8 +45,6 @@ elemental function cuberoot(x) result(root)
               ! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D]
   real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of
               ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
-  real, parameter :: den_min = 2.**(minexponent(1.) / 4 + 4)  ! A value of den that triggers rescaling [C]
-  real, parameter :: den_max = 2.**(maxexponent(1.) / 4 - 2)  ! A value of den that triggers rescaling [C]
   integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a.
   integer :: itt
 
@@ -58,56 +56,28 @@ elemental function cuberoot(x) result(root)
     ! Here asx is in the range of 0.125 <= asx < 1.0
     asx = scale(abs(x), -3*ex_3)
 
-    ! This first estimate is one iteration of Newton's method with a starting guess of 1.  It is
-    ! always an over-estimate of the true solution, but it is a good approximation for asx near 1.
-    num = 2.0 + asx
-    den = 3.0
-    ! Iteratively determine Root = asx**1/3 using Newton's method, noting that in this case Newton's
-    ! method converges monotonically from above and needs no bounding.  For the range of asx from
-    ! 0.125 to 1.0 with the first guess used above, 6 iterations suffice to converge to roundoff.
-    do itt=1,9
-      ! Newton's method iterates estimates as Root = Root - (Root**3 - asx) / (3.0 * Root**2), or
-      ! equivalently as Root = (2.0*Root**2 + asx) / (3.0 * Root**2).
-      ! Keeping the estimates in a fractional form Root = num / den allows this calculation with
-      ! fewer (or no) real divisions during the iterations before doing a single real division
-      ! at the end, and it is therefore more computationally efficient.
-
+    !   Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method,
+    ! noting that Halley's method onverges monotonically and needs no bounding.  Halley's method is
+    ! slightly more complicated that Newton's method, but converges in a third fewer iterations.
+    !   Keeping the estimates in a fractional form Root = num / den allows this calculation with
+    ! no real divisions during the iterations before doing a single real division at the end,
+    ! and it is therefore more computationally efficient.
+
+    ! This first estimate gives the same magnitude of errors for 0.125 and 1.0 after two iterations.
+    num = 0.707106 ; den = 1.0
+    do itt=1,3
+      ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx).
       num_prev = num ; den_prev = den
-      num = 2.0 * num_prev**3 + asx * den_prev**3
-      den = 3.0 * (den_prev * num_prev**2)
-
-      if ((num * den_prev == num_prev * den) .or. (itt == 9)) then
-        !   If successive estimates of root are identical, this is a converged solution.
-        root_asx = num / den
-        exit
-      elseif (num * den_prev > num_prev * den) then
-        !   If the estimates are increasing, this also indicates convergence, but for a more subtle
-        ! reason.  Because Newton's method converges monotonically from above (at least for infinite
-        ! precision math), the only reason why this estimate could increase is if the iterations
-        ! have converged to a roundoff-level limit cycle around an irrational or otherwise
-        ! unrepresentable solution, with values only changing in the last bit or two.  If so, we
-        ! should stop iterating and accept the one of the current or previous solutions, both of
-        ! which will be within numerical roundoff of the true solution.
-        root_asx = num / den
-        ! Pick the more accurate of the last two iterations.
-        ! Given that both of the two previous iterations are within roundoff of the true
-        ! solution, this next step might be overkill.
-        if ( abs(den_prev**3*root_asx**3 - den_prev**3*asx) > abs(num_prev**3 - den_prev**3*asx) ) then
-          ! The previous iteration was slightly more accurate, so use that for root_asx.
-          root_asx = num_prev / den_prev
-        endif
-        exit
-      endif
-
-      ! Because successive estimates of the numerator and denominator tend to be the cube of their
-      ! predecessors, the numerator and denominator need to be rescaled by division when they get
-      ! too large or small to avoid overflow or underflow in the convergence test below.
-      if ((den > den_max) .or. (den < den_min)) then
-        num = scale(num, -exponent(den))
-        den = scale(den, -exponent(den))
-      endif
-
+      num = num_prev * (num_prev**3 + 2.0 * asx * (den_prev**3))
+      den = den_prev * (2.0 * num_prev**3 + asx * (den_prev**3))
+      ! Equivalent to:  root_asx = root_asx * (root_asx**3 + 2.*asx) / (2.*root_asx**3 + asx)
     enddo
+    ! At this point the error in root_asx is better than 1 part in 3e14.
+    root_asx = num / den
+
+    ! One final iteration with Newton's method polishes up the root and gives a solution
+    ! that is within the last bit of the true solution.
+    root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
 
     root = sign(scale(root_asx, ex_3), x)
   endif

From 17f1c40dfa881a4a834cbca61dc2ee9a41f6d6aa Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@gmail.com>
Date: Fri, 26 Jan 2024 10:39:54 -0500
Subject: [PATCH 02/17] Cuberoot: Apply first iteration explicitly (#11)

Applying the first iteration explicitly appears to speed up the cuberoot
function by a bit over 20%:

 Before:
 Halley Final:  0.14174999999999999

 After:
 Halley Final:  0.11080000000000001

There is an assumption that compilers will precompute the constants like
`0.7 * (0.7)**3`, and that all will do so in the same manner.
---
 src/framework/MOM_intrinsic_functions.F90 | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 8d8cdde39f..4327cfa5a6 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -64,8 +64,11 @@ elemental function cuberoot(x) result(root)
     ! and it is therefore more computationally efficient.
 
     ! This first estimate gives the same magnitude of errors for 0.125 and 1.0 after two iterations.
-    num = 0.707106 ; den = 1.0
-    do itt=1,3
+    ! The first iteration is applied explicitly.
+    num = 0.707106 * (0.707106**3 + 2.0 * asx)
+    den = 2.0 * (0.707106**3) + asx
+
+    do itt=1,2
       ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx).
       num_prev = num ; den_prev = den
       num = num_prev * (num_prev**3 + 2.0 * asx * (den_prev**3))

From 435ccaae4e60ca8fd8b836d4b8239dc3fdcbc79b Mon Sep 17 00:00:00 2001
From: Ashley Barnes <53282288+ashjbarnes@users.noreply.github.com>
Date: Tue, 30 Jan 2024 03:18:12 +1100
Subject: [PATCH 03/17] Data table documentation (#551)

* Update forcing.rst

* add more detail to forcing.rst removing ambiguity about yaml format.
---
 docs/forcing.rst | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/docs/forcing.rst b/docs/forcing.rst
index 911f708b68..8496317608 100644
--- a/docs/forcing.rst
+++ b/docs/forcing.rst
@@ -1,5 +1,33 @@
 Forcing
 =======
+Data Override
+-------
+When running MOM6 with the Flexible Modelling System (FMS) coupler, forcing can be specified by a `data_table` file. This is particularly useful when running MOM6 with a data atmosphere, as paths to the relevent atmospheric forcing products (eg. JRA55-do or ERA5) can be provided here. Each item in the data table must be separated by a new line, and contains the following information:
+
+| ``gridname``: The component of the model this data applies to. eg. `atm` `ocn` `lnd` `ice`.
+| ``fieldname_code``: The field name according to the model component. eg. `salt`
+| ``fieldname_file``: The name of the field within the source file. 
+| ``file_name``: Path to the source file.
+| ``interpol_method``: Interpolation method eg. `bilinear`
+| ``factor``: A scalar by which to multiply the field ahead of passing it onto the model. This is a quick way to do unit conversions for example. 
+
+| 
+The data table is commonly formatted by specifying each of the fields in the order listed above, with a new line for each entry.
+
+Example Format:
+    "ATM", "t_bot",  "t2m", "./INPUT/2t_ERA5.nc", "bilinear", 1.0
+
+A `yaml` format is also possible if you prefer. This is outlined in the `FMS data override <https://github.com/NOAA-GFDL/FMS/tree/main/data_override>`_ github page, along with other details. 
+
+Speficying a constant value:
+    Rather than overriding with data from a file, one can also set a field to constant. To do this, pass empty strings to `fieldname_file` and `file_name`. The `factor` now corresponds to the override value. For example, the following sets the temperature at the bottom of the atmosphere to 290 Kelvin.
+
+
+    "ATM", "t_bot",  "", "", "bilinear", 290.0
+
+Which units do I need?
+    For configurations using SIS2 and MOM, a list of available surface flux variables along with the expected units can be found in the `flux_exchange <https://github.com/NOAA-GFDL/FMScoupler/blob/f4782c2c033df086eeac29fbbefb4a0bdac1649f/full/flux_exchange.F90>`_ file. 
+
 
 .. toctree::
     :maxdepth: 2

From f1e0f01a8efb63fd360ad6e4f9afc84ac6ea4af1 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Sun, 28 Jan 2024 13:20:28 -0500
Subject: [PATCH 04/17] Params: Add do_not_log to param block open/close

This patch adds `do_not_log` to `openParameterBlock`, to prevent logging
of `BLOCK%` `%BLOCK` entry and exit calls.

The argument was not added to `closeParameterBlock`, since this state
can be tracked inside the `block` with a new `log_access` field in
`parameter_block`.

This flag does not extend to parameters within the block, since (as far
as I know) there is no way for a `get_param` to know if it is within a
block or not.  Even if it could know this, there would need to be some
careful handling of nested blocks.

The potential block/parameter inconsistency should be supported at some
point, but for now it is the user's responsibility to consistently apply
`do_not_log` to blocks and its contents.
---
 src/framework/MOM_file_parser.F90 | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/framework/MOM_file_parser.F90 b/src/framework/MOM_file_parser.F90
index 944ccfdf07..22d3789ea5 100644
--- a/src/framework/MOM_file_parser.F90
+++ b/src/framework/MOM_file_parser.F90
@@ -56,6 +56,8 @@ module MOM_file_parser
 !> Specify the active parameter block
 type, private :: parameter_block ; private
   character(len=240) :: name = ''   !< The active parameter block name
+  logical :: log_access = .true.
+    !< Log the entry and exit of the block (but not its contents)
 end type parameter_block
 
 !> A structure that can be parsed to read and document run-time parameters.
@@ -2082,17 +2084,29 @@ subroutine clearParameterBlock(CS)
 end subroutine clearParameterBlock
 
 !> Tags blockName onto the end of the active parameter block name
-subroutine openParameterBlock(CS,blockName,desc)
+subroutine openParameterBlock(CS, blockName, desc, do_not_log)
   type(param_file_type),      intent(in) :: CS      !< The control structure for the file_parser module,
                                          !! it is also a structure to parse for run-time parameters
   character(len=*),           intent(in) :: blockName !< The name of a parameter block being added
   character(len=*), optional, intent(in) :: desc    !< A description of the parameter block being added
+  logical, optional, intent(in) :: do_not_log
+    !< Log block entry if true.  This only prevents logging of entry to the block, and not the contents.
 
   type(parameter_block), pointer :: block => NULL()
+  logical :: do_log
+
+  do_log = .true.
+  if (present(do_not_log)) do_log = .not. do_not_log
+
   if (associated(CS%blockName)) then
     block => CS%blockName
     block%name = pushBlockLevel(block%name,blockName)
-    call doc_openBlock(CS%doc,block%name,desc)
+    if (do_log) then
+      call doc_openBlock(CS%doc, block%name, desc)
+      block%log_access = .true.
+    else
+      block%log_access = .false.
+    endif
   else
     if (is_root_pe()) call MOM_error(FATAL, &
       'openParameterBlock: A push was attempted before allocation.')
@@ -2111,7 +2125,7 @@ subroutine closeParameterBlock(CS)
     if (is_root_pe().and.len_trim(block%name)==0) call MOM_error(FATAL, &
       'closeParameterBlock: A pop was attempted on an empty stack. ("'//&
       trim(block%name)//'")')
-    call doc_closeBlock(CS%doc,block%name)
+    if (block%log_access) call doc_closeBlock(CS%doc, block%name)
   else
     if (is_root_pe()) call MOM_error(FATAL, &
       'closeParameterBlock: A pop was attempted before allocation.')

From 5ca70bac6dd4306a0e1d3b5c653aa679ac4e4fa4 Mon Sep 17 00:00:00 2001
From: Kate Hedstrom <kshedstrom@alaska.edu>
Date: Tue, 9 Jan 2024 09:12:50 -0900
Subject: [PATCH 05/17] Open parameter block before querying BODNER23

Fix the error caused by readinging `MLE%BODNER23`, and instead
explicitly opening and closing the parameter blocks.
---
 src/parameterizations/lateral/MOM_mixed_layer_restrat.F90 | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90 b/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
index e21c33beaf..36a83cd43a 100644
--- a/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
+++ b/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
@@ -1767,8 +1767,10 @@ subroutine mixedlayer_restrat_register_restarts(HI, GV, US, param_file, CS, rest
                  units="s", default=0., scale=US%s_to_T, do_not_log=.true.)
   call get_param(param_file, mdl, "MLE_MLD_DECAY_TIME2", CS%MLE_MLD_decay_time2, &
                  units="s", default=0., scale=US%s_to_T, do_not_log=.true.)
-  call get_param(param_file, mdl, "MLE%USE_BODNER23", use_Bodner, &
+  call openParameterBlock(param_file, 'MLE', do_not_log=.true.)
+  call get_param(param_file, mdl, "USE_BODNER23", use_Bodner, &
                  default=.false., do_not_log=.true.)
+  call closeParameterBlock(param_file)
   if (CS%MLE_MLD_decay_time>0. .or. CS%MLE_MLD_decay_time2>0. .or. use_Bodner) then
     ! CS%MLD_filtered is used to keep a running mean of the PBL's actively mixed MLD.
     allocate(CS%MLD_filtered(HI%isd:HI%ied,HI%jsd:HI%jed), source=0.)

From 541c2f47970adb809b24d9838d25ac2a716db980 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Fri, 19 Jan 2024 16:55:18 -0500
Subject: [PATCH 06/17] (*)Oil_tracer_column_physics unit conversion fix

  Added a missing unit conversion factor to a hard-coded 10 m distance in
oil_tracer_column_physics.  This will not change answers in Boussinesq cases
without any dimensional rescaling, but it will correct answers in a hypothetical
non-Boussinesq case.  Also made some white space in expressions near this fix
more closely match the MOM6 style guide. No answers are affected in any known
existing regression test cases or other runs.
---
 src/tracer/oil_tracer.F90 | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/tracer/oil_tracer.F90 b/src/tracer/oil_tracer.F90
index fc8f82f0df..40d6f27b44 100644
--- a/src/tracer/oil_tracer.F90
+++ b/src/tracer/oil_tracer.F90
@@ -373,21 +373,21 @@ subroutine oil_tracer_column_physics(h_old, h_new, ea, eb, fluxes, dt, G, GV, US
   ! Add oil at the source location
   if (year>=CS%oil_start_year .and. year<=CS%oil_end_year .and. &
       CS%oil_source_i>-999 .and. CS%oil_source_j>-999) then
-    i=CS%oil_source_i ; j=CS%oil_source_j
-    k_max=nz ; h_total=0.
+    i = CS%oil_source_i ; j = CS%oil_source_j
+    k_max = nz ; h_total = 0.
     vol_scale = GV%H_to_m * US%L_to_m**2
     do k=nz, 2, -1
       h_total = h_total + h_new(i,j,k)
-      if (h_total<10.) k_max=k-1 ! Find bottom most interface that is 10 m above bottom
+      if (h_total < 10.*GV%m_to_H) k_max=k-1 ! Find bottom most interface that is 10 m above bottom
     enddo
     do m=1,CS%ntr
-      k=CS%oil_source_k(m)
+      k = CS%oil_source_k(m)
       if (k>0) then
-        k=min(k,k_max) ! Only insert k or first layer with interface 10 m above bottom
+        k = min(k,k_max) ! Only insert k or first layer with interface 10 m above bottom
         CS%tr(i,j,k,m) = CS%tr(i,j,k,m) + CS%oil_source_rate*dt / &
                 (vol_scale * (h_new(i,j,k)+GV%H_subroundoff) * G%areaT(i,j) )
       elseif (k<0) then
-        h_total=GV%H_subroundoff
+        h_total = GV%H_subroundoff
         do k=1, nz
           h_total = h_total + h_new(i,j,k)
         enddo

From 76f0668146d5f709f093774033927f68bf7514f3 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Sat, 20 Jan 2024 06:06:38 -0500
Subject: [PATCH 07/17] (*)Avoid using RHO_0 in non-Boussinesq averaging

  Use GV%H_to_MKS instead of GV%H_to_m when undoing the dimensional rescaling of
thicknesses when taking weighted averages in horizontally_average_diag_field,
global_layer_mean and global_volume_mean.  In Boussinesq mode, these are
identical, but in non-Boussinesq mode using GV%H_to_m introduced a
multiplication and then division by the Boussinesq reference density, whereas
GV%H_to_MKS avoids this by rescaling to a volume or mass-based coordinate
depending on the mode.  Several comments were also updated to reflect these
conditional changes in the units of some internal variables.  All expressions
are mathematically equivalent, and this does not impact any solutions, but there
can be changes in the last bits in some non-Boussinesq averaged diagnostics.
---
 src/diagnostics/MOM_spatial_means.F90 | 20 +++++++++++---------
 src/framework/MOM_diag_remap.F90      | 17 ++++++++++++-----
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/diagnostics/MOM_spatial_means.F90 b/src/diagnostics/MOM_spatial_means.F90
index ab1210c0f5..60ad8dfba5 100644
--- a/src/diagnostics/MOM_spatial_means.F90
+++ b/src/diagnostics/MOM_spatial_means.F90
@@ -211,11 +211,13 @@ function global_layer_mean(var, h, G, GV, scale, tmp_scale)
   ! Local variables
   ! In the following comments, [A] is used to indicate the arbitrary, possibly rescaled units of the
   ! input array while [a] indicates the unscaled (e.g., mks) units that can be used with the reproducing sums
-  real, dimension(G%isc:G%iec,G%jsc:G%jec,SZK_(GV)) :: tmpForSumming  ! An unscaled cell integral [a m3]
-  real, dimension(G%isc:G%iec,G%jsc:G%jec,SZK_(GV)) :: weight  ! The volume of each cell, used as a weight [m3]
+  real, dimension(G%isc:G%iec,G%jsc:G%jec,SZK_(GV)) :: tmpForSumming  ! An unscaled cell integral [a m3] or [a kg]
+  real, dimension(G%isc:G%iec,G%jsc:G%jec,SZK_(GV)) :: weight  ! The volume or mass of each cell, depending on
+                                                    ! whether the model is Boussinesq, used as a weight [m3] or [kg]
   type(EFP_type), dimension(2*SZK_(GV)) :: laysums
-  real, dimension(SZK_(GV)) :: global_temp_scalar    ! The global integral of the tracer in each layer [a m3]
-  real, dimension(SZK_(GV)) :: global_weight_scalar  ! The global integral of the volume of each layer [m3]
+  real, dimension(SZK_(GV)) :: global_temp_scalar   ! The global integral of the tracer in each layer [a m3] or [a kg]
+  real, dimension(SZK_(GV)) :: global_weight_scalar ! The global integral of the volume or mass of each
+                                                    ! layer [m3] or [kg]
   real :: temp_scale ! A temporary scaling factor [a A-1 ~> 1] or [1]
   real :: scalefac  ! A scaling factor for the variable [a A-1 ~> 1]
   integer :: i, j, k, is, ie, js, je, nz
@@ -226,7 +228,7 @@ function global_layer_mean(var, h, G, GV, scale, tmp_scale)
   tmpForSumming(:,:,:) = 0. ; weight(:,:,:) = 0.
 
   do k=1,nz ; do j=js,je ; do i=is,ie
-    weight(i,j,k)  =  (GV%H_to_m * h(i,j,k)) * (G%US%L_to_m**2*G%areaT(i,j) * G%mask2dT(i,j))
+    weight(i,j,k)  =  (GV%H_to_MKS * h(i,j,k)) * (G%US%L_to_m**2*G%areaT(i,j) * G%mask2dT(i,j))
     tmpForSumming(i,j,k) =  scalefac * var(i,j,k) * weight(i,j,k)
   enddo ; enddo ; enddo
 
@@ -262,9 +264,9 @@ function global_volume_mean(var, h, G, GV, scale, tmp_scale)
   ! input array while [a] indicates the unscaled (e.g., mks) units that can be used with the reproducing sums
   real :: temp_scale ! A temporary scaling factor [a A-1 ~> 1] or [1]
   real :: scalefac   ! A scaling factor for the variable [a A-1 ~> 1]
-  real :: weight_here ! The volume of a grid cell [m3]
-  real, dimension(SZI_(G),SZJ_(G)) :: tmpForSumming ! The volume integral of the variable in a column [a m3]
-  real, dimension(SZI_(G),SZJ_(G)) :: sum_weight  ! The volume of each column of water [m3]
+  real :: weight_here ! The volume or mass of a grid cell [m3] or [kg]
+  real, dimension(SZI_(G),SZJ_(G)) :: tmpForSumming ! The volume integral of the variable in a column [a m3] or [a kg]
+  real, dimension(SZI_(G),SZJ_(G)) :: sum_weight  ! The volume or mass of each column of water [m3] or [kg]
   integer :: i, j, k, is, ie, js, je, nz
   is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec ; nz = GV%ke
 
@@ -273,7 +275,7 @@ function global_volume_mean(var, h, G, GV, scale, tmp_scale)
   tmpForSumming(:,:) = 0. ; sum_weight(:,:) = 0.
 
   do k=1,nz ; do j=js,je ; do i=is,ie
-    weight_here  =  (GV%H_to_m * h(i,j,k)) * (G%US%L_to_m**2*G%areaT(i,j) * G%mask2dT(i,j))
+    weight_here  =  (GV%H_to_MKS * h(i,j,k)) * (G%US%L_to_m**2*G%areaT(i,j) * G%mask2dT(i,j))
     tmpForSumming(i,j) = tmpForSumming(i,j) + scalefac * var(i,j,k) * weight_here
     sum_weight(i,j) = sum_weight(i,j) + weight_here
   enddo ; enddo ; enddo
diff --git a/src/framework/MOM_diag_remap.F90 b/src/framework/MOM_diag_remap.F90
index ff0eda6325..a2ecc197bc 100644
--- a/src/framework/MOM_diag_remap.F90
+++ b/src/framework/MOM_diag_remap.F90
@@ -658,8 +658,15 @@ subroutine horizontally_average_diag_field(G, GV, h, staggered_in_x, staggered_i
   logical, dimension(:), intent(inout) :: averaged_mask  !< Mask for horizontally averaged field [nondim]
 
   ! Local variables
-  real, dimension(G%isc:G%iec, G%jsc:G%jec, size(field,3)) :: volume, stuff
-  real, dimension(size(field, 3)) :: vol_sum, stuff_sum ! nz+1 is needed for interface averages
+  real :: volume(G%isc:G%iec, G%jsc:G%jec, size(field,3)) ! The area [m2], volume [m3] or mass [kg] of each cell.
+  real :: stuff(G%isc:G%iec, G%jsc:G%jec, size(field,3))  ! The area, volume or mass-weighted integral of the
+                                             ! field being averaged in each cell, in [m2 A], [m3 A] or [kg A],
+                                             ! depending on the weighting for the averages and whether the
+                                             ! model makes the Boussinesq approximation.
+  real, dimension(size(field, 3)) :: vol_sum   ! The global sum of the areas [m2], volumes [m3] or mass [kg]
+                                               ! in the cells that used in the weighted averages.
+  real, dimension(size(field, 3)) :: stuff_sum ! The global sum of the weighted field in all cells, in
+                                               ! [A m2], [A m3] or [A kg]
   type(EFP_type), dimension(2*size(field,3)) :: sums_EFP ! Sums of volume or stuff by layer
   real :: height  ! An average thickness attributed to an velocity point [H ~> m or kg m-2]
   integer :: i, j, k, nz
@@ -688,7 +695,7 @@ subroutine horizontally_average_diag_field(G, GV, h, staggered_in_x, staggered_i
             I1 = i - G%isdB + 1
             height = 0.5 * (h(i,j,k) + h(i+1,j,k))
             volume(I,j,k) = (G%US%L_to_m**2 * G%areaCu(I,j)) &
-                * (GV%H_to_m * height) * G%mask2dCu(I,j)
+                * (GV%H_to_MKS * height) * G%mask2dCu(I,j)
             stuff(I,j,k) = volume(I,j,k) * field(I1,j,k)
           enddo ; enddo
         endif
@@ -717,7 +724,7 @@ subroutine horizontally_average_diag_field(G, GV, h, staggered_in_x, staggered_i
             J1 = J - G%jsdB + 1
             height = 0.5 * (h(i,j,k) + h(i,j+1,k))
             volume(i,J,k) = (G%US%L_to_m**2 * G%areaCv(i,J)) &
-                * (GV%H_to_m * height) * G%mask2dCv(i,J)
+                * (GV%H_to_MKS * height) * G%mask2dCv(i,J)
             stuff(i,J,k) = volume(i,J,k) * field(i,J1,k)
           enddo ; enddo
         endif
@@ -748,7 +755,7 @@ subroutine horizontally_average_diag_field(G, GV, h, staggered_in_x, staggered_i
         else ! Intensive
           do j=G%jsc, G%jec ; do i=G%isc, G%iec
             volume(i,j,k) = (G%US%L_to_m**2 * G%areaT(i,j)) &
-                * (GV%H_to_m * h(i,j,k)) * G%mask2dT(i,j)
+                * (GV%H_to_MKS * h(i,j,k)) * G%mask2dT(i,j)
             stuff(i,j,k) = volume(i,j,k) * field(i,j,k)
           enddo ; enddo
         endif

From 60cb551a3a6c04ba33e5d5f3ce7e4d25d3f7be53 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Sun, 28 Jan 2024 13:53:55 -0500
Subject: [PATCH 08/17] Intrinsics: Faster cuberoot scaling functions

This patch replaces the intrinsic-based exponent rescaling with explicit
bit manipulation of the floating point number.

This appears to produce a ~2.5x speedup of the solver, reducing its time
from embarassingly slow to disappointingly slow.  It is slightly faster
than the GNU cbrt function, but still about 3x slower than the Intel
SVML cbrt function.

Timings (s) (16M array, -O3 -mavx -mfma)

| Solver              |  -O2  |  -O3  |
|---------------------|-------|-------|
| GNU x**1/3          | 0.225 | 0.198 |
| GNU cuberoot before | 0.418 | 0.412 |
| GNU cuberoot after  | 0.208 | 0.187 |
| Intel x**1/3        | 0.068 | 0.067 |
| Intel before        | 0.514 | 0.507 |
| Intel after         | 0.213 | 0.189 |

At least one issue here is that Intel SVML is using fast vectorized
logic operators whereas the Fortran intrinsics are replaced with slower
legacy scalar versions.  Not sure there is much we could even do about
that without complaining to vendors.

Also, I'm sure there's magic in their solvers which we are not
capturing.  Regardless, I think this is a major improvement.

I do not believe it will change answers, but probably a good idea to
verify this and get it in before committing any solutions using
cuberoot().
---
 src/framework/MOM_intrinsic_functions.F90 | 104 ++++++++++++++++++++--
 1 file changed, 98 insertions(+), 6 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 4327cfa5a6..5d420057d4 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -5,6 +5,7 @@ module MOM_intrinsic_functions
 ! This file is part of MOM6. See LICENSE.md for the license.
 
 use iso_fortran_env, only : stdout => output_unit, stderr => error_unit
+use iso_fortran_env, only : int64, real64
 
 implicit none ; private
 
@@ -28,6 +29,7 @@ function invcosh(x)
 
 end function invcosh
 
+
 !> Returns the cube root of a real argument at roundoff accuracy, in a form that works properly with
 !! rescaling of the argument by integer powers of 8.  If the argument is a NaN, a NaN is returned.
 elemental function cuberoot(x) result(root)
@@ -45,16 +47,15 @@ elemental function cuberoot(x) result(root)
               ! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D]
   real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of
               ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
-  integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a.
   integer :: itt
 
+  integer(kind=int64) :: e_x, s_x
+
   if ((x >= 0.0) .eqv. (x <= 0.0)) then
     ! Return 0 for an input of 0, or NaN for a NaN input.
     root = x
   else
-    ex_3 = ceiling(exponent(x) / 3.)
-    ! Here asx is in the range of 0.125 <= asx < 1.0
-    asx = scale(abs(x), -3*ex_3)
+    call rescale_exp(x, asx, e_x, s_x)
 
     !   Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method,
     ! noting that Halley's method onverges monotonically and needs no bounding.  Halley's method is
@@ -82,11 +83,102 @@ elemental function cuberoot(x) result(root)
     ! that is within the last bit of the true solution.
     root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
 
-    root = sign(scale(root_asx, ex_3), x)
+    root = descale_cbrt(root_asx, e_x, s_x)
   endif
-
 end function cuberoot
 
+
+!> Rescale `a` to the range [0.125, 1) while preserving its fractional term.
+pure subroutine rescale_exp(a, x, e_a, s_a)
+  real, intent(in) :: a
+    !< The value to be rescaled
+  real, intent(out) :: x
+    !< The rescaled value of `a`
+  integer(kind=int64), intent(out) :: e_a
+    !< The biased exponent of `a`
+  integer(kind=int64), intent(out) :: s_a
+    !< The sign bit of `a`
+
+  ! Floating point model, if format is (sign, exp, frac)
+  integer, parameter :: bias = maxexponent(1.) - 1
+    !< The double precision exponent offset (assuming a balanced range)
+  integer, parameter :: signbit = storage_size(1.) - 1
+    !< Position of sign bit
+  integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
+    !< Bit size of exponent
+  integer, parameter :: expbit = signbit - explen
+    !< Position of lowest exponent bit
+  integer, parameter :: fraclen = expbit
+    !< Length of fractional part
+
+  integer(kind=int64) :: xb
+    !< A floating point number, bit-packed as an integer
+  integer(kind=int64) :: e_scaled
+    !< The new rescaled exponent of `a` (i.e. the exponent of `x`)
+
+  ! Pack bits of `a` into `xb` and extract its exponent and sign
+  xb = transfer(a, 1_int64)
+  s_a = ibits(xb, signbit, 1)
+  e_a = ibits(xb, expbit, explen)
+
+  ! Decompose the exponent as `e = modulo(e,3) + 3*(e/3)` and extract the
+  ! rescaled exponent, now in {-3,-2,-1}
+  e_scaled = modulo(e_a, 3) - 3 + bias
+
+  ! Insert the new 11-bit exponent into `xb`, while also setting the sign bit
+  ! to zero, ensuring that `xb` is always positive.
+  call mvbits(e_scaled, 0, explen + 1, xb, fraclen)
+
+  ! Transfer the final modified value to `x`
+  x = transfer(xb, 1.)
+end subroutine rescale_exp
+
+
+!> Descale a real number to its original base, and apply the cube root to the
+!! remaining exponent.
+pure function descale_cbrt(x, e_a, s_a) result(r)
+  real, intent(in) :: x
+    !< Cube root of the rescaled value, which was rescaled to [0.125, 1.0)
+  integer(kind=int64), intent(in) :: e_a
+    !< Exponent of the original value to be cube rooted
+  integer(kind=int64), intent(in) :: s_a
+    !< Sign bit of the original value to be cube rooted
+  real :: r
+    !< Restored value with the cube root applied to its exponent
+
+  ! Floating point model, if format is (sign, exp, frac)
+  integer, parameter :: bias = maxexponent(1.) - 1
+    !< The double precision exponent offset (assuming a balanced range)
+  integer, parameter :: signbit = storage_size(1.) - 1
+    !< Position of sign bit
+  integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
+    !< Bit size of exponent
+  integer, parameter :: expbit = signbit - explen
+    !< Position of lowest exponent bit
+  integer, parameter :: fraclen = expbit
+    !< Length of fractional part
+
+  integer(kind=int64) :: xb
+    ! Bit-packed real number into integer form
+  integer(kind=int64) :: e_r
+    ! Exponent of the descaled value
+
+  ! Extract the exponent of the rescaled value, in {-3, -2, -1}
+  xb = transfer(x, 1_8)
+  e_r = ibits(xb, expbit, explen)
+
+  ! Apply the cube root to the old exponent (after removing its bias) and add
+  ! to the rescaled exponent.  Correct the previous -3 with a +1.
+  e_r = e_r + (e_a/3 - bias/3 + 1)
+
+  ! Apply the corrected exponent and sign and convert back to real
+  call mvbits(e_r, 0, explen, xb, expbit)
+  call mvbits(s_a, 0, 1, xb, signbit)
+  r = transfer(xb, 1.)
+end function descale_cbrt
+
+
+
 !> Returns true if any unit test of intrinsic_functions fails, or false if they all pass.
 logical function intrinsic_functions_unit_tests(verbose)
   logical, intent(in) :: verbose !< If true, write results to stdout

From 5edba9b4d28892225423f872a9a83b1c22dda0a9 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Tue, 30 Jan 2024 15:28:29 -0500
Subject: [PATCH 09/17] Cuberoot: Refactor (re|de)scale functions

Some modifications were made to the cuberoot rescale and descale
functions:

* The machine parameters were moved from function to module parameters.
  This could dangerously expose them to other functions, but it prevents
  multiple definitions of the same numbers.

* The exponent is now cube-rooted in rescale rather than descale.

* The exponent expressions are broken into more explicit steps, rather
  than combining multiple steps and assumptions into a single
  expression.

* The bias is no longer assumed to be a multiple of three.  This is true
  for double precision but not single precision.

A new test of quasi-random number was also added to the cuberoot test
suite.  These numbers were able to detect the differences in GNU and
Intel compiler output.  A potential error in the return value of the
test was also fixed.

The volatile test of 1 - 0.5*ULP has been added.  The cube root of this
value rounds to 1, and needs to be handled carefully.

The unit test function `cuberoot(v**3)` was reversed to `cuberoot(v)**`,
to include testing of this value.  (Cubing would wipe out the anomaly.)
---
 src/framework/MOM_intrinsic_functions.F90 | 148 +++++++++++-----------
 1 file changed, 75 insertions(+), 73 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 5d420057d4..07c6abe3ad 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -12,6 +12,19 @@ module MOM_intrinsic_functions
 public :: invcosh, cuberoot
 public :: intrinsic_functions_unit_tests
 
+! Floating point model, if bit layout from high to low is (sign, exp, frac)
+
+integer, parameter :: bias = maxexponent(1.) - 1
+  !< The double precision exponent offset
+integer, parameter :: signbit = storage_size(1.) - 1
+  !< Position of sign bit
+integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
+  !< Bit size of exponent
+integer, parameter :: expbit = signbit - explen
+  !< Position of lowest exponent bit
+integer, parameter :: fraclen = expbit
+  !< Length of fractional part
+
 contains
 
 !> Evaluate the inverse cosh, either using a math library or an
@@ -55,7 +68,7 @@ elemental function cuberoot(x) result(root)
     ! Return 0 for an input of 0, or NaN for a NaN input.
     root = x
   else
-    call rescale_exp(x, asx, e_x, s_x)
+    call rescale_cbrt(x, asx, e_x, s_x)
 
     !   Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method,
     ! noting that Halley's method onverges monotonically and needs no bounding.  Halley's method is
@@ -83,109 +96,90 @@ elemental function cuberoot(x) result(root)
     ! that is within the last bit of the true solution.
     root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
 
-    root = descale_cbrt(root_asx, e_x, s_x)
+    root = descale(root_asx, e_x, s_x)
   endif
 end function cuberoot
 
 
-!> Rescale `a` to the range [0.125, 1) while preserving its fractional term.
-pure subroutine rescale_exp(a, x, e_a, s_a)
+!> Rescale `a` to the range [0.125, 1) and compute its cube-root exponent.
+pure subroutine rescale_cbrt(a, x, e_r, s_a)
   real, intent(in) :: a
-    !< The value to be rescaled
+    !< The real parameter to be rescaled for cube root
   real, intent(out) :: x
-    !< The rescaled value of `a`
-  integer(kind=int64), intent(out) :: e_a
-    !< The biased exponent of `a`
+    !< The rescaled value of a
+  integer(kind=int64), intent(out) :: e_r
+    !< Cube root of the exponent of the rescaling of `a`
   integer(kind=int64), intent(out) :: s_a
-    !< The sign bit of `a`
-
-  ! Floating point model, if format is (sign, exp, frac)
-  integer, parameter :: bias = maxexponent(1.) - 1
-    !< The double precision exponent offset (assuming a balanced range)
-  integer, parameter :: signbit = storage_size(1.) - 1
-    !< Position of sign bit
-  integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
-    !< Bit size of exponent
-  integer, parameter :: expbit = signbit - explen
-    !< Position of lowest exponent bit
-  integer, parameter :: fraclen = expbit
-    !< Length of fractional part
+    !< The sign bit of a
 
   integer(kind=int64) :: xb
-    !< A floating point number, bit-packed as an integer
-  integer(kind=int64) :: e_scaled
-    !< The new rescaled exponent of `a` (i.e. the exponent of `x`)
-
-  ! Pack bits of `a` into `xb` and extract its exponent and sign
+    ! Floating point value of a, bit-packed as an integer
+  integer(kind=int64) :: e_a
+    ! Unscaled exponent of a
+  integer(kind=int64) :: e_x
+    ! Exponent of x
+  integer(kind=int64) :: e_div, e_mod
+    ! Quotient and remainder of e in e = 3*(e/3) + modulo(e,3).
+
+  ! Pack bits of a into xb and extract its exponent and sign.
   xb = transfer(a, 1_int64)
   s_a = ibits(xb, signbit, 1)
-  e_a = ibits(xb, expbit, explen)
+  e_a = ibits(xb, expbit, explen) - bias
+
+  ! Compute terms of exponent decomposition e = 3*(e/3) + modulo(e,3).
+  ! (Fortran division is round-to-zero, so we must emulate floor division.)
+  e_mod = modulo(e_a, 3_int64)
+  e_div = (e_a - e_mod)/3
+
+  ! Our scaling decomposes e_a into e = {3*(e/3) + 3} + {modulo(e,3) - 3}.
 
-  ! Decompose the exponent as `e = modulo(e,3) + 3*(e/3)` and extract the
-  ! rescaled exponent, now in {-3,-2,-1}
-  e_scaled = modulo(e_a, 3) - 3 + bias
+  ! The first term is a perfect cube, whose cube root is computed below.
+  e_r = e_div + 1
 
-  ! Insert the new 11-bit exponent into `xb`, while also setting the sign bit
-  ! to zero, ensuring that `xb` is always positive.
-  call mvbits(e_scaled, 0, explen + 1, xb, fraclen)
+  ! The second term ensures that x is shifted to [0.125, 1).
+  e_x = e_mod - 3
 
-  ! Transfer the final modified value to `x`
+  ! Insert the new 11-bit exponent into xb and write to x and extend the
+  ! bitcount to 12, so that the sign bit is zero and x is always positive.
+  call mvbits(e_x + bias, 0, explen + 1, xb, fraclen)
   x = transfer(xb, 1.)
-end subroutine rescale_exp
+end subroutine rescale_cbrt
 
 
-!> Descale a real number to its original base, and apply the cube root to the
-!! remaining exponent.
-pure function descale_cbrt(x, e_a, s_a) result(r)
+!> Undo the rescaling of a real number back to its original base.
+pure function descale(x, e_a, s_a) result(a)
   real, intent(in) :: x
-    !< Cube root of the rescaled value, which was rescaled to [0.125, 1.0)
+    !< The rescaled value which is to be restored.
   integer(kind=int64), intent(in) :: e_a
-    !< Exponent of the original value to be cube rooted
+    !< Exponent of the unscaled value
   integer(kind=int64), intent(in) :: s_a
-    !< Sign bit of the original value to be cube rooted
-  real :: r
-    !< Restored value with the cube root applied to its exponent
-
-  ! Floating point model, if format is (sign, exp, frac)
-  integer, parameter :: bias = maxexponent(1.) - 1
-    !< The double precision exponent offset (assuming a balanced range)
-  integer, parameter :: signbit = storage_size(1.) - 1
-    !< Position of sign bit
-  integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
-    !< Bit size of exponent
-  integer, parameter :: expbit = signbit - explen
-    !< Position of lowest exponent bit
-  integer, parameter :: fraclen = expbit
-    !< Length of fractional part
+    !< Sign bit of the unscaled value
+  real :: a
+    !< Restored value with the corrected exponent and sign
 
   integer(kind=int64) :: xb
     ! Bit-packed real number into integer form
-  integer(kind=int64) :: e_r
-    ! Exponent of the descaled value
+  integer(kind=int64) :: e_x
+    ! Biased exponent of x
 
-  ! Extract the exponent of the rescaled value, in {-3, -2, -1}
+  ! Apply the corrected exponent and sign to x.
   xb = transfer(x, 1_8)
-  e_r = ibits(xb, expbit, explen)
-
-  ! Apply the cube root to the old exponent (after removing its bias) and add
-  ! to the rescaled exponent.  Correct the previous -3 with a +1.
-  e_r = e_r + (e_a/3 - bias/3 + 1)
-
-  ! Apply the corrected exponent and sign and convert back to real
-  call mvbits(e_r, 0, explen, xb, expbit)
+  e_x = ibits(xb, expbit, explen)
+  call mvbits(e_a + e_x, 0, explen, xb, expbit)
   call mvbits(s_a, 0, 1, xb, signbit)
-  r = transfer(xb, 1.)
-end function descale_cbrt
-
+  a = transfer(xb, 1.)
+end function descale
 
 
 !> Returns true if any unit test of intrinsic_functions fails, or false if they all pass.
-logical function intrinsic_functions_unit_tests(verbose)
+function intrinsic_functions_unit_tests(verbose) result(fail)
   logical, intent(in) :: verbose !< If true, write results to stdout
+  logical :: fail !< True if any of the unit tests fail
 
   ! Local variables
   real :: testval  ! A test value for self-consistency testing [nondim]
-  logical :: fail, v
+  logical :: v
+  integer :: n
 
   fail = .false.
   v = verbose
@@ -199,7 +193,15 @@ logical function intrinsic_functions_unit_tests(verbose)
   fail = fail .or. Test_cuberoot(v, 1.0)
   fail = fail .or. Test_cuberoot(v, 0.125)
   fail = fail .or. Test_cuberoot(v, 0.965)
-
+  fail = fail .or. Test_cuberoot(v, 1.0 - epsilon(1.0))
+  fail = fail .or. Test_cuberoot(v, 1.0 - 0.5*epsilon(1.0))
+
+  testval = 1.0e-99
+  v = .false.
+  do n=-160,160
+    fail = fail .or. Test_cuberoot(v, testval)
+    testval = (-2.908 * (1.414213562373 + 1.2345678901234e-5*n)) * testval
+  enddo
 end function intrinsic_functions_unit_tests
 
 !> True if the cube of cuberoot(val) does not closely match val. False otherwise.
@@ -209,7 +211,7 @@ logical function Test_cuberoot(verbose, val)
   ! Local variables
   real :: diff ! The difference between val and the cube root of its cube.
 
-  diff = val - cuberoot(val**3)
+  diff = val - cuberoot(val)**3
   Test_cuberoot = (abs(diff) > 2.0e-15*abs(val))
 
   if (Test_cuberoot) then

From 736ef16a4016b538f946df4be9f30cdc532d03e4 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Tue, 30 Jan 2024 15:56:41 -0500
Subject: [PATCH 10/17] Cuberoot: Break **3 into explicit integer cubes

In separate testing, we observed that Intel would use the `pow()`
function to evaluate the cubes of some numbers, causing different
answers with GNU.

In this patch, I replace the cubic x**3 operations with explicit x*x*x
multiplication, which appears to avoid this substitution.

Well, for the moment, at least.
---
 src/framework/MOM_intrinsic_functions.F90 | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 07c6abe3ad..fbb1c28096 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -52,14 +52,19 @@ elemental function cuberoot(x) result(root)
   real :: asx ! The absolute value of x rescaled by an integer power of 8 to put it into
               ! the range from 0.125 < asx <= 1.0, in ambiguous units cubed [B3]
   real :: root_asx ! The cube root of asx [B]
+  real :: ra_3 ! root_asx cubed [B3]
   real :: num ! The numerator of an expression for the evolving estimate of the cube root of asx
               ! in arbitrary units that can grow or shrink with each iteration [B C]
   real :: den ! The denominator of an expression for the evolving estimate of the cube root of asx
               ! in arbitrary units that can grow or shrink with each iteration [C]
   real :: num_prev ! The numerator of an expression for the previous iteration of the evolving estimate
               ! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D]
+  real :: np_3 ! num_prev cubed  [B3 D3]
   real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of
               ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
+  real :: dp_3 ! den_prev cubed  [C3]
+  real :: r0  ! Initial value of the iterative solver. [B C]
+  real :: r0_3 ! r0 cubed [B3 C3]
   integer :: itt
 
   integer(kind=int64) :: e_x, s_x
@@ -79,14 +84,21 @@ elemental function cuberoot(x) result(root)
 
     ! This first estimate gives the same magnitude of errors for 0.125 and 1.0 after two iterations.
     ! The first iteration is applied explicitly.
-    num = 0.707106 * (0.707106**3 + 2.0 * asx)
-    den = 2.0 * (0.707106**3) + asx
+    r0 = 0.707106
+    r0_3 = r0 * r0 * r0
+    num = r0 * (r0_3 + 2.0 * asx)
+    den = 2.0 * r0_3 + asx
 
     do itt=1,2
       ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx).
       num_prev = num ; den_prev = den
-      num = num_prev * (num_prev**3 + 2.0 * asx * (den_prev**3))
-      den = den_prev * (2.0 * num_prev**3 + asx * (den_prev**3))
+
+      ! Pre-compute these as integer powers, to avoid `pow()`-like intrinsics.
+      np_3 = num_prev * num_prev * num_prev
+      dp_3 = den_prev * den_prev * den_prev
+
+      num = num_prev * (np_3 + 2.0 * asx * dp_3)
+      den = den_prev * (2.0 * np_3 + asx * dp_3)
       ! Equivalent to:  root_asx = root_asx * (root_asx**3 + 2.*asx) / (2.*root_asx**3 + asx)
     enddo
     ! At this point the error in root_asx is better than 1 part in 3e14.
@@ -94,7 +106,8 @@ elemental function cuberoot(x) result(root)
 
     ! One final iteration with Newton's method polishes up the root and gives a solution
     ! that is within the last bit of the true solution.
-    root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
+    ra_3 = root_asx * root_asx * root_asx
+    root_asx = root_asx - (ra_3 - asx) / (3.0 * (root_asx * root_asx))
 
     root = descale(root_asx, e_x, s_x)
   endif

From 671c85d32864d89bce6cfe270a2c556c54b03ba4 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Wed, 10 Jan 2024 13:14:10 -0500
Subject: [PATCH 11/17] (*)Use cuberoot in ePBL_column

  Use the new cuberoot() function in place of **(1./3.) to calculate the
turbulent velocity vstar in ePBL_column when EPBL_ANSWER_DATE is 20240101 or
higher.  This is mathematically equivalent to the previous version, but it does
change and answers at roundoff and it allows several dimensional scaling factors
that had previously been required to be eliminated.  All answers are
mathematically equivalant, but answers do change if EPBL_ANSWER_DATE is 20240101
or higher and the description of EPBL_ANSWER_DATE changes in some
MOM_parameter_doc files.
---
 .../vertical/MOM_energetic_PBL.F90            | 105 +++++++++++++-----
 1 file changed, 77 insertions(+), 28 deletions(-)

diff --git a/src/parameterizations/vertical/MOM_energetic_PBL.F90 b/src/parameterizations/vertical/MOM_energetic_PBL.F90
index 1a59b177bd..10907c04ed 100644
--- a/src/parameterizations/vertical/MOM_energetic_PBL.F90
+++ b/src/parameterizations/vertical/MOM_energetic_PBL.F90
@@ -13,6 +13,7 @@ module MOM_energetic_PBL
 use MOM_forcing_type,   only : forcing
 use MOM_grid,           only : ocean_grid_type
 use MOM_interface_heights, only : thickness_to_dz
+use MOM_intrinsic_functions, only : cuberoot
 use MOM_string_functions, only : uppercase
 use MOM_unit_scaling,   only : unit_scale_type
 use MOM_variables,      only : thermo_var_ptrs
@@ -161,7 +162,10 @@ module MOM_energetic_PBL
   integer :: answer_date     !< The vintage of the order of arithmetic and expressions in the ePBL
                              !! calculations.  Values below 20190101 recover the answers from the
                              !! end of 2018, while higher values use updated and more robust forms
-                             !! of the same expressions.
+                             !! of the same expressions.  Values below 20240101 use A**(1./3.) to
+                             !! estimate the cube root of A in several expressions, while higher
+                             !! values use the integer root function cuberoot(A) and therefore
+                             !! can work with scaled variables.
   logical :: orig_PE_calc    !< If true, the ePBL code uses the original form of the
                              !! potential energy change code.  Otherwise, it uses a newer version
                              !! that can work with successive increments to the diffusivity in
@@ -335,8 +339,10 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
     mixvel, &       ! A turbulent mixing velocity [Z T-1 ~> m s-1].
     mixlen, &       ! A turbulent mixing length [Z ~> m].
     SpV_dt          ! Specific volume interpolated to interfaces divided by dt or 1.0 / (dt * Rho0)
-                    ! times conversion factors in [m3 Z-3 R-1 T2 s-3 ~> m3 kg-1 s-1],
-                    ! used to convert local TKE into a turbulence velocity cubed.
+                    ! times conversion factors for answer dates before 20240101 in
+                    ! [m3 Z-3 R-1 T2 s-3 ~> m3 kg-1 s-1] or without the convsersion factors for
+                    ! answer dates of 20240101 and later in [R-1 T-1 ~> m3 kg-1 s-1], used to
+                    ! convert local TKE into a turbulence velocity cubed.
   real :: h_neglect ! A thickness that is so small it is usually lost
                     ! in roundoff and can be neglected [H ~> m or kg m-2].
 
@@ -348,6 +354,8 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
   real :: I_rho     ! The inverse of the Boussinesq reference density times a ratio of scaling
                     ! factors [Z L-1 R-1 ~> m3 kg-1]
   real :: I_dt      ! The Adcroft reciprocal of the timestep [T-1 ~> s-1]
+  real :: I_rho0dt  ! The inverse of the Boussinesq reference density times the time
+                    ! step [R-1 T-1 ~> m3 kg-1 s-1]
   real :: B_Flux    ! The surface buoyancy flux [Z2 T-3 ~> m2 s-3]
   real :: MLD_io    ! The mixed layer depth found by ePBL_column [Z ~> m]
 
@@ -374,6 +382,7 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
   h_neglect = GV%H_subroundoff
   I_rho = US%L_to_Z * GV%H_to_Z * GV%RZ_to_H ! == US%L_to_Z / GV%Rho0 ! This is not used when fully non-Boussinesq.
   I_dt = 0.0 ; if (dt > 0.0) I_dt = 1.0 / dt
+  I_rho0dt = 1.0 / (GV%Rho0 * dt)  ! This is not used when fully non-Boussinesq.
 
   ! Zero out diagnostics before accumulation.
   if (CS%TKE_diagnostics) then
@@ -403,9 +412,15 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
     ! Set the inverse density used to translating local TKE into a turbulence velocity
     SpV_dt(:) = 0.0
     if ((dt > 0.0) .and. GV%Boussinesq .or. .not.allocated(tv%SpV_avg)) then
-      do K=1,nz+1
-        SpV_dt(K) = (US%Z_to_m**3*US%s_to_T**3) / (dt*GV%Rho0)
-      enddo
+      if (CS%answer_date < 20240101) then
+        do K=1,nz+1
+          SpV_dt(K) = (US%Z_to_m**3*US%s_to_T**3) / (dt*GV%Rho0)
+        enddo
+      else
+        do K=1,nz+1
+          SpV_dt(K) = I_rho0dt
+        enddo
+      endif
     endif
 
     !   Determine the initial mech_TKE and conv_PErel, including the energy required
@@ -442,11 +457,19 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
       endif
 
       if (allocated(tv%SpV_avg) .and. .not.GV%Boussinesq) then
-        SpV_dt(1) = (US%Z_to_m**3*US%s_to_T**3) * tv%SpV_avg(i,j,1) * I_dt
-        do K=2,nz
-          SpV_dt(K) = (US%Z_to_m**3*US%s_to_T**3) * 0.5*(tv%SpV_avg(i,j,k-1) + tv%SpV_avg(i,j,k)) * I_dt
-        enddo
-        SpV_dt(nz+1) = (US%Z_to_m**3*US%s_to_T**3) * tv%SpV_avg(i,j,nz) * I_dt
+        if (CS%answer_date < 20240101) then
+          SpV_dt(1) = (US%Z_to_m**3*US%s_to_T**3) * tv%SpV_avg(i,j,1) * I_dt
+          do K=2,nz
+            SpV_dt(K) = (US%Z_to_m**3*US%s_to_T**3) * 0.5*(tv%SpV_avg(i,j,k-1) + tv%SpV_avg(i,j,k)) * I_dt
+          enddo
+          SpV_dt(nz+1) = (US%Z_to_m**3*US%s_to_T**3) * tv%SpV_avg(i,j,nz) * I_dt
+        else
+          SpV_dt(1) = tv%SpV_avg(i,j,1) * I_dt
+          do K=2,nz
+            SpV_dt(K) = 0.5*(tv%SpV_avg(i,j,k-1) + tv%SpV_avg(i,j,k)) * I_dt
+          enddo
+          SpV_dt(nz+1) = tv%SpV_avg(i,j,nz) * I_dt
+        endif
       endif
 
       B_flux = buoy_flux(i,j)
@@ -565,9 +588,13 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
   real, dimension(SZK_(GV)), intent(in)  :: dSV_dS !< The partial derivative of in-situ specific
                                                    !! volume with salinity [R-1 S-1 ~> m3 kg-1 ppt-1].
   real, dimension(SZK_(GV)+1), intent(in) :: SpV_dt !< Specific volume interpolated to interfaces
-                                                   !! divided by dt or 1.0 / (dt * Rho0) times conversion
-                                                   !! factors in [m3 Z-3 R-1 T2 s-3 ~> m3 kg-1 s-1],
-                                                   !! used to convert local TKE into a turbulence velocity.
+                                                   !! divided by dt or 1.0 / (dt * Rho0), times conversion
+                                                   !! factors for answer dates before 20240101 in
+                                                   !! [m3 Z-3 R-1 T2 s-3 ~> m3 kg-1 s-1] or without
+                                                   !! the convsersion factors for answer dates of
+                                                   !! 20240101 and later in [R-1 T-1 ~> m3 kg-1 s-1],
+                                                   !! used to convert local TKE into a turbulence
+                                                   !! velocity cubed.
   real, dimension(SZK_(GV)), intent(in)  :: TKE_forcing !< The forcing requirements to homogenize the
                                                    !! forcing that has been applied to each layer
                                                    !! [R Z3 T-2 ~> J m-2].
@@ -819,7 +846,7 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
   max_itt = 20
 
   dz_tt_min = 0.0
-  vstar_unit_scale = US%m_to_Z * US%T_to_s
+  if (CS%answer_date < 20240101) vstar_unit_scale = US%m_to_Z * US%T_to_s
 
   MLD_guess = MLD_io
 
@@ -1160,12 +1187,22 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
           dz_tt = dztot + dz_tt_min
           TKE_here = mech_TKE + CS%wstar_ustar_coef*conv_PErel
           if (TKE_here > 0.0) then
-            if (CS%wT_scheme==wT_from_cRoot_TKE) then
-              vstar = CS%vstar_scale_fac * vstar_unit_scale * (SpV_dt(K)*TKE_here)**C1_3
-            elseif (CS%wT_scheme==wT_from_RH18) then
-              Surface_Scale = max(0.05, 1.0 - dztot / MLD_guess)
-              vstar = CS%vstar_scale_fac * Surface_Scale * (CS%vstar_surf_fac*u_star + &
-                        vstar_unit_scale * (CS%wstar_ustar_coef*conv_PErel*SpV_dt(K))**C1_3)
+            if (CS%answer_date < 20240101) then
+              if (CS%wT_scheme==wT_from_cRoot_TKE) then
+                vstar = CS%vstar_scale_fac * vstar_unit_scale * (SpV_dt(K)*TKE_here)**C1_3
+              elseif (CS%wT_scheme==wT_from_RH18) then
+                Surface_Scale = max(0.05, 1.0 - dztot / MLD_guess)
+                vstar = CS%vstar_scale_fac * Surface_Scale * (CS%vstar_surf_fac*u_star + &
+                          vstar_unit_scale * (CS%wstar_ustar_coef*conv_PErel*SpV_dt(K))**C1_3)
+              endif
+            else
+              if (CS%wT_scheme==wT_from_cRoot_TKE) then
+                vstar = CS%vstar_scale_fac * cuberoot(SpV_dt(K)*TKE_here)
+              elseif (CS%wT_scheme==wT_from_RH18) then
+                Surface_Scale = max(0.05, 1.0 - dztot / MLD_guess)
+                vstar = (CS%vstar_scale_fac * Surface_Scale) * ( CS%vstar_surf_fac*u_star + &
+                          cuberoot((CS%wstar_ustar_coef*conv_PErel) * SpV_dt(K)) )
+              endif
             endif
             hbs_here = min(hb_hs(K), MixLen_shape(K))
             mixlen(K) = MAX(CS%min_mix_len, ((dz_tt*hbs_here)*vstar) / &
@@ -1209,12 +1246,22 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
               ! Does MKE_src need to be included in the calculation of vstar here?
               TKE_here = mech_TKE + CS%wstar_ustar_coef*(conv_PErel-PE_chg_max)
               if (TKE_here > 0.0) then
-                if (CS%wT_scheme==wT_from_cRoot_TKE) then
-                  vstar = CS%vstar_scale_fac * vstar_unit_scale * (SpV_dt(K)*TKE_here)**C1_3
-                elseif (CS%wT_scheme==wT_from_RH18) then
-                  Surface_Scale = max(0.05, 1. - dztot / MLD_guess)
-                  vstar = CS%vstar_scale_fac * Surface_Scale * (CS%vstar_surf_fac*u_star + &
-                                  vstar_unit_scale * (CS%wstar_ustar_coef*conv_PErel*SpV_dt(K))**C1_3)
+                if (CS%answer_date < 20240101) then
+                  if (CS%wT_scheme==wT_from_cRoot_TKE) then
+                    vstar = CS%vstar_scale_fac * vstar_unit_scale * (SpV_dt(K)*TKE_here)**C1_3
+                  elseif (CS%wT_scheme==wT_from_RH18) then
+                    Surface_Scale = max(0.05, 1. - dztot / MLD_guess)
+                    vstar = CS%vstar_scale_fac * Surface_Scale * (CS%vstar_surf_fac*u_star + &
+                                    vstar_unit_scale * (CS%wstar_ustar_coef*conv_PErel*SpV_dt(K))**C1_3)
+                  endif
+                else
+                  if (CS%wT_scheme==wT_from_cRoot_TKE) then
+                    vstar = CS%vstar_scale_fac * cuberoot(SpV_dt(K)*TKE_here)
+                  elseif (CS%wT_scheme==wT_from_RH18) then
+                    Surface_Scale = max(0.05, 1. - dztot / MLD_guess)
+                    vstar = (CS%vstar_scale_fac * Surface_Scale) * ( CS%vstar_surf_fac*u_star + &
+                                    cuberoot((CS%wstar_ustar_coef*conv_PErel) * SpV_dt(K)) )
+                  endif
                 endif
                 hbs_here = min(hb_hs(K), MixLen_shape(K))
                 mixlen(K) = max(CS%min_mix_len, ((dz_tt*hbs_here)*vstar) / &
@@ -2076,7 +2123,9 @@ subroutine energetic_PBL_init(Time, G, GV, US, param_file, diag, CS)
                  "The vintage of the order of arithmetic and expressions in the energetic "//&
                  "PBL calculations.  Values below 20190101 recover the answers from the "//&
                  "end of 2018, while higher values use updated and more robust forms of the "//&
-                 "same expressions.", &
+                 "same expressions.  Values below 20240101 use A**(1./3.) to estimate the cube "//&
+                 "root of A in several expressions, while higher values use the integer root "//&
+                 "function cuberoot(A) and therefore can work with scaled variables.", &
                  default=default_answer_date, do_not_log=.not.GV%Boussinesq)
   if (.not.GV%Boussinesq) CS%answer_date = max(CS%answer_date, 20230701)
 

From 07bace68cfc6e498f39c5731b563601d6afec389 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Tue, 30 Jan 2024 17:59:28 -0500
Subject: [PATCH 12/17] *Fix two bugs in convert_temp_salt_for_TEOS10

  Fixed two bugs on a single line of convert_temp_salt_for_TEOS10.  The first
bug was a reversal in the order of the temperature and salinity arguments to
poTemp_to_consTemp, resulting in temperatures that closely approximate the
salinities.  The second bug that was fixed on this line was temperatures being
rescaled with a factor that is appropriate for salinities. This bug-fix will
change answers dramatically for any cases that use the ROQUET_RHO, ROQUET_SPV
and TEOS10 equations of state and initialize the model with
INIT_LAYERS_FROM_Z_FILE = True.
---
 src/equation_of_state/MOM_EOS.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/equation_of_state/MOM_EOS.F90 b/src/equation_of_state/MOM_EOS.F90
index d5c7abc977..7a9de49573 100644
--- a/src/equation_of_state/MOM_EOS.F90
+++ b/src/equation_of_state/MOM_EOS.F90
@@ -1703,7 +1703,7 @@ subroutine convert_temp_salt_for_TEOS10(T, S, HI, kd, mask_z, EOS)
   do k=1,kd ; do j=HI%jsc,HI%jec ; do i=HI%isc,HI%iec
     if (mask_z(i,j,k) >= 1.0) then
       S(i,j,k) = Sref_Sprac * S(i,j,k)
-      T(i,j,k) = EOS%degC_to_C*poTemp_to_consTemp(EOS%S_to_ppt*S(i,j,k), EOS%S_to_ppt*T(i,j,k))
+      T(i,j,k) = EOS%degC_to_C*poTemp_to_consTemp(EOS%C_to_degC*T(i,j,k), EOS%S_to_ppt*S(i,j,k))
     endif
   enddo ; enddo ; enddo
 end subroutine convert_temp_salt_for_TEOS10

From 915cfe225e5e3c42103f20501c60e6aa90cd613c Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Fri, 26 Jan 2024 18:10:41 -0500
Subject: [PATCH 13/17] +ALE_remap_scalar with arbitrary thickness units

  This commit add the new optional arguments h_neglect and h_neglect_edge to
ALE_remap_scalar to allow for the thicknesses used in this routine to be
provided in any self-consistent units, including [Z ~> m], instead of just
[H ~> m or kg m-2].  To help make use of this new capability, this commit also
adds the new functions set_h_neglect and set_dz_neglect to the MOM_regridding
module.  build_grid_rho and build_grid_HyCOM1 have been refactored to use
set_h_neglect in place of the corresponding duplicated code blocks.

  This commit also adds the new optional argument h_in_Z_units to
MOM_initialize_tracer_from_Z, which in turn uses this new capability for
ALE_remap_scalar to use vertical layer extents (in Z units) rather than
thicknesses (in H units).

  Although there are new optional arguments to public interfaces, they are not
yet being exercised with this commit so no answers are changed.  Moreover, even
if they were being exercised, all Boussinesq solutions would give identical
answers.
---
 src/ALE/MOM_ALE.F90                           | 36 +++++++----
 src/ALE/MOM_regridding.F90                    | 60 ++++++++++++++-----
 .../MOM_tracer_initialization_from_Z.F90      | 56 +++++++++++------
 3 files changed, 108 insertions(+), 44 deletions(-)

diff --git a/src/ALE/MOM_ALE.F90 b/src/ALE/MOM_ALE.F90
index 77ee1192a2..543d77a0f3 100644
--- a/src/ALE/MOM_ALE.F90
+++ b/src/ALE/MOM_ALE.F90
@@ -1260,16 +1260,17 @@ end subroutine mask_near_bottom_vel
 !! h_dst must be dimensioned as a model array with GV%ke layers while h_src can
 !! have an arbitrary number of layers specified by nk_src.
 subroutine ALE_remap_scalar(CS, G, GV, nk_src, h_src, s_src, h_dst, s_dst, all_cells, old_remap, &
-                            answers_2018, answer_date )
+                            answers_2018, answer_date, h_neglect, h_neglect_edge)
   type(remapping_CS),                      intent(in)    :: CS        !< Remapping control structure
   type(ocean_grid_type),                   intent(in)    :: G         !< Ocean grid structure
   type(verticalGrid_type),                 intent(in)    :: GV        !< Ocean vertical grid structure
   integer,                                 intent(in)    :: nk_src    !< Number of levels on source grid
   real, dimension(SZI_(G),SZJ_(G),nk_src), intent(in)    :: h_src     !< Level thickness of source grid
-                                                                      !! [H ~> m or kg m-2]
+                                                                      !! [H ~> m or kg m-2] or other units
+                                                                      !! if H_neglect is provided
   real, dimension(SZI_(G),SZJ_(G),nk_src), intent(in)    :: s_src     !< Scalar on source grid, in arbitrary units [A]
-  real, dimension(SZI_(G),SZJ_(G),SZK_(GV)),intent(in)   :: h_dst     !< Level thickness of destination grid
-                                                                      !! [H ~> m or kg m-2]
+  real, dimension(SZI_(G),SZJ_(G),SZK_(GV)),intent(in)   :: h_dst     !< Level thickness of destination grid in the
+                                                                      !! same units as h_src, often [H ~> m or kg m-2]
   real, dimension(SZI_(G),SZJ_(G),SZK_(GV)),intent(inout) :: s_dst    !< Scalar on destination grid, in the same
                                                                       !! arbitrary units as s_src [A]
   logical, optional,                       intent(in)    :: all_cells !< If false, only reconstruct for
@@ -1283,10 +1284,16 @@ subroutine ALE_remap_scalar(CS, G, GV, nk_src, h_src, s_src, h_dst, s_dst, all_c
                                                                       !! use more robust forms of the same expressions.
   integer,                       optional, intent(in)    :: answer_date !< The vintage of the expressions to use
                                                                       !! for remapping
+  real,                          optional, intent(in)    :: h_neglect !< A negligibly small thickness used in
+                                                                      !! remapping cell reconstructions, in the same
+                                                                      !! units as h_src, often [H ~> m or kg m-2]
+  real,                          optional, intent(in)    :: h_neglect_edge !< A negligibly small thickness used in
+                                                                      !! remapping edge value calculations, in the same
+                                                                      !! units as h_src, often [H ~> m or kg m-2]
   ! Local variables
   integer :: i, j, k, n_points
   real :: dx(GV%ke+1) ! Change in interface position [H ~> m or kg m-2]
-  real :: h_neglect, h_neglect_edge  ! Tiny thicknesses used in remapping [H ~> m or kg m-2]
+  real :: h_neg, h_neg_edge  ! Tiny thicknesses used in remapping [H ~> m or kg m-2]
   logical :: ignore_vanished_layers, use_remapping_core_w, use_2018_remap
 
   ignore_vanished_layers = .false.
@@ -1297,12 +1304,17 @@ subroutine ALE_remap_scalar(CS, G, GV, nk_src, h_src, s_src, h_dst, s_dst, all_c
   use_2018_remap = .true. ; if (present(answers_2018)) use_2018_remap = answers_2018
   if (present(answer_date)) use_2018_remap = (answer_date < 20190101)
 
-  if (.not.use_2018_remap) then
-    h_neglect = GV%H_subroundoff ; h_neglect_edge = GV%H_subroundoff
-  elseif (GV%Boussinesq) then
-    h_neglect = GV%m_to_H*1.0e-30 ; h_neglect_edge = GV%m_to_H*1.0e-10
+  if (present(h_neglect)) then
+    h_neg = h_neglect
+    h_neg_edge = h_neg ; if (present(h_neglect_edge)) h_neg_edge = h_neglect_edge
   else
-    h_neglect = GV%kg_m2_to_H*1.0e-30 ; h_neglect_edge = GV%kg_m2_to_H*1.0e-10
+    if (.not.use_2018_remap) then
+      h_neg = GV%H_subroundoff ; h_neg_edge = GV%H_subroundoff
+    elseif (GV%Boussinesq) then
+      h_neg = GV%m_to_H*1.0e-30 ; h_neg_edge = GV%m_to_H*1.0e-10
+    else
+      h_neg = GV%kg_m2_to_H*1.0e-30 ; h_neg_edge = GV%kg_m2_to_H*1.0e-10
+    endif
   endif
 
   !$OMP parallel do default(shared) firstprivate(n_points,dx)
@@ -1318,10 +1330,10 @@ subroutine ALE_remap_scalar(CS, G, GV, nk_src, h_src, s_src, h_dst, s_dst, all_c
       if (use_remapping_core_w) then
         call dzFromH1H2( n_points, h_src(i,j,1:n_points), GV%ke, h_dst(i,j,:), dx )
         call remapping_core_w(CS, n_points, h_src(i,j,1:n_points), s_src(i,j,1:n_points), &
-                              GV%ke, dx, s_dst(i,j,:), h_neglect, h_neglect_edge)
+                              GV%ke, dx, s_dst(i,j,:), h_neg, h_neg_edge)
       else
         call remapping_core_h(CS, n_points, h_src(i,j,1:n_points), s_src(i,j,1:n_points), &
-                              GV%ke, h_dst(i,j,:), s_dst(i,j,:), h_neglect, h_neglect_edge)
+                              GV%ke, h_dst(i,j,:), s_dst(i,j,:), h_neg, h_neg_edge)
       endif
     else
       s_dst(i,j,:) = 0.
diff --git a/src/ALE/MOM_regridding.F90 b/src/ALE/MOM_regridding.F90
index 8ef0679358..904164c8e7 100644
--- a/src/ALE/MOM_regridding.F90
+++ b/src/ALE/MOM_regridding.F90
@@ -144,6 +144,7 @@ module MOM_regridding
 public getCoordinateResolution, getCoordinateInterfaces
 public getCoordinateUnits, getCoordinateShortName, getStaticThickness
 public DEFAULT_COORDINATE_MODE
+public set_h_neglect, set_dz_neglect
 public get_zlike_CS, get_sigma_CS, get_rho_CS
 
 !> Documentation for coordinate options
@@ -1416,13 +1417,7 @@ subroutine build_rho_grid( G, GV, US, h, nom_depth_H, tv, dzInterface, remapCS,
 #endif
   logical :: ice_shelf
 
-  if (CS%remap_answer_date >= 20190101) then
-    h_neglect = GV%H_subroundoff ; h_neglect_edge = GV%H_subroundoff
-  elseif (GV%Boussinesq) then
-    h_neglect = GV%m_to_H*1.0e-30 ; h_neglect_edge = GV%m_to_H*1.0e-10
-  else
-    h_neglect = GV%kg_m2_to_H*1.0e-30 ; h_neglect_edge = GV%kg_m2_to_H*1.0e-10
-  endif
+  h_neglect = set_h_neglect(GV, CS%remap_answer_date, h_neglect_edge)
 
   nz = GV%ke
   ice_shelf = present(frac_shelf_h)
@@ -1575,13 +1570,7 @@ subroutine build_grid_HyCOM1( G, GV, US, h, nom_depth_H, tv, h_new, dzInterface,
   real :: z_top_col, totalThickness
   logical :: ice_shelf
 
-  if (CS%remap_answer_date >= 20190101) then
-    h_neglect = GV%H_subroundoff ; h_neglect_edge = GV%H_subroundoff
-  elseif (GV%Boussinesq) then
-    h_neglect = GV%m_to_H*1.0e-30 ; h_neglect_edge = GV%m_to_H*1.0e-10
-  else
-    h_neglect = GV%kg_m2_to_H*1.0e-30 ; h_neglect_edge = GV%kg_m2_to_H*1.0e-10
-  endif
+  h_neglect = set_h_neglect(GV, CS%remap_answer_date, h_neglect_edge)
 
   if (.not.CS%target_density_set) call MOM_error(FATAL, "build_grid_HyCOM1 : "//&
         "Target densities must be set before build_grid_HyCOM1 is called.")
@@ -2095,6 +2084,49 @@ subroutine write_regrid_file( CS, GV, filepath )
 
 end subroutine write_regrid_file
 
+!> Set appropriate values for the negligible thicknesses used for remapping based on an answer date.
+function set_h_neglect(GV, remap_answer_date, h_neglect_edge) result(h_neglect)
+  type(verticalGrid_type), intent(in)  :: GV   !< Ocean vertical grid structure
+  integer,                 intent(in)  :: remap_answer_date !< The vintage of the expressions to use
+                                               !! for remapping.  Values below 20190101 recover the
+                                               !! remapping answers from 2018. Higher values use more
+                                               !! robust forms of the same remapping algorithms.
+  real,                    intent(out) :: h_neglect_edge !< A negligibly small thickness used in
+                                               !! remapping edge value calculations [H ~> m or kg m-2]
+  real                                 :: h_neglect !< A negligibly small thickness used in
+                                               !! remapping cell reconstructions [H ~> m or kg m-2]
+
+  if (remap_answer_date >= 20190101) then
+    h_neglect = GV%H_subroundoff ; h_neglect_edge = GV%H_subroundoff
+  elseif (GV%Boussinesq) then
+    h_neglect = GV%m_to_H*1.0e-30 ; h_neglect_edge = GV%m_to_H*1.0e-10
+  else
+    h_neglect = GV%kg_m2_to_H*1.0e-30 ; h_neglect_edge = GV%kg_m2_to_H*1.0e-10
+  endif
+end function set_h_neglect
+
+!> Set appropriate values for the negligible vertical layer extents used for remapping based on an answer date.
+function set_dz_neglect(GV, US, remap_answer_date, dz_neglect_edge) result(dz_neglect)
+  type(verticalGrid_type), intent(in)  :: GV   !< Ocean vertical grid structure
+  type(unit_scale_type),   intent(in)  :: US   !< A dimensional unit scaling type
+  integer,                 intent(in)  :: remap_answer_date !< The vintage of the expressions to use
+                                               !! for remapping.  Values below 20190101 recover the
+                                               !! remapping answers from 2018. Higher values use more
+                                               !! robust forms of the same remapping algorithms.
+  real,                    intent(out) :: dz_neglect_edge !< A negligibly small vertical layer extent
+                                               !! used in remapping edge value calculations [Z ~> m]
+  real                                 :: dz_neglect !< A negligibly small vertical layer extent
+                                               !! used in remapping cell reconstructions [Z ~> m]
+
+  if (remap_answer_date >= 20190101) then
+    dz_neglect = GV%dZ_subroundoff ; dz_neglect_edge = GV%dZ_subroundoff
+  elseif (GV%Boussinesq) then
+    dz_neglect = US%m_to_Z*1.0e-30 ; dz_neglect_edge = US%m_to_Z*1.0e-10
+  else
+    dz_neglect = GV%kg_m2_to_H * (GV%H_to_m*US%m_to_Z) * 1.0e-30
+    dz_neglect_edge = GV%kg_m2_to_H * (GV%H_to_m*US%m_to_Z) * 1.0e-10
+  endif
+end function set_dz_neglect
 
 !------------------------------------------------------------------------------
 !> Query the fixed resolution data
diff --git a/src/initialization/MOM_tracer_initialization_from_Z.F90 b/src/initialization/MOM_tracer_initialization_from_Z.F90
index 808430df2c..5a172b5d97 100644
--- a/src/initialization/MOM_tracer_initialization_from_Z.F90
+++ b/src/initialization/MOM_tracer_initialization_from_Z.F90
@@ -3,20 +3,21 @@ module MOM_tracer_initialization_from_Z
 
 ! This file is part of MOM6. See LICENSE.md for the license.
 
-use MOM_debugging, only : hchksum
-use MOM_cpu_clock, only : cpu_clock_id, cpu_clock_begin, cpu_clock_end
-use MOM_cpu_clock, only : CLOCK_ROUTINE, CLOCK_LOOP
-use MOM_domains, only : pass_var
+use MOM_debugging,     only : hchksum
+use MOM_cpu_clock,     only : cpu_clock_id, cpu_clock_begin, cpu_clock_end
+use MOM_cpu_clock,     only : CLOCK_ROUTINE, CLOCK_LOOP
+use MOM_domains,       only : pass_var
 use MOM_error_handler, only : MOM_mesg, MOM_error, FATAL, WARNING
 use MOM_error_handler, only : callTree_enter, callTree_leave, callTree_waypoint
-use MOM_file_parser, only : get_param, param_file_type, log_version
-use MOM_grid, only : ocean_grid_type
+use MOM_file_parser,   only : get_param, param_file_type, log_version
+use MOM_grid,          only : ocean_grid_type
 use MOM_horizontal_regridding, only : myStats, horiz_interp_and_extrap_tracer
 use MOM_interface_heights, only : dz_to_thickness_simple
-use MOM_remapping, only : remapping_CS, initialize_remapping
-use MOM_unit_scaling, only : unit_scale_type
-use MOM_verticalGrid, only : verticalGrid_type
-use MOM_ALE, only : ALE_remap_scalar
+use MOM_regridding,    only : set_dz_neglect
+use MOM_remapping,     only : remapping_CS, initialize_remapping
+use MOM_unit_scaling,  only : unit_scale_type
+use MOM_verticalGrid,  only : verticalGrid_type
+use MOM_ALE,           only : ALE_remap_scalar
 
 implicit none ; private
 
@@ -36,12 +37,13 @@ module MOM_tracer_initialization_from_Z
 !> Initializes a tracer from a z-space data file, including any lateral regridding that is needed.
 subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_nam, &
                           src_var_unit_conversion, src_var_record, homogenize, &
-                          useALEremapping, remappingScheme, src_var_gridspec )
+                          useALEremapping, remappingScheme, src_var_gridspec, h_in_Z_units )
   type(ocean_grid_type),      intent(inout) :: G   !< Ocean grid structure.
   type(verticalGrid_type),    intent(in)    :: GV  !< Ocean vertical grid structure.
   type(unit_scale_type),      intent(in)    :: US  !< A dimensional unit scaling type
   real, dimension(SZI_(G),SZJ_(G),SZK_(GV)), &
-                              intent(in)    :: h   !< Layer thickness [H ~> m or kg m-2].
+                              intent(in)    :: h   !< Layer thicknesses, in [H ~> m or kg m-2] or
+                                                   !! [Z ~> m] depending on the value of h_in_Z_units.
   real, dimension(:,:,:),     pointer       :: tr  !< Pointer to array to be initialized [CU ~> conc]
   type(param_file_type),      intent(in)    :: PF  !< parameter file
   character(len=*),           intent(in)    :: src_file !< source filename
@@ -54,12 +56,18 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_
   character(len=*), optional, intent(in)    :: remappingScheme !< remapping scheme to use.
   character(len=*), optional, intent(in)    :: src_var_gridspec !< Source variable name in a gridspec file.
                                                                 !! This is not implemented yet.
+  logical,          optional, intent(in)    :: h_in_Z_units !< If present and true, the input grid
+                                                            !! thicknesses are in the units of height
+                                                            !! ([Z ~> m]) instead of the usual units of
+                                                            !! thicknesses ([H ~> m or kg m-2])
+
   ! Local variables
   real :: land_fill = 0.0  ! A value to use to replace missing values [CU ~> conc]
   real :: convert ! A conversion factor into the model's internal units [CU conc-1 ~> 1]
   integer            :: recnum
   character(len=64)  :: remapScheme
   logical            :: homog, useALE
+  logical            :: h_is_in_Z_units
 
   ! This include declares and sets the variable "version".
 # include "version_variable.h"
@@ -84,6 +92,10 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_
   type(verticalGrid_type) :: GV_loc ! A temporary vertical grid structure
 
   real :: missing_value ! A value indicating that there is no valid input data at this point [CU ~> conc]
+  real :: dz_neglect              ! A negligibly small vertical layer extent used in
+                                  ! remapping cell reconstructions [Z ~> m]
+  real :: dz_neglect_edge         ! A negligibly small vertical layer extent used in
+                                  ! remapping edge value calculations [Z ~> m]
   integer :: nPoints    ! The number of valid input data points in a column
   integer :: id_clock_routine, id_clock_ALE
   integer :: default_answer_date  ! The default setting for the various ANSWER_DATE flags.
@@ -143,6 +155,8 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_
   convert = 1.0
   if (PRESENT(src_var_unit_conversion)) convert = src_var_unit_conversion
 
+  h_is_in_Z_units = .false. ; if (present(h_in_Z_units)) h_is_in_Z_units = h_in_Z_units
+
   call horiz_interp_and_extrap_tracer(src_file, src_var_nam, recnum, &
             G, tr_z, mask_z, z_in, z_edges_in, missing_value, &
             scale=convert, homogenize=homog, m_to_Z=US%m_to_Z, answer_date=hor_regrid_answer_date)
@@ -185,12 +199,18 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_
       dzSrc(i,j,:) = h1(:)
     enddo ; enddo
 
-    ! Equation of state data is not available, so a simpler rescaling will have to suffice,
-    ! but it might be problematic in non-Boussinesq mode.
-    GV_loc = GV ; GV_loc%ke = kd
-    call dz_to_thickness_simple(dzSrc, hSrc, G, GV_loc, US)
-
-    call ALE_remap_scalar(remapCS, G, GV, kd, hSrc, tr_z, h, tr, all_cells=.false., answer_date=remap_answer_date )
+    if (h_is_in_Z_units) then
+      dz_neglect = set_dz_neglect(GV, US, remap_answer_date, dz_neglect_edge)
+      call ALE_remap_scalar(remapCS, G, GV, kd, hSrc, tr_z, h, tr, all_cells=.false., answer_date=remap_answer_date, &
+                            H_neglect=dz_neglect, H_neglect_edge=dz_neglect_edge)
+    else
+      ! Equation of state data is not available, so a simpler rescaling will have to suffice,
+      ! but it might be problematic in non-Boussinesq mode.
+      GV_loc = GV ; GV_loc%ke = kd
+      call dz_to_thickness_simple(dzSrc, hSrc, G, GV_loc, US)
+
+      call ALE_remap_scalar(remapCS, G, GV, kd, hSrc, tr_z, h, tr, all_cells=.false., answer_date=remap_answer_date )
+    endif
 
     deallocate( hSrc )
     deallocate( dzSrc )

From e7a7a82ab33a339b124c0435a8005769417c321b Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Sat, 27 Jan 2024 13:10:18 -0500
Subject: [PATCH 14/17] (*)MOM_temp_salt_init_from_Z Z-unit tracer remap

  Revise MOM_temp_salt_initialize_from_Z in cases when Z_INIT_REMAP_GENERAL is
False to call ALE_remap_scalar with vertical layer extents (in Z units) rather
than layer thicknesses (in H units).  When in fully non-Boussinesq mode, this
same routine uses dz_to_thickness (using the full equation of state) rather than
dz_to_thickness_simple to initialize the layer thicknesses.  Boussinesq answers
are bitwise identical, but answers can change in some fully non-Boussinesq
cases.
---
 .../MOM_state_initialization.F90              | 36 ++++++++++++++-----
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/initialization/MOM_state_initialization.F90 b/src/initialization/MOM_state_initialization.F90
index 7dfced262b..855a6f2aa0 100644
--- a/src/initialization/MOM_state_initialization.F90
+++ b/src/initialization/MOM_state_initialization.F90
@@ -92,6 +92,7 @@ module MOM_state_initialization
 use MOM_ALE, only : ALE_remap_scalar, ALE_regrid_accelerated, TS_PLM_edge_values
 use MOM_regridding, only : regridding_CS, set_regrid_params, getCoordinateResolution
 use MOM_regridding, only : regridding_main, regridding_preadjust_reqs, convective_adjustment
+use MOM_regridding, only : set_dz_neglect
 use MOM_remapping, only : remapping_CS, initialize_remapping, remapping_core_h
 use MOM_horizontal_regridding, only : horiz_interp_and_extrap_tracer, homogenize_field
 use MOM_oda_incupd, only: oda_incupd_CS, initialize_oda_incupd_fixed, initialize_oda_incupd
@@ -2483,6 +2484,10 @@ subroutine MOM_temp_salt_initialize_from_Z(h, tv, depth_tot, G, GV, US, PF, just
   real, dimension(:,:,:), allocatable :: h1  ! Thicknesses on the input grid [H ~> m or kg m-2].
   real, dimension(:,:,:), allocatable :: dz_interface ! Change in position of interface due to
                                     ! regridding [H ~> m or kg m-2]
+  real :: dz_neglect                ! A negligibly small vertical layer extent used in
+                                    ! remapping cell reconstructions [Z ~> m]
+  real :: dz_neglect_edge           ! A negligibly small vertical layer extent used in
+                                    ! remapping edge value calculations [Z ~> m]
   real :: zTopOfCell, zBottomOfCell ! Heights in Z units [Z ~> m].
   type(regridding_CS) :: regridCS ! Regridding parameters and work arrays
   type(remapping_CS) :: remapCS ! Remapping parameters and work arrays
@@ -2768,6 +2773,11 @@ subroutine MOM_temp_salt_initialize_from_Z(h, tv, depth_tot, G, GV, US, PF, just
                             frac_shelf_h=frac_shelf_h )
 
       deallocate( dz_interface )
+
+      call ALE_remap_scalar(remapCS, G, GV, nkd, h1, tmpT1dIn, h, tv%T, all_cells=remap_full_column, &
+                            old_remap=remap_old_alg, answer_date=remap_answer_date )
+      call ALE_remap_scalar(remapCS, G, GV, nkd, h1, tmpS1dIn, h, tv%S, all_cells=remap_full_column, &
+                            old_remap=remap_old_alg, answer_date=remap_answer_date )
     else
       ! This is the old way of initializing to z* coordinates only
       allocate( hTarget(nz) )
@@ -2788,16 +2798,24 @@ subroutine MOM_temp_salt_initialize_from_Z(h, tv, depth_tot, G, GV, US, PF, just
       enddo ; enddo
       deallocate( hTarget )
 
-      ! This is a simple conversion of the target grid to thickness units that may not be
-      ! appropriate in non-Boussinesq mode.
-      call dz_to_thickness_simple(dz, h, G, GV, US)
+      dz_neglect = set_dz_neglect(GV, US, remap_answer_date, dz_neglect_edge)
+      call ALE_remap_scalar(remapCS, G, GV, nkd, dz1, tmpT1dIn, dz, tv%T, all_cells=remap_full_column, &
+                            old_remap=remap_old_alg, answer_date=remap_answer_date, &
+                            H_neglect=dz_neglect, H_neglect_edge=dz_neglect_edge)
+      call ALE_remap_scalar(remapCS, G, GV, nkd, dz1, tmpS1dIn, dz, tv%S, all_cells=remap_full_column, &
+                            old_remap=remap_old_alg, answer_date=remap_answer_date, &
+                            H_neglect=dz_neglect, H_neglect_edge=dz_neglect_edge)
+
+      if (GV%Boussinesq .or. GV%semi_Boussinesq) then
+        ! This is a simple conversion of the target grid to thickness units that is not
+        ! appropriate in non-Boussinesq mode.
+        call dz_to_thickness_simple(dz, h, G, GV, US)
+      else
+        ! Convert dz into thicknesses in units of H using the equation of state as appropriate.
+        call dz_to_thickness(dz, tv, h, G, GV, US)
+      endif
     endif
 
-    call ALE_remap_scalar(remapCS, G, GV, nkd, h1, tmpT1dIn, h, tv%T, all_cells=remap_full_column, &
-                          old_remap=remap_old_alg, answer_date=remap_answer_date )
-    call ALE_remap_scalar(remapCS, G, GV, nkd, h1, tmpS1dIn, h, tv%S, all_cells=remap_full_column, &
-                          old_remap=remap_old_alg, answer_date=remap_answer_date )
-
     deallocate( dz1 )
     deallocate( h1 )
     deallocate( tmpT1dIn )
@@ -2879,7 +2897,7 @@ subroutine MOM_temp_salt_initialize_from_Z(h, tv, depth_tot, G, GV, US, PF, just
                                  ks, G, GV, US, PF, just_read)
     endif
 
-    ! Now convert thicknesses to units of H.
+    ! Now convert dz into thicknesses in units of H.
     call dz_to_thickness(dz, tv, h, G, GV, US)
 
   endif ! useALEremapping

From 9a6ddee4787192b41da90ff803ae29d54e577d54 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Sun, 28 Jan 2024 10:16:54 -0500
Subject: [PATCH 15/17] *+non-Boussinesq revisions to MOM_generic_tracer

  Revised initialize_MOM_generic_tracer to use thickness_to_dz to get the layer
vertical extents and then provide these to MOM_initialize_tracer_from_Z to read
in initial generic tracer concentrations from Z-space files.  The previous
approach inappropriately used an simple multiplicative rescaling (via a call to
dz_to_thickness_simple in MOM_initialize_tracer_from_Z) by a factor that
includes the Boussinesq reference density when in non-Boussinesq mode.  A new
thermo_vars_type arguments was added to initialize_MOM_generic_tracer to allow
for this change.

  Also revised MOM_generic_tracer_column_physics to use thickness_to_dz instead
of a simple multiplicative rescaling to get the layer vertical extents (in m)
that are used in calls to generic_tracer_source.  The multiplicative factor that
was used previously (GV%H_to_m) includes the Boussinesq reference density and
hence is inappropriate in non-Boussinesq mode; using thickness_to_dz avoids
this.

  Also added comments documenting the meaning and units of about 30 real
variables in the MOM_generic_tracer routines.

  There is a new mandatory argument to initialize_MOM_generic_tracer.  All
Boussinseq mode answers are bitwise identical, but in non-Boussinesq mode mode
generic tracer answers are changed by avoiding the use of the Boussinesq
reference density in several places.
---
 src/tracer/MOM_generic_tracer.F90      | 144 ++++++++++++++-----------
 src/tracer/MOM_tracer_flow_control.F90 |   2 +-
 2 files changed, 84 insertions(+), 62 deletions(-)

diff --git a/src/tracer/MOM_generic_tracer.F90 b/src/tracer/MOM_generic_tracer.F90
index 131110e6b2..7f550d8de5 100644
--- a/src/tracer/MOM_generic_tracer.F90
+++ b/src/tracer/MOM_generic_tracer.F90
@@ -38,6 +38,7 @@ module MOM_generic_tracer
   use MOM_forcing_type, only : forcing, optics_type
   use MOM_grid, only : ocean_grid_type
   use MOM_hor_index, only : hor_index_type
+  use MOM_interface_heights, only : thickness_to_dz
   use MOM_io, only : file_exists, MOM_read_data, slasher
   use MOM_open_boundary, only : ocean_OBC_type
   use MOM_open_boundary, only : register_obgc_segments, fill_obgc_segments
@@ -75,8 +76,10 @@ module MOM_generic_tracer
     character(len = 200) :: IC_file !< The file in which the generic tracer initial values can
                                     !! be found, or an empty string for internal initialization.
     logical :: Z_IC_file !< If true, the generic_tracer IC_file is in Z-space.  The default is false.
-    real :: tracer_IC_val = 0.0    !< The initial value assigned to tracers.
-    real :: tracer_land_val = -1.0 !< The values of tracers used where  land is masked out.
+    real :: tracer_IC_val = 0.0    !< The initial value assigned to tracers, in
+                                   !! concentration units [conc]
+    real :: tracer_land_val = -1.0 !< The values of tracers used where land is masked out, in
+                                   !! concentration units [conc]
     logical :: tracers_may_reinit  !< If true, tracers may go through the
                                    !! initialization code if they are not found in the restart files.
 
@@ -102,6 +105,7 @@ function register_MOM_generic_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS)
     type(tracer_registry_type), pointer      :: tr_Reg     !< Pointer to the control structure for the tracer
                                                            !! advection and diffusion module.
     type(MOM_restart_CS), target, intent(inout)  :: restart_CS !< MOM restart control struct
+
     ! Local variables
     logical :: register_MOM_generic_tracer
     logical :: obc_has
@@ -113,14 +117,17 @@ function register_MOM_generic_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS)
     ! These can be overridden later in via the field manager?
 
     integer :: ntau, axes(3)
-    type(g_tracer_type), pointer      :: g_tracer,g_tracer_next
-    character(len=fm_string_len)      :: g_tracer_name,longname,units
-    character(len=fm_string_len)      :: obc_src_file_name,obc_src_field_name
-    real                              :: lfac_in,lfac_out
-    real, dimension(:,:,:,:), pointer   :: tr_field
-    real, dimension(:,:,:), pointer     :: tr_ptr
-    real, dimension(HI%isd:HI%ied, HI%jsd:HI%jed,GV%ke)         :: grid_tmask
-    integer, dimension(HI%isd:HI%ied, HI%jsd:HI%jed)           :: grid_kmt
+    type(g_tracer_type), pointer      :: g_tracer, g_tracer_next
+    character(len=fm_string_len)      :: g_tracer_name, longname,units
+    character(len=fm_string_len)      :: obc_src_file_name, obc_src_field_name
+    real :: lfac_in   ! Multiplicative factor used in setting the tracer-specific inverse length
+                      ! scales associated with inflowing tracer reservoirs at OBCs [nondim]
+    real :: lfac_out  ! Multiplicative factor used in setting the tracer-specific inverse length
+                      ! scales associated with outflowing tracer reservoirs at OBCs [nondim]
+    real, dimension(:,:,:,:), pointer   :: tr_field ! A pointer to a generic tracer field, in concentration units [conc]
+    real, dimension(:,:,:), pointer     :: tr_ptr   ! A pointer to a generic tracer field, in concentration units [conc]
+    real,    dimension(SZI_(HI),SZJ_(HI),SZK_(GV)) :: grid_tmask ! A 3-d copy of G%mask2dT [nondim]
+    integer, dimension(SZI_(HI),SZJ_(HI))          :: grid_kmt   ! A 2-d array of nk
 
     register_MOM_generic_tracer = .false.
     if (associated(CS)) then
@@ -141,7 +148,7 @@ function register_MOM_generic_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS)
   ! Read all relevant parameters and write them to the model log.
     call log_version(param_file, sub_name, version, "")
     call get_param(param_file, sub_name, "GENERIC_TRACER_IC_FILE", CS%IC_file, &
-                 "The file in which the generic trcer initial values can "//&
+                 "The file in which the generic tracer initial values can "//&
                  "be found, or an empty string for internal initialization.", &
                  default=" ")
     if ((len_trim(CS%IC_file) > 0) .and. (scan(CS%IC_file,'/') == 0)) then
@@ -169,7 +176,7 @@ function register_MOM_generic_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS)
 
     !Fields cannot be diag registered as they are allocated and have to registered later.
     grid_tmask(:,:,:) = 0.0
-    grid_kmt(:,:) = 0.0
+    grid_kmt(:,:) = 0
     axes(:) = -1
 
     !
@@ -222,23 +229,26 @@ end function register_MOM_generic_tracer
 
   !> Register OBC segments for generic tracers
   subroutine register_MOM_generic_tracer_segments(CS, GV, OBC, tr_Reg, param_file)
-    type(MOM_generic_tracer_CS),           pointer    :: CS      !< Pointer to the control structure for this module.
-    type(verticalGrid_type),    intent(in)   :: GV         !< The ocean's vertical grid structure
-    type(ocean_OBC_type),       pointer      :: OBC        !< This open boundary condition type specifies whether,
-                                                           !! where, and what open boundary conditions are used.
-    type(tracer_registry_type), pointer      :: tr_Reg     !< Pointer to the control structure for the tracer
-                                                           !! advection and diffusion module.
-    type(param_file_type),      intent(in)   :: param_file !< A structure to parse for run-time parameters
+    type(MOM_generic_tracer_CS), pointer    :: CS         !< Pointer to the control structure for this module.
+    type(verticalGrid_type),     intent(in) :: GV         !< The ocean's vertical grid structure
+    type(ocean_OBC_type),        pointer    :: OBC        !< This open boundary condition type specifies whether,
+                                                          !! where, and what open boundary conditions are used.
+    type(tracer_registry_type),  pointer    :: tr_Reg     !< Pointer to the control structure for the tracer
+                                                          !! advection and diffusion module.
+    type(param_file_type),       intent(in) :: param_file !< A structure to parse for run-time parameters
+
     ! Local variables
     logical :: obc_has
     ! This include declares and sets the variable "version".
 #   include "version_variable.h"
-
     character(len=128), parameter :: sub_name = 'register_MOM_generic_tracer_segments'
     type(g_tracer_type), pointer      :: g_tracer,g_tracer_next
     character(len=fm_string_len)      :: g_tracer_name
-    character(len=fm_string_len)      :: obc_src_file_name,obc_src_field_name
-    real                              :: lfac_in,lfac_out
+    character(len=fm_string_len)      :: obc_src_file_name, obc_src_field_name
+    real :: lfac_in   ! Multiplicative factor used in setting the tracer-specific inverse length
+                      ! scales associated with inflowing tracer reservoirs at OBCs [nondim]
+    real :: lfac_out  ! Multiplicative factor used in setting the tracer-specific inverse length
+                      ! scales associated with outflowing tracer reservoirs at OBCs [nondim]
 
     if (.NOT. associated(OBC)) return
     !Get the tracer list
@@ -266,6 +276,7 @@ subroutine register_MOM_generic_tracer_segments(CS, GV, OBC, tr_Reg, param_file)
     enddo
 
   end subroutine register_MOM_generic_tracer_segments
+
   !>  Initialize phase II:  Initialize required variables for generic tracers
   !!  There are some steps of initialization that cannot be done in register_MOM_generic_tracer
   !!  This is the place and time to do them:
@@ -275,15 +286,17 @@ end subroutine register_MOM_generic_tracer_segments
   !!
   !!   This subroutine initializes the NTR tracer fields in tr(:,:,:,:)
   !! and it sets up the tracer output.
-  subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file, diag, OBC, CS, &
-                                          sponge_CSp, ALE_sponge_CSp)
+  subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, tv, param_file, diag, OBC, &
+                                           CS, sponge_CSp, ALE_sponge_CSp)
     logical,                               intent(in) :: restart !< .true. if the fields have already been
                                                                  !! read from a restart file.
     type(time_type), target,               intent(in) :: day     !< Time of the start of the run.
     type(ocean_grid_type),                 intent(inout) :: G    !< The ocean's grid structure
     type(verticalGrid_type),               intent(in)    :: GV   !< The ocean's vertical grid structure
     type(unit_scale_type),                 intent(in)    :: US   !< A dimensional unit scaling type
-    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)), intent(in) :: h    !< Layer thicknesses [H ~> m or kg m-2]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)), intent(in) :: h   !< Layer thicknesses [H ~> m or kg m-2]
+    type(thermo_var_ptrs),                 intent(in) :: tv      !< A structure pointing to various thermodynamic
+                                                                 !! variables
     type(param_file_type),                 intent(in) :: param_file !< A structure to parse for run-time parameters
     type(diag_ctrl),               target, intent(in) :: diag    !< Regulates diagnostic output.
     type(ocean_OBC_type),                  pointer    :: OBC     !< This open boundary condition type specifies whether,
@@ -298,10 +311,11 @@ subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file,
     integer :: i, j, k, isc, iec, jsc, jec, nk
     type(g_tracer_type), pointer    :: g_tracer,g_tracer_next
     character(len=fm_string_len)      :: g_tracer_name
-    real, dimension(:,:,:,:), pointer   :: tr_field
-    real, dimension(:,:,:), pointer     :: tr_ptr
-    real,    dimension(G%isd:G%ied, G%jsd:G%jed, 1:GV%ke) :: grid_tmask
-    integer, dimension(G%isd:G%ied, G%jsd:G%jed)          :: grid_kmt
+    real, dimension(:,:,:,:), pointer   :: tr_field ! A pointer to a generic tracer field, in concentration units [conc]
+    real, dimension(:,:,:), pointer     :: tr_ptr   ! A pointer to a generic tracer field, in concentration units [conc]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: dz ! Layer vertical extent [Z ~> m]
+    real,    dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: grid_tmask ! A 3-d copy of G%mask2dT [nondim]
+    integer, dimension(SZI_(G),SZJ_(G))          :: grid_kmt   ! A 2-d array of nk
 
     !! 2010/02/04  Add code to re-initialize Generic Tracers if needed during a model simulation
     !! By default, restart cpio should not contain a Generic Tracer IC file and step below will be skipped.
@@ -316,6 +330,8 @@ subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file,
     !For each tracer name get its  fields
     g_tracer=>CS%g_tracer_list
 
+    call thickness_to_dz(h, tv, dz, G, GV, US)
+
     do
       if (INDEX(CS%IC_file, '_NULL_') /= 0) then
         call MOM_error(WARNING, "The name of the IC_file "//trim(CS%IC_file)//&
@@ -335,12 +351,11 @@ subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file,
                               "initializing generic tracer "//trim(g_tracer_name)//&
                               " using MOM_initialize_tracer_from_Z ")
 
-          call MOM_initialize_tracer_from_Z(h, tr_ptr, G, GV, US, param_file,               &
-                                 src_file = g_tracer%src_file,                              &
-                                 src_var_nam = g_tracer%src_var_name,                       &
-                                 src_var_unit_conversion = g_tracer%src_var_unit_conversion,&
-                                 src_var_record = g_tracer%src_var_record,                  &
-                                 src_var_gridspec = g_tracer%src_var_gridspec               )
+          call MOM_initialize_tracer_from_Z(dz, tr_ptr, G, GV, US, param_file, &
+                                 src_file=g_tracer%src_file, src_var_nam=g_tracer%src_var_name, &
+                                 src_var_unit_conversion=g_tracer%src_var_unit_conversion, &
+                                 src_var_record=g_tracer%src_var_record, src_var_gridspec=g_tracer%src_var_gridspec, &
+                                 h_in_Z_units=.true.)
 
           !Check/apply the bounds for each g_tracer
           do k=1,nk ; do j=jsc,jec ; do i=isc,iec
@@ -466,8 +481,9 @@ subroutine MOM_generic_tracer_column_physics(h_old, h_new, ea, eb, fluxes, Hml,
     type(MOM_generic_tracer_CS), pointer :: CS   !< Pointer to the control structure for this module.
     type(thermo_var_ptrs),   intent(in) :: tv    !< A structure pointing to various thermodynamic variables
     type(optics_type),       intent(in) :: optics !< The structure containing optical properties.
-    real,          optional, intent(in) :: evap_CFL_limit !< Limits how much water can be fluxed out of
-                                                 !! the top layer Stored previously in diabatic CS.
+    real,          optional, intent(in) :: evap_CFL_limit !< Limit on the fraction of the water that can
+                                                 !! be fluxed out of the top layer in a timestep [nondim]
+                                                 !   Stored previously in diabatic CS.
     real,          optional, intent(in) :: minimum_forcing_depth !< The smallest depth over which fluxes
                                                  !!  can be applied [H ~> m or kg m-2]
                                                  !   Stored previously in diabatic CS.
@@ -479,14 +495,17 @@ subroutine MOM_generic_tracer_column_physics(h_old, h_new, ea, eb, fluxes, Hml,
 
     type(g_tracer_type), pointer  :: g_tracer, g_tracer_next
     character(len=fm_string_len)  :: g_tracer_name
-    real, dimension(:,:), pointer :: stf_array,trunoff_array,runoff_tracer_flux_array
+    real, dimension(:,:), pointer :: stf_array   ! The surface flux of the tracer [conc kg m-2 s-1]
+    real, dimension(:,:), pointer :: trunoff_array  ! The tracer concentration in the river runoff [conc]
+    real, dimension(:,:), pointer :: runoff_tracer_flux_array ! The runoff tracer flux [conc kg m-2 s-1]
 
-    real :: surface_field(SZI_(G),SZJ_(G))
+    real :: surface_field(SZI_(G),SZJ_(G))  ! The surface value of some field, here only used for salinity [S ~> ppt]
     real :: dz_ml(SZI_(G),SZJ_(G))  ! The mixed layer depth in the MKS units used for generic tracers [m]
-    real :: sosga
+    real :: sosga ! The global mean surface salinity [ppt]
 
-    real, dimension(G%isd:G%ied,G%jsd:G%jed,GV%ke) :: rho_dzt, dzt
-    real, dimension(SZI_(G),SZJ_(G),SZK_(GV))      :: h_work
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: rho_dzt ! Layer mass per unit area [kg m-2]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: dzt     ! Layer vertical extents [m]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: h_work  ! A work array of thicknesses [H ~> m or kg m-2]
     integer :: i, j, k, isc, iec, jsc, jec, nk
 
     isc = G%isc ; iec = G%iec ; jsc = G%jsc ; jec = G%jec ; nk = GV%ke
@@ -536,14 +555,15 @@ subroutine MOM_generic_tracer_column_physics(h_old, h_new, ea, eb, fluxes, Hml,
     !
 
     rho_dzt(:,:,:) = GV%H_to_kg_m2 * GV%Angstrom_H
-    do k = 1, nk ; do j = jsc, jec ; do i = isc, iec  !{
+    do k=1,nk ; do j=jsc,jec ; do i=isc,iec
       rho_dzt(i,j,k) = GV%H_to_kg_m2 * h_old(i,j,k)
-    enddo ; enddo ; enddo !}
+    enddo ; enddo ; enddo
 
     dzt(:,:,:) = 1.0
-    do k = 1, nk ; do j = jsc, jec ; do i = isc, iec  !{
-      dzt(i,j,k) = GV%H_to_m * h_old(i,j,k)
-    enddo ; enddo ; enddo !}
+    call thickness_to_dz(h_old, tv, dzt, G, GV, US)
+    do k=1,nk ; do j=jsc,jec ; do i=isc,iec
+      dzt(i,j,k) = US%Z_to_m * dzt(i,j,k)
+    enddo ; enddo ; enddo
     dz_ml(:,:) = 0.0
     do j=jsc,jec ; do i=isc,iec
       surface_field(i,j) = tv%S(i,j,1)
@@ -639,8 +659,8 @@ function MOM_generic_tracer_stock(h, stocks, G, GV, CS, names, units, stock_inde
 
     ! Local variables
     type(g_tracer_type), pointer  :: g_tracer, g_tracer_next
-    real, dimension(:,:,:,:), pointer   :: tr_field
-    real, dimension(:,:,:), pointer     :: tr_ptr
+    real, dimension(:,:,:,:), pointer   :: tr_field ! A pointer to a generic tracer field, in concentration units [conc]
+    real, dimension(:,:,:), pointer     :: tr_ptr   ! A pointer to a generic tracer field, in concentration units [conc]
     character(len=128), parameter :: sub_name = 'MOM_generic_tracer_stock'
 
     integer :: m
@@ -802,7 +822,7 @@ subroutine array_global_min_max(tr_array, tmask, isd, jsd, isc, iec, jsc, jec, n
     real    :: tmax, tmin   ! Maximum and minimum tracer values, in the same units as tr_array
     real    :: tmax0, tmin0 ! First-guest values of tmax and tmin.
     integer :: itmax, jtmax, ktmax, itmin, jtmin, ktmin
-    real    :: fudge ! A factor that is close to 1 that is used to find the location of the extrema.
+    real    :: fudge ! A factor that is close to 1 that is used to find the location of the extrema [nondim].
 
      ! arrays to enable vectorization
     integer :: iminarr(3), imaxarr(3)
@@ -853,7 +873,7 @@ subroutine array_global_min_max(tr_array, tmask, isd, jsd, isc, iec, jsc, jec, n
 
     ! Now find the location of the global extrema.
     !
-    ! Note that the fudge factor above guarantees that the location of max (min) is uinque,
+    ! Note that the fudge factor above guarantees that the location of max (min) is unique,
     ! since tmax0 (tmin0) has slightly different values on each processor.
     ! Otherwise, the function tr_array(i,j,k) could be equal to global max (min) at more
     ! than one point in space and this would be a much more difficult problem to solve.
@@ -899,16 +919,16 @@ subroutine MOM_generic_tracer_surface_state(sfc_state, h, G, GV, CS)
     real, dimension(SZI_(G),SZJ_(G),SZK_(GV)), intent(in) :: h    !< Layer thicknesses [H ~> m or kg m-2]
     type(MOM_generic_tracer_CS),           pointer       :: CS   !< Pointer to the control structure for this module.
 
-! Local variables
-    real :: sosga
+    ! Local variables
+    real :: sosga ! The global mean surface salinity [ppt]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV),1) :: rho0 ! An unused array of densities [kg m-3]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV))   :: dzt  ! Layer vertical extents [m]
 
     character(len=128), parameter :: sub_name = 'MOM_generic_tracer_surface_state'
-    real, dimension(G%isd:G%ied,G%jsd:G%jed,1:GV%ke,1) :: rho0
-    real, dimension(G%isd:G%ied,G%jsd:G%jed,1:GV%ke) ::  dzt
 
     !Set coupler values
     !nnz: fake rho0
-    rho0=1.0
+    rho0(:,:,:,:) = 1.0
 
     dzt(:,:,:) = GV%H_to_m * h(:,:,:)
 
@@ -937,7 +957,7 @@ subroutine MOM_generic_tracer_surface_state(sfc_state, h, G, GV, CS)
     !Niki: The problem with calling diagnostic outputs here is that this subroutine is called every dt_cpld
     !      hence if dt_therm > dt_cpld we get output (and contribution to the mean) at times that tracers
     !      had not been updated.
-    !      Moving this to the end of column physics subrotuine fixes this issue.
+    !      Moving this to the end of column physics subroutine fixes this issue.
 
   end subroutine MOM_generic_tracer_surface_state
 
@@ -976,7 +996,7 @@ end subroutine MOM_generic_flux_init
   subroutine MOM_generic_tracer_fluxes_accumulate(flux_tmp, weight)
     type(forcing), intent(in)    :: flux_tmp  !< A structure containing pointers to
                                               !! thermodynamic and tracer forcing fields.
-    real,          intent(in)    :: weight    !< A weight for accumulating this flux
+    real,          intent(in)    :: weight    !< A weight for accumulating this flux [nondim]
 
     call generic_tracer_coupler_accumulate(flux_tmp%tr_fluxes, weight)
 
@@ -986,10 +1006,12 @@ end subroutine MOM_generic_tracer_fluxes_accumulate
   subroutine MOM_generic_tracer_get(name,member,array, CS)
     character(len=*),         intent(in)  :: name   !< Name of requested tracer.
     character(len=*),         intent(in)  :: member !< The tracer element to return.
-    real, dimension(:,:,:),   intent(out) :: array  !< Array filled by this routine.
-    type(MOM_generic_tracer_CS), pointer :: CS   !< Pointer to the control structure for this module.
+    real, dimension(:,:,:),   intent(out) :: array  !< Array filled by this routine, in arbitrary units [A]
+    type(MOM_generic_tracer_CS), pointer  :: CS     !< Pointer to the control structure for this module.
 
-    real, dimension(:,:,:),   pointer :: array_ptr
+    ! Local variables
+    real, dimension(:,:,:),   pointer :: array_ptr  ! The tracer in the generic tracer structures, in
+                                                    ! arbitrary units [A]
     character(len=128), parameter :: sub_name = 'MOM_generic_tracer_get'
 
     call g_tracer_get_pointer(CS%g_tracer_list,name,member,array_ptr)
diff --git a/src/tracer/MOM_tracer_flow_control.F90 b/src/tracer/MOM_tracer_flow_control.F90
index c8ce2f5f75..6d035e1d27 100644
--- a/src/tracer/MOM_tracer_flow_control.F90
+++ b/src/tracer/MOM_tracer_flow_control.F90
@@ -340,7 +340,7 @@ subroutine tracer_flow_control_init(restart, day, G, GV, US, h, param_file, diag
     call initialize_CFC_cap(restart, day, G, GV, US, h, diag, OBC, CS%CFC_cap_CSp)
 
   if (CS%use_MOM_generic_tracer) &
-    call initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file, diag, OBC, &
+    call initialize_MOM_generic_tracer(restart, day, G, GV, US, h, tv, param_file, diag, OBC, &
                                 CS%MOM_generic_tracer_CSp, sponge_CSp, ALE_sponge_CSp)
   if (CS%use_pseudo_salt_tracer) &
     call initialize_pseudo_salt_tracer(restart, day, G, GV, US, h, diag, OBC, CS%pseudo_salt_tracer_CSp, &

From ea12532d8ef9fb41a0f663c3b84e49eedcf95c21 Mon Sep 17 00:00:00 2001
From: William Cooke <William.Cooke@noaa.gov>
Date: Thu, 1 Feb 2024 13:59:24 -0500
Subject: [PATCH 16/17] Fix issue with restart filenames when using ensembles.

Moved call for filename_appendix outside of loop.

Fixes issue where restart filename was
./MOM.res.ens_01.nc
./MOM.res.ens_01.ens_01_1.nc
./MOM.res.ens_01.ens_01.ens_01_2.nc
./MOM.res.ens_01.ens_01.ens_01.ens_01_3.nc

instead of
./MOM.res.ens_01.nc
./MOM.res.ens_01_1.nc
./MOM.res.ens_01_2.nc
./MOM.res.ens_01_3.nc
---
 src/framework/MOM_restart.F90 | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/framework/MOM_restart.F90 b/src/framework/MOM_restart.F90
index 188cfbb2ec..06f4abc065 100644
--- a/src/framework/MOM_restart.F90
+++ b/src/framework/MOM_restart.F90
@@ -1406,6 +1406,17 @@ subroutine save_restart(directory, time, G, CS, time_stamped, filename, GV, num_
     restartname = trim(CS%restartfile)//trim(restartname)
   endif ; endif
 
+  ! Determine if there is a filename_appendix (used for ensemble runs).
+  call get_filename_appendix(filename_appendix)
+  if (len_trim(filename_appendix) > 0) then
+    length = len_trim(restartname)
+    if (restartname(length-2:length) == '.nc') then
+      restartname = restartname(1:length-3)//'.'//trim(filename_appendix)//'.nc'
+    else
+      restartname = restartname(1:length)  //'.'//trim(filename_appendix)
+    endif
+  endif
+
   next_var = 1
   do while (next_var <= CS%novars )
     start_var = next_var
@@ -1430,17 +1441,6 @@ subroutine save_restart(directory, time, G, CS, time_stamped, filename, GV, num_
     enddo
     next_var = m
 
-    ! Determine if there is a filename_appendix (used for ensemble runs).
-    call get_filename_appendix(filename_appendix)
-    if (len_trim(filename_appendix) > 0) then
-      length = len_trim(restartname)
-      if (restartname(length-2:length) == '.nc') then
-        restartname = restartname(1:length-3)//'.'//trim(filename_appendix)//'.nc'
-      else
-        restartname = restartname(1:length)  //'.'//trim(filename_appendix)
-      endif
-    endif
-
     restartpath = trim(directory) // trim(restartname)
 
     if (num_files < 10) then

From 9282cd100b293e5140ef557597d23d192f910dde Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Fri, 19 Jan 2024 14:51:45 -0500
Subject: [PATCH 17/17] +(*)Bodner param with cuberoot and non-Boussinesq

  Use the new cuberoot function in the Bodner estimate of u'w' when the new
runtime parameter ML_RESTRAT_ANSWER_DATE is 20240201 or higher, which avoids the
need to undo and redo the dimensional scaling in this calculation.  In addition,
the refactoring associated with this change explicitly revealed that as it was
implemented it introduced an undesirable dependency on the value of the
Boussinesq reference density, RHO_0, when in non-Boussinesq mode.  To avoid
this, a new version of the Bodner u'w' calculation was introduced in fully
non-Boussinesq mode, which does change answers with this combination; because
there is not yet a known case that used this combination, we have chosen not to
add a runtime parameter to preserve the old answers when the Bodner
parameterization is used in fully-Boussinesq mode.  This change will modify the
contents of some MOM_parameter_doc files with USE_BODNER=True, and it changes
answers in cases that are also fully non-Boussinesq.  The new runtime parameter
ML_RESTRAT_ANSWER_DATE might need to be set below 20240201 to retain some
existing Boussinesq answers.
---
 .../lateral/MOM_mixed_layer_restrat.F90       | 99 ++++++++++++++-----
 1 file changed, 74 insertions(+), 25 deletions(-)

diff --git a/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90 b/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
index 36a83cd43a..c10a55309b 100644
--- a/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
+++ b/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
@@ -14,6 +14,7 @@ module MOM_mixed_layer_restrat
 use MOM_forcing_type,  only : mech_forcing, find_ustar
 use MOM_grid,          only : ocean_grid_type
 use MOM_hor_index,     only : hor_index_type
+use MOM_intrinsic_functions, only : cuberoot
 use MOM_lateral_mixing_coeffs, only : VarMix_CS
 use MOM_restart,       only : register_restart_field, query_initialized, MOM_restart_CS
 use MOM_unit_scaling,  only : unit_scale_type
@@ -67,7 +68,7 @@ module MOM_mixed_layer_restrat
   real    :: nstar                 !< The n* value used to estimate the turbulent vertical momentum flux [nondim]
   real    :: min_wstar2            !< The minimum lower bound to apply to the vertical momentum flux,
                                    !! w'u', in the Bodner et al., restratification parameterization
-                                   !! [m2 s-2].  This avoids a division-by-zero in the limit when u*
+                                   !! [Z2 T-2 ~> m2 s-2].  This avoids a division-by-zero in the limit when u*
                                    !! and the buoyancy flux are zero.
   real    :: BLD_growing_Tfilt     !< The time-scale for a running-mean filter applied to the boundary layer
                                    !! depth (BLD) when the BLD is deeper than the running mean [T ~> s].
@@ -81,6 +82,11 @@ module MOM_mixed_layer_restrat
   real    :: MLD_growing_Tfilt     !< The time-scale for a running-mean filter applied to the time-filtered
                                    !! MLD, when the latter is deeper than the running mean [T ~> s].
                                    !! A value of 0 instantaneously sets the running mean to the current value of MLD.
+  integer :: answer_date           !< The vintage of the order of arithmetic and expressions in the
+                                   !! mixed layer restrat calculations.  Values below 20240201 recover
+                                   !! the answers from the end of 2023, while higher values use the new
+                                   !! cuberoot function in the Bodner code to avoid needing to undo
+                                   !! dimensional rescaling.
 
   logical :: debug = .false.       !< If true, calculate checksums of fields for debugging.
 
@@ -279,7 +285,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
     !! TODO: use derivatives and mid-MLD pressure. Currently this is sigma-0. -AJA
     pRef_MLD(:) = 0.
     EOSdom(:) = EOS_domain(G%HI, halo=1)
-    do j = js-1, je+1
+    do j=js-1,je+1
       dK(:) = 0.5 * h(:,j,1) ! Depth of center of surface layer
       if (CS%use_Stanley_ML) then
         call calculate_density(tv%T(:,j,1), tv%S(:,j,1), pRef_MLD, tv%varT(:,j,1), covTS, varS, &
@@ -289,7 +295,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
       endif
       deltaRhoAtK(:) = 0.
       MLD_fast(:,j) = 0.
-      do k = 2, nz
+      do k=2,nz
         dKm1(:) = dK(:) ! Depth of center of layer K-1
         dK(:) = dK(:) + 0.5 * ( h(:,j,k) + h(:,j,k-1) ) ! Depth of center of layer K
         ! Mixed-layer depth, using sigma-0 (surface reference pressure)
@@ -300,10 +306,10 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
         else
           call calculate_density(tv%T(:,j,k), tv%S(:,j,k), pRef_MLD, deltaRhoAtK, tv%eqn_of_state, EOSdom)
         endif
-        do i = is-1,ie+1
+        do i=is-1,ie+1
           deltaRhoAtK(i) = deltaRhoAtK(i) - rhoSurf(i) ! Density difference between layer K and surface
         enddo
-        do i = is-1, ie+1
+        do i=is-1,ie+1
           ddRho = deltaRhoAtK(i) - deltaRhoAtKm1(i)
           if ((MLD_fast(i,j)==0.) .and. (ddRho>0.) .and. &
               (deltaRhoAtKm1(i)<CS%MLE_density_diff) .and. (deltaRhoAtK(i)>=CS%MLE_density_diff)) then
@@ -312,7 +318,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
           endif
         enddo ! i-loop
       enddo ! k-loop
-      do i = is-1, ie+1
+      do i=is-1,ie+1
         MLD_fast(i,j) = CS%MLE_MLD_stretch * MLD_fast(i,j)
         if ((MLD_fast(i,j)==0.) .and. (deltaRhoAtK(i)<CS%MLE_density_diff)) &
           MLD_fast(i,j) = dK(i) ! Assume mixing to the bottom
@@ -320,7 +326,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
     enddo ! j-loop
   elseif (CS%MLE_use_PBL_MLD) then
     if (GV%Boussinesq .or. (.not.allocated(tv%SpV_avg))) then
-      do j = js-1, je+1 ; do i = is-1, ie+1
+      do j=js-1,je+1 ; do i=is-1,ie+1
         MLD_fast(i,j) = CS%MLE_MLD_stretch * GV%Z_to_H * MLD_in(i,j)
       enddo ; enddo
     else  ! The fully non-Boussinesq conversion between height in MLD_in and thickness.
@@ -356,7 +362,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
     endif
     aFac = CS%MLE_MLD_decay_time / ( dt + CS%MLE_MLD_decay_time )
     bFac = dt / ( dt + CS%MLE_MLD_decay_time )
-    do j = js-1, je+1 ; do i = is-1, ie+1
+    do j=js-1,je+1 ; do i=is-1,ie+1
       ! Expression bFac*MLD_fast(i,j) + aFac*CS%MLD_filtered(i,j) is the time-filtered
       ! (running mean) of MLD. The max() allows the "running mean" to be reset
       ! instantly to a deeper MLD.
@@ -373,7 +379,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
     endif
     aFac = CS%MLE_MLD_decay_time2 / ( dt + CS%MLE_MLD_decay_time2 )
     bFac = dt / ( dt + CS%MLE_MLD_decay_time2 )
-    do j = js-1, je+1 ; do i = is-1, ie+1
+    do j=js-1,je+1 ; do i=is-1,ie+1
       ! Expression bFac*MLD_fast(i,j) + aFac*CS%MLD_filtered(i,j) is the time-filtered
       ! (running mean) of MLD. The max() allows the "running mean" to be reset
       ! instantly to a deeper MLD.
@@ -381,7 +387,7 @@ subroutine mixedlayer_restrat_OM4(h, uhtr, vhtr, tv, forces, dt, MLD_in, VarMix,
       MLD_slow(i,j) = CS%MLD_filtered_slow(i,j)
     enddo ; enddo
   else
-    do j = js-1, je+1 ; do i = is-1, ie+1
+    do j=js-1,je+1 ; do i=is-1,ie+1
       MLD_slow(i,j) = MLD_fast(i,j)
     enddo ; enddo
   endif
@@ -819,8 +825,8 @@ subroutine mixedlayer_restrat_Bodner(CS, G, GV, US, h, uhtr, vhtr, tv, forces, d
   real :: g_Rho0          ! G_Earth/Rho0 times a thickness conversion factor
                           ! [L2 H-1 T-2 R-1 ~> m4 s-2 kg-1 or m7 s-2 kg-2]
   real :: h_vel           ! htot interpolated onto velocity points [H ~> m or kg m-2]
-  real :: w_star3         ! Cube of turbulent convective velocity [m3 s-3]
-  real :: u_star3         ! Cube of surface fruction velocity [m3 s-3]
+  real :: w_star3         ! Cube of turbulent convective velocity [Z3 T-3 ~> m3 s-3]
+  real :: u_star3         ! Cube of surface friction velocity [Z3 T-3 ~> m3 s-3]
   real :: r_wpup          ! reciprocal of vertical momentum flux [T2 L-1 H-1 ~> s2 m-2 or m s2 kg-1]
   real :: absf            ! absolute value of f, interpolated to velocity points [T-1 ~> s-1]
   real :: grid_dsd        ! combination of grid scales [L2 ~> m2]
@@ -837,6 +843,10 @@ subroutine mixedlayer_restrat_Bodner(CS, G, GV, US, h, uhtr, vhtr, tv, forces, d
   real :: muza            ! mu(z) at top of the layer [nondim]
   real :: dh              ! Portion of the layer thickness that is in the mixed layer [H ~> m or kg m-2]
   real :: res_scaling_fac ! The resolution-dependent scaling factor [nondim]
+  real :: Z3_T3_to_m3_s3  ! Conversion factors to undo scaling and permit terms to be raised to a
+                          ! fractional power [T3 m3 Z-3 s-3 ~> 1]
+  real :: m2_s2_to_Z2_T2  ! Conversion factors to restore scaling after a term is raised to a
+                          ! fractional power [Z2 s2 T-2 m-2 ~> 1]
   real, parameter :: two_thirds = 2./3.  ! [nondim]
   logical :: line_is_empty, keep_going
   integer, dimension(2) :: EOSdom ! The i-computational domain for the equation of state
@@ -881,7 +891,7 @@ subroutine mixedlayer_restrat_Bodner(CS, G, GV, US, h, uhtr, vhtr, tv, forces, d
   ! Apply time filter to BLD (to remove diurnal cycle) to obtain "little h".
   ! "little h" is representative of the active mixing layer depth, used in B22 formula (eq 27).
   if (GV%Boussinesq .or. (.not.allocated(tv%SpV_avg))) then
-    do j = js-1, je+1 ; do i = is-1, ie+1
+    do j=js-1,je+1 ; do i=is-1,ie+1
       little_h(i,j) = rmean2ts(GV%Z_to_H*BLD(i,j), CS%MLD_filtered(i,j), &
                                CS%BLD_growing_Tfilt, CS%BLD_decaying_Tfilt, dt)
       CS%MLD_filtered(i,j) = little_h(i,j)
@@ -912,21 +922,49 @@ subroutine mixedlayer_restrat_Bodner(CS, G, GV, US, h, uhtr, vhtr, tv, forces, d
   endif
 
   ! Calculate "big H", representative of the mixed layer depth, used in B22 formula (eq 27).
-  do j = js-1, je+1 ; do i = is-1, ie+1
+  do j=js-1,je+1 ; do i=is-1,ie+1
     big_H(i,j) = rmean2ts(little_h(i,j), CS%MLD_filtered_slow(i,j), &
                           CS%MLD_growing_Tfilt, CS%MLD_decaying_Tfilt, dt)
     CS%MLD_filtered_slow(i,j) = big_H(i,j)
   enddo ; enddo
 
-  ! Estimate w'u' at h-points
-  do j = js-1, je+1 ; do i = is-1, ie+1
-    w_star3 = max(0., -bflux(i,j)) * BLD(i,j) & ! (this line in Z3 T-3 ~> m3 s-3)
-              * ( ( US%Z_to_m * US%s_to_T )**3 ) ! [m3 T3 Z-3 s-3 ~> 1]
-    u_star3 = ( US%Z_to_m * US%s_to_T * U_star_2d(i,j) )**3 ! m3 s-3
-    wpup(i,j) = max( CS%min_wstar2, &           ! The max() avoids division by zero later
-                ( CS%mstar * u_star3 + CS%nstar * w_star3 )**two_thirds ) & ! (this line m2 s-2)
-                * ( US%m_to_L * GV%m_to_H * US%T_to_s**2 ) ! [L H s2 m-2 T-2 ~> 1 or kg m-3]
-    ! We filter w'u' with the same time scales used for "little h"
+  ! Estimate w'u' at h-points, with a floor to avoid division by zero later.
+  if (allocated(tv%SpV_avg) .and. .not.(GV%Boussinesq .or. GV%semi_Boussinesq)) then
+    do j=js-1,je+1 ; do i=is-1,ie+1
+      ! This expression differs by a factor of 1. / (Rho_0 * SpV_avg) compared with the other
+      ! expressions below, and it is invariant to the value of Rho_0 in non-Boussinesq mode.
+      wpup(i,j) = max((cuberoot( CS%mstar * U_star_2d(i,j)**3 + &
+                                 CS%nstar * max(0., -bflux(i,j)) * BLD(i,j) ))**2, CS%min_wstar2) * &
+                  ( US%Z_to_L * GV%RZ_to_H / tv%SpV_avg(i,j,1))
+                ! The final line above converts from [Z2 T-2 ~> m2 s-2] to [L H T-2 ~> m2 s-2 or Pa].
+                ! Some rescaling factors and the division by specific volume compensating for other
+                ! factors that are in find_ustar_mech, and others effectively converting the wind
+                ! stresses from [R L Z T-2 ~> Pa] to [L H T-2 ~> m2 s-2 or Pa].  The rescaling factors
+                ! and density being applied to the buoyancy flux are not so neatly explained because
+                ! fractional powers cancel out or combine with terms in the definitions of BLD and
+                ! bflux (such as SpV_avg**-2/3 combining with other terms in bflux to give the thermal
+                ! expansion coefficient) and because the specific volume does vary within the mixed layer.
+    enddo ; enddo
+  elseif (CS%answer_date < 20240201) then
+    Z3_T3_to_m3_s3 = (US%Z_to_m * US%s_to_T)**3
+    m2_s2_to_Z2_T2 = (US%m_to_Z * US%T_to_s)**2
+    do j=js-1,je+1 ; do i=is-1,ie+1
+      w_star3 = max(0., -bflux(i,j)) * BLD(i,j)    ! In [Z3 T-3 ~> m3 s-3]
+      u_star3 = U_star_2d(i,j)**3                  ! In [Z3 T-3 ~> m3 s-3]
+      wpup(i,j) = max(m2_s2_to_Z2_T2 * (Z3_T3_to_m3_s3 * ( CS%mstar * u_star3 + CS%nstar * w_star3 ) )**two_thirds, &
+                      CS%min_wstar2) * &
+                  ( US%Z_to_L * US%Z_to_m * GV%m_to_H ) ! In [L H T-2 ~> m2 s-2 or kg m-1 s-2]
+    enddo ; enddo
+  else
+    do j=js-1,je+1 ; do i=is-1,ie+1
+      w_star3 = max(0., -bflux(i,j)) * BLD(i,j)    ! In [Z3 T-3 ~> m3 s-3]
+      wpup(i,j) = max( (cuberoot(CS%mstar * U_star_2d(i,j)**3 + CS%nstar * w_star3))**2, CS%min_wstar2 ) * &
+                  ( US%Z_to_L * US%Z_to_m * GV%m_to_H ) ! In [L H T-2 ~> m2 s-2 or kg m-1 s-2]
+    enddo ; enddo
+  endif
+
+  ! We filter w'u' with the same time scales used for "little h"
+  do j=js-1,je+1 ; do i=is-1,ie+1
     wpup(i,j) = rmean2ts(wpup(i,j), CS%wpup_filtered(i,j), &
                          CS%BLD_growing_Tfilt, CS%BLD_decaying_Tfilt, dt)
     CS%wpup_filtered(i,j) = wpup(i,j)
@@ -1459,7 +1497,7 @@ end subroutine mixedlayer_restrat_BML
 !> Return the growth timescale for the submesoscale mixed layer eddies in [T ~> s]
 real function growth_time(u_star, hBL, absf, h_neg, vonKar, Kv_rest, restrat_coef)
   real, intent(in) :: u_star   !< Surface friction velocity in thickness-based units [H T-1 ~> m s-1 or kg m-2 s-1]
-  real, intent(in) :: hBL      !< Boundary layer thickness including at least a neglible
+  real, intent(in) :: hBL      !< Boundary layer thickness including at least a negligible
                                !! value to keep it positive definite [H ~> m or kg m-2]
   real, intent(in) :: absf     !< Absolute value of the Coriolis parameter [T-1 ~> s-1]
   real, intent(in) :: h_neg    !< A tiny thickness that is usually lost in roundoff so can be
@@ -1513,6 +1551,7 @@ logical function mixedlayer_restrat_init(Time, G, GV, US, param_file, diag, CS,
   real :: ustar_min_dflt   ! The default value for RESTRAT_USTAR_MIN [Z T-1 ~> m s-1]
   real :: Stanley_coeff    ! Coefficient relating the temperature gradient and sub-gridscale
                            ! temperature variance [nondim]
+  integer :: default_answer_date  ! The default setting for the various ANSWER_DATE flags
   ! This include declares and sets the variable "version".
 # include "version_variable.h"
   integer :: i, j
@@ -1581,13 +1620,23 @@ logical function mixedlayer_restrat_init(Time, G, GV, US, param_file, diag, CS,
              "BLD, when the latter is shallower than the running mean. A value of 0 "//&
              "instantaneously sets the running mean to the current value filtered BLD.", &
              units="s", default=0., scale=US%s_to_T)
+    call get_param(param_file, mdl, "DEFAULT_ANSWER_DATE", default_answer_date, &
+             "This sets the default value for the various _ANSWER_DATE parameters.", &
+             default=99991231)
+    call get_param(param_file, mdl, "ML_RESTRAT_ANSWER_DATE", CS%answer_date, &
+             "The vintage of the order of arithmetic and expressions in the mixed layer "//&
+             "restrat calculations.  Values below 20240201 recover the answers from the end "//&
+             "of 2023, while higher values use the new cuberoot function in the Bodner code "//&
+             "to avoid needing to undo dimensional rescaling.", &
+             default=default_answer_date, &
+             do_not_log=.not.(CS%use_Bodner.and.(GV%Boussinesq.or.GV%semi_Boussinesq)))
     call get_param(param_file, mdl, "MIN_WSTAR2", CS%min_wstar2, &
              "The minimum lower bound to apply to the vertical momentum flux, w'u', "//&
              "in the Bodner et al., restratification parameterization. This avoids "//&
              "a division-by-zero in the limit when u* and the buoyancy flux are zero.  "//&
              "The default is less than the molecular viscosity of water times the Coriolis "//&
              "parameter a micron away from the equator.", &
-             units="m2 s-2", default=1.0e-24)   ! This parameter stays in MKS units.
+             units="m2 s-2", default=1.0e-24, scale=US%m_to_Z**2*US%T_to_s**2)
     call get_param(param_file, mdl, "TAIL_DH", CS%MLE_tail_dh, &
              "Fraction by which to extend the mixed-layer restratification "//&
              "depth used for a smoother stream function at the base of "//&