From 541c2f47970adb809b24d9838d25ac2a716db980 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Fri, 19 Jan 2024 16:55:18 -0500
Subject: [PATCH 01/10] (*)Oil_tracer_column_physics unit conversion fix

  Added a missing unit conversion factor to a hard-coded 10 m distance in
oil_tracer_column_physics.  This will not change answers in Boussinesq cases
without any dimensional rescaling, but it will correct answers in a hypothetical
non-Boussinesq case.  Also made some white space in expressions near this fix
more closely match the MOM6 style guide. No answers are affected in any known
existing regression test cases or other runs.
---
 src/tracer/oil_tracer.F90 | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/tracer/oil_tracer.F90 b/src/tracer/oil_tracer.F90
index fc8f82f0df..40d6f27b44 100644
--- a/src/tracer/oil_tracer.F90
+++ b/src/tracer/oil_tracer.F90
@@ -373,21 +373,21 @@ subroutine oil_tracer_column_physics(h_old, h_new, ea, eb, fluxes, dt, G, GV, US
   ! Add oil at the source location
   if (year>=CS%oil_start_year .and. year<=CS%oil_end_year .and. &
       CS%oil_source_i>-999 .and. CS%oil_source_j>-999) then
-    i=CS%oil_source_i ; j=CS%oil_source_j
-    k_max=nz ; h_total=0.
+    i = CS%oil_source_i ; j = CS%oil_source_j
+    k_max = nz ; h_total = 0.
     vol_scale = GV%H_to_m * US%L_to_m**2
     do k=nz, 2, -1
       h_total = h_total + h_new(i,j,k)
-      if (h_total<10.) k_max=k-1 ! Find bottom most interface that is 10 m above bottom
+      if (h_total < 10.*GV%m_to_H) k_max=k-1 ! Find bottom most interface that is 10 m above bottom
     enddo
     do m=1,CS%ntr
-      k=CS%oil_source_k(m)
+      k = CS%oil_source_k(m)
       if (k>0) then
-        k=min(k,k_max) ! Only insert k or first layer with interface 10 m above bottom
+        k = min(k,k_max) ! Only insert k or first layer with interface 10 m above bottom
         CS%tr(i,j,k,m) = CS%tr(i,j,k,m) + CS%oil_source_rate*dt / &
                 (vol_scale * (h_new(i,j,k)+GV%H_subroundoff) * G%areaT(i,j) )
       elseif (k<0) then
-        h_total=GV%H_subroundoff
+        h_total = GV%H_subroundoff
         do k=1, nz
           h_total = h_total + h_new(i,j,k)
         enddo

From 76f0668146d5f709f093774033927f68bf7514f3 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Sat, 20 Jan 2024 06:06:38 -0500
Subject: [PATCH 02/10] (*)Avoid using RHO_0 in non-Boussinesq averaging

  Use GV%H_to_MKS instead of GV%H_to_m when undoing the dimensional rescaling of
thicknesses when taking weighted averages in horizontally_average_diag_field,
global_layer_mean and global_volume_mean.  In Boussinesq mode, these are
identical, but in non-Boussinesq mode using GV%H_to_m introduced a
multiplication and then division by the Boussinesq reference density, whereas
GV%H_to_MKS avoids this by rescaling to a volume or mass-based coordinate
depending on the mode.  Several comments were also updated to reflect these
conditional changes in the units of some internal variables.  All expressions
are mathematically equivalent, and this does not impact any solutions, but there
can be changes in the last bits in some non-Boussinesq averaged diagnostics.
---
 src/diagnostics/MOM_spatial_means.F90 | 20 +++++++++++---------
 src/framework/MOM_diag_remap.F90      | 17 ++++++++++++-----
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/diagnostics/MOM_spatial_means.F90 b/src/diagnostics/MOM_spatial_means.F90
index ab1210c0f5..60ad8dfba5 100644
--- a/src/diagnostics/MOM_spatial_means.F90
+++ b/src/diagnostics/MOM_spatial_means.F90
@@ -211,11 +211,13 @@ function global_layer_mean(var, h, G, GV, scale, tmp_scale)
   ! Local variables
   ! In the following comments, [A] is used to indicate the arbitrary, possibly rescaled units of the
   ! input array while [a] indicates the unscaled (e.g., mks) units that can be used with the reproducing sums
-  real, dimension(G%isc:G%iec,G%jsc:G%jec,SZK_(GV)) :: tmpForSumming  ! An unscaled cell integral [a m3]
-  real, dimension(G%isc:G%iec,G%jsc:G%jec,SZK_(GV)) :: weight  ! The volume of each cell, used as a weight [m3]
+  real, dimension(G%isc:G%iec,G%jsc:G%jec,SZK_(GV)) :: tmpForSumming  ! An unscaled cell integral [a m3] or [a kg]
+  real, dimension(G%isc:G%iec,G%jsc:G%jec,SZK_(GV)) :: weight  ! The volume or mass of each cell, depending on
+                                                    ! whether the model is Boussinesq, used as a weight [m3] or [kg]
   type(EFP_type), dimension(2*SZK_(GV)) :: laysums
-  real, dimension(SZK_(GV)) :: global_temp_scalar    ! The global integral of the tracer in each layer [a m3]
-  real, dimension(SZK_(GV)) :: global_weight_scalar  ! The global integral of the volume of each layer [m3]
+  real, dimension(SZK_(GV)) :: global_temp_scalar   ! The global integral of the tracer in each layer [a m3] or [a kg]
+  real, dimension(SZK_(GV)) :: global_weight_scalar ! The global integral of the volume or mass of each
+                                                    ! layer [m3] or [kg]
   real :: temp_scale ! A temporary scaling factor [a A-1 ~> 1] or [1]
   real :: scalefac  ! A scaling factor for the variable [a A-1 ~> 1]
   integer :: i, j, k, is, ie, js, je, nz
@@ -226,7 +228,7 @@ function global_layer_mean(var, h, G, GV, scale, tmp_scale)
   tmpForSumming(:,:,:) = 0. ; weight(:,:,:) = 0.
 
   do k=1,nz ; do j=js,je ; do i=is,ie
-    weight(i,j,k)  =  (GV%H_to_m * h(i,j,k)) * (G%US%L_to_m**2*G%areaT(i,j) * G%mask2dT(i,j))
+    weight(i,j,k)  =  (GV%H_to_MKS * h(i,j,k)) * (G%US%L_to_m**2*G%areaT(i,j) * G%mask2dT(i,j))
     tmpForSumming(i,j,k) =  scalefac * var(i,j,k) * weight(i,j,k)
   enddo ; enddo ; enddo
 
@@ -262,9 +264,9 @@ function global_volume_mean(var, h, G, GV, scale, tmp_scale)
   ! input array while [a] indicates the unscaled (e.g., mks) units that can be used with the reproducing sums
   real :: temp_scale ! A temporary scaling factor [a A-1 ~> 1] or [1]
   real :: scalefac   ! A scaling factor for the variable [a A-1 ~> 1]
-  real :: weight_here ! The volume of a grid cell [m3]
-  real, dimension(SZI_(G),SZJ_(G)) :: tmpForSumming ! The volume integral of the variable in a column [a m3]
-  real, dimension(SZI_(G),SZJ_(G)) :: sum_weight  ! The volume of each column of water [m3]
+  real :: weight_here ! The volume or mass of a grid cell [m3] or [kg]
+  real, dimension(SZI_(G),SZJ_(G)) :: tmpForSumming ! The volume integral of the variable in a column [a m3] or [a kg]
+  real, dimension(SZI_(G),SZJ_(G)) :: sum_weight  ! The volume or mass of each column of water [m3] or [kg]
   integer :: i, j, k, is, ie, js, je, nz
   is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec ; nz = GV%ke
 
@@ -273,7 +275,7 @@ function global_volume_mean(var, h, G, GV, scale, tmp_scale)
   tmpForSumming(:,:) = 0. ; sum_weight(:,:) = 0.
 
   do k=1,nz ; do j=js,je ; do i=is,ie
-    weight_here  =  (GV%H_to_m * h(i,j,k)) * (G%US%L_to_m**2*G%areaT(i,j) * G%mask2dT(i,j))
+    weight_here  =  (GV%H_to_MKS * h(i,j,k)) * (G%US%L_to_m**2*G%areaT(i,j) * G%mask2dT(i,j))
     tmpForSumming(i,j) = tmpForSumming(i,j) + scalefac * var(i,j,k) * weight_here
     sum_weight(i,j) = sum_weight(i,j) + weight_here
   enddo ; enddo ; enddo
diff --git a/src/framework/MOM_diag_remap.F90 b/src/framework/MOM_diag_remap.F90
index ff0eda6325..a2ecc197bc 100644
--- a/src/framework/MOM_diag_remap.F90
+++ b/src/framework/MOM_diag_remap.F90
@@ -658,8 +658,15 @@ subroutine horizontally_average_diag_field(G, GV, h, staggered_in_x, staggered_i
   logical, dimension(:), intent(inout) :: averaged_mask  !< Mask for horizontally averaged field [nondim]
 
   ! Local variables
-  real, dimension(G%isc:G%iec, G%jsc:G%jec, size(field,3)) :: volume, stuff
-  real, dimension(size(field, 3)) :: vol_sum, stuff_sum ! nz+1 is needed for interface averages
+  real :: volume(G%isc:G%iec, G%jsc:G%jec, size(field,3)) ! The area [m2], volume [m3] or mass [kg] of each cell.
+  real :: stuff(G%isc:G%iec, G%jsc:G%jec, size(field,3))  ! The area, volume or mass-weighted integral of the
+                                             ! field being averaged in each cell, in [m2 A], [m3 A] or [kg A],
+                                             ! depending on the weighting for the averages and whether the
+                                             ! model makes the Boussinesq approximation.
+  real, dimension(size(field, 3)) :: vol_sum   ! The global sum of the areas [m2], volumes [m3] or mass [kg]
+                                               ! in the cells that used in the weighted averages.
+  real, dimension(size(field, 3)) :: stuff_sum ! The global sum of the weighted field in all cells, in
+                                               ! [A m2], [A m3] or [A kg]
   type(EFP_type), dimension(2*size(field,3)) :: sums_EFP ! Sums of volume or stuff by layer
   real :: height  ! An average thickness attributed to an velocity point [H ~> m or kg m-2]
   integer :: i, j, k, nz
@@ -688,7 +695,7 @@ subroutine horizontally_average_diag_field(G, GV, h, staggered_in_x, staggered_i
             I1 = i - G%isdB + 1
             height = 0.5 * (h(i,j,k) + h(i+1,j,k))
             volume(I,j,k) = (G%US%L_to_m**2 * G%areaCu(I,j)) &
-                * (GV%H_to_m * height) * G%mask2dCu(I,j)
+                * (GV%H_to_MKS * height) * G%mask2dCu(I,j)
             stuff(I,j,k) = volume(I,j,k) * field(I1,j,k)
           enddo ; enddo
         endif
@@ -717,7 +724,7 @@ subroutine horizontally_average_diag_field(G, GV, h, staggered_in_x, staggered_i
             J1 = J - G%jsdB + 1
             height = 0.5 * (h(i,j,k) + h(i,j+1,k))
             volume(i,J,k) = (G%US%L_to_m**2 * G%areaCv(i,J)) &
-                * (GV%H_to_m * height) * G%mask2dCv(i,J)
+                * (GV%H_to_MKS * height) * G%mask2dCv(i,J)
             stuff(i,J,k) = volume(i,J,k) * field(i,J1,k)
           enddo ; enddo
         endif
@@ -748,7 +755,7 @@ subroutine horizontally_average_diag_field(G, GV, h, staggered_in_x, staggered_i
         else ! Intensive
           do j=G%jsc, G%jec ; do i=G%isc, G%iec
             volume(i,j,k) = (G%US%L_to_m**2 * G%areaT(i,j)) &
-                * (GV%H_to_m * h(i,j,k)) * G%mask2dT(i,j)
+                * (GV%H_to_MKS * h(i,j,k)) * G%mask2dT(i,j)
             stuff(i,j,k) = volume(i,j,k) * field(i,j,k)
           enddo ; enddo
         endif

From 60cb551a3a6c04ba33e5d5f3ce7e4d25d3f7be53 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Sun, 28 Jan 2024 13:53:55 -0500
Subject: [PATCH 03/10] Intrinsics: Faster cuberoot scaling functions

This patch replaces the intrinsic-based exponent rescaling with explicit
bit manipulation of the floating point number.

This appears to produce a ~2.5x speedup of the solver, reducing its time
from embarrassingly slow to disappointingly slow.  It is slightly faster
than the GNU cbrt function, but still about 3x slower than the Intel
SVML cbrt function.

Timings (s) (16M array, -O3 -mavx -mfma)

| Solver              |  -O2  |  -O3  |
|---------------------|-------|-------|
| GNU x**1/3          | 0.225 | 0.198 |
| GNU cuberoot before | 0.418 | 0.412 |
| GNU cuberoot after  | 0.208 | 0.187 |
| Intel x**1/3        | 0.068 | 0.067 |
| Intel before        | 0.514 | 0.507 |
| Intel after         | 0.213 | 0.189 |

At least one issue here is that Intel SVML is using fast vectorized
logic operators whereas the Fortran intrinsics are replaced with slower
legacy scalar versions.  Not sure there is much we could even do about
that without complaining to vendors.

Also, I'm sure there's magic in their solvers which we are not
capturing.  Regardless, I think this is a major improvement.

I do not believe it will change answers, but probably a good idea to
verify this and get it in before committing any solutions using
cuberoot().
---
 src/framework/MOM_intrinsic_functions.F90 | 104 ++++++++++++++++++++--
 1 file changed, 98 insertions(+), 6 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 4327cfa5a6..5d420057d4 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -5,6 +5,7 @@ module MOM_intrinsic_functions
 ! This file is part of MOM6. See LICENSE.md for the license.
 
 use iso_fortran_env, only : stdout => output_unit, stderr => error_unit
+use iso_fortran_env, only : int64, real64
 
 implicit none ; private
 
@@ -28,6 +29,7 @@ function invcosh(x)
 
 end function invcosh
 
+
 !> Returns the cube root of a real argument at roundoff accuracy, in a form that works properly with
 !! rescaling of the argument by integer powers of 8.  If the argument is a NaN, a NaN is returned.
 elemental function cuberoot(x) result(root)
@@ -45,16 +47,15 @@ elemental function cuberoot(x) result(root)
               ! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D]
   real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of
               ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
-  integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a.
   integer :: itt
 
+  integer(kind=int64) :: e_x, s_x
+
   if ((x >= 0.0) .eqv. (x <= 0.0)) then
     ! Return 0 for an input of 0, or NaN for a NaN input.
     root = x
   else
-    ex_3 = ceiling(exponent(x) / 3.)
-    ! Here asx is in the range of 0.125 <= asx < 1.0
-    asx = scale(abs(x), -3*ex_3)
+    call rescale_exp(x, asx, e_x, s_x)
 
     !   Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method,
     ! noting that Halley's method onverges monotonically and needs no bounding.  Halley's method is
@@ -82,11 +83,102 @@ elemental function cuberoot(x) result(root)
     ! that is within the last bit of the true solution.
     root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
 
-    root = sign(scale(root_asx, ex_3), x)
+    root = descale_cbrt(root_asx, e_x, s_x)
   endif
-
 end function cuberoot
 
+
+!> Rescale `a` to the range [0.125, 1) while preserving its fractional term.
+pure subroutine rescale_exp(a, x, e_a, s_a)
+  real, intent(in) :: a
+    !< The value to be rescaled
+  real, intent(out) :: x
+    !< The rescaled value of `a`
+  integer(kind=int64), intent(out) :: e_a
+    !< The biased exponent of `a`
+  integer(kind=int64), intent(out) :: s_a
+    !< The sign bit of `a`
+
+  ! Floating point model, if format is (sign, exp, frac)
+  integer, parameter :: bias = maxexponent(1.) - 1
+    !< The double precision exponent offset (assuming a balanced range)
+  integer, parameter :: signbit = storage_size(1.) - 1
+    !< Position of sign bit
+  integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
+    !< Bit size of exponent
+  integer, parameter :: expbit = signbit - explen
+    !< Position of lowest exponent bit
+  integer, parameter :: fraclen = expbit
+    !< Length of fractional part
+
+  integer(kind=int64) :: xb
+    !< A floating point number, bit-packed as an integer
+  integer(kind=int64) :: e_scaled
+    !< The new rescaled exponent of `a` (i.e. the exponent of `x`)
+
+  ! Pack bits of `a` into `xb` and extract its exponent and sign
+  xb = transfer(a, 1_int64)
+  s_a = ibits(xb, signbit, 1)
+  e_a = ibits(xb, expbit, explen)
+
+  ! Decompose the exponent as `e = modulo(e,3) + 3*(e/3)` and extract the
+  ! rescaled exponent, now in {-3,-2,-1}
+  e_scaled = modulo(e_a, 3) - 3 + bias
+
+  ! Insert the new 11-bit exponent into `xb`, while also setting the sign bit
+  ! to zero, ensuring that `xb` is always positive.
+  call mvbits(e_scaled, 0, explen + 1, xb, fraclen)
+
+  ! Transfer the final modified value to `x`
+  x = transfer(xb, 1.)
+end subroutine rescale_exp
+
+
+!> Descale a real number to its original base, and apply the cube root to the
+!! remaining exponent.
+pure function descale_cbrt(x, e_a, s_a) result(r)
+  real, intent(in) :: x
+    !< Cube root of the rescaled value, which was rescaled to [0.125, 1.0)
+  integer(kind=int64), intent(in) :: e_a
+    !< Exponent of the original value to be cube rooted
+  integer(kind=int64), intent(in) :: s_a
+    !< Sign bit of the original value to be cube rooted
+  real :: r
+    !< Restored value with the cube root applied to its exponent
+
+  ! Floating point model, if format is (sign, exp, frac)
+  integer, parameter :: bias = maxexponent(1.) - 1
+    !< The double precision exponent offset (assuming a balanced range)
+  integer, parameter :: signbit = storage_size(1.) - 1
+    !< Position of sign bit
+  integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
+    !< Bit size of exponent
+  integer, parameter :: expbit = signbit - explen
+    !< Position of lowest exponent bit
+  integer, parameter :: fraclen = expbit
+    !< Length of fractional part
+
+  integer(kind=int64) :: xb
+    ! Bit-packed real number into integer form
+  integer(kind=int64) :: e_r
+    ! Exponent of the descaled value
+
+  ! Extract the exponent of the rescaled value, in {-3, -2, -1}
+  xb = transfer(x, 1_8)
+  e_r = ibits(xb, expbit, explen)
+
+  ! Apply the cube root to the old exponent (after removing its bias) and add
+  ! to the rescaled exponent.  Correct the previous -3 with a +1.
+  e_r = e_r + (e_a/3 - bias/3 + 1)
+
+  ! Apply the corrected exponent and sign and convert back to real
+  call mvbits(e_r, 0, explen, xb, expbit)
+  call mvbits(s_a, 0, 1, xb, signbit)
+  r = transfer(xb, 1.)
+end function descale_cbrt
+
+
+
 !> Returns true if any unit test of intrinsic_functions fails, or false if they all pass.
 logical function intrinsic_functions_unit_tests(verbose)
   logical, intent(in) :: verbose !< If true, write results to stdout

From 5edba9b4d28892225423f872a9a83b1c22dda0a9 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Tue, 30 Jan 2024 15:28:29 -0500
Subject: [PATCH 04/10] Cuberoot: Refactor (re|de)scale functions

Some modifications were made to the cuberoot rescale and descale
functions:

* The machine parameters were moved from function to module parameters.
  This could dangerously expose them to other functions, but it prevents
  multiple definitions of the same numbers.

* The exponent is now cube-rooted in rescale rather than descale.

* The exponent expressions are broken into more explicit steps, rather
  than combining multiple steps and assumptions into a single
  expression.

* The bias is no longer assumed to be a multiple of three.  This is true
  for double precision but not single precision.

A new test of quasi-random numbers was also added to the cuberoot test
suite.  These numbers were able to detect the differences in GNU and
Intel compiler output.  A potential error in the return value of the
test was also fixed.

The volatile test of 1 - 0.5*ULP has been added.  The cube root of this
value rounds to 1, and needs to be handled carefully.

The unit test function `cuberoot(v**3)` was reversed to `cuberoot(v)**3`,
to include testing of this value.  (Cubing would wipe out the anomaly.)
---
 src/framework/MOM_intrinsic_functions.F90 | 148 +++++++++++-----------
 1 file changed, 75 insertions(+), 73 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 5d420057d4..07c6abe3ad 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -12,6 +12,19 @@ module MOM_intrinsic_functions
 public :: invcosh, cuberoot
 public :: intrinsic_functions_unit_tests
 
+! Floating point model, if bit layout from high to low is (sign, exp, frac)
+
+integer, parameter :: bias = maxexponent(1.) - 1
+  !< The double precision exponent offset
+integer, parameter :: signbit = storage_size(1.) - 1
+  !< Position of sign bit
+integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
+  !< Bit size of exponent
+integer, parameter :: expbit = signbit - explen
+  !< Position of lowest exponent bit
+integer, parameter :: fraclen = expbit
+  !< Length of fractional part
+
 contains
 
 !> Evaluate the inverse cosh, either using a math library or an
@@ -55,7 +68,7 @@ elemental function cuberoot(x) result(root)
     ! Return 0 for an input of 0, or NaN for a NaN input.
     root = x
   else
-    call rescale_exp(x, asx, e_x, s_x)
+    call rescale_cbrt(x, asx, e_x, s_x)
 
     !   Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method,
     ! noting that Halley's method onverges monotonically and needs no bounding.  Halley's method is
@@ -83,109 +96,90 @@ elemental function cuberoot(x) result(root)
     ! that is within the last bit of the true solution.
     root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
 
-    root = descale_cbrt(root_asx, e_x, s_x)
+    root = descale(root_asx, e_x, s_x)
   endif
 end function cuberoot
 
 
-!> Rescale `a` to the range [0.125, 1) while preserving its fractional term.
-pure subroutine rescale_exp(a, x, e_a, s_a)
+!> Rescale `a` to the range [0.125, 1) and compute its cube-root exponent.
+pure subroutine rescale_cbrt(a, x, e_r, s_a)
   real, intent(in) :: a
-    !< The value to be rescaled
+    !< The real parameter to be rescaled for cube root
   real, intent(out) :: x
-    !< The rescaled value of `a`
-  integer(kind=int64), intent(out) :: e_a
-    !< The biased exponent of `a`
+    !< The rescaled value of a
+  integer(kind=int64), intent(out) :: e_r
+    !< Cube root of the exponent of the rescaling of `a`
   integer(kind=int64), intent(out) :: s_a
-    !< The sign bit of `a`
-
-  ! Floating point model, if format is (sign, exp, frac)
-  integer, parameter :: bias = maxexponent(1.) - 1
-    !< The double precision exponent offset (assuming a balanced range)
-  integer, parameter :: signbit = storage_size(1.) - 1
-    !< Position of sign bit
-  integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
-    !< Bit size of exponent
-  integer, parameter :: expbit = signbit - explen
-    !< Position of lowest exponent bit
-  integer, parameter :: fraclen = expbit
-    !< Length of fractional part
+    !< The sign bit of a
 
   integer(kind=int64) :: xb
-    !< A floating point number, bit-packed as an integer
-  integer(kind=int64) :: e_scaled
-    !< The new rescaled exponent of `a` (i.e. the exponent of `x`)
-
-  ! Pack bits of `a` into `xb` and extract its exponent and sign
+    ! Floating point value of a, bit-packed as an integer
+  integer(kind=int64) :: e_a
+    ! Unscaled exponent of a
+  integer(kind=int64) :: e_x
+    ! Exponent of x
+  integer(kind=int64) :: e_div, e_mod
+    ! Quotient and remainder of e in e = 3*(e/3) + modulo(e,3).
+
+  ! Pack bits of a into xb and extract its exponent and sign.
   xb = transfer(a, 1_int64)
   s_a = ibits(xb, signbit, 1)
-  e_a = ibits(xb, expbit, explen)
+  e_a = ibits(xb, expbit, explen) - bias
+
+  ! Compute terms of exponent decomposition e = 3*(e/3) + modulo(e,3).
+  ! (Fortran division is round-to-zero, so we must emulate floor division.)
+  e_mod = modulo(e_a, 3_int64)
+  e_div = (e_a - e_mod)/3
+
+  ! Our scaling decomposes e_a into e = {3*(e/3) + 3} + {modulo(e,3) - 3}.
 
-  ! Decompose the exponent as `e = modulo(e,3) + 3*(e/3)` and extract the
-  ! rescaled exponent, now in {-3,-2,-1}
-  e_scaled = modulo(e_a, 3) - 3 + bias
+  ! The first term is a perfect cube, whose cube root is computed below.
+  e_r = e_div + 1
 
-  ! Insert the new 11-bit exponent into `xb`, while also setting the sign bit
-  ! to zero, ensuring that `xb` is always positive.
-  call mvbits(e_scaled, 0, explen + 1, xb, fraclen)
+  ! The second term ensures that x is shifted to [0.125, 1).
+  e_x = e_mod - 3
 
-  ! Transfer the final modified value to `x`
+  ! Insert the new 11-bit exponent into xb and write to x and extend the
+  ! bitcount to 12, so that the sign bit is zero and x is always positive.
+  call mvbits(e_x + bias, 0, explen + 1, xb, fraclen)
   x = transfer(xb, 1.)
-end subroutine rescale_exp
+end subroutine rescale_cbrt
 
 
-!> Descale a real number to its original base, and apply the cube root to the
-!! remaining exponent.
-pure function descale_cbrt(x, e_a, s_a) result(r)
+!> Undo the rescaling of a real number back to its original base.
+pure function descale(x, e_a, s_a) result(a)
   real, intent(in) :: x
-    !< Cube root of the rescaled value, which was rescaled to [0.125, 1.0)
+    !< The rescaled value which is to be restored.
   integer(kind=int64), intent(in) :: e_a
-    !< Exponent of the original value to be cube rooted
+    !< Exponent of the unscaled value
   integer(kind=int64), intent(in) :: s_a
-    !< Sign bit of the original value to be cube rooted
-  real :: r
-    !< Restored value with the cube root applied to its exponent
-
-  ! Floating point model, if format is (sign, exp, frac)
-  integer, parameter :: bias = maxexponent(1.) - 1
-    !< The double precision exponent offset (assuming a balanced range)
-  integer, parameter :: signbit = storage_size(1.) - 1
-    !< Position of sign bit
-  integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
-    !< Bit size of exponent
-  integer, parameter :: expbit = signbit - explen
-    !< Position of lowest exponent bit
-  integer, parameter :: fraclen = expbit
-    !< Length of fractional part
+    !< Sign bit of the unscaled value
+  real :: a
+    !< Restored value with the corrected exponent and sign
 
   integer(kind=int64) :: xb
     ! Bit-packed real number into integer form
-  integer(kind=int64) :: e_r
-    ! Exponent of the descaled value
+  integer(kind=int64) :: e_x
+    ! Biased exponent of x
 
-  ! Extract the exponent of the rescaled value, in {-3, -2, -1}
+  ! Apply the corrected exponent and sign to x.
   xb = transfer(x, 1_8)
-  e_r = ibits(xb, expbit, explen)
-
-  ! Apply the cube root to the old exponent (after removing its bias) and add
-  ! to the rescaled exponent.  Correct the previous -3 with a +1.
-  e_r = e_r + (e_a/3 - bias/3 + 1)
-
-  ! Apply the corrected exponent and sign and convert back to real
-  call mvbits(e_r, 0, explen, xb, expbit)
+  e_x = ibits(xb, expbit, explen)
+  call mvbits(e_a + e_x, 0, explen, xb, expbit)
   call mvbits(s_a, 0, 1, xb, signbit)
-  r = transfer(xb, 1.)
-end function descale_cbrt
-
+  a = transfer(xb, 1.)
+end function descale
 
 
 !> Returns true if any unit test of intrinsic_functions fails, or false if they all pass.
-logical function intrinsic_functions_unit_tests(verbose)
+function intrinsic_functions_unit_tests(verbose) result(fail)
   logical, intent(in) :: verbose !< If true, write results to stdout
+  logical :: fail !< True if any of the unit tests fail
 
   ! Local variables
   real :: testval  ! A test value for self-consistency testing [nondim]
-  logical :: fail, v
+  logical :: v
+  integer :: n
 
   fail = .false.
   v = verbose
@@ -199,7 +193,15 @@ logical function intrinsic_functions_unit_tests(verbose)
   fail = fail .or. Test_cuberoot(v, 1.0)
   fail = fail .or. Test_cuberoot(v, 0.125)
   fail = fail .or. Test_cuberoot(v, 0.965)
-
+  fail = fail .or. Test_cuberoot(v, 1.0 - epsilon(1.0))
+  fail = fail .or. Test_cuberoot(v, 1.0 - 0.5*epsilon(1.0))
+
+  testval = 1.0e-99
+  v = .false.
+  do n=-160,160
+    fail = fail .or. Test_cuberoot(v, testval)
+    testval = (-2.908 * (1.414213562373 + 1.2345678901234e-5*n)) * testval
+  enddo
 end function intrinsic_functions_unit_tests
 
 !> True if the cube of cuberoot(val) does not closely match val. False otherwise.
@@ -209,7 +211,7 @@ logical function Test_cuberoot(verbose, val)
   ! Local variables
   real :: diff ! The difference between val and the cube root of its cube.
 
-  diff = val - cuberoot(val**3)
+  diff = val - cuberoot(val)**3
   Test_cuberoot = (abs(diff) > 2.0e-15*abs(val))
 
   if (Test_cuberoot) then

From 736ef16a4016b538f946df4be9f30cdc532d03e4 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Tue, 30 Jan 2024 15:56:41 -0500
Subject: [PATCH 05/10] Cuberoot: Break **3 into explicit integer cubes

In separate testing, we observed that Intel would use the `pow()`
function to evaluate the cubes of some numbers, causing different
answers with GNU.

In this patch, I replace the cubic x**3 operations with explicit x*x*x
multiplication, which appears to avoid this substitution.

Well, for the moment, at least.
---
 src/framework/MOM_intrinsic_functions.F90 | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 07c6abe3ad..fbb1c28096 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -52,14 +52,19 @@ elemental function cuberoot(x) result(root)
   real :: asx ! The absolute value of x rescaled by an integer power of 8 to put it into
               ! the range from 0.125 < asx <= 1.0, in ambiguous units cubed [B3]
   real :: root_asx ! The cube root of asx [B]
+  real :: ra_3 ! root_asx cubed [B3]
   real :: num ! The numerator of an expression for the evolving estimate of the cube root of asx
               ! in arbitrary units that can grow or shrink with each iteration [B C]
   real :: den ! The denominator of an expression for the evolving estimate of the cube root of asx
               ! in arbitrary units that can grow or shrink with each iteration [C]
   real :: num_prev ! The numerator of an expression for the previous iteration of the evolving estimate
               ! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D]
+  real :: np_3 ! num_prev cubed  [B3 D3]
   real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of
               ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
+  real :: dp_3 ! den_prev cubed  [C3]
+  real :: r0  ! Initial value of the iterative solver. [B C]
+  real :: r0_3 ! r0 cubed [B3 C3]
   integer :: itt
 
   integer(kind=int64) :: e_x, s_x
@@ -79,14 +84,21 @@ elemental function cuberoot(x) result(root)
 
     ! This first estimate gives the same magnitude of errors for 0.125 and 1.0 after two iterations.
     ! The first iteration is applied explicitly.
-    num = 0.707106 * (0.707106**3 + 2.0 * asx)
-    den = 2.0 * (0.707106**3) + asx
+    r0 = 0.707106
+    r0_3 = r0 * r0 * r0
+    num = r0 * (r0_3 + 2.0 * asx)
+    den = 2.0 * r0_3 + asx
 
     do itt=1,2
       ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx).
       num_prev = num ; den_prev = den
-      num = num_prev * (num_prev**3 + 2.0 * asx * (den_prev**3))
-      den = den_prev * (2.0 * num_prev**3 + asx * (den_prev**3))
+
+      ! Pre-compute these as integer powers, to avoid `pow()`-like intrinsics.
+      np_3 = num_prev * num_prev * num_prev
+      dp_3 = den_prev * den_prev * den_prev
+
+      num = num_prev * (np_3 + 2.0 * asx * dp_3)
+      den = den_prev * (2.0 * np_3 + asx * dp_3)
       ! Equivalent to:  root_asx = root_asx * (root_asx**3 + 2.*asx) / (2.*root_asx**3 + asx)
     enddo
     ! At this point the error in root_asx is better than 1 part in 3e14.
@@ -94,7 +106,8 @@ elemental function cuberoot(x) result(root)
 
     ! One final iteration with Newton's method polishes up the root and gives a solution
     ! that is within the last bit of the true solution.
-    root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
+    ra_3 = root_asx * root_asx * root_asx
+    root_asx = root_asx - (ra_3 - asx) / (3.0 * (root_asx * root_asx))
 
     root = descale(root_asx, e_x, s_x)
   endif

From 671c85d32864d89bce6cfe270a2c556c54b03ba4 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Wed, 10 Jan 2024 13:14:10 -0500
Subject: [PATCH 06/10] (*)Use cuberoot in ePBL_column

  Use the new cuberoot() function in place of **(1./3.) to calculate the
turbulent velocity vstar in ePBL_column when EPBL_ANSWER_DATE is 20240101 or
higher.  This is mathematically equivalent to the previous version, but it does
change answers at roundoff, and it allows several dimensional scaling factors
that had previously been required to be eliminated.  All answers are
mathematically equivalent, but answers do change if EPBL_ANSWER_DATE is 20240101
or higher and the description of EPBL_ANSWER_DATE changes in some
MOM_parameter_doc files.
---
 .../vertical/MOM_energetic_PBL.F90            | 105 +++++++++++++-----
 1 file changed, 77 insertions(+), 28 deletions(-)

diff --git a/src/parameterizations/vertical/MOM_energetic_PBL.F90 b/src/parameterizations/vertical/MOM_energetic_PBL.F90
index 1a59b177bd..10907c04ed 100644
--- a/src/parameterizations/vertical/MOM_energetic_PBL.F90
+++ b/src/parameterizations/vertical/MOM_energetic_PBL.F90
@@ -13,6 +13,7 @@ module MOM_energetic_PBL
 use MOM_forcing_type,   only : forcing
 use MOM_grid,           only : ocean_grid_type
 use MOM_interface_heights, only : thickness_to_dz
+use MOM_intrinsic_functions, only : cuberoot
 use MOM_string_functions, only : uppercase
 use MOM_unit_scaling,   only : unit_scale_type
 use MOM_variables,      only : thermo_var_ptrs
@@ -161,7 +162,10 @@ module MOM_energetic_PBL
   integer :: answer_date     !< The vintage of the order of arithmetic and expressions in the ePBL
                              !! calculations.  Values below 20190101 recover the answers from the
                              !! end of 2018, while higher values use updated and more robust forms
-                             !! of the same expressions.
+                             !! of the same expressions.  Values below 20240101 use A**(1./3.) to
+                             !! estimate the cube root of A in several expressions, while higher
+                             !! values use the integer root function cuberoot(A) and therefore
+                             !! can work with scaled variables.
   logical :: orig_PE_calc    !< If true, the ePBL code uses the original form of the
                              !! potential energy change code.  Otherwise, it uses a newer version
                              !! that can work with successive increments to the diffusivity in
@@ -335,8 +339,10 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
     mixvel, &       ! A turbulent mixing velocity [Z T-1 ~> m s-1].
     mixlen, &       ! A turbulent mixing length [Z ~> m].
     SpV_dt          ! Specific volume interpolated to interfaces divided by dt or 1.0 / (dt * Rho0)
-                    ! times conversion factors in [m3 Z-3 R-1 T2 s-3 ~> m3 kg-1 s-1],
-                    ! used to convert local TKE into a turbulence velocity cubed.
+                    ! times conversion factors for answer dates before 20240101 in
+                    ! [m3 Z-3 R-1 T2 s-3 ~> m3 kg-1 s-1] or without the conversion factors for
+                    ! answer dates of 20240101 and later in [R-1 T-1 ~> m3 kg-1 s-1], used to
+                    ! convert local TKE into a turbulence velocity cubed.
   real :: h_neglect ! A thickness that is so small it is usually lost
                     ! in roundoff and can be neglected [H ~> m or kg m-2].
 
@@ -348,6 +354,8 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
   real :: I_rho     ! The inverse of the Boussinesq reference density times a ratio of scaling
                     ! factors [Z L-1 R-1 ~> m3 kg-1]
   real :: I_dt      ! The Adcroft reciprocal of the timestep [T-1 ~> s-1]
+  real :: I_rho0dt  ! The inverse of the Boussinesq reference density times the time
+                    ! step [R-1 T-1 ~> m3 kg-1 s-1]
   real :: B_Flux    ! The surface buoyancy flux [Z2 T-3 ~> m2 s-3]
   real :: MLD_io    ! The mixed layer depth found by ePBL_column [Z ~> m]
 
@@ -374,6 +382,7 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
   h_neglect = GV%H_subroundoff
   I_rho = US%L_to_Z * GV%H_to_Z * GV%RZ_to_H ! == US%L_to_Z / GV%Rho0 ! This is not used when fully non-Boussinesq.
   I_dt = 0.0 ; if (dt > 0.0) I_dt = 1.0 / dt
+  I_rho0dt = 1.0 / (GV%Rho0 * dt)  ! This is not used when fully non-Boussinesq.
 
   ! Zero out diagnostics before accumulation.
   if (CS%TKE_diagnostics) then
@@ -403,9 +412,15 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
     ! Set the inverse density used to translating local TKE into a turbulence velocity
     SpV_dt(:) = 0.0
     if ((dt > 0.0) .and. GV%Boussinesq .or. .not.allocated(tv%SpV_avg)) then
-      do K=1,nz+1
-        SpV_dt(K) = (US%Z_to_m**3*US%s_to_T**3) / (dt*GV%Rho0)
-      enddo
+      if (CS%answer_date < 20240101) then
+        do K=1,nz+1
+          SpV_dt(K) = (US%Z_to_m**3*US%s_to_T**3) / (dt*GV%Rho0)
+        enddo
+      else
+        do K=1,nz+1
+          SpV_dt(K) = I_rho0dt
+        enddo
+      endif
     endif
 
     !   Determine the initial mech_TKE and conv_PErel, including the energy required
@@ -442,11 +457,19 @@ subroutine energetic_PBL(h_3d, u_3d, v_3d, tv, fluxes, dt, Kd_int, G, GV, US, CS
       endif
 
       if (allocated(tv%SpV_avg) .and. .not.GV%Boussinesq) then
-        SpV_dt(1) = (US%Z_to_m**3*US%s_to_T**3) * tv%SpV_avg(i,j,1) * I_dt
-        do K=2,nz
-          SpV_dt(K) = (US%Z_to_m**3*US%s_to_T**3) * 0.5*(tv%SpV_avg(i,j,k-1) + tv%SpV_avg(i,j,k)) * I_dt
-        enddo
-        SpV_dt(nz+1) = (US%Z_to_m**3*US%s_to_T**3) * tv%SpV_avg(i,j,nz) * I_dt
+        if (CS%answer_date < 20240101) then
+          SpV_dt(1) = (US%Z_to_m**3*US%s_to_T**3) * tv%SpV_avg(i,j,1) * I_dt
+          do K=2,nz
+            SpV_dt(K) = (US%Z_to_m**3*US%s_to_T**3) * 0.5*(tv%SpV_avg(i,j,k-1) + tv%SpV_avg(i,j,k)) * I_dt
+          enddo
+          SpV_dt(nz+1) = (US%Z_to_m**3*US%s_to_T**3) * tv%SpV_avg(i,j,nz) * I_dt
+        else
+          SpV_dt(1) = tv%SpV_avg(i,j,1) * I_dt
+          do K=2,nz
+            SpV_dt(K) = 0.5*(tv%SpV_avg(i,j,k-1) + tv%SpV_avg(i,j,k)) * I_dt
+          enddo
+          SpV_dt(nz+1) = tv%SpV_avg(i,j,nz) * I_dt
+        endif
       endif
 
       B_flux = buoy_flux(i,j)
@@ -565,9 +588,13 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
   real, dimension(SZK_(GV)), intent(in)  :: dSV_dS !< The partial derivative of in-situ specific
                                                    !! volume with salinity [R-1 S-1 ~> m3 kg-1 ppt-1].
   real, dimension(SZK_(GV)+1), intent(in) :: SpV_dt !< Specific volume interpolated to interfaces
-                                                   !! divided by dt or 1.0 / (dt * Rho0) times conversion
-                                                   !! factors in [m3 Z-3 R-1 T2 s-3 ~> m3 kg-1 s-1],
-                                                   !! used to convert local TKE into a turbulence velocity.
+                                                   !! divided by dt or 1.0 / (dt * Rho0), times conversion
+                                                   !! factors for answer dates before 20240101 in
+                                                   !! [m3 Z-3 R-1 T2 s-3 ~> m3 kg-1 s-1] or without
+                                                   !! the conversion factors for answer dates of
+                                                   !! 20240101 and later in [R-1 T-1 ~> m3 kg-1 s-1],
+                                                   !! used to convert local TKE into a turbulence
+                                                   !! velocity cubed.
   real, dimension(SZK_(GV)), intent(in)  :: TKE_forcing !< The forcing requirements to homogenize the
                                                    !! forcing that has been applied to each layer
                                                    !! [R Z3 T-2 ~> J m-2].
@@ -819,7 +846,7 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
   max_itt = 20
 
   dz_tt_min = 0.0
-  vstar_unit_scale = US%m_to_Z * US%T_to_s
+  if (CS%answer_date < 20240101) vstar_unit_scale = US%m_to_Z * US%T_to_s
 
   MLD_guess = MLD_io
 
@@ -1160,12 +1187,22 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
           dz_tt = dztot + dz_tt_min
           TKE_here = mech_TKE + CS%wstar_ustar_coef*conv_PErel
           if (TKE_here > 0.0) then
-            if (CS%wT_scheme==wT_from_cRoot_TKE) then
-              vstar = CS%vstar_scale_fac * vstar_unit_scale * (SpV_dt(K)*TKE_here)**C1_3
-            elseif (CS%wT_scheme==wT_from_RH18) then
-              Surface_Scale = max(0.05, 1.0 - dztot / MLD_guess)
-              vstar = CS%vstar_scale_fac * Surface_Scale * (CS%vstar_surf_fac*u_star + &
-                        vstar_unit_scale * (CS%wstar_ustar_coef*conv_PErel*SpV_dt(K))**C1_3)
+            if (CS%answer_date < 20240101) then
+              if (CS%wT_scheme==wT_from_cRoot_TKE) then
+                vstar = CS%vstar_scale_fac * vstar_unit_scale * (SpV_dt(K)*TKE_here)**C1_3
+              elseif (CS%wT_scheme==wT_from_RH18) then
+                Surface_Scale = max(0.05, 1.0 - dztot / MLD_guess)
+                vstar = CS%vstar_scale_fac * Surface_Scale * (CS%vstar_surf_fac*u_star + &
+                          vstar_unit_scale * (CS%wstar_ustar_coef*conv_PErel*SpV_dt(K))**C1_3)
+              endif
+            else
+              if (CS%wT_scheme==wT_from_cRoot_TKE) then
+                vstar = CS%vstar_scale_fac * cuberoot(SpV_dt(K)*TKE_here)
+              elseif (CS%wT_scheme==wT_from_RH18) then
+                Surface_Scale = max(0.05, 1.0 - dztot / MLD_guess)
+                vstar = (CS%vstar_scale_fac * Surface_Scale) * ( CS%vstar_surf_fac*u_star + &
+                          cuberoot((CS%wstar_ustar_coef*conv_PErel) * SpV_dt(K)) )
+              endif
             endif
             hbs_here = min(hb_hs(K), MixLen_shape(K))
             mixlen(K) = MAX(CS%min_mix_len, ((dz_tt*hbs_here)*vstar) / &
@@ -1209,12 +1246,22 @@ subroutine ePBL_column(h, dz, u, v, T0, S0, dSV_dT, dSV_dS, SpV_dt, TKE_forcing,
               ! Does MKE_src need to be included in the calculation of vstar here?
               TKE_here = mech_TKE + CS%wstar_ustar_coef*(conv_PErel-PE_chg_max)
               if (TKE_here > 0.0) then
-                if (CS%wT_scheme==wT_from_cRoot_TKE) then
-                  vstar = CS%vstar_scale_fac * vstar_unit_scale * (SpV_dt(K)*TKE_here)**C1_3
-                elseif (CS%wT_scheme==wT_from_RH18) then
-                  Surface_Scale = max(0.05, 1. - dztot / MLD_guess)
-                  vstar = CS%vstar_scale_fac * Surface_Scale * (CS%vstar_surf_fac*u_star + &
-                                  vstar_unit_scale * (CS%wstar_ustar_coef*conv_PErel*SpV_dt(K))**C1_3)
+                if (CS%answer_date < 20240101) then
+                  if (CS%wT_scheme==wT_from_cRoot_TKE) then
+                    vstar = CS%vstar_scale_fac * vstar_unit_scale * (SpV_dt(K)*TKE_here)**C1_3
+                  elseif (CS%wT_scheme==wT_from_RH18) then
+                    Surface_Scale = max(0.05, 1. - dztot / MLD_guess)
+                    vstar = CS%vstar_scale_fac * Surface_Scale * (CS%vstar_surf_fac*u_star + &
+                                    vstar_unit_scale * (CS%wstar_ustar_coef*conv_PErel*SpV_dt(K))**C1_3)
+                  endif
+                else
+                  if (CS%wT_scheme==wT_from_cRoot_TKE) then
+                    vstar = CS%vstar_scale_fac * cuberoot(SpV_dt(K)*TKE_here)
+                  elseif (CS%wT_scheme==wT_from_RH18) then
+                    Surface_Scale = max(0.05, 1. - dztot / MLD_guess)
+                    vstar = (CS%vstar_scale_fac * Surface_Scale) * ( CS%vstar_surf_fac*u_star + &
+                                    cuberoot((CS%wstar_ustar_coef*conv_PErel) * SpV_dt(K)) )
+                  endif
                 endif
                 hbs_here = min(hb_hs(K), MixLen_shape(K))
                 mixlen(K) = max(CS%min_mix_len, ((dz_tt*hbs_here)*vstar) / &
@@ -2076,7 +2123,9 @@ subroutine energetic_PBL_init(Time, G, GV, US, param_file, diag, CS)
                  "The vintage of the order of arithmetic and expressions in the energetic "//&
                  "PBL calculations.  Values below 20190101 recover the answers from the "//&
                  "end of 2018, while higher values use updated and more robust forms of the "//&
-                 "same expressions.", &
+                 "same expressions.  Values below 20240101 use A**(1./3.) to estimate the cube "//&
+                 "root of A in several expressions, while higher values use the integer root "//&
+                 "function cuberoot(A) and therefore can work with scaled variables.", &
                  default=default_answer_date, do_not_log=.not.GV%Boussinesq)
   if (.not.GV%Boussinesq) CS%answer_date = max(CS%answer_date, 20230701)
 

From 07bace68cfc6e498f39c5731b563601d6afec389 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Tue, 30 Jan 2024 17:59:28 -0500
Subject: [PATCH 07/10] *Fix two bugs in convert_temp_salt_for_TEOS10

  Fixed two bugs on a single line of convert_temp_salt_for_TEOS10.  The first
bug was a reversal in the order of the temperature and salinity arguments to
poTemp_to_consTemp, resulting in temperatures that closely approximate the
salinities.  The second bug that was fixed on this line was temperatures being
rescaled with a factor that is appropriate for salinities. This bug-fix will
change answers dramatically for any cases that use the ROQUET_RHO, ROQUET_SPV
and TEOS10 equations of state and initialize the model with
INIT_LAYERS_FROM_Z_FILE = True.
---
 src/equation_of_state/MOM_EOS.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/equation_of_state/MOM_EOS.F90 b/src/equation_of_state/MOM_EOS.F90
index d5c7abc977..7a9de49573 100644
--- a/src/equation_of_state/MOM_EOS.F90
+++ b/src/equation_of_state/MOM_EOS.F90
@@ -1703,7 +1703,7 @@ subroutine convert_temp_salt_for_TEOS10(T, S, HI, kd, mask_z, EOS)
   do k=1,kd ; do j=HI%jsc,HI%jec ; do i=HI%isc,HI%iec
     if (mask_z(i,j,k) >= 1.0) then
       S(i,j,k) = Sref_Sprac * S(i,j,k)
-      T(i,j,k) = EOS%degC_to_C*poTemp_to_consTemp(EOS%S_to_ppt*S(i,j,k), EOS%S_to_ppt*T(i,j,k))
+      T(i,j,k) = EOS%degC_to_C*poTemp_to_consTemp(EOS%C_to_degC*T(i,j,k), EOS%S_to_ppt*S(i,j,k))
     endif
   enddo ; enddo ; enddo
 end subroutine convert_temp_salt_for_TEOS10

From 915cfe225e5e3c42103f20501c60e6aa90cd613c Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Fri, 26 Jan 2024 18:10:41 -0500
Subject: [PATCH 08/10] +ALE_remap_scalar with arbitrary thickness units

  This commit adds the new optional arguments h_neglect and h_neglect_edge to
ALE_remap_scalar to allow for the thicknesses used in this routine to be
provided in any self-consistent units, including [Z ~> m], instead of just
[H ~> m or kg m-2].  To help make use of this new capability, this commit also
adds the new functions set_h_neglect and set_dz_neglect to the MOM_regridding
module.  build_rho_grid and build_grid_HyCOM1 have been refactored to use
set_h_neglect in place of the corresponding duplicated code blocks.

  This commit also adds the new optional argument h_in_Z_units to
MOM_initialize_tracer_from_Z, which in turn uses this new capability for
ALE_remap_scalar to use vertical layer extents (in Z units) rather than
thicknesses (in H units).

  Although there are new optional arguments to public interfaces, they are not
yet being exercised with this commit so no answers are changed.  Moreover, even
if they were being exercised, all Boussinesq solutions would give identical
answers.
---
 src/ALE/MOM_ALE.F90                           | 36 +++++++----
 src/ALE/MOM_regridding.F90                    | 60 ++++++++++++++-----
 .../MOM_tracer_initialization_from_Z.F90      | 56 +++++++++++------
 3 files changed, 108 insertions(+), 44 deletions(-)

diff --git a/src/ALE/MOM_ALE.F90 b/src/ALE/MOM_ALE.F90
index 77ee1192a2..543d77a0f3 100644
--- a/src/ALE/MOM_ALE.F90
+++ b/src/ALE/MOM_ALE.F90
@@ -1260,16 +1260,17 @@ end subroutine mask_near_bottom_vel
 !! h_dst must be dimensioned as a model array with GV%ke layers while h_src can
 !! have an arbitrary number of layers specified by nk_src.
 subroutine ALE_remap_scalar(CS, G, GV, nk_src, h_src, s_src, h_dst, s_dst, all_cells, old_remap, &
-                            answers_2018, answer_date )
+                            answers_2018, answer_date, h_neglect, h_neglect_edge)
   type(remapping_CS),                      intent(in)    :: CS        !< Remapping control structure
   type(ocean_grid_type),                   intent(in)    :: G         !< Ocean grid structure
   type(verticalGrid_type),                 intent(in)    :: GV        !< Ocean vertical grid structure
   integer,                                 intent(in)    :: nk_src    !< Number of levels on source grid
   real, dimension(SZI_(G),SZJ_(G),nk_src), intent(in)    :: h_src     !< Level thickness of source grid
-                                                                      !! [H ~> m or kg m-2]
+                                                                      !! [H ~> m or kg m-2] or other units
+                                                                      !! if H_neglect is provided
   real, dimension(SZI_(G),SZJ_(G),nk_src), intent(in)    :: s_src     !< Scalar on source grid, in arbitrary units [A]
-  real, dimension(SZI_(G),SZJ_(G),SZK_(GV)),intent(in)   :: h_dst     !< Level thickness of destination grid
-                                                                      !! [H ~> m or kg m-2]
+  real, dimension(SZI_(G),SZJ_(G),SZK_(GV)),intent(in)   :: h_dst     !< Level thickness of destination grid in the
+                                                                      !! same units as h_src, often [H ~> m or kg m-2]
   real, dimension(SZI_(G),SZJ_(G),SZK_(GV)),intent(inout) :: s_dst    !< Scalar on destination grid, in the same
                                                                       !! arbitrary units as s_src [A]
   logical, optional,                       intent(in)    :: all_cells !< If false, only reconstruct for
@@ -1283,10 +1284,16 @@ subroutine ALE_remap_scalar(CS, G, GV, nk_src, h_src, s_src, h_dst, s_dst, all_c
                                                                       !! use more robust forms of the same expressions.
   integer,                       optional, intent(in)    :: answer_date !< The vintage of the expressions to use
                                                                       !! for remapping
+  real,                          optional, intent(in)    :: h_neglect !< A negligibly small thickness used in
+                                                                      !! remapping cell reconstructions, in the same
+                                                                      !! units as h_src, often [H ~> m or kg m-2]
+  real,                          optional, intent(in)    :: h_neglect_edge !< A negligibly small thickness used in
+                                                                      !! remapping edge value calculations, in the same
+                                                                      !! units as h_src, often [H ~> m or kg m-2]
   ! Local variables
   integer :: i, j, k, n_points
   real :: dx(GV%ke+1) ! Change in interface position [H ~> m or kg m-2]
-  real :: h_neglect, h_neglect_edge  ! Tiny thicknesses used in remapping [H ~> m or kg m-2]
+  real :: h_neg, h_neg_edge  ! Tiny thicknesses used in remapping [H ~> m or kg m-2]
   logical :: ignore_vanished_layers, use_remapping_core_w, use_2018_remap
 
   ignore_vanished_layers = .false.
@@ -1297,12 +1304,17 @@ subroutine ALE_remap_scalar(CS, G, GV, nk_src, h_src, s_src, h_dst, s_dst, all_c
   use_2018_remap = .true. ; if (present(answers_2018)) use_2018_remap = answers_2018
   if (present(answer_date)) use_2018_remap = (answer_date < 20190101)
 
-  if (.not.use_2018_remap) then
-    h_neglect = GV%H_subroundoff ; h_neglect_edge = GV%H_subroundoff
-  elseif (GV%Boussinesq) then
-    h_neglect = GV%m_to_H*1.0e-30 ; h_neglect_edge = GV%m_to_H*1.0e-10
+  if (present(h_neglect)) then
+    h_neg = h_neglect
+    h_neg_edge = h_neg ; if (present(h_neglect_edge)) h_neg_edge = h_neglect_edge
   else
-    h_neglect = GV%kg_m2_to_H*1.0e-30 ; h_neglect_edge = GV%kg_m2_to_H*1.0e-10
+    if (.not.use_2018_remap) then
+      h_neg = GV%H_subroundoff ; h_neg_edge = GV%H_subroundoff
+    elseif (GV%Boussinesq) then
+      h_neg = GV%m_to_H*1.0e-30 ; h_neg_edge = GV%m_to_H*1.0e-10
+    else
+      h_neg = GV%kg_m2_to_H*1.0e-30 ; h_neg_edge = GV%kg_m2_to_H*1.0e-10
+    endif
   endif
 
   !$OMP parallel do default(shared) firstprivate(n_points,dx)
@@ -1318,10 +1330,10 @@ subroutine ALE_remap_scalar(CS, G, GV, nk_src, h_src, s_src, h_dst, s_dst, all_c
       if (use_remapping_core_w) then
         call dzFromH1H2( n_points, h_src(i,j,1:n_points), GV%ke, h_dst(i,j,:), dx )
         call remapping_core_w(CS, n_points, h_src(i,j,1:n_points), s_src(i,j,1:n_points), &
-                              GV%ke, dx, s_dst(i,j,:), h_neglect, h_neglect_edge)
+                              GV%ke, dx, s_dst(i,j,:), h_neg, h_neg_edge)
       else
         call remapping_core_h(CS, n_points, h_src(i,j,1:n_points), s_src(i,j,1:n_points), &
-                              GV%ke, h_dst(i,j,:), s_dst(i,j,:), h_neglect, h_neglect_edge)
+                              GV%ke, h_dst(i,j,:), s_dst(i,j,:), h_neg, h_neg_edge)
       endif
     else
       s_dst(i,j,:) = 0.
diff --git a/src/ALE/MOM_regridding.F90 b/src/ALE/MOM_regridding.F90
index 8ef0679358..904164c8e7 100644
--- a/src/ALE/MOM_regridding.F90
+++ b/src/ALE/MOM_regridding.F90
@@ -144,6 +144,7 @@ module MOM_regridding
 public getCoordinateResolution, getCoordinateInterfaces
 public getCoordinateUnits, getCoordinateShortName, getStaticThickness
 public DEFAULT_COORDINATE_MODE
+public set_h_neglect, set_dz_neglect
 public get_zlike_CS, get_sigma_CS, get_rho_CS
 
 !> Documentation for coordinate options
@@ -1416,13 +1417,7 @@ subroutine build_rho_grid( G, GV, US, h, nom_depth_H, tv, dzInterface, remapCS,
 #endif
   logical :: ice_shelf
 
-  if (CS%remap_answer_date >= 20190101) then
-    h_neglect = GV%H_subroundoff ; h_neglect_edge = GV%H_subroundoff
-  elseif (GV%Boussinesq) then
-    h_neglect = GV%m_to_H*1.0e-30 ; h_neglect_edge = GV%m_to_H*1.0e-10
-  else
-    h_neglect = GV%kg_m2_to_H*1.0e-30 ; h_neglect_edge = GV%kg_m2_to_H*1.0e-10
-  endif
+  h_neglect = set_h_neglect(GV, CS%remap_answer_date, h_neglect_edge)
 
   nz = GV%ke
   ice_shelf = present(frac_shelf_h)
@@ -1575,13 +1570,7 @@ subroutine build_grid_HyCOM1( G, GV, US, h, nom_depth_H, tv, h_new, dzInterface,
   real :: z_top_col, totalThickness
   logical :: ice_shelf
 
-  if (CS%remap_answer_date >= 20190101) then
-    h_neglect = GV%H_subroundoff ; h_neglect_edge = GV%H_subroundoff
-  elseif (GV%Boussinesq) then
-    h_neglect = GV%m_to_H*1.0e-30 ; h_neglect_edge = GV%m_to_H*1.0e-10
-  else
-    h_neglect = GV%kg_m2_to_H*1.0e-30 ; h_neglect_edge = GV%kg_m2_to_H*1.0e-10
-  endif
+  h_neglect = set_h_neglect(GV, CS%remap_answer_date, h_neglect_edge)
 
   if (.not.CS%target_density_set) call MOM_error(FATAL, "build_grid_HyCOM1 : "//&
         "Target densities must be set before build_grid_HyCOM1 is called.")
@@ -2095,6 +2084,49 @@ subroutine write_regrid_file( CS, GV, filepath )
 
 end subroutine write_regrid_file
 
+!> Set appropriate values for the negligible thicknesses used for remapping based on an answer date.
+function set_h_neglect(GV, remap_answer_date, h_neglect_edge) result(h_neglect)
+  type(verticalGrid_type), intent(in)  :: GV   !< Ocean vertical grid structure
+  integer,                 intent(in)  :: remap_answer_date !< The vintage of the expressions to use
+                                               !! for remapping.  Values below 20190101 recover the
+                                               !! remapping answers from 2018. Higher values use more
+                                               !! robust forms of the same remapping algorithms.
+  real,                    intent(out) :: h_neglect_edge !< A negligibly small thickness used in
+                                               !! remapping edge value calculations [H ~> m or kg m-2]
+  real                                 :: h_neglect !< A negligibly small thickness used in
+                                               !! remapping cell reconstructions [H ~> m or kg m-2]
+  ! Two values are set: h_neglect is the function result and h_neglect_edge is returned via its argument.
+  if (remap_answer_date >= 20190101) then  ! Newer answers use GV%H_subroundoff for both values.
+    h_neglect = GV%H_subroundoff ; h_neglect_edge = GV%H_subroundoff
+  elseif (GV%Boussinesq) then  ! 2018 answers: 1e-30 m and 1e-10 m rescaled with GV%m_to_H.
+    h_neglect = GV%m_to_H*1.0e-30 ; h_neglect_edge = GV%m_to_H*1.0e-10
+  else  ! 2018 answers: 1e-30 kg m-2 and 1e-10 kg m-2 rescaled with GV%kg_m2_to_H.
+    h_neglect = GV%kg_m2_to_H*1.0e-30 ; h_neglect_edge = GV%kg_m2_to_H*1.0e-10
+  endif
+end function set_h_neglect
+
+!> Set appropriate values for the negligible vertical layer extents used for remapping based on an answer date.
+function set_dz_neglect(GV, US, remap_answer_date, dz_neglect_edge) result(dz_neglect)
+  type(verticalGrid_type), intent(in)  :: GV   !< Ocean vertical grid structure
+  type(unit_scale_type),   intent(in)  :: US   !< A dimensional unit scaling type
+  integer,                 intent(in)  :: remap_answer_date !< The vintage of the expressions to use
+                                               !! for remapping.  Values below 20190101 recover the
+                                               !! remapping answers from 2018. Higher values use more
+                                               !! robust forms of the same remapping algorithms.
+  real,                    intent(out) :: dz_neglect_edge !< A negligibly small vertical layer extent
+                                               !! used in remapping edge value calculations [Z ~> m]
+  real                                 :: dz_neglect !< A negligibly small vertical layer extent
+                                               !! used in remapping cell reconstructions [Z ~> m]
+  ! Two values are set: dz_neglect is the function result and dz_neglect_edge is returned via its argument.
+  if (remap_answer_date >= 20190101) then  ! Newer answers use GV%dZ_subroundoff for both values.
+    dz_neglect = GV%dZ_subroundoff ; dz_neglect_edge = GV%dZ_subroundoff
+  elseif (GV%Boussinesq) then  ! 2018 answers: 1e-30 m and 1e-10 m rescaled with US%m_to_Z.
+    dz_neglect = US%m_to_Z*1.0e-30 ; dz_neglect_edge = US%m_to_Z*1.0e-10
+  else  ! 2018 answers: the 1e-30 and 1e-10 kg m-2 thicknesses in [H], rescaled by GV%H_to_m*US%m_to_Z.
+    dz_neglect = GV%kg_m2_to_H * (GV%H_to_m*US%m_to_Z) * 1.0e-30
+    dz_neglect_edge = GV%kg_m2_to_H * (GV%H_to_m*US%m_to_Z) * 1.0e-10
+  endif
+end function set_dz_neglect
 
 !------------------------------------------------------------------------------
 !> Query the fixed resolution data
diff --git a/src/initialization/MOM_tracer_initialization_from_Z.F90 b/src/initialization/MOM_tracer_initialization_from_Z.F90
index 808430df2c..5a172b5d97 100644
--- a/src/initialization/MOM_tracer_initialization_from_Z.F90
+++ b/src/initialization/MOM_tracer_initialization_from_Z.F90
@@ -3,20 +3,21 @@ module MOM_tracer_initialization_from_Z
 
 ! This file is part of MOM6. See LICENSE.md for the license.
 
-use MOM_debugging, only : hchksum
-use MOM_cpu_clock, only : cpu_clock_id, cpu_clock_begin, cpu_clock_end
-use MOM_cpu_clock, only : CLOCK_ROUTINE, CLOCK_LOOP
-use MOM_domains, only : pass_var
+use MOM_debugging,     only : hchksum
+use MOM_cpu_clock,     only : cpu_clock_id, cpu_clock_begin, cpu_clock_end
+use MOM_cpu_clock,     only : CLOCK_ROUTINE, CLOCK_LOOP
+use MOM_domains,       only : pass_var
 use MOM_error_handler, only : MOM_mesg, MOM_error, FATAL, WARNING
 use MOM_error_handler, only : callTree_enter, callTree_leave, callTree_waypoint
-use MOM_file_parser, only : get_param, param_file_type, log_version
-use MOM_grid, only : ocean_grid_type
+use MOM_file_parser,   only : get_param, param_file_type, log_version
+use MOM_grid,          only : ocean_grid_type
 use MOM_horizontal_regridding, only : myStats, horiz_interp_and_extrap_tracer
 use MOM_interface_heights, only : dz_to_thickness_simple
-use MOM_remapping, only : remapping_CS, initialize_remapping
-use MOM_unit_scaling, only : unit_scale_type
-use MOM_verticalGrid, only : verticalGrid_type
-use MOM_ALE, only : ALE_remap_scalar
+use MOM_regridding,    only : set_dz_neglect
+use MOM_remapping,     only : remapping_CS, initialize_remapping
+use MOM_unit_scaling,  only : unit_scale_type
+use MOM_verticalGrid,  only : verticalGrid_type
+use MOM_ALE,           only : ALE_remap_scalar
 
 implicit none ; private
 
@@ -36,12 +37,13 @@ module MOM_tracer_initialization_from_Z
 !> Initializes a tracer from a z-space data file, including any lateral regridding that is needed.
 subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_nam, &
                           src_var_unit_conversion, src_var_record, homogenize, &
-                          useALEremapping, remappingScheme, src_var_gridspec )
+                          useALEremapping, remappingScheme, src_var_gridspec, h_in_Z_units )
   type(ocean_grid_type),      intent(inout) :: G   !< Ocean grid structure.
   type(verticalGrid_type),    intent(in)    :: GV  !< Ocean vertical grid structure.
   type(unit_scale_type),      intent(in)    :: US  !< A dimensional unit scaling type
   real, dimension(SZI_(G),SZJ_(G),SZK_(GV)), &
-                              intent(in)    :: h   !< Layer thickness [H ~> m or kg m-2].
+                              intent(in)    :: h   !< Layer thicknesses, in [H ~> m or kg m-2] or
+                                                   !! [Z ~> m] depending on the value of h_in_Z_units.
   real, dimension(:,:,:),     pointer       :: tr  !< Pointer to array to be initialized [CU ~> conc]
   type(param_file_type),      intent(in)    :: PF  !< parameter file
   character(len=*),           intent(in)    :: src_file !< source filename
@@ -54,12 +56,18 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_
   character(len=*), optional, intent(in)    :: remappingScheme !< remapping scheme to use.
   character(len=*), optional, intent(in)    :: src_var_gridspec !< Source variable name in a gridspec file.
                                                                 !! This is not implemented yet.
+  logical,          optional, intent(in)    :: h_in_Z_units !< If present and true, the input grid
+                                                            !! thicknesses are in the units of height
+                                                            !! ([Z ~> m]) instead of the usual units of
+                                                            !! thicknesses ([H ~> m or kg m-2])
+
   ! Local variables
   real :: land_fill = 0.0  ! A value to use to replace missing values [CU ~> conc]
   real :: convert ! A conversion factor into the model's internal units [CU conc-1 ~> 1]
   integer            :: recnum
   character(len=64)  :: remapScheme
   logical            :: homog, useALE
+  logical            :: h_is_in_Z_units
 
   ! This include declares and sets the variable "version".
 # include "version_variable.h"
@@ -84,6 +92,10 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_
   type(verticalGrid_type) :: GV_loc ! A temporary vertical grid structure
 
   real :: missing_value ! A value indicating that there is no valid input data at this point [CU ~> conc]
+  real :: dz_neglect              ! A negligibly small vertical layer extent used in
+                                  ! remapping cell reconstructions [Z ~> m]
+  real :: dz_neglect_edge         ! A negligibly small vertical layer extent used in
+                                  ! remapping edge value calculations [Z ~> m]
   integer :: nPoints    ! The number of valid input data points in a column
   integer :: id_clock_routine, id_clock_ALE
   integer :: default_answer_date  ! The default setting for the various ANSWER_DATE flags.
@@ -143,6 +155,8 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_
   convert = 1.0
   if (PRESENT(src_var_unit_conversion)) convert = src_var_unit_conversion
 
+  h_is_in_Z_units = .false. ; if (present(h_in_Z_units)) h_is_in_Z_units = h_in_Z_units
+
   call horiz_interp_and_extrap_tracer(src_file, src_var_nam, recnum, &
             G, tr_z, mask_z, z_in, z_edges_in, missing_value, &
             scale=convert, homogenize=homog, m_to_Z=US%m_to_Z, answer_date=hor_regrid_answer_date)
@@ -185,12 +199,20 @@ subroutine MOM_initialize_tracer_from_Z(h, tr, G, GV, US, PF, src_file, src_var_
       dzSrc(i,j,:) = h1(:)
     enddo ; enddo
 
-    ! Equation of state data is not available, so a simpler rescaling will have to suffice,
-    ! but it might be problematic in non-Boussinesq mode.
-    GV_loc = GV ; GV_loc%ke = kd
-    call dz_to_thickness_simple(dzSrc, hSrc, G, GV_loc, US)
-
-    call ALE_remap_scalar(remapCS, G, GV, kd, hSrc, tr_z, h, tr, all_cells=.false., answer_date=remap_answer_date )
+    if (h_is_in_Z_units) then
+      ! Here h is a vertical extent [Z ~> m], so the source extents in dzSrc are used directly (hSrc is
+      ! only filled in by dz_to_thickness_simple in the other branch), with negligible extents also in Z.
+      dz_neglect = set_dz_neglect(GV, US, remap_answer_date, dz_neglect_edge)
+      call ALE_remap_scalar(remapCS, G, GV, kd, dzSrc, tr_z, h, tr, all_cells=.false., answer_date=remap_answer_date, &
+                            H_neglect=dz_neglect, H_neglect_edge=dz_neglect_edge)
+    else
+      ! Equation of state data is not available, so a simpler rescaling will have to suffice,
+      ! but it might be problematic in non-Boussinesq mode.
+      GV_loc = GV ; GV_loc%ke = kd
+      call dz_to_thickness_simple(dzSrc, hSrc, G, GV_loc, US)
+
+      call ALE_remap_scalar(remapCS, G, GV, kd, hSrc, tr_z, h, tr, all_cells=.false., answer_date=remap_answer_date )
+    endif
 
     deallocate( hSrc )
     deallocate( dzSrc )

From e7a7a82ab33a339b124c0435a8005769417c321b Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Sat, 27 Jan 2024 13:10:18 -0500
Subject: [PATCH 09/10] (*)MOM_temp_salt_init_from_Z Z-unit tracer remap

  Revise MOM_temp_salt_initialize_from_Z in cases when Z_INIT_REMAP_GENERAL is
False to call ALE_remap_scalar with vertical layer extents (in Z units) rather
than layer thicknesses (in H units).  When in fully non-Boussinesq mode, this
same routine uses dz_to_thickness (using the full equation of state) rather than
dz_to_thickness_simple to initialize the layer thicknesses.  Boussinesq answers
are bitwise identical, but answers can change in some fully non-Boussinesq
cases.
---
 .../MOM_state_initialization.F90              | 36 ++++++++++++++-----
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/initialization/MOM_state_initialization.F90 b/src/initialization/MOM_state_initialization.F90
index 7dfced262b..855a6f2aa0 100644
--- a/src/initialization/MOM_state_initialization.F90
+++ b/src/initialization/MOM_state_initialization.F90
@@ -92,6 +92,7 @@ module MOM_state_initialization
 use MOM_ALE, only : ALE_remap_scalar, ALE_regrid_accelerated, TS_PLM_edge_values
 use MOM_regridding, only : regridding_CS, set_regrid_params, getCoordinateResolution
 use MOM_regridding, only : regridding_main, regridding_preadjust_reqs, convective_adjustment
+use MOM_regridding, only : set_dz_neglect
 use MOM_remapping, only : remapping_CS, initialize_remapping, remapping_core_h
 use MOM_horizontal_regridding, only : horiz_interp_and_extrap_tracer, homogenize_field
 use MOM_oda_incupd, only: oda_incupd_CS, initialize_oda_incupd_fixed, initialize_oda_incupd
@@ -2483,6 +2484,10 @@ subroutine MOM_temp_salt_initialize_from_Z(h, tv, depth_tot, G, GV, US, PF, just
   real, dimension(:,:,:), allocatable :: h1  ! Thicknesses on the input grid [H ~> m or kg m-2].
   real, dimension(:,:,:), allocatable :: dz_interface ! Change in position of interface due to
                                     ! regridding [H ~> m or kg m-2]
+  real :: dz_neglect                ! A negligibly small vertical layer extent used in
+                                    ! remapping cell reconstructions [Z ~> m]
+  real :: dz_neglect_edge           ! A negligibly small vertical layer extent used in
+                                    ! remapping edge value calculations [Z ~> m]
   real :: zTopOfCell, zBottomOfCell ! Heights in Z units [Z ~> m].
   type(regridding_CS) :: regridCS ! Regridding parameters and work arrays
   type(remapping_CS) :: remapCS ! Remapping parameters and work arrays
@@ -2768,6 +2773,11 @@ subroutine MOM_temp_salt_initialize_from_Z(h, tv, depth_tot, G, GV, US, PF, just
                             frac_shelf_h=frac_shelf_h )
 
       deallocate( dz_interface )
+
+      call ALE_remap_scalar(remapCS, G, GV, nkd, h1, tmpT1dIn, h, tv%T, all_cells=remap_full_column, &
+                            old_remap=remap_old_alg, answer_date=remap_answer_date )
+      call ALE_remap_scalar(remapCS, G, GV, nkd, h1, tmpS1dIn, h, tv%S, all_cells=remap_full_column, &
+                            old_remap=remap_old_alg, answer_date=remap_answer_date )
     else
       ! This is the old way of initializing to z* coordinates only
       allocate( hTarget(nz) )
@@ -2788,16 +2798,24 @@ subroutine MOM_temp_salt_initialize_from_Z(h, tv, depth_tot, G, GV, US, PF, just
       enddo ; enddo
       deallocate( hTarget )
 
-      ! This is a simple conversion of the target grid to thickness units that may not be
-      ! appropriate in non-Boussinesq mode.
-      call dz_to_thickness_simple(dz, h, G, GV, US)
+      dz_neglect = set_dz_neglect(GV, US, remap_answer_date, dz_neglect_edge)
+      call ALE_remap_scalar(remapCS, G, GV, nkd, dz1, tmpT1dIn, dz, tv%T, all_cells=remap_full_column, &
+                            old_remap=remap_old_alg, answer_date=remap_answer_date, &
+                            H_neglect=dz_neglect, H_neglect_edge=dz_neglect_edge)
+      call ALE_remap_scalar(remapCS, G, GV, nkd, dz1, tmpS1dIn, dz, tv%S, all_cells=remap_full_column, &
+                            old_remap=remap_old_alg, answer_date=remap_answer_date, &
+                            H_neglect=dz_neglect, H_neglect_edge=dz_neglect_edge)
+
+      if (GV%Boussinesq .or. GV%semi_Boussinesq) then
+        ! This is a simple conversion of the target grid to thickness units that is not
+        ! appropriate in non-Boussinesq mode.
+        call dz_to_thickness_simple(dz, h, G, GV, US)
+      else
+        ! Convert dz into thicknesses in units of H using the equation of state as appropriate.
+        call dz_to_thickness(dz, tv, h, G, GV, US)
+      endif
     endif
 
-    call ALE_remap_scalar(remapCS, G, GV, nkd, h1, tmpT1dIn, h, tv%T, all_cells=remap_full_column, &
-                          old_remap=remap_old_alg, answer_date=remap_answer_date )
-    call ALE_remap_scalar(remapCS, G, GV, nkd, h1, tmpS1dIn, h, tv%S, all_cells=remap_full_column, &
-                          old_remap=remap_old_alg, answer_date=remap_answer_date )
-
     deallocate( dz1 )
     deallocate( h1 )
     deallocate( tmpT1dIn )
@@ -2879,7 +2897,7 @@ subroutine MOM_temp_salt_initialize_from_Z(h, tv, depth_tot, G, GV, US, PF, just
                                  ks, G, GV, US, PF, just_read)
     endif
 
-    ! Now convert thicknesses to units of H.
+    ! Now convert dz into thicknesses in units of H.
     call dz_to_thickness(dz, tv, h, G, GV, US)
 
   endif ! useALEremapping

From 9a6ddee4787192b41da90ff803ae29d54e577d54 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Sun, 28 Jan 2024 10:16:54 -0500
Subject: [PATCH 10/10] *+non-Boussinesq revisions to MOM_generic_tracer

  Revised initialize_MOM_generic_tracer to use thickness_to_dz to get the layer
vertical extents and then provide these to MOM_initialize_tracer_from_Z to read
in initial generic tracer concentrations from Z-space files.  The previous
approach inappropriately used a simple multiplicative rescaling (via a call to
dz_to_thickness_simple in MOM_initialize_tracer_from_Z) by a factor that
includes the Boussinesq reference density when in non-Boussinesq mode.  A new
thermo_var_ptrs argument was added to initialize_MOM_generic_tracer to allow
for this change.

  Also revised MOM_generic_tracer_column_physics to use thickness_to_dz instead
of a simple multiplicative rescaling to get the layer vertical extents (in m)
that are used in calls to generic_tracer_source.  The multiplicative factor that
was used previously (GV%H_to_m) includes the Boussinesq reference density and
hence is inappropriate in non-Boussinesq mode; using thickness_to_dz avoids
this.

  Also added comments documenting the meaning and units of about 30 real
variables in the MOM_generic_tracer routines.

  There is a new mandatory argument to initialize_MOM_generic_tracer.  All
Boussinesq mode answers are bitwise identical, but in non-Boussinesq mode
generic tracer answers are changed by avoiding the use of the Boussinesq
reference density in several places.
---
 src/tracer/MOM_generic_tracer.F90      | 144 ++++++++++++++-----------
 src/tracer/MOM_tracer_flow_control.F90 |   2 +-
 2 files changed, 84 insertions(+), 62 deletions(-)

diff --git a/src/tracer/MOM_generic_tracer.F90 b/src/tracer/MOM_generic_tracer.F90
index 131110e6b2..7f550d8de5 100644
--- a/src/tracer/MOM_generic_tracer.F90
+++ b/src/tracer/MOM_generic_tracer.F90
@@ -38,6 +38,7 @@ module MOM_generic_tracer
   use MOM_forcing_type, only : forcing, optics_type
   use MOM_grid, only : ocean_grid_type
   use MOM_hor_index, only : hor_index_type
+  use MOM_interface_heights, only : thickness_to_dz
   use MOM_io, only : file_exists, MOM_read_data, slasher
   use MOM_open_boundary, only : ocean_OBC_type
   use MOM_open_boundary, only : register_obgc_segments, fill_obgc_segments
@@ -75,8 +76,10 @@ module MOM_generic_tracer
     character(len = 200) :: IC_file !< The file in which the generic tracer initial values can
                                     !! be found, or an empty string for internal initialization.
     logical :: Z_IC_file !< If true, the generic_tracer IC_file is in Z-space.  The default is false.
-    real :: tracer_IC_val = 0.0    !< The initial value assigned to tracers.
-    real :: tracer_land_val = -1.0 !< The values of tracers used where  land is masked out.
+    real :: tracer_IC_val = 0.0    !< The initial value assigned to tracers, in
+                                   !! concentration units [conc]
+    real :: tracer_land_val = -1.0 !< The values of tracers used where land is masked out, in
+                                   !! concentration units [conc]
     logical :: tracers_may_reinit  !< If true, tracers may go through the
                                    !! initialization code if they are not found in the restart files.
 
@@ -102,6 +105,7 @@ function register_MOM_generic_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS)
     type(tracer_registry_type), pointer      :: tr_Reg     !< Pointer to the control structure for the tracer
                                                            !! advection and diffusion module.
     type(MOM_restart_CS), target, intent(inout)  :: restart_CS !< MOM restart control struct
+
     ! Local variables
     logical :: register_MOM_generic_tracer
     logical :: obc_has
@@ -113,14 +117,17 @@ function register_MOM_generic_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS)
     ! These can be overridden later in via the field manager?
 
     integer :: ntau, axes(3)
-    type(g_tracer_type), pointer      :: g_tracer,g_tracer_next
-    character(len=fm_string_len)      :: g_tracer_name,longname,units
-    character(len=fm_string_len)      :: obc_src_file_name,obc_src_field_name
-    real                              :: lfac_in,lfac_out
-    real, dimension(:,:,:,:), pointer   :: tr_field
-    real, dimension(:,:,:), pointer     :: tr_ptr
-    real, dimension(HI%isd:HI%ied, HI%jsd:HI%jed,GV%ke)         :: grid_tmask
-    integer, dimension(HI%isd:HI%ied, HI%jsd:HI%jed)           :: grid_kmt
+    type(g_tracer_type), pointer      :: g_tracer, g_tracer_next
+    character(len=fm_string_len)      :: g_tracer_name, longname,units
+    character(len=fm_string_len)      :: obc_src_file_name, obc_src_field_name
+    real :: lfac_in   ! Multiplicative factor used in setting the tracer-specific inverse length
+                      ! scales associated with inflowing tracer reservoirs at OBCs [nondim]
+    real :: lfac_out  ! Multiplicative factor used in setting the tracer-specific inverse length
+                      ! scales associated with outflowing tracer reservoirs at OBCs [nondim]
+    real, dimension(:,:,:,:), pointer   :: tr_field ! A pointer to a generic tracer field, in concentration units [conc]
+    real, dimension(:,:,:), pointer     :: tr_ptr   ! A pointer to a generic tracer field, in concentration units [conc]
+    real,    dimension(SZI_(HI),SZJ_(HI),SZK_(GV)) :: grid_tmask ! A 3-d copy of G%mask2dT [nondim]
+    integer, dimension(SZI_(HI),SZJ_(HI))          :: grid_kmt   ! A 2-d array of nk
 
     register_MOM_generic_tracer = .false.
     if (associated(CS)) then
@@ -141,7 +148,7 @@ function register_MOM_generic_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS)
   ! Read all relevant parameters and write them to the model log.
     call log_version(param_file, sub_name, version, "")
     call get_param(param_file, sub_name, "GENERIC_TRACER_IC_FILE", CS%IC_file, &
-                 "The file in which the generic trcer initial values can "//&
+                 "The file in which the generic tracer initial values can "//&
                  "be found, or an empty string for internal initialization.", &
                  default=" ")
     if ((len_trim(CS%IC_file) > 0) .and. (scan(CS%IC_file,'/') == 0)) then
@@ -169,7 +176,7 @@ function register_MOM_generic_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS)
 
     !Fields cannot be diag registered as they are allocated and have to registered later.
     grid_tmask(:,:,:) = 0.0
-    grid_kmt(:,:) = 0.0
+    grid_kmt(:,:) = 0
     axes(:) = -1
 
     !
@@ -222,23 +229,26 @@ end function register_MOM_generic_tracer
 
   !> Register OBC segments for generic tracers
   subroutine register_MOM_generic_tracer_segments(CS, GV, OBC, tr_Reg, param_file)
-    type(MOM_generic_tracer_CS),           pointer    :: CS      !< Pointer to the control structure for this module.
-    type(verticalGrid_type),    intent(in)   :: GV         !< The ocean's vertical grid structure
-    type(ocean_OBC_type),       pointer      :: OBC        !< This open boundary condition type specifies whether,
-                                                           !! where, and what open boundary conditions are used.
-    type(tracer_registry_type), pointer      :: tr_Reg     !< Pointer to the control structure for the tracer
-                                                           !! advection and diffusion module.
-    type(param_file_type),      intent(in)   :: param_file !< A structure to parse for run-time parameters
+    type(MOM_generic_tracer_CS), pointer    :: CS         !< Pointer to the control structure for this module.
+    type(verticalGrid_type),     intent(in) :: GV         !< The ocean's vertical grid structure
+    type(ocean_OBC_type),        pointer    :: OBC        !< This open boundary condition type specifies whether,
+                                                          !! where, and what open boundary conditions are used.
+    type(tracer_registry_type),  pointer    :: tr_Reg     !< Pointer to the control structure for the tracer
+                                                          !! advection and diffusion module.
+    type(param_file_type),       intent(in) :: param_file !< A structure to parse for run-time parameters
+
     ! Local variables
     logical :: obc_has
     ! This include declares and sets the variable "version".
 #   include "version_variable.h"
-
     character(len=128), parameter :: sub_name = 'register_MOM_generic_tracer_segments'
     type(g_tracer_type), pointer      :: g_tracer,g_tracer_next
     character(len=fm_string_len)      :: g_tracer_name
-    character(len=fm_string_len)      :: obc_src_file_name,obc_src_field_name
-    real                              :: lfac_in,lfac_out
+    character(len=fm_string_len)      :: obc_src_file_name, obc_src_field_name
+    real :: lfac_in   ! Multiplicative factor used in setting the tracer-specific inverse length
+                      ! scales associated with inflowing tracer reservoirs at OBCs [nondim]
+    real :: lfac_out  ! Multiplicative factor used in setting the tracer-specific inverse length
+                      ! scales associated with outflowing tracer reservoirs at OBCs [nondim]
 
     if (.NOT. associated(OBC)) return
     !Get the tracer list
@@ -266,6 +276,7 @@ subroutine register_MOM_generic_tracer_segments(CS, GV, OBC, tr_Reg, param_file)
     enddo
 
   end subroutine register_MOM_generic_tracer_segments
+
   !>  Initialize phase II:  Initialize required variables for generic tracers
   !!  There are some steps of initialization that cannot be done in register_MOM_generic_tracer
   !!  This is the place and time to do them:
@@ -275,15 +286,17 @@ end subroutine register_MOM_generic_tracer_segments
   !!
   !!   This subroutine initializes the NTR tracer fields in tr(:,:,:,:)
   !! and it sets up the tracer output.
-  subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file, diag, OBC, CS, &
-                                          sponge_CSp, ALE_sponge_CSp)
+  subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, tv, param_file, diag, OBC, &
+                                           CS, sponge_CSp, ALE_sponge_CSp)
     logical,                               intent(in) :: restart !< .true. if the fields have already been
                                                                  !! read from a restart file.
     type(time_type), target,               intent(in) :: day     !< Time of the start of the run.
     type(ocean_grid_type),                 intent(inout) :: G    !< The ocean's grid structure
     type(verticalGrid_type),               intent(in)    :: GV   !< The ocean's vertical grid structure
     type(unit_scale_type),                 intent(in)    :: US   !< A dimensional unit scaling type
-    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)), intent(in) :: h    !< Layer thicknesses [H ~> m or kg m-2]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)), intent(in) :: h   !< Layer thicknesses [H ~> m or kg m-2]
+    type(thermo_var_ptrs),                 intent(in) :: tv      !< A structure pointing to various thermodynamic
+                                                                 !! variables
     type(param_file_type),                 intent(in) :: param_file !< A structure to parse for run-time parameters
     type(diag_ctrl),               target, intent(in) :: diag    !< Regulates diagnostic output.
     type(ocean_OBC_type),                  pointer    :: OBC     !< This open boundary condition type specifies whether,
@@ -298,10 +311,11 @@ subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file,
     integer :: i, j, k, isc, iec, jsc, jec, nk
     type(g_tracer_type), pointer    :: g_tracer,g_tracer_next
     character(len=fm_string_len)      :: g_tracer_name
-    real, dimension(:,:,:,:), pointer   :: tr_field
-    real, dimension(:,:,:), pointer     :: tr_ptr
-    real,    dimension(G%isd:G%ied, G%jsd:G%jed, 1:GV%ke) :: grid_tmask
-    integer, dimension(G%isd:G%ied, G%jsd:G%jed)          :: grid_kmt
+    real, dimension(:,:,:,:), pointer   :: tr_field ! A pointer to a generic tracer field, in concentration units [conc]
+    real, dimension(:,:,:), pointer     :: tr_ptr   ! A pointer to a generic tracer field, in concentration units [conc]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: dz ! Layer vertical extent [Z ~> m]
+    real,    dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: grid_tmask ! A 3-d copy of G%mask2dT [nondim]
+    integer, dimension(SZI_(G),SZJ_(G))          :: grid_kmt   ! A 2-d array of nk
 
     !! 2010/02/04  Add code to re-initialize Generic Tracers if needed during a model simulation
     !! By default, restart cpio should not contain a Generic Tracer IC file and step below will be skipped.
@@ -316,6 +330,8 @@ subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file,
     !For each tracer name get its  fields
     g_tracer=>CS%g_tracer_list
 
+    call thickness_to_dz(h, tv, dz, G, GV, US)
+
     do
       if (INDEX(CS%IC_file, '_NULL_') /= 0) then
         call MOM_error(WARNING, "The name of the IC_file "//trim(CS%IC_file)//&
@@ -335,12 +351,11 @@ subroutine initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file,
                               "initializing generic tracer "//trim(g_tracer_name)//&
                               " using MOM_initialize_tracer_from_Z ")
 
-          call MOM_initialize_tracer_from_Z(h, tr_ptr, G, GV, US, param_file,               &
-                                 src_file = g_tracer%src_file,                              &
-                                 src_var_nam = g_tracer%src_var_name,                       &
-                                 src_var_unit_conversion = g_tracer%src_var_unit_conversion,&
-                                 src_var_record = g_tracer%src_var_record,                  &
-                                 src_var_gridspec = g_tracer%src_var_gridspec               )
+          call MOM_initialize_tracer_from_Z(dz, tr_ptr, G, GV, US, param_file, &
+                                 src_file=g_tracer%src_file, src_var_nam=g_tracer%src_var_name, &
+                                 src_var_unit_conversion=g_tracer%src_var_unit_conversion, &
+                                 src_var_record=g_tracer%src_var_record, src_var_gridspec=g_tracer%src_var_gridspec, &
+                                 h_in_Z_units=.true.)
 
           !Check/apply the bounds for each g_tracer
           do k=1,nk ; do j=jsc,jec ; do i=isc,iec
@@ -466,8 +481,9 @@ subroutine MOM_generic_tracer_column_physics(h_old, h_new, ea, eb, fluxes, Hml,
     type(MOM_generic_tracer_CS), pointer :: CS   !< Pointer to the control structure for this module.
     type(thermo_var_ptrs),   intent(in) :: tv    !< A structure pointing to various thermodynamic variables
     type(optics_type),       intent(in) :: optics !< The structure containing optical properties.
-    real,          optional, intent(in) :: evap_CFL_limit !< Limits how much water can be fluxed out of
-                                                 !! the top layer Stored previously in diabatic CS.
+    real,          optional, intent(in) :: evap_CFL_limit !< Limit on the fraction of the water that can
+                                                 !! be fluxed out of the top layer in a timestep [nondim]
+                                                 !   Stored previously in diabatic CS.
     real,          optional, intent(in) :: minimum_forcing_depth !< The smallest depth over which fluxes
                                                  !!  can be applied [H ~> m or kg m-2]
                                                  !   Stored previously in diabatic CS.
@@ -479,14 +495,17 @@ subroutine MOM_generic_tracer_column_physics(h_old, h_new, ea, eb, fluxes, Hml,
 
     type(g_tracer_type), pointer  :: g_tracer, g_tracer_next
     character(len=fm_string_len)  :: g_tracer_name
-    real, dimension(:,:), pointer :: stf_array,trunoff_array,runoff_tracer_flux_array
+    real, dimension(:,:), pointer :: stf_array   ! The surface flux of the tracer [conc kg m-2 s-1]
+    real, dimension(:,:), pointer :: trunoff_array  ! The tracer concentration in the river runoff [conc]
+    real, dimension(:,:), pointer :: runoff_tracer_flux_array ! The runoff tracer flux [conc kg m-2 s-1]
 
-    real :: surface_field(SZI_(G),SZJ_(G))
+    real :: surface_field(SZI_(G),SZJ_(G))  ! The surface value of some field, here only used for salinity [S ~> ppt]
     real :: dz_ml(SZI_(G),SZJ_(G))  ! The mixed layer depth in the MKS units used for generic tracers [m]
-    real :: sosga
+    real :: sosga ! The global mean surface salinity [ppt]
 
-    real, dimension(G%isd:G%ied,G%jsd:G%jed,GV%ke) :: rho_dzt, dzt
-    real, dimension(SZI_(G),SZJ_(G),SZK_(GV))      :: h_work
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: rho_dzt ! Layer mass per unit area [kg m-2]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: dzt     ! Layer vertical extents [m]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV)) :: h_work  ! A work array of thicknesses [H ~> m or kg m-2]
     integer :: i, j, k, isc, iec, jsc, jec, nk
 
     isc = G%isc ; iec = G%iec ; jsc = G%jsc ; jec = G%jec ; nk = GV%ke
@@ -536,14 +555,15 @@ subroutine MOM_generic_tracer_column_physics(h_old, h_new, ea, eb, fluxes, Hml,
     !
 
     rho_dzt(:,:,:) = GV%H_to_kg_m2 * GV%Angstrom_H
-    do k = 1, nk ; do j = jsc, jec ; do i = isc, iec  !{
+    do k=1,nk ; do j=jsc,jec ; do i=isc,iec
       rho_dzt(i,j,k) = GV%H_to_kg_m2 * h_old(i,j,k)
-    enddo ; enddo ; enddo !}
+    enddo ; enddo ; enddo
 
     dzt(:,:,:) = 1.0
-    do k = 1, nk ; do j = jsc, jec ; do i = isc, iec  !{
-      dzt(i,j,k) = GV%H_to_m * h_old(i,j,k)
-    enddo ; enddo ; enddo !}
+    call thickness_to_dz(h_old, tv, dzt, G, GV, US)
+    do k=1,nk ; do j=jsc,jec ; do i=isc,iec
+      dzt(i,j,k) = US%Z_to_m * dzt(i,j,k)
+    enddo ; enddo ; enddo
     dz_ml(:,:) = 0.0
     do j=jsc,jec ; do i=isc,iec
       surface_field(i,j) = tv%S(i,j,1)
@@ -639,8 +659,8 @@ function MOM_generic_tracer_stock(h, stocks, G, GV, CS, names, units, stock_inde
 
     ! Local variables
     type(g_tracer_type), pointer  :: g_tracer, g_tracer_next
-    real, dimension(:,:,:,:), pointer   :: tr_field
-    real, dimension(:,:,:), pointer     :: tr_ptr
+    real, dimension(:,:,:,:), pointer   :: tr_field ! A pointer to a generic tracer field, in concentration units [conc]
+    real, dimension(:,:,:), pointer     :: tr_ptr   ! A pointer to a generic tracer field, in concentration units [conc]
     character(len=128), parameter :: sub_name = 'MOM_generic_tracer_stock'
 
     integer :: m
@@ -802,7 +822,7 @@ subroutine array_global_min_max(tr_array, tmask, isd, jsd, isc, iec, jsc, jec, n
     real    :: tmax, tmin   ! Maximum and minimum tracer values, in the same units as tr_array
     real    :: tmax0, tmin0 ! First-guest values of tmax and tmin.
     integer :: itmax, jtmax, ktmax, itmin, jtmin, ktmin
-    real    :: fudge ! A factor that is close to 1 that is used to find the location of the extrema.
+    real    :: fudge ! A factor that is close to 1 that is used to find the location of the extrema [nondim].
 
      ! arrays to enable vectorization
     integer :: iminarr(3), imaxarr(3)
@@ -853,7 +873,7 @@ subroutine array_global_min_max(tr_array, tmask, isd, jsd, isc, iec, jsc, jec, n
 
     ! Now find the location of the global extrema.
     !
-    ! Note that the fudge factor above guarantees that the location of max (min) is uinque,
+    ! Note that the fudge factor above guarantees that the location of max (min) is unique,
     ! since tmax0 (tmin0) has slightly different values on each processor.
     ! Otherwise, the function tr_array(i,j,k) could be equal to global max (min) at more
     ! than one point in space and this would be a much more difficult problem to solve.
@@ -899,16 +919,16 @@ subroutine MOM_generic_tracer_surface_state(sfc_state, h, G, GV, CS)
     real, dimension(SZI_(G),SZJ_(G),SZK_(GV)), intent(in) :: h    !< Layer thicknesses [H ~> m or kg m-2]
     type(MOM_generic_tracer_CS),           pointer       :: CS   !< Pointer to the control structure for this module.
 
-! Local variables
-    real :: sosga
+    ! Local variables
+    real :: sosga ! The global mean surface salinity [ppt]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV),1) :: rho0 ! An unused array of densities [kg m-3]
+    real, dimension(SZI_(G),SZJ_(G),SZK_(GV))   :: dzt  ! Layer vertical extents [m]
 
     character(len=128), parameter :: sub_name = 'MOM_generic_tracer_surface_state'
-    real, dimension(G%isd:G%ied,G%jsd:G%jed,1:GV%ke,1) :: rho0
-    real, dimension(G%isd:G%ied,G%jsd:G%jed,1:GV%ke) ::  dzt
 
     !Set coupler values
     !nnz: fake rho0
-    rho0=1.0
+    rho0(:,:,:,:) = 1.0
 
     dzt(:,:,:) = GV%H_to_m * h(:,:,:)
 
@@ -937,7 +957,7 @@ subroutine MOM_generic_tracer_surface_state(sfc_state, h, G, GV, CS)
     !Niki: The problem with calling diagnostic outputs here is that this subroutine is called every dt_cpld
     !      hence if dt_therm > dt_cpld we get output (and contribution to the mean) at times that tracers
     !      had not been updated.
-    !      Moving this to the end of column physics subrotuine fixes this issue.
+    !      Moving this to the end of column physics subroutine fixes this issue.
 
   end subroutine MOM_generic_tracer_surface_state
 
@@ -976,7 +996,7 @@ end subroutine MOM_generic_flux_init
   subroutine MOM_generic_tracer_fluxes_accumulate(flux_tmp, weight)
     type(forcing), intent(in)    :: flux_tmp  !< A structure containing pointers to
                                               !! thermodynamic and tracer forcing fields.
-    real,          intent(in)    :: weight    !< A weight for accumulating this flux
+    real,          intent(in)    :: weight    !< A weight for accumulating this flux [nondim]
 
     call generic_tracer_coupler_accumulate(flux_tmp%tr_fluxes, weight)
 
@@ -986,10 +1006,12 @@ end subroutine MOM_generic_tracer_fluxes_accumulate
   subroutine MOM_generic_tracer_get(name,member,array, CS)
     character(len=*),         intent(in)  :: name   !< Name of requested tracer.
     character(len=*),         intent(in)  :: member !< The tracer element to return.
-    real, dimension(:,:,:),   intent(out) :: array  !< Array filled by this routine.
-    type(MOM_generic_tracer_CS), pointer :: CS   !< Pointer to the control structure for this module.
+    real, dimension(:,:,:),   intent(out) :: array  !< Array filled by this routine, in arbitrary units [A]
+    type(MOM_generic_tracer_CS), pointer  :: CS     !< Pointer to the control structure for this module.
 
-    real, dimension(:,:,:),   pointer :: array_ptr
+    ! Local variables
+    real, dimension(:,:,:),   pointer :: array_ptr  ! The tracer in the generic tracer structures, in
+                                                    ! arbitrary units [A]
     character(len=128), parameter :: sub_name = 'MOM_generic_tracer_get'
 
     call g_tracer_get_pointer(CS%g_tracer_list,name,member,array_ptr)
diff --git a/src/tracer/MOM_tracer_flow_control.F90 b/src/tracer/MOM_tracer_flow_control.F90
index c8ce2f5f75..6d035e1d27 100644
--- a/src/tracer/MOM_tracer_flow_control.F90
+++ b/src/tracer/MOM_tracer_flow_control.F90
@@ -340,7 +340,7 @@ subroutine tracer_flow_control_init(restart, day, G, GV, US, h, param_file, diag
     call initialize_CFC_cap(restart, day, G, GV, US, h, diag, OBC, CS%CFC_cap_CSp)
 
   if (CS%use_MOM_generic_tracer) &
-    call initialize_MOM_generic_tracer(restart, day, G, GV, US, h, param_file, diag, OBC, &
+    call initialize_MOM_generic_tracer(restart, day, G, GV, US, h, tv, param_file, diag, OBC, &
                                 CS%MOM_generic_tracer_CSp, sponge_CSp, ALE_sponge_CSp)
   if (CS%use_pseudo_salt_tracer) &
     call initialize_pseudo_salt_tracer(restart, day, G, GV, US, h, diag, OBC, CS%pseudo_salt_tracer_CSp, &