*Use Halley method iterations in cuberoot function

Modified the cuberoot function to do 3 iterations with Halley's method before switching to Newton's method as before, which saves about 2 iterations. Also added a check for a perfect solution as a stopping point during the Newton's method iterations, because this prevents about half the instances where we would compare successive solutions, avoiding some divisions and extra iterations. This changes answers at roundoff for code that uses the cuberoot function, so ideally this PR would be dealt with before the cuberoot becomes widely used.
Hallberg-NOAA · Jan 19, 2024 · 2c227c9 · 2c227c9
1 parent d7d126a
commit 2c227c9
Showing 1 changed file with 41 additions and 22 deletions.
diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
@@ -47,6 +47,7 @@ elemental function cuberoot(x) result(root)
               ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
   real, parameter :: den_min = 2.**(minexponent(1.) / 4 + 4)  ! A value of den that triggers rescaling [C]
   real, parameter :: den_max = 2.**(maxexponent(1.) / 4 - 2)  ! A value of den that triggers rescaling [C]
+  logical :: converged
   integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a.
   integer :: itt
 
@@ -58,25 +59,51 @@ elemental function cuberoot(x) result(root)
     ! Here asx is in the range of 0.125 <= asx < 1.0
     asx = scale(abs(x), -3*ex_3)
 
-    ! This first estimate is one iteration of Newton's method with a starting guess of 1.  It is
+    ! Iteratively determine Root = asx**1/3 using Halley's method and then Newton's method, noting
+    ! that in this case Newton's method and Halley's menthod both converge monotonically from above
+    ! and need no bounding.
+
+    !   This first estimate is one iteration of Halley's method with a starting guess of 1.  It is
     ! always an over-estimate of the true solution, but it is a good approximation for asx near 1.
-    num = 2.0 + asx
-    den = 3.0
-    ! Iteratively determine Root = asx**1/3 using Newton's method, noting that in this case Newton's
-    ! method converges monotonically from above and needs no bounding.  For the range of asx from
-    ! 0.125 to 1.0 with the first guess used above, 6 iterations suffice to converge to roundoff.
-    do itt=1,9
-      ! Newton's method iterates estimates as Root = Root - (Root**3 - asx) / (3.0 * Root**2), or
-      ! equivalently as Root = (2.0*Root**2 + asx) / (3.0 * Root**2).
-      ! Keeping the estimates in a fractional form Root = num / den allows this calculation with
-      ! fewer (or no) real divisions during the iterations before doing a single real division
-      ! at the end, and it is therefore more computationally efficient.
+    !   Keeping the estimates in a fractional form Root = num / den allows this calculation with
+    ! no real divisions during the iterations before doing a single real division at the end,
+    ! and it is therefore more computationally efficient.
+    num = 1.0 + 2.0*asx
+    den = 2.0 + asx
+    converged = .false.
+
+    do itt=1,2
+      ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx).
+      num_prev = num ; den_prev = den
+      num = num_prev * (num_prev**3 + 2.0 * asx * den_prev**3)
+      den = den_prev * (2.0 * num_prev**3 + asx * den_prev**3)
+
+      if (num * den_prev == num_prev * den) then
+        converged = .true.
+        root_asx = num / den
+        exit
+      endif
+    enddo
+
+    ! For the range of asx from 0.125 to 1.0 with the first guess of 1.0 and 3 iterations with
+    ! Halley's method, 2 more iterations with Newton's method suffice to converge within roundoff.
+    if (.not.converged) then ; do itt=1,4
 
+      ! Newton's method iterates estimates as Root = Root - (Root**3 - asx) / (3.0 * Root**2), or
+      ! equivalently as Root = (2.0*Root**3 + asx) / (3.0 * Root**2).
       num_prev = num ; den_prev = den
       num = 2.0 * num_prev**3 + asx * den_prev**3
       den = 3.0 * (den_prev * num_prev**2)
 
-      if ((num * den_prev == num_prev * den) .or. (itt == 9)) then
+      ! Because successive estimates of the numerator and denominator tend to be the cube of their
+      ! predecessors, the numerator and denominator need to be rescaled when they get too large or
+      ! small to avoid overflow or underflow in the convergence test below.
+      if ((den > den_max) .or. (den < den_min)) then
+        num = scale(num, -exponent(den))
+        den = scale(den, -exponent(den))
+      endif
+
+      if ((num * den_prev == num_prev * den) .or. (num**3 == asx * den**3) .or. (itt == 4)) then
         !   If successive estimates of root are identical, this is a converged solution.
         root_asx = num / den
         exit
@@ -99,15 +126,7 @@ elemental function cuberoot(x) result(root)
         exit
       endif
 
-      ! Because successive estimates of the numerator and denominator tend to be the cube of their
-      ! predecessors, the numerator and denominator need to be rescaled by division when they get
-      ! too large or small to avoid overflow or underflow in the convergence test below.
-      if ((den > den_max) .or. (den < den_min)) then
-        num = scale(num, -exponent(den))
-        den = scale(den, -exponent(den))
-      endif
-
-    enddo
+    enddo ; endif
 
     root = sign(scale(root_asx, ex_3), x)
   endif