From 2c227c9be8979baee21557c28104b89abd41aa10 Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Fri, 19 Jan 2024 16:35:43 -0500 Subject: [PATCH] *Use Halley method iterations in cuberoot function Modified the cuberoot function to do 3 iterations with Halley's method before switching to Newton's method as before, which saves about 2 iterations. Also added a check for a perfect solution as a stopping point during the Newton's method iterations, because this prevents about half the instances where we would compare successive solutions, avoiding some divisions and extra iterations. This changes answers at roundoff for code that uses the cuberoot function, so ideally this PR would be dealt with before the cuberoot becomes widely used. --- src/framework/MOM_intrinsic_functions.F90 | 63 +++++++++++++++-------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90 index 808bc408a8..4c249168fc 100644 --- a/src/framework/MOM_intrinsic_functions.F90 +++ b/src/framework/MOM_intrinsic_functions.F90 @@ -47,6 +47,7 @@ elemental function cuberoot(x) result(root) ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D] real, parameter :: den_min = 2.**(minexponent(1.) / 4 + 4) ! A value of den that triggers rescaling [C] real, parameter :: den_max = 2.**(maxexponent(1.) / 4 - 2) ! A value of den that triggers rescaling [C] + logical :: converged integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a. integer :: itt @@ -58,25 +59,51 @@ elemental function cuberoot(x) result(root) ! Here asx is in the range of 0.125 <= asx < 1.0 asx = scale(abs(x), -3*ex_3) - ! This first estimate is one iteration of Newton's method with a starting guess of 1. It is + ! Iteratively determine Root = asx**1/3 using Halley's method and then Newton's method, noting + ! that in this case Newton's method and Halley's menthod both converge monotonically from above + ! and need no bounding. + + ! This first estimate is one iteration of Halley's method with a starting guess of 1. It is ! always an over-estimate of the true solution, but it is a good approximation for asx near 1. - num = 2.0 + asx - den = 3.0 - ! Iteratively determine Root = asx**1/3 using Newton's method, noting that in this case Newton's - ! method converges monotonically from above and needs no bounding. For the range of asx from - ! 0.125 to 1.0 with the first guess used above, 6 iterations suffice to converge to roundoff. - do itt=1,9 - ! Newton's method iterates estimates as Root = Root - (Root**3 - asx) / (3.0 * Root**2), or - ! equivalently as Root = (2.0*Root**2 + asx) / (3.0 * Root**2). - ! Keeping the estimates in a fractional form Root = num / den allows this calculation with - ! fewer (or no) real divisions during the iterations before doing a single real division - ! at the end, and it is therefore more computationally efficient. + ! Keeping the estimates in a fractional form Root = num / den allows this calculation with + ! no real divisions during the iterations before doing a single real division at the end, + ! and it is therefore more computationally efficient. + num = 1.0 + 2.0*asx + den = 2.0 + asx + converged = .false. + + do itt=1,2 + ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx). + num_prev = num ; den_prev = den + num = num_prev * (num_prev**3 + 2.0 * asx * den_prev**3) + den = den_prev * (2.0 * num_prev**3 + asx * den_prev**3) + + if (num * den_prev == num_prev * den) then + converged = .true. + root_asx = num / den + exit + endif + enddo + + ! For the range of asx from 0.125 to 1.0 with the first guess of 1.0 and 3 iterations with + ! Halley's method, 2 more iterations with Newton's method suffice to converge within roundoff. + if (.not.converged) then ; do itt=1,4 + ! Newton's method iterates estimates as Root = Root - (Root**3 - asx) / (3.0 * Root**2), or + ! equivalently as Root = (2.0*Root**3 + asx) / (3.0 * Root**2). num_prev = num ; den_prev = den num = 2.0 * num_prev**3 + asx * den_prev**3 den = 3.0 * (den_prev * num_prev**2) - if ((num * den_prev == num_prev * den) .or. (itt == 9)) then + ! Because successive estimates of the numerator and denominator tend to be the cube of their + ! predecessors, the numerator and denominator need to be rescaled when they get too large or + ! small to avoid overflow or underflow in the convergence test below. + if ((den > den_max) .or. (den < den_min)) then + num = scale(num, -exponent(den)) + den = scale(den, -exponent(den)) + endif + + if ((num * den_prev == num_prev * den) .or. (num**3 == asx * den**3) .or. (itt == 4)) then ! If successive estimates of root are identical, this is a converged solution. root_asx = num / den exit @@ -99,15 +126,7 @@ elemental function cuberoot(x) result(root) exit endif - ! Because successive estimates of the numerator and denominator tend to be the cube of their - ! predecessors, the numerator and denominator need to be rescaled by division when they get - ! too large or small to avoid overflow or underflow in the convergence test below. - if ((den > den_max) .or. (den < den_min)) then - num = scale(num, -exponent(den)) - den = scale(den, -exponent(den)) - endif - - enddo + enddo ; endif root = sign(scale(root_asx, ex_3), x) endif