diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90 index 808bc408a8..4c249168fc 100644 --- a/src/framework/MOM_intrinsic_functions.F90 +++ b/src/framework/MOM_intrinsic_functions.F90 @@ -47,6 +47,7 @@ elemental function cuberoot(x) result(root) ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D] real, parameter :: den_min = 2.**(minexponent(1.) / 4 + 4) ! A value of den that triggers rescaling [C] real, parameter :: den_max = 2.**(maxexponent(1.) / 4 - 2) ! A value of den that triggers rescaling [C] + logical :: converged integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a. integer :: itt @@ -58,25 +59,51 @@ elemental function cuberoot(x) result(root) ! Here asx is in the range of 0.125 <= asx < 1.0 asx = scale(abs(x), -3*ex_3) - ! This first estimate is one iteration of Newton's method with a starting guess of 1. It is + ! Iteratively determine Root = asx**1/3 using Halley's method and then Newton's method, noting + ! that in this case Newton's method and Halley's menthod both converge monotonically from above + ! and need no bounding. + + ! This first estimate is one iteration of Halley's method with a starting guess of 1. It is ! always an over-estimate of the true solution, but it is a good approximation for asx near 1. - num = 2.0 + asx - den = 3.0 - ! Iteratively determine Root = asx**1/3 using Newton's method, noting that in this case Newton's - ! method converges monotonically from above and needs no bounding. For the range of asx from - ! 0.125 to 1.0 with the first guess used above, 6 iterations suffice to converge to roundoff. - do itt=1,9 - ! Newton's method iterates estimates as Root = Root - (Root**3 - asx) / (3.0 * Root**2), or - ! equivalently as Root = (2.0*Root**2 + asx) / (3.0 * Root**2). - ! Keeping the estimates in a fractional form Root = num / den allows this calculation with - ! fewer (or no) real divisions during the iterations before doing a single real division - ! at the end, and it is therefore more computationally efficient. + ! Keeping the estimates in a fractional form Root = num / den allows this calculation with + ! no real divisions during the iterations before doing a single real division at the end, + ! and it is therefore more computationally efficient. + num = 1.0 + 2.0*asx + den = 2.0 + asx + converged = .false. + + do itt=1,2 + ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx). + num_prev = num ; den_prev = den + num = num_prev * (num_prev**3 + 2.0 * asx * den_prev**3) + den = den_prev * (2.0 * num_prev**3 + asx * den_prev**3) + + if (num * den_prev == num_prev * den) then + converged = .true. + root_asx = num / den + exit + endif + enddo + + ! For the range of asx from 0.125 to 1.0 with the first guess of 1.0 and 3 iterations with + ! Halley's method, 2 more iterations with Newton's method suffice to converge within roundoff. + if (.not.converged) then ; do itt=1,4 + ! Newton's method iterates estimates as Root = Root - (Root**3 - asx) / (3.0 * Root**2), or + ! equivalently as Root = (2.0*Root**3 + asx) / (3.0 * Root**2). num_prev = num ; den_prev = den num = 2.0 * num_prev**3 + asx * den_prev**3 den = 3.0 * (den_prev * num_prev**2) - if ((num * den_prev == num_prev * den) .or. (itt == 9)) then + ! Because successive estimates of the numerator and denominator tend to be the cube of their + ! predecessors, the numerator and denominator need to be rescaled when they get too large or + ! small to avoid overflow or underflow in the convergence test below. + if ((den > den_max) .or. (den < den_min)) then + num = scale(num, -exponent(den)) + den = scale(den, -exponent(den)) + endif + + if ((num * den_prev == num_prev * den) .or. (num**3 == asx * den**3) .or. (itt == 4)) then ! If successive estimates of root are identical, this is a converged solution. root_asx = num / den exit @@ -99,15 +126,7 @@ elemental function cuberoot(x) result(root) exit endif - ! Because successive estimates of the numerator and denominator tend to be the cube of their - ! predecessors, the numerator and denominator need to be rescaled by division when they get - ! too large or small to avoid overflow or underflow in the convergence test below. - if ((den > den_max) .or. (den < den_min)) then - num = scale(num, -exponent(den)) - den = scale(den, -exponent(den)) - endif - - enddo + enddo ; endif root = sign(scale(root_asx, ex_3), x) endif