diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90 index 4327cfa5a6..fbb1c28096 100644 --- a/src/framework/MOM_intrinsic_functions.F90 +++ b/src/framework/MOM_intrinsic_functions.F90 @@ -5,12 +5,26 @@ module MOM_intrinsic_functions ! This file is part of MOM6. See LICENSE.md for the license. use iso_fortran_env, only : stdout => output_unit, stderr => error_unit +use iso_fortran_env, only : int64, real64 implicit none ; private public :: invcosh, cuberoot public :: intrinsic_functions_unit_tests +! Floating point model, if bit layout from high to low is (sign, exp, frac) + +integer, parameter :: bias = maxexponent(1.) - 1 + !< The double precision exponent offset +integer, parameter :: signbit = storage_size(1.) - 1 + !< Position of sign bit +integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.)) + !< Bit size of exponent +integer, parameter :: expbit = signbit - explen + !< Position of lowest exponent bit +integer, parameter :: fraclen = expbit + !< Length of fractional part + contains !> Evaluate the inverse cosh, either using a math library or an @@ -28,6 +42,7 @@ function invcosh(x) end function invcosh + !> Returns the cube root of a real argument at roundoff accuracy, in a form that works properly with !! rescaling of the argument by integer powers of 8. If the argument is a NaN, a NaN is returned. elemental function cuberoot(x) result(root) @@ -37,24 +52,28 @@ elemental function cuberoot(x) result(root) real :: asx ! The absolute value of x rescaled by an integer power of 8 to put it into ! the range from 0.125 < asx <= 1.0, in ambiguous units cubed [B3] real :: root_asx ! The cube root of asx [B] + real :: ra_3 ! root_asx cubed [B3] real :: num ! The numerator of an expression for the evolving estimate of the cube root of asx ! in arbitrary units that can grow or shrink with each iteration [B C] real :: den ! The denominator of an expression for the evolving estimate of the cube root of asx ! in arbitrary units that can grow or shrink with each iteration [C] real :: num_prev ! The numerator of an expression for the previous iteration of the evolving estimate ! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D] + real :: np_3 ! num_prev cubed [B3 D3] real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D] - integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a. + real :: dp_3 ! den_prev cubed [C3] + real :: r0 ! Initial value of the iterative solver. [B C] + real :: r0_3 ! r0 cubed [B3 C3] integer :: itt + integer(kind=int64) :: e_x, s_x + if ((x >= 0.0) .eqv. (x <= 0.0)) then ! Return 0 for an input of 0, or NaN for a NaN input. root = x else - ex_3 = ceiling(exponent(x) / 3.) - ! Here asx is in the range of 0.125 <= asx < 1.0 - asx = scale(abs(x), -3*ex_3) + call rescale_cbrt(x, asx, e_x, s_x) ! Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method, ! noting that Halley's method onverges monotonically and needs no bounding. Halley's method is @@ -65,14 +84,21 @@ elemental function cuberoot(x) result(root) ! This first estimate gives the same magnitude of errors for 0.125 and 1.0 after two iterations. ! The first iteration is applied explicitly. - num = 0.707106 * (0.707106**3 + 2.0 * asx) - den = 2.0 * (0.707106**3) + asx + r0 = 0.707106 + r0_3 = r0 * r0 * r0 + num = r0 * (r0_3 + 2.0 * asx) + den = 2.0 * r0_3 + asx do itt=1,2 ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx). num_prev = num ; den_prev = den - num = num_prev * (num_prev**3 + 2.0 * asx * (den_prev**3)) - den = den_prev * (2.0 * num_prev**3 + asx * (den_prev**3)) + + ! Pre-compute these as integer powers, to avoid `pow()`-like intrinsics. + np_3 = num_prev * num_prev * num_prev + dp_3 = den_prev * den_prev * den_prev + + num = num_prev * (np_3 + 2.0 * asx * dp_3) + den = den_prev * (2.0 * np_3 + asx * dp_3) ! Equivalent to: root_asx = root_asx * (root_asx**3 + 2.*asx) / (2.*root_asx**3 + asx) enddo ! At this point the error in root_asx is better than 1 part in 3e14. @@ -80,20 +106,93 @@ elemental function cuberoot(x) result(root) ! One final iteration with Newton's method polishes up the root and gives a solution ! that is within the last bit of the true solution. - root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2)) + ra_3 = root_asx * root_asx * root_asx + root_asx = root_asx - (ra_3 - asx) / (3.0 * (root_asx * root_asx)) - root = sign(scale(root_asx, ex_3), x) + root = descale(root_asx, e_x, s_x) endif - end function cuberoot + +!> Rescale `a` to the range [0.125, 1) and compute its cube-root exponent. +pure subroutine rescale_cbrt(a, x, e_r, s_a) + real, intent(in) :: a + !< The real parameter to be rescaled for cube root + real, intent(out) :: x + !< The rescaled value of a + integer(kind=int64), intent(out) :: e_r + !< Cube root of the exponent of the rescaling of `a` + integer(kind=int64), intent(out) :: s_a + !< The sign bit of a + + integer(kind=int64) :: xb + ! Floating point value of a, bit-packed as an integer + integer(kind=int64) :: e_a + ! Unscaled exponent of a + integer(kind=int64) :: e_x + ! Exponent of x + integer(kind=int64) :: e_div, e_mod + ! Quotient and remainder of e in e = 3*(e/3) + modulo(e,3). + + ! Pack bits of a into xb and extract its exponent and sign. + xb = transfer(a, 1_int64) + s_a = ibits(xb, signbit, 1) + e_a = ibits(xb, expbit, explen) - bias + + ! Compute terms of exponent decomposition e = 3*(e/3) + modulo(e,3). + ! (Fortran division is round-to-zero, so we must emulate floor division.) + e_mod = modulo(e_a, 3_int64) + e_div = (e_a - e_mod)/3 + + ! Our scaling decomposes e_a into e = {3*(e/3) + 3} + {modulo(e,3) - 3}. + + ! The first term is a perfect cube, whose cube root is computed below. + e_r = e_div + 1 + + ! The second term ensures that x is shifted to [0.125, 1). + e_x = e_mod - 3 + + ! Insert the new 11-bit exponent into xb and write to x and extend the + ! bitcount to 12, so that the sign bit is zero and x is always positive. + call mvbits(e_x + bias, 0, explen + 1, xb, fraclen) + x = transfer(xb, 1.) +end subroutine rescale_cbrt + + +!> Undo the rescaling of a real number back to its original base. +pure function descale(x, e_a, s_a) result(a) + real, intent(in) :: x + !< The rescaled value which is to be restored. + integer(kind=int64), intent(in) :: e_a + !< Exponent of the unscaled value + integer(kind=int64), intent(in) :: s_a + !< Sign bit of the unscaled value + real :: a + !< Restored value with the corrected exponent and sign + + integer(kind=int64) :: xb + ! Bit-packed real number into integer form + integer(kind=int64) :: e_x + ! Biased exponent of x + + ! Apply the corrected exponent and sign to x. + xb = transfer(x, 1_8) + e_x = ibits(xb, expbit, explen) + call mvbits(e_a + e_x, 0, explen, xb, expbit) + call mvbits(s_a, 0, 1, xb, signbit) + a = transfer(xb, 1.) +end function descale + + !> Returns true if any unit test of intrinsic_functions fails, or false if they all pass. -logical function intrinsic_functions_unit_tests(verbose) +function intrinsic_functions_unit_tests(verbose) result(fail) logical, intent(in) :: verbose !< If true, write results to stdout + logical :: fail !< True if any of the unit tests fail ! Local variables real :: testval ! A test value for self-consistency testing [nondim] - logical :: fail, v + logical :: v + integer :: n fail = .false. v = verbose @@ -107,7 +206,15 @@ logical function intrinsic_functions_unit_tests(verbose) fail = fail .or. Test_cuberoot(v, 1.0) fail = fail .or. Test_cuberoot(v, 0.125) fail = fail .or. Test_cuberoot(v, 0.965) - + fail = fail .or. Test_cuberoot(v, 1.0 - epsilon(1.0)) + fail = fail .or. Test_cuberoot(v, 1.0 - 0.5*epsilon(1.0)) + + testval = 1.0e-99 + v = .false. + do n=-160,160 + fail = fail .or. Test_cuberoot(v, testval) + testval = (-2.908 * (1.414213562373 + 1.2345678901234e-5*n)) * testval + enddo end function intrinsic_functions_unit_tests !> True if the cube of cuberoot(val) does not closely match val. False otherwise. @@ -117,7 +224,7 @@ logical function Test_cuberoot(verbose, val) ! Local variables real :: diff ! The difference between val and the cube root of its cube. - diff = val - cuberoot(val**3) + diff = val - cuberoot(val)**3 Test_cuberoot = (abs(diff) > 2.0e-15*abs(val)) if (Test_cuberoot) then