NOAA-GFDL · Hallberg-NOAA · Feb 2, 2024 · Jan 28, 2024 · Jan 30, 2024 · Jan 30, 2024
diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
@@ -5,12 +5,26 @@ module MOM_intrinsic_functions
 ! This file is part of MOM6. See LICENSE.md for the license.
 
 use iso_fortran_env, only : stdout => output_unit, stderr => error_unit
+use iso_fortran_env, only : int64, real64
 
 implicit none ; private
 
 public :: invcosh, cuberoot
 public :: intrinsic_functions_unit_tests
 
+! Floating point model, if bit layout from high to low is (sign, exp, frac)
+
+integer, parameter :: bias = maxexponent(1.) - 1
+  !< The double precision exponent offset
+integer, parameter :: signbit = storage_size(1.) - 1
+  !< Position of sign bit
+integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
+  !< Bit size of exponent
+integer, parameter :: expbit = signbit - explen
+  !< Position of lowest exponent bit
+integer, parameter :: fraclen = expbit
+  !< Length of fractional part
+
 contains
 
 !> Evaluate the inverse cosh, either using a math library or an
@@ -28,6 +42,7 @@ function invcosh(x)
 
 end function invcosh
 
+
 !> Returns the cube root of a real argument at roundoff accuracy, in a form that works properly with
 !! rescaling of the argument by integer powers of 8.  If the argument is a NaN, a NaN is returned.
 elemental function cuberoot(x) result(root)
@@ -37,24 +52,28 @@ elemental function cuberoot(x) result(root)
   real :: asx ! The absolute value of x rescaled by an integer power of 8 to put it into
               ! the range from 0.125 < asx <= 1.0, in ambiguous units cubed [B3]
   real :: root_asx ! The cube root of asx [B]
+  real :: ra_3 ! root_asx cubed [B3]
   real :: num ! The numerator of an expression for the evolving estimate of the cube root of asx
               ! in arbitrary units that can grow or shrink with each iteration [B C]
   real :: den ! The denominator of an expression for the evolving estimate of the cube root of asx
               ! in arbitrary units that can grow or shrink with each iteration [C]
   real :: num_prev ! The numerator of an expression for the previous iteration of the evolving estimate
               ! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D]
+  real :: np_3 ! num_prev cubed  [B3 D3]
   real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of
               ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
-  integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a.
+  real :: dp_3 ! den_prev cubed  [C3]
+  real :: r0  ! Initial value of the iterative solver. [B C]
+  real :: r0_3 ! r0 cubed [B3 C3]
   integer :: itt
 
+  integer(kind=int64) :: e_x, s_x
+
   if ((x >= 0.0) .eqv. (x <= 0.0)) then
     ! Return 0 for an input of 0, or NaN for a NaN input.
     root = x
   else
-    ex_3 = ceiling(exponent(x) / 3.)
-    ! Here asx is in the range of 0.125 <= asx < 1.0
-    asx = scale(abs(x), -3*ex_3)
+    call rescale_cbrt(x, asx, e_x, s_x)
 
     !   Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method,
     ! noting that Halley's method onverges monotonically and needs no bounding.  Halley's method is
@@ -65,35 +84,115 @@ elemental function cuberoot(x) result(root)
 
     ! This first estimate gives the same magnitude of errors for 0.125 and 1.0 after two iterations.
     ! The first iteration is applied explicitly.
-    num = 0.707106 * (0.707106**3 + 2.0 * asx)
-    den = 2.0 * (0.707106**3) + asx
+    r0 = 0.707106
+    r0_3 = r0 * r0 * r0
+    num = r0 * (r0_3 + 2.0 * asx)
+    den = 2.0 * r0_3 + asx
 
     do itt=1,2
       ! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx).
       num_prev = num ; den_prev = den
-      num = num_prev * (num_prev**3 + 2.0 * asx * (den_prev**3))
-      den = den_prev * (2.0 * num_prev**3 + asx * (den_prev**3))
+
+      ! Pre-compute these as integer powers, to avoid `pow()`-like intrinsics.
+      np_3 = num_prev * num_prev * num_prev
+      dp_3 = den_prev * den_prev * den_prev
+
+      num = num_prev * (np_3 + 2.0 * asx * dp_3)
+      den = den_prev * (2.0 * np_3 + asx * dp_3)
       ! Equivalent to:  root_asx = root_asx * (root_asx**3 + 2.*asx) / (2.*root_asx**3 + asx)
     enddo
     ! At this point the error in root_asx is better than 1 part in 3e14.
     root_asx = num / den
 
     ! One final iteration with Newton's method polishes up the root and gives a solution
     ! that is within the last bit of the true solution.
-    root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
+    ra_3 = root_asx * root_asx * root_asx
+    root_asx = root_asx - (ra_3 - asx) / (3.0 * (root_asx * root_asx))
 
-    root = sign(scale(root_asx, ex_3), x)
+    root = descale(root_asx, e_x, s_x)
   endif
-
 end function cuberoot
 
+
+!> Rescale `a` to the range [0.125, 1) and compute its cube-root exponent.
+pure subroutine rescale_cbrt(a, x, e_r, s_a)
+  real, intent(in) :: a
+    !< The real parameter to be rescaled for cube root
+  real, intent(out) :: x
+    !< The rescaled value of a
+  integer(kind=int64), intent(out) :: e_r
+    !< Cube root of the exponent of the rescaling of `a`
+  integer(kind=int64), intent(out) :: s_a
+    !< The sign bit of a
+
+  integer(kind=int64) :: xb
+    ! Floating point value of a, bit-packed as an integer
+  integer(kind=int64) :: e_a
+    ! Unscaled exponent of a
+  integer(kind=int64) :: e_x
+    ! Exponent of x
+  integer(kind=int64) :: e_div, e_mod
+    ! Quotient and remainder of e in e = 3*(e/3) + modulo(e,3).
+
+  ! Pack bits of a into xb and extract its exponent and sign.
+  xb = transfer(a, 1_int64)
+  s_a = ibits(xb, signbit, 1)
+  e_a = ibits(xb, expbit, explen) - bias
+
+  ! Compute terms of exponent decomposition e = 3*(e/3) + modulo(e,3).
+  ! (Fortran division is round-to-zero, so we must emulate floor division.)
+  e_mod = modulo(e_a, 3_int64)
+  e_div = (e_a - e_mod)/3
+
+  ! Our scaling decomposes e_a into e = {3*(e/3) + 3} + {modulo(e,3) - 3}.
+
+  ! The first term is a perfect cube, whose cube root is computed below.
+  e_r = e_div + 1
+
+  ! The second term ensures that x is shifted to [0.125, 1).
+  e_x = e_mod - 3
+
+  ! Insert the new 11-bit exponent into xb and write to x and extend the
+  ! bitcount to 12, so that the sign bit is zero and x is always positive.
+  call mvbits(e_x + bias, 0, explen + 1, xb, fraclen)
+  x = transfer(xb, 1.)
+end subroutine rescale_cbrt
+
+
+!> Undo the rescaling of a real number back to its original base.
+pure function descale(x, e_a, s_a) result(a)
+  real, intent(in) :: x
+    !< The rescaled value which is to be restored.
+  integer(kind=int64), intent(in) :: e_a
+    !< Exponent of the unscaled value
+  integer(kind=int64), intent(in) :: s_a
+    !< Sign bit of the unscaled value
+  real :: a
+    !< Restored value with the corrected exponent and sign
+
+  integer(kind=int64) :: xb
+    ! Bit-packed real number into integer form
+  integer(kind=int64) :: e_x
+    ! Biased exponent of x
+
+  ! Apply the corrected exponent and sign to x.
+  xb = transfer(x, 1_8)
+  e_x = ibits(xb, expbit, explen)
+  call mvbits(e_a + e_x, 0, explen, xb, expbit)
+  call mvbits(s_a, 0, 1, xb, signbit)
+  a = transfer(xb, 1.)
+end function descale
+
+
 !> Returns true if any unit test of intrinsic_functions fails, or false if they all pass.
-logical function intrinsic_functions_unit_tests(verbose)
+function intrinsic_functions_unit_tests(verbose) result(fail)
   logical, intent(in) :: verbose !< If true, write results to stdout
+  logical :: fail !< True if any of the unit tests fail
 
   ! Local variables
   real :: testval  ! A test value for self-consistency testing [nondim]
-  logical :: fail, v
+  logical :: v
+  integer :: n
 
   fail = .false.
   v = verbose
@@ -107,7 +206,15 @@ logical function intrinsic_functions_unit_tests(verbose)
   fail = fail .or. Test_cuberoot(v, 1.0)
   fail = fail .or. Test_cuberoot(v, 0.125)
   fail = fail .or. Test_cuberoot(v, 0.965)
-
+  fail = fail .or. Test_cuberoot(v, 1.0 - epsilon(1.0))
+  fail = fail .or. Test_cuberoot(v, 1.0 - 0.5*epsilon(1.0))
+
+  testval = 1.0e-99
+  v = .false.
+  do n=-160,160
+    fail = fail .or. Test_cuberoot(v, testval)
+    testval = (-2.908 * (1.414213562373 + 1.2345678901234e-5*n)) * testval
+  enddo
 end function intrinsic_functions_unit_tests
 
 !> True if the cube of cuberoot(val) does not closely match val. False otherwise.
@@ -117,7 +224,7 @@ logical function Test_cuberoot(verbose, val)
   ! Local variables
   real :: diff ! The difference between val and the cube root of its cube.
 
-  diff = val - cuberoot(val**3)
+  diff = val - cuberoot(val)**3
   Test_cuberoot = (abs(diff) > 2.0e-15*abs(val))
 
   if (Test_cuberoot) then