Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Intrinsics: Faster cuberoot scaling functions #557

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 122 additions & 15 deletions src/framework/MOM_intrinsic_functions.F90
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,26 @@ module MOM_intrinsic_functions
! This file is part of MOM6. See LICENSE.md for the license.

use iso_fortran_env, only : stdout => output_unit, stderr => error_unit
use iso_fortran_env, only : int64, real64

implicit none ; private

public :: invcosh, cuberoot
public :: intrinsic_functions_unit_tests

! Floating point model, if bit layout from high to low is (sign, exp, frac)

integer, parameter :: bias = maxexponent(1.) - 1
!< The double precision exponent offset
integer, parameter :: signbit = storage_size(1.) - 1
!< Position of sign bit
integer, parameter :: explen = 1 + ceiling(log(real(bias))/log(2.))
!< Bit size of exponent
integer, parameter :: expbit = signbit - explen
!< Position of lowest exponent bit
integer, parameter :: fraclen = expbit
!< Length of fractional part

contains

!> Evaluate the inverse cosh, either using a math library or an
Expand All @@ -28,6 +42,7 @@ function invcosh(x)

end function invcosh


!> Returns the cube root of a real argument at roundoff accuracy, in a form that works properly with
!! rescaling of the argument by integer powers of 8. If the argument is a NaN, a NaN is returned.
elemental function cuberoot(x) result(root)
Expand All @@ -37,24 +52,28 @@ elemental function cuberoot(x) result(root)
real :: asx ! The absolute value of x rescaled by an integer power of 8 to put it into
! the range from 0.125 < asx <= 1.0, in ambiguous units cubed [B3]
real :: root_asx ! The cube root of asx [B]
real :: ra_3 ! root_asx cubed [B3]
real :: num ! The numerator of an expression for the evolving estimate of the cube root of asx
! in arbitrary units that can grow or shrink with each iteration [B C]
real :: den ! The denominator of an expression for the evolving estimate of the cube root of asx
! in arbitrary units that can grow or shrink with each iteration [C]
real :: num_prev ! The numerator of an expression for the previous iteration of the evolving estimate
! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D]
real :: np_3 ! num_prev cubed [B3 D3]
real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of
! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a.
real :: dp_3 ! den_prev cubed [C3]
real :: r0 ! Initial value of the iterative solver. [B C]
real :: r0_3 ! r0 cubed [B3 C3]
integer :: itt

integer(kind=int64) :: e_x, s_x

if ((x >= 0.0) .eqv. (x <= 0.0)) then
! Return 0 for an input of 0, or NaN for a NaN input.
root = x
else
ex_3 = ceiling(exponent(x) / 3.)
! Here asx is in the range of 0.125 <= asx < 1.0
asx = scale(abs(x), -3*ex_3)
call rescale_cbrt(x, asx, e_x, s_x)

! Iteratively determine root_asx = asx**1/3 using Halley's method and then Newton's method,
! noting that Halley's method onverges monotonically and needs no bounding. Halley's method is
Expand All @@ -65,35 +84,115 @@ elemental function cuberoot(x) result(root)

! This first estimate gives the same magnitude of errors for 0.125 and 1.0 after two iterations.
! The first iteration is applied explicitly.
num = 0.707106 * (0.707106**3 + 2.0 * asx)
den = 2.0 * (0.707106**3) + asx
r0 = 0.707106
r0_3 = r0 * r0 * r0
num = r0 * (r0_3 + 2.0 * asx)
den = 2.0 * r0_3 + asx

do itt=1,2
! Halley's method iterates estimates as Root = Root * (Root**3 + 2.*asx) / (2.*Root**3 + asx).
num_prev = num ; den_prev = den
num = num_prev * (num_prev**3 + 2.0 * asx * (den_prev**3))
den = den_prev * (2.0 * num_prev**3 + asx * (den_prev**3))

! Pre-compute these as integer powers, to avoid `pow()`-like intrinsics.
np_3 = num_prev * num_prev * num_prev
dp_3 = den_prev * den_prev * den_prev

num = num_prev * (np_3 + 2.0 * asx * dp_3)
den = den_prev * (2.0 * np_3 + asx * dp_3)
! Equivalent to: root_asx = root_asx * (root_asx**3 + 2.*asx) / (2.*root_asx**3 + asx)
enddo
! At this point the error in root_asx is better than 1 part in 3e14.
root_asx = num / den

! One final iteration with Newton's method polishes up the root and gives a solution
! that is within the last bit of the true solution.
root_asx = root_asx - (root_asx**3 - asx) / (3.0 * (root_asx**2))
ra_3 = root_asx * root_asx * root_asx
root_asx = root_asx - (ra_3 - asx) / (3.0 * (root_asx * root_asx))

root = sign(scale(root_asx, ex_3), x)
root = descale(root_asx, e_x, s_x)
endif

end function cuberoot


!> Rescale `a` to the range [0.125, 1) and compute its cube-root exponent.
pure subroutine rescale_cbrt(a, x, e_r, s_a)
real, intent(in) :: a
!< The real parameter to be rescaled for cube root
real, intent(out) :: x
!< The rescaled value of a
integer(kind=int64), intent(out) :: e_r
!< Cube root of the exponent of the rescaling of `a`
integer(kind=int64), intent(out) :: s_a
!< The sign bit of a

integer(kind=int64) :: xb
! Floating point value of a, bit-packed as an integer
integer(kind=int64) :: e_a
! Unscaled exponent of a
integer(kind=int64) :: e_x
! Exponent of x
integer(kind=int64) :: e_div, e_mod
! Quotient and remainder of e in e = 3*(e/3) + modulo(e,3).

! Pack bits of a into xb and extract its exponent and sign.
xb = transfer(a, 1_int64)
s_a = ibits(xb, signbit, 1)
e_a = ibits(xb, expbit, explen) - bias

! Compute terms of exponent decomposition e = 3*(e/3) + modulo(e,3).
! (Fortran division is round-to-zero, so we must emulate floor division.)
e_mod = modulo(e_a, 3_int64)
e_div = (e_a - e_mod)/3

! Our scaling decomposes e_a into e = {3*(e/3) + 3} + {modulo(e,3) - 3}.

! The first term is a perfect cube, whose cube root is computed below.
e_r = e_div + 1

! The second term ensures that x is shifted to [0.125, 1).
e_x = e_mod - 3

! Insert the new 11-bit exponent into xb and write to x and extend the
! bitcount to 12, so that the sign bit is zero and x is always positive.
call mvbits(e_x + bias, 0, explen + 1, xb, fraclen)
x = transfer(xb, 1.)
end subroutine rescale_cbrt


!> Undo the rescaling of a real number back to its original base.
pure function descale(x, e_a, s_a) result(a)
real, intent(in) :: x
!< The rescaled value which is to be restored.
integer(kind=int64), intent(in) :: e_a
!< Exponent of the unscaled value
integer(kind=int64), intent(in) :: s_a
!< Sign bit of the unscaled value
real :: a
!< Restored value with the corrected exponent and sign

integer(kind=int64) :: xb
! Bit-packed real number into integer form
integer(kind=int64) :: e_x
! Biased exponent of x

! Apply the corrected exponent and sign to x.
xb = transfer(x, 1_8)
e_x = ibits(xb, expbit, explen)
call mvbits(e_a + e_x, 0, explen, xb, expbit)
call mvbits(s_a, 0, 1, xb, signbit)
a = transfer(xb, 1.)
end function descale


!> Returns true if any unit test of intrinsic_functions fails, or false if they all pass.
logical function intrinsic_functions_unit_tests(verbose)
function intrinsic_functions_unit_tests(verbose) result(fail)
logical, intent(in) :: verbose !< If true, write results to stdout
logical :: fail !< True if any of the unit tests fail

! Local variables
real :: testval ! A test value for self-consistency testing [nondim]
logical :: fail, v
logical :: v
integer :: n

fail = .false.
v = verbose
Expand All @@ -107,7 +206,15 @@ logical function intrinsic_functions_unit_tests(verbose)
fail = fail .or. Test_cuberoot(v, 1.0)
fail = fail .or. Test_cuberoot(v, 0.125)
fail = fail .or. Test_cuberoot(v, 0.965)

fail = fail .or. Test_cuberoot(v, 1.0 - epsilon(1.0))
fail = fail .or. Test_cuberoot(v, 1.0 - 0.5*epsilon(1.0))

testval = 1.0e-99
Hallberg-NOAA marked this conversation as resolved.
Show resolved Hide resolved
v = .false.
do n=-160,160
fail = fail .or. Test_cuberoot(v, testval)
testval = (-2.908 * (1.414213562373 + 1.2345678901234e-5*n)) * testval
enddo
end function intrinsic_functions_unit_tests

!> True if the cube of cuberoot(val) does not closely match val. False otherwise.
Expand All @@ -117,7 +224,7 @@ logical function Test_cuberoot(verbose, val)
! Local variables
real :: diff ! The difference between val and the cube root of its cube.

diff = val - cuberoot(val**3)
diff = val - cuberoot(val)**3
Test_cuberoot = (abs(diff) > 2.0e-15*abs(val))

if (Test_cuberoot) then
Expand Down
Loading