diff --git a/libc/src/math/generic/cos.cpp b/libc/src/math/generic/cos.cpp index dbfeec70d83ac6..e61d800ce2dada 100644 --- a/libc/src/math/generic/cos.cpp +++ b/libc/src/math/generic/cos.cpp @@ -19,26 +19,6 @@ #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA #include "src/math/generic/sincos_eval.h" -#ifdef LIBC_TARGET_CPU_HAS_FMA -#include "range_reduction_double_fma.h" - -using LIBC_NAMESPACE::fma::FAST_PASS_EXPONENT; -using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI; -using LIBC_NAMESPACE::fma::range_reduction_small; -using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128; - -LIBC_INLINE constexpr bool NO_FMA = false; -#else -#include "range_reduction_double_nofma.h" - -using LIBC_NAMESPACE::nofma::FAST_PASS_EXPONENT; -using LIBC_NAMESPACE::nofma::ONE_TWENTY_EIGHT_OVER_PI; -using LIBC_NAMESPACE::nofma::range_reduction_small; -using LIBC_NAMESPACE::nofma::SIN_K_PI_OVER_128; - -LIBC_INLINE constexpr bool NO_FMA = true; -#endif // LIBC_TARGET_CPU_HAS_FMA - // TODO: We might be able to improve the performance of large range reduction of // non-FMA targets further by operating directly on 25-bit chunks of 128/pi and // pre-split SIN_K_PI_OVER_128, but that might double the memory footprint of diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h index 2b1c4e8df0c3d0..290b642be4c69f 100644 --- a/libc/src/math/generic/range_reduction_double_common.h +++ b/libc/src/math/generic/range_reduction_double_common.h @@ -18,6 +18,28 @@ #include "src/__support/integer_literals.h" #include "src/__support/macros/config.h" +#ifdef LIBC_TARGET_CPU_HAS_FMA +#include "range_reduction_double_fma.h" + +// With FMA, we limit the maximum exponent to be 2^16, so that the error bound +// from the fma::range_reduction_small is bounded by 2^-88 instead of 2^-72. 
+#define FAST_PASS_EXPONENT 16 +using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI; +using LIBC_NAMESPACE::fma::range_reduction_small; +using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128; + +LIBC_INLINE constexpr bool NO_FMA = false; +#else +#include "range_reduction_double_nofma.h" + +using LIBC_NAMESPACE::nofma::FAST_PASS_EXPONENT; +using LIBC_NAMESPACE::nofma::ONE_TWENTY_EIGHT_OVER_PI; +using LIBC_NAMESPACE::nofma::range_reduction_small; +using LIBC_NAMESPACE::nofma::SIN_K_PI_OVER_128; + +LIBC_INLINE constexpr bool NO_FMA = true; +#endif // LIBC_TARGET_CPU_HAS_FMA + namespace LIBC_NAMESPACE_DECL { namespace generic { diff --git a/libc/src/math/generic/sin.cpp b/libc/src/math/generic/sin.cpp index 5a4b5a3056ea49..da3d1e94b5f645 100644 --- a/libc/src/math/generic/sin.cpp +++ b/libc/src/math/generic/sin.cpp @@ -20,26 +20,6 @@ #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA #include "src/math/generic/sincos_eval.h" -#ifdef LIBC_TARGET_CPU_HAS_FMA -#include "range_reduction_double_fma.h" - -using LIBC_NAMESPACE::fma::FAST_PASS_EXPONENT; -using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI; -using LIBC_NAMESPACE::fma::range_reduction_small; -using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128; - -LIBC_INLINE constexpr bool NO_FMA = false; -#else -#include "range_reduction_double_nofma.h" - -using LIBC_NAMESPACE::nofma::FAST_PASS_EXPONENT; -using LIBC_NAMESPACE::nofma::ONE_TWENTY_EIGHT_OVER_PI; -using LIBC_NAMESPACE::nofma::range_reduction_small; -using LIBC_NAMESPACE::nofma::SIN_K_PI_OVER_128; - -LIBC_INLINE constexpr bool NO_FMA = true; -#endif // LIBC_TARGET_CPU_HAS_FMA - // TODO: We might be able to improve the performance of large range reduction of // non-FMA targets further by operating directly on 25-bit chunks of 128/pi and // pre-split SIN_K_PI_OVER_128, but that might double the memory footprint of diff --git a/libc/src/math/generic/sincos.cpp b/libc/src/math/generic/sincos.cpp index de107e6004b445..1af0ee7b0eb2c8 100644 --- 
a/libc/src/math/generic/sincos.cpp +++ b/libc/src/math/generic/sincos.cpp @@ -21,26 +21,6 @@ #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA #include "src/math/generic/sincos_eval.h" -#ifdef LIBC_TARGET_CPU_HAS_FMA -#include "range_reduction_double_fma.h" - -using LIBC_NAMESPACE::fma::FAST_PASS_EXPONENT; -using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI; -using LIBC_NAMESPACE::fma::range_reduction_small; -using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128; - -LIBC_INLINE constexpr bool NO_FMA = false; -#else -#include "range_reduction_double_nofma.h" - -using LIBC_NAMESPACE::nofma::FAST_PASS_EXPONENT; -using LIBC_NAMESPACE::nofma::ONE_TWENTY_EIGHT_OVER_PI; -using LIBC_NAMESPACE::nofma::range_reduction_small; -using LIBC_NAMESPACE::nofma::SIN_K_PI_OVER_128; - -LIBC_INLINE constexpr bool NO_FMA = true; -#endif // LIBC_TARGET_CPU_HAS_FMA - // TODO: We might be able to improve the performance of large range reduction of // non-FMA targets further by operating directly on 25-bit chunks of 128/pi and // pre-split SIN_K_PI_OVER_128, but that might double the memory footprint of diff --git a/libc/src/math/generic/tan.cpp b/libc/src/math/generic/tan.cpp index e623adb6b1d1ed..45fd8bb9156be0 100644 --- a/libc/src/math/generic/tan.cpp +++ b/libc/src/math/generic/tan.cpp @@ -21,28 +21,6 @@ #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#ifdef LIBC_TARGET_CPU_HAS_FMA -#include "range_reduction_double_fma.h" - -// With FMA, we limit the maxmimum exponent to be 2^16, so that the error bound -// from the fma::range_reduction_small is bounded by 2^-88 instead of 2^-72. 
-#define FAST_PASS_EXPONENT 16 -using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI; -using LIBC_NAMESPACE::fma::range_reduction_small; -using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128; - -LIBC_INLINE constexpr bool NO_FMA = false; -#else -#include "range_reduction_double_nofma.h" - -using LIBC_NAMESPACE::nofma::FAST_PASS_EXPONENT; -using LIBC_NAMESPACE::nofma::ONE_TWENTY_EIGHT_OVER_PI; -using LIBC_NAMESPACE::nofma::range_reduction_small; -using LIBC_NAMESPACE::nofma::SIN_K_PI_OVER_128; - -LIBC_INLINE constexpr bool NO_FMA = true; -#endif // LIBC_TARGET_CPU_HAS_FMA - // TODO: We might be able to improve the performance of large range reduction of // non-FMA targets further by operating directly on 25-bit chunks of 128/pi and // pre-split SIN_K_PI_OVER_128, but that might double the memory footprint of