From 66c9b61d72656a93f4e799eaf1d0b3e817e9b182 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Thu, 12 Jan 2023 22:48:55 -0800 Subject: [PATCH] platform.h: fully define bitcast template (#1635) Ported from OIIO. This lets us not have to depend on the specific definition in OIIO's fmath.h, and in particular not worry about whether it's a new enough version of OIIO that the old bit_cast is now marked as deprecated. Signed-off-by: Larry Gritz --- src/include/OSL/oslnoise.h | 7 +----- src/include/OSL/platform.h | 39 +++++++++++++++++++++++++++++ src/include/OSL/wide.h | 7 +----- src/liboslexec/wide/wide_ophash.cpp | 7 +----- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/src/include/OSL/oslnoise.h b/src/include/OSL/oslnoise.h index 2170dcc2e..048c5ec51 100644 --- a/src/include/OSL/oslnoise.h +++ b/src/include/OSL/oslnoise.h @@ -144,12 +144,7 @@ inline OSL_HOSTDEVICE float bits_to_01 (unsigned int bits) { OSL_FORCEINLINE OSL_HOSTDEVICE int bitcast_to_uint (float x) { -#if OPENIMAGEIO_VERSION >= 20500 - return OIIO::bitcast(x); -#else - // obsolete call - return OIIO::bit_cast(x); -#endif + return OSL::bitcast(x); } diff --git a/src/include/OSL/platform.h b/src/include/OSL/platform.h index cd7fb238c..6ee6658c8 100644 --- a/src/include/OSL/platform.h +++ b/src/include/OSL/platform.h @@ -23,6 +23,9 @@ #include +#if defined(__x86_64__) && !defined(__CUDA_ARCH__) +# include +#endif ///////////////////////////////////////////////////////////////////////// // Detect which compiler and version we're using @@ -516,6 +519,42 @@ OSL_FORCEINLINE OSL_HOSTDEVICE To bitcast(const From& src) noexcept { return dst; } +#if defined(__x86_64__) && !defined(__CUDA_ARCH__) && \ + (defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) \ + || OSL_CLANG_VERSION >= 100000 || OSL_APPLE_CLANG_VERSION >= 130000) +// On x86/x86_64 for certain compilers we can use Intel CPU intrinsics for +// some common bitcast cases that might be even more understandable to the +// compiler and generate better code without its getting confused about the +// memcpy in the general case. We're a bit conservative with the compiler +// version checks here, it may be that some earlier versions support these +// intrinsics. + +template<> OSL_FORCEINLINE uint32_t bitcast(const float& val) noexcept { + return static_cast(_castf32_u32(val)); +} +template<> OSL_FORCEINLINE int32_t bitcast(const float& val) noexcept { + return static_cast(_castf32_u32(val)); +} +template<> OSL_FORCEINLINE float bitcast(const uint32_t& val) noexcept { + return _castu32_f32(val); +} +template<> OSL_FORCEINLINE float bitcast(const int32_t& val) noexcept { + return _castu32_f32(val); +} +template<> OSL_FORCEINLINE uint64_t bitcast(const double& val) noexcept { + return static_cast(_castf64_u64(val)); +} +template<> OSL_FORCEINLINE int64_t bitcast(const double& val) noexcept { + return static_cast(_castf64_u64(val)); +} +template<> OSL_FORCEINLINE double bitcast(const uint64_t& val) noexcept { + return _castu64_f64(val); +} +template<> OSL_FORCEINLINE double bitcast(const int64_t& val) noexcept { + return _castu64_f64(val); +} +#endif + #if OSL_CPLUSPLUS_VERSION >= 20 diff --git a/src/include/OSL/wide.h b/src/include/OSL/wide.h index a12a8f07d..b9b866fd1 100644 --- a/src/include/OSL/wide.h +++ b/src/include/OSL/wide.h @@ -980,12 +980,7 @@ template struct alignas(VecReg) Block { #ifdef OIIO_USTRING_HAS_CTR_FROM_USTRINGHASH return ustringhash::from_hash(str[lane]); #else - // Dumb workaround if we are on old OIIO -# if OPENIMAGEIO_VERSION >= 20500 - return OIIO::bitcast(str[lane]); -# else - return OIIO::bit_cast(str[lane]); -# endif + return OSL::bitcast(str[lane]); #endif } diff --git a/src/liboslexec/wide/wide_ophash.cpp b/src/liboslexec/wide/wide_ophash.cpp index bf677588d..b640f7148 100644 --- a/src/liboslexec/wide/wide_ophash.cpp +++ b/src/liboslexec/wide/wide_ophash.cpp @@ -28,12 +28,7 @@ namespace { OSL_FORCEINLINE OSL_HOSTDEVICE int bitcast_to_uint(float x) { -#if OPENIMAGEIO_VERSION >= 20500 - return OIIO::bitcast(x); -#else - // obsolete call - return OIIO::bit_cast(x); -#endif + return OSL::bitcast(x); }