From ae43fc401afe63fb356f396a8e8136af479eb142 Mon Sep 17 00:00:00 2001 From: Kimball Thurston Date: Mon, 26 Jun 2023 20:52:23 +1200 Subject: [PATCH 1/3] Fix cpu detection of sse2 on non-64 x86 Previous code accidentally elided the i386 (i.e. non-64 bit versions of x86 compiles). This refactors and simplifies the ifdef snarl to clarify and fix that. Fixes #1459 Signed-off-by: Kimball Thurston --- src/lib/OpenEXRCore/internal_cpuid.h | 57 +++++++++++++++++++--------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/src/lib/OpenEXRCore/internal_cpuid.h b/src/lib/OpenEXRCore/internal_cpuid.h index 86c33f90a5..dd8fa04be1 100644 --- a/src/lib/OpenEXRCore/internal_cpuid.h +++ b/src/lib/OpenEXRCore/internal_cpuid.h @@ -5,7 +5,13 @@ #include "OpenEXRConfigInternal.h" -#if defined(__x86_64__) || defined(_M_X64) +#if defined(i386) || defined(__i386__) || defined(__i386) || defined(_M_X86) || defined(__x86_64__) || defined(_M_X64) +# define OPENEXR_ENABLE_X86_SIMD_CHECK 1 +#else +# define OPENEXR_ENABLE_X86_SIMD_CHECK 0 +#endif + +#if OPENEXR_ENABLE_X86_SIMD_CHECK # if defined(_MSC_VER) && defined(_WIN32) # include <intrin.h> # else @@ -20,30 +26,35 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2) #ifdef __e2k__ # if defined(__SSE2__) *sse2 = 1; +# else + *sse2 = 0; # endif # if defined(__AVX__) *avx = 1; +# else + *avx = 0; # endif # if defined(__F16C__) *f16c = 1; +# else + *f16c = 0; # endif -#else -# if (defined(__x86_64__) || defined(_M_X64)) +#elif OPENEXR_ENABLE_X86_SIMD_CHECK -# if defined(__AVX__) && defined(__F16C__) + // shortcut if everything is turned on / compiled in +# if defined(__AVX__) && defined(__F16C__) *f16c = 1; *avx = 1; *sse2 = 1; -# else -# ifdef _WIN32 +# elif defined(_MSC_VER) && defined(_WIN32) int regs[4], osxsave; __cpuid (regs, 0); if (regs[0] >= 1) { __cpuidex (regs, 1, 0); } else regs[2] = 0; -# else +# else unsigned int regs[4], osxsave; __get_cpuid (0, &regs[0], &regs[1], &regs[2], &regs[3]); if (regs[0] >= 1) @@ -52,9 +63,16 @@ 
check_for_x86_simd (int* f16c, int* avx, int* sse2) } else regs[2] = 0; -# endif - /* AVX is indicated by bit 28, F16C by 29 of ECX (reg 2) */ +# endif + + /* + * linux cpuid.h for x86 has defines but not consistent cross platform + * + * see cpuid.h bit_AVX bit_F16C bit_SSE2 + */ + osxsave = (regs[2] & (1 << 27)) ? 1 : 0; + /* AVX is indicated by bit 28, F16C by 29 of ECX (reg 2) */ *avx = (regs[2] & (1 << 28)) ? 1 : 0; *f16c = (regs[2] & (1 << 29)) ? 1 : 0; /* sse2 is in EDX bit 26 */ @@ -68,8 +86,7 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2) else { /* check extended control register */ -# if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX) && \ - (defined(_M_X64) || defined(__x86_64__)) +# if defined(_M_X64) || defined(__x86_64__) __asm__ __volatile__ ("xgetbv" : /* Output */ "=a"(regs[0]), "=d"(regs[3]) : /* Input */ "c"(0) @@ -80,29 +97,33 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2) *avx = 0; *f16c = 0; } -# else +# else *avx = 0; *f16c = 0; -# endif +# endif } -# endif -# else + +#else + // not on x86 *f16c = 0; *avx = 0; *sse2 = 0; -# endif #endif + } static inline int has_native_half (void) { -#if defined(__x86_64__) || defined(_M_X64) +#if OPENEXR_ENABLE_X86_SIMD_CHECK int sse2, avx, f16c; check_for_x86_simd (&f16c, &avx, &sse2); return avx && f16c; +#elif defined(__aarch64__) + return 1; #else - // TODO: add case for neon? 
return 0; #endif } + +#undef OPENEXR_ENABLE_X86_SIMD_CHECK From af5eb8ae57a8d4305c3a99a7e64796ce7c5e8aea Mon Sep 17 00:00:00 2001 From: Kimball Thurston Date: Mon, 26 Jun 2023 21:09:56 +1200 Subject: [PATCH 2/3] Fix mismatch with windows and enable avx support check there Signed-off-by: Kimball Thurston --- src/lib/OpenEXRCore/internal_cpuid.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lib/OpenEXRCore/internal_cpuid.h b/src/lib/OpenEXRCore/internal_cpuid.h index dd8fa04be1..2315f63058 100644 --- a/src/lib/OpenEXRCore/internal_cpuid.h +++ b/src/lib/OpenEXRCore/internal_cpuid.h @@ -86,11 +86,15 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2) else { /* check extended control register */ -# if defined(_M_X64) || defined(__x86_64__) +# if defined(_M_X64) || defined(__x86_64__) +# if defined(_MSC_VER) + regs[0] = _xgetbv(0); +# else __asm__ __volatile__ ("xgetbv" : /* Output */ "=a"(regs[0]), "=d"(regs[3]) : /* Input */ "c"(0) : /* Clobber */); +# endif /* eax bit 1 - SSE managed, bit 2 - AVX managed */ if ((regs[0] & 6) != 6) { From bc7857c38b23d334b23e86d1672ed3da4869cfa2 Mon Sep 17 00:00:00 2001 From: Kimball Thurston Date: Mon, 26 Jun 2023 21:45:22 +1200 Subject: [PATCH 3/3] Disable avx detection under msvc for now We require gcc / clang style inline asm for avx support, detecting it properly causes problems for the dwa support which uses that. Disable again. 
Signed-off-by: Kimball Thurston --- src/lib/OpenEXRCore/internal_cpuid.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/lib/OpenEXRCore/internal_cpuid.h b/src/lib/OpenEXRCore/internal_cpuid.h index 2315f63058..edfee0b883 100644 --- a/src/lib/OpenEXRCore/internal_cpuid.h +++ b/src/lib/OpenEXRCore/internal_cpuid.h @@ -88,7 +88,12 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2) /* check extended control register */ # if defined(_M_X64) || defined(__x86_64__) # if defined(_MSC_VER) + /* TODO: remove the following disablement once we can do inline msvc */ +# if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX) regs[0] = _xgetbv(0); +# else + regs[0] = 0; +# endif # else __asm__ __volatile__ ("xgetbv" : /* Output */ "=a"(regs[0]), "=d"(regs[3])