Enable the x86 SIMD capability check on 32-bit x86 as well as x86_64.

* Gate the cpuid include and the detection code on a single
  OPENEXR_ENABLE_X86_SIMD_CHECK macro, which is #undef'd again at the end
  of the header.  32-bit MSVC is matched through _M_IX86.
* On e2k, explicitly set each flag to 0 when its feature macro is absent.
* Keep the cpuid / xgetbv probing inside the #else of the compile-time
  AVX+F16C shortcut: that code reads regs / osxsave, which are only
  declared in the probing branch.
* Only issue xgetbv on x86_64; other x86 builds report avx = f16c = 0.
* has_native_half() returns 1 on aarch64.

NOTE(review): blank context lines and indentation were reconstructed from
a whitespace-collapsed copy of this patch -- confirm against the target
tree (git apply --recount --ignore-whitespace tolerates the difference).

diff --git a/src/lib/OpenEXRCore/internal_cpuid.h b/src/lib/OpenEXRCore/internal_cpuid.h
index 86c33f90a5..edfee0b883 100644
--- a/src/lib/OpenEXRCore/internal_cpuid.h
+++ b/src/lib/OpenEXRCore/internal_cpuid.h
@@ -5,7 +5,13 @@
 
 #include "OpenEXRConfigInternal.h"
 
-#if defined(__x86_64__) || defined(_M_X64)
+#if defined(i386) || defined(__i386__) || defined(__i386) || defined(_M_X86) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#    define OPENEXR_ENABLE_X86_SIMD_CHECK 1
+#else
+#    define OPENEXR_ENABLE_X86_SIMD_CHECK 0
+#endif
+
+#if OPENEXR_ENABLE_X86_SIMD_CHECK
 #    if defined(_MSC_VER) && defined(_WIN32)
 #        include <intrin.h>
 #    else
@@ -20,30 +26,36 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2)
 #ifdef __e2k__
 #    if defined(__SSE2__)
     *sse2 = 1;
+#    else
+    *sse2 = 0;
 #    endif
 #    if defined(__AVX__)
     *avx = 1;
+#    else
+    *avx = 0;
 #    endif
 #    if defined(__F16C__)
     *f16c = 1;
+#    else
+    *f16c = 0;
 #    endif
-#else
-#    if (defined(__x86_64__) || defined(_M_X64))
+#elif OPENEXR_ENABLE_X86_SIMD_CHECK
 
-#        if defined(__AVX__) && defined(__F16C__)
+    // shortcut if everything is turned on / compiled in
+#    if defined(__AVX__) && defined(__F16C__)
     *f16c = 1;
     *avx = 1;
     *sse2 = 1;
-#        else
-#            ifdef _WIN32
+#    else
+#        if defined(_MSC_VER) && defined(_WIN32)
     int regs[4], osxsave;
 
     __cpuid (regs, 0);
 
     if (regs[0] >= 1) { __cpuidex (regs, 1, 0); }
     else
         regs[2] = 0;
-#            else
+#        else
     unsigned int regs[4], osxsave;
     __get_cpuid (0, &regs[0], &regs[1], &regs[2], &regs[3]);
     if (regs[0] >= 1)
@@ -52,9 +64,16 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2)
     }
     else
         regs[2] = 0;
-#            endif
-    /* AVX is indicated by bit 28, F16C by 29 of ECX (reg 2) */
+#        endif
+
+    /*
+     * linux cpuid.h for x86 has defines but not consistent cross platform
+     *
+     * see cpuid.h bit_AVX bit_F16C bit_SSE2
+     */
+
     osxsave = (regs[2] & (1 << 27)) ? 1 : 0;
+    /* AVX is indicated by bit 28, F16C by 29 of ECX (reg 2) */
     *avx = (regs[2] & (1 << 28)) ? 1 : 0;
     *f16c = (regs[2] & (1 << 29)) ? 1 : 0;
     /* sse2 is in EDX bit 26 */
@@ -68,41 +87,54 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2)
     else
     {
         /* check extended control register */
-#            if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX) && \
-                (defined(_M_X64) || defined(__x86_64__))
+#        if defined(_M_X64) || defined(__x86_64__)
+#            if defined(_MSC_VER)
+        /* TODO: remove the following disablement once we can do inline msvc */
+#                if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX)
+        regs[0] = _xgetbv(0);
+#                else
+        regs[0] = 0;
+#                endif
+#            else
         __asm__ __volatile__ ("xgetbv"
                               : /* Output */ "=a"(regs[0]), "=d"(regs[3])
                               : /* Input */ "c"(0)
                               : /* Clobber */);
+#            endif
         /* eax bit 1 - SSE managed, bit 2 - AVX managed */
         if ((regs[0] & 6) != 6)
         {
             *avx = 0;
             *f16c = 0;
         }
-#            else
+#        else
         *avx = 0;
         *f16c = 0;
-#            endif
+#        endif
     }
-#        endif
-#    else
+#    endif
+
+#else
+    // not on x86
     *f16c = 0;
     *avx = 0;
     *sse2 = 0;
-#    endif
 #endif
+
 }
 
 static inline int
 has_native_half (void)
 {
-#if defined(__x86_64__) || defined(_M_X64)
+#if OPENEXR_ENABLE_X86_SIMD_CHECK
     int sse2, avx, f16c;
     check_for_x86_simd (&f16c, &avx, &sse2);
     return avx && f16c;
+#elif defined(__aarch64__)
+    return 1;
 #else
-    // TODO: add case for neon?
     return 0;
 #endif
 }
+
+#undef OPENEXR_ENABLE_X86_SIMD_CHECK