Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix cpu detection of sse2 on non-64 x86 #1467

Merged
merged 3 commits into from
Jun 26, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 48 additions & 18 deletions src/lib/OpenEXRCore/internal_cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@

#include "OpenEXRConfigInternal.h"

/*
 * Compile-time gate for the x86 CPUID-based SIMD detection.
 *
 * OPENEXR_ENABLE_X86_SIMD_CHECK is defined to 1 when one of the listed
 * 32-bit (i386, __i386__, __i386, _M_X86) or 64-bit (__x86_64__, _M_X64)
 * target macros is defined, and to 0 otherwise.
 *
 * NOTE(review): the two consecutive #if lines below look like the removed
 * and the added side of a diff rendered together; only one of them is
 * expected to remain in the real header -- confirm.
 * NOTE(review): MSVC documents _M_IX86 (not _M_X86) as the 32-bit x86
 * target macro -- confirm whether _M_X86 is intentional.
 */
#if defined(__x86_64__) || defined(_M_X64)
#if defined(i386) || defined(__i386__) || defined(__i386) || defined(_M_X86) || defined(__x86_64__) || defined(_M_X64)
# define OPENEXR_ENABLE_X86_SIMD_CHECK 1
#else
# define OPENEXR_ENABLE_X86_SIMD_CHECK 0
#endif

#if OPENEXR_ENABLE_X86_SIMD_CHECK
# if defined(_MSC_VER) && defined(_WIN32)
# include <intrin.h>
# else
Expand All @@ -20,30 +26,35 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2)
#ifdef __e2k__
# if defined(__SSE2__)
*sse2 = 1;
# else
*sse2 = 0;
# endif
# if defined(__AVX__)
*avx = 1;
# else
*avx = 0;
# endif
# if defined(__F16C__)
*f16c = 1;
# else
*f16c = 0;
# endif

#else
# if (defined(__x86_64__) || defined(_M_X64))
#elif OPENEXR_ENABLE_X86_SIMD_CHECK

# if defined(__AVX__) && defined(__F16C__)
// shortcut if everything is turned on / compiled in
# if defined(__AVX__) && defined(__F16C__)
*f16c = 1;
*avx = 1;
*sse2 = 1;
# else
# ifdef _WIN32
# elif defined(_MSC_VER) && defined(_WIN32)
int regs[4], osxsave;

__cpuid (regs, 0);
if (regs[0] >= 1) { __cpuidex (regs, 1, 0); }
else
regs[2] = 0;
# else
# else
unsigned int regs[4], osxsave;
__get_cpuid (0, &regs[0], &regs[1], &regs[2], &regs[3]);
if (regs[0] >= 1)
Expand All @@ -52,9 +63,16 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2)
}
else
regs[2] = 0;
# endif
/* AVX is indicated by bit 28, F16C by 29 of ECX (reg 2) */
# endif

/*
* linux cpuid.h for x86 has defines but not consistent cross platform
*
* see cpuid.h bit_AVX bit_F16C bit_SSE2
*/

osxsave = (regs[2] & (1 << 27)) ? 1 : 0;
/* AVX is indicated by bit 28, F16C by 29 of ECX (reg 2) */
*avx = (regs[2] & (1 << 28)) ? 1 : 0;
*f16c = (regs[2] & (1 << 29)) ? 1 : 0;
/* sse2 is in EDX bit 26 */
Expand All @@ -68,41 +86,53 @@ check_for_x86_simd (int* f16c, int* avx, int* sse2)
else
{
/* check extended control register */
# if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX) && \
(defined(_M_X64) || defined(__x86_64__))
# if defined(_M_X64) || defined(__x86_64__)
# if defined(_MSC_VER)
/* TODO: remove the following disablement once we can do inline msvc */
# if defined(OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX)
regs[0] = _xgetbv(0);
# else
regs[0] = 0;
# endif
# else
__asm__ __volatile__ ("xgetbv"
: /* Output */ "=a"(regs[0]), "=d"(regs[3])
: /* Input */ "c"(0)
: /* Clobber */);
# endif
/* eax bit 1 - SSE managed, bit 2 - AVX managed */
if ((regs[0] & 6) != 6)
{
*avx = 0;
*f16c = 0;
}
# else
# else
*avx = 0;
*f16c = 0;
# endif
# endif
}
# endif
# else

#else
// not on x86
*f16c = 0;
*avx = 0;
*sse2 = 0;
# endif
#endif

}

/*
 * Reports whether native "half" support is available (presumably
 * half-precision float conversion, going by the F16C flag -- confirm).
 *
 * Returns:
 *   - x86 branch: the result of (avx && f16c) as filled in by
 *     check_for_x86_simd; sse2 is also filled in but not used here.
 *   - __aarch64__ branch: always 1.
 *   - any other target: always 0.
 *
 * NOTE(review): the two consecutive #if lines inside the body look like the
 * removed and the added side of a diff rendered together; with a single
 * #endif only one of them can remain -- confirm against the real header.
 */
static inline int
has_native_half (void)
{
#if defined(__x86_64__) || defined(_M_X64)
#if OPENEXR_ENABLE_X86_SIMD_CHECK
int sse2, avx, f16c;
/* Query the CPU feature flags; all three outputs are written by the call. */
check_for_x86_simd (&f16c, &avx, &sse2);
/* Both AVX and F16C must be reported for this function to return non-zero. */
return avx && f16c;
#elif defined(__aarch64__)
return 1;
#else
// TODO: add case for neon?
return 0;
#endif
}

#undef OPENEXR_ENABLE_X86_SIMD_CHECK