-
Notifications
You must be signed in to change notification settings - Fork 750
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OpenEXR 3.1.6 introduced several NEON-based optimizations that implicitly require AArch64. Add patches, either picked from upstream or written by mandree@, so that the NEON features that require AArch64 are enabled only there. PR-1366 is cherry-picked from upstream; the patch-lib/patch-test files are my work but build upon said PR. Also cherry-pick PR1354, which adds a missing check for AVX; I am bumping PORTREVISION because this might change the generated code (I have not checked). AcademySoftwareFoundation/openexr#1365 PR: 270348 Reported by: fuz@ (Robert Clausecker) (cherry picked from commit 54d6860)
- Loading branch information
Showing
6 changed files
with
203 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
graphics/openexr/files/patch-PR1354-3f97750d1ec203e7d7eb8d5f30f3d5e7e68ad720
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
From 3f97750d1ec203e7d7eb8d5f30f3d5e7e68ad720 Mon Sep 17 00:00:00 2001 | ||
From: Cary Phillips <cary@ilm.com> | ||
Date: Thu, 16 Mar 2023 07:23:18 -0700 | ||
Subject: [PATCH] Test for AVX in unpack.c (#1354) | ||
|
||
Addresses #1353 | ||
|
||
Signed-off-by: Cary Phillips <cary@ilm.com> | ||
--- | ||
src/lib/OpenEXRCore/unpack.c | 2 +- | ||
1 file changed, 1 insertion(+), 1 deletion(-) | ||
|
||
diff --git a/src/lib/OpenEXRCore/unpack.c b/src/lib/OpenEXRCore/unpack.c | ||
index 606d7c3eb..ac4e259f3 100644 | ||
--- a/src/lib/OpenEXRCore/unpack.c | ||
+++ b/src/lib/OpenEXRCore/unpack.c | ||
@@ -37,7 +37,7 @@ half_to_float8 (float* out, const uint16_t* src) | ||
} | ||
#endif | ||
|
||
-#if (defined(__x86_64__) || defined(_M_X64)) && \ | ||
+#if (defined(__x86_64__) || defined(_M_X64)) && defined(__AVX__) && \ | ||
(defined(__F16C__) || defined(__GNUC__) || defined(__clang__)) | ||
|
||
# if defined(__F16C__) |
98 changes: 98 additions & 0 deletions
98
graphics/openexr/files/patch-PR1366-a41a736d64e3d93baffef1042d4a3d1aaf74f1c9
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
From a41a736d64e3d93baffef1042d4a3d1aaf74f1c9 Mon Sep 17 00:00:00 2001 | ||
From: Aras Pranckevicius <aras@nesnausk.org> | ||
Date: Mon, 20 Mar 2023 19:31:42 +0200 | ||
Subject: [PATCH] Fix ARMv7 build by making recent ZIP NEON optimizations be | ||
ARMv8 (aarch64) only | ||
|
||
Should fix #1365. Recent PR (#1348) added NEON accelerated code paths | ||
for ZIP filtering. But that code uses several instructions that are | ||
ARMv8 (aarch64) only, and thus fail building on 32-bit ARM (armv7) | ||
platforms. Make these optimizations only kick in when building | ||
for 64-bit ARM platforms. | ||
|
||
Signed-off-by: Aras Pranckevicius <aras@nesnausk.org> | ||
--- | ||
src/lib/OpenEXR/ImfSimd.h | 4 ++++ | ||
src/lib/OpenEXR/ImfZip.cpp | 6 +++--- | ||
src/lib/OpenEXRCore/internal_zip.c | 8 ++++---- | ||
3 files changed, 11 insertions(+), 7 deletions(-) | ||
|
||
diff --git a/src/lib/OpenEXR/ImfSimd.h b/src/lib/OpenEXR/ImfSimd.h | ||
index 3053a5d4e..d1f064525 100644 | ||
--- a/src/lib/OpenEXR/ImfSimd.h | ||
+++ b/src/lib/OpenEXR/ImfSimd.h | ||
@@ -46,6 +46,10 @@ | ||
# define IMF_HAVE_NEON | ||
#endif | ||
|
||
+#if defined(__aarch64__) | ||
+# define IMF_HAVE_NEON_AARCH64 1 | ||
+#endif | ||
+ | ||
extern "C" { | ||
#ifdef IMF_HAVE_SSE2 | ||
# include <emmintrin.h> | ||
diff --git a/src/lib/OpenEXR/ImfZip.cpp b/src/lib/OpenEXR/ImfZip.cpp | ||
index 0e2b031d8..8dd53bea9 100644 | ||
--- a/src/lib/OpenEXR/ImfZip.cpp | ||
+++ b/src/lib/OpenEXR/ImfZip.cpp | ||
@@ -160,7 +160,7 @@ reconstruct_sse41 (char* buf, size_t outSize) | ||
|
||
#endif | ||
|
||
-#ifdef IMF_HAVE_NEON | ||
+#ifdef IMF_HAVE_NEON_AARCH64 | ||
|
||
void | ||
reconstruct_neon (char* buf, size_t outSize) | ||
@@ -262,7 +262,7 @@ interleave_sse2 (const char* source, size_t outSize, char* out) | ||
|
||
#endif | ||
|
||
-#ifdef IMF_HAVE_NEON | ||
+#ifdef IMF_HAVE_NEON_AARCH64 | ||
|
||
void | ||
interleave_neon (const char* source, size_t outSize, char* out) | ||
@@ -380,7 +380,7 @@ Zip::initializeFuncs () | ||
} | ||
#endif | ||
|
||
-#ifdef IMF_HAVE_NEON | ||
+#ifdef IMF_HAVE_NEON_AARCH64 | ||
reconstruct = reconstruct_neon; | ||
interleave = interleave_neon; | ||
#endif | ||
diff --git a/src/lib/OpenEXRCore/internal_zip.c b/src/lib/OpenEXRCore/internal_zip.c | ||
index e829e6d7c..1374cab0d 100644 | ||
--- a/src/lib/OpenEXRCore/internal_zip.c | ||
+++ b/src/lib/OpenEXRCore/internal_zip.c | ||
@@ -24,8 +24,8 @@ | ||
# define IMF_HAVE_SSE4_1 1 | ||
# include <smmintrin.h> | ||
#endif | ||
-#if defined(__ARM_NEON) | ||
-# define IMF_HAVE_NEON 1 | ||
+#if defined(__aarch64__) | ||
+# define IMF_HAVE_NEON_AARCH64 1 | ||
# include <arm_neon.h> | ||
#endif | ||
|
||
@@ -78,7 +78,7 @@ reconstruct (uint8_t* buf, uint64_t outSize) | ||
prev = d; | ||
} | ||
} | ||
-#elif defined(IMF_HAVE_NEON) | ||
+#elif defined(IMF_HAVE_NEON_AARCH64) | ||
static void | ||
reconstruct (uint8_t* buf, uint64_t outSize) | ||
{ | ||
@@ -174,7 +174,7 @@ interleave (uint8_t* out, const uint8_t* source, uint64_t outSize) | ||
*(sOut++) = (i % 2 == 0) ? *(t1++) : *(t2++); | ||
} | ||
|
||
-#elif defined(IMF_HAVE_NEON) | ||
+#elif defined(IMF_HAVE_NEON_AARCH64) | ||
static void | ||
interleave (uint8_t* out, const uint8_t* source, uint64_t outSize) | ||
{ |
11 changes: 11 additions & 0 deletions
11
graphics/openexr/files/patch-lib_OpenEXR_ImfDwaCompressor.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
--- ./src/lib/OpenEXR/ImfDwaCompressor.cpp.orig 2023-03-05 20:23:47 UTC | ||
+++ ./src/lib/OpenEXR/ImfDwaCompressor.cpp | ||
@@ -2888,7 +2888,7 @@ DwaCompressor::initializeFuncs() | ||
fromHalfZigZag = fromHalfZigZag_f16c; | ||
} | ||
|
||
-#ifdef IMF_HAVE_NEON | ||
+#ifdef IMF_HAVE_NEON_AARCH64 | ||
{ | ||
convertFloatToHalf64 = convertFloatToHalf64_neon; | ||
fromHalfZigZag = fromHalfZigZag_neon; |
29 changes: 29 additions & 0 deletions
29
graphics/openexr/files/patch-lib_OpenEXR_ImfDwaCompressorSimd.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
--- ./src/lib/OpenEXR/ImfDwaCompressorSimd.h.orig 2023-03-05 20:23:47 UTC | ||
+++ ./src/lib/OpenEXR/ImfDwaCompressorSimd.h | ||
@@ -395,7 +395,7 @@ convertFloatToHalf64_scalar (unsigned short* dst, floa | ||
dst[i] = ((half) src[i]).bits (); | ||
} | ||
|
||
-#ifdef IMF_HAVE_NEON | ||
+#ifdef IMF_HAVE_NEON_AARCH64 | ||
|
||
void | ||
convertFloatToHalf64_neon (unsigned short* dst, float* src) | ||
@@ -821,7 +821,7 @@ fromHalfZigZag_f16c (unsigned short* src, float* dst) | ||
#endif /* defined IMF_HAVE_GCC_INLINEASM_X86_64 */ | ||
} | ||
|
||
-#ifdef IMF_HAVE_NEON | ||
+#ifdef IMF_HAVE_NEON_AARCH64 | ||
|
||
|
||
void | ||
@@ -856,7 +856,7 @@ fromHalfZigZag_neon(unsigned short* __restrict__ src, | ||
} | ||
} | ||
|
||
-#endif // IMF_HAVE_NEON | ||
+#endif // IMF_HAVE_NEON_AARCH64 | ||
// | ||
// Inverse 8x8 DCT, only inverting the DC. This assumes that |
38 changes: 38 additions & 0 deletions
38
graphics/openexr/files/patch-test_OpenEXRTest_testDwaCompressorSimd.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
--- ./src/test/OpenEXRTest/testDwaCompressorSimd.cpp.orig 2023-03-05 20:23:47 UTC | ||
+++ ./src/test/OpenEXRTest/testDwaCompressorSimd.cpp | ||
@@ -401,7 +401,7 @@ testFloatToHalf() | ||
} | ||
} | ||
|
||
-#ifdef IMF_HAVE_NEON | ||
+#ifdef IMF_HAVE_NEON_AARCH64 | ||
{ | ||
cout << " convertFloatToHalf64_neon()" << endl; | ||
for (int iter = 0; iter < numIter; ++iter) | ||
@@ -430,7 +430,7 @@ testFloatToHalf() | ||
} | ||
} | ||
} | ||
- #endif // IMF_HAVE_NEON | ||
+ #endif // IMF_HAVE_NEON_AARCH64 | ||
} | ||
|
||
// | ||
@@ -516,7 +516,7 @@ testFromHalfZigZag() | ||
} // iter | ||
} // f16c | ||
|
||
-#ifdef IMF_HAVE_NEON | ||
+#ifdef IMF_HAVE_NEON_AARCH64 | ||
{ | ||
const int numIter = 1000000; | ||
Rand48 rand48 (0); | ||
@@ -553,7 +553,7 @@ testFromHalfZigZag() | ||
} // iter | ||
} // neon | ||
|
||
-#endif // IMF_HAVE_NEON | ||
+#endif // IMF_HAVE_NEON_AARCH64 | ||
} | ||
|
||
|