# Patch summary: adds a Windows "ARM" (32-bit) Visual Studio build configuration alongside the existing ARM64 one.
# This copy of the unified diff has had its line breaks and indentation collapsed; the text below is unchanged.
#
# - .github/workflows/build.yml: adds job msvc_cl_arm_32_bit, which configures CMake with
#   -G "Visual Studio 17 2022" -A ARM and builds the Debug and Release configurations via msbuild.
# - Build/CMakeLists.txt: when CMAKE_VS_PLATFORM_NAME is "ARM", appends /EHsc to CMAKE_CXX_FLAGS,
#   does not add /GL (as was already the case for "ARM64"), and skips the Windows-only targets block
#   (inline comment: ARM 32-bit is missing dinput8.lib).
# - Build/cmake_vs2022_cl_arm_32bit.bat: new script that generates the solution into VS2022_CL_ARM_32BIT.
# - Jolt/Core/Core.h: introduces JPH_VECTOR_ALIGNMENT (16 in the branch that defines JPH_USE_SSE, in the
#   64-bit ARM branch and in the WASM branch; 8 in the 32-bit ARM branch). JPH_CPU_ARM64 is replaced by
#   JPH_CPU_ARM, which is now defined for both 32-bit and 64-bit ARM; JPH_USE_NEON is defined only in the
#   64-bit branch.
# - Jolt/Core/FPControlWord.h, FPException.h, FPFlushDenormals.h: guards change from JPH_USE_NEON to JPH_CPU_ARM.
# - Jolt/Core/TickCounter.cpp, TickCounter.h and Jolt/Math/Math.h: guards change from JPH_CPU_ARM64 to JPH_CPU_ARM.
# - Jolt/Math/Mat44.h, Quat.h, UVec4.h, Vec3.h, Vec4.h: alignas(16) becomes alignas(JPH_VECTOR_ALIGNMENT);
#   the alignment static_asserts in Body.h, ConvexHullShape.h, TransformedShape.h and the JPH_ASSERT in
#   QuadTree.cpp are updated to use the same macro.
# - README.md: lists ARM under Windows and changes the tested Ubuntu version from 20.04 to 22.04.
#
# NOTE(review): the non-Windows JPH_CPU_ARM branch in TickCounter.h reads cntvct_el0 via `mrs`; presumably
#   that register name is AArch64-only - confirm 32-bit ARM GCC/Clang builds are not expected to reach it.
# NOTE(review): the workflow uses build directory VS2022_CL_ARM_32_BIT while the .bat script uses
#   VS2022_CL_ARM_32BIT - confirm the naming difference is intentional.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index faf0e218c..95a54d264 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -141,6 +141,24 @@ jobs: - name: Build run: msbuild Build\VS2022_CL_ARM\JoltPhysics.sln /property:Configuration=${{matrix.build_type}} + msvc_cl_arm_32_bit: + runs-on: windows-latest + name: Visual Studio CL ARM 32-bit + strategy: + fail-fast: false + matrix: + build_type: [Debug, Release] + + steps: + - name: Checkout Code + uses: actions/checkout@v3 + - name: Add msbuild to PATH + uses: microsoft/setup-msbuild@v1.1 + - name: Configure CMake + run: cmake -B ${{github.workspace}}/Build/VS2022_CL_ARM_32_BIT -G "Visual Studio 17 2022" -A ARM Build + - name: Build + run: msbuild Build\VS2022_CL_ARM_32_BIT\JoltPhysics.sln /property:Configuration=${{matrix.build_type}} + macos: runs-on: macos-latest name: macOS diff --git a/Build/CMakeLists.txt b/Build/CMakeLists.txt index 1fb9b28ed..27c4c6400 100644 --- a/Build/CMakeLists.txt +++ b/Build/CMakeLists.txt @@ -61,6 +61,11 @@ if (("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUA # Set compiler flag for disabling RTTI set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-") + if ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM") + # On ARM the exception handling flag is missing which causes warnings + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") + endif() + # Set compiler flags for various configurations set(CMAKE_CXX_FLAGS_DEBUG "/GS /Od /Ob0 /RTC1") set(CMAKE_CXX_FLAGS_RELEASE "/GS- /Gy /O2 /Oi /Ot") @@ -69,8 +74,8 @@ if (("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUA set(CMAKE_CXX_FLAGS_RELEASEUBSAN "-fsanitize=undefined,implicit-conversion,float-divide-by-zero,local-bounds -fno-sanitize-recover=all") set(CMAKE_CXX_FLAGS_RELEASECOVERAGE "-fprofile-instr-generate -fcoverage-mapping") - if (NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM64")) - # On ARM64, whole program optimization triggers an internal 
compiler error during code gen, so we don't turn it on + if (NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM64") AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM")) + # On ARM, whole program optimization triggers an internal compiler error during code gen, so we don't turn it on set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GL") set(CMAKE_CXX_FLAGS_DISTRIBUTION "${CMAKE_CXX_FLAGS_DISTRIBUTION} /GL") endif() @@ -276,7 +281,7 @@ if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") endif() endif() -if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") +if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM")) # ARM 32-bit is missing dinput8.lib # Windows only targets if (TARGET_SAMPLES OR TARGET_VIEWER) include(${PHYSICS_REPO_ROOT}/TestFramework/TestFramework.cmake) diff --git a/Build/cmake_vs2022_cl_arm_32bit.bat b/Build/cmake_vs2022_cl_arm_32bit.bat new file mode 100644 index 000000000..751802b97 --- /dev/null +++ b/Build/cmake_vs2022_cl_arm_32bit.bat @@ -0,0 +1,3 @@ +@echo off +cmake -S . -B VS2022_CL_ARM_32BIT -G "Visual Studio 17 2022" -A ARM %* +echo Open VS2022_CL_ARM_32BIT\JoltPhysics.sln to build the project. 
\ No newline at end of file diff --git a/Jolt/Core/Core.h b/Jolt/Core/Core.h index 3bb2371ba..677bdb690 100644 --- a/Jolt/Core/Core.h +++ b/Jolt/Core/Core.h @@ -56,6 +56,7 @@ #define JPH_CPU_ADDRESS_BITS 32 #endif #define JPH_USE_SSE + #define JPH_VECTOR_ALIGNMENT 16 // Detect enabled instruction sets #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && !defined(JPH_USE_AVX512) @@ -95,15 +96,22 @@ #error Undefined compiler #endif #endif -#elif defined(__aarch64__) || defined(_M_ARM64) - // ARM64 CPU architecture - #define JPH_CPU_ARM64 - #define JPH_USE_NEON - #define JPH_CPU_ADDRESS_BITS 64 +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) + // ARM CPU architecture + #define JPH_CPU_ARM + #if defined(__aarch64__) || defined(_M_ARM64) + #define JPH_CPU_ADDRESS_BITS 64 + #define JPH_USE_NEON + #define JPH_VECTOR_ALIGNMENT 16 + #else + #define JPH_CPU_ADDRESS_BITS 32 + #define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries + #endif #elif defined(JPH_PLATFORM_WASM) // WebAssembly CPU architecture #define JPH_CPU_WASM #define JPH_CPU_ADDRESS_BITS 32 + #define JPH_VECTOR_ALIGNMENT 16 #define JPH_DISABLE_CUSTOM_ALLOCATOR #else #error Unsupported CPU architecture @@ -197,7 +205,7 @@ #elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS) #if defined(JPH_CPU_X86) #define JPH_BREAKPOINT __asm volatile ("int $0x3") - #elif defined(JPH_CPU_ARM64) + #elif defined(JPH_CPU_ARM) #define JPH_BREAKPOINT __builtin_trap() #endif #elif defined(JPH_PLATFORM_WASM) diff --git a/Jolt/Core/FPControlWord.h b/Jolt/Core/FPControlWord.h index 33a9a949a..5f2eb270a 100644 --- a/Jolt/Core/FPControlWord.h +++ b/Jolt/Core/FPControlWord.h @@ -30,7 +30,7 @@ class FPControlWord : public NonCopyable uint mPrevState; }; -#elif defined(JPH_USE_NEON) && defined(JPH_COMPILER_MSVC) +#elif defined(JPH_CPU_ARM) && 
defined(JPH_COMPILER_MSVC) /// Helper class that needs to be put on the stack to update the state of the floating point control word. /// This state is kept per thread. @@ -59,7 +59,7 @@ class FPControlWord : public NonCopyable unsigned int mPrevState; }; -#elif defined(JPH_USE_NEON) +#elif defined(JPH_CPU_ARM) /// Helper class that needs to be put on the stack to update the state of the floating point control word. /// This state is kept per thread. diff --git a/Jolt/Core/FPException.h b/Jolt/Core/FPException.h index 32aba1520..d9b3622fe 100644 --- a/Jolt/Core/FPException.h +++ b/Jolt/Core/FPException.h @@ -20,7 +20,7 @@ class FPExceptionDisableInvalid : public FPControlWord<_MM_MASK_INVALID, _MM_MAS /// Disable division by zero floating point exceptions class FPExceptionDisableDivByZero : public FPControlWord<_MM_MASK_DIV_ZERO, _MM_MASK_DIV_ZERO> { }; -#elif defined(JPH_USE_NEON) && defined(JPH_COMPILER_MSVC) +#elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC) /// Enable floating point divide by zero exception and exceptions on invalid numbers class FPExceptionsEnable : public FPControlWord<0, _EM_INVALID | _EM_ZERODIVIDE> { }; @@ -31,7 +31,7 @@ class FPExceptionDisableInvalid : public FPControlWord<_EM_INVALID, _EM_INVALID> /// Disable division by zero floating point exceptions class FPExceptionDisableDivByZero : public FPControlWord<_EM_ZERODIVIDE, _EM_ZERODIVIDE> { }; -#elif defined(JPH_USE_NEON) +#elif defined(JPH_CPU_ARM) /// Invalid operation exception bit static constexpr uint64 FP_IOE = 1 << 8; diff --git a/Jolt/Core/FPFlushDenormals.h b/Jolt/Core/FPFlushDenormals.h index fab695289..73c6254f1 100644 --- a/Jolt/Core/FPFlushDenormals.h +++ b/Jolt/Core/FPFlushDenormals.h @@ -13,11 +13,11 @@ JPH_NAMESPACE_BEGIN /// This can make floating point operations much faster when working with very small numbers class FPFlushDenormals : public FPControlWord<_MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_MASK> { }; -#elif defined(JPH_USE_NEON) && defined(JPH_COMPILER_MSVC) 
+#elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC) class FPFlushDenormals : public FPControlWord<_DN_FLUSH, _MCW_DN> { }; -#elif defined(JPH_USE_NEON) +#elif defined(JPH_CPU_ARM) /// Flush denormals to zero bit static constexpr uint64 FP_FZ = 1 << 24; diff --git a/Jolt/Core/TickCounter.cpp b/Jolt/Core/TickCounter.cpp index 22f91b689..99963b01d 100644 --- a/Jolt/Core/TickCounter.cpp +++ b/Jolt/Core/TickCounter.cpp @@ -24,7 +24,7 @@ JPH_NAMESPACE_BEGIN -#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM64)) +#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM)) uint64 GetProcessorTickCount() { @@ -33,10 +33,10 @@ uint64 GetProcessorTickCount() return uint64(count.QuadPart); } -#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM64) +#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM) static const uint64 sProcessorTicksPerSecond = []() { -#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM64)) +#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM)) LARGE_INTEGER frequency { }; QueryPerformanceFrequency(&frequency); return uint64(frequency.QuadPart); @@ -71,7 +71,7 @@ static const uint64 sProcessorTicksPerSecond = []() { #if defined(JPH_CPU_X86) const char *cpu_str = "cpu MHz"; - #elif defined(JPH_CPU_ARM64) + #elif defined(JPH_CPU_ARM) const char *cpu_str = "BogoMIPS"; #else #error Unsupported CPU architecture diff --git a/Jolt/Core/TickCounter.h b/Jolt/Core/TickCounter.h index 15e2bdfa8..ba46e8e08 100644 --- a/Jolt/Core/TickCounter.h +++ b/Jolt/Core/TickCounter.h @@ -12,7 +12,7 @@ JPH_NAMESPACE_BEGIN -#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM64)) +#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM)) /// Functionality to get the processors cycle 
counter uint64 GetProcessorTickCount(); // Not inline to avoid having to include Windows.h @@ -26,7 +26,7 @@ JPH_INLINE uint64 GetProcessorTickCount() return JPH_PLATFORM_BLUE_GET_TICKS(); #elif defined(JPH_CPU_X86) return __rdtsc(); -#elif defined(JPH_CPU_ARM64) +#elif defined(JPH_CPU_ARM) uint64 val; asm volatile("mrs %0, cntvct_el0" : "=r" (val)); return val; @@ -37,7 +37,7 @@ JPH_INLINE uint64 GetProcessorTickCount() #endif } -#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM64) +#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM) /// Get the amount of ticks per second, note that this number will never be fully accurate as the amound of ticks per second may vary with CPU load, so this number is only to be used to give an indication of time for profiling purposes uint64 GetProcessorTicksPerSecond(); diff --git a/Jolt/Math/Mat44.h b/Jolt/Math/Mat44.h index 972864401..ec830d299 100644 --- a/Jolt/Math/Mat44.h +++ b/Jolt/Math/Mat44.h @@ -8,7 +8,7 @@ JPH_NAMESPACE_BEGIN /// Holds a 4x4 matrix of floats, but supports also operations on the 3x3 upper left part of the matrix. 
-class [[nodiscard]] alignas(16) Mat44 +class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) Mat44 { public: JPH_OVERRIDE_NEW_DELETE diff --git a/Jolt/Math/Math.h b/Jolt/Math/Math.h index 7edf704cd..91c6fab6e 100644 --- a/Jolt/Math/Math.h +++ b/Jolt/Math/Math.h @@ -107,7 +107,7 @@ inline uint CountTrailingZeros(uint32 inValue) return 32; return __builtin_ctz(inValue); #endif -#elif defined(JPH_CPU_ARM64) +#elif defined(JPH_CPU_ARM) #if defined(JPH_COMPILER_MSVC) if (inValue == 0) return 32; @@ -139,7 +139,7 @@ inline uint CountLeadingZeros(uint32 inValue) return 32; return __builtin_clz(inValue); #endif -#elif defined(JPH_CPU_ARM64) +#elif defined(JPH_CPU_ARM) #if defined(JPH_COMPILER_MSVC) return _CountLeadingZeros(inValue); #else diff --git a/Jolt/Math/Quat.h b/Jolt/Math/Quat.h index c53fa25c3..d351bf8ce 100644 --- a/Jolt/Math/Quat.h +++ b/Jolt/Math/Quat.h @@ -28,7 +28,7 @@ JPH_NAMESPACE_BEGIN /// it easy to extract the rotation axis of the quaternion: /// /// q = [cos(angle / 2), sin(angle / 2) * rotation_axis] -class [[nodiscard]] alignas(16) Quat +class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) Quat { public: JPH_OVERRIDE_NEW_DELETE diff --git a/Jolt/Math/UVec4.h b/Jolt/Math/UVec4.h index 15b097279..a392d15db 100644 --- a/Jolt/Math/UVec4.h +++ b/Jolt/Math/UVec4.h @@ -7,7 +7,7 @@ JPH_NAMESPACE_BEGIN -class [[nodiscard]] alignas(16) UVec4 +class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) UVec4 { public: JPH_OVERRIDE_NEW_DELETE diff --git a/Jolt/Math/Vec3.h b/Jolt/Math/Vec3.h index 38c1afcd7..8132a07eb 100644 --- a/Jolt/Math/Vec3.h +++ b/Jolt/Math/Vec3.h @@ -11,7 +11,7 @@ JPH_NAMESPACE_BEGIN /// 3 component vector (stored as 4 vectors). 
/// Note that we keep the 4th component the same as the 3rd component to avoid divisions by zero when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED defined -class [[nodiscard]] alignas(16) Vec3 +class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) Vec3 { public: JPH_OVERRIDE_NEW_DELETE diff --git a/Jolt/Math/Vec4.h b/Jolt/Math/Vec4.h index 820445e88..9f8221917 100644 --- a/Jolt/Math/Vec4.h +++ b/Jolt/Math/Vec4.h @@ -9,7 +9,7 @@ JPH_NAMESPACE_BEGIN -class [[nodiscard]] alignas(16) Vec4 +class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) Vec4 { public: JPH_OVERRIDE_NEW_DELETE diff --git a/Jolt/Physics/Body/Body.h b/Jolt/Physics/Body/Body.h index 0f4b6436f..69f1655f4 100644 --- a/Jolt/Physics/Body/Body.h +++ b/Jolt/Physics/Body/Body.h @@ -317,7 +317,7 @@ class Body : public NonCopyable }; static_assert(sizeof(Body) == 128, "Body should be 128 bytes"); -static_assert(alignof(Body) == 16, "Body should align to 16 bytes"); +static_assert(alignof(Body) == JPH_VECTOR_ALIGNMENT, "Body should properly align"); JPH_NAMESPACE_END diff --git a/Jolt/Physics/Collision/BroadPhase/QuadTree.cpp b/Jolt/Physics/Collision/BroadPhase/QuadTree.cpp index 234bdf996..9d3284597 100644 --- a/Jolt/Physics/Collision/BroadPhase/QuadTree.cpp +++ b/Jolt/Physics/Collision/BroadPhase/QuadTree.cpp @@ -503,7 +503,7 @@ QuadTree::NodeID QuadTree::BuildTree(const BodyVector &inBodies, TrackingVector // Calculate centers of all bodies that are to be inserted Vec3 *centers = new Vec3 [inNumber]; - JPH_ASSERT(IsAligned(centers, 16)); + JPH_ASSERT(IsAligned(centers, JPH_VECTOR_ALIGNMENT)); Vec3 *c = centers; for (const NodeID *n = ioNodeIDs, *n_end = ioNodeIDs + inNumber; n < n_end; ++n, ++c) *c = GetNodeOrBodyBounds(inBodies, *n).GetCenter(); diff --git a/Jolt/Physics/Collision/Shape/ConvexHullShape.h b/Jolt/Physics/Collision/Shape/ConvexHullShape.h index 7865c60d7..d21a4d1a7 100644 --- a/Jolt/Physics/Collision/Shape/ConvexHullShape.h +++ b/Jolt/Physics/Collision/Shape/ConvexHullShape.h @@ -156,7 +156,7 @@ class 
ConvexHullShape final : public ConvexShape }; static_assert(sizeof(Point) == 32, "Unexpected size"); - static_assert(alignof(Point) == 16, "Unexpected alignment"); + static_assert(alignof(Point) == JPH_VECTOR_ALIGNMENT, "Unexpected alignment"); Vec3 mCenterOfMass; ///< Center of mass of this convex hull Mat44 mInertia; ///< Inertia matrix assuming density is 1 (needs to be multiplied by density) diff --git a/Jolt/Physics/Collision/TransformedShape.h b/Jolt/Physics/Collision/TransformedShape.h index 1d8ff99cc..88c1bab22 100644 --- a/Jolt/Physics/Collision/TransformedShape.h +++ b/Jolt/Physics/Collision/TransformedShape.h @@ -171,6 +171,6 @@ class TransformedShape }; static_assert(sizeof(TransformedShape) == 64, "Not properly packed"); -static_assert(alignof(TransformedShape) == 16, "Not properly aligned"); +static_assert(alignof(TransformedShape) == JPH_VECTOR_ALIGNMENT, "Not properly aligned"); JPH_NAMESPACE_END diff --git a/README.md b/README.md index a3c657f2c..0f157924b 100644 --- a/README.md +++ b/README.md @@ -81,8 +81,8 @@ For more information see the [Architecture and API documentation](https://jrouwe ## Supported Platforms -* Windows (VS2019, VS2022) x64/x86/ARM64 (Desktop/UWP) -* Linux (tested on Ubuntu 20.04) x64/ARM64 +* Windows (VS2019, VS2022) x86/x64/ARM/ARM64 (Desktop/UWP) +* Linux (tested on Ubuntu 22.04) x64/ARM64 * Android (tested on Android 10) x64/ARM64 * Platform Blue (a popular game console) x64 * macOS (tested on Monterey) x64/ARM64