diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/.gitrepo b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/.gitrepo new file mode 100644 index 0000000..7966bf8 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = https://github.com/V-Sekai/preconditioner-for-cloth-and-deformable-body-simulation.git + branch = master + commit = bf3d095a8636fa541990e38c1fce796515b7b119 + parent = c83e46325ad184347f259ac9ce3c43388d32f6e9 + method = merge + cmdver = 0.4.6 diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/LICENSE b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/LICENSE new file mode 100644 index 0000000..4d15781 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/LICENSE @@ -0,0 +1,24 @@ +Copyright (C) 2002 - 2022, +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/README.md b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/README.md new file mode 100644 index 0000000..572300b --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/README.md @@ -0,0 +1,7 @@ +# preconditioner-for-cloth-and-deformable-body-simulation + +This is a mirror of https://wanghmin.github.io/publications.html + +## Entry point + +See `SeSchwarzPreconditioner.h` for entry points. diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeAabb.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeAabb.h new file mode 100644 index 0000000..ba41f4f --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeAabb.h @@ -0,0 +1,185 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "SeMath.h" +#include + +SE_NAMESPACE_BEGIN + +/************************************************************************* +****************************** SeAabb ****************************** +*************************************************************************/ + +/** + * @brief Axis-align Bounding Box. + */ +template struct SeAabb +{ + static_assert(std::is_floating_point::value, "Only support floating type currently."); + +public: + + //! @brief Default constructor. + SeAabb() : Lower(FLT_MAX), Upper(-FLT_MAX) {} + + //! @brief Constructed by a given point. + SeAabb(const VecType & Point) : Lower(Point), Upper(Point) {} + + //! 
@brief Constructed by a paired points explicitly. + explicit SeAabb(const VecType & P1, const VecType & P2) : Lower(Math::Min(P1, P2)), Upper(Math::Max(P1, P2)) {} + + //! @brief Constructed by a paired points explicitly. + explicit SeAabb(const VecType & P1, const VecType & P2, const VecType & P3) : Lower(Math::Min(P1, P2, P3)), Upper(Math::Max(P1, P2, P3)) {} + + + + +public: + + SeAabb operator+(const SeAabb & Other) const + { + return SeAabb(Math::Min(Lower, Other.Lower), Math::Max(Upper, Other.Upper)); + } + + SeAabb operator+(const VecType & Point) const + { + return SeAabb(Math::Min(Lower, Point), Math::Max(Upper, Point)); + } + + void operator+=(const SeAabb & Other) + { + Lower = Math::Min(Lower, Other.Lower); Upper = Math::Max(Upper, Other.Upper); + } + + void operator+=(const VecType & Point) + { + Lower = Math::Min(Lower, Point); Upper = Math::Max(Upper, Point); + } + + void Enlarge(float Amount) + { + Lower -= Amount; Upper += Amount; + } + + VecType Center() const + { + return Math::Average(Lower, Upper); + } + + VecType Extent() const + { + return Upper - Lower; + } + +public: + + VecType Lower, Upper; +}; + + +/************************************************************************* +*********************** IsInside ************************ +*************************************************************************/ + +template +static SE_INLINE bool IsContain(const SeAabb& aabb, const VecT& P) +{ + const int componentCount = VecT::Elements; + for (int i = 0; i < componentCount; i++) + { + if (P[i] < aabb.Lower[i] || P[i] > aabb.Upper[i]) + { + return false; + } + } + return true; + +} +template +static SE_INLINE bool IsContain(const SeAabb& aabb, const VecT& P,float radium) +{ + auto enlargedAabb = aabb; + enlargedAabb.Upper += radium; + enlargedAabb.Lower -= radium; + return IsContain(enlargedAabb, P); +} + +template<> +static SE_INLINE bool IsContain(const SeAabb& aabb, const Float2& P) +{ + return !((aabb.Upper.x < P.x) || (aabb.Upper.y < P.y) || + (aabb.Lower.x > P.x)
|| (aabb.Lower.y > P.y)); +} + +template<> +static SE_INLINE bool IsContain(const SeAabb& aabb, const Float3& P) +{ + return !((aabb.Upper.x < P.x) || (aabb.Upper.y < P.y) || (aabb.Upper.z < P.z) || + (aabb.Lower.x > P.x) || (aabb.Lower.y > P.y) || (aabb.Lower.z > P.z)); +} + +/************************************************************************* +*********************** IsInside ************************ +*************************************************************************/ + +template +static bool IsIntersect(const SeAabb& aabb, const VecT& Pa, const VecT& Pb, const VecT& iv); + +template +static bool IsIntersect(const SeAabb& aabb, const VecT& Pa, const VecT& Pb); + + +template<> +static SE_INLINE bool IsIntersect(const SeAabb& aabb, const Float3& Pa, const Float3& Pb) +{ + Float3 Dir = Float3(Pb - Pa); + + if (Dir.x == 0.0f) Dir.x = 1e-6f; + if (Dir.y == 0.0f) Dir.y = 1e-6f; + if (Dir.z == 0.0f) Dir.z = 1e-6f; + + Float3 invDir = 1.0f / Dir; + Float3 Left = Float3(aabb.Lower - Pa) * invDir; + Float3 Right = Float3(aabb.Upper - Pa) * invDir; + + Float2 T1 = Float2(Math::Min(Left.x, Right.x), Math::Max(Left.x, Right.x)); + Float2 T2 = Float2(Math::Min(Left.y, Right.y), Math::Max(Left.y, Right.y)); + Float2 T3 = Float2(Math::Min(Left.z, Right.z), Math::Max(Left.z, Right.z)); + + float range0 = Math::Max(T1.x, T2.x, T3.x, 0.0f); + float range1 = Math::Min(T1.y, T2.y, T3.y, 1.0f); + + return range0 < range1; +} + + +template +static SE_INLINE bool IsOverlap(const SeAabb& aabb, const SeAabb& P) { return false; } + + +SE_NAMESPACE_END diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeAabbSimd.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeAabbSimd.h new file mode 100644 index 0000000..890da40 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeAabbSimd.h @@ -0,0 +1,148 @@ +///////////////////////////////////////////////////////////////////////////////////////////// 
+// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "SeMathSimd.h" + +#include "SeAabb.h" + +#include + +SE_NAMESPACE_BEGIN + +/************************************************************************* +****************************** SeAabb ****************************** +*************************************************************************/ + +/** + * @brief Axis-align Bounding Box. + */ + +template<> struct SeAabb +{ + +public: + + //! @brief Default constructor. 
+ SeAabb() : Lower(FLT_MAX), Upper(-FLT_MAX) {} + + //! @brief Constructed by a given point. + SeAabb(const Simd3f & Point) : Lower(Point), Upper(Point) {} + + //! @brief Constructed by a paired points explicitly. + explicit SeAabb(const Simd3f & P1, const Simd3f & P2) : Lower(Math::Min(P1, P2)), Upper(Math::Max(P1, P2)) {} + +public: + + SeAabb operator+(const SeAabb & Other) const + { + return SeAabb(Math::Min(Lower, Other.Lower), Math::Max(Upper, Other.Upper)); + } + + SeAabb operator+(const Simd3f & Point) const + { + return SeAabb(Math::Min(Lower, Point), Math::Max(Upper, Point)); + } + + void operator += (const SeAabb & Other) + { + Lower = Math::Min(Lower, Other.Lower); Upper = Math::Max(Upper, Other.Upper); + } + + void operator += (const Simd3f & Point) + { + Lower = Math::Min(Lower, Point); Upper = Math::Max(Upper, Point); + } + + void Enlarge(float Amount) + { + Lower -= Amount; Upper += Amount; + } + + Simd3f Center() const + { + return Math::Average(Lower, Upper); + } + + Simd3f Extent() const + { + return Upper - Lower; + } + +public: + + Simd3f Lower, Upper; +}; + + + +template<> +static SE_INLINE bool IsContain(const SeAabb& aabb, const Simd3f& P) +{ + return !((aabb.Upper.x < P.x) || (aabb.Upper.y < P.y) || (aabb.Upper.z < P.z) || + (aabb.Lower.x > P.x) || (aabb.Lower.y > P.y) || (aabb.Lower.z > P.z)); +} + +template<> +static SE_INLINE bool IsContain(const SeAabb& aabb, const Simd3f& P, float radium) +{ + return !((aabb.Upper.x < P.x - radium) || (aabb.Upper.y < P.y - radium) || (aabb.Upper.z < P.z - radium) || + (aabb.Lower.x > P.x + radium) || (aabb.Lower.y > P.y + radium) || (aabb.Lower.z > P.z + radium)); +} + +template<> +static SE_INLINE bool IsOverlap(const SeAabb& aabb, const SeAabb& P) +{ + return !((aabb.Upper.x < P.Lower.x) || (aabb.Upper.y < P.Lower.y) || (aabb.Upper.z < P.Lower.z) || + (aabb.Lower.x > P.Upper.x) || (aabb.Lower.y > P.Upper.y) || (aabb.Lower.z > P.Upper.z)); +} + + +template<> +static SE_INLINE bool IsIntersect(const 
SeAabb& aabb, const Simd3f& Pa, const Simd3f& Pb) +{ + Simd3f Dir = Simd3f(Pb - Pa); + + if (Dir.x == 0.0f) Dir.x = 1e-6f; + if (Dir.y == 0.0f) Dir.y = 1e-6f; + if (Dir.z == 0.0f) Dir.z = 1e-6f; + + Simd3f invDir = 1.0f / Dir; + Simd3f Left = Simd3f(aabb.Lower - Pa) * invDir; + Simd3f Right = Simd3f(aabb.Upper - Pa) * invDir; + + Float2 T1 = Float2(Math::Min(Left.x, Right.x), Math::Max(Left.x, Right.x)); + Float2 T2 = Float2(Math::Min(Left.y, Right.y), Math::Max(Left.y, Right.y)); + Float2 T3 = Float2(Math::Min(Left.z, Right.z), Math::Max(Left.z, Right.z)); + + float range0 = Math::Max(T1.x, T2.x, T3.x, 0.0f); + float range1 = Math::Min(T1.y, T2.y, T3.y, 1.0f); + + return range0 < range1; +} + +SE_NAMESPACE_END diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeArray2D.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeArray2D.h new file mode 100644 index 0000000..d160c92 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeArray2D.h @@ -0,0 +1,127 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "SePreDefine.h" + +SE_NAMESPACE_BEGIN + +/************************************************************************* +**************************** SeArray2D ***************************** +*************************************************************************/ + +//! @brief 二维数组容器 +template class SeArray2D +{ + +public: + + //! @brief Default constructor. + SeArray2D() : m_Rows(0), m_Columns(0) {} + + //! @brief Construct and resize. + explicit SeArray2D(size_t _Rows, size_t _Columns) : m_Rows(_Rows), m_Columns(_Columns), m_Values(_Rows * _Columns) {} + + //! @brief Construct, resize and memory set. + explicit SeArray2D(size_t _Rows, size_t _Columns, Type _Value) : m_Rows(_Rows), m_Columns(_Columns), m_Values(_Rows * _Columns, _Value) {} + +public: + + //! @brief Data will be reserved in Column Major format + void Resize(size_t _Rows, size_t _Columns) + { + m_Values.resize(_Rows * _Columns); + + m_Columns = _Columns; + + m_Rows = _Rows; + } + + //! @brief release spare memory + void ShrinkToFit() + { + m_Values.shrink_to_fit(); + } + + //! @brief Fill data. 
+ void Memset(Type _Value, size_t _Begin = 0, size_t _End = SIZE_MAX) + { + _End = _End < m_Values.size() ? _End : m_Values.size(); + + for (size_t i = _Begin; i < _End; ++i) + { + m_Values[i] = _Value; + } + } + + //! @brief Exchange context with right. + void Swap(SeArray2D & _Right) + { + m_Values.swap(_Right.m_Values); + + SE_SWAP(m_Columns, _Right.m_Columns); + + SE_SWAP(m_Rows, _Right.m_Rows); + } + + void Clear() + { + m_Values.clear(); + + m_Values.shrink_to_fit(); + + m_Rows = m_Columns = 0; + } + +public: + + const Type * operator[](size_t i) const { SE_ASSERT(i < m_Rows); return &m_Values[m_Columns * i]; } + + Type * operator[](size_t i) { SE_ASSERT(i < m_Rows); return &m_Values[m_Columns * i]; } + + const Type * Ptr() const { return m_Values.data(); } + + bool IsEmpty() const { return m_Values.empty(); } + + size_t Size() const { return m_Values.size(); } + + size_t Capacity() const { return m_Values.capacity(); } + + size_t Columns() const { return m_Columns; } + + size_t Rows() const { return m_Rows; } + + Type * Ptr() { return m_Values.data(); } + +private: + + std::vector m_Values; + + size_t m_Rows, m_Columns; +}; + +SE_NAMESPACE_END \ No newline at end of file diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeCollisionElements.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeCollisionElements.h new file mode 100644 index 0000000..e5c6688 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeCollisionElements.h @@ -0,0 +1,71 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "SeVectorSimd.h" + +SE_NAMESPACE_BEGIN + +struct EfSet +{ + int m_eId; + int m_fId; + float stiff; + Float3 m_bary; // (x, 1-x) / (y, z, 1-y-z): barycentric weight of the intersection point + SeVec3fSimd m_normal; // repulsion direction +}; + +struct VfSet +{ + int m_vId; + int m_fId; + float stiff; + Float2 m_bary; // (x, y, 1-x-y): barycentric weight of the vertex + SeVec3fSimd m_normal; // repulsion direction +}; + +struct EeSet +{ + int m_eId0; + int m_eId1; + float stiff; + Float2 m_bary; // (x, 1-x) / (y, 1-y): barycentric weight of the two closest points + SeVec3fSimd m_normal; // repulsion direction +}; + +struct Stencil +{ + int verextNumPerStencil; + int vertexNumOfFirstPrimitive; + + int index[5]; + float weight[5]; + float stiff; + SeVec3fSimd direction; +}; + +SE_NAMESPACE_END \ No newline at end of file diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeCsr.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeCsr.h new file mode 100644 index 0000000..6149715 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeCsr.h @@ -0,0 +1,188 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. 
The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "SeOmp.h" +#include "SeArray2D.h" +#include "SeUtility.h" + +SE_NAMESPACE_BEGIN + +template class SeCompressSparseData +{ +public: + + SeCompressSparseData() {} + SeCompressSparseData(const std::vector& starts, const std::vector& idxs, const std::vector& values) + : + m_starts(starts), + m_idxs(idxs), + m_values(values) + {} + + virtual ~SeCompressSparseData() {} + + void InitIdxs(const std::vector> & array2D) + { + Clear(); + + int size = array2D.size(); + + m_starts.resize(size + 1); Utility::MemsetZero(m_starts); + + for (int i = 0; i < size; ++i) + { + int num = array2D[i].size(); + + m_starts[i + 1] = m_starts[i] + num; + } + + m_idxs.resize(m_starts.back()); + + OMP_PARALLEL_FOR + + for (int i = 0; i < size; ++i) + { + int num = array2D[i].size(); + std::memcpy(m_idxs.data() + m_starts[i], array2D[i].data(), sizeof(int) * num); + } + } + + void InitIdxs(const SeArray2D & array2D, bool isRowMajor) + { + Clear(); + + int DIM = isRowMajor ? 
array2D.Rows() : array2D.Columns(); + + m_starts.resize(DIM + 1); Utility::MemsetZero(m_starts); + + for (int i = 0; i < DIM; ++i) + { + int num = isRowMajor ? array2D[i][0] : array2D[0][i]; + + m_starts[i + 1] = m_starts[i] + num; + } + + m_idxs.resize(m_starts.back()); + + OMP_PARALLEL_FOR + + for (int i = 0; i < DIM; ++i) + { + int * address = m_idxs.data() + m_starts[i]; + + int num = Size(i); + + for (int k = 1; k <= num; ++k) + { + address[k - 1] = isRowMajor ? array2D[i][k] : array2D[k][i]; + } + } + } + + void Clear() + { + Utility::ClearAndShrink(m_starts); + Utility::ClearAndShrink(m_idxs); + Utility::ClearAndShrink(m_values); + } + + int Size() const + { + return m_starts.back(); + } + + int Size(int id) const + { + return -m_starts[id] + m_starts[id + 1]; + } + + int Start(int id) const + { + return m_starts[id]; + } + + const int* StartPtr(int id) const + { + return &(m_starts[id]); + } + + const int Idx(int id) const + { + return m_idxs[id]; + } + + const int * IdxPtr(int id) const + { + return m_idxs.data() + m_starts[id]; + } + + const Type * ValuePtr(int id) const + { + return m_values.data() + m_starts[id]; + } + + virtual const SeCompressSparseData * Ptr() const + { + return this; + } + +protected: + + std::vector m_starts; + std::vector m_idxs; + std::vector m_values; +}; + +template class SeCsr : public SeCompressSparseData +{ +public: + SeCsr() {} + SeCsr(const std::vector& starts, const std::vector& idxs, const std::vector& values) :SeCompressSparseData(starts, idxs, values) {} + + int Rows() const { return SeCompressSparseData::m_starts.size() - 1; } + + virtual const SeCsr * Ptr() const override + { + return this; + } +}; + + +template class SeCsc : public SeCompressSparseData +{ +public: + + int Columns() const { return SeCompressSparseData::m_starts.size() - 1; } + + virtual const SeCsc * Ptr() const override + { + return this; + } +}; + +SE_NAMESPACE_END \ No newline at end of file diff --git 
a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeIntrinsic.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeIntrinsic.h new file mode 100644 index 0000000..370b9d3 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeIntrinsic.h @@ -0,0 +1,157 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include +#include +#include + +#include "SeMatrix.h" + +SE_NAMESPACE_BEGIN + +namespace Intrinsic +{ + /************************************************************************* + ************************** Count of bits *************************** + *************************************************************************/ + +#ifdef WIN32 + inline int Clz32(unsigned int Val) { return __lzcnt(Val); } + inline int Clz64(unsigned __int64 Val) { return __lzcnt64(Val); } + inline int Popcount32(unsigned int Val) { return __popcnt(Val); } + inline int Popcount64(unsigned __int64 Val) { return __popcnt64(Val); } + + #pragma intrinsic(_BitScanForward) + inline int Ffs(int s) + { + unsigned long index = -1; + unsigned char isNonzero = _BitScanForward(&index, s); + /*s should be greater than 0; index is zero-based*/ + return isNonzero ? (index + 1) : 0; + } + +#else + inline int Clz32(unsigned int Val) { return std::countl_zero(Val); } + inline int Clz64( uint64_t Val) { return std::countl_zero(Val); } + inline int Popcount32(unsigned int Val) { return std::popcount(Val); } + inline int Popcount64(uint64_t Val) { return std::popcount(Val); } + inline int Ffs(int s) { return ffs(s); } +#endif + + /************************************************************************* + ************************ Atomic operations ************************* + *************************************************************************/ + + + template inline Type AtomicOr(Type * Address, Type Val) { return reinterpret_cast*>(Address)->fetch_or(Val,std::memory_order_relaxed); } + template inline Type AtomicXor(Type * Address, Type Val) { return reinterpret_cast*>(Address)->fetch_xor(Val,std::memory_order_relaxed); } + template inline Type AtomicAnd(Type * Address, Type Val) { return reinterpret_cast*>(Address)->fetch_and(Val,std::memory_order_relaxed); } + template inline Type AtomicAdd(Type * Address, Type Val) { return 
reinterpret_cast*>(Address)->fetch_add(Val,std::memory_order_relaxed); } + template inline Type AtomicSub(Type * Address, Type Val) { return reinterpret_cast*>(Address)->fetch_sub(Val,std::memory_order_relaxed); } + template inline Type AtomicExch(Type * Address, Type Val) { return reinterpret_cast*>(Address)->exchange(Val,std::memory_order_relaxed); } + template inline Type AtomicCAS(Type* Address, Type Exp, Type Val) { reinterpret_cast*>(Address)->compare_exchange_strong(Exp, Val, std::memory_order_relaxed); return Exp; } + + template< typename Type, typename Op > Type AtomicOperationByCas(Type* addr, Type value, Op op) + { + Type oldMyValue = *addr; + bool isChangedByOtherThread = true; + while (isChangedByOtherThread) + { + oldMyValue = *addr; + Type m = op(value, oldMyValue); + Type newMyValue = AtomicCAS(addr, oldMyValue, m); + isChangedByOtherThread = oldMyValue != newMyValue; + } + return oldMyValue; + } + + + template inline Type AtomicMax(Type* addr, Type value) + { + return AtomicOperationByCas(addr, value, [](const auto& a, const auto& b) { return a > b ? a : b; }); + } + + template inline Type AtomicMin(Type* addr, Type value) + { + return AtomicOperationByCas(addr, value, [](const auto& a, const auto& b) { return a < b ? 
a : b; }); + } + +#if defined(WIN32) && (_MSVC_LANG > 201703L) // only for C++ 20 on Windows + inline void AtomicAdd(SeMatrix3f* address, SeMatrix3f value) + { + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + AtomicAdd(&(*address)(i, j), value(i, j)); + } + } + } + + template inline void AtomicAdd(SeVector * address, SeVector value) + { + for (int i = 0; i < SeVector::Elements; i++) + { + AtomicAdd(&(*address)[i], value[i]); + } + } +#else + inline void AtomicAdd(SeMatrix3f* address, SeMatrix3f value) + { + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + AtomicOperationByCas( &(*address)(i, j), value(i, j), [](const auto& a, const auto& b) { return a + b; }); + } + } + } + + template inline void AtomicAdd(SeVector * address, SeVector value) + { + for (int i = 0; i < SeVector::Elements; i++) + { + AtomicOperationByCas(&(*address)[i], value[i], [](const auto& a, const auto& b) { return a + b; }); + } + } + +#endif + + /************************************************************************* + ************************* Warp operations ************************** + *************************************************************************/ + + + inline void SyncWarp() {} + inline void SyncBlock() {} + inline unsigned int ActiveMask(); + inline unsigned int LanemaskLt(); + inline unsigned int LanemaskLt(unsigned int laneId) { return (1U << laneId) - 1; } +} + +SE_NAMESPACE_END \ No newline at end of file diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMath.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMath.h new file mode 100644 index 0000000..2fab11f --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMath.h @@ -0,0 +1,187 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. 
+// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "SeVector.h" +#include "SeMatrix.h" + +SE_NAMESPACE_BEGIN + +#define SE_E 2.71828182845904523536 +#define SE_PI 3.14159265358979323846 +#define SE_2_PI 6.28318530717958647692 + +namespace Math +{ + /********************************************************************* + **************************** Helper **************************** + *********************************************************************/ + + template struct Helper + { + using Vec2 = SeVector2; + using Vec3 = SeVector3; + using Vec4 = SeVector4; + + template using Function = Type(*)(Types...); + + template Fn1> static SE_INLINE Vec2 Expand(const Vec2 & a) { return Vec2(Fn1(a.x), Fn1(a.y)); } + template Fn1> static SE_INLINE Vec3 Expand(const Vec3 & a) { return Vec3(Fn1(a.x), Fn1(a.y), Fn1(a.z)); } + template Fn1> static SE_INLINE Vec4 Expand(const Vec4 & a) { return Vec4(Fn1(a.x), Fn1(a.y), Fn1(a.z), Fn1(a.w)); } + + template Fn2> static SE_INLINE Vec2 Expand(const Vec2 & a, Type s) { return Vec2(Fn2(a.x, s), Fn2(a.y, s)); } + template Fn2> static SE_INLINE Vec3 Expand(const Vec3 & a, Type s) { return Vec3(Fn2(a.x, s), Fn2(a.y, s), Fn2(a.z, s)); } + template Fn2> static SE_INLINE Vec4 Expand(const Vec4 & a, Type s) { return Vec4(Fn2(a.x, s), Fn2(a.y, s), Fn2(a.z, s), Fn2(a.w, s)); } + + template Fn2> static SE_INLINE Vec2 Expand(const Vec2 & a, const Vec2 & b) { return Vec2(Fn2(a.x, b.x), Fn2(a.y, b.y)); } + template Fn2> static SE_INLINE Vec3 Expand(const Vec3 & a, const Vec3 & b) { return Vec3(Fn2(a.x, b.x), Fn2(a.y, b.y), Fn2(a.z, b.z)); } + template Fn2> static SE_INLINE Vec4 Expand(const Vec4 & a, const Vec4 & b) { return Vec4(Fn2(a.x, b.x), Fn2(a.y, b.y), Fn2(a.z, b.z), Fn2(a.w, b.w)); } + + template Fn3> static SE_INLINE Vec2 Expand(const Vec2 & a, Type s0, Type s1) { return Vec2(Fn3(a.x, s0, s1), Fn3(a.y, s0, s1)); } + template Fn3> static SE_INLINE Vec3 Expand(const Vec3 & a, Type s0, Type s1) { return Vec3(Fn3(a.x, s0, s1), Fn3(a.y, s0, s1), Fn3(a.z, s0, s1)); 
} + template Fn3> static SE_INLINE Vec4 Expand(const Vec4 & a, Type s0, Type s1) { return Vec4(Fn3(a.x, s0, s1), Fn3(a.y, s0, s1), Fn3(a.z, s0, s1), Fn3(a.w, s0, s1)); } + + template Fn3> static SE_INLINE Vec2 Expand(const Vec2 & a, const Vec2 & b, Type scalar) { return Vec2(Fn3(a.x, b.x, scalar), Fn3(a.y, b.y, scalar)); } + template Fn3> static SE_INLINE Vec3 Expand(const Vec3 & a, const Vec3 & b, Type scalar) { return Vec3(Fn3(a.x, b.x, scalar), Fn3(a.y, b.y, scalar), Fn3(a.z, b.z, scalar)); } + template Fn3> static SE_INLINE Vec4 Expand(const Vec4 & a, const Vec4 & b, Type scalar) { return Vec4(Fn3(a.x, b.x, scalar), Fn3(a.y, b.y, scalar), Fn3(a.z, b.z, scalar), Fn3(a.w, b.w, scalar)); } + }; + + /********************************************************************* + ************************* Mathematics ************************** + *********************************************************************/ + + SE_INLINE float Log(float x) { return logf(x);} + SE_INLINE float Abs(float a) { return fabs(a); } + SE_INLINE float Sin(float a) { return sinf(a); } + SE_INLINE float Cos(float a) { return cosf(a); } + SE_INLINE float Tan(float a) { return tanf(a); } + SE_INLINE float Exp(float a) { return expf(a); } + SE_INLINE float Asin(float a) { return asinf(a); } + SE_INLINE float Acos(float a) { return acosf(a); } + SE_INLINE float Atan(float a) { return atanf(a); } + SE_INLINE float Sqrt(float a) { return sqrtf(a); } + SE_INLINE float Cbrt(float x) { return cbrtf(x); } + SE_INLINE float Ceil(float a) { return ceilf(a); } + SE_INLINE float Recip(float a) { return 1.0f / a; } + SE_INLINE float Floor(float a) { return floorf(a); } + SE_INLINE float Round(float a) { return roundf(a); } + SE_INLINE float Pow(float x, float y) { return powf(x, y); } + SE_INLINE float Atan2(float y, float x) { return atan2f(y, x); } + SE_INLINE constexpr float Radians(float a) { return a * SE_SCF(SE_PI / 180.0); } + + SE_INLINE float Rsqrt(float a) { return 1.0f / sqrtf(a); } + + template 
SE_INLINE constexpr Type Square(Type a) { return a * a; } + template SE_INLINE constexpr Type Cubic(Type a) { return a * a * a; } + template SE_INLINE constexpr Type Min(Type a, Type b) { return SE_MIN(a, b); } + template SE_INLINE constexpr Type Max(Type a, Type b) { return SE_MAX(a, b); } + template SE_INLINE constexpr Type Sign(Type a) { return Type(a > 0) - Type(a < 0); } + template SE_INLINE constexpr Type Clamp(Type a, Type minVal, Type maxVal) { return Min(Max(minVal, a), maxVal); } + + template SE_INLINE bool IsNan(Type a) { return false; } + template SE_INLINE bool IsInf(Type a) { return false; } + template SE_INLINE Type Sum(const SeVector2 & a) { return a.x + a.y; } + template SE_INLINE Type Sum(const SeVector3 & a) { return a.x + a.y + a.z; } + template SE_INLINE Type Sum(const SeVector4 & a) { return a.x + a.y + a.z + a.w; } + template SE_INLINE Type Average(const Type & v0, const Type & v1) { return (v0 + v1) / 2; } + template SE_INLINE Type Average(const Type & v0, const Type & v1, const Type & v2) { return (v0 + v1 + v2) / 3; } + template SE_INLINE Type Lerp(const Type & a, const Type & b, float ratio) { return a * (1.0f - ratio) + b * ratio; } + + template SE_INLINE Type Dot(const SeVector & a, const SeVector & b) { return Sum(a * b); } + template SE_INLINE SeVector Abs(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Sign(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Recip(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Rsqrt(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Cubic(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Square(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Min(const SeVector & a, const SeVector & b) { return Helper::template Expand>(a, b); } + template SE_INLINE SeVector Max(const SeVector & a, const SeVector & b) { return 
Helper::template Expand>(a, b); } + template SE_INLINE SeVector Clamp(const SeVector & a, Type minVal, Type maxVal) { return Helper::Expand(a, minVal, maxVal); } + + template SE_INLINE Type Min(const Type & a, const Type & b, const Args &... args) { return Min(Min(a, b), args...); } + template SE_INLINE Type Max(const Type & a, const Type & b, const Args &... args) { return Max(Max(a, b), args...); } + + SE_INLINE SeVector3 Cross(const SeVector2 & a, const SeVector2 & b) { return SeVector3(0.0f, 0.0f, a.x * b.y - a.y * b.x); } + SE_INLINE SeVector3 Cross(const SeVector3 & a, const SeVector3 & b) { return SeVector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); } + SE_INLINE SeVector4 Cross(const SeVector4 & a, const SeVector4 & b) { return SeVector4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f); } + + template SE_INLINE float SqrLength(const SeVector & a) { return Dot(a, a); } + template SE_INLINE float Length(const SeVector & a) { return Sqrt(Dot(a, a)); } + template SE_INLINE float RelativeDistance(const SeVector& a, const SeVector& b) { float maxLength = Max(Length(b), Length(a)); if (maxLength == 0.f) return 0.f; else return Length(b - a) / maxLength; } + template SE_INLINE SeVector Sin(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Cos(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Tan(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Asin(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Acos(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Atan(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Sqrt(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Ceil(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Floor(const SeVector & a) { return Helper::Expand(a); } + 
template SE_INLINE SeVector Round(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE SeVector Radians(const SeVector & a) { return Helper::Expand(a); } + template SE_INLINE float TripleProduct(const SeVector & a, const SeVector & b, const SeVector & c) { return Dot(a, Cross(b, c)); } + + //! 若输入a为NaN或0,返回0.0f。 + template SE_INLINE SeVector Normalize(const SeVector & a) + { + float sqrLen = SqrLength(a); if (sqrLen > 0.0f) { return a * Rsqrt(sqrLen); } + + // printf("Normalizing an invalid vector: (%f, %f, %f)!\n", a.x, a.y, a.z); + + return SeVector(0.0f); + } + + template SE_INLINE bool Normalized(SeVector & a) + { + float sqrLen = SqrLength(a); + if (sqrLen > 0.f) + { + a *= Rsqrt(sqrLen); + return true; + } + return false; + } + + template<> SE_INLINE bool IsNan(float a) { return isnan(a); } + template<> SE_INLINE bool IsNan(double a) { return isnan(a); } + template<> SE_INLINE bool IsNan(SeVector2 a) { return isnan(a.x) || isnan(a.y); } + template<> SE_INLINE bool IsNan(SeVector3 a) { return isnan(a.x) || isnan(a.y) || isnan(a.z); } + template<> SE_INLINE bool IsNan(SeVector4 a) { return isnan(a.x) || isnan(a.y) || isnan(a.z) || isnan(a.w); } + template<> SE_INLINE bool IsNan(SeMatrix3f a) { return IsNan(a.Column(0)) || IsNan(a.Column(1)) || IsNan(a.Column(2)); } + + template<> SE_INLINE bool IsInf(float a) { return isinf(a); } + template<> SE_INLINE bool IsInf(double a) { return isinf(a); } + template<> SE_INLINE bool IsInf(SeVector2 a) { return isinf(a.x) || isinf(a.y); } + template<> SE_INLINE bool IsInf(SeVector3 a) { return isinf(a.x) || isinf(a.y) || isinf(a.z); } + template<> SE_INLINE bool IsInf(SeVector4 a) { return isinf(a.x) || isinf(a.y) || isinf(a.z) || isinf(a.w); } + template<> SE_INLINE bool IsInf(SeMatrix3f a) { return IsInf(a.Column(0)) || IsInf(a.Column(1)) || IsInf(a.Column(2)); } +}; + +SE_NAMESPACE_END + +#undef min +#undef max \ No newline at end of file diff --git 
a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMathSimd.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMathSimd.h new file mode 100644 index 0000000..a7e7cca --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMathSimd.h @@ -0,0 +1,134 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "SeVectorSimd.h" + +#include "SeMath.h" + +SE_NAMESPACE_BEGIN + +namespace Math +{ + template struct HelperSimd + { + using Smv3 = SimdVector3; + + template using Function = Type(*)(Types...); + + template Fn1> static SE_INLINE Smv3 Expand(const Smv3& a) { return Smv3(Fn1(a.x), Fn1(a.y), Fn1(a.z)); } + + template Fn2> static SE_INLINE Smv3 Expand(const Smv3& a, Type s) { return Smv3(Fn2(a.x, s), Fn2(a.y, s), Fn2(a.z, s)); } + + template Fn2> static SE_INLINE Smv3 Expand(const Smv3& a, const Smv3& b) { return Smv3(Fn2(a.x, b.x), Fn2(a.y, b.y), Fn2(a.z, b.z)); } + + template Fn3> static SE_INLINE Smv3 Expand(const Smv3& a, Type s0, Type s1) { return Smv3(Fn3(a.x, s0, s1), Fn3(a.y, s0, s1), Fn3(a.z, s0, s1)); } + + template Fn3> static SE_INLINE Smv3 Expand(const Smv3& a, const Smv3& b, Type scalar) { return Smv3(Fn3(a.x, b.x, scalar), Fn3(a.y, b.y, scalar), Fn3(a.z, b.z, scalar)); } + }; + + template SE_INLINE SimdVector3 Abs(const SimdVector3& a) { return HelperSimd::Expand(a); } + template SE_INLINE SimdVector3 Sign(const SimdVector3& a) { return HelperSimd::Expand(a); } + template SE_INLINE SimdVector3 Recip(const SimdVector3& a) { return HelperSimd::Expand(a); } + template SE_INLINE SimdVector3 Rsqrt(const SimdVector3& a) { return HelperSimd::Expand(a); } + template SE_INLINE SimdVector3 Cubic(const SimdVector3& a) { return HelperSimd::Expand(a); } + template SE_INLINE SimdVector3 Square(const SimdVector3& a) { return HelperSimd::Expand(a); } + template SE_INLINE SimdVector3 Min(const SimdVector3& a, const SimdVector3& b) { return HelperSimd::Expand(a, b); } + template SE_INLINE SimdVector3 Max(const SimdVector3& a, const SimdVector3& b) { return HelperSimd::Expand(a, b); } + template SE_INLINE SimdVector3 Clamp(const SimdVector3& a, Type minVal, Type maxVal) { return HelperSimd::Expand(a, minVal, maxVal); } + + + template<> SE_INLINE SimdVector3 Sin(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE 
SimdVector3 Cos(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE SimdVector3 Tan(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE SimdVector3 Asin(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE SimdVector3 Acos(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE SimdVector3 Atan(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE SimdVector3 Ceil(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE SimdVector3 Floor(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE SimdVector3 Round(const SimdVector3& a) { return HelperSimd::Expand(a); } + template<> SE_INLINE SimdVector3 Radians(const SimdVector3& a) { return HelperSimd::Expand(a); } + + /********************************************************************* + ************************* Mathematics ************************** + *********************************************************************/ + + template SE_INLINE Type Sum(const SimdVector3 & a) { return a.x + a.y + a.z; } + template SE_INLINE Type Dot(const SimdVector3 & a, const SimdVector3 & b) { return Sum(a * b); } + + template<> SE_INLINE bool IsNan(const Simd3f & a) { return isnan(a.x) || isnan(a.y) || isnan(a.z) || isnan(a.w); } + template<> SE_INLINE bool IsInf(const Simd3f & a) { return isinf(a.x) || isinf(a.y) || isinf(a.z) || isinf(a.w); } + + template<> SE_INLINE Simd3f Recip(const Simd3f & a) { return Simd3f(_mm_rcp_ps(a.pack)); } + template<> SE_INLINE Simd3f Sqrt(const Simd3f & a) { return Simd3f(_mm_sqrt_ps(a.pack)); } + template<> SE_INLINE Simd3f Rsqrt(const Simd3f & a) { return Simd3f(_mm_rsqrt_ps(a.pack)); } + template<> SE_INLINE Simd3f Min(const Simd3f & a, const Simd3f & b) { return Simd3f(_mm_min_ps(a.pack, b.pack)); } + template<> SE_INLINE Simd3f Max(const Simd3f & a, const Simd3f & b) { return Simd3f(_mm_max_ps(a.pack, b.pack)); } + + 
template<> SE_INLINE float SqrLength(const Simd3f& a) { return Dot(a, a); } + template<> SE_INLINE float Length(const Simd3f& a) { return Sqrt(Dot(a, a)); } + template<> SE_INLINE float RelativeDistance(const Simd3f& a, const Simd3f& b) { float maxLength = Max(Length(b), Length(a)); if (maxLength == 0.f) return 0.f; else return Length(b - a) / maxLength; } + + template<> SE_INLINE Simd3f Normalize(const Simd3f & a) + { + float sqrLength = Math::SqrLength(a); + return sqrLength > 0.f ? a * Rsqrt(Simd3f(sqrLength, sqrLength, sqrLength, 1.f)) : Simd3f(0.0f); + } + + template<> SE_INLINE bool Normalized(Simd3f & a) + { + float sqrLength = Math::SqrLength(a); + if (sqrLength > 0.f) + { + a *= Rsqrt(Simd3f(sqrLength, sqrLength, sqrLength, 1.f)); + return true; + } + a = Simd3f(0.f); + return false; + } + + SE_INLINE Simd3f Cross(const Simd3f & a, const Simd3f & b) + { + __m128 t0 = _mm_shuffle_ps(a.pack, a.pack, _MM_SHUFFLE(3, 0, 2, 1)); //! (y0, z0, x0, 0) + __m128 t1 = _mm_shuffle_ps(b.pack, b.pack, _MM_SHUFFLE(3, 1, 0, 2)); //! (z1, x1, y1, 0) + + __m128 t2 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 0, 2, 1)); //! (z0, x0, y0, 0) + __m128 t3 = _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 1, 0, 2)); //! (y1, z1, x1, 0) + + return _mm_sub_ps(_mm_mul_ps(t0, t1), _mm_mul_ps(t2, t3)); + } + + template<> SE_INLINE float TripleProduct(const SimdVector3& a, const SimdVector3& b, const SimdVector3& c) + { + return Dot(a, Cross(b, c)); + } +}; + +SE_NAMESPACE_END + +#undef min +#undef max diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMatrix.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMatrix.h new file mode 100644 index 0000000..8570f95 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMatrix.h @@ -0,0 +1,1400 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. 
+// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "SeVector.h" + + +SE_NAMESPACE_BEGIN + + +template class SeMatrix +{ + +public: + + static constexpr int nRows = N; + static constexpr int nCols = M; + static constexpr int nElements = N * M; + +private: + + union + { + Type m_data[nElements]; + + SeVector m_columns[M]; + }; + + const SeMatrix & Self() const { return *this; } + +public: + + SeMatrix() {} + + SeMatrix(const Type* d) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] = d[i]; + } + } + + explicit SeMatrix(const Type & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] = rhs; + } + } + + const Type & operator() (int i, int j) const { return m_data[j*nRows + i]; } + Type & operator() (int i, int j) { return m_data[j*nRows + i]; } + + + SeMatrix operator - () const + { + SeMatrix rhs(Type(0)); + for (int i = 0; i < nElements; ++i) + { + rhs.m_data[i] = -m_data[i]; + } + return rhs; + } + + SeMatrix operator + (const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] + rhs.m_data[i]; + } + return out; + } + + SeMatrix operator - (const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] - rhs.m_data[i]; + } + return out; + } + + SeMatrix & operator += (const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] += rhs.m_data[i]; + } + return *this; + } + + SeMatrix & operator -= (const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] -= rhs.m_data[i]; + } + return *this; + } + + + SeMatrix operator * (const Type & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] * rhs; + } + return out; + } + + SeMatrix & operator *= (const Type & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] *= rhs; + } + return *this; + } + + SeVector operator * (const SeVector & rhs) const + { + SeVector ans(Type(0)); + for (int i = 
0; i < N; ++i) + { + Type s(0); + for (int j = 0; j < M; ++j) + { + s += Self()(i, j) * rhs[j]; + } + ans[i] = s; + } + return ans; + } + + template SeMatrix operator * (const SeMatrix& rhs) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < N; ++i) + { + for (int k = 0; k < K; ++k) + { + Type s(0); + for (int j = 0; j < M; ++j) + { + s += Self()(i, j) * rhs(j, k); + } + ans(i, k) = s; + } + } + return ans; + } + + + SeMatrix P2PProduct(const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (size_t i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] * rhs.m_data[i]; + } + return out; + } + + SeMatrix P2PDivide(const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] / rhs.m_data[i]; + } + return out; + } + + template SeMatrix KroneckerProduct(const SeMatrix& rhs) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < N; ++i) + { + for (int j = 0; j < M; ++j) + { + int rStart = i * K; + int cStart = j * L; + + Type ij = Self()(i, j); + + for (int p = 0; p < K; ++p) + { + for (int q = 0; q < L; ++q) + { + ans(rStart + p, cStart + q) = ij * rhs(p, q); + } + } + } + } + } + + + SeVector Row(int rId) const + { + SeVector x; + for (int k = 0; k < M; ++k) + { + x[k] = Self()(rId, k); + } + return x; + } + + SeVector Column(int cId) const + { + return m_columns[cId]; + } + + template SeMatrix Block(int row, int col) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < K; ++i) + { + for (int j = 0; j < L; ++j) + { + ans(i, j) = Self()(i + row, j + col); + } + } + return ans; + } + + SeVector Diagonal()const + { + SeVector x; + for (int i = 0; i < SE_MIN(N, M); ++i) + { + x[i] = Self()(i, i); + } + return x; + } + + void SetRow(const SeVector & row, int rId) + { + for (int k = 0; k < M; ++k) + { + m_data[rId + k * nRows] = row[k]; + } + } + + void SetColumn(const SeVector & column, int cId) + { + m_columns[cId] = column; + } + + template void SetBlock(const SeMatrix & block, int 
rowStart, int colStart) + { + for (int i = 0; i < K; ++i) + { + for (int j = 0; j < L; ++j) + { + Self()(i + rowStart, j + colStart) = block(i, j); + } + } + } + + void SetDiagonal(const SeVector & diagonal) + { + for (int i = 0; i < N; ++i) + { + for (int j = 0; j < M; ++j) + { + Self()(i, j) = (i == j) ? diagonal[i] : Type(0); + } + } + } + + SeMatrix Transpose() const + { + SeMatrix trans(Type(0)); + for (int i = 0; i < N; ++i) + { + for (int j = 0; j < M; ++j) + { + trans(j, i) = Self()(i, j); + } + } + return trans; + } + + static SeMatrix Identity(Type diagonal) + { + SeMatrix identity = SeMatrix(Type(0)); + for (int i = 0; i < SE_MIN(N, M); ++i) + { + identity(i, i) = diagonal; + } + return identity; + } + + + + Type FrobeniusNorm() const + { + Type sum(0); + for (int i = 0; i < nElements; ++i) + { + sum += m_data[i] * m_data[i]; + } + return std::sqrt(sum); + } + + Type Trace() const + { + Type trace(0); + for (int i = 0; i < SE_MIN(N, M); ++i) + { + trace += Self()(i, i); + } + return trace; + } + +}; + +template +SE_INLINE SeMatrix operator * (const Type & scalar, const SeMatrix & mat) +{ + return mat * scalar; +} + +template +SE_INLINE SeMatrix OuterProduct(const SeVector& vecM, const SeVector& vecN) +{ + SeMatrix ans(Type(0)); + for (int i = 0; i < M; ++i) + { + for (int j = 0; j < N; ++j) + { + ans(i, j) = vecM[i] * vecN[j]; + } + } + return ans; +} + + +////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////// + + +//template class _declspec(align(16)) SeMatrix +template class SE_ALIGN(16) SeMatrix +{ + +public: + + static constexpr int DIM = 2; + + static constexpr int nElements = DIM * DIM; + +private: + + union + { + Type m_data[nElements]; + + SeVector2 m_columns[DIM]; + }; + + const SeMatrix & Self() const { return *this; } + +public: + + SeMatrix() {} + + explicit SeMatrix(const Type & v) + { + m_data[0] = v; m_data[1] = v; m_data[2] = v; m_data[3] = 
v; + } + + SeMatrix(const Type & v0, const Type & v1, const Type & v2, const Type & v3) + { + m_data[0] = v0; m_data[1] = v1; m_data[2] = v2; m_data[3] = v3; + } + + + const Type & operator() (int i, int j) const { return m_data[j*DIM + i]; } + Type & operator() (int i, int j) { return m_data[j*DIM + i]; } + + SeMatrix operator - () const + { + SeMatrix rhs(Type(0)); + for (int i = 0; i < nElements; ++i) + { + rhs.m_data[i] = -m_data[i]; + } + return rhs; + } + + SeMatrix operator + (const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] + rhs.m_data[i]; + } + return out; + } + + SeMatrix operator - (const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] - rhs.m_data[i]; + } + return out; + } + + SeMatrix & operator += (const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] += rhs.m_data[i]; + } + return *this; + } + + SeMatrix & operator -= (const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] -= rhs.m_data[i]; + } + return *this; + } + + + SeMatrix operator * (const Type & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] * rhs; + } + return out; + } + + SeMatrix & operator *= (const Type & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] *= rhs; + } + return *this; + } + + SeVector operator * (const SeVector & rhs) const + { + SeVector ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + Type s(0); + for (int j = 0; j < DIM; ++j) + { + s += Self()(i, j) * rhs[j]; + } + ans[i] = s; + } + return ans; + } + + template SeMatrix operator * (const SeMatrix& rhs) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + for (int k = 0; k < K; ++k) + { + Type s(0); + for (int j = 0; j < DIM; ++j) + { + s += Self()(i, j) * rhs(j, k); + } + ans(i, k) = s; + } + } + return ans; + } + + + 
SeMatrix P2PProduct(const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (size_t i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] * rhs.m_data[i]; + } + return out; + } + + SeMatrix P2PDivide(const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] / rhs.m_data[i]; + } + return out; + } + + template SeMatrix KroneckerProduct(const SeMatrix& rhs) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + for (int j = 0; j < DIM; ++j) + { + int rStart = i * K; + int cStart = j * L; + + Type ij = Self()(i, j); + + for (int p = 0; p < K; ++p) + { + for (int q = 0; q < L; ++q) + { + ans(rStart + p, cStart + q) = ij * rhs(p, q); + } + } + } + } + } + + + SeVector Row(int rId) const + { + SeVector x; + x[0] = m_data[rId]; + x[1] = m_data[rId + DIM]; + return x; + } + + SeVector Column(int cId) const + { + return m_columns[cId]; + } + + SeVector Diagonal()const + { + SeVector x; + x[0] = m_data[0]; + x[1] = m_data[3]; + return x; + } + + void SetRow(const SeVector & row, int rId) + { + m_data[rId] = row[0]; + m_data[rId + 2] = row[1]; + } + + void SetColumn(const SeVector & column, int cId) + { + m_columns[cId] = column; + } + + void SetDiagonal(const SeVector & diagonal) + { + m_data[0] = diagonal[0]; m_data[1] = 0; + m_data[2] = 0; m_data[3] = diagonal[1]; + } + + SeMatrix Transpose() const + { + SeMatrix trans = Self(); + trans(0, 1) = Self()(1, 0); + trans(1, 0) = Self()(0, 1); + return trans; + } + + static SeMatrix Identity(Type diagonal = Type(1)) + { + SeMatrix identity(Type(0)); + identity(0, 0) = diagonal; + identity(1, 1) = diagonal; + return identity; + } + + Type FrobeniusNorm() const + { + Type sum(0); + for (int i = 0; i < nElements; ++i) + { + sum += m_data[i] * m_data[i]; + } + return std::sqrt(sum); + } + + Type Trace() const + { + return m_data[0] + m_data[3]; + } + + Type Det() const + { + return m_data[0] * m_data[3] - m_data[1] * m_data[2]; + } + + 
void Inverse(SeMatrix & result) const + { + Type dt = 1 / Det(); + + result(0, 0) = dt * Self()(1, 1); + result(1, 0) = -dt * Self()(1, 0); + result(0, 1) = -dt * Self()(0, 1); + result(1, 1) = dt * Self()(0, 0); + } + + SeMatrix Inverse() const + { + SeMatrix r(Self()); + Inverse(r); + return r; + } +}; + + + +template class SeMatrix +{ + +public: + + static constexpr int DIM = 3; + + static constexpr int nElements = DIM * DIM; + +private: + + union + { + Type m_data[nElements]; + SeVector3 m_columns[DIM]; + }; + + const SeMatrix & Self() const { return *this; } + +public: + + SeMatrix() {} + + explicit SeMatrix(const Type & v) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] = v; + } + } + + const Type & operator() (int i, int j) const { return m_data[j*DIM + i]; } + Type & operator() (int i, int j) { return m_data[j*DIM + i]; } + + SeMatrix operator - () const + { + SeMatrix rhs(Type(0)); + for (int i = 0; i < nElements; ++i) + { + rhs.m_data[i] = -m_data[i]; + } + return rhs; + } + + SeMatrix operator + (const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] + rhs.m_data[i]; + } + return out; + } + + SeMatrix operator - (const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] - rhs.m_data[i]; + } + return out; + } + + SeMatrix & operator += (const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] += rhs.m_data[i]; + } + return *this; + } + + SeMatrix & operator -= (const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] -= rhs.m_data[i]; + } + return *this; + } + + void operator = (const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] = rhs.m_data[i]; + } + } + + + SeMatrix operator * (const Type & rhs) const + { + SeMatrix out(Type(0)); + + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] * rhs; + } + + return out; + } 
+ + SeMatrix operator / (const Type & rhs) const + { + SeMatrix out(Type(0)); + + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] / rhs; + } + + return out; + } + + SeMatrix & operator *= (const Type & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] *= rhs; + } + + return *this; + } + + SeMatrix & operator /= (const Type & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] /= rhs; + } + + return *this; + } + + SeVector operator * (const SeVector & rhs) const + { + SeVector ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + Type s(0); + for (int j = 0; j < DIM; ++j) + { + s += Self()(i, j) * rhs[j]; + } + ans[i] = s; + } + return ans; + } + + template SeMatrix operator * (const SeMatrix& rhs) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + for (int k = 0; k < K; ++k) + { + Type s(0); + for (int j = 0; j < DIM; ++j) + { + s += Self()(i, j) * rhs(j, k); + } + ans(i, k) = s; + } + } + return ans; + } + + + SeMatrix P2PProduct(const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (size_t i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] * rhs.m_data[i]; + } + return out; + } + + SeMatrix P2PDivide(const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] / rhs.m_data[i]; + } + return out; + } + + template SeMatrix KroneckerProduct(const SeMatrix& rhs) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + for (int j = 0; j < DIM; ++j) + { + int rStart = i * K; + int cStart = j * L; + + Type ij = Self()(i, j); + + for (int p = 0; p < K; ++p) + { + for (int q = 0; q < L; ++q) + { + ans(rStart + p, cStart + q) = ij * rhs(p, q); + } + } + } + } + } + + + SeVector Row(int rId) const + { + SeVector x; + x[0] = m_data[rId]; + x[1] = m_data[rId + 3]; + x[2] = m_data[rId + 6]; + return x; + } + + SeVector Column(int cId) const + { + return m_columns[cId]; + } + + SeVector Diagonal()const + { + SeVector 
x; + x[0] = m_data[0]; + x[1] = m_data[4]; + x[2] = m_data[8]; + return x; + } + + void SetRow(const SeVector & row, int rId) + { + m_data[rId] = row[0]; + m_data[rId + 3] = row[1]; + m_data[rId + 6] = row[2]; + } + + void SetColumn(const SeVector & column, int cId) + { + m_columns[cId] = column; + } + + void SetDiagonal(const SeVector & diagonal) + { + Type zero(0); + + m_data[0] = diagonal[0]; m_data[1] = zero; m_data[2] = zero; + m_data[3] = zero; m_data[4] = diagonal[1]; m_data[5] = zero; + m_data[6] = zero; m_data[7] = zero; m_data[8] = diagonal[2]; + } + + SeMatrix Transpose() const + { + SeMatrix trans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + for (int j = 0; j < DIM; ++j) + { + trans(i, j) = Self()(j, i); + } + } + return trans; + } + + static SeMatrix Identity(Type diagonal = Type(1)) + { + SeMatrix identity(Type(0)); + identity(0, 0) = diagonal; + identity(1, 1) = diagonal; + identity(2, 2) = diagonal; + return identity; + } + + Type FrobeniusNorm() const + { + Type sum(0); + for (int i = 0; i < nElements; ++i) + { + sum += m_data[i] * m_data[i]; + } + return std::sqrt(sum); + } + + Type Trace() const + { + return m_data[0] + m_data[4] + m_data[8]; + } + + Type Det() const + { + return m_data[0] * (m_data[4] * m_data[8] - m_data[7] * m_data[5]) + - m_data[3] * (m_data[1] * m_data[8] - m_data[7] * m_data[2]) + + m_data[6] * (m_data[1] * m_data[5] - m_data[4] * m_data[2]); + } + + void Inverse(SeMatrix & result) const + { + Type dt = 1 / Det(); + + result(0, 0) = dt * (Self()(1, 1) * Self()(2, 2) - Self()(1, 2) * Self()(2, 1)); + result(1, 0) = dt * (Self()(1, 2) * Self()(2, 0) - Self()(1, 0) * Self()(2, 2)); + result(2, 0) = dt * (Self()(1, 0) * Self()(2, 1) - Self()(1, 1) * Self()(2, 0)); + + result(0, 1) = dt * (Self()(0, 2) * Self()(2, 1) - Self()(0, 1) * Self()(2, 2)); + result(1, 1) = dt * (Self()(0, 0) * Self()(2, 2) - Self()(0, 2) * Self()(2, 0)); + result(2, 1) = dt * (Self()(0, 1) * Self()(2, 0) - Self()(0, 0) * Self()(2, 1)); + + result(0, 
2) = dt * (Self()(0, 1) * Self()(1, 2) - Self()(0, 2) * Self()(1, 1)); + result(1, 2) = dt * (Self()(0, 2) * Self()(1, 0) - Self()(0, 0) * Self()(1, 2)); + result(2, 2) = dt * (Self()(0, 0) * Self()(1, 1) - Self()(0, 1) * Self()(1, 0)); + } + + SeMatrix Inverse() const + { + SeMatrix r(Self()); + Inverse(r); + return r; + } +}; + +template +SE_INLINE SeMatrix operator * (const Type & scalar, const SeMatrix & mat) +{ + return mat * scalar; +} + +/*inline Simd3f operator * (const SeMatrix & mat, const Simd3f & vec) +{ + return Simd3f(mat * vec.xyz); +}*/ + + +////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////// + + +template class SE_ALIGN(16) SeMatrix +{ + +public: + + static constexpr int DIM = 4; + static constexpr int nElements = DIM * DIM; + +private: + + union + { + Type m_data[nElements]; + + SeVector4 m_columns[DIM]; + }; + + const SeMatrix & Self() const { return *this; } + +public: + + SeMatrix() {} + + explicit SeMatrix(const Type & scalar) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] = scalar; + } + } + + + const Type & operator() (int i, int j)const { return m_data[j*DIM + i]; } + Type & operator() (int i, int j) { return m_data[j*DIM + i]; } + + SeMatrix operator - () const + { + SeMatrix rhs(Type(0)); + for (int i = 0; i < nElements; ++i) + { + rhs.m_data[i] = -m_data[i]; + } + return rhs; + } + + SeMatrix operator + (const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] + rhs.m_data[i]; + } + return out; + } + + SeMatrix operator - (const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] - rhs.m_data[i]; + } + return out; + } + + SeMatrix & operator += (const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] += rhs.m_data[i]; + } + return *this; + } + + SeMatrix & operator -= 
(const SeMatrix & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] -= rhs.m_data[i]; + } + return *this; + } + + + SeMatrix operator * (const Type & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] * rhs; + } + return out; + } + + SeMatrix & operator *= (const Type & rhs) + { + for (int i = 0; i < nElements; ++i) + { + m_data[i] *= rhs; + } + return *this; + } + + SeVector operator * (const SeVector & rhs) const + { + SeVector ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + Type s(0); + for (int j = 0; j < DIM; ++j) + { + s += Self()(i, j) * rhs[j]; + } + ans[i] = s; + } + return ans; + } + + template SeMatrix operator * (const SeMatrix& rhs) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + for (int k = 0; k < K; ++k) + { + Type s(0); + for (int j = 0; j < DIM; ++j) + { + s += Self()(i, j) * rhs(j, k); + } + ans(i, k) = s; + } + } + return ans; + } + + + SeMatrix P2PProduct(const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (size_t i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] * rhs.m_data[i]; + } + return out; + } + + SeMatrix P2PDivide(const SeMatrix & rhs) const + { + SeMatrix out(Type(0)); + for (int i = 0; i < nElements; ++i) + { + out.m_data[i] = m_data[i] / rhs.m_data[i]; + } + return out; + } + + template SeMatrix KroneckerProduct(const SeMatrix& rhs) const + { + SeMatrix ans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + for (int j = 0; j < DIM; ++j) + { + int rStart = i * K; + int cStart = j * L; + + Type ij = Self()(i, j); + + for (int p = 0; p < K; ++p) + { + for (int q = 0; q < L; ++q) + { + ans(rStart + p, cStart + q) = ij * rhs(p, q); + } + } + } + } + } + + + SeVector Row(int rId) const + { + SeVector x; + x[0] = m_data[rId]; + x[1] = m_data[rId + DIM]; + x[2] = m_data[rId + DIM * 2]; + x[3] = m_data[rId + DIM * 3]; + return x; + } + + SeVector Column(int cId) const + { + return m_columns[cId]; + } + + SeVector 
Diagonal()const + { + SeVector x; + x[0] = m_data[0]; + x[1] = m_data[5]; + x[2] = m_data[10]; + x[3] = m_data[15]; + return x; + } + + void SetRow(const SeVector & row, int rId) + { + m_data[rId] = row[0]; + m_data[rId + DIM] = row[1]; + m_data[rId + DIM * 2] = row[2]; + m_data[rId + DIM * 3] = row[3]; + } + + void SetColumn(const SeVector & column, int cId) + { + m_columns[cId] = column; + } + + void SetDiagonal(const SeVector & diagonal) + { + for (int i = 0; i < DIM; ++i) + { + for (int j = 0; j < DIM; ++j) + { + Self()(i, j) = (i == j) ? diagonal[i] : Type(0); + } + } + } + + SeMatrix Transpose() const + { + SeMatrix trans(Type(0)); + for (int i = 0; i < DIM; ++i) + { + for (int j = 0; j < DIM; ++j) + { + trans(j, i) = Self()(i, j); + } + } + return trans; + } + + static SeMatrix Identity(Type diagonal = Type(1)) + { + SeMatrix identity(Type(0)); + identity(0, 0) = diagonal; + identity(1, 1) = diagonal; + identity(2, 2) = diagonal; + identity(3, 3) = diagonal; + return identity; + } + + Type FrobeniusNorm() const + { + Type sum(0); + for (int i = 0; i < nElements; ++i) + { + sum += m_data[i] * m_data[i]; + } + return std::sqrt(sum); + } + + Type Trace() const + { + return m_data[0] + m_data[5] + m_data[10] + m_data[15]; + } + + + + SeMatrix GetRotationPart() const + { + SeMatrix R; + for (int y = 0; y < 3; ++y) + for (int x = 0; x < 3; ++x) + R(y, x) = (*this)(y, x); + return R; + } + + SeVector3 GetTranslationPart() const + { + SeVector3 t; + for (int y = 0; y < 3; ++y) + t[y] = (*this)(y, 3); + return t; + } + + void SetRotationPart(const SeMatrix & R) + { + for (int y = 0; y < 3; ++y) + for (int x = 0; x < 3; ++x) + { + Self()(y, x) = R(y, x); + } + } + + void SetTranslationPart(const SeVector3 & t) + { + for (int y = 0; y < 3; ++y) + { + Self()(y, 3) = t[y]; + } + } +}; + +////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////// + +template static SeMatrix 
Kronecker(const SeMatrix& left, const SeMatrix& rhs) +{ + SeMatrix ans(Type(0)); + for (int i = 0; i < N; ++i) + { + for (int j = 0; j < M; ++j) + { + int rStart = i * K; + int cStart = j * L; + + Type ij = left(i, j); + + for (int p = 0; p < K; ++p) + { + for (int q = 0; q < L; ++q) + { + ans(rStart + p, cStart + q) = ij * rhs(p, q); + } + } + } + } + return ans; +} + + +template static SeMatrix GetColumn(const SeMatrix& a, int ci) +{ + SeMatrix ret; + for (int i = 0; i < N; i++) + { + ret(i, 0) = a(i, ci); + } + return ret; +} + + + + +////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////// + +template SE_INLINE bool HasNan(const SeMatrix & mat) +{ + for (int i = 0; i < N; ++i) + { + for (int j = 0; j < M; ++j) + { + if (std::isnan(mat(i, j))) + return true; + } + } + return false; +} + +template +struct AnonymousArray { T data[N]; }; + +template +static SeMatrix CreateWithColumnMajor(const AnonymousArray& a) +{ + SeMatrix m; + + for (int ri = 0; ri < Row; ri++) + { + for (int ci = 0; ci < Col; ci++) + { + m(ri, ci) = a.data[ri + ci * Row]; + } + } + + return m; +} + +////////////////////////////////////////////////////////////////////////// + +template using SeMatrix2 = SeMatrix; +template using SeMatrix3 = SeMatrix; +template using SeMatrix4 = SeMatrix; +template using SeMatrix9 = SeMatrix; +template using SeMatrix12 = SeMatrix; + +using SeMatrix2f = SeMatrix2; +using SeMatrix3f = SeMatrix3; +using SeMatrix4f = SeMatrix4; +using SeMatrix9f = SeMatrix9; +using SeMatrix12f = SeMatrix12; + +using SeMatrix2i = SeMatrix2; +using SeMatrix3i = SeMatrix3; +using SeMatrix4i = SeMatrix4; + +using SeMatrix23f = SeMatrix; +using SeMatrix32f = SeMatrix; +using SeMatrix34f = SeMatrix; +using SeMatrix43f = SeMatrix; + + +SE_NAMESPACE_END diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMorton.h 
b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMorton.h new file mode 100644 index 0000000..1c854b8 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeMorton.h @@ -0,0 +1,108 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "SeMath.h" + +SE_NAMESPACE_BEGIN + +/************************************************************************* +**************************** SeMorton64 **************************** +*************************************************************************/ + +/** + * @brief 64-bit Morton-Code object. + */ +class SE_ALIGN(8) SeMorton64 +{ + +public: + + using value_type = unsigned long long; + + //! @brief Default constructor. + SeMorton64() {} + + //! @brief Convert to vulue_type. + operator value_type() const { return m_Value; } + + //! @brief Constructed by a given value. + SeMorton64(value_type _Value) : m_Value(_Value) {} + + //! @brief Encoded by the given 3d point located within the unit cube (controlable precision). + template void Encode(float x, float y, float z, value_type lastBits) + { + static_assert(precision <= 21, "The highest precision for 64-bit Morton code is 21."); + + x = Math::Clamp(x * (1ll << precision), 0.0f, (1ll << precision) - 1.0f); + y = Math::Clamp(y * (1ll << precision), 0.0f, (1ll << precision) - 1.0f); + z = Math::Clamp(z * (1ll << precision), 0.0f, (1ll << precision) - 1.0f); + + value_type xx = SeMorton64::ExpandBits(static_cast(x)) << (66 - 3 * precision); + value_type yy = SeMorton64::ExpandBits(static_cast(y)) << (65 - 3 * precision); + value_type zz = SeMorton64::ExpandBits(static_cast(z)) << (64 - 3 * precision); + + constexpr value_type bitMask = ~value_type(0) >> (3 * precision); + + m_Value = xx + yy + zz + (lastBits & bitMask); + } + + //! @brief Encoded by the given 3d point located within the unit cube (full precision). 
+ void Encode(float x, float y, float z) + { + x = Math::Clamp(x * (1ll << 21), 0.0f, (1ll << 21) - 1.0f); + y = Math::Clamp(y * (1ll << 21), 0.0f, (1ll << 21) - 1.0f); + z = Math::Clamp(z * (1ll << 21), 0.0f, (1ll << 21) - 1.0f); + + value_type xx = SeMorton64::ExpandBits(static_cast(x)); + value_type yy = SeMorton64::ExpandBits(static_cast(y)); + value_type zz = SeMorton64::ExpandBits(static_cast(z)); + + m_Value = (xx << 2) + (yy << 1) + zz; + } + +private: + + /** + * @brief Expand bits by inserting two zeros after each bit. + * @e.g. 0000 0000 1111 -> 0010 0100 1001 + */ + static value_type ExpandBits(value_type bits) + { + bits = (bits | (bits << 32)) & 0xFFFF00000000FFFFu; + bits = (bits | (bits << 16)) & 0x00FF0000FF0000FFu; + bits = (bits | (bits << 8)) & 0xF00F00F00F00F00Fu; + bits = (bits | (bits << 4)) & 0x30C30C30C30C30C3u; + return (bits | (bits << 2)) & 0x9249249249249249u; + } + +private: + + value_type m_Value; +}; + +SE_NAMESPACE_END \ No newline at end of file diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeOmp.cpp b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeOmp.cpp new file mode 100644 index 0000000..4a69503 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeOmp.cpp @@ -0,0 +1,33 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. 
The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "SeOmp.h" + +#ifdef WIN32 + int CPU_THREAD_NUM = (omp_get_num_procs() - 1) > 1 ? (omp_get_num_procs() - 1) : 1; +#else + int CPU_THREAD_NUM = 1; +#endif diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeOmp.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeOmp.h new file mode 100644 index 0000000..1c1f063 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeOmp.h @@ -0,0 +1,37 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include + +extern int CPU_THREAD_NUM; + +#define OMP_BARRIER __pragma(omp barrier) +#define OMP_PARALLEL __pragma(omp parallel num_threads(CPU_THREAD_NUM)) +#define OMP_FOR __pragma(omp for) +#define OMP_PARALLEL_FOR __pragma(omp parallel for num_threads(CPU_THREAD_NUM)) +#define OMP_PARALLEL_FOR_SUM_REDUCTION(sum) __pragma(omp parallel for num_threads(CPU_THREAD_NUM) reduction(+: sum)) diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SePreDefine.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SePreDefine.h new file mode 100644 index 0000000..908c8b2 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SePreDefine.h @@ -0,0 +1,81 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include +#include +#include + +/************************************************************************* +*************************** Min_Max_Swap *************************** +*************************************************************************/ + +#define SE_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define SE_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define SE_SWAP(a, b) { auto c = a; a = b; b = c;} + +/************************************************************************* +**************************** Namespace ***************************** +*************************************************************************/ + +#define SE_NAMESPACE SE +#define SE_NAMESPACE_BEGIN namespace SE_NAMESPACE { +#define SE_USING_NAMESPACE using namespace SE_NAMESPACE; +#define SE_NAMESPACE_END } + +/************************************************************************* +*************************** Noncopyable **************************** +*************************************************************************/ + +#define SE_NONCOPYABLE(className) \ + \ + className(const className&) = delete; \ + \ + void operator=(const className&) = delete; \ + +/************************************************************************* +**************************** DLL_Export **************************** +*************************************************************************/ + + +#define SE_INLINE __inline +#define SE_ALIGN(n) 
__declspec(align(n)) //! Only for windows platform. +#define SE_ASSERT(expression) assert(expression) + + +/************************************************************************* +***************************** Casting ****************************** +*************************************************************************/ + +#define SE_SCI(ClassObject) static_cast(ClassObject) +#define SE_SCF(ClassObject) static_cast(ClassObject) +#define SE_SCD(ClassObject) static_cast(ClassObject) +#define SE_SCU(ClassObject) static_cast(ClassObject) + +#ifndef NOMINMAX + #define NOMINMAX +#endif \ No newline at end of file diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditioner.cpp b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditioner.cpp new file mode 100644 index 0000000..f8d6e9b --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditioner.cpp @@ -0,0 +1,1719 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "SeSchwarzPreconditioner.h" + +#include "SeIntrinsic.h" + +#include +#include + + +SE_USING_NAMESPACE + + +void SeSchwarzPreconditioner::AllocatePrecoditioner(int numVerts, int numEdges, int numFaces) +{ + m_numVerts = numVerts; + m_numEdges = numEdges; + m_numFaces = numFaces; + + if (m_frameIndex == 0) + { + DoAlllocation(); + } + + if (m_frameIndex % 17 != 0) + { + return; + } + + //==== Reorder + + ComputeTotalAABB(); + + SpaceSort(); + + ComputeInverseMapper(); + + MapHessianTable(); + + m_frameIndex++; +} + +void SeSchwarzPreconditioner::PreparePreconditioner +( + const SeMatrix3f* diagonal, const SeMatrix3f* csrOffDiagonals, const int* csrRanges, + const EfSet* efSets, const EeSet* eeSets, const VfSet* vfSets, + unsigned int* efCounts, unsigned int* eeCounts, unsigned int* vfCounts +) +{ + m_mappedNeighborsRemain = m_mappedNeighbors; + m_mappedNeighborsNumRemain = m_mappedNeighborsNum; + + //==== before reordering + + PrepareCollisionStencils(efSets, eeSets, vfSets, efCounts, eeCounts, vfCounts); + + //==== + + ReorderRealtime(); + + //==== + + const int blockNum = m_totalSz / 32; + Utility::MemsetZero(m_hessian32.Ptr(), m_hessian32.Size()); + 
Utility::MemsetZero(m_additionalHessian32); + + //==== + + PrepareCollisionHessian(); //3 + + PrepareHessian(diagonal, csrOffDiagonals, csrRanges); + + LDLtInverse512(); +} + +void SeSchwarzPreconditioner::Preconditioning(SeVec3fSimd* z, const SeVec3fSimd* residual, int dim) +{ + Utility::MemsetZero(m_mappedZ); + Utility::MemsetZero(m_mappedR); + + BuildResidualHierarchy(residual); + + SchwarzLocalXSym(); + + CollectFinalZ(z); +} + +void SeSchwarzPreconditioner::ComputeLevelNums(int bankSize) +{ + constexpr float SizeRatio = 1.5f; + + //==== + + int totalSz = 0; + + int nLevel = 1; + int levelSz = (m_numVerts + bankSize - 1) / bankSize * bankSize; + totalSz += levelSz; + + while (levelSz > 32) + { + levelSz /= 32; + + nLevel++; + levelSz = (levelSz + bankSize - 1) / bankSize * bankSize; + totalSz += levelSz; + } + + m_numLevel = nLevel; + m_totalSz = totalSz * SizeRatio; +} + +void SeSchwarzPreconditioner::DoAlllocation() +{ + constexpr int bankSize = 32; //==== size of diagonal block + + ComputeLevelNums(bankSize); + + //==== + + m_MapperSortedGetOriginal.resize(m_numVerts); + m_mapperOriginalGetSorted.resize(m_numVerts); + m_mortonCode.resize(m_numVerts); + + m_hessian32.Resize(bankSize, m_totalSz); + m_mappedZ.resize(m_totalSz); + m_mappedR.resize(m_totalSz); + + m_mappedPos.resize(m_numVerts); + + m_denseLevel.resize(m_numVerts); + m_goingNext.resize(m_numLevel * m_numVerts); + m_coarseTables.resize(m_numVerts); + + m_prefixOrignal.resize(m_numVerts); + m_fineConnectMask.resize(m_numVerts); + m_nextConnectMsk.resize(m_numVerts); + m_nextPrefix.resize(m_numVerts); + + + int triSz = (1 + 96) * 96 / 2 + 16 * 3; + const int blockNum = m_totalSz / 32; + m_invSymR.resize(triSz * blockNum); + m_additionalHessian32.resize(m_totalSz + 1); + + //==== + + m_levelSize.resize(m_numLevel + 1); + m_CoarseSpaceTables.Resize(m_numLevel, m_numVerts); + + int maxNeighbours = 0; + + for (int vId = 0; vId < m_numVerts; ++vId) + { + maxNeighbours = Math::Max(m_neighbours->Size(vId) 
+ 1, maxNeighbours); + } + + m_mappedNeighborsNum.resize(m_numVerts); + m_mappedNeighborsNumRemain.resize(m_numVerts); + m_mappedNeighbors.Resize(maxNeighbours, m_numVerts); + m_mappedNeighborsRemain.Resize(maxNeighbours, m_numVerts); + + const int maxCollisionPerVert = 32; + m_maxStencilNum = m_numVerts * maxCollisionPerVert; + m_stencils.resize(m_maxStencilNum); + m_stencilNum = 0; +} + +void SeSchwarzPreconditioner::ComputeTotalAABB() +{ + m_aabb = SeAabb(); + + ComputeAABB(); // TODO: omp reduce +} + + +void SeSchwarzPreconditioner::ComputeAABB() +{ + +//MSVC compiler is stuck with OpenMP version 2.0, and unfortunately for you, reduction(max:) was only introduced with version 3.1 of the OpenMP C/C++ standard (that was in September 2011) + +//#pragma omp parallel for reduction(aabbAdd:m_aabb) + for (int vid = 0; vid < m_numVerts; ++vid) // need omp custom reduction operation + { + m_aabb += m_positions[vid]; + } +} + +void SeSchwarzPreconditioner::SpaceSort() +{ + FillSortingData(); + DoingSort(); +} + +void SeSchwarzPreconditioner::FillSortingData() +{ + OMP_PARALLEL_FOR + + for (int vid = 0; vid < m_numVerts; ++vid) + { + SeVec3fSimd temp = (m_positions[vid] - m_aabb.Lower) / m_aabb.Extent(); + + SeMorton64 mt; + + mt.Encode(temp.x, temp.y, temp.z); + + m_mortonCode[vid] = mt; + + m_MapperSortedGetOriginal[vid] = vid; + } +} + + +void SeSchwarzPreconditioner::DoingSort() +{ + //==== sort m_MapperSortedGetOriginal by m_mortonCode1; + + std::sort(m_MapperSortedGetOriginal.begin(), m_MapperSortedGetOriginal.end(), [&](int i, int j) { return m_mortonCode[i] < m_mortonCode[j]; }); +} + +void SeSchwarzPreconditioner::ComputeInverseMapper() +{ + OMP_PARALLEL_FOR + + for (int vid = 0; vid < m_numVerts; ++vid) + { + int originalIndex = m_MapperSortedGetOriginal[vid]; + + m_mapperOriginalGetSorted[originalIndex] = vid; + } +} + + +void SeSchwarzPreconditioner::MapHessianTable() +{ + OMP_PARALLEL_FOR + + for (int vid = 0; vid < m_numVerts; ++vid) + { + const auto 
originalIndex = m_MapperSortedGetOriginal[vid]; + + m_mappedPos[vid] = m_positions[originalIndex]; + + //==== + + int numNeighbors = m_neighbours->Size(originalIndex); + + const int* neighbors = m_neighbours->IdxPtr(originalIndex); + + m_mappedNeighborsNum[vid] = numNeighbors + 1; // include self + + m_mappedNeighbors[0][vid] = vid; // include self + + for (int k = 1; k < numNeighbors + 1; ++k) + { + unsigned int originalNeighbors = neighbors[k - 1];//m_neighbors[k][originalIndex]; + + m_mappedNeighbors[k][vid] = m_mapperOriginalGetSorted[originalNeighbors]; + } + } +} + +void SeSchwarzPreconditioner::MapCollisionStencilIndices() +{ + m_stencilIndexMapped.resize(m_stencilNum); + + OMP_PARALLEL_FOR + + for (int i = 0; i < m_stencilNum; i++) + { + const auto& s = m_stencils[i]; + + for (int vi = 0; vi < s.verextNumPerStencil; vi++) + { + m_stencilIndexMapped[i][vi] = m_mapperOriginalGetSorted[s.index[vi]]; + } + } +} + +void SeSchwarzPreconditioner::PrepareCollisionStencils(const EfSet* efSets, const EeSet* eeSets, const VfSet* vfSets, unsigned int* efCounts, unsigned int* eeCounts, unsigned int* vfCounts) +{ + int efNum = efCounts[m_numEdges]; + int eeNum = eeCounts[m_numEdges]; + int vfNum = vfCounts[m_numVerts]; + + int totalStencilNum = efNum + eeNum + vfNum; + + if (totalStencilNum > m_maxStencilNum) + { + totalStencilNum = m_maxStencilNum; + printf("stencil size %d exceed max stencils num %d\n", totalStencilNum, m_maxStencilNum); + } + + m_stencilNum = 0; + + OMP_PARALLEL_FOR + + for (int i = 0; i < totalStencilNum; i++) + { + Stencil s; + + if (i < efNum) //process ef + { + const auto& pair = efSets[i]; + + if (pair.m_eId < 0 || pair.m_fId < 0) { continue; } + + const auto& edge = m_edges[pair.m_eId]; + const auto& face = m_faces[pair.m_fId]; + + s.verextNumPerStencil = 5; + s.vertexNumOfFirstPrimitive = 2; + + s.index[0] = edge[0]; + s.index[1] = edge[1]; + s.index[2] = face[0]; + s.index[3] = face[1]; + s.index[4] = face[2]; + + s.weight[0] = pair.m_bary[0]; 
+ s.weight[1] = 1.f - pair.m_bary[0]; + s.weight[2] = -pair.m_bary[1]; + s.weight[3] = -pair.m_bary[2]; + s.weight[4] = -(1.f - pair.m_bary[1] - pair.m_bary[2]); + + s.direction = pair.m_normal; + + s.stiff = pair.stiff; + + } + else if (i < efNum + eeNum) // process ee + { + const auto& pair = eeSets[i]; + + if (pair.m_eId1 < 0 || pair.m_eId0 < 0) { continue; } + + s.verextNumPerStencil = 4; + s.vertexNumOfFirstPrimitive = 2; + + const auto& edge0 = m_edges[pair.m_eId0]; + const auto& edge1 = m_edges[pair.m_eId1]; + + s.index[0] = edge0[0]; + s.index[1] = edge0[1]; + s.index[2] = edge1[0]; + s.index[3] = edge1[1]; + + s.weight[0] = pair.m_bary[0]; + s.weight[1] = 1.f - pair.m_bary[0]; + s.weight[2] = -pair.m_bary[1]; + s.weight[3] = -(1.f - pair.m_bary[1]); + + s.direction = pair.m_normal; + + s.stiff = pair.stiff; + } + else if (i < totalStencilNum) //process vf + { + const auto& pair = vfSets[i]; + + if (pair.m_vId < 0 || pair.m_fId < 0) { continue; } + + s.verextNumPerStencil = 4; + s.vertexNumOfFirstPrimitive = 3; + + const auto& face = m_faces[pair.m_fId]; + + s.index[0] = face[0]; + s.index[1] = face[1]; + s.index[2] = face[2]; + s.index[3] = pair.m_vId; + + s.weight[0] = -pair.m_bary[0]; + s.weight[1] = -pair.m_bary[1]; + s.weight[2] = -(1.f - pair.m_bary[2]); + s.weight[3] = 1.f; + + s.direction = pair.m_normal; + + s.stiff = pair.stiff; + } + + int stencilNum = Intrinsic::AtomicAdd(&m_stencilNum, 1); + + m_stencils[stencilNum] = s; + } + + MapCollisionStencilIndices(); //1 +} + +void SeSchwarzPreconditioner::ReorderRealtime() +{ + Utility::MemsetZero(m_levelSize); + + BuildConnectMaskL0(); + + BuildCollisionConnection(m_fineConnectMask.data(), nullptr); //2 + + PreparePrefixSumL0(); + + BuildLevel1(); + + for (int level = 1; level < m_numLevel; level++) + { + Utility::MemsetZero(m_nextConnectMsk); + + BuildConnectMaskLx(level); + + BuildCollisionConnection(m_nextConnectMsk.data(), m_CoarseSpaceTables[level - 1]); + + NextLevelCluster(level); + + 
PrefixSumLx(level); + + ComputeNextLevel(level); + } + + TotalNodes(); + + AggregationKernel(); +} + +void SeSchwarzPreconditioner::BuildConnectMaskL0() +{ + const unsigned int warpDim = 32; + + int nWarps = (m_numVerts + warpDim - 1) / warpDim; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < nWarps; ++warpIdx) + { + for (int laneIdx = 0; laneIdx < warpDim; ++laneIdx) + { + int vId = warpIdx * warpDim + laneIdx; + + if (vId >= m_numVerts) + { + break; + } + + int numNeighbors = m_mappedNeighborsNumRemain[vId]; + + unsigned int connetMsk = 1U << laneIdx; // include self + + int nk = 0; + + for (int k = 0; k < numNeighbors; k++) + { + int vIdConnected = m_mappedNeighborsRemain[k][vId]; + + int warpIdxConnected = vIdConnected / warpDim; + + if (warpIdx == warpIdxConnected) // in the same warp + { + unsigned int laneIdxConnected = vIdConnected % warpDim; + + connetMsk |= (1U << laneIdxConnected); + } + else + { + m_mappedNeighborsRemain[nk][vId] = vIdConnected; + nk++; + } + } + + m_mappedNeighborsNumRemain[vId] = nk; + + // distance cullling + //const SeVec3fSimd& pos = m_mappedPos[vId]; + //for (int it = 0; it < Intrinsic::Popcount32(activeMsk); it++) + //{ + // const float th = AABB_TH * AABB_TH; + // CuFloat3 po(0.0f); + // po.x = Intrinsic::Shuffle(activeMsk, pos.x, it); + // po.y = Intrinsic::Shuffle(activeMsk, pos.y, it); + // po.z = Intrinsic::Shuffle(activeMsk, pos.z, it); + // if (Math::SqrLength(po - pos) < th) + // { + // connetMsk |= (1U << it); + // } + //} + + m_fineConnectMask[vId] = connetMsk; + } + } +} + + +void SeSchwarzPreconditioner::BuildCollisionConnection(unsigned int* pConnect, const int* pCoarseTable) +{ + const int bank = 32; + + OMP_PARALLEL_FOR + + for (int i = 0; i < m_stencilNum; i++) + { + const auto& s = m_stencils[i]; + Int5 idx = m_stencilIndexMapped[i]; + unsigned int connMsk[5] = {}; + + if (pCoarseTable) + { + for (int it = 0; it < s.verextNumPerStencil; it++) + { + idx[it] = pCoarseTable[idx[it]]; + } + } + + for (int 
ita = 0; ita < s.verextNumPerStencil; ita++) + { + for (int itb = ita + 1; itb < s.verextNumPerStencil; itb++) + { + unsigned int myID = idx[ita]; + unsigned int otID = idx[itb]; + if (myID == otID) // redundant ? + { + continue; + } + if ((myID / bank == otID / bank)) + { + if (ita < s.vertexNumOfFirstPrimitive && itb >= s.vertexNumOfFirstPrimitive) + { + connMsk[ita] |= (1U << (otID % bank)); + connMsk[itb] |= (1U << (myID % bank)); + } + } + } + } + + for (int it = 0; it < s.verextNumPerStencil; it++) + { + if (connMsk[it]) + { + Intrinsic::AtomicOr(&pConnect[idx[it]], connMsk[it]); + } + } + } +} + +void SeSchwarzPreconditioner::PreparePrefixSumL0() +{ + int warpDim = 32; + + int nWarps = (m_numVerts + warpDim - 1) / warpDim; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < nWarps; ++warpIdx) + { + unsigned int cacheMask[32]; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vId = laneIdx + warpIdx * warpDim; + + if (vId >= m_numVerts) { break; } + + cacheMask[laneIdx] = m_fineConnectMask[vId]; + } + + Intrinsic::SyncWarp(); + + int prefixSum = 0; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * warpDim; + + if (vid >= m_numVerts) { break; } + + unsigned int connetMsk = cacheMask[laneIdx]; + + unsigned int visited = (1U << laneIdx); + + while (connetMsk != -1) // + { + unsigned int todo = visited ^ connetMsk; // + + if (!todo) + { + break; + } + + unsigned int nextVisit = Intrinsic::Ffs(todo) - 1; + + visited |= (1U << nextVisit); + + connetMsk |= cacheMask[nextVisit]; + } + + m_fineConnectMask[vid] = connetMsk; + + unsigned int electedPrefix = Intrinsic::Popcount32(connetMsk & Intrinsic::LanemaskLt(laneIdx)); + + if (electedPrefix == 0) + { + prefixSum++; + } + } + + m_prefixOrignal[warpIdx] = prefixSum; + } +} + +void SeSchwarzPreconditioner::BuildLevel1() +{ + constexpr unsigned int blockDim = 1024;//32*32 + + int nBlocks = (m_numVerts + blockDim - 1) / blockDim; + + OMP_PARALLEL_FOR 
+ + for (int blockIdx = 0; blockIdx < nBlocks; ++blockIdx) + { + unsigned int warpPrefix[32]; + + unsigned int theBlockGlobalPrefix = 0; + + unsigned int globalWarpOffset = blockIdx * 32; + + for (int warpIdx = 0; warpIdx < blockIdx; ++warpIdx) + { + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + theBlockGlobalPrefix += m_prefixOrignal[threadIdx]; + } + } + + for (int warpIdx = 0; warpIdx < 32; ++warpIdx) + { + warpPrefix[warpIdx] = m_prefixOrignal[globalWarpOffset + warpIdx]; + } + + Intrinsic::SyncBlock(); + + for (unsigned int warpIdx = 0; warpIdx < 32; ++warpIdx) + { + unsigned int theWarpGlobalPrefix = theBlockGlobalPrefix; + + for (unsigned int prevWarpIdx = 0; prevWarpIdx < warpIdx; ++prevWarpIdx) + { + theWarpGlobalPrefix += warpPrefix[prevWarpIdx]; + } + + + unsigned int electedMask = 0; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + int vid = threadIdx + blockIdx * blockDim; + + if (vid >= m_numVerts) { break; } + + + if (vid == m_numVerts - 1) + { + m_levelSize[1].x = theWarpGlobalPrefix + warpPrefix[warpIdx]; + m_levelSize[1].y = (m_numVerts + 31) / 32 * 32; + } + + unsigned int connMsk = m_fineConnectMask[vid]; + + unsigned int electedPrefix = Intrinsic::Popcount32(connMsk & Intrinsic::LanemaskLt(laneIdx)); + + if (electedPrefix == 0) + { + electedMask |= 1U << laneIdx; + } + //unsigned int electedMask = Intrinsic::BallotSync(-1, electedPrefix == 0); + } + + + unsigned int lanePrefix[32]; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + int vid = threadIdx + blockIdx * blockDim; + + if (vid >= m_numVerts) { break; } + + + lanePrefix[laneIdx] = Intrinsic::Popcount32(electedMask & Intrinsic::LanemaskLt(laneIdx)); + + lanePrefix[laneIdx] += theWarpGlobalPrefix; + } + + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int 
threadIdx = laneIdx + warpIdx * 32; + + int vid = threadIdx + blockIdx * blockDim; + + if (vid >= m_numVerts) { break; } + + + unsigned int connMsk = m_fineConnectMask[vid]; + + unsigned int elected_lane = Intrinsic::Ffs(connMsk) - 1; + + unsigned int theLanePrefix = lanePrefix[elected_lane]; + + m_CoarseSpaceTables[0][vid] = theLanePrefix; + + m_goingNext[vid] = theLanePrefix + (m_numVerts + 31) / 32 * 32; + } + } + } +} + + +void SeSchwarzPreconditioner::BuildConnectMaskLx(int level) +{ + constexpr unsigned int warpDim = 32; + + int nWarps = (m_numVerts + warpDim - 1) / warpDim; + + const unsigned int bank = 32; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < nWarps; ++warpIdx) + { + unsigned int prefixMsk[32]; + unsigned int connectMsk[32]; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + prefixMsk[laneIdx] = m_fineConnectMask[vid]; + + unsigned int coarseVid = m_CoarseSpaceTables[level - 1][vid]; + + connectMsk[laneIdx] = 0;// 1U << (coarseVid % bank); //==== include self + + unsigned int kn = m_mappedNeighborsNumRemain[vid]; + + unsigned int nk = 0; + + for (unsigned int k = 0; k < kn; k++) + { + unsigned int connect = m_mappedNeighborsRemain[k][vid]; + + unsigned int coarseConnect = m_CoarseSpaceTables[level - 1][connect]; + + if (coarseVid / bank == coarseConnect / bank) + { + unsigned int off = coarseConnect % bank; + + connectMsk[laneIdx] |= (1U << off); + } + else + { + m_mappedNeighborsRemain[nk][vid] = connect; + nk++; + } + } + + m_mappedNeighborsNumRemain[vid] = nk; + } + + bool isFullWarp = (prefixMsk[0] == -1); + + if (isFullWarp) + { + unsigned int connectMskFull = 0; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + connectMskFull |= connectMsk[laneIdx]; + } + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { 
break; } + + connectMsk[laneIdx] = connectMskFull; + } + } + else + { + unsigned int cacheMsk[32]; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + cacheMsk[laneIdx] = 0; + } + + unsigned int electedLane[32]; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + electedLane[laneIdx] = Intrinsic::Ffs(prefixMsk[laneIdx]) - 1; + + if (connectMsk[laneIdx]) + { + //cacheMsk[electedLane[laneIdx]] |= connectMsk[laneIdx]; + Intrinsic::AtomicOr(&cacheMsk[electedLane[laneIdx]], connectMsk[laneIdx]); + } + } + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + connectMsk[laneIdx] = cacheMsk[electedLane[laneIdx]]; + } + } + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + unsigned int coarseVid = m_CoarseSpaceTables[level - 1][vid]; + + unsigned int electedPrefix = Intrinsic::Popcount32(prefixMsk[laneIdx] & Intrinsic::LanemaskLt(laneIdx)); + + if (connectMsk[laneIdx] && electedPrefix == 0) + { + Intrinsic::AtomicOr(m_nextConnectMsk.data() + coarseVid, connectMsk[laneIdx]); + } + } + } +} + +void SeSchwarzPreconditioner::NextLevelCluster(int level) +{ + const int levelNum = m_levelSize[level].x; + + const int warpDim = 32; + + int numWarps = (m_numVerts + warpDim - 1) / warpDim; + + int maxWarpIdx = (levelNum + warpDim - 1) / warpDim; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < numWarps; ++warpIdx) + { + unsigned int cachedMsk[32]; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= (levelNum + 31) / 32 * 32) + { + break;// early culling + } + + unsigned int connectMsk = (1U << laneIdx); + + if (vid < levelNum) + { + connectMsk |= m_nextConnectMsk[vid];// connect to current + } + + cachedMsk[laneIdx] = connectMsk; + + if (vid >= levelNum) + 
{ + break; + } + } + + Intrinsic::SyncWarp(); + + unsigned int prefixSum = 0; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= levelNum || vid >= (levelNum + 31) / 32 * 32) + { + break;// early culling + } + + unsigned int connectMsk = cachedMsk[laneIdx]; + + unsigned int visited = (1U << laneIdx); + + while (true) + { + unsigned int todo = visited ^ connectMsk; + + if (!todo) + { + break; + } + + unsigned int nextVisist = Intrinsic::Ffs(todo) - 1; + + visited |= (1U << nextVisist); + + connectMsk |= cachedMsk[nextVisist]; + } + + m_nextConnectMsk[vid] = connectMsk; + + unsigned int electedPrefix = Intrinsic::Popcount32(connectMsk & Intrinsic::LanemaskLt(laneIdx)); + + if (electedPrefix == 0) + { + prefixSum++; + } + } + + if (warpIdx < maxWarpIdx) + { + m_nextPrefix[warpIdx] = prefixSum; + } + } +} + +void SeSchwarzPreconditioner::PrefixSumLx(int level) +{ + const int levelNum = m_levelSize[level].x; + + const int levelBegin = m_levelSize[level].y; + + unsigned int blockDim = 1024; + + int nBlocks = (m_numVerts + blockDim - 1) / blockDim; + + OMP_PARALLEL_FOR + + for (int blockIdx = 0; blockIdx < nBlocks; ++blockIdx) + { + unsigned int warpPrefix[32]; + + unsigned int theBlockPrefix = 0; + + unsigned int globalWarpOffset = blockIdx * 32; + + for (int warpIdx = 0; warpIdx < blockIdx; ++warpIdx) + { + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + unsigned int vid = threadIdx + blockIdx * blockDim; + + if (vid >= (levelNum + blockDim - 1) / blockDim * blockDim) + { + break; + } + + theBlockPrefix += m_nextPrefix[threadIdx]; + } + } + + for (int warpIdx = 0; warpIdx < 32; ++warpIdx) + { + warpPrefix[warpIdx] = m_nextPrefix[globalWarpOffset + warpIdx]; + } + + Intrinsic::SyncBlock(); + + for (unsigned int warpIdx = 0; warpIdx < 32; ++warpIdx) + { + unsigned int theWarpPrefix = theBlockPrefix; + + for (unsigned int prevWarpIdx = 0; 
prevWarpIdx < warpIdx; ++prevWarpIdx) + { + theWarpPrefix += warpPrefix[prevWarpIdx]; + } + + unsigned int electedMsk = 0; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + int vid = threadIdx + blockIdx * blockDim; + + if (vid >= levelNum) + { + break; + } + if (vid == levelNum - 1) + { + m_levelSize[level + 1].x = theWarpPrefix + warpPrefix[warpIdx]; + m_levelSize[level + 1].y = levelBegin + (levelNum + 31) / 32 * 32; + } + + unsigned int connMsk = m_nextConnectMsk[vid]; + + unsigned int electedPrefix = Intrinsic::Popcount32(connMsk & Intrinsic::LanemaskLt(laneIdx)); + + if (electedPrefix == 0) + { + electedMsk |= (1U << laneIdx); + } + + //unsigned int electedMask = Intrinsic::BallotSync(-1, electedPrefix == 0); + } + + unsigned int lanePrefix[32]; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + lanePrefix[laneIdx] = Intrinsic::Popcount32(electedMsk & Intrinsic::LanemaskLt(laneIdx)); + lanePrefix[laneIdx] += theWarpPrefix; + } + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + int vid = threadIdx + blockIdx * blockDim; + + if (vid >= levelNum) { break; } + + int electedLane = Intrinsic::Ffs(m_nextConnectMsk[vid]) - 1; + + unsigned int theLanePrefix = lanePrefix[electedLane]; + + m_nextConnectMsk[vid] = theLanePrefix; + + m_goingNext[vid + levelBegin] = theLanePrefix + levelBegin + (levelNum + 31) / 32 * 32; + } + } + } +} + +void SeSchwarzPreconditioner::ComputeNextLevel(int level) +{ + OMP_PARALLEL_FOR + + for (int vid = 0; vid < m_numVerts; ++vid) + { + int next = m_CoarseSpaceTables[level - 1][vid]; + + m_CoarseSpaceTables[level][vid] = m_nextConnectMsk[next]; + } +} + +void SeSchwarzPreconditioner::TotalNodes() +{ + m_totalNumberClusters = m_levelSize[m_numLevel].y; + //printf("%d\n", m_totalNumberClusters); +} + +void SeSchwarzPreconditioner::AggregationKernel() +{ + const int warpDim = 32; + + int 
numWarps = (m_numVerts + warpDim - 1) / warpDim; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < numWarps; ++warpIdx) + { + unsigned int firstInWarp = warpIdx * 32; + + int curr[32]; + int next[32]; + int aggLevel[32]; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + curr[laneIdx] = vid; + + aggLevel[laneIdx] = m_numLevel - 1; + } + + Int4 coarseTable[32]; + + for (int l = 0; l < m_numLevel - 1; ++l) + { + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + next[laneIdx] = m_goingNext[curr[laneIdx]]; + } + + Intrinsic::SyncWarp(); + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + if (next[laneIdx] == next[0]) + { + aggLevel[laneIdx] = Math::Min(l, aggLevel[laneIdx]); + } + + curr[laneIdx] = next[laneIdx]; + coarseTable[laneIdx][l] = next[laneIdx]; + } + } + + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + m_denseLevel[vid] = aggLevel[laneIdx]; + + m_coarseTables[vid] = coarseTable[laneIdx]; + } + } +} + +void SeSchwarzPreconditioner::AdditionalSchwarzHessian2(SeMatrix3f hessian, std::vector& pAdditionalHessian, SeArray2D& pDenseHessian, int v1, int v2, const std::vector& pGoingNext, int nLevel, int vNum) +{ + int level = 0; + unsigned int myID = v1; + unsigned int otID = v2; + const unsigned int bank = 32; + + while (myID / bank != otID / bank && level < nLevel) + { + myID = pGoingNext[myID]; + otID = pGoingNext[otID]; + level++; + } + + if (level >= nLevel) + return; + + Intrinsic::AtomicAdd(&pDenseHessian[otID % bank][myID], hessian); + Intrinsic::AtomicAdd(&pDenseHessian[myID % bank][otID], hessian); + + if (level < nLevel - 1) + { + myID = pGoingNext[myID]; + otID = pGoingNext[otID]; + + if 
(myID == otID) + { + Intrinsic::AtomicAdd(&pAdditionalHessian[myID], hessian * 2.0f); + } + else + { + Intrinsic::AtomicAdd(&pAdditionalHessian[myID], hessian); + Intrinsic::AtomicAdd(&pAdditionalHessian[otID], hessian); + } + } +} + +void SeSchwarzPreconditioner::PrepareCollisionHessian() +{ + OMP_PARALLEL_FOR + + for (int i = 0; i < m_stencilNum; i++) + { + const auto& s = m_stencils[i]; + auto idx = m_stencilIndexMapped[i]; + + Float3 d(s.direction[0], s.direction[1], s.direction[2]); + + SeMatrix3f hessian = OuterProduct(d, d * s.stiff); + + for (int it = 0; it < s.verextNumPerStencil; it++) + { + Intrinsic::AtomicAdd(&m_additionalHessian32[idx[it]], hessian * Math::Square(s.weight[it])); + } + + for (int ita = 0; ita < s.verextNumPerStencil; ita++) + { + for (int itb = ita + 1; itb < s.verextNumPerStencil; itb++) + { + AdditionalSchwarzHessian2(s.weight[ita] * s.weight[itb] * hessian, m_additionalHessian32, m_hessian32, idx[ita], idx[itb], m_goingNext, m_numLevel, m_numVerts); + } + } + } +} + +void SeSchwarzPreconditioner::PrepareHessian(const SeMatrix3f* diagonal, const SeMatrix3f* csrOffDiagonals, const int* csrRanges) +{ + + const unsigned int bank = 32; + + int nVC = (m_numVerts + 31) / 32 * 32; + + OMP_PARALLEL_FOR + + for (int vid = nVC; vid < m_totalNumberClusters; ++vid) + { + auto oldDiagonal = m_additionalHessian32[vid]; + int myID = vid; + Intrinsic::AtomicAdd(&m_hessian32[myID % bank][myID], oldDiagonal); + while (true) + { + myID = m_goingNext[myID]; + if (myID >= m_totalNumberClusters) + { + break; + } + Intrinsic::AtomicAdd(&m_hessian32[myID % bank][myID], oldDiagonal); + } + } + + int nblock = (m_numVerts + 31) / 32; + OMP_PARALLEL + { + std::vector> diagTable(m_numLevel); + +#pragma omp for + for (int bid = 0; bid < nblock; ++bid) + { + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + bid * 32; + if (vid >= m_numVerts) + break; + const int vidOrig = m_MapperSortedGetOriginal[vid]; + int oldNum = 
m_mappedNeighborsNum[vid]; + + auto oldDiagonal = diagonal[vidOrig] + m_additionalHessian32[vid]; + m_hessian32[vid % bank][vid] += oldDiagonal; // self diagonal + + for (int k = 1; k < oldNum; k++) + { + const unsigned int neighbor = m_mappedNeighbors[k][vid]; + SeMatrix3f mat = csrOffDiagonals[csrRanges[vidOrig] + k - 1]; + int level = 0; + unsigned int levelSz = m_numVerts; + unsigned int myID = vid; + unsigned int otID = neighbor; + + while (myID / bank != otID / bank && level < m_numLevel) + { + level++; + myID = m_goingNext[myID]; + otID = m_goingNext[otID]; + } + if (level >= m_numLevel) + { + continue; + } + if (level <= 1) // 按照block分并行,level 1的位置也一定属于本线程 + m_hessian32[otID % bank][myID] += mat; + else + Intrinsic::AtomicAdd(&m_hessian32[otID % bank][myID], mat); + + if (level == 0) + oldDiagonal += mat; + else if (level + 1 < m_numLevel) + { + myID = m_goingNext[myID]; + auto& table = diagTable[level + 1]; + if (table.find(myID) == table.end()) + table[myID] = mat; + else + table[myID] += mat; + } + } + if (1 < m_numLevel) + { + int myID = m_goingNext[vid]; + m_hessian32[myID % bank][myID] += oldDiagonal; // self diagonal + if (2 < m_numLevel) + { + myID = m_goingNext[myID]; + auto& table = diagTable[2]; + if (table.find(myID) == table.end()) + table[myID] = oldDiagonal; + else + table[myID] += oldDiagonal; + } + } + } + } + + for (int lv = 2; lv < m_numLevel; lv++) + { + const auto& table = diagTable[lv]; + for (auto& pair : table) + { + int myID = pair.first; + Intrinsic::AtomicAdd(&m_hessian32[myID % bank][myID],pair.second); + if (lv + 1 < m_numLevel) + { + myID = m_goingNext[myID]; + auto& nexttable = diagTable[lv + 1]; + if (nexttable.find(myID) == nexttable.end()) + nexttable[myID] = pair.second; + else + nexttable[myID] += pair.second; + } + } + } + } +} + +void SeSchwarzPreconditioner::LDLtInverse512() +{ + const int triSz = (1 + 96) * 96 / 2 + 16 * 3; + + const int activeblockNum = m_totalNumberClusters / 32; + + OMP_PARALLEL_FOR + + for (int 
block = 0; block < activeblockNum; block++) + { + float A[96][96] = {}; + + for (int x = 0; x < 32; x++) + { + for (int y = 0; y < 32; y++) + { + SeMatrix3f temp = m_hessian32[y][x + block * 32]; + + if (x == y && temp(0, 0) == 0.0f) + { + temp = SeMatrix3f::Identity(); + } + for (int ii = 0; ii < 3; ii++) + { + for (int jj = 0; jj < 3; jj++) + { + A[x * 3 + ii][y * 3 + jj] = temp(ii, jj); + } + } + } + } + + //const int pt = -1;//212; + //if (block == pt) + //{ + // std::ofstream ofs("c:\\test\\MRF.txt"); + // for (int y = 0; y < 96; y++) + // { + // for (int x = 0; x < 96; x++) + // { + // ofs << A[y][x] << " "; + // } + // ofs << std::endl; + // } + //} + + // 向下消元 +#ifdef WIN32 + for (int x = 0; x < 96; x++) + { + float diag = A[x][x]; + __m256 line[12]; + for (int it = 0; it < 12; it++) + line[it] = _mm256_loadu_ps(A[x] + it * 8); + + for (int y = x + 1; y < 96; y++) + { + if (A[y][x] == 0.0f) + continue; + float r = -A[y][x] / diag; + __m256 ratio = _mm256_set1_ps(r); + for (int it = 0; it < 12; it++) + { + __m256 temp = _mm256_fmadd_ps(ratio, line[it], _mm256_loadu_ps(A[y] + it * 8)); // A[y] += A[x] * ratio + _mm256_storeu_ps(A[y] + it * 8, temp); + } + A[y][x] = r; + } + } + + + // inv diagonal + float diagonal[96] = {}; + for (int y = 0; y < 96; y++) + { + diagonal[95 - y] = A[y][y]; + A[y][y] = 1.0f; + for (int x = y + 1; x < Math::Min(96, y + 9); x++) + { + A[y][x] = 0.0f; + } + } + for (int it = 0; it < 12; it++) + { + __m256 temp = _mm256_div_ps(_mm256_set1_ps(1.0f), _mm256_loadu_ps(diagonal + it * 8)); + _mm256_storeu_ps(diagonal + it * 8, temp); + } + + int off = block * triSz; + // output diagonal + for (int it = 0; it < 12; it++) + { + int lc = 96 - it * 8; + __m256 acc = _mm256_setzero_ps(); + for (int l = 0; l < lc; l++) + { + __m256 a = _mm256_loadu_ps(A[95 - l] + it * 8); + a = _mm256_mul_ps(a, a); + __m256 r = _mm256_set1_ps(diagonal[l]); + acc = _mm256_fmadd_ps(r, a, acc); + } + _mm256_storeu_ps(&m_invSymR[off + it * 8], acc); + } + off += 
12 * 8; + for (int it = 0; it < 12; it++) + { + int xBg = it * 8; + for (int scan = xBg + 1; scan <= (96 - 8); scan++) + { + int lc = 96 - scan; + __m256 acc = _mm256_setzero_ps(); + for (int l = 0; l < lc; l++) + { + __m256 a = _mm256_loadu_ps(A[95 - l] + xBg); + __m256 b = _mm256_loadu_ps(A[95 - l] + scan); + a = _mm256_mul_ps(a, b); + __m256 r = _mm256_set1_ps(diagonal[l]); + acc = _mm256_fmadd_ps(r, a, acc); + } + _mm256_storeu_ps(&m_invSymR[off], acc); + off += 8; + } + } +#else + //FIXME! + float diagonal[96] = {}; + int off = block * triSz; +#endif + for (int it = 0; it < 12; it++) + { + for (int lane = 0; lane < 7; lane++) + { + int xBg = it * 8 + lane; + for (int h = (96 - 7 + lane); h < 96; h++) + { + int lc = 96 - h; + float acc = 0.f; + for (int l = 0; l < lc; l++) + { + float a = A[95 - l][xBg]; + float b = A[95 - l][h]; + float r = diagonal[l]; + acc += a * b * r; + } + m_invSymR[off] = acc; + off++; + } + } + } + + //if (block == pt) + //{ + // std::ofstream ofs("c:\\test\\MRF3.txt"); + // float B[96][96] = {}; + // int Noff = block * triSz; + // for (int i = 0; i < 96; i++) + // { + // B[i][i] = m_invSymR[i + Noff]; + // } + // Noff += 96; + // // body + // for (int it = 0; it < 12; it++) + // { + // int xBg = it * 8; + // for (int scan = xBg + 1; scan <= (96 - 8); scan++) + // { + // for (int l = 0; l < 8; l++) + // { + // float value = m_invSymR[Noff + l]; + // B[scan + l][xBg + l] = value; + // } + // Noff += 8; + // } + // } + // // remainder + // for (int it = 0; it < 12; it++) + // { + // for (int lane = 0; lane < 7; lane++) + // { + // int xBg = it * 8 + lane; + // for (int h = (96 - 7 + lane); h < 96; h++) + // { + // float value = m_invSymR[Noff]; + // Noff++; + // B[h][xBg] = value; + // } + // } + // } + // + // for (int y = 0; y < 96; y++) + // { + // for (int x = 0; x < 96; x++) + // { + // ofs << B[y][x] << " "; + // } + // ofs << std::endl; + // } + //} + } +} + +void SeSchwarzPreconditioner::BuildResidualHierarchy(const SeVec3fSimd* 
m_cgResidual) +{ + int nblock = (m_numVerts + 31) / 32; + + OMP_PARALLEL + { + std::vector> diagTable(m_numLevel); + +#pragma omp for + + for (int bid = 0; bid < nblock; ++bid) + { + for (int lane = 0; lane < 32; lane++) + { + int vid = lane + bid * 32; + if (vid >= m_numVerts) + break; + unsigned int vidOrig = m_MapperSortedGetOriginal[vid]; + SeVec3fSimd r = m_cgResidual[vidOrig]; + m_mappedR[vid] = r; + if (1 < m_numLevel) + { + int next = m_goingNext[vid]; + m_mappedR[next] += r; + } + } + } + } + + if (m_numLevel > 2) + { + int bg = m_levelSize[1].y; + int ed = m_levelSize[2].y; + for (int vid = bg; vid < ed; vid++) + { + SeVec3fSimd r = m_mappedR[vid]; + int nx = vid; + for (int lv = 2; lv < m_numLevel; lv++) + { + nx = m_goingNext[nx]; + m_mappedR[nx] += r; + } + } + } + + //printf("level %d\n", m_nLevel); + + //for (int l = 0; l < m_nLevel + 1; l++) + // printf("%d ",m_levelSize[l].y); + //printf("\n"); +} + +void SeSchwarzPreconditioner::SchwarzLocalXSym() +{ + constexpr int blockDim = 32; + + const int activeblockNum = m_totalNumberClusters / blockDim; + + const int triSz = (1 + 96) * 96 / 2 + 16 * 3; + + OMP_PARALLEL_FOR + + for (int blockIdx = 0; blockIdx < activeblockNum; ++blockIdx) + { + float cacheRhs[96]; + float cacheOut[96]; + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + blockIdx * blockDim; + auto rhs = m_mappedR[vid]; + cacheRhs[laneIdx * 3 + 0] = rhs.x; + cacheRhs[laneIdx * 3 + 1] = rhs.y; + cacheRhs[laneIdx * 3 + 2] = rhs.z; + } +#ifdef WIN32 + // ------------------------------------ + // diagnal + int off = blockIdx * triSz; + for (int it = 0; it < 12; it++) + { + __m256 diag = _mm256_loadu_ps(&m_invSymR[off + it * 8]); + __m256 diagRhs = _mm256_loadu_ps(&cacheRhs[it * 8]); + diag = _mm256_mul_ps(diag, diagRhs); + _mm256_storeu_ps(&cacheOut[it * 8], diag); + } + off += 8 * 12; + for (int it = 0; it < 11; it++) + { + int xBg = it * 8; + __m256 sigularRhs = _mm256_loadu_ps(&cacheRhs[xBg]); + __m256 sigularResult = 
_mm256_setzero_ps(); + for (int scan = xBg + 1; scan <= (96 - 8); scan++) + { + __m256 mtx = _mm256_loadu_ps(&m_invSymR[off]); + off += 8; + + __m256 scanRhs = _mm256_loadu_ps(&cacheRhs[scan]); + sigularResult = _mm256_fmadd_ps(mtx, scanRhs, sigularResult); + + __m256 scanResult = _mm256_loadu_ps(&cacheOut[scan]); + scanResult = _mm256_fmadd_ps(mtx, sigularRhs, scanResult); + _mm256_storeu_ps(&cacheOut[scan], scanResult); + } + + __m256 sigularResultOrg = _mm256_loadu_ps(&cacheOut[xBg]); + sigularResult = _mm256_add_ps(sigularResultOrg, sigularResult); + _mm256_storeu_ps(&cacheOut[xBg], sigularResult); + } +#else + //FIXME! + int off = blockIdx * triSz; +#endif + + for (int it = 0; it < 12; it++) + { + for (int lane = 0; lane < 7; lane++) + { + int xBg = it * 8 + lane; + float sigularRhs = cacheRhs[xBg]; + float singularResult = 0.0f; + for (int h = (96 - 7 + lane); h < 96; h++) + { + float value = m_invSymR[off]; + off++; + if (value) + { + singularResult += value * cacheRhs[h]; + cacheOut[h] += value * sigularRhs; + } + } + cacheOut[xBg] += singularResult; + } + } + + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + blockIdx * blockDim; + + SeVec3fSimd out(0.0f); + out.x = cacheOut[laneIdx * 3 + 0]; + out.y = cacheOut[laneIdx * 3 + 1]; + out.z = cacheOut[laneIdx * 3 + 2]; + + m_mappedZ[vid] = out; + } + + } +} + +void SeSchwarzPreconditioner::CollectFinalZ(SeVec3fSimd* m_cgZ) +{ + OMP_PARALLEL_FOR + + for (int vid = 0; vid < m_numVerts; ++vid) + { + unsigned int mappedIndex = m_MapperSortedGetOriginal[vid]; + + auto z = m_mappedZ[vid]; + + Int4 table = m_coarseTables[vid]; + + for (int l = 1; l < Math::Min(m_numLevel, 4); l++) + { + int now = table[l - 1]; + + z += m_mappedZ[now]; + } + + m_cgZ[mappedIndex] = z; + } +} diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditioner.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditioner.h new file mode 100644 
index 0000000..01f7adf --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditioner.h @@ -0,0 +1,180 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#pragma once

#include "SeCsr.h"
#include "SeMorton.h"
#include "SeAabbSimd.h"
#include "SeCollisionElements.h"


SE_NAMESPACE_BEGIN

// Multilevel domain-decomposition (additive Schwarz) preconditioner for the PCG
// solves used in cloth / deformable-body simulation.
//
// Usage protocol (the three public entry points, in order):
//   1. AllocatePrecoditioner()  — once per frame, before time integration
//   2. PreparePreconditioner()  — once per solve, before the PCG iteration loop
//   3. Preconditioning()        — every PCG iteration (computes z ≈ M⁻¹ r)
//
// NOTE(review): in this copy all angle-bracket template arguments (the element
// types of std::vector / SeArray2D / SeAabb, and one #include) appear to have
// been stripped by text extraction — restore them from upstream before building.
class SeSchwarzPreconditioner
{

public:

	//==== input data

	const SeVec3fSimd* m_positions = nullptr; // the mesh nodal positions

	//==== input mesh topology data for collision computation

	const Int4* m_edges = nullptr; // indices of the two adjacent vertices and the two opposite vertices of edges
	const Int4* m_faces = nullptr; // indices of the three adjacent vertices of faces, the fourth is useless

	const SeCsr* m_neighbours = nullptr; // the adjacent information of vertices stored in a csr format

public:

	//==== call before time integration once a frame
	void AllocatePrecoditioner(int numVerts, int numEdges, int numFaces);

	//==== call before PCG iteration loop
	void PreparePreconditioner(const SeMatrix3f* diagonal, const SeMatrix3f* csrOffDiagonals, const int* csrRanges,
		const EfSet* efSets, const EeSet* eeSets, const VfSet* vfSets, unsigned int* efCounts, unsigned int* eeCounts, unsigned int* vfCounts);

	//==== call during PCG iterations
	void Preconditioning(SeVec3fSimd* z, const SeVec3fSimd* residual, int dim);

private:

	int m_numVerts = 0;
	int m_numEdges = 0;
	int m_numFaces = 0;

	int m_frameIndex = 0; // frame counter; hierarchy rebuild work is amortized across frames

	int m_totalSz = 0;    // total node count summed over all levels (padded)
	int m_numLevel = 0;   // number of levels in the cluster hierarchy

	int m_totalNumberClusters;


	SeAabb m_aabb; // bounding box of all positions; input to Morton encoding


	SeArray2D m_hessianMapped;
	std::vector m_mappedNeighborsNum;
	std::vector m_mappedNeighborsNumRemain;
	SeArray2D m_mappedNeighbors;
	SeArray2D m_mappedNeighborsRemain;

	SeArray2D m_CoarseSpaceTables; // per-level vertex -> coarse-cluster index
	std::vector m_prefixOrignal;
	std::vector m_fineConnectMask;
	std::vector m_nextConnectMsk;
	std::vector m_nextPrefix;

	std::vector m_mappedPos; // positions permuted into Morton-sorted order

	std::vector m_coarseTables; // per-vertex list of coarse-level ancestors
	std::vector m_goingNext;    // node -> its cluster on the next level
	std::vector m_denseLevel;
	std::vector m_levelSize; // .x = current level size .y = (prefixed) current level begin index

	std::vector m_mappedR;
	std::vector m_mappedZ;
	std::vector m_MapperSortedGetOriginal; // sorted by morton
	std::vector m_mapperOriginalGetSorted;
	std::vector m_mortonCode;

	SeArray2D m_hessian32;             // 32-wide diagonal-block Hessian storage
	std::vector m_additionalHessian32; // extra diagonal terms from collision stencils
	std::vector m_invSymR;             // packed inverse factors used by SchwarzLocalXSym

	int m_stencilNum;
	int m_maxStencilNum;
	std::vector m_stencils;
	std::vector m_stencilIndexMapped;

private:

	//==== hierarchy / preconditioner internals (cf. the inline reference
	//     implementations in SeSchwarzPreconditionerPreviousVersion.h)

	void ComputeLevelNums(int bankSize); // derives m_numLevel and m_totalSz from the vertex count

	void DoAlllocation(); // one-time buffer allocation (sic: upstream spelling)

	void ComputeTotalAABB();

	void ComputeAABB(); // accumulates all m_positions into m_aabb

	void SpaceSort();

	void FillSortingData(); // Morton-encodes AABB-normalized positions

	void DoingSort(); // sorts m_MapperSortedGetOriginal by Morton code

	void ComputeInverseMapper(); // builds m_mapperOriginalGetSorted from the sorted mapper

	void MapHessianTable();

	void PrepareCollisionStencils(const EfSet* efSets, const EeSet* eeSets, const VfSet* vfSets, unsigned int* efCounts, unsigned int* eeCounts, unsigned int* vfCounts);

	void MapCollisionStencilIndices(); // remaps stencil vertex indices into sorted order

	void ReorderRealtime(); // rebuilds the whole cluster hierarchy for this solve

	void BuildConnectMaskL0();

	void BuildCollisionConnection(unsigned int* pConnect, const int* pCoarseTable);

	void PreparePrefixSumL0();

	void BuildLevel1();

	void BuildConnectMaskLx(int level);

	void NextLevelCluster(int level);

	void PrefixSumLx(int level);

	void ComputeNextLevel(int level);

	void TotalNodes(); // records m_totalNumberClusters from the level-size table

	void AggregationKernel(); // fills m_denseLevel and m_coarseTables per vertex

	void AdditionalSchwarzHessian2(SeMatrix3f hessian, std::vector& pAdditionalHessian, SeArray2D& pDenseHessian, int v1, int v2, const std::vector& pGoingNext, int nLevel, int vNum);

	void PrepareCollisionHessian();

	void PrepareHessian(const SeMatrix3f* diagonal, const SeMatrix3f* csrOffDiagonals, const int* csrRanges);

	void LDLtInverse512(); // factorizes/inverts the 96x96 per-block systems into m_invSymR

	void BuildResidualHierarchy(const SeVec3fSimd* m_cgResidual);

	void SchwarzLocalXSym(); // applies the packed inverse blocks: m_mappedZ = inv * m_mappedR

	void CollectFinalZ(SeVec3fSimd* m_cgZ); // sums levels and scatters z back to caller order
};

SE_NAMESPACE_END
\ No newline at end of file
diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditionerPreviousVersion.h
b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditionerPreviousVersion.h new file mode 100644 index 0000000..e45950d --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeSchwarzPreconditionerPreviousVersion.h @@ -0,0 +1,2014 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "SeCsr.h" +#include "SeMorton.h" +#include "SeAabbSimd.h" +#include "SeIntrinsic.h" +#include "SeCollisionElements.h" + +#include + +SE_NAMESPACE_BEGIN + +class SeSchwarzPreconditionerPreviousVersion +{ + +public: + + //==== input data + + const SeVec3fSimd* m_positions = nullptr; // the mesh nodal positions + + //==== input mesh topology data for collision computation + + const Int4* m_edges = nullptr; // indices of the two adjacent vertices and the two opposite vertices of edges + const Int4* m_faces = nullptr; // indices of the three adjacent vertices of faces, the fourth is useless + + const SeCsr* m_neighbours = nullptr; // the adjacent information of vertices stored in a csr format + +public: + + //==== call before time integration once a frame + void AllocatePrecoditioner(int numVerts, int numEdges, int numFaces) + { + m_numVerts = numVerts; + m_numEdges = numEdges; + m_numFaces = numFaces; + + if (m_frameIndex == 0) + { + DoAlllocation(); + } + + if (m_frameIndex % 17 != 0) { return; } + + //==== + + ComputeAABB(); + + FillSortingData(); + + DoingSort(); + + ComputeInverseMapper(); + + MapHessianTable(); + + m_frameIndex++; + } + + //==== call before PCG iteration loop + void PreparePreconditioner(const SeMatrix3f* diagonal, const SeMatrix3f* csrOffDiagonals, const int* csrRanges, + const EfSet* efSets, const EeSet* eeSets, const VfSet* vfSets, unsigned int* efCounts, unsigned int* eeCounts, unsigned int* vfCounts) + { + m_mappedNeighborsRemain = m_mappedNeighbors; + m_mappedNeighborsNumRemain = m_mappedNeighborsNum; + + //==== before reordering + + PrepareCollisionStencils(efSets, eeSets, vfSets, efCounts, eeCounts, vfCounts); + + //==== + + ReorderRealtime(); + + //==== + + const int blockNum = m_totalSz / 32; + Utility::MemsetZero(m_invSymR); + Utility::MemsetZero(m_hessian32.Ptr(), m_hessian32.Size()); + Utility::MemsetZero(m_additionalHessian32); + + //==== + + PrepareCollisionHessian(); + + PrepareHessian(diagonal, 
csrOffDiagonals, csrRanges); + + LDLtInverse512(); + } + + //==== call during PCG iterations + void Preconditioning(SeVec3fSimd* z, const SeVec3fSimd* residual, int dim) + { + Utility::MemsetZero(m_mappedZ); + Utility::MemsetZero(m_mappedR); + + BuildResidualHierarchy(residual); + + SchwarzLocalXSym(); + + CollectFinalZ(z); + } + +private: + + int m_numVerts = 0; + int m_numEdges = 0; + int m_numFaces = 0; + + int m_frameIndex = 0; + + int m_totalSz = 0; + int m_numLevel = 0; + + int m_totalNumberClusters; + + + SeAabb m_aabb; + + + SeArray2D m_hessianMapped; + std::vector m_mappedNeighborsNum; + std::vector m_mappedNeighborsNumRemain; + SeArray2D m_mappedNeighbors; + SeArray2D m_mappedNeighborsRemain; + + SeArray2D m_CoarseSpaceTables; + std::vector m_prefixOrignal; + std::vector m_fineConnectMask; + std::vector m_nextConnectMsk; + std::vector m_nextPrefix; + + std::vector m_mappedPos; + + std::vector m_coarseTables; + std::vector m_goingNext; + std::vector m_denseLevel; + std::vector m_levelSize; // .x = current level size .y = (prefixed) current level begin index + + std::vector m_mappedR; + std::vector m_mappedZ; + std::vector m_MapperSortedGetOriginal; // sorted by morton + std::vector m_mapperOriginalGetSorted; + std::vector m_mortonCode; + + SeArray2D m_hessian32; + std::vector m_additionalHessian32; + std::vector m_invSymR; + + int m_stencilNum; + int m_maxStencilNum; + std::vector m_stencils; + std::vector m_stencilIndexMapped; + +private: + + void ComputeLevelNums(int bankSize) + { + constexpr float SizeRatio = 1.5f; + + //==== + + int totalSz = 0; + + int nLevel = 1; + int levelSz = (m_numVerts + bankSize - 1) / bankSize * bankSize; + totalSz += levelSz; + + while (levelSz > 32) + { + levelSz /= 32; + + nLevel++; + levelSz = (levelSz + bankSize - 1) / bankSize * bankSize; + totalSz += levelSz; + } + + m_numLevel = nLevel; + m_totalSz = totalSz * SizeRatio; + } + + void DoAlllocation() + { + constexpr int bankSize = 32; //==== size of diagonal block + + 
ComputeLevelNums(bankSize); + + //==== + + m_MapperSortedGetOriginal.resize(m_numVerts); + m_mapperOriginalGetSorted.resize(m_numVerts); + m_mortonCode.resize(m_numVerts); + + m_hessian32.Resize(bankSize, m_totalSz); + m_mappedZ.resize(m_totalSz); + m_mappedR.resize(m_totalSz); + + m_mappedPos.resize(m_numVerts); + + m_denseLevel.resize(m_numVerts); + m_goingNext.resize(m_numLevel * m_numVerts); + m_coarseTables.resize(m_numVerts); + + m_prefixOrignal.resize(m_numVerts); + m_fineConnectMask.resize(m_numVerts); + m_nextConnectMsk.resize(m_numVerts); + m_nextPrefix.resize(m_numVerts); + + + int triSz = (1 + 96) * 96 / 2 + 16 * 3; + const int blockNum = m_totalSz / 32; + m_invSymR.resize(triSz * blockNum); + m_additionalHessian32.resize(m_totalSz + 1); + + //==== + + m_levelSize.resize(m_numLevel + 1); + m_CoarseSpaceTables.Resize(m_numLevel, m_numVerts); + + int maxNeighbours = 0; + + for (int vId = 0; vId < m_numVerts; ++vId) + { + maxNeighbours = Math::Max(m_neighbours->Size(vId) + 1, maxNeighbours); + } + + m_mappedNeighborsNum.resize(m_numVerts); + m_mappedNeighborsNumRemain.resize(m_numVerts); + m_mappedNeighbors.Resize(maxNeighbours, m_numVerts); + m_mappedNeighborsRemain.Resize(maxNeighbours, m_numVerts); + } + + void ComputeAABB() + { + m_aabb = SeAabb(); + + for (int tid = 0; tid < m_numVerts; ++tid) + { + m_aabb += m_positions[tid]; + } + } + + void FillSortingData() + { + OMP_PARALLEL_FOR + + for (int tid = 0; tid < m_numVerts; ++tid) + { + SeVec3fSimd temp = (m_positions[tid] - m_aabb.Lower) / m_aabb.Extent(); + + SeMorton64 mt; + + mt.Encode(temp.x, temp.y, temp.z); + + m_mortonCode[tid] = mt; + + m_MapperSortedGetOriginal[tid] = tid; + } + } + + void DoingSort() + { + //==== sort m_MapperSortedGetOriginal by m_mortonCode1; + + std::sort(m_MapperSortedGetOriginal.begin(), m_MapperSortedGetOriginal.end(), [&](int i, int j) { return m_mortonCode[i] < m_mortonCode[j]; }); + } + + void ComputeInverseMapper() + { + OMP_PARALLEL_FOR + + for (int vid = 0; vid < 
m_numVerts; ++vid) + { + int originalIndex = m_MapperSortedGetOriginal[vid]; + + m_mapperOriginalGetSorted[originalIndex] = vid; + } + } + + void MapHessianTable() + { + OMP_PARALLEL_FOR + + for (int vid = 0; vid < m_numVerts; ++vid) + { + const auto originalIndex = m_MapperSortedGetOriginal[vid]; + + m_mappedPos[vid] = m_positions[originalIndex]; + + //==== + + int numNeighbors = m_neighbours->Size(originalIndex); + + const int* neighbors = m_neighbours->IdxPtr(originalIndex); + + + m_mappedNeighborsNum[vid] = numNeighbors + 1; // include self + + m_mappedNeighbors[0][vid] = vid; // include self + + for (int k = 1; k < numNeighbors + 1; ++k) + { + unsigned int originalNeighbors = neighbors[k - 1];//m_neighbors[k][originalIndex]; + + m_mappedNeighbors[k][vid] = m_mapperOriginalGetSorted[originalNeighbors]; + } + } + } + + void MapCollisionStencilIndices() + { + m_stencilIndexMapped.resize(m_stencilNum); + + OMP_PARALLEL_FOR + + for (int i = 0; i < m_stencilNum; i++) + { + const auto& s = m_stencils[i]; + + for (int vi = 0; vi < s.verextNumPerStencil; vi++) + { + m_stencilIndexMapped[i][vi] = m_mapperOriginalGetSorted[s.index[vi]]; + } + } + } + + void PrepareCollisionStencils(const EfSet* efSets, const EeSet* eeSets, const VfSet* vfSets, unsigned int* efCounts, unsigned int* eeCounts, unsigned int* vfCounts) + { + int efNum = efCounts[m_numEdges]; + int eeNum = eeCounts[m_numEdges]; + int vfNum = vfCounts[m_numVerts]; + + int totalStencilNum = efNum + eeNum + vfNum; + + if (totalStencilNum > m_maxStencilNum) + { + totalStencilNum = m_maxStencilNum; + printf("stencil size %d exceed max stencils num %d\n", totalStencilNum, m_maxStencilNum); + } + + m_stencilNum = 0; + + OMP_PARALLEL_FOR + + for (int i = 0; i < totalStencilNum; i++) + { + Stencil s; + + if (i < efNum) //process ef + { + const auto& pair = efSets[i]; + + if (pair.m_eId < 0 || pair.m_fId < 0) { continue; } + + const auto& edge = m_edges[pair.m_eId]; + const auto& face = m_faces[pair.m_fId]; + + 
s.verextNumPerStencil = 5; + s.vertexNumOfFirstPrimitive = 2; + + s.index[0] = edge[0]; + s.index[1] = edge[1]; + s.index[2] = face[0]; + s.index[3] = face[1]; + s.index[4] = face[2]; + + s.weight[0] = pair.m_bary[0]; + s.weight[1] = 1.f - pair.m_bary[0]; + s.weight[2] = -pair.m_bary[1]; + s.weight[3] = -pair.m_bary[2]; + s.weight[4] = -(1.f - pair.m_bary[1] - pair.m_bary[2]); + + s.direction = pair.m_normal; + + s.stiff = pair.stiff; + + } + else if (i < efNum + eeNum) // process ee + { + const auto& pair = eeSets[i]; + + if (pair.m_eId1 < 0 || pair.m_eId0 < 0) { continue; } + + s.verextNumPerStencil = 4; + s.vertexNumOfFirstPrimitive = 2; + + const auto& edge0 = m_edges[pair.m_eId0]; + const auto& edge1 = m_edges[pair.m_eId1]; + + s.index[0] = edge0[0]; + s.index[1] = edge0[1]; + s.index[2] = edge1[0]; + s.index[3] = edge1[1]; + + s.weight[0] = pair.m_bary[0]; + s.weight[1] = 1.f - pair.m_bary[0]; + s.weight[2] = -pair.m_bary[1]; + s.weight[3] = -(1.f - pair.m_bary[1]); + + s.direction = pair.m_normal; + + s.stiff = pair.stiff; + } + else if (i < totalStencilNum) //process vf + { + const auto& pair = vfSets[i]; + + if (pair.m_vId < 0 || pair.m_fId < 0) { continue; } + + s.verextNumPerStencil = 4; + s.vertexNumOfFirstPrimitive = 3; + + const auto& face = m_faces[pair.m_fId]; + + s.index[0] = face[0]; + s.index[1] = face[1]; + s.index[2] = face[2]; + s.index[3] = pair.m_vId; + + s.weight[0] = -pair.m_bary[0]; + s.weight[1] = -pair.m_bary[1]; + s.weight[2] = -(1.f - pair.m_bary[2]); + s.weight[3] = 1.f; + + s.direction = pair.m_normal; + + s.stiff = pair.stiff; + } + + int stencilNum = Intrinsic::AtomicAdd(&m_stencilNum, 1); + + m_stencils[stencilNum] = s; + } + + MapCollisionStencilIndices(); //1 + } + + void ReorderRealtime() + { + Utility::MemsetZero(m_levelSize); + + BuildConnectMaskL0(); + + BuildCollisionConnection(m_fineConnectMask.data(), nullptr); //2 + + PreparePrefixSumL0(); + + BuildLevel1(); + + for (int level = 1; level < m_numLevel; level++) + { + 
Utility::MemsetZero(m_nextConnectMsk); + + BuildConnectMaskLx(level); + + BuildCollisionConnection(m_nextConnectMsk.data(), m_CoarseSpaceTables[level - 1]); + + NextLevelCluster(level); + + PrefixSumLx(level); + + ComputeNextLevel(level); + } + + TotalNodes(); + + AggregationKernel(); + } + + void BuildConnectMaskL0() + { + const unsigned int warpDim = 32; + + int nWarps = (m_numVerts + warpDim - 1) / warpDim; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < nWarps; ++warpIdx) + { + for (int laneIdx = 0; laneIdx < warpDim; ++laneIdx) + { + int vId = warpIdx * warpDim + laneIdx; + + if (vId >= m_numVerts) + { + break; + } + + int numNeighbors = m_mappedNeighborsNumRemain[vId]; + + unsigned int connetMsk = 1U << laneIdx; // include self + + int nk = 0; + + for (int k = 0; k < numNeighbors; k++) + { + int vIdConnected = m_mappedNeighborsRemain[k][vId]; + + int warpIdxConnected = vIdConnected / warpDim; + + if (warpIdx == warpIdxConnected) // in the same warp + { + unsigned int laneIdxConnected = vIdConnected % warpDim; + + connetMsk |= (1U << laneIdxConnected); + } + else + { + m_mappedNeighborsRemain[nk][vId] = vIdConnected; + nk++; + } + } + + m_mappedNeighborsNumRemain[vId] = nk; + + // distance cullling + //const SeVec3fSimd& pos = m_mappedPos[vId]; + //for (int it = 0; it < Intrinsic::Popcount32(activeMsk); it++) + //{ + // const float th = AABB_TH * AABB_TH; + // CuFloat3 po(0.0f); + // po.x = Intrinsic::Shuffle(activeMsk, pos.x, it); + // po.y = Intrinsic::Shuffle(activeMsk, pos.y, it); + // po.z = Intrinsic::Shuffle(activeMsk, pos.z, it); + // if (Math::SqrLength(po - pos) < th) + // { + // connetMsk |= (1U << it); + // } + //} + + m_fineConnectMask[vId] = connetMsk; + } + } + } + + void BuildCollisionConnection(unsigned int* pConnect, const int* pCoarseTable) + { + const int bank = 32; + + OMP_PARALLEL_FOR + + for (int i = 0; i < m_stencilNum; i++) + { + const auto& s = m_stencils[i]; + Int5 idx = m_stencilIndexMapped[i]; + unsigned int connMsk[5] = 
{}; + + if (pCoarseTable) + { + for (int it = 0; it < s.verextNumPerStencil; it++) + { + idx[it] = pCoarseTable[idx[it]]; + } + } + + for (int ita = 0; ita < s.verextNumPerStencil; ita++) + { + for (int itb = ita + 1; itb < s.verextNumPerStencil; itb++) + { + unsigned int myID = idx[ita]; + unsigned int otID = idx[itb]; + if (myID == otID) // redundant ? + { + continue; + } + if ((myID / bank == otID / bank)) + { + if (ita < s.vertexNumOfFirstPrimitive && itb >= s.vertexNumOfFirstPrimitive) + { + connMsk[ita] |= (1U << (otID % bank)); + connMsk[itb] |= (1U << (myID % bank)); + } + } + } + } + + for (int it = 0; it < s.verextNumPerStencil; it++) + { + if (connMsk[it]) + { + Intrinsic::AtomicOr(&pConnect[idx[it]], connMsk[it]); + } + } + } + } + + void PreparePrefixSumL0() + { + int warpDim = 32; + + int nWarps = (m_numVerts + warpDim - 1) / warpDim; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < nWarps; ++warpIdx) + { + unsigned int cacheMask[32]; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vId = laneIdx + warpIdx * warpDim; + + if (vId >= m_numVerts) { break; } + + cacheMask[laneIdx] = m_fineConnectMask[vId]; + } + + Intrinsic::SyncWarp(); + + int prefixSum = 0; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * warpDim; + + if (vid >= m_numVerts) { break; } + + unsigned int connetMsk = cacheMask[laneIdx]; + + unsigned int visited = (1U << laneIdx); + + while (connetMsk != -1) + { + unsigned int todo = visited ^ connetMsk; + + if (!todo) + { + break; + } + + unsigned int nextVisit = Intrinsic::Ffs(todo) - 1; + + visited |= (1U << nextVisit); + + connetMsk |= cacheMask[nextVisit]; + } + + m_fineConnectMask[vid] = connetMsk; + + unsigned int electedPrefix = Intrinsic::Popcount32(connetMsk & Intrinsic::LanemaskLt(laneIdx)); + + if (electedPrefix == 0) + { + prefixSum++; + } + } + + m_prefixOrignal[warpIdx] = prefixSum; + } + } + + void BuildLevel1() + { + constexpr unsigned int blockDim = 
1024; + + int nBlocks = (m_numVerts + blockDim - 1) / blockDim; + + OMP_PARALLEL_FOR + + for (int blockIdx = 0; blockIdx < nBlocks; ++blockIdx) + { + unsigned int warpPrefix[32]; + + unsigned int theBlockGlobalPrefix = 0; + + unsigned int globalWarpOffset = blockIdx * 32; + + for (int warpIdx = 0; warpIdx < blockIdx; ++warpIdx) + { + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + theBlockGlobalPrefix += m_prefixOrignal[threadIdx]; + } + } + + for (int warpIdx = 0; warpIdx < 32; ++warpIdx) + { + warpPrefix[warpIdx] = m_prefixOrignal[globalWarpOffset + warpIdx]; + } + + Intrinsic::SyncBlock(); + + for (unsigned int warpIdx = 0; warpIdx < 32; ++warpIdx) + { + unsigned int theWarpGlobalPrefix = theBlockGlobalPrefix; + + for (unsigned int prevWarpIdx = 0; prevWarpIdx < warpIdx; ++prevWarpIdx) + { + theWarpGlobalPrefix += warpPrefix[prevWarpIdx]; + } + + + unsigned int electedMask = 0; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + int vid = threadIdx + blockIdx * blockDim; + + if (vid >= m_numVerts) { break; } + + + if (vid == m_numVerts - 1) + { + m_levelSize[1].x = theWarpGlobalPrefix + warpPrefix[warpIdx]; + m_levelSize[1].y = (m_numVerts + 31) / 32 * 32; + } + + unsigned int connMsk = m_fineConnectMask[vid]; + + unsigned int electedPrefix = Intrinsic::Popcount32(connMsk & Intrinsic::LanemaskLt(laneIdx)); + + if (electedPrefix == 0) + { + electedMask |= 1U << laneIdx; + } + //unsigned int electedMask = Intrinsic::BallotSync(-1, electedPrefix == 0); + } + + + unsigned int lanePrefix[32]; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + int vid = threadIdx + blockIdx * blockDim; + + if (vid >= m_numVerts) { break; } + + + lanePrefix[laneIdx] = Intrinsic::Popcount32(electedMask & Intrinsic::LanemaskLt(laneIdx)); + + lanePrefix[laneIdx] += theWarpGlobalPrefix; + } 
+ + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + unsigned int threadIdx = laneIdx + warpIdx * 32; + + int vid = threadIdx + blockIdx * blockDim; + + if (vid >= m_numVerts) { break; } + + + unsigned int connMsk = m_fineConnectMask[vid]; + + unsigned int elected_lane = Intrinsic::Ffs(connMsk) - 1; + + unsigned int theLanePrefix = lanePrefix[elected_lane]; + + m_CoarseSpaceTables[0][vid] = theLanePrefix; + + m_goingNext[vid] = theLanePrefix + (m_numVerts + 31) / 32 * 32; + } + } + } + } + + void BuildConnectMaskLx(int level) + { + constexpr unsigned int warpDim = 32; + + int nWarps = (m_numVerts + warpDim - 1) / warpDim; + + const unsigned int bank = 32; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < nWarps; ++warpIdx) + { + unsigned int prefixMsk[32]; + unsigned int connectMsk[32]; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + prefixMsk[laneIdx] = m_fineConnectMask[vid]; + + unsigned int coarseVid = m_CoarseSpaceTables[level - 1][vid]; + + connectMsk[laneIdx] = 0;// 1U << (coarseVid % bank); //==== include self + + unsigned int kn = m_mappedNeighborsNumRemain[vid]; + + unsigned int nk = 0; + + for (unsigned int k = 0; k < kn; k++) + { + unsigned int connect = m_mappedNeighborsRemain[k][vid]; + + unsigned int coarseConnect = m_CoarseSpaceTables[level - 1][connect]; + + if (coarseVid / bank == coarseConnect / bank) + { + unsigned int off = coarseConnect % bank; + + connectMsk[laneIdx] |= (1U << off); + } + else + { + m_mappedNeighborsRemain[nk][vid] = connect; + nk++; + } + } + + m_mappedNeighborsNumRemain[vid] = nk; + } + + bool isFullWarp = (prefixMsk[0] == -1); + + if (isFullWarp) + { + unsigned int connectMskFull = 0; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + connectMskFull |= connectMsk[laneIdx]; + } + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int 
vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + connectMsk[laneIdx] = connectMskFull; + } + } + else + { + unsigned int cacheMsk[32]; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + cacheMsk[laneIdx] = 0; + } + + unsigned int electedLane[32]; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + electedLane[laneIdx] = Intrinsic::Ffs(prefixMsk[laneIdx]) - 1; + + if (connectMsk[laneIdx]) + { + //cacheMsk[electedLane[laneIdx]] |= connectMsk[laneIdx]; + Intrinsic::AtomicOr(&cacheMsk[electedLane[laneIdx]], connectMsk[laneIdx]); + } + } + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + connectMsk[laneIdx] = cacheMsk[electedLane[laneIdx]]; + } + } + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + unsigned int coarseVid = m_CoarseSpaceTables[level - 1][vid]; + + unsigned int electedPrefix = Intrinsic::Popcount32(prefixMsk[laneIdx] & Intrinsic::LanemaskLt(laneIdx)); + + if (connectMsk[laneIdx] && electedPrefix == 0) + { + Intrinsic::AtomicOr(m_nextConnectMsk.data() + coarseVid, connectMsk[laneIdx]); + } + } + } + } + + void NextLevelCluster(int level) + { + const int levelNum = m_levelSize[level].x; + + //printf("nextLevelClusters %d %d\n", levelNum, m_levelSize[level].y); + + const int warpDim = 32; + + int numWarps = (m_numVerts + warpDim - 1) / warpDim; + + int maxWarpIdx = (levelNum + warpDim - 1) / warpDim; + + OMP_PARALLEL_FOR + + for (int warpIdx = 0; warpIdx < numWarps; ++warpIdx) + { + unsigned int cachedMsk[32]; + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= (levelNum + 31) / 32 * 32) + { + break;// early culling + } + + unsigned int connectMsk = (1U << laneIdx); + + if (vid < levelNum) + { + connectMsk |= 
                    // --- tail of a warp-connectivity kernel whose start is above this view:
                    // fold the stored per-node mask into this lane's connectivity, cache it,
                    // and stop once the lane index runs past the level's node count.
                    m_nextConnectMsk[vid];// connect to current
                }

                cachedMsk[laneIdx] = connectMsk;

                if (vid >= levelNum)
                {
                    break;
                }
            }

            Intrinsic::SyncWarp();

            unsigned int prefixSum = 0;

            // Transitive closure of the 32-lane connectivity masks: each lane repeatedly
            // ORs in the masks of lanes it can reach until no new lane is discovered.
            for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
            {
                int vid = laneIdx + warpIdx * 32;

                if (vid >= levelNum || vid >= (levelNum + 31) / 32 * 32)
                {
                    break;// early culling
                }

                unsigned int connectMsk = cachedMsk[laneIdx];

                unsigned int visited = (1U << laneIdx);

                while (true)
                {
                    unsigned int todo = visited ^ connectMsk;

                    if (!todo)
                    {
                        break;
                    }

                    unsigned int nextVisist = Intrinsic::Ffs(todo) - 1;

                    visited |= (1U << nextVisist);

                    connectMsk |= cachedMsk[nextVisist];
                }

                m_nextConnectMsk[vid] = connectMsk;

                // A lane is "elected" (cluster representative) iff no lower lane shares
                // its closed connectivity mask.
                unsigned int electedPrefix = Intrinsic::Popcount32(connectMsk & Intrinsic::LanemaskLt(laneIdx));

                if (electedPrefix == 0)
                {
                    prefixSum++;
                }
            }

            // Per-warp count of elected representatives, consumed by PrefixSumLx below.
            if (warpIdx < maxWarpIdx)
            {
                m_nextPrefix[warpIdx] = prefixSum;
            }
        }
    }

    // Exclusive prefix sum over the elected-representative counts of level `level`.
    // Assigns every node of the level to its representative's cluster index and records
    // the redirection in m_goingNext; also writes the size/offset of level+1 into
    // m_levelSize.  This appears to be a CPU port of a CUDA kernel: the blockIdx /
    // warpIdx / laneIdx loops emulate a 1024-thread (32-warp) GPU block.
    void PrefixSumLx(int level)
    {
        const int levelNum = m_levelSize[level].x;       // node count of this level

        const int levelBegin = m_levelSize[level].y;     // global offset of this level

        unsigned int blockDim = 1024;                    // 32 warps of 32 lanes

        int nBlocks = (m_numVerts + blockDim - 1) / blockDim;

        OMP_PARALLEL_FOR

        for (int blockIdx = 0; blockIdx < nBlocks; ++blockIdx)
        {
            unsigned int warpPrefix[32];

            unsigned int theBlockPrefix = 0;

            unsigned int globalWarpOffset = blockIdx * 32;

            // Sum the elected counts of all warps belonging to earlier blocks
            // (global warp indices 0 .. blockIdx*32-1) to get this block's base offset.
            for (int warpIdx = 0; warpIdx < blockIdx; ++warpIdx)
            {
                for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
                {
                    unsigned int threadIdx = laneIdx + warpIdx * 32;

                    unsigned int vid = threadIdx + blockIdx * blockDim;

                    if (vid >= (levelNum + blockDim - 1) / blockDim * blockDim)
                    {
                        break;
                    }

                    theBlockPrefix += m_nextPrefix[threadIdx];
                }
            }

            for (int warpIdx = 0; warpIdx < 32; ++warpIdx)
            {
                warpPrefix[warpIdx] = m_nextPrefix[globalWarpOffset + warpIdx];
            }

            Intrinsic::SyncBlock();

            for (unsigned int warpIdx = 0; warpIdx < 32; ++warpIdx)
            {
                // Offset of this warp = block base + elected counts of preceding warps.
                unsigned int theWarpPrefix = theBlockPrefix;

                for (unsigned int prevWarpIdx = 0; prevWarpIdx < warpIdx; ++prevWarpIdx)
                {
                    theWarpPrefix += warpPrefix[prevWarpIdx];
                }

                unsigned int electedMsk = 0;

                for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
                {
                    unsigned int threadIdx = laneIdx + warpIdx * 32;

                    int vid = threadIdx + blockIdx * blockDim;

                    if (vid >= levelNum)
                    {
                        break;
                    }
                    // The last node of the level publishes the next level's
                    // total cluster count and its (32-aligned) begin offset.
                    if (vid == levelNum - 1)
                    {
                        m_levelSize[level + 1].x = theWarpPrefix + warpPrefix[warpIdx];
                        m_levelSize[level + 1].y = levelBegin + (levelNum + 31) / 32 * 32;
                    }

                    unsigned int connMsk = m_nextConnectMsk[vid];

                    unsigned int electedPrefix = Intrinsic::Popcount32(connMsk & Intrinsic::LanemaskLt(laneIdx));

                    if (electedPrefix == 0)
                    {
                        electedMsk |= (1U << laneIdx);
                    }

                    //unsigned int electedMask = Intrinsic::BallotSync(-1, electedPrefix == 0);
                }

                // lanePrefix[lane] = global cluster index a representative at `lane`
                // would receive (count of elected lanes below it + warp offset).
                unsigned int lanePrefix[32];

                for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
                {
                    lanePrefix[laneIdx] = Intrinsic::Popcount32(electedMsk & Intrinsic::LanemaskLt(laneIdx));
                    lanePrefix[laneIdx] += theWarpPrefix;
                }

                for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
                {
                    unsigned int threadIdx = laneIdx + warpIdx * 32;

                    int vid = threadIdx + blockIdx * blockDim;

                    if (vid >= levelNum) { break; }

                    // Each node adopts the cluster index of the lowest lane in its
                    // connectivity mask (its representative).
                    int electedLane = Intrinsic::Ffs(m_nextConnectMsk[vid]) - 1;

                    unsigned int theLanePrefix = lanePrefix[electedLane];

                    // m_nextConnectMsk is repurposed here: it now stores the node's
                    // destination cluster index (consumed by ComputeNextLevel).
                    m_nextConnectMsk[vid] = theLanePrefix;

                    m_goingNext[vid + levelBegin] = theLanePrefix + levelBegin + (levelNum + 31) / 32 * 32;
                }
            }
        }
    }

    // Propagates the coarse-space table one level up: a vertex's level-`level`
    // cluster is the cluster index (stored in m_nextConnectMsk by PrefixSumLx)
    // of its level-(level-1) cluster.
    void ComputeNextLevel(int level)
    {
        OMP_PARALLEL_FOR

        for (int vid = 0; vid < m_numVerts; ++vid)
        {
            int next = m_CoarseSpaceTables[level - 1][vid];

            m_CoarseSpaceTables[level][vid] = m_nextConnectMsk[next];
        }
    }

    // Total number of clusters over all levels = end offset recorded for the last level.
    void TotalNodes()
    {
        m_totalNumberClusters = m_levelSize[m_numLevel].y;
    }

    // For every vertex, walks the m_goingNext chain across all levels and records
    // (a) the coarse ancestor at each level in m_coarseTables, and (b) the first
    // level at which the whole warp has collapsed into lane 0's cluster
    // (m_denseLevel).  Warp/lane loops emulate a GPU kernel on the CPU.
    void AggregationKernel()
    {
        const int warpDim = 32;

        int numWarps = (m_numVerts +
        warpDim - 1) / warpDim;

        OMP_PARALLEL_FOR

        for (int warpIdx = 0; warpIdx < numWarps; ++warpIdx)
        {
            unsigned int firstInWarp = warpIdx * 32;   // NOTE(review): unused below

            int curr[32];      // current ancestor per lane
            int next[32];      // next-level ancestor per lane
            int aggLevel[32];  // first level where the lane joins lane 0's cluster

            for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
            {
                int vid = laneIdx + warpIdx * 32;

                if (vid >= m_numVerts) { break; }

                curr[laneIdx] = vid;

                aggLevel[laneIdx] = m_numLevel - 1;
            }

            Int4 coarseTable[32];

            for (int l = 0; l < m_numLevel - 1; ++l)
            {
                for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
                {
                    int vid = laneIdx + warpIdx * 32;

                    if (vid >= m_numVerts) { break; }

                    next[laneIdx] = m_goingNext[curr[laneIdx]];
                }

                Intrinsic::SyncWarp();

                for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
                {
                    int vid = laneIdx + warpIdx * 32;

                    if (vid >= m_numVerts) { break; }

                    // Lane agrees with lane 0 at this level -> record the earliest such level.
                    if (next[laneIdx] == next[0])
                    {
                        aggLevel[laneIdx] = Math::Min(l, aggLevel[laneIdx]);
                    }

                    curr[laneIdx] = next[laneIdx];
                    coarseTable[laneIdx][l] = next[laneIdx];   // Int4: at most 4 levels stored
                }
            }


            for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
            {
                int vid = laneIdx + warpIdx * 32;

                if (vid >= m_numVerts) { break; }

                m_denseLevel[vid] = aggLevel[laneIdx];

                m_coarseTables[vid] = coarseTable[laneIdx];
            }
        }
    }

    // Scatters a pairwise (v1,v2) hessian contribution into the Schwarz hierarchy.
    // Climbs m_goingNext until both ids land in the same 32-wide bank, writes the
    // off-diagonal into the dense banked matrix there, and pushes a diagonal
    // correction one further level up.
    // NOTE(review): the container template arguments were stripped by extraction —
    // presumably std::vector<SeMatrix3f>, SeArray2D<SeMatrix3f>, std::vector<int>; confirm
    // against the original header.  Parameter vNum is unused here.
    void AdditionalSchwarzHessian2(SeMatrix3f hessian, std::vector& pAdditionalHessian, SeArray2D& pDenseHessian, int v1, int v2, const std::vector& pGoingNext, int nLevel, int vNum)
    {
        int level = 0;
        unsigned int myID = v1;
        unsigned int otID = v2;
        const unsigned int bank = 32;

        // Walk both vertices up the hierarchy until they share a bank of 32.
        while (myID / bank != otID / bank && level < nLevel)
        {
            myID = pGoingNext[myID];
            otID = pGoingNext[otID];
            level++;
        }

        // Never met within the hierarchy: contribution is dropped.
        if (level >= nLevel)
            return;

        // Symmetric off-diagonal entries in the banked dense storage.
        Intrinsic::AtomicAdd(&pDenseHessian[otID % bank][myID], hessian);
        Intrinsic::AtomicAdd(&pDenseHessian[myID % bank][otID], hessian);

        if (level < nLevel - 1)
        {
            myID = pGoingNext[myID];
            otID = pGoingNext[otID];

            if (myID == otID)
            {
                // Both map to the same coarse node: the two contributions collapse
                // onto one diagonal entry.
                Intrinsic::AtomicAdd(&pAdditionalHessian[myID], hessian * 2.0f);
            }
            else
            {
                Intrinsic::AtomicAdd(&pAdditionalHessian[myID], hessian);
                Intrinsic::AtomicAdd(&pAdditionalHessian[otID], hessian);
            }
        }
    }

    // Turns each collision stencil into a rank-1 hessian (stiff * d d^T) and
    // scatters weighted diagonal and pairwise contributions into the hierarchy.
    void PrepareCollisionHessian()
    {
        OMP_PARALLEL_FOR

        for (int i = 0; i < m_stencilNum; i++)
        {
            const auto& s = m_stencils[i];
            auto idx = m_stencilIndexMapped[i];

            Float3 d(s.direction[0], s.direction[1], s.direction[2]);

            SeMatrix3f hessian = OuterProduct(d, d * s.stiff);

            // Per-vertex diagonal terms, weighted by the squared stencil weight.
            for (int it = 0; it < s.verextNumPerStencil; it++)
            {
                Intrinsic::AtomicAdd(&m_additionalHessian32[idx[it]], hessian * Math::Square(s.weight[it]));
            }

            // Pairwise cross terms for every vertex pair in the stencil.
            for (int ita = 0; ita < s.verextNumPerStencil; ita++)
            {
                for (int itb = ita + 1; itb < s.verextNumPerStencil; itb++)
                {
                    AdditionalSchwarzHessian2(s.weight[ita] * s.weight[itb] * hessian, m_additionalHessian32, m_hessian32, idx[ita], idx[itb], m_goingNext, m_numLevel, m_numVerts);
                }
            }
        }
    }

    // Assembles the banked hessian hierarchy from the per-vertex diagonal blocks,
    // the CSR off-diagonal blocks, and the previously accumulated additional
    // (collision) hessians.
    void PrepareHessian(const SeMatrix3f* diagonal, const SeMatrix3f* csrOffDiagonals, const int* csrRanges)
    {
        const unsigned int bank = 32;

        int nVC = (m_numVerts + 31) / 32 * 32;   // vertex count rounded up to warp size

        OMP_PARALLEL_FOR

        // Coarse-level nodes: propagate their additional diagonal up the chain.
        for (int vid = nVC; vid < m_totalNumberClusters; ++vid)
        {
            auto oldDiagonal = m_additionalHessian32[vid];
            int myID = vid;
            Intrinsic::AtomicAdd(&m_hessian32[myID % bank][myID], oldDiagonal);
            while (true)
            {
                myID = m_goingNext[myID];
                if (myID >= m_totalNumberClusters)
                {
                    break;
                }
                Intrinsic::AtomicAdd(&m_hessian32[myID % bank][myID], oldDiagonal);
            }
        }

        int numWarps = (nVC + 31) / 32;

        OMP_PARALLEL_FOR

        for (int warpIdx = 0; warpIdx < numWarps; ++warpIdx)
        {

            // Emulated CUDA shared memory: per-warp accumulator, one slot per level.
            /*__shared__*/ SeMatrix3f AtomicCache[10]; Utility::MemsetZero(AtomicCache, 10);

            for (int laneIdx = 0; laneIdx < 32; ++laneIdx)
            {
                int vid = laneIdx + warpIdx * 32;

                if (vid >= nVC) { break; }

                if (vid < m_numVerts)
                {
                    const int denseLevel = m_denseLevel[vid];

                    const int vidOrig =
                    m_MapperSortedGetOriginal[vid];

                    int oldNum = m_mappedNeighborsNum[vid];

                    auto oldDiagonal = diagonal[vidOrig] + m_additionalHessian32[vid];

                    m_hessian32[vid % bank][vid] += oldDiagonal; // self diagonal

                    {
                        // Propagate the diagonal up to this vertex's dense level;
                        // the remainder is parked in the per-warp AtomicCache.
                        int level = 0;
                        int myID = vid;
                        while (level < denseLevel)
                        {
                            myID = m_goingNext[myID];
                            Intrinsic::AtomicAdd(&m_hessian32[myID % bank][myID], oldDiagonal);
                            //printf("%d %d", level, denseLevel);
                            level++;
                        }
                        Intrinsic::AtomicAdd(&AtomicCache[level], oldDiagonal);
                    }

                    // Off-diagonal neighbour blocks (k = 0 is the vertex itself).
                    for (int k = 1; k < oldNum; k++)
                    {
                        const unsigned int neighbor = m_mappedNeighbors[k][vid];

                        SeMatrix3f mat = csrOffDiagonals[csrRanges[vidOrig] + k - 1];

                        int level = 0;
                        unsigned int levelSz = m_numVerts;   // NOTE(review): unused
                        unsigned int myID = vid;
                        unsigned int otID = neighbor;

                        // Climb until both endpoints share a 32-wide bank.
                        while (myID / bank != otID / bank && level < m_numLevel)
                        {
                            level++;
                            myID = m_goingNext[myID];
                            otID = m_goingNext[otID];
                        }

                        if (level >= m_numLevel)
                        {
                            //printf("%d\n", level);
                            continue;
                        }


                        // Level 0 entries are lane-exclusive, so a plain add suffices.
                        if (level == 0)
                            m_hessian32[otID % bank][myID] += mat;
                        else
                            Intrinsic::AtomicAdd(&m_hessian32[otID % bank][myID], mat);

                        while (level < denseLevel)
                        {
                            myID = m_goingNext[myID];
                            Intrinsic::AtomicAdd(&m_hessian32[myID % bank][myID], mat);
                            //printf("%d %d", level, denseLevel);
                            level++;
                        }
                        Intrinsic::AtomicAdd(&AtomicCache[level], mat);
                    }
                }
            }

            Intrinsic::SyncWarp();

            // Lane 0 flushes the cached per-level sums up the warp's ancestor chain.
            int vid = warpIdx * 32;

            if (vid < nVC)
            {
                SeMatrix3f total(0.f);

                unsigned int myID = vid;

                for (int level = 0; level < m_numLevel - 1; level++)
                {
                    total += AtomicCache[level];

                    myID = m_goingNext[myID];

                    Intrinsic::AtomicAdd(&m_hessian32[myID % bank][myID], total);
                }
            }
        }
    }

    // Explicitly inverts each 96x96 (32 vertices x 3 DOF) banked hessian block via
    // full Gauss-Jordan elimination (despite the LDLt name) and stores the inverse
    // in the packed layout m_invSymR consumed by SchwarzLocalXSym: 96 diagonal
    // entries, then upper-triangle / mid / lower-trapezoid strips of 4-float banks.
    void LDLtInverse512()
    {
        int triSz = (1 + 96) * 96 / 2 + 16 * 3;   // packed floats per 96x96 block

        const int blockNum = m_totalSz / 32;

        for (int block = 0; block < blockNum; block++)
        {
            float A[96][96] = {};
            float B[96][96] = {};

            for (int it = 0; it < 96; it++)
            {
                B[it][it] = 1.0f;
            }
            // Gather the 32x32 grid of 3x3 blocks into the dense 96x96 matrix A;
            // an all-zero diagonal block is replaced by identity to keep A invertible.
            for (int x = 0; x < 32; x++)
            {
                for (int y = 0; y < 32; y++)
                {
                    SeMatrix3f temp = m_hessian32[y][x + block * 32];

                    if (x == y && temp(0, 0) == 0.0f)
                    {
                        temp = SeMatrix3f::Identity();
                    }
                    for (int ii = 0; ii < 3; ii++)
                    {
                        for (int jj = 0; jj < 3; jj++)
                        {
                            A[x * 3 + ii][y * 3 + jj] = temp(ii, jj);
                        }
                    }
                }
            }
            // Forward elimination (downward): zero the sub-diagonal of A, recording
            // the multipliers in B.
            for (int x = 0; x < 96; x++)
            {
                float diag = A[x][x];
                for (int y = x + 1; y < 96; y++)
                {
                    if (A[y][x] == 0.0f)
                        continue;
                    float rt = -A[y][x] / diag;
                    A[y][x] = 0.0f;

                    for (int xx = x + 1; xx < 96; xx++)
                    {
                        A[y][xx] += A[x][xx] * rt;
                    }

                    B[y][x] = rt;

                    for (int xx = 0; xx < x; xx++)
                    {
                        B[y][xx] += B[x][xx] * rt;
                    }
                }
            }
            /*if (block == pi)
            {
                for (int x = 0; x < 96; x++)
                {
                    for (int y = 0; y < 96; y++)
                    {
                        of3 << B[x][y] << " ";
                    }
                    of3 << std::endl;
                }
            }*/

            // Back substitution (upward): zero the super-diagonal, applying the same
            // row operations to B.
            for (int x = 95; x >= 0; x--)
            {
                float diag = A[x][x];
                for (int y = x - 1; y >= 0; y--)
                {
                    if (A[y][x] == 0.0f)
                    {
                        continue;
                    }

                    float rt = -A[y][x] / diag;
                    A[y][x] = 0.0f;

                    for (int xx = x + 1; xx < 96; xx++)
                    {
                        A[y][xx] += A[x][xx] * rt;
                    }
                    for (int xx = 0; xx < 96; xx++)
                    {
                        B[y][xx] += B[x][xx] * rt;
                    }
                }
            }

            // Scale by the remaining diagonal: B now holds the inverse of A.
            for (int x = 0; x < 96; x++)
            {
                for (int y = 0; y < 96; y++)
                {
                    B[x][y] *= 1.f / A[x][x];
                }
            }

            //diagonal
            for (int x = 0; x < 32; x++)
            {
                float d0 = B[x * 3 + 0][x * 3 + 0];
                float d1 = B[x * 3 + 1][x * 3 + 1];
                float d2 = B[x * 3 + 2][x * 3 + 2];

                m_invSymR[block * triSz + x + 0] = d0;
                m_invSymR[block * triSz + x + 32] = d1;
                m_invSymR[block * triSz + x + 64] = d2;
            }

            // upper triangle
            for (int y = 0; y < 16; y++)
            {
                for (int x = 0; x < 32; x++)
                {
                    float value = 0.f;
                    if (x <= y)
                    {
                        if (y != 15)
                            value = B[31 - y + x][x];
                    }
                    else
                        value = B[x][x - y - 1];

                    int bank = y / 4;
                    int off = y % 4;

                    m_invSymR[block * triSz + 96 + x * 4 + off + bank * 32 * 4] = value;
                }
            }

            // mid
            for (int y = 0; y < 32; y++)
            {
                for (int x = 0; x < 32; x++)
                {
                    float value = B[x + 32][y + x];
                    int bank = y / 4;
                    int off = y % 4;

                    m_invSymR[block * triSz + 96 + 512 + x * 4 + off + bank * 32 * 4] = value;
                }
            }

            for (int y = 0; y < 16; y++)
            {
                for (int x = 0; x < 32; x++)
                {
                    float value = 0.f;
                    if (x <= y)
                    {
                        if (y != 15)
                            value = B[63 - y + x][x];
                    }
                    else
                    {
                        value = B[x + 32][x - y - 1];
                    }

                    int bank = y / 4;
                    int off = y % 4;

                    m_invSymR[block * triSz + 96 + 512 + 1024 + x * 4 + off + bank * 32 * 4] = value;
                }
            }

            // down
            for (int y = 0; y < 64; y++)
            {
                for (int x = 0; x < 32; x++)
                {
                    float value = B[x + 64][y + x];
                    int bank = y / 4;
                    int off = y % 4;

                    m_invSymR[block * triSz + 96 + 2048 + x * 4 + off + bank * 32 * 4] = value;
                }
            }

            for (int y = 0; y < 16; y++)
            {
                for (int x = 0; x < 32; x++)
                {
                    float value = 0.f;
                    if (x <= y)
                    {
                        if (y != 15)
                            value = B[95 - y + x][x];
                    }
                    else
                    {
                        value = B[x + 64][x - y - 1];
                    }
                    int bank = y / 4;
                    int off = y % 4;

                    m_invSymR[block * triSz + 96 + 4096 + x * 4 + off + bank * 32 * 4] = value;
                }
            }
        }
    }

    // Restricts the fine CG residual onto every level of the hierarchy: copies it
    // into sorted order (m_mappedR) and accumulates per-cluster sums into the
    // coarse nodes listed in m_coarseTables.
    void BuildResidualHierarchy(const SeVec3fSimd* m_cgResidual)
    {
        int numWarp = (m_numVerts + 31) / 32;

        OMP_PARALLEL_FOR

        for (int warpIdx = 0; warpIdx < numWarp; ++warpIdx)
        {
            unsigned int vidLead = warpIdx * 32;

            unsigned int activeMsk = 0;

            for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx)
            {
                int vid = laneIdx + warpIdx * 32;

                activeMsk |= vid < m_numVerts ?
(1U << laneIdx) : (0U); + } + + bool isFullConnected = (m_fineConnectMask[vidLead] == -1 && activeMsk == -1); + + if (isFullConnected) + { + SeVec3fSimd sumR(0.f); + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + unsigned int vidOrig = m_MapperSortedGetOriginal[vid]; + + SeVec3fSimd r = m_cgResidual[vidOrig]; + + m_mappedR[vid] = r; + + sumR += r; + } + + for (int levelIdx = 0; levelIdx < m_numLevel - 1; ++levelIdx) + { + int tb = m_coarseTables[vidLead][levelIdx]; + + Intrinsic::AtomicAdd(&m_mappedR[tb], sumR); + } + } + else + { + /*__shared__*/ SeVec3fSimd c_sumResidual[32]; Utility::MemsetZero(c_sumResidual, 32); + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + unsigned int vidOrig = m_MapperSortedGetOriginal[vid]; + + SeVec3fSimd r = m_cgResidual[vidOrig]; + + m_mappedR[vid] = r; + + //==== + + unsigned int connectMsk = m_fineConnectMask[vid]; + + int elected_lane = Intrinsic::Ffs(connectMsk) - 1; + + Intrinsic::AtomicAdd(&c_sumResidual[elected_lane], r); + } + + Intrinsic::SyncWarp(); + + for (unsigned int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32; + + if (vid >= m_numVerts) { break; } + + unsigned int connectMsk = m_fineConnectMask[vid]; + + unsigned int electedPrefix = Intrinsic::Popcount32(connectMsk & Intrinsic::LanemaskLt(laneIdx)); + + if (electedPrefix == 0) + { + const SeVec3fSimd& r = c_sumResidual[laneIdx]; + + const Int4& table = m_coarseTables[vid]; + + for (int it = 1; it < Math::Min(m_numLevel, 4); it++) + { + int now = table[it - 1]; + + Intrinsic::AtomicAdd(&m_mappedR[now], r); + } + } + } + } + } + } + + void SchwarzLocalXSym() + { + constexpr int blockDim = 128; + + int nBlocks = (m_totalSz + blockDim - 1) / blockDim; + + const int triSz = (1 + 96) * 96 / 2 + 16 * 3; + + OMP_PARALLEL_FOR + + for (int blockIdx = 0; blockIdx < 
nBlocks; ++blockIdx) + { + float cacheRhs[4][96]; + float cacheOut[4][96]; + + for (int warpIdx = 0; warpIdx < 4; ++warpIdx) + { + float* const cRhs = cacheRhs[warpIdx]; + float* const cOut = cacheOut[warpIdx]; + + float* pMatrix[32]; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32 + blockIdx * blockDim; + + if (vid >= m_totalNumberClusters) { break; } + + const auto globalWarpIdx = vid / 32; + + pMatrix[laneIdx] = m_invSymR.data() + triSz * globalWarpIdx; + + // diagonal + + float d0 = pMatrix[laneIdx][laneIdx + 0]; + float d1 = pMatrix[laneIdx][laneIdx + 32]; + float d2 = pMatrix[laneIdx][laneIdx + 64]; + + auto rhs = m_mappedR[vid]; + + cRhs[laneIdx * 3 + 0] = rhs.x; + cRhs[laneIdx * 3 + 1] = rhs.y; + cRhs[laneIdx * 3 + 2] = rhs.z; + cOut[laneIdx * 3 + 0] = rhs.x * d0; + cOut[laneIdx * 3 + 1] = rhs.y * d1; + cOut[laneIdx * 3 + 2] = rhs.z * d2; + + pMatrix[laneIdx] += 32 * 3; + } + + Intrinsic::SyncWarp(); + + Float4 prefetchValue4[32]; + + float singularRHS[32]; + float singularSum[32]; + float normalRHS[32]; + float normalSum[32]; + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32 + blockIdx * blockDim; + + if (vid >= m_totalNumberClusters) { break; } + + singularRHS[laneIdx] = cRhs[laneIdx]; + + singularSum[laneIdx] = 0.f; + + //------------------------------------------------ + // upper tri + + normalRHS[laneIdx] = cRhs[laneIdx]; + normalSum[laneIdx] = 0.f; + + prefetchValue4[laneIdx] = ((const Float4*)pMatrix[laneIdx])[laneIdx]; + + for (int bankIdx = 0; bankIdx < 4; bankIdx++) + { + Float4 value = ((const Float4*)pMatrix[laneIdx])[laneIdx + bankIdx * 32 + 32]; + + int offset = bankIdx * 4; + + for (int y = offset; y < offset + 4; y++) + { + Intrinsic::SyncWarp(); + + float prefetchValue = prefetchValue4[laneIdx][y - offset]; + + if (laneIdx <= y) + { + if (y != 15) + { + float otherRhs = cRhs[31 - y + laneIdx]; + cOut[31 - y + laneIdx] += prefetchValue * singularRHS[laneIdx]; + 
singularSum[laneIdx] += prefetchValue * otherRhs; + } + } + else + { + float otherRhs = cRhs[laneIdx - y - 1]; + cOut[laneIdx - y - 1] += prefetchValue * normalRHS[laneIdx]; + normalSum[laneIdx] += prefetchValue * otherRhs; + } + } + + prefetchValue4[laneIdx] = value; + } + } + + Intrinsic::SyncWarp(); + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32 + blockIdx * blockDim; + + if (vid >= m_totalNumberClusters) { break; } + + pMatrix[laneIdx] += 16 * 32; + + cOut[laneIdx] += normalSum[laneIdx]; + //------------------------------------------------ + // mid trapezoid + + normalRHS[laneIdx] = cRhs[laneIdx + 32]; + normalSum[laneIdx] = 0.f; + + for (int bank = 0; bank < 8; bank++) + { + Float4 value = ((const Float4*)pMatrix[laneIdx])[laneIdx + bank * 32 + 32]; + for (int y = bank * 4; y < bank * 4 + 4; y++) + { + Intrinsic::SyncWarp(); + + float prefetchValue = prefetchValue4[laneIdx][y - bank * 4]; + float otherRhs = cRhs[y + laneIdx]; + + cOut[y + laneIdx] += prefetchValue * normalRHS[laneIdx]; + normalSum[laneIdx] += prefetchValue * otherRhs; + } + prefetchValue4[laneIdx] = value; + } + + pMatrix[laneIdx] += 32 * 32; + + for (int bankIdx = 0; bankIdx < 4; bankIdx++) + { + Float4 value = ((const Float4*)pMatrix[laneIdx])[laneIdx + bankIdx * 32 + 32]; + + for (int y = bankIdx * 4; y < bankIdx * 4 + 4; y++) + { + Intrinsic::SyncWarp(); + + float prefetchValue = prefetchValue4[laneIdx][y - bankIdx * 4]; + + if (laneIdx <= y) + { + if (y != 15) + { + float otherRhs = cRhs[63 - y + laneIdx]; + cOut[63 - y + laneIdx] += prefetchValue * singularRHS[laneIdx]; + singularSum[laneIdx] += prefetchValue * otherRhs; + } + } + else + { + float otherRhs = cRhs[laneIdx - y - 1]; + cOut[laneIdx - y - 1] += prefetchValue * normalRHS[laneIdx]; + normalSum[laneIdx] += prefetchValue * otherRhs; + } + } + prefetchValue4[laneIdx] = value; + } + pMatrix[laneIdx] += 32 * 16; + } + + Intrinsic::SyncWarp(); + + for (int laneIdx = 0; laneIdx < 32; 
++laneIdx) + { + int vid = laneIdx + warpIdx * 32 + blockIdx * blockDim; + + if (vid >= m_totalNumberClusters) { break; } + + cOut[laneIdx + 32] += normalSum[laneIdx]; + //------------------------------------------------ + // lower trapezoid + normalRHS[laneIdx] = cRhs[laneIdx + 64]; + normalSum[laneIdx] = 0.f; + normalRHS[laneIdx] = cRhs[laneIdx + 64]; + normalSum[laneIdx] = 0.f; + + for (int bank = 0; bank < 16; bank++) + { + Float4 value = ((const Float4*)pMatrix[laneIdx])[laneIdx + bank * 32 + 32]; + + for (int y = bank * 4; y < bank * 4 + 4; y++) + { + Intrinsic::SyncWarp(); + + float prefetchValue = prefetchValue4[laneIdx][y - bank * 4]; + float otherRhs = cRhs[y + laneIdx]; + + cOut[y + laneIdx] += prefetchValue * normalRHS[laneIdx]; + normalSum[laneIdx] += prefetchValue * otherRhs; + } + + prefetchValue4[laneIdx] = value; + } + + pMatrix[laneIdx] += 32 * 64; + + for (int bank = 0; bank < 4; bank++) + { + Float4 value(0.0f); + + if (bank != 3) + value = ((const Float4*)pMatrix[laneIdx])[laneIdx + bank * 32 + 32]; + + for (int y = bank * 4; y < bank * 4 + 4; y++) + { + Intrinsic::SyncWarp(); + + float prefetchValue = prefetchValue4[laneIdx][y - bank * 4]; + + if (laneIdx <= y) + { + if (y != 15) + { + float otherRhs = cRhs[95 - y + laneIdx]; + cOut[95 - y + laneIdx] += prefetchValue * singularRHS[laneIdx]; + singularSum[laneIdx] += prefetchValue * otherRhs; + } + } + else + { + float otherRhs = cRhs[laneIdx - y - 1]; + cOut[laneIdx - y - 1] += prefetchValue * normalRHS[laneIdx]; + normalSum[laneIdx] += prefetchValue * otherRhs; + } + } + + prefetchValue4[laneIdx] = value; + } + } + + Intrinsic::SyncWarp(); + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + warpIdx * 32 + blockIdx * blockDim; + + if (vid >= m_totalNumberClusters) { break; } + + cOut[laneIdx + 64] += normalSum[laneIdx]; + cOut[laneIdx] += singularSum[laneIdx]; + } + + Intrinsic::SyncWarp(); + + for (int laneIdx = 0; laneIdx < 32; ++laneIdx) + { + int vid = laneIdx + 
                    warpIdx * 32 + blockIdx * blockDim;

                    if (vid >= m_totalNumberClusters) { break; }

                    // Repack the three scalar lanes back into a SIMD vector.
                    SeVec3fSimd out(0.0f);
                    out.x = cacheOut[warpIdx][laneIdx * 3 + 0];
                    out.y = cacheOut[warpIdx][laneIdx * 3 + 1];
                    out.z = cacheOut[warpIdx][laneIdx * 3 + 2];

                    m_mappedZ[vid] = out;
                }
            }
        }
    }

    // Prolongation + output: for every fine vertex, sums its own z with the z of
    // its coarse ancestors (up to 4 levels, per the Int4 table) and writes the
    // result back in the caller's original vertex order.
    void CollectFinalZ(SeVec3fSimd* m_cgZ)
    {
        for (int vid = 0; vid < m_numVerts; ++vid)
        {
            unsigned int mappedIndex = m_MapperSortedGetOriginal[vid];

            auto z = m_mappedZ[vid];

            Int4 table = m_coarseTables[vid];

            for (int l = 1; l < Math::Min(m_numLevel, 4); l++)
            {
                int now = table[l - 1];

                z += m_mappedZ[now];
            }

            m_cgZ[mappedIndex] = z;
        }
    }
};


SE_NAMESPACE_END
\ No newline at end of file
diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeUtility.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeUtility.h
new file mode 100644
index 0000000..3ec1d84
--- /dev/null
+++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeUtility.h
@@ -0,0 +1,112 @@
+/////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2002 - 2022,
+// All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  1. Redistributions of source code must retain the above copyright
+//     notice, this list of conditions and the following disclaimer.
+//  2. Redistributions in binary form must reproduce the above copyright
+//     notice, this list of conditions and the following disclaimer in the
+//     documentation and/or other materials provided with the distribution.
+//  3. The names of its contributors may not be used to endorse or promote
+//     products derived from this software without specific prior written
+//     permission.
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "SePreDefine.h" + +SE_NAMESPACE_BEGIN + +namespace Utility +{ + template + size_t ByteSize(const std::vector & data) + { + return sizeof(Type) * data.size(); + } + + template + void Memcpy(Type * dstData, const Type * srcData, size_t size) + { + std::memcpy(dstData, srcData, size * sizeof(Type)); + } + + template + void Memcpy(std::vector & data, const std::vector & srcData, size_t size) + { + /*if(ByteSize(data) < ByteSize(srcData)) + { + SE_WARNING_LOG("DesSize is less than Src Size!"); + }*/ + std::memcpy(data.data(), srcData.data(), SE_MIN(SE_MIN(ByteSize(data), ByteSize(srcData)), size * sizeof(Type1))); + } + + template + void Memset(std::vector& data, const Type & value) + { + std::fill(data.begin(), data.end(), value); + } + + template + void Memset(Type* data, const Type& value, size_t size) + { + std::fill(data, data + size, value); + } + + template + void MemsetZero(std::vector & data) + { + std::memset(data.data(), 0, ByteSize(data)); + } + + template + void MemsetZero(Type * data, size_t size) + { + std::memset(data, 0, sizeof(Type) * size); + } + + template + void MemsetMinusOne(std::vector & data) + { + std::memset(data.data(), -1, 
ByteSize(data)); + } + + template + void MemsetMinusOne(Type * data, size_t size) + { + std::memset(data, -1, sizeof(Type) * size); + } + + template + void ClearAndShrink(std::vector & data) + { + data.clear(); data.shrink_to_fit(); + } + + template + void ResizeAndShrink(std::vector & data, size_t dim) + { + data.resize(dim); data.shrink_to_fit(); + } + + template + void CopyAndShrink(std::vector & desData, const std::vector & srcData) + { + desData = srcData; desData.shrink_to_fit(); + } +} + +SE_NAMESPACE_END diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeVector.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeVector.h new file mode 100644 index 0000000..671b9f5 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeVector.h @@ -0,0 +1,433 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "SePreDefine.h" + +#include + +SE_NAMESPACE_BEGIN + +#define SE_VEC2_ALIGN(Type) SE_ALIGN(SE_MIN(sizeof(Type) * 2, 16)) //! Maximum align bits -> 64. +#define SE_VEC4_ALIGN(Type) SE_ALIGN(SE_MAX(sizeof(Type) * 4, 16)) //! Maximum align bits -> 64. + +/************************************************************************* +***************************** SDVector ***************************** +*************************************************************************/ + +enum VecDescription : int { SIMD_VEC3 = -1, VEC2_XY = 2, VEC3_XYZ = 3, VEC4_XYZW = 4 }; + +template struct SeVector; +template using SeVector2 = SeVector; +template using SeVector3 = SeVector; +template using SeVector4 = SeVector; + +/************************************************************************* +***************************** SDVector ***************************** +*************************************************************************/ + +template struct SeVector +{ + static constexpr int Elements = N; + + Type m_data[N]; + + SeVector() + { + for (int i = 0; i < N; ++i) m_data[i] = 0; + } + + SeVector(Type scalar) + { + for (int i = 0; i < N; ++i) m_data[i] = scalar; + } + + const Type & operator[](unsigned int i) const { SE_ASSERT(i < Elements); return m_data[i]; } + + Type & operator[](unsigned int i) { SE_ASSERT(i < Elements); return m_data[i]; } + + SeVector operator+(const SeVector & a) const + { + SeVector ans; + for 
(int i = 0; i < N; ++i) + ans[i] = m_data[i] + a[i]; + return ans; + } + SeVector operator-(const SeVector & a) const + { + SeVector ans; + for (int i = 0; i < N; ++i) + ans[i] = m_data[i] - a[i]; + return ans; + } + + SeVector operator*(const Type scalar) const + { + SeVector ans; + for (int i = 0; i < N; ++i) + ans[i] = m_data[i] * scalar; + return ans; + } + SeVector operator/(const Type scalar) const + { + Type invScalar = Type(1) / scalar; + SeVector ans; + for (int i = 0; i < N; ++i) + ans[i] = m_data[i] * invScalar; + return ans; + } + + void operator+=(const SeVector & a) + { + for (int i = 0; i < N; ++i) + m_data[i] += a[i]; + } + + void operator-=(const SeVector & a) + { + for (int i = 0; i < N; ++i) + m_data[i] -= a[i]; + } + + friend SeVector operator*(Type scalar, const SeVector & a) { return a * scalar; } + + Type Dot(const SeVector & rhs) const + { + Type sum = 0; + for (int i = 0; i < N; ++i) + sum += m_data[i] * rhs.values[i]; + return sum; + } + + Type Sum() const + { + Type sum = 0; + for (int i = 0; i < N; ++i) + sum += m_data[i]; + return sum; + } + + Type SqrLength()const + { + Type sum = 0; + for (int i = 0; i < N; ++i) + sum += m_data[i] * m_data[i]; + return sum; + } + + Type Length()const + { + return std::sqrt(SqrLength()); + } + + SeVector Normalize()const + { + return (*this) / Length(); + } + + SeVector & NormalizeLocal() + { + (*this) /= Length(); return *this; + } +}; + +/************************************************************************* +**************************** SDVector2 ***************************** +*************************************************************************/ + +template struct SE_VEC2_ALIGN(Type) SeVector +{ + static constexpr int Elements = 2; + + using value_type = Type; + + union + { + struct { Type x, y; }; + struct { Type values[2]; }; + }; + + SeVector() {} + SeVector(Type s1, Type s2) : x(s1), y(s2) {} + SeVector(Type scalar) : x(scalar), y(scalar) {} + + void operator+=(Type scalar) { x += 
scalar; y += scalar; } + void operator-=(Type scalar) { x -= scalar; y -= scalar; } + void operator*=(Type scalar) { x *= scalar; y *= scalar; } + void operator/=(Type scalar) { x /= scalar; y /= scalar; } + + void operator+=(const SeVector & a) { x += a.x; y += a.y; } + void operator-=(const SeVector & a) { x -= a.x; y -= a.y; } + void operator*=(const SeVector & a) { x *= a.x; y *= a.y; } + void operator/=(const SeVector & a) { x /= a.x; y /= a.y; } + + SeVector operator+(Type scalar) const { return SeVector(x + scalar, y + scalar); } + SeVector operator-(Type scalar) const { return SeVector(x - scalar, y - scalar); } + SeVector operator*(Type scalar) const { return SeVector(x * scalar, y * scalar); } + SeVector operator/(Type scalar) const { return SeVector(x / scalar, y / scalar); } + + SeVector operator+(const SeVector & a) const { return SeVector(x + a.x, y + a.y); } + SeVector operator-(const SeVector & a) const { return SeVector(x - a.x, y - a.y); } + SeVector operator*(const SeVector & a) const { return SeVector(x * a.x, y * a.y); } + SeVector operator/(const SeVector & a) const { return SeVector(x / a.x, y / a.y); } + + friend SeVector operator+(Type scalar, const SeVector & a) { return SeVector(scalar + a.x, scalar + a.y); } + friend SeVector operator-(Type scalar, const SeVector & a) { return SeVector(scalar - a.x, scalar - a.y); } + friend SeVector operator*(Type scalar, const SeVector & a) { return SeVector(scalar * a.x, scalar * a.y); } + friend SeVector operator/(Type scalar, const SeVector & a) { return SeVector(scalar / a.x, scalar / a.y); } + + template explicit SeVector(const SeVector2 & vec2) : x(static_cast(vec2.x)), y(static_cast(vec2.y)) {} + template explicit SeVector(const SeVector3 & vec3) : x(static_cast(vec3.x)), y(static_cast(vec3.y)) {} + template explicit SeVector(const SeVector4 & vec4) : x(static_cast(vec4.x)), y(static_cast(vec4.y)) {} + + const Type & operator[](unsigned int i) const { SE_ASSERT(i < Elements); return values[i]; } 
+ Type & operator[](unsigned int i) { SE_ASSERT(i < Elements); return values[i]; } +}; + +/************************************************************************* +**************************** SDVector3 ***************************** +*************************************************************************/ + +template struct SeVector +{ + static constexpr int Elements = 3; + + using value_type = Type; + + union + { + struct { Type x, y, z; }; + struct { Type values[3]; }; + }; + + SeVector() {} + SeVector(Type scalar) : x(scalar), y(scalar), z(scalar) {} + SeVector(Type s1, Type s2, Type s3) : x(s1), y(s2), z(s3) {} + explicit SeVector(Type s1, const SeVector2 & vec2) : x(s1), y(vec2.x), z(vec2.y) {} + explicit SeVector(const SeVector2 & vec2, Type s3) : x(vec2.x), y(vec2.y), z(s3) {} + + void operator+=(const SeVector & a) { x += a.x; y += a.y; z += a.z; } + void operator-=(const SeVector & a) { x -= a.x; y -= a.y; z -= a.z; } + void operator*=(const SeVector & a) { x *= a.x; y *= a.y; z *= a.z; } + void operator/=(const SeVector & a) { x /= a.x; y /= a.y; z /= a.z; } + + void operator+=(Type scalar) { x += scalar; y += scalar; z += scalar; } + void operator-=(Type scalar) { x -= scalar; y -= scalar; z -= scalar; } + void operator*=(Type scalar) { x *= scalar; y *= scalar; z *= scalar; } + void operator/=(Type scalar) { x /= scalar; y /= scalar; z /= scalar; } + + SeVector operator+(const SeVector & a) const { return SeVector(x + a.x, y + a.y, z + a.z); } + SeVector operator-(const SeVector & a) const { return SeVector(x - a.x, y - a.y, z - a.z); } + SeVector operator*(const SeVector & a) const { return SeVector(x * a.x, y * a.y, z * a.z); } + SeVector operator/(const SeVector & a) const { return SeVector(x / a.x, y / a.y, z / a.z); } + + SeVector operator+(Type scalar) const { return SeVector(x + scalar, y + scalar, z + scalar); } + SeVector operator-(Type scalar) const { return SeVector(x - scalar, y - scalar, z - scalar); } + SeVector operator*(Type scalar) 
const { return SeVector(x * scalar, y * scalar, z * scalar); } + SeVector operator/(Type scalar) const { return SeVector(x / scalar, y / scalar, z / scalar); } + + friend SeVector operator+(Type scalar, const SeVector & a) { return SeVector(scalar + a.x, scalar + a.y, scalar + a.z); } + friend SeVector operator-(Type scalar, const SeVector & a) { return SeVector(scalar - a.x, scalar - a.y, scalar - a.z); } + friend SeVector operator*(Type scalar, const SeVector & a) { return SeVector(scalar * a.x, scalar * a.y, scalar * a.z); } + friend SeVector operator/(Type scalar, const SeVector & a) { return SeVector(scalar / a.x, scalar / a.y, scalar / a.z); } + + template explicit SeVector(const SeVector2 & vec2) : x(static_cast(vec2.x)), y(static_cast(vec2.y)), z(static_cast(0)) {} + template explicit SeVector(const SeVector3 & vec3) : x(static_cast(vec3.x)), y(static_cast(vec3.y)), z(static_cast(vec3.z)) {} + template explicit SeVector(const SeVector4 & vec4) : x(static_cast(vec4.x)), y(static_cast(vec4.y)), z(static_cast(vec4.z)) {} + + const Type & operator[](unsigned int i) const { SE_ASSERT(i < Elements); return values[i]; } + Type & operator[](unsigned int i) { SE_ASSERT(i < Elements); return values[i]; } +}; + +/************************************************************************* +**************************** SDVector4 ***************************** +*************************************************************************/ + +template struct SE_VEC4_ALIGN(Type) SeVector +{ + static constexpr int Elements = 4; + + using value_type = Type; + + union + { + struct { Type values[4]; }; + struct { Type x, y, z, w; }; + struct { SeVector3 xyz; }; + }; + + SeVector() {} + SeVector(Type scalar) : x(scalar), y(scalar), z(scalar), w(scalar) {} + SeVector(Type s1, Type s2, Type s3, Type s4) : x(s1), y(s2), z(s3), w(s4) {} + explicit SeVector(Type s1, Type s2, const SeVector2 & vec2) : x(s1), y(s2), z(vec2.x), w(vec2.y) {} + explicit SeVector(Type s1, const SeVector2 & vec2, 
Type s4) : x(s1), y(vec2.x), z(vec2.y), w(s4) {} + explicit SeVector(const SeVector2 & vec2, Type s3, Type s4) : x(vec2.x), y(vec2.y), z(s3), w(s4) {} + explicit SeVector(const SeVector2 & vec2a, const SeVector2 & vec2b) : x(vec2a.x), y(vec2a.y), z(vec2b.x), w(vec2b.y) {} + explicit SeVector(const SeVector3 & vec3, Type s4) : x(vec3.x), y(vec3.y), z(vec3.z), w(s4) {} + explicit SeVector(Type s1, const SeVector3 & vec3) : x(s1), y(vec3.x), z(vec3.y), w(vec3.z) {} + + void operator+=(const SeVector3 & a) { x += a.x; y += a.y; z += a.z; } + void operator-=(const SeVector3 & a) { x -= a.x; y -= a.y; z -= a.z; } + void operator*=(const SeVector3 & a) { x *= a.x; y *= a.y; z *= a.z; } + void operator/=(const SeVector3 & a) { x /= a.x; y /= a.y; z /= a.z; } + + void operator+=(const SeVector & a) { x += a.x; y += a.y; z += a.z; w += a.w; } + void operator-=(const SeVector & a) { x -= a.x; y -= a.y; z -= a.z; w -= a.w; } + void operator*=(const SeVector & a) { x *= a.x; y *= a.y; z *= a.z; w *= a.w; } + void operator/=(const SeVector & a) { x /= a.x; y /= a.y; z /= a.z; w /= a.w; } + + void operator+=(Type scalar) { x += scalar; y += scalar; z += scalar; w += scalar; } + void operator-=(Type scalar) { x -= scalar; y -= scalar; z -= scalar; w -= scalar; } + void operator*=(Type scalar) { x *= scalar; y *= scalar; z *= scalar; w *= scalar; } + void operator/=(Type scalar) { x /= scalar; y /= scalar; z /= scalar; w /= scalar; } + + SeVector operator+(const SeVector & a) const { return SeVector(x + a.x, y + a.y, z + a.z, w + a.w); } + SeVector operator-(const SeVector & a) const { return SeVector(x - a.x, y - a.y, z - a.z, w - a.w); } + SeVector operator*(const SeVector & a) const { return SeVector(x * a.x, y * a.y, z * a.z, w * a.w); } + SeVector operator/(const SeVector & a) const { return SeVector(x / a.x, y / a.y, z / a.z, w / a.w); } + + SeVector operator+(const SeVector3 & a) const { return SeVector(x + a.x, y + a.y, z + a.z, w); } + SeVector operator-(const SeVector3 & 
a) const { return SeVector(x - a.x, y - a.y, z - a.z, w); } + SeVector operator*(const SeVector3 & a) const { return SeVector(x * a.x, y * a.y, z * a.z, w); } + SeVector operator/(const SeVector3 & a) const { return SeVector(x / a.x, y / a.y, z / a.z, w); } + + SeVector operator+(Type scalar) const { return SeVector(x + scalar, y + scalar, z + scalar, w + scalar); } + SeVector operator-(Type scalar) const { return SeVector(x - scalar, y - scalar, z - scalar, w - scalar); } + SeVector operator*(Type scalar) const { return SeVector(x * scalar, y * scalar, z * scalar, w * scalar); } + SeVector operator/(Type scalar) const { return SeVector(x / scalar, y / scalar, z / scalar, w / scalar); } + + friend SeVector operator+(Type scalar, const SeVector & a) { return SeVector(scalar + a.x, scalar + a.y, scalar + a.z, scalar + a.w); } + friend SeVector operator-(Type scalar, const SeVector & a) { return SeVector(scalar - a.x, scalar - a.y, scalar - a.z, scalar - a.w); } + friend SeVector operator*(Type scalar, const SeVector & a) { return SeVector(scalar * a.x, scalar * a.y, scalar * a.z, scalar * a.w); } + friend SeVector operator/(Type scalar, const SeVector & a) { return SeVector(scalar / a.x, scalar / a.y, scalar / a.z, scalar / a.w); } + + template explicit SeVector(const SeVector2 & vec2) : x(static_cast(vec2.x)), y(static_cast(vec2.y)), z(static_cast(0)), w(static_cast(0)) {} + template explicit SeVector(const SeVector3 & vec3) : x(static_cast(vec3.x)), y(static_cast(vec3.y)), z(static_cast(vec3.z)), w(static_cast(0)) {} + template explicit SeVector(const SeVector4 & vec4) : x(static_cast(vec4.x)), y(static_cast(vec4.y)), z(static_cast(vec4.z)), w(static_cast(vec4.w)) {} + + const Type & operator[](unsigned int i) const { SE_ASSERT(i < Elements); return values[i]; } + Type & operator[](unsigned int i) { SE_ASSERT(i < Elements); return values[i]; } +}; + +/************************************************************************* +**************************** Operators 
***************************** +*************************************************************************/ + + +template SE_INLINE bool operator==(const SeVector & a, const SeVector & b) +{ + for (int i = 0; i < SeVector::Elements; ++i) { if (a[i] != b[i]) return false; } return true; +} +template SE_INLINE bool operator!=(const SeVector & a, const SeVector & b) +{ + for (int i = 0; i < SeVector::Elements; ++i) { if (a[i] != b[i]) return true; } return false; +} +template SE_INLINE SeVector operator-(SeVector a) +{ + for (int i = 0; i < SeVector::Elements; ++i) a[i] = -a[i]; return a; +} +template SE_INLINE SeVector operator!(SeVector a) +{ + for (int i = 0; i < SeVector::Elements; ++i) a[i] = !a[i]; return a; +} +template SE_INLINE SeVector operator~(SeVector a) +{ + for (int i = 0; i < SeVector::Elements; ++i) a[i] = ~a[i]; return a; +} + + +/************************************************************************* +*************************** Type defines *************************** +*************************************************************************/ +// bool +typedef SeVector2 Bool2; +// char +typedef SeVector2 Char2; +typedef SeVector3 Char3; +typedef SeVector4 Char4; +typedef SeVector Char5; +typedef SeVector Char6; +// unsign char +typedef SeVector2 UChar2; +typedef SeVector3 UChar3; +typedef SeVector4 UChar4; +typedef SeVector UChar5; +typedef SeVector UChar6; +// short +typedef SeVector2 Short2; +typedef SeVector3 Short3; +typedef SeVector4 Short4; +typedef SeVector Short5; +typedef SeVector Short6; +// unsigned short +typedef SeVector2 UShort2; +typedef SeVector3 UShort3; +typedef SeVector4 UShort4; +typedef SeVector UShort5; +typedef SeVector UShort6; +// int +typedef SeVector2 Int2; +typedef SeVector3 Int3; +typedef SeVector4 Int4; +typedef SeVector Int5; +typedef SeVector Int6; +typedef SeVector Int8; +typedef SeVector Int12; +typedef SeVector Int30; +// unsigned int +typedef SeVector2 UInt2; +typedef SeVector3 UInt3; +typedef SeVector4 UInt4; 
+typedef SeVector UInt5; +typedef SeVector UInt6; +// float +typedef SeVector2 Float2; +typedef SeVector3 Float3; +typedef SeVector4 Float4; +typedef SeVector Float5; +typedef SeVector Float6; +typedef SeVector Float9; +typedef SeVector Float12; +// double +typedef SeVector2 Double2; +typedef SeVector3 Double3; +typedef SeVector4 Double4; +typedef SeVector Double5; +typedef SeVector Double6; +// size_t +typedef SeVector2 Size2; +typedef SeVector3 Size3; +typedef SeVector4 Size4; +typedef SeVector Size5; +typedef SeVector Size6; +/* +// SIMD-float3 +typedef SeVector Simd3f;*/ + +typedef SeVector3 Half3; //just for compile reason,don't use it + +SE_NAMESPACE_END \ No newline at end of file diff --git a/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeVectorSimd.h b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeVectorSimd.h new file mode 100644 index 0000000..66b06b9 --- /dev/null +++ b/thirdparty/preconditioner-for-cloth-and-deformable-body-simulation/SeVectorSimd.h @@ -0,0 +1,156 @@ +///////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2002 - 2022, +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The names of its contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "SeVector.h" + +#include + +#ifdef WIN32 +#include +#endif + + +SE_NAMESPACE_BEGIN + + +#ifdef WIN32 + +template using SimdVector3 = SeVector; + +template<> struct SeVector +{ + static constexpr int Elements = 3; + + using value_type = float; + + union + { + struct { __m128 pack; }; + struct { float x, y, z, w; }; + struct { SeVector3 xyz; }; + struct { SeVector4 xyzw; }; + }; + + SeVector() {} + SeVector(const __m128 & p) : pack(p) {} + explicit SeVector(float scalar) : pack(_mm_setr_ps(scalar, scalar, scalar, 0.0f)) {} //pack(_mm_set_ps1(scalar)) {} + SeVector(float s1, float s2, float s3) : pack(_mm_setr_ps(s1, s2, s3, 0.0f)) {} + SeVector(float s1, float s2, float s3, float s4) : pack(_mm_setr_ps(s1, s2, s3, s4)) {} + explicit SeVector(const SeVector & vec) : pack(_mm_setr_ps(vec.x, vec.y, vec.z, 0.f)) {} + + void operator+=(const SeVector & a) { pack = _mm_add_ps(pack, a.pack); } + void operator-=(const SeVector & a) { pack = _mm_sub_ps(pack, a.pack); } + void operator*=(const SeVector & a) { pack = _mm_mul_ps(pack, a.pack); } + void operator/=(const SeVector & a) { pack = _mm_div_ps(pack, a.pack); } + + void operator+=(const __m128 & a) { pack = 
_mm_add_ps(pack, a); } + void operator-=(const __m128 & a) { pack = _mm_sub_ps(pack, a); } + void operator*=(const __m128 & a) { pack = _mm_mul_ps(pack, a); } + void operator/=(const __m128 & a) { pack = _mm_div_ps(pack, a); } + + void operator+=(float scalar) { pack = _mm_add_ps(pack, _mm_set_ps1(scalar)); } + void operator-=(float scalar) { pack = _mm_sub_ps(pack, _mm_set_ps1(scalar)); } + void operator*=(float scalar) { pack = _mm_mul_ps(pack, _mm_set_ps1(scalar)); } + void operator/=(float scalar) { pack = _mm_div_ps(pack, _mm_set_ps1(scalar)); } + + SeVector operator+(const SeVector & a) const { return SeVector(_mm_add_ps(pack, a.pack)); } + SeVector operator-(const SeVector & a) const { return SeVector(_mm_sub_ps(pack, a.pack)); } + SeVector operator*(const SeVector & a) const { return SeVector(_mm_mul_ps(pack, a.pack)); } + SeVector operator/(const SeVector & a) const { return SeVector(_mm_div_ps(pack, a.pack)); } + + SeVector operator+(const __m128 & a) const { return SeVector(_mm_add_ps(pack, a)); } + SeVector operator-(const __m128 & a) const { return SeVector(_mm_sub_ps(pack, a)); } + SeVector operator*(const __m128 & a) const { return SeVector(_mm_mul_ps(pack, a)); } + SeVector operator/(const __m128 & a) const { return SeVector(_mm_div_ps(pack, a)); } + + SeVector operator+(float scalar) const { return SeVector(_mm_add_ps(pack, _mm_set_ps1(scalar))); } + SeVector operator-(float scalar) const { return SeVector(_mm_sub_ps(pack, _mm_set_ps1(scalar))); } + SeVector operator*(float scalar) const { return SeVector(_mm_mul_ps(pack, _mm_set_ps1(scalar))); } + SeVector operator/(float scalar) const { return SeVector(_mm_div_ps(pack, _mm_set_ps1(scalar))); } + + friend SeVector operator+(float scalar, const SeVector & a) { return SeVector(_mm_add_ps(_mm_set_ps1(scalar), a.pack)); } + friend SeVector operator-(float scalar, const SeVector & a) { return SeVector(_mm_sub_ps(_mm_set_ps1(scalar), a.pack)); } + friend SeVector operator*(float scalar, const SeVector & 
a) { return SeVector(_mm_mul_ps(_mm_set_ps1(scalar), a.pack)); } + friend SeVector operator/(float scalar, const SeVector & a) { return SeVector(_mm_div_ps(_mm_set_ps1(scalar), a.pack)); } + + const float & operator[](unsigned int i) const { SE_ASSERT(i < 4); return pack.m128_f32[i]; } + float & operator[](unsigned int i) { SE_ASSERT(i < 4); return pack.m128_f32[i]; } +}; + + +/************************************************************************* +**************************** Operators ***************************** +*************************************************************************/ + +template SE_INLINE bool operator==(const SimdVector3 & a, const SimdVector3 & b) +{ + for (int i = 0; i < SimdVector3::Elements; ++i) { if (a[i] != b[i]) return false; } return true; +} +template SE_INLINE bool operator!=(const SimdVector3 & a, const SimdVector3 & b) +{ + for (int i = 0; i < SimdVector3::Elements; ++i) { if (a[i] != b[i]) return true; } return false; +} +template SE_INLINE SimdVector3 operator-(const SimdVector3 & a) +{ + for (int i = 0; i < SimdVector3::Elements; ++i) a[i] = -a[i]; return a; +} +template SE_INLINE SimdVector3 operator!(const SimdVector3 & a) +{ + for (int i = 0; i < SimdVector3::Elements; ++i) a[i] = !a[i]; return a; +} +template SE_INLINE SimdVector3 operator~(const SimdVector3 & a) +{ + for (int i = 0; i < SimdVector3::Elements; ++i) a[i] = ~a[i]; return a; +} + +template<> SE_INLINE SimdVector3 operator - (const SimdVector3 & a) +{ + return _mm_xor_ps(a.pack, _mm_set1_ps(-0.f)); +} + +/* +template<> SE_INLINE SeVector operator - (SeVector a) +{ + return _mm_xor_ps(a.pack, _mm_set1_ps(-0.f)); +}*/ + +#endif + + +/************************************************************************* +*************************** Type defines *************************** +*************************************************************************/ + + +using Simd3f = SimdVector3; +template using SimdVector3 = SeVector; + +using SeVec3fSimd = Simd3f; + + 
+SE_NAMESPACE_END