From 22545ada34334c54925740812442adc9168194b4 Mon Sep 17 00:00:00 2001
From: RunitaiLinden <davep@lindenlab.com>
Date: Mon, 16 Sep 2024 17:42:44 -0500
Subject: [PATCH] Profile guided optimization pass

- Tune up LLJointRiggingInfoTab
- Visualize joint bounding boxes when visualizing joints
- Use LLJointRiggingInfo to calculate desired resolution of a texture
- Throttle calls to calcPixelArea
- Fetch MeshSkinInfo immediately when header is received
---
 indra/llmath/llrigginginfo.h       |   4 +
 indra/llprimitive/llmodel.cpp      |   7 ++
 indra/llprimitive/llmodel.h        |   5 +-
 indra/llrender/llrendersphere.cpp  |  62 ++++++++++++---
 indra/llrender/llrendersphere.h    |   1 +
 indra/llrender/llvertexbuffer.cpp  |   4 +-
 indra/newview/llface.cpp           |  79 ++++++++++++++++++--
 indra/newview/llface.h             |  13 ++++
 indra/newview/llmeshrepository.cpp |  88 +++++++++++++++++-----
 indra/newview/llmeshrepository.h   |  19 +++--
 indra/newview/llskinningutil.cpp   |  43 +----------
 indra/newview/llviewerobject.cpp   |   1 +
 indra/newview/llvoavatar.cpp       | 116 ++++++++++++++++++++++++++---
 indra/newview/llvoavatar.h         |   2 +-
 indra/newview/llvovolume.cpp       |   6 +-
 15 files changed, 346 insertions(+), 104 deletions(-)
diff --git a/indra/llmath/llrigginginfo.h b/indra/llmath/llrigginginfo.h
index fb550d013fc..d761af68b1e 100644
--- a/indra/llmath/llrigginginfo.h
+++ b/indra/llmath/llrigginginfo.h
@@ -66,6 +66,10 @@ class LLJointRiggingInfoTab
     const LLJointRiggingInfo& operator[](S32 i) const { return mRigInfoPtr[i]; };
     bool needsUpdate() { return mNeedsUpdate; }
     void setNeedsUpdate(bool val) { mNeedsUpdate = val; }
+
+    LLJointRiggingInfo* begin() { return mRigInfoPtr; }
+    LLJointRiggingInfo* end() { return mRigInfoPtr + mSize; }
+
 private:
     // Not implemented
     LLJointRiggingInfoTab& operator=(const LLJointRiggingInfoTab& src);
diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp
index 15087a72552..9908a155f2d 100644
--- a/indra/llprimitive/llmodel.cpp
+++ b/indra/llprimitive/llmodel.cpp
@@ -1547,6 +1547,13 @@ void LLMeshSkinInfo::fromLLSD(LLSD& skin)
         mLockScaleIfJointPosition = false;
     }
 
+    // combine mBindShapeMatrix and mInvBindMatrix into mBindPoseMatrix
+    mBindPoseMatrix.resize(mInvBindMatrix.size());
+    for (U32 i = 0; i < mInvBindMatrix.size(); ++i)
+    {
+        matMul(mBindShapeMatrix, mInvBindMatrix[i], mBindPoseMatrix[i]);
+    }
+
     updateHash();
 }
 
diff --git a/indra/llprimitive/llmodel.h b/indra/llprimitive/llmodel.h
index b0cba2d655d..96cfb7151e8 100644
--- a/indra/llprimitive/llmodel.h
+++ b/indra/llprimitive/llmodel.h
@@ -56,12 +56,15 @@ class LLMeshSkinInfo : public LLRefCount
     LLUUID mMeshID;
     std::vector<std::string> mJointNames;
     mutable std::vector<S32> mJointNums;
-    typedef std::vector<LLMatrix4a, boost::alignment::aligned_allocator<LLMatrix4a, 16>> matrix_list_t;
+    typedef std::vector<LLMatrix4a> matrix_list_t;
     matrix_list_t mInvBindMatrix;
 
     // bones/joints position overrides
     matrix_list_t mAlternateBindMatrix;
 
+    // cached multiply of mBindShapeMatrix and mInvBindMatrix
+    matrix_list_t mBindPoseMatrix;
+
     LL_ALIGN_16(LLMatrix4a mBindShapeMatrix);
 
     float mPelvisOffset;
diff --git a/indra/llrender/llrendersphere.cpp b/indra/llrender/llrendersphere.cpp
index 95701805546..cd8ef7d68e3 100644
--- a/indra/llrender/llrendersphere.cpp
+++ b/indra/llrender/llrendersphere.cpp
@@ -34,6 +34,8 @@
 #include "llerror.h"
 
 #include "llglheaders.h"
+#include "llvertexbuffer.h"
+#include "llglslshader.h"
 
 LLRenderSphere gSphere;
 
@@ -53,12 +55,20 @@ inline LLVector3 polar_to_cart(F32 latitude, F32 longitude)
 
 void LLRenderSphere::renderGGL()
 {
+    LL_PROFILE_ZONE_SCOPED;
     S32 const LATITUDE_SLICES = 20;
     S32 const LONGITUDE_SLICES = 30;
 
-    if (mSpherePoints.empty())
+    if (mVertexBuffer.isNull())
     {
         mSpherePoints.resize(LATITUDE_SLICES + 1);
+        mVertexBuffer = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX);
+
+        mVertexBuffer->allocateBuffer((U32)(LATITUDE_SLICES + 1) * (LONGITUDE_SLICES + 1), LATITUDE_SLICES * LONGITUDE_SLICES * 6);
+
+        LLStrider<LLVector3> v;
+        mVertexBuffer->getVertexStrider(v);
+
         for (S32 lat_i = 0; lat_i < LATITUDE_SLICES + 1; lat_i++)
         {
             mSpherePoints[lat_i].resize(LONGITUDE_SLICES + 1);
@@ -68,24 +78,52 @@ void LLRenderSphere::renderGGL()
                 F32 lon = (F32)lon_i / LONGITUDE_SLICES;
 
                 mSpherePoints[lat_i][lon_i] = polar_to_cart(lat, lon);
+                v[lat_i * (LONGITUDE_SLICES + 1) + lon_i] = mSpherePoints[lat_i][lon_i];
             }
         }
+
+        LLStrider<U16> i;
+        mVertexBuffer->getIndexStrider(i);
+
+        for (S32 lat_i = 0; lat_i < LATITUDE_SLICES; lat_i++)
+        {
+            for (S32 lon_i = 0; lon_i < LONGITUDE_SLICES; lon_i++)
+            {
+                i[(lat_i * LONGITUDE_SLICES + lon_i) * 6 + 0] = lat_i * (LONGITUDE_SLICES + 1) + lon_i;
+                i[(lat_i * LONGITUDE_SLICES + lon_i) * 6 + 1] = lat_i * (LONGITUDE_SLICES + 1) + lon_i + 1;
+                i[(lat_i * LONGITUDE_SLICES + lon_i) * 6 + 2] = (lat_i + 1) * (LONGITUDE_SLICES + 1) + lon_i;
+
+                i[(lat_i * LONGITUDE_SLICES + lon_i) * 6 + 3] = (lat_i + 1) * (LONGITUDE_SLICES + 1) + lon_i;
+                i[(lat_i * LONGITUDE_SLICES + lon_i) * 6 + 4] = lat_i * (LONGITUDE_SLICES + 1) + lon_i + 1;
+                i[(lat_i * LONGITUDE_SLICES + lon_i) * 6 + 5] = (lat_i + 1) * (LONGITUDE_SLICES + 1) + lon_i + 1;
+            }
+        }
+
+        mVertexBuffer->unmapBuffer();
     }
 
-    gGL.begin(LLRender::TRIANGLES);
 
-    for (S32 lat_i = 0; lat_i < LATITUDE_SLICES; lat_i++)
-    {
-        for (S32 lon_i = 0; lon_i < LONGITUDE_SLICES; lon_i++)
+    if (LLGLSLShader::sCurBoundShaderPtr->mAttributeMask == LLVertexBuffer::MAP_VERTEX)
+    { // shader expects only vertex positions in vertex buffer, use fast path
+        mVertexBuffer->setBuffer();
+        mVertexBuffer->drawRange(LLRender::TRIANGLES, 0, mVertexBuffer->getNumVerts(), mVertexBuffer->getNumIndices(), 0);
+    }
+    else
+    { //shader wants colors in the vertex stream, use slow path
+        gGL.begin(LLRender::TRIANGLES);
+        for (S32 lat_i = 0; lat_i < LATITUDE_SLICES; lat_i++)
         {
-            gGL.vertex3fv(mSpherePoints[lat_i][lon_i].mV);
-            gGL.vertex3fv(mSpherePoints[lat_i][lon_i+1].mV);
-            gGL.vertex3fv(mSpherePoints[lat_i+1][lon_i].mV);
+            for (S32 lon_i = 0; lon_i < LONGITUDE_SLICES; lon_i++)
+            {
+                gGL.vertex3fv(mSpherePoints[lat_i][lon_i].mV);
+                gGL.vertex3fv(mSpherePoints[lat_i][lon_i + 1].mV);
+                gGL.vertex3fv(mSpherePoints[lat_i + 1][lon_i].mV);
 
-            gGL.vertex3fv(mSpherePoints[lat_i+1][lon_i].mV);
-            gGL.vertex3fv(mSpherePoints[lat_i][lon_i+1].mV);
-            gGL.vertex3fv(mSpherePoints[lat_i+1][lon_i+1].mV);
+                gGL.vertex3fv(mSpherePoints[lat_i + 1][lon_i].mV);
+                gGL.vertex3fv(mSpherePoints[lat_i][lon_i + 1].mV);
+                gGL.vertex3fv(mSpherePoints[lat_i + 1][lon_i + 1].mV);
+            }
         }
+        gGL.end();
     }
-    gGL.end();
 }
diff --git a/indra/llrender/llrendersphere.h b/indra/llrender/llrendersphere.h
index e2e886fa069..5b6eabecb8e 100644
--- a/indra/llrender/llrendersphere.h
+++ b/indra/llrender/llrendersphere.h
@@ -45,6 +45,7 @@ class LLRenderSphere
 
 private:
     std::vector< std::vector<LLVector3> > mSpherePoints;
+    LLPointer<LLVertexBuffer> mVertexBuffer;
 };
 
 extern LLRenderSphere gSphere;
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index c1f239fc43e..6d8eba4a6a8 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -1337,8 +1337,6 @@ void LLVertexBuffer::unmapBuffer()
 
 void LLVertexBuffer::_mapBuffer()
 {
-    // must only be called from main thread
-    llassert(LLCoros::on_main_thread_main_coro());
     if (!mMapped)
     {
         mMapped = true;
@@ -1600,7 +1598,7 @@ void LLVertexBuffer::setBuffer()
 
     if (mMapped)
     {
-        LL_WARNS() << "Missing call to unmapBuffer or flushBuffers" << LL_ENDL;
+        LL_WARNS_ONCE() << "Missing call to unmapBuffer or flushBuffers" << LL_ENDL;
         _unmapBuffer();
     }
 
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index a1ec75e34b3..1d426f8da72 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -56,6 +56,7 @@
 #include "llvoavatar.h"
 #include "llsculptidsize.h"
 #include "llmeshrepository.h"
+#include "llskinningutil.h"
 
 #if LL_LINUX
 // Work-around spurious used before init warning on Vector4a
@@ -2184,28 +2185,88 @@ F32 LLFace::getTextureVirtualSize()
 
 bool LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius)
 {
+    constexpr F32 PIXEL_AREA_UPDATE_PERIOD = 0.1f;
+    // this is an expensive operation and the result is valid (enough) for several frames
+    // don't update every frame
+    if (gFrameTimeSeconds - mLastPixelAreaUpdate < PIXEL_AREA_UPDATE_PERIOD)
+    {
+        return true;
+    }
+
     LL_PROFILE_ZONE_SCOPED_CATEGORY_FACE;
 
     //get area of circle around face
-
     LLVector4a center;
     LLVector4a size;
 
-
     if (isState(LLFace::RIGGED))
     {
-        //override with avatar bounding box
+        LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("calcPixelArea - rigged");
+        //override with joint volume face joint bounding boxes
         LLVOAvatar* avatar = mVObjp->getAvatar();
+        bool hasRiggedExtents = false;
+
         if (avatar && avatar->mDrawable)
         {
-            center.load3(avatar->getPositionAgent().mV);
-            const LLVector4a* exts = avatar->mDrawable->getSpatialExtents();
-            size.setSub(exts[1], exts[0]);
+            LLVolume* volume = mVObjp->getVolume();
+            if (volume)
+            {
+                LLVolumeFace& face = volume->getVolumeFace(mTEOffset);
+
+                auto& rigInfo = face.mJointRiggingInfoTab;
+
+                if (rigInfo.needsUpdate())
+                {
+                    LLVOVolume* vo_volume = (LLVOVolume*)mVObjp.get();
+                    LLVOAvatar* avatar = mVObjp->getAvatar();
+                    const LLMeshSkinInfo* skin = vo_volume->getSkinInfo();
+                    LLSkinningUtil::updateRiggingInfo(skin, avatar, face);
+                }
+
+                // calculate the world space bounding box of the face by combining the bounding boxes of all the joints
+                LLVector4a& minp = mRiggedExtents[0];
+                LLVector4a& maxp = mRiggedExtents[1];
+                minp = LLVector4a(FLT_MAX, FLT_MAX, FLT_MAX);
+                maxp = LLVector4a(-FLT_MAX, -FLT_MAX, -FLT_MAX);
+
+                for (S32 i = 0; i < rigInfo.size(); i++)
+                {
+                    auto& jointInfo = rigInfo[i];
+                    if (jointInfo.isRiggedTo())
+                    {
+                        LLJoint* joint = avatar->getJoint(i);
+
+                        if (joint)
+                        {
+                            LLVector4a jointPos;
+
+                            LLMatrix4a worldMat;
+                            worldMat.loadu((F32*)&joint->getWorldMatrix().mMatrix[0][0]);
+
+                            LLVector4a extents[2];
+
+                            matMulBoundBox(worldMat, jointInfo.getRiggedExtents(), extents);
+
+                            minp.setMin(minp, extents[0]);
+                            maxp.setMax(maxp, extents[1]);
+                            hasRiggedExtents = true;
+                        }
+                    }
+                }
+            }
         }
-        else
+
+        if (!hasRiggedExtents)
         {
+            // no rigged extents, zero out bounding box and skip update
+            mRiggedExtents[0] = mRiggedExtents[1] = LLVector4a(0.f, 0.f, 0.f);
+
             return false;
         }
+
+        center.setAdd(mRiggedExtents[1], mRiggedExtents[0]);
+        center.mul(0.5f);
+        size.setSub(mRiggedExtents[1], mRiggedExtents[0]);
     }
     else
     {
@@ -2231,6 +2292,10 @@ bool LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius)
     F32 app_angle = atanf((F32) sqrt(size_squared) / dist);
     radius = app_angle*LLDrawable::sCurPixelAngle;
     mPixelArea = radius*radius * 3.14159f;
+
+    // remember last update time, add 10% noise to avoid all faces updating at the same time
+    mLastPixelAreaUpdate = gFrameTimeSeconds + ll_frand() * PIXEL_AREA_UPDATE_PERIOD * 0.1f;
+
     LLVector4a x_axis;
     x_axis.load3(camera->getXAxis().mV);
     cos_angle_to_view_dir = lookAt.dot3(x_axis).getF32();
diff --git a/indra/newview/llface.h b/indra/newview/llface.h
index 99642016f72..65637fbf859 100644
--- a/indra/newview/llface.h
+++ b/indra/newview/llface.h
@@ -234,8 +234,14 @@ class alignas(16) LLFace
     // return true if this face is in an alpha draw pool
     bool isInAlphaPool() const;
 public: //aligned members
+
+    // bounding box of face in drawable space
     LLVector4a      mExtents[2];
 
+    // cached bounding box of rigged face in world space
+    // calculated on-demand by LLFace::calcPixelArea and may not be up-to-date
+    LLVector4a  mRiggedExtents[2] = { LLVector4a(0,0,0), LLVector4a(0,0,0) };
+
 private:
     friend class LLViewerTextureList;
     F32         adjustPartialOverlapPixelArea(F32 cos_angle_to_view_dir, F32 radius );
@@ -301,7 +307,14 @@ class alignas(16) LLFace
     S32         mReferenceIndex;
     std::vector<S32> mRiggedIndex;
 
+    // gFrameTimeSeconds when mPixelArea was last updated
+    F32         mLastPixelAreaUpdate = 0.f;
+
+    // virtual size of face in texture area  (mPixelArea adjusted by texture repeats)
+    // used to determine desired resolution of texture
     F32         mVSize;
+
+    // pixel area face covers on screen
     F32         mPixelArea;
 
     //importance factor, in the range [0, 1.0].
diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp
index 26e2d8f3199..532a87bbd15 100644
--- a/indra/newview/llmeshrepository.cpp
+++ b/indra/newview/llmeshrepository.cpp
@@ -77,6 +77,8 @@
 #include "llinventorypanel.h"
 #include "lluploaddialog.h"
 #include "llfloaterreg.h"
+#include "llvoavatarself.h"
+#include "llskinningutil.h"
 
 #include "boost/iostreams/device/array.hpp"
 #include "boost/iostreams/stream.hpp"
@@ -757,7 +759,7 @@ void log_upload_error(LLCore::HttpStatus status, const LLSD& content,
                        << " (" << status.toTerseString() << ")" << LL_ENDL;
 
     std::ostringstream details;
-    typedef std::set<std::string> mav_errors_set_t;
+    typedef std::unordered_set<std::string> mav_errors_set_t;
     mav_errors_set_t mav_errors;
 
     if (content.has("error"))
@@ -828,7 +830,8 @@ LLMeshRepoThread::LLMeshRepoThread()
   mHttpLargeOptions(),
   mHttpHeaders(),
   mHttpPolicyClass(LLCore::HttpRequest::DEFAULT_POLICY_ID),
-  mHttpLargePolicyClass(LLCore::HttpRequest::DEFAULT_POLICY_ID)
+  mHttpLargePolicyClass(LLCore::HttpRequest::DEFAULT_POLICY_ID),
+  mWorkQueue("MeshRepoThread", 1024*1024)
 {
     LLAppCoreHttp & app_core_http(LLAppViewer::instance()->getAppCoreHttp());
 
@@ -911,6 +914,10 @@ void LLMeshRepoThread::run()
             break;
         }
 
+        // run mWorkQueue for up to 8ms
+        static std::chrono::nanoseconds WorkTimeNanoSec{std::chrono::nanoseconds::rep(8 * 1000000) };
+        mWorkQueue.runFor(WorkTimeNanoSec);
+
         if (! mHttpRequestSet.empty())
         {
             // Dispatch all HttpHandler notifications
@@ -1322,7 +1329,7 @@ LLCore::HttpHandle LLMeshRepoThread::getByteRange(const std::string & url,
 
 bool LLMeshRepoThread::fetchMeshSkinInfo(const LLUUID& mesh_id, bool can_retry)
 {
-
+    LL_PROFILE_ZONE_SCOPED;
     if (!mHeaderMutex)
     {
         return false;
@@ -1447,6 +1454,7 @@ bool LLMeshRepoThread::fetchMeshSkinInfo(const LLUUID& mesh_id, bool can_retry)
 
 bool LLMeshRepoThread::fetchMeshDecomposition(const LLUUID& mesh_id)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if (!mHeaderMutex)
     {
         return false;
@@ -1554,6 +1562,7 @@ bool LLMeshRepoThread::fetchMeshDecomposition(const LLUUID& mesh_id)
 
 bool LLMeshRepoThread::fetchMeshPhysicsShape(const LLUUID& mesh_id)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if (!mHeaderMutex)
     {
         return false;
@@ -1693,6 +1702,7 @@ void LLMeshRepoThread::decActiveHeaderRequests()
 //return false if failed to get header
 bool LLMeshRepoThread::fetchMeshHeader(const LLVolumeParams& mesh_params, bool can_retry)
 {
+    LL_PROFILE_ZONE_SCOPED;
     ++LLMeshRepository::sMeshRequestCount;
 
     {
@@ -1756,6 +1766,7 @@ bool LLMeshRepoThread::fetchMeshHeader(const LLVolumeParams& mesh_params, bool c
 //return false if failed to get mesh lod.
 bool LLMeshRepoThread::fetchMeshLOD(const LLVolumeParams& mesh_params, S32 lod, bool can_retry)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if (!mHeaderMutex)
     {
         return false;
@@ -1940,6 +1951,18 @@ EMeshProcessingResult LLMeshRepoThread::headerReceived(const LLVolumeParams& mes
             LLMeshRepository::sCacheBytesHeaders += (U32)header_size;
         }
 
+        // immediately request SkinInfo since we'll need it before we can render any LoD if it is present
+        {
+            LLMutexLock lock(gMeshRepo.mMeshMutex);
+
+            if (gMeshRepo.mLoadingSkins.find(mesh_id) == gMeshRepo.mLoadingSkins.end())
+            {
+                gMeshRepo.mLoadingSkins[mesh_id] = {}; // add an empty vector to indicate to main thread that we are loading skin info
+            }
+        }
+
+        fetchMeshSkinInfo(mesh_id);
+
         LLMutexLock lock(mMutex); // make sure only one thread access mPendingLOD at the same time.
 
         //check for pending requests
@@ -1971,6 +1994,18 @@ EMeshProcessingResult LLMeshRepoThread::lodReceived(const LLVolumeParams& mesh_p
     {
         if (volume->getNumFaces() > 0)
         {
+            // if we have a valid SkinInfo, cache per-joint bounding boxes for this LOD
+            LLMeshSkinInfo* skin_info = mSkinMap[mesh_params.getSculptID()];
+            if (skin_info && isAgentAvatarValid())
+            {
+                for (S32 i = 0; i < volume->getNumFaces(); ++i)
+                {
+                    // NOTE: no need to lock gAgentAvatarp as the state being checked is not changed after initialization
+                    LLVolumeFace& face = volume->getVolumeFace(i);
+                    LLSkinningUtil::updateRiggingInfo(skin_info, gAgentAvatarp, face);
+                }
+            }
+
             LoadedMesh mesh(volume, mesh_params, lod);
             {
                 LLMutexLock lock(mMutex);
@@ -2014,18 +2049,19 @@ bool LLMeshRepoThread::skinInfoReceived(const LLUUID& mesh_id, U8* data, S32 dat
     }
 
     {
-        LLMeshSkinInfo* info = nullptr;
-        try
-        {
-            info = new LLMeshSkinInfo(mesh_id, skin);
-        }
-        catch (const std::bad_alloc& ex)
-        {
-            LL_WARNS() << "Failed to allocate skin info with exception: " << ex.what()  << LL_ENDL;
-            return false;
+        LLPointer<LLMeshSkinInfo> info = nullptr;
+        info = new LLMeshSkinInfo(mesh_id, skin);
+
+        if (isAgentAvatarValid())
+        { // joint numbers are consistent inside LLVOAvatar and animations, but inconsistent inside meshes,
+            // generate a map of mesh joint numbers to LLVOAvatar joint numbers
+            LLSkinningUtil::initJointNums(info, gAgentAvatarp);
         }
 
-        // LL_DEBUGS(LOG_MESH) << "info pelvis offset" << info.mPelvisOffset << LL_ENDL;
+        // remember the skin info in the background thread so we can use it
+        // to calculate per-joint bounding boxes when volumes are loaded
+        mSkinMap[mesh_id] = info;
+
         {
             LLMutexLock lock(mMutex);
             mSkinInfoQ.push_back(info);
@@ -2265,10 +2301,10 @@ void LLMeshUploadThread::wholeModelToLLSD(LLSD& dest, bool include_textures)
     S32 mesh_num = 0;
     S32 texture_num = 0;
 
-    std::set<LLViewerTexture* > textures;
-    std::map<LLViewerTexture*,S32> texture_index;
+    std::unordered_set<LLViewerTexture* > textures;
+    std::unordered_map<LLViewerTexture*,S32> texture_index;
 
-    std::map<LLModel*,S32> mesh_index;
+    std::unordered_map<LLModel*,S32> mesh_index;
     std::string model_name;
 
     S32 instance_num = 0;
@@ -2957,7 +2993,7 @@ void LLMeshRepoThread::notifyLoadedMeshes()
     {
         if (mMutex->trylock())
         {
-            std::deque<LLMeshSkinInfo*> skin_info_q;
+            std::deque<LLPointer<LLMeshSkinInfo>> skin_info_q;
             std::deque<UUIDBasedRequest> skin_info_unavail_q;
             std::list<LLModel::Decomposition*> decomp_q;
 
@@ -3080,6 +3116,7 @@ S32 LLMeshRepository::getActualMeshLOD(LLMeshHeader& header, S32 lod)
 // are cases far off the norm.
 void LLMeshHandlerBase::onCompleted(LLCore::HttpHandle handle, LLCore::HttpResponse * response)
 {
+    LL_PROFILE_ZONE_SCOPED;
     mProcessed = true;
 
     unsigned int retries(0U);
@@ -3356,6 +3393,7 @@ void LLMeshLODHandler::processFailure(LLCore::HttpStatus status)
 void LLMeshLODHandler::processData(LLCore::BufferArray * /* body */, S32 /* body_offset */,
                                    U8 * data, S32 data_size)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if ((!MESH_LOD_PROCESS_FAILED)
         && ((data != NULL) == (data_size > 0))) // if we have data but no size or have size but no data, something is wrong
     {
@@ -3421,6 +3459,7 @@ void LLMeshSkinInfoHandler::processFailure(LLCore::HttpStatus status)
 void LLMeshSkinInfoHandler::processData(LLCore::BufferArray * /* body */, S32 /* body_offset */,
                                         U8 * data, S32 data_size)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if ((!MESH_SKIN_INFO_PROCESS_FAILED)
         && ((data != NULL) == (data_size > 0)) // if we have data but no size or have size but no data, something is wrong
         && gMeshRepo.mThread->skinInfoReceived(mMeshID, data, data_size))
@@ -3470,6 +3509,7 @@ void LLMeshDecompositionHandler::processFailure(LLCore::HttpStatus status)
 void LLMeshDecompositionHandler::processData(LLCore::BufferArray * /* body */, S32 /* body_offset */,
                                              U8 * data, S32 data_size)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if ((!MESH_DECOMP_PROCESS_FAILED)
         && ((data != NULL) == (data_size > 0)) // if we have data but no size or have size but no data, something is wrong
         && gMeshRepo.mThread->decompositionReceived(mMeshID, data, data_size))
@@ -3517,6 +3557,7 @@ void LLMeshPhysicsShapeHandler::processFailure(LLCore::HttpStatus status)
 void LLMeshPhysicsShapeHandler::processData(LLCore::BufferArray * /* body */, S32 /* body_offset */,
                                             U8 * data, S32 data_size)
 {
+    LL_PROFILE_ZONE_SCOPED;
     if ((!MESH_PHYS_SHAPE_PROCESS_FAILED)
         && ((data != NULL) == (data_size > 0)) // if we have data but no size or have size but no data, something is wrong
         && gMeshRepo.mThread->physicsShapeReceived(mMeshID, data, data_size) == MESH_OK)
@@ -3870,6 +3911,13 @@ void LLMeshRepository::notifyLoadedMeshes()
             {
                 mSkinMap.erase(copy_iter);
             }
+
+            // erase from background thread
+            LLUUID id = iter->first;
+            mThread->mWorkQueue.post([=]()
+                {
+                    mThread->mSkinMap.erase(id);
+                });
         }
         //LL_INFOS() << "Skin info cache elements:" << mSkinMap.size() << " Memory: " << U64Kilobytes(skinbytes) << LL_ENDL;
     }
@@ -4206,7 +4254,7 @@ void LLMeshRepository::fetchPhysicsShape(const LLUUID& mesh_id)
         {
             LLMutexLock lock(mMeshMutex);
             //add volume to list of loading meshes
-            std::set<LLUUID>::iterator iter = mLoadingPhysicsShapes.find(mesh_id);
+            std::unordered_set<LLUUID>::iterator iter = mLoadingPhysicsShapes.find(mesh_id);
             if (iter == mLoadingPhysicsShapes.end())
             { //no request pending for this skin info
                 // *FIXME:  Nothing ever deletes entries, can't be right
@@ -4236,7 +4284,7 @@ LLModel::Decomposition* LLMeshRepository::getDecomposition(const LLUUID& mesh_id
         {
             LLMutexLock lock(mMeshMutex);
             //add volume to list of loading meshes
-            std::set<LLUUID>::iterator iter = mLoadingDecompositions.find(mesh_id);
+            std::unordered_set<LLUUID>::iterator iter = mLoadingDecompositions.find(mesh_id);
             if (iter == mLoadingDecompositions.end())
             { //no request pending for this skin info
                 mLoadingDecompositions.insert(mesh_id);
@@ -4287,6 +4335,8 @@ bool LLMeshRepository::hasPhysicsShape(const LLUUID& mesh_id)
 
 bool LLMeshRepository::hasSkinInfo(const LLUUID& mesh_id)
 {
+    LL_PROFILE_ZONE_SCOPED;
+
     if (mesh_id.isNull())
     {
         return false;
diff --git a/indra/newview/llmeshrepository.h b/indra/newview/llmeshrepository.h
index b850ade0bbe..b5c2424578e 100644
--- a/indra/newview/llmeshrepository.h
+++ b/indra/newview/llmeshrepository.h
@@ -28,6 +28,7 @@
 #define LL_MESH_REPOSITORY_H
 
 #include <unordered_map>
+#include <unordered_set>
 #include "llassettype.h"
 #include "llmodel.h"
 #include "lluuid.h"
@@ -341,7 +342,7 @@ class LLMeshRepoThread : public LLThread
     std::deque<UUIDBasedRequest> mSkinRequests;
 
     // list of completed skin info requests
-    std::deque<LLMeshSkinInfo*> mSkinInfoQ;
+    std::deque<LLPointer<LLMeshSkinInfo>> mSkinInfoQ;
 
     // list of skin info requests that have failed or are unavailaibe
     std::deque<UUIDBasedRequest> mSkinUnavailableQ;
@@ -368,9 +369,17 @@ class LLMeshRepoThread : public LLThread
     std::deque<LoadedMesh> mLoadedQ;
 
     //map of pending header requests and currently desired LODs
-    typedef boost::unordered_map<LLUUID, std::vector<S32> > pending_lod_map;
+    typedef std::unordered_map<LLUUID, std::vector<S32> > pending_lod_map;
     pending_lod_map mPendingLOD;
 
+    // map of mesh ID to skin info (mirrors LLMeshRepository::mSkinMap)
+    /// NOTE: LLMeshRepository::mSkinMap is accessed very frequently, so maintain a copy here to avoid mutex overhead
+    typedef std::unordered_map<LLUUID, LLPointer<LLMeshSkinInfo>> skin_map;
+    skin_map mSkinMap;
+
+    // workqueue for processing generic requests
+    LL::WorkQueue mWorkQueue;
+
     // llcorehttp library interface objects.
     LLCore::HttpStatus                  mHttpStatus;
     LLCore::HttpRequest *               mHttpRequest;
@@ -380,7 +389,7 @@ class LLMeshRepoThread : public LLThread
     LLCore::HttpRequest::policy_t       mHttpPolicyClass;
     LLCore::HttpRequest::policy_t       mHttpLargePolicyClass;
 
-    typedef std::set<LLCore::HttpHandler::ptr_t> http_request_set;
+    typedef std::unordered_set<LLCore::HttpHandler::ptr_t> http_request_set;
     http_request_set                    mHttpRequestSet;            // Outstanding HTTP requests
 
     std::string mGetMeshCapability;
@@ -696,13 +705,13 @@ class LLMeshRepository
     std::queue<LLUUID> mPendingSkinRequests;
 
     //list of mesh ids awaiting decompositions
-    std::set<LLUUID> mLoadingDecompositions;
+    std::unordered_set<LLUUID> mLoadingDecompositions;
 
     //list of mesh ids that need to send decomposition fetch requests
     std::queue<LLUUID> mPendingDecompositionRequests;
 
     //list of mesh ids awaiting physics shapes
-    std::set<LLUUID> mLoadingPhysicsShapes;
+    std::unordered_set<LLUUID> mLoadingPhysicsShapes;
 
     //list of mesh ids that need to send physics shape fetch requests
     std::queue<LLUUID> mPendingPhysicsShapeRequests;
diff --git a/indra/newview/llskinningutil.cpp b/indra/newview/llskinningutil.cpp
index 1c92e067007..cee43f3cff8 100644
--- a/indra/newview/llskinningutil.cpp
+++ b/indra/newview/llskinningutil.cpp
@@ -317,19 +317,16 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
 
 void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *avatar, LLVolumeFace& vol_face)
 {
-    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
-
     if (vol_face.mJointRiggingInfoTab.needsUpdate())
     {
         S32 num_verts = vol_face.mNumVertices;
         S32 num_joints = static_cast<S32>(skin->mJointNames.size());
         if (num_verts > 0 && vol_face.mWeights && num_joints > 0)
         {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
             initJointNums(const_cast<LLMeshSkinInfo*>(skin), avatar);
             if (vol_face.mJointRiggingInfoTab.size()==0)
             {
-                //std::set<S32> active_joints;
-                //S32 active_verts = 0;
                 vol_face.mJointRiggingInfoTab.resize(LL_CHARACTER_MAX_ANIMATED_JOINTS);
                 LLJointRiggingInfoTab &rig_info_tab = vol_face.mJointRiggingInfoTab;
                 for (S32 i=0; i<vol_face.mNumVertices; i++)
@@ -345,35 +342,22 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
                         F32 w = weights[k];
                         idx[k] = llclamp((S32) floorf(w), (S32)0, (S32)LL_CHARACTER_MAX_ANIMATED_JOINTS-1);
                         wght[k] = w - idx[k];
-                        scale += wght[k];
-                    }
-                    if (scale > 0.0f)
-                    {
-                        for (U32 k=0; k<4; ++k)
-                        {
-                            wght[k] /= scale;
-                        }
                     }
+
                     for (U32 k=0; k<4; ++k)
                     {
                         S32 joint_index = idx[k];
-                        if (wght[k] > 0.0f && num_joints > joint_index)
+                        if (wght[k] > 0.2f && num_joints > joint_index)
                         {
                             S32 joint_num = skin->mJointNums[joint_index];
                             if (joint_num >= 0 && joint_num < LL_CHARACTER_MAX_ANIMATED_JOINTS)
                             {
                                 rig_info_tab[joint_num].setIsRiggedTo(true);
 
-                                // FIXME could precompute these matMuls.
-                                const LLMatrix4a& bind_shape = skin->mBindShapeMatrix;
-                                const LLMatrix4a& inv_bind = skin->mInvBindMatrix[joint_index];
-                                LLMatrix4a mat;
+                                const LLMatrix4a& mat = skin->mBindPoseMatrix[joint_index];
                                 LLVector4a pos_joint_space;
 
-                                matMul(bind_shape, inv_bind, mat);
-
                                 mat.affineTransform(pos, pos_joint_space);
-                                pos_joint_space.mul(wght[k]);
 
                                 LLVector4a *extents = rig_info_tab[joint_num].getRiggedExtents();
                                 update_min_max(extents[0], extents[1], pos_joint_space);
@@ -381,28 +365,9 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
                         }
                     }
                 }
-                //LL_DEBUGS("RigSpammish") << "built rigging info for vf " << &vol_face
-                //                         << " num_verts " << vol_face.mNumVertices
-                //                         << " active joints " << active_joints.size()
-                //                         << " active verts " << active_verts
-                //                         << LL_ENDL;
                 vol_face.mJointRiggingInfoTab.setNeedsUpdate(false);
             }
         }
-
-#if DEBUG_SKINNING
-        if (vol_face.mJointRiggingInfoTab.size()!=0)
-        {
-            LL_DEBUGS("RigSpammish") << "we have rigging info for vf " << &vol_face
-                                     << " num_verts " << vol_face.mNumVertices << LL_ENDL;
-        }
-        else
-        {
-            LL_DEBUGS("RigSpammish") << "no rigging info for vf " << &vol_face
-                                     << " num_verts " << vol_face.mNumVertices << LL_ENDL;
-        }
-#endif
-
     }
 }
 
diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp
index e36a03a749a..86440fca487 100644
--- a/indra/newview/llviewerobject.cpp
+++ b/indra/newview/llviewerobject.cpp
@@ -7280,6 +7280,7 @@ const std::string& LLViewerObject::getAttachmentItemName() const
 //virtual
 LLVOAvatar* LLViewerObject::getAvatar() const
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
     if (getControlAvatar())
     {
         return getControlAvatar();
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index 5329b354d3a..6eebb3448f4 100644
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -1654,6 +1654,9 @@ void LLVOAvatar::renderCollisionVolumes()
     }
 }
 
+// defined in llspatialpartition.cpp -- draw a box outline in the current GL context from given center and half-size
+void drawBoxOutline(const LLVector4a& pos, const LLVector4a& size);
+
 void LLVOAvatar::renderBones(const std::string &selected_joint)
 {
     LLGLEnable blend(GL_BLEND);
@@ -1730,6 +1733,88 @@ void LLVOAvatar::renderBones(const std::string &selected_joint)
 
         gGL.popMatrix();
     }
+
+
+    // draw joint space bounding boxes of rigged attachments in yellow (one box per joint whose rig info reports isRiggedTo())
+    gGL.color3f(1.f, 1.f, 0.f);
+    for (S32 joint_num = 0; joint_num < LL_CHARACTER_MAX_ANIMATED_JOINTS; joint_num++)
+    {
+        LLJoint* joint = getJoint(joint_num);
+        LLJointRiggingInfo* rig_info = nullptr;
+        if (joint_num < mJointRiggingInfoTab.size()) // never index past the entries the table currently holds
+        {
+            rig_info = &mJointRiggingInfoTab[joint_num];
+        }
+
+        if (joint && rig_info && rig_info->isRiggedTo())
+        {
+            LLViewerJointAttachment* as_joint_attach = dynamic_cast<LLViewerJointAttachment*>(joint);
+            if (as_joint_attach && as_joint_attach->getIsHUDAttachment())
+            {
+                // Ignore bounding box of HUD joints
+                continue;
+            }
+            gGL.pushMatrix();
+            gGL.multMatrix(&joint->getXform()->getWorldMatrix().mMatrix[0][0]); // draw relative to the joint's world matrix
+
+            LLVector4a pos;
+            LLVector4a size;
+
+            const LLVector4a* extents = rig_info->getRiggedExtents(); // extents[0] / extents[1] are the min / max corners
+
+            pos.setAdd(extents[0], extents[1]);
+            pos.mul(0.5f);  // box center = (min + max) / 2
+            size.setSub(extents[1], extents[0]);
+            size.mul(0.5f); // half-size = (max - min) / 2, as drawBoxOutline expects
+
+            drawBoxOutline(pos, size);
+
+            gGL.popMatrix();
+        }
+    }
+
+    // draw world space rigged bounding boxes in cyan, one per rigged face of each non-HUD attachment
+    gGL.color3f(0.f, 1.f, 1.f);
+    for (attachment_map_t::value_type& attach_entry : mAttachmentPoints)
+    {
+        LLViewerJointAttachment* attach_point = attach_entry.second;
+        if (!attach_point->getValid())
+        {
+            continue;
+        }
+
+        for (auto& attached_ref : attach_point->mAttachedObjects)
+        {
+            LLViewerObject* objectp = attached_ref.get();
+            if (!objectp || objectp->isHUDAttachment())
+            {
+                // null entries and HUD attachments get no box
+                continue;
+            }
+
+            LLDrawable* drawablep = objectp->mDrawable;
+            if (!drawablep || !drawablep->isState(LLDrawable::RIGGED | LLDrawable::RIGGED_CHILD))
+            {
+                continue;
+            }
+
+            // walk the drawable's faces and outline the rigged extents of those flagged RIGGED
+            for (S32 face_idx = 0; face_idx < drawablep->getNumFaces(); ++face_idx)
+            {
+                LLFace* facep = drawablep->getFace(face_idx);
+                if (facep && facep->isState(LLFace::RIGGED))
+                {
+                    LLVector4a* extents = facep->mRiggedExtents;
+                    LLVector4a center, size;
+                    center.setAdd(extents[0], extents[1]);
+                    center.mul(0.5f);
+                    size.setSub(extents[1], extents[0]);
+                    size.mul(0.5f);
+                    drawBoxOutline(center, size);
+                }
+            }
+        }
+    }
 }
 
 
@@ -10683,35 +10768,42 @@ void LLVOAvatar::updateRiggingInfo()
 
     LL_DEBUGS("RigSpammish") << getFullname() << " updating rig tab" << LL_ENDL;
 
-    std::vector<LLVOVolume*> volumes;
+    // use a local static for scratch space to avoid reallocation here
+    static std::vector<LLVOVolume*> volumes;
+    volumes.resize(0);
 
     getAssociatedVolumes(volumes);
 
-    std::map<LLUUID, S32> curr_rigging_info_key;
-
     {
         LL_PROFILE_ZONE_NAMED_CATEGORY_AVATAR("update rig info - get key")
+        HBXXH128 hash;
 
         // Get current rigging info key
         for (LLVOVolume* vol : volumes)
         {
-            if (vol->isMesh() && vol->getVolume())
+            if (vol->isRiggedMesh())
             {
                 const LLUUID& mesh_id = vol->getVolume()->getParams().getSculptID();
                 S32 max_lod = llmax(vol->getLOD(), vol->mLastRiggingInfoLOD);
-                curr_rigging_info_key[mesh_id] = max_lod;
+
+                hash.update(mesh_id.mData, sizeof(mesh_id.mData));
+                hash.update(&max_lod, sizeof(max_lod));
             }
         }
-    }
 
-    // Check for key change, which indicates some change in volume composition or LOD.
-    if (curr_rigging_info_key == mLastRiggingInfoKey)
-    {
-        return;
+        LLUUID curr_rigging_info_key = hash.digest();
+
+        // Check for key change, which indicates some change in volume composition or LOD.
+        if (curr_rigging_info_key == mLastRiggingInfoKey)
+        {
+            return;
+        }
+
+
+        // Something changed. Update.
+        mLastRiggingInfoKey = curr_rigging_info_key;
     }
 
-    // Something changed. Update.
-    mLastRiggingInfoKey = curr_rigging_info_key;
     mJointRiggingInfoTab.clear();
     for (LLVOVolume* vol : volumes)
     {
diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h
index 2efb232704e..d5642d460ed 100644
--- a/indra/newview/llvoavatar.h
+++ b/indra/newview/llvoavatar.h
@@ -227,7 +227,7 @@ class LLVOAvatar :
     // virtual
     void                    updateRiggingInfo();
     // This encodes mesh id and LOD, so we can see whether display is up-to-date.
-    std::map<LLUUID,S32>    mLastRiggingInfoKey;
+    LLUUID    mLastRiggingInfoKey; // hash digest over each rigged volume's mesh id and max LOD, computed in updateRiggingInfo()
 
     std::set<LLUUID>        mActiveOverrideMeshes;
     virtual void            onActiveOverrideMeshesChanged();
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 4f48a070e37..2859f8b1c25 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -3657,7 +3657,7 @@ const LLMeshSkinInfo* LLVOVolume::getSkinInfo() const
 // virtual
 bool LLVOVolume::isRiggedMesh() const
 {
-    return isMesh() && getSkinInfo();
+    return getSkinInfo() != nullptr; // rigged mesh if and only if skin info is available
 }
 
 //----------------------------------------------------------------------------
@@ -3818,7 +3818,6 @@ void LLVOVolume::updateRiggingInfo()
         LLVolume *volume = getVolume();
         if (skin && avatar && volume)
         {
-            LL_DEBUGS("RigSpammish") << "starting, vovol " << this << " lod " << getLOD() << " last " << mLastRiggingInfoLOD << LL_ENDL;
             if (getLOD()>mLastRiggingInfoLOD || getLOD()==3)
             {
                 // Rigging info may need update
@@ -3834,9 +3833,6 @@ void LLVOVolume::updateRiggingInfo()
                 }
                 // Keep the highest LOD info available.
                 mLastRiggingInfoLOD = getLOD();
-                LL_DEBUGS("RigSpammish") << "updated rigging info for LLVOVolume "
-                                         << this << " lod " << mLastRiggingInfoLOD
-                                         << LL_ENDL;
             }
         }
     }