Skip to content

Commit

Permalink
collider: improve performance
Browse files Browse the repository at this point in the history
Reduces the number of atomic instructions needed and only perform
allocation once. This improves the performance for complicated models by
~10%.
  • Loading branch information
pca006132 committed Oct 10, 2022
1 parent bc83f65 commit 0490c36
Showing 1 changed file with 32 additions and 38 deletions.
70 changes: 32 additions & 38 deletions src/collider/src/collider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,26 +157,28 @@ struct CreateRadixTree {
}
};

template <typename T>
template <typename T, const bool allocateOnly>
struct FindCollisions {
thrust::pair<int*, int*> querryTri_;
int* numOverlaps_;
const int maxOverlaps_;
int* counts;
const Box* nodeBBox_;
const thrust::pair<int, int>* internalChildren_;

__host__ __device__ int RecordCollision(int node,
const thrust::tuple<T, int>& query) {
thrust::tuple<T, int>& query) {
const T& queryObj = thrust::get<0>(query);
const int queryIdx = thrust::get<1>(query);
int& count = counts[queryIdx];

bool overlaps = nodeBBox_[node].DoesOverlap(queryObj);
if (overlaps && IsLeaf(node)) {
int pos = AtomicAdd(*numOverlaps_, 1);
if (pos >= maxOverlaps_)
return -1; // Didn't allocate enough memory; bail out
querryTri_.first[pos] = queryIdx;
querryTri_.second[pos] = Node2Leaf(node);
if (allocateOnly) {
count++;
} else {
int pos = count++;
querryTri_.first[pos] = queryIdx;
querryTri_.second[pos] = Node2Leaf(node);
}
}
return overlaps && IsInternal(node); // Should traverse into node
}
Expand All @@ -188,15 +190,16 @@ struct FindCollisions {
int top = -1;
// Depth-first search
int node = kRoot;
const int queryIdx = thrust::get<1>(query);
// same implies that this query do not have any collision
if (!allocateOnly && counts[queryIdx] == counts[queryIdx + 1]) return;
while (1) {
int internal = Node2Internal(node);
int child1 = internalChildren_[internal].first;
int child2 = internalChildren_[internal].second;

int traverse1 = RecordCollision(child1, query);
if (traverse1 < 0) return;
int traverse2 = RecordCollision(child2, query);
if (traverse2 < 0) return;

if (!traverse1 && !traverse2) {
if (top < 0) break; // done
Expand Down Expand Up @@ -268,33 +271,24 @@ Collider::Collider(const VecDH<Box>& leafBB,
*/
template <typename T>
SparseIndices Collider::Collisions(const VecDH<T>& querriesIn) const {
int maxOverlaps = querriesIn.size() * 4;
SparseIndices querryTri(maxOverlaps);
int nOverlaps = 0;
while (1) {
// scalar number of overlaps found
VecDH<int> nOverlapsD(1, 0);
// calculate Bounding Box overlaps
for_each_n(
autoPolicy(querriesIn.size()), zip(querriesIn.cbegin(), countAt(0)),
querriesIn.size(),
FindCollisions<T>({querryTri.ptrDpq(), nOverlapsD.ptrD(), maxOverlaps,
nodeBBox_.ptrD(), internalChildren_.ptrD()}));
nOverlaps = nOverlapsD[0];
if (nOverlaps <= maxOverlaps)
break;
else { // if not enough memory was allocated, guess how much will be needed
int lastQuery = querryTri.Get(0).back();
float ratio = static_cast<float>(querriesIn.size()) / lastQuery;
if (ratio > 1000) // do not trust the ratio if it is too large
maxOverlaps *= 2;
else
maxOverlaps *= 2 * ratio;
querryTri.Resize(maxOverlaps);
}
}
// remove unused part of array
querryTri.Resize(nOverlaps);
// note that the length is 1 larger than the number of queries so the last
// element can store the sum when using exclusive scan
VecDH<int> counts(querriesIn.size() + 1, 0);
auto policy = autoPolicy(querriesIn.size());
// compute the number of collisions to determine the size for allocation and
// offset, this avoids the need for atomic
for_each_n(policy, zip(querriesIn.cbegin(), countAt(0)), querriesIn.size(),
FindCollisions<T, true>(
{thrust::pair<int*, int*>(nullptr, nullptr), counts.ptrD(),
nodeBBox_.ptrD(), internalChildren_.ptrD()}));
// compute start index for each query and total count
exclusive_scan(policy, counts.begin(), counts.end(), counts.begin());
SparseIndices querryTri(counts.back());
// actually recording collisions
for_each_n(
policy, zip(querriesIn.cbegin(), countAt(0)), querriesIn.size(),
FindCollisions<T, false>({querryTri.ptrDpq(), counts.ptrD(),
nodeBBox_.ptrD(), internalChildren_.ptrD()}));
return querryTri;
}

Expand Down

0 comments on commit 0490c36

Please sign in to comment.