-
Notifications
You must be signed in to change notification settings - Fork 97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
optimize collision detection #235
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -157,26 +157,28 @@ struct CreateRadixTree { | |
} | ||
}; | ||
|
||
template <typename T> | ||
template <typename T, const bool allocateOnly> | ||
struct FindCollisions { | ||
thrust::pair<int*, int*> querryTri_; | ||
int* numOverlaps_; | ||
const int maxOverlaps_; | ||
int* counts; | ||
const Box* nodeBBox_; | ||
const thrust::pair<int, int>* internalChildren_; | ||
|
||
__host__ __device__ int RecordCollision(int node, | ||
const thrust::tuple<T, int>& query) { | ||
thrust::tuple<T, int>& query) { | ||
const T& queryObj = thrust::get<0>(query); | ||
const int queryIdx = thrust::get<1>(query); | ||
int& count = counts[queryIdx]; | ||
|
||
bool overlaps = nodeBBox_[node].DoesOverlap(queryObj); | ||
if (overlaps && IsLeaf(node)) { | ||
int pos = AtomicAdd(*numOverlaps_, 1); | ||
if (pos >= maxOverlaps_) | ||
return -1; // Didn't allocate enough memory; bail out | ||
querryTri_.first[pos] = queryIdx; | ||
querryTri_.second[pos] = Node2Leaf(node); | ||
if (allocateOnly) { | ||
count++; | ||
} else { | ||
int pos = count++; | ||
querryTri_.first[pos] = queryIdx; | ||
querryTri_.second[pos] = Node2Leaf(node); | ||
} | ||
} | ||
return overlaps && IsInternal(node); // Should traverse into node | ||
} | ||
|
@@ -188,15 +190,16 @@ struct FindCollisions { | |
int top = -1; | ||
// Depth-first search | ||
int node = kRoot; | ||
const int queryIdx = thrust::get<1>(query); | ||
// same implies that this query do not have any collision | ||
if (!allocateOnly && counts[queryIdx] == counts[queryIdx + 1]) return; | ||
while (1) { | ||
int internal = Node2Internal(node); | ||
int child1 = internalChildren_[internal].first; | ||
int child2 = internalChildren_[internal].second; | ||
|
||
int traverse1 = RecordCollision(child1, query); | ||
if (traverse1 < 0) return; | ||
int traverse2 = RecordCollision(child2, query); | ||
if (traverse2 < 0) return; | ||
|
||
if (!traverse1 && !traverse2) { | ||
if (top < 0) break; // done | ||
|
@@ -268,33 +271,24 @@ Collider::Collider(const VecDH<Box>& leafBB, | |
*/ | ||
template <typename T> | ||
SparseIndices Collider::Collisions(const VecDH<T>& querriesIn) const { | ||
int maxOverlaps = querriesIn.size() * 4; | ||
SparseIndices querryTri(maxOverlaps); | ||
int nOverlaps = 0; | ||
while (1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's certainly nice to remove this; I use similar logic in […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I forgot to address this. I think it should be possible, but it will need some tests to see whether there is a significant performance improvement. |
||
// scalar number of overlaps found | ||
VecDH<int> nOverlapsD(1, 0); | ||
// calculate Bounding Box overlaps | ||
for_each_n( | ||
autoPolicy(querriesIn.size()), zip(querriesIn.cbegin(), countAt(0)), | ||
querriesIn.size(), | ||
FindCollisions<T>({querryTri.ptrDpq(), nOverlapsD.ptrD(), maxOverlaps, | ||
nodeBBox_.ptrD(), internalChildren_.ptrD()})); | ||
nOverlaps = nOverlapsD[0]; | ||
if (nOverlaps <= maxOverlaps) | ||
break; | ||
else { // if not enough memory was allocated, guess how much will be needed | ||
int lastQuery = querryTri.Get(0).back(); | ||
float ratio = static_cast<float>(querriesIn.size()) / lastQuery; | ||
if (ratio > 1000) // do not trust the ratio if it is too large | ||
maxOverlaps *= 2; | ||
else | ||
maxOverlaps *= 2 * ratio; | ||
querryTri.Resize(maxOverlaps); | ||
} | ||
} | ||
// remove unused part of array | ||
querryTri.Resize(nOverlaps); | ||
// note that the length is 1 larger than the number of queries so the last | ||
// element can store the sum when using exclusive scan | ||
VecDH<int> counts(querriesIn.size() + 1, 0); | ||
auto policy = autoPolicy(querriesIn.size()); | ||
// compute the number of collisions to determine the size for allocation and | ||
// offset, this avoids the need for atomic | ||
for_each_n(policy, zip(querriesIn.cbegin(), countAt(0)), querriesIn.size(), | ||
FindCollisions<T, true>( | ||
{thrust::pair<int*, int*>(nullptr, nullptr), counts.ptrD(), | ||
nodeBBox_.ptrD(), internalChildren_.ptrD()})); | ||
// compute start index for each query and total count | ||
exclusive_scan(policy, counts.begin(), counts.end(), counts.begin()); | ||
SparseIndices querryTri(counts.back()); | ||
// actually recording collisions | ||
for_each_n( | ||
policy, zip(querriesIn.cbegin(), countAt(0)), querriesIn.size(), | ||
FindCollisions<T, false>({querryTri.ptrDpq(), counts.ptrD(), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a little surprising to me that running this function twice is faster than running it once simply because of removing the atomics. Would you mind doing a touch more perf comparison? I'd like to know how it compares for CPU and GPU, for meshes with lots of intersections (menger sponge is good) and few (our perf spheres are good). |
||
nodeBBox_.ptrD(), internalChildren_.ptrD()})); | ||
return querryTri; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this no longer useful?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because `RecordCollision` can no longer exit early due to insufficient memory; it now calculates the number of elements first and allocates exactly that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, right; I'd forgotten about that. Thanks!