Skip to content

Commit

Permalink
Tune and speed up doublet algo (#158)
Browse files Browse the repository at this point in the history
Tune and speed up the pixel doublet alforithm, and take advantage of GPU read-only memory for a further speedup.

Includes a python notebook to tune the cuts for doublets and triplets.
  • Loading branch information
fwyzard committed Nov 27, 2020
1 parent e0f8e82 commit a711fe9
Show file tree
Hide file tree
Showing 4 changed files with 2,883 additions and 28 deletions.
40 changes: 25 additions & 15 deletions RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ namespace gpuPixelDoublets {

template<typename Hist>
__device__
void doubletsFromHisto(uint8_t const * layerPairs, uint32_t nPairs, GPUCACell * cells, uint32_t * nCells,
int16_t const * iphi, Hist const * hist, uint32_t const * offsets,
siPixelRecHitsHeterogeneousProduct::HitsOnGPU const & hh,
void doubletsFromHisto(uint8_t const * __restrict__ layerPairs, uint32_t nPairs, GPUCACell * cells, uint32_t * nCells,
int16_t const * __restrict__ iphi, Hist const * __restrict__ hist, uint32_t const * __restrict__ offsets,
siPixelRecHitsHeterogeneousProduct::HitsOnGPU const & __restrict__ hh,
GPU::VecArray< unsigned int, 256> * isOuterHitOfCell,
int16_t const * phicuts, float const * minz, float const * maxz, float const * maxr) {
int16_t const * phicuts, float const * minz, float const * maxz, float const * maxr) {

auto layerSize = [=](uint8_t li) { return offsets[li+1]-offsets[li]; };

Expand All @@ -41,7 +41,6 @@ namespace gpuPixelDoublets {

auto idx = blockIdx.x*blockDim.x + threadIdx.x;
for(auto j=idx;j<ntot;j+=blockDim.x*gridDim.x) {
auto j = idx;

uint32_t pairLayerId=0;
while(j>=innerLayerCumulativeSize[pairLayerId++]); --pairLayerId; // move to lower_bound ??
Expand All @@ -68,18 +67,28 @@ namespace gpuPixelDoublets {
auto mep = iphi[i];
auto mez = hh.zg_d[i];
auto mer = hh.rg_d[i];
auto cutoff = [&](int j) { return
auto cutoff = [&](int j) { return
abs(hh.zg_d[j]-mez) > maxz[pairLayerId] ||
abs(hh.zg_d[j]-mez) < minz[pairLayerId] ||
hh.rg_d[j]-mer > maxr[pairLayerId];
};

constexpr float z0cut = 12.f;
constexpr float hardPtCut = 0.5f;
constexpr float minRadius = hardPtCut * 87.f;
constexpr float minRadius2T4 = 4.f*minRadius*minRadius;
auto ptcut = [&](int j) {
auto r2t4 = minRadius2T4;
auto ri = mer;
auto ro = hh.rg_d[j];
auto dphi = short2phi( min( abs(int16_t(mep-iphi[j])),abs(int16_t(iphi[j]-mep)) ) );
return dphi*dphi*(r2t4 -ri*ro) > (ro-ri)*(ro-ri);
};
auto z0cutoff = [&](int j) {
auto zo = hh.zg_d[j];
auto ro = hh.rg_d[j];
auto ro = hh.rg_d[j];
auto dr = ro-mer;
return dr > maxr[pairLayerId] ||
return dr > maxr[pairLayerId] ||
dr<0 || std::abs((mez*ro - mer*zo)) > z0cut*dr;
};

Expand All @@ -92,7 +101,7 @@ namespace gpuPixelDoublets {
int nmin = 0;
auto khh = kh;
incr(khh);

int tooMany=0;
for (auto kk=kl; kk!=khh; incr(kk)) {
if (kk!=kl && kk!=kh) nmin+=hist[outer].size(kk);
Expand All @@ -103,7 +112,7 @@ namespace gpuPixelDoublets {

if (std::min(std::abs(int16_t(iphi[oi]-mep)), std::abs(int16_t(mep-iphi[oi]))) > iphicut)
continue;
if (z0cutoff(oi)) continue;
if (z0cutoff(oi) || ptcut(oi)) continue;
auto ind = atomicInc(nCells,MaxNumOfDoublets);
// int layerPairId, int doubletId, int innerHitId,int outerHitId)
cells[ind].init(hh,pairLayerId,ind,i,oi);
Expand All @@ -123,12 +132,13 @@ namespace gpuPixelDoublets {

} // loop in block...
}

__global__
void getDoubletsFromHisto(GPUCACell * cells, uint32_t * nCells, siPixelRecHitsHeterogeneousProduct::HitsOnGPU const * hhp,
void getDoubletsFromHisto(GPUCACell * cells, uint32_t * nCells, siPixelRecHitsHeterogeneousProduct::HitsOnGPU const * __restrict__ hhp,
GPU::VecArray< unsigned int, 256> *isOuterHitOfCell) {

uint8_t const layerPairs[2*13] = {0,1 ,1,2 ,2,3
// ,0,4 ,1,4 ,2,4 ,4,5 ,5,6
uint8_t const layerPairs[2*13] = {0,1 ,1,2 ,2,3
// ,0,4 ,1,4 ,2,4 ,4,5 ,5,6
,0,7 ,1,7 ,2,7 ,7,8 ,8,9
,0,4 ,1,4 ,2,4 ,4,5 ,5,6
};
Expand Down Expand Up @@ -158,8 +168,8 @@ namespace gpuPixelDoublets {
};


auto const & hh = *hhp;
doubletsFromHisto(layerPairs, 13, cells, nCells,
auto const & __restrict__ hh = *hhp;
doubletsFromHisto(layerPairs, 13, cells, nCells,
hh.iphi_d,hh.hist_d,hh.hitsLayerStart_d,
hh, isOuterHitOfCell,
phicuts, minz, maxz, maxr);
Expand Down
31 changes: 18 additions & 13 deletions RecoPixelVertexing/PixelTriplets/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
<library file="HitTripletProducer.cc" name="HitTripletProducer">
<use name="boost"/>
<use name="root"/>
<use name="FWCore/Framework"/>
<use name="FWCore/ParameterSet"/>
<use name="Geometry/Records"/>
<use name="RecoTracker/TkTrackingRegions"/>
<use name="RecoPixelVertexing/PixelTriplets"/>
<flags EDM_PLUGIN="1"/>
<library file="HitTripletProducer.cc" name="HitTripletProducer">
<use name="boost"/>
<use name="root"/>
<use name="FWCore/Framework"/>
<use name="FWCore/PluginManager"/>
<use name="FWCore/ParameterSet"/>
<use name="Geometry/Records"/>
<use name="Geometry/CommonDetUnit"/>
<use name="Geometry/TrackerGeometryBuilder"/>
<use name="DataFormats/TrackerRecHit2D"/>
<use name="RecoTracker/TkHitPairs"/>
<use name="RecoTracker/TkTrackingRegions"/>
<use name="RecoPixelVertexing/PixelTriplets"/>
<flags EDM_PLUGIN="1"/>
</library>

<bin file="PixelTriplets_InvPrbl_t.cpp">
<use name="RecoPixelVertexing/PixelTriplets"/>
<use name="RecoPixelVertexing/PixelTriplets"/>
</bin>

<bin file="PixelTriplets_InvPrbl_prec.cpp">
<use name="RecoPixelVertexing/PixelTriplets"/>
<use name="RecoPixelVertexing/PixelTriplets"/>
</bin>

<bin file="fastDPHI_t.cpp"/>
197 changes: 197 additions & 0 deletions RecoPixelVertexing/PixelTriplets/test/fastDPHI_t.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
// this test documents the derivation of the fast deltaphi used in gpu doublet code..
//
//
//
#include<cmath>
#include<algorithm>
#include<numeric>
#include<cassert>

/**
| 1) circle is parameterized as: |
| C*[(X-Xp)**2+(Y-Yp)**2] - 2*alpha*(X-Xp) - 2*beta*(Y-Yp) = 0 |
| Xp,Yp is a point on the track (Yp is at the center of the chamber); |
| C = 1/r0 is the curvature ( sign of C is charge of particle ); |
| alpha & beta are the direction cosines of the radial vector at Xp,Yp |
| i.e. alpha = C*(X0-Xp), |
| beta = C*(Y0-Yp), |
| where center of circle is at X0,Y0. |
| Alpha > 0 |
| Slope dy/dx of tangent at Xp,Yp is -alpha/beta. |
| 2) the z dimension of the helix is parameterized by gamma = dZ/dSperp |
| this is also the tangent of the pitch angle of the helix. |
| with this parameterization, (alpha,beta,gamma) rotate like a vector. |
| 3) For tracks going inward at (Xp,Yp), C, alpha, beta, and gamma change sign|
|
*/

template<typename T>
class FastCircle {

public:

FastCircle(){}
FastCircle(T x1, T y1,
T x2, T y2,
T x3, T y3) {
compute(x1,y1,x2,y2,x3,y3);
}

void compute(T x1, T y1,
T x2, T y2,
T x3, T y3);


T m_xp;
T m_yp;
T m_c;
T m_alpha;
T m_beta;

};


template<typename T>
void FastCircle<T>::compute(T x1, T y1,
T x2, T y2,
T x3, T y3) {
bool flip = std::abs(x3-x1) > std::abs(y3-y1);

auto x1p = x1-x2;
auto y1p = y1-y2;
auto d12 = x1p*x1p + y1p*y1p;
auto x3p = x3-x2;
auto y3p = y3-y2;
auto d32 = x3p*x3p + y3p*y3p;

if (flip) {
std::swap(x1p,y1p);
std::swap(x3p,y3p);
}

auto num = x1p*y3p-y1p*x3p; // num also gives correct sign for CT
auto det = d12*y3p-d32*y1p;
if( std::abs(det)==0 ) {
// and why we flip????
}
auto ct = num/det;
auto sn = det>0 ? T(1.) : T(-1.);
auto st2 = (d12*x3p-d32*x1p)/det;
auto seq = T(1.) +st2*st2;
auto al2 = sn/std::sqrt(seq);
auto be2 = -st2*al2;
ct *= T(2.)*al2;

if (flip) {
std::swap(x1p,y1p);
std::swap(al2,be2);
al2 = -al2;
be2 = -be2;
ct = -ct;
}

m_xp = x1;
m_yp = y1;
m_c= ct;
m_alpha = al2 - ct*x1p;
m_beta = be2 - ct*y1p;

}



// compute curvature given two points (and origin)
float fastDPHI(float ri, float ro, float dphi) {

/*
x3=0 y1=0 x1=0;
y3=ro
*/

// auto x2 = ri*dphi;
// auto y2 = ri*(1.f-0.5f*dphi*dphi);


/*
auto x1p = x1-x2;
auto y1p = y1-y2;
auto d12 = x1p*x1p + y1p*y1p;
auto x3p = x3-x2;
auto y3p = y3-y2;
auto d32 = x3p*x3p + y3p*y3p;
*/

/*
auto x1p = -x2;
auto y1p = -y2;
auto d12 = ri*ri;
auto x3p = -x2;
auto y3p = ro-y2;
auto d32 = ri*ri + ro*ro - 2.f*ro*y2;
*/


// auto rat = (ro -2.f*y2);
// auto det = ro - ri - (ro - 2.f*ri -0.5f*ro)*dphi*dphi;

//auto det2 = (ro-ri)*(ro-ri) -2.*(ro-ri)*(ro - 2.f*ri -0.5f*ro)*dphi*dphi;
// auto seq = det2 + dphi*dphi*(ro-2.f*ri)*(ro-2.f*ri); // *rat2;
// auto seq = (ro-ri)*(ro-ri) + dphi*dphi*ri*ro;

// and little by little simplifing and removing higher over terms
// we get
auto r2 = (ro-ri)*(ro-ri)/(dphi*dphi) + ri*ro;


// d2 = (ro-ri)*(ro-ri)/(4.f*r2 -ri*ro);
// return -2.f*dphi/std::sqrt(seq);

return -1.f/std::sqrt(r2/4.f);

}



#include<iostream>

template<typename T>
bool equal(T a, T b) {
// return float(a-b)==0;
return std::abs(float(a-b)) < std::abs(0.01f*a);
}



int n=0;
void go(float ri, float ro, float dphi, bool print=false) {
++n;
float x3 = 0.f, y3 = ro;
float x2 = ri*sin(dphi);
float y2 = ri*cos(dphi);


FastCircle<float> c(0,0,
x2,y2,
x3,y3);

auto cc = fastDPHI(ri,ro,dphi);
if (print) std::cout << c.m_c << ' ' << cc << std::endl;
assert(equal(c.m_c,cc));


}

int main() {


go(4.,7.,0.1, true);

for (float r1=2; r1<15; r1+=1)
for (float dr=0.5; dr<10; dr+=0.5)
for (float dphi=0.02; dphi<0.2; dphi+=0.2)
go(r1,r1+dr,dphi);

std::cout << "done " << n << std::endl;
return 0;
};

Loading

0 comments on commit a711fe9

Please sign in to comment.