Tune and speed up doublet algo (#158)

Tune and speed up the pixel doublet alforithm, and take advantage of GPU read-only memory for a further speedup. Includes a python notebook to tune the cuts for doublets and triplets.
cms-patatrack · Nov 27, 2020 · a711fe9 · a711fe9
1 parent e0f8e82
commit a711fe9
Show file tree

Hide file tree

Showing 4 changed files with 2,883 additions and 28 deletions.
diff --git a/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h
@@ -20,11 +20,11 @@ namespace gpuPixelDoublets {
 
   template<typename Hist>
   __device__
-  void doubletsFromHisto(uint8_t const * layerPairs, uint32_t nPairs, GPUCACell * cells, uint32_t * nCells,
-                         int16_t const * iphi, Hist const * hist, uint32_t const * offsets,
-                         siPixelRecHitsHeterogeneousProduct::HitsOnGPU const & hh,
+  void doubletsFromHisto(uint8_t const * __restrict__ layerPairs, uint32_t nPairs, GPUCACell * cells, uint32_t * nCells,
+                         int16_t const * __restrict__ iphi, Hist const * __restrict__ hist, uint32_t const * __restrict__ offsets,
+                         siPixelRecHitsHeterogeneousProduct::HitsOnGPU const &  __restrict__ hh,
                          GPU::VecArray< unsigned int, 256>  * isOuterHitOfCell,
-                         int16_t const * phicuts, float const * minz, float const * maxz, float const * maxr) {
+                        int16_t const * phicuts, float const * minz, float const * maxz, float const * maxr) {
 
     auto layerSize = [=](uint8_t li) { return offsets[li+1]-offsets[li]; };
 
@@ -41,7 +41,6 @@ namespace gpuPixelDoublets {
 
     auto idx = blockIdx.x*blockDim.x + threadIdx.x;
   for(auto j=idx;j<ntot;j+=blockDim.x*gridDim.x) {
-    auto j = idx; 
 
     uint32_t pairLayerId=0;
     while(j>=innerLayerCumulativeSize[pairLayerId++]);  --pairLayerId; // move to lower_bound ??
@@ -68,18 +67,28 @@ namespace gpuPixelDoublets {
     auto mep = iphi[i];
     auto mez = hh.zg_d[i];
     auto mer = hh.rg_d[i];
-    auto cutoff = [&](int j) { return 
+    auto cutoff = [&](int j) { return
         abs(hh.zg_d[j]-mez) > maxz[pairLayerId] ||
       	abs(hh.zg_d[j]-mez) < minz[pairLayerId] ||
         hh.rg_d[j]-mer > maxr[pairLayerId];
     };
 
     constexpr float z0cut = 12.f;
+    constexpr float hardPtCut = 0.5f;
+    constexpr float minRadius = hardPtCut * 87.f;
+    constexpr float minRadius2T4 = 4.f*minRadius*minRadius;
+    auto ptcut = [&](int j) {
+      auto r2t4 = minRadius2T4;
+      auto ri = mer;
+      auto ro = hh.rg_d[j];
+      auto dphi = short2phi( min( abs(int16_t(mep-iphi[j])),abs(int16_t(iphi[j]-mep)) ) );
+      return dphi*dphi*(r2t4 -ri*ro) > (ro-ri)*(ro-ri);
+    };
     auto z0cutoff = [&](int j) {
       auto zo =	hh.zg_d[j];
-      auto ro = hh.rg_d[j]; 
+      auto ro = hh.rg_d[j];
       auto dr = ro-mer;
-      return dr > maxr[pairLayerId] || 
+      return dr > maxr[pairLayerId] ||
              dr<0 || std::abs((mez*ro - mer*zo)) > z0cut*dr;
     };
 
@@ -92,7 +101,7 @@ namespace gpuPixelDoublets {
     int nmin = 0;
     auto khh = kh;
     incr(khh);
-    
+
     int tooMany=0;
     for (auto kk=kl; kk!=khh; incr(kk)) {
       if (kk!=kl && kk!=kh) nmin+=hist[outer].size(kk);
@@ -103,7 +112,7 @@ namespace gpuPixelDoublets {
 
         if (std::min(std::abs(int16_t(iphi[oi]-mep)), std::abs(int16_t(mep-iphi[oi]))) > iphicut)
           continue;
-        if (z0cutoff(oi)) continue;
+        if (z0cutoff(oi) || ptcut(oi)) continue;
         auto ind = atomicInc(nCells,MaxNumOfDoublets);
         // int layerPairId, int doubletId, int innerHitId,int outerHitId)
         cells[ind].init(hh,pairLayerId,ind,i,oi);
@@ -123,12 +132,13 @@ namespace gpuPixelDoublets {
 
   }  // loop in block...
   }
+
   __global__
-  void getDoubletsFromHisto(GPUCACell * cells, uint32_t * nCells, siPixelRecHitsHeterogeneousProduct::HitsOnGPU const * hhp,                
+  void getDoubletsFromHisto(GPUCACell * cells, uint32_t * nCells, siPixelRecHitsHeterogeneousProduct::HitsOnGPU const *  __restrict__ hhp,
                             GPU::VecArray< unsigned int, 256> *isOuterHitOfCell) {
 
-    uint8_t const layerPairs[2*13] = {0,1 ,1,2 ,2,3 
-                                     // ,0,4 ,1,4 ,2,4 ,4,5 ,5,6  
+    uint8_t const layerPairs[2*13] = {0,1 ,1,2 ,2,3
+                                     // ,0,4 ,1,4 ,2,4 ,4,5 ,5,6
                                      ,0,7 ,1,7 ,2,7 ,7,8 ,8,9
                                      ,0,4 ,1,4 ,2,4 ,4,5 ,5,6
                                      };
@@ -158,8 +168,8 @@ namespace gpuPixelDoublets {
                            };
 
 
-    auto const & hh = *hhp;
-    doubletsFromHisto(layerPairs, 13, cells, nCells, 
+    auto const &  __restrict__ hh = *hhp;
+    doubletsFromHisto(layerPairs, 13, cells, nCells,
                       hh.iphi_d,hh.hist_d,hh.hitsLayerStart_d,
                       hh, isOuterHitOfCell,
                       phicuts, minz, maxz, maxr);

diff --git a/RecoPixelVertexing/PixelTriplets/test/BuildFile.xml b/RecoPixelVertexing/PixelTriplets/test/BuildFile.xml
@@ -1,18 +1,23 @@
-<library file="HitTripletProducer.cc" name="HitTripletProducer">
-  <use name="boost"/>
-  <use name="root"/>
-  <use name="FWCore/Framework"/>
-  <use name="FWCore/ParameterSet"/>
-  <use name="Geometry/Records"/>
-  <use name="RecoTracker/TkTrackingRegions"/>
-  <use name="RecoPixelVertexing/PixelTriplets"/>
-  <flags EDM_PLUGIN="1"/>
+<library   file="HitTripletProducer.cc" name="HitTripletProducer">
+<use   name="boost"/>
+<use   name="root"/>
+<use   name="FWCore/Framework"/>
+<use   name="FWCore/PluginManager"/>
+<use   name="FWCore/ParameterSet"/>
+<use   name="Geometry/Records"/>
+<use   name="Geometry/CommonDetUnit"/>
+<use   name="Geometry/TrackerGeometryBuilder"/>
+<use   name="DataFormats/TrackerRecHit2D"/>
+<use   name="RecoTracker/TkHitPairs"/>
+<use   name="RecoTracker/TkTrackingRegions"/>
+<use   name="RecoPixelVertexing/PixelTriplets"/>
+  <flags   EDM_PLUGIN="1"/>
 </library>
-
 <bin file="PixelTriplets_InvPrbl_t.cpp">
-  <use name="RecoPixelVertexing/PixelTriplets"/>
+  <use   name="RecoPixelVertexing/PixelTriplets"/>
 </bin>
-
 <bin file="PixelTriplets_InvPrbl_prec.cpp">
-  <use name="RecoPixelVertexing/PixelTriplets"/>
+  <use   name="RecoPixelVertexing/PixelTriplets"/>
 </bin>
+
+<bin file="fastDPHI_t.cpp"/>
diff --git a/RecoPixelVertexing/PixelTriplets/test/fastDPHI_t.cpp b/RecoPixelVertexing/PixelTriplets/test/fastDPHI_t.cpp
@@ -0,0 +1,197 @@
+// this test documents the derivation of the fast deltaphi used in gpu doublet code..
+//
+//
+//
+#include<cmath>
+#include<algorithm>
+#include<numeric>
+#include<cassert>
+
+/**
+| 1) circle is parameterized as:                                              |
+|    C*[(X-Xp)**2+(Y-Yp)**2] - 2*alpha*(X-Xp) - 2*beta*(Y-Yp) = 0             |
+|    Xp,Yp is a point on the track (Yp is at the center of the chamber);      |
+|    C = 1/r0 is the curvature  ( sign of C is charge of particle );          |
+|    alpha & beta are the direction cosines of the radial vector at Xp,Yp     |
+|    i.e.  alpha = C*(X0-Xp),                                                 |
+|          beta  = C*(Y0-Yp),                                                 |
+|    where center of circle is at X0,Y0.                                      |
+|    Alpha > 0                                                                |
+|    Slope dy/dx of tangent at Xp,Yp is -alpha/beta.                          |
+| 2) the z dimension of the helix is parameterized by gamma = dZ/dSperp       |
+|    this is also the tangent of the pitch angle of the helix.                |
+|    with this parameterization, (alpha,beta,gamma) rotate like a vector.     |
+| 3) For tracks going inward at (Xp,Yp), C, alpha, beta, and gamma change sign|
+|
+*/
+
+template<typename T>
+class FastCircle {
+
+public:
+
+  FastCircle(){}
+  FastCircle(T x1, T y1,
+	     T x2, T y2,
+	     T x3, T y3) { 
+    compute(x1,y1,x2,y2,x3,y3);
+  }
+
+  void compute(T x1, T y1,
+	       T x2, T y2,
+	       T x3, T y3);
+
+
+  T m_xp;
+  T m_yp;
+  T m_c;
+  T m_alpha;
+  T m_beta;
+
+};
+
+
+template<typename T>
+void FastCircle<T>::compute(T x1, T y1,
+			    T x2, T y2,
+			    T x3, T y3) {
+  bool flip = std::abs(x3-x1) > std::abs(y3-y1);
+
+  auto x1p = x1-x2;
+  auto y1p = y1-y2;
+  auto d12 = x1p*x1p + y1p*y1p;
+  auto x3p = x3-x2;
+  auto y3p = y3-y2;
+  auto d32 = x3p*x3p + y3p*y3p;
+
+  if (flip) {
+    std::swap(x1p,y1p);
+    std::swap(x3p,y3p);
+  }
+
+  auto num = x1p*y3p-y1p*x3p;  // num also gives correct sign for CT
+  auto det = d12*y3p-d32*y1p;
+  if( std::abs(det)==0 ) {
+    // and why we flip????
+  }
+  auto ct  = num/det;
+  auto sn  = det>0 ? T(1.) : T(-1.);  
+  auto st2 = (d12*x3p-d32*x1p)/det;
+  auto seq = T(1.) +st2*st2;
+  auto al2 = sn/std::sqrt(seq);
+  auto be2 = -st2*al2;
+  ct *= T(2.)*al2;
+
+  if (flip) {
+    std::swap(x1p,y1p);
+    std::swap(al2,be2);
+    al2 = -al2;
+    be2 = -be2;
+    ct = -ct;
+  }
+
+  m_xp = x1;
+  m_yp = y1;
+  m_c= ct;
+  m_alpha = al2 - ct*x1p;
+  m_beta = be2 - ct*y1p;
+
+}
+
+
+
+// compute curvature given two points (and origin)
+float fastDPHI(float ri, float ro, float dphi) {
+
+  /*
+  x3=0 y1=0 x1=0;
+  y3=ro
+  */
+
+  // auto x2 = ri*dphi;
+  // auto y2 = ri*(1.f-0.5f*dphi*dphi);
+
+
+  /*
+  auto x1p = x1-x2;
+  auto y1p = y1-y2;
+  auto d12 = x1p*x1p + y1p*y1p;
+  auto x3p = x3-x2;
+  auto y3p = y3-y2;
+  auto d32 = x3p*x3p + y3p*y3p;
+  */
+
+  /*
+  auto x1p = -x2;
+  auto y1p = -y2;
+  auto d12 = ri*ri;
+  auto x3p = -x2;
+  auto y3p = ro-y2;
+  auto d32 = ri*ri + ro*ro - 2.f*ro*y2;
+  */
+
+
+  // auto rat = (ro -2.f*y2);
+  // auto det =  ro - ri - (ro - 2.f*ri -0.5f*ro)*dphi*dphi;
+
+  //auto det2 = (ro-ri)*(ro-ri) -2.*(ro-ri)*(ro - 2.f*ri -0.5f*ro)*dphi*dphi;
+  // auto seq = det2 +  dphi*dphi*(ro-2.f*ri)*(ro-2.f*ri);    // *rat2;
+  // auto seq = (ro-ri)*(ro-ri) +  dphi*dphi*ri*ro;
+
+  // and little by little simplifing and removing higher over terms 
+  // we get
+  auto r2 = (ro-ri)*(ro-ri)/(dphi*dphi) + ri*ro;
+
+
+  // d2 = (ro-ri)*(ro-ri)/(4.f*r2 -ri*ro);  
+  // return -2.f*dphi/std::sqrt(seq);
+
+  return -1.f/std::sqrt(r2/4.f);
+
+}
+
+
+
+#include<iostream>
+
+template<typename T>
+bool equal(T a, T b) {
+  //  return float(a-b)==0;
+  return std::abs(float(a-b)) < std::abs(0.01f*a);
+}
+
+
+
+int n=0;
+void go(float ri, float ro, float dphi, bool print=false) {
+  ++n;
+  float x3 = 0.f, y3 = ro;
+  float x2 = ri*sin(dphi);
+  float y2 = ri*cos(dphi);
+
+
+  FastCircle<float> c(0,0,
+		  x2,y2,
+                  x3,y3);
+
+  auto cc = fastDPHI(ri,ro,dphi);
+  if (print) std::cout << c.m_c << ' ' << cc << std::endl;
+  assert(equal(c.m_c,cc));
+
+
+}
+
+int main() {
+
+
+  go(4.,7.,0.1, true);
+
+  for (float r1=2; r1<15; r1+=1)
+    for (float dr=0.5; dr<10; dr+=0.5)
+      for (float dphi=0.02; dphi<0.2; dphi+=0.2)
+	go(r1,r1+dr,dphi);
+
+  std::cout << "done " << n << std::endl;
+  return 0;
+};
+