Skip to content

Commit

Permalink
Tune and speed up doublet algo (#158)
Browse files Browse the repository at this point in the history
Tune and speed up the pixel doublet alforithm, and take advantage of GPU read-only memory for a further speedup.

Includes a python notebook to tune the cuts for doublets and triplets.
  • Loading branch information
VinInn authored and fwyzard committed Dec 29, 2020
1 parent f011f7d commit cf35be0
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class SiPixelGainForHLTonGPU {
assert(offset<3088384);
assert(0==offset%2);

auto s = v_pedestals[offset/2];
DecodingStructure const * __restrict__ lp = v_pedestals;
auto s = lp[offset/2];

isDeadColumn = (s.ped & 0xFF) == deadFlag_;
isNoisyColumn = (s.ped & 0xFF) == noisyFlag_;
Expand Down
12 changes: 6 additions & 6 deletions RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ namespace gpuCalibPixel {


__global__ void calibDigis(uint16_t * id,
uint16_t const * x,
uint16_t const * y,
uint16_t const * __restrict__ x,
uint16_t const * __restrict__ y,
uint16_t * adc,
SiPixelGainForHLTonGPU const * ped,
SiPixelGainForHLTonGPU const * __restrict__ ped,
int numElements
)
{
Expand Down Expand Up @@ -55,11 +55,11 @@ namespace gpuCalibPixel {
}

__global__ void calibADCByModule(uint16_t * id,
uint16_t const * x,
uint16_t const * y,
uint16_t const * __restrict__ x,
uint16_t const * __restrict__ y,
uint16_t * adc,
uint32_t * moduleStart,
SiPixelGainForHLTonGPU const * ped,
SiPixelGainForHLTonGPU const * __restrict__ ped,
int numElements
)
{
Expand Down
93 changes: 60 additions & 33 deletions RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@

#include "gpuClusteringConstants.h"

#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"

namespace gpuClustering {

__global__ void countModules(uint16_t const * id,
uint32_t * moduleStart,
int32_t * clusterId,
__global__ void countModules(uint16_t const * __restrict__ id,
uint32_t * __restrict__ moduleStart,
int32_t * __restrict__ clusterId,
int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
Expand All @@ -30,15 +32,16 @@ namespace gpuClustering {
}
}

__global__ void findClus(uint16_t const * id, // module id of each pixel
uint16_t const * x, // local coordinates of each pixel
uint16_t const * y, //
uint32_t const * moduleStart, // index of the first pixel of each module
uint32_t * nClustersInModule, // output: number of clusters found in each module
uint32_t * moduleId, // output: module id of each module
int32_t * clusterId, // output: cluster id of each pixel
__global__ void findClus(uint16_t const * __restrict__ id, // module id of each pixel
uint16_t const * __restrict__ x, // local coordinates of each pixel
uint16_t const * __restrict__ y, //
uint32_t const * __restrict__ moduleStart, // index of the first pixel of each module
uint32_t * __restrict__ nClustersInModule, // output: number of clusters found in each module
uint32_t * __restrict__ moduleId, // output: module id of each module
int32_t * __restrict__ clusterId, // output: cluster id of each pixel
int numElements)
{

if (blockIdx.x >= moduleStart[0])
return;

Expand Down Expand Up @@ -72,12 +75,32 @@ namespace gpuClustering {
}
}

//init hist (ymax < 512)
__shared__ HistoContainer<uint16_t,8,4,9,uint16_t> hist;
hist.nspills = 0;
for (auto k = threadIdx.x; k<hist.nbins(); k+=blockDim.x) hist.n[k]=0;

__syncthreads();
assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId)));


assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId)));
assert(msize-firstPixel<64000);

// skip threads not assocoated to pixels in this module
active = (first < msize);

// __syncthreads();


// fill histo
if (active) {
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == InvId) // skip invalid pixels
continue;
hist.fill(y[i],i-firstPixel);
}
}

// assume that we can cover the whole module with up to 10 blockDim.x-wide iterations
constexpr int maxiter = 10;
if (active) {
Expand All @@ -88,6 +111,9 @@ namespace gpuClustering {
jmax[k] = msize;

__syncthreads();
if (threadIdx.x==0 && hist.fullSpill()) printf("histo overflow in det %d\n",thisModuleId);


// for each pixel, look at all the pixels until the end of the module;
// when two valid pixels within +/- 1 in x or y are found, set their id to the minimum;
// after the loop, all the pixel in each cluster should have the id equeal to the lowest
Expand All @@ -96,31 +122,38 @@ namespace gpuClustering {
while (not __syncthreads_and(done)) {
done = true;
if (active) {
for (int i = first, k = 0; i < msize; i += blockDim.x, ++k) {
for (int i = first, k = 0; i < msize; i += blockDim.x, ++k) {
if (id[i] == InvId) // skip invalid pixels
continue;
assert(id[i] == thisModuleId); // same module
auto js = i + 1;
auto jm = jmax[k];
jmax[k] = i + 1;
for (int j = js; j < jm; ++j) {
if (id[j] == InvId) // skip invalid pixels
continue;
if (std::abs(int(x[j]) - int(x[i])) > 1 or
std::abs(int(y[j]) - int(y[i])) > 1)
continue;
// loop to columns
auto bs = hist.bin(y[i]>0 ? y[i]-1 : 0);
auto be = hist.bin(y[i]+1)+1;
auto loop = [&](int j) {
j+=firstPixel;
if (i>=j or j>jm or
std::abs(int(x[j]) - int(x[i])) > 1 or
std::abs(int(y[j]) - int(y[i])) > 1) return;
auto old = atomicMin(&clusterId[j], clusterId[i]);
if (old != clusterId[i]) {
// end the loop only if no changes were applied
done = false;
}
atomicMin(&clusterId[i], old);
// update the loop boundary for the next iteration
jmax[k] = j + 1;
}
}
}
}
jmax[k] = std::max(j + 1,jmax[k]);
};
for (auto b=bs; b<be; ++b){
for (auto pj=hist.begin(b);pj<hist.end(b);++pj) {
loop(*pj);
}}
for (auto pj=hist.beginSpill();pj<hist.endSpill();++pj)
loop(*pj);
} // pixel loop
} // end active
} // end while

__shared__ int foundClusters;
foundClusters = 0;
Expand All @@ -129,11 +162,9 @@ namespace gpuClustering {
// find the number of different clusters, identified by a pixels with clus[i] == i;
// mark these pixels with a negative id.
if (active) {
for (int i = first; i < numElements; i += blockDim.x) {
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == InvId) // skip invalid pixels
continue;
if (id[i] != thisModuleId) // stop once in a different module
break;
if (clusterId[i] == i) {
auto old = atomicAdd(&foundClusters, 1);
clusterId[i] = -(old + 1);
Expand All @@ -144,11 +175,9 @@ namespace gpuClustering {

// propagate the negative id to all the pixels in the cluster.
if (active) {
for (int i = first; i < numElements; i += blockDim.x) {
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == InvId) // skip invalid pixels
continue;
if (id[i] != thisModuleId) // stop once in a different module
break;
if (clusterId[i] >= 0) {
// mark each pixel in a cluster with the same id as the first one
clusterId[i] = clusterId[clusterId[i]];
Expand All @@ -159,13 +188,11 @@ namespace gpuClustering {

// adjust the cluster id to be a positive value starting from 0
if (active) {
for (int i = first; i < numElements; i += blockDim.x) {
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == InvId) { // skip invalid pixels
clusterId[i] = -9999;
continue;
}
if (id[i] != thisModuleId) // stop once in a different module
break;
clusterId[i] = - clusterId[i] - 1;
}
}
Expand Down
16 changes: 11 additions & 5 deletions RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,15 @@ namespace pixelCPEforGPU {
DetParams * m_detParams;

constexpr
CommonParams const & commonParams() const {return *m_commonParams;}
CommonParams const & __restrict__ commonParams() const {
CommonParams const * __restrict__ l = m_commonParams;
return *l;
}
constexpr
DetParams const & detParams(int i) const {return m_detParams[i];}
DetParams const & __restrict__ detParams(int i) const {
DetParams const * __restrict__ l = m_detParams;
return l[i];
}
};

// SOA (on device)
Expand Down Expand Up @@ -78,7 +84,7 @@ namespace pixelCPEforGPU {
using ClusParams = ClusParamsT<256>;

constexpr inline
void computeAnglesFromDet(DetParams const & detParams, float const x, float const y, float & cotalpha, float & cotbeta) {
void computeAnglesFromDet(DetParams const & __restrict__ detParams, float const x, float const y, float & cotalpha, float & cotbeta) {
// x,y local position on det
auto gvx = x - detParams.x0;
auto gvy = y - detParams.y0;
Expand Down Expand Up @@ -147,7 +153,7 @@ namespace pixelCPEforGPU {
}

constexpr inline
void position(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) {
void position(CommonParams const & __restrict__ comParams, DetParams const & __restrict__ detParams, ClusParams & cp, uint32_t ic) {

//--- Upper Right corner of Lower Left pixel -- in measurement frame
uint16_t llx = cp.minRow[ic]+1;
Expand Down Expand Up @@ -202,7 +208,7 @@ namespace pixelCPEforGPU {
}

constexpr inline
void error(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) {
void error(CommonParams const & __restrict__ comParams, DetParams const & __restrict__ detParams, ClusParams & cp, uint32_t ic) {
// Edge cluster errors
cp.xerr[ic]= 0.0050;
cp.yerr[ic]= 0.0085;
Expand Down
22 changes: 11 additions & 11 deletions RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@ namespace gpuPixelRecHits {
using ClusParams = pixelCPEforGPU::ClusParams;


__global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * cpeParams,
float const * bs,
uint16_t const * id,
uint16_t const * x,
uint16_t const * y,
uint16_t const * adc,
uint32_t const * digiModuleStart,
uint32_t const * clusInModule,
uint32_t const * moduleId,
int32_t const * clus,
__global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams,
float const * __restrict__ bs,
uint16_t const * __restrict__ id,
uint16_t const * __restrict__ x,
uint16_t const * __restrict__ y,
uint16_t const * __restrict__ adc,
uint32_t const * __restrict__ digiModuleStart,
uint32_t const * __restrict__ clusInModule,
uint32_t const * __restrict__ moduleId,
int32_t const * __restrict__ clus,
int numElements,
uint32_t const * hitsModuleStart,
uint32_t const * __restrict__ hitsModuleStart,
int32_t * chargeh,
uint16_t * detInd,
float * xg, float * yg, float * zg, float * rg, int16_t * iph,
Expand Down

0 comments on commit cf35be0

Please sign in to comment.