Skip to content

Commit

Permalink
Tune and speed up doublet algo (#158)
Browse files Browse the repository at this point in the history
Tune and speed up the pixel doublet alforithm, and take advantage of GPU read-only memory for a further speedup.

Includes a python notebook to tune the cuts for doublets and triplets.
  • Loading branch information
VinInn authored and fwyzard committed Oct 23, 2020
1 parent c59e9ec commit b4bbc39
Show file tree
Hide file tree
Showing 20 changed files with 159 additions and 77 deletions.
2 changes: 2 additions & 0 deletions CalibTracker/SiPixelESProducers/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
<use name="DataFormats/SiPixelDigi"/>
<use name="CalibTracker/Records"/>
<use name="MagneticField/VolumeBasedEngine"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="boost"/>
<use name="cuda-api-wrappers"/>
<export>
<lib name="1"/>
</export>
2 changes: 2 additions & 0 deletions CalibTracker/SiPixelESProducers/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
<use name="Geometry/Records"/>
<use name="Geometry/TrackerGeometryBuilder"/>
<use name="CalibTracker/SiPixelESProducers"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="cuda-api-wrappers"/>
<library file="*.cc" name="CalibTrackerSiPixelESProducersPlugins">
<flags EDM_PLUGIN="1"/>
</library>
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class SiPixelGainForHLTonGPU {
assert(offset<3088384);
assert(0==offset%2);

auto s = v_pedestals[offset/2];
DecodingStructure const * __restrict__ lp = v_pedestals;
auto s = lp[offset/2];

isDeadColumn = (s.ped & 0xFF) == deadFlag_;
isNoisyColumn = (s.ped & 0xFF) == noisyFlag_;
Expand Down
3 changes: 2 additions & 1 deletion Configuration/StandardSequences/python/RawToDigi_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@
RawToDigi_pixelOnly = cms.Sequence(siPixelDigis)

scalersRawToDigi.scalersInputTag = 'rawDataCollector'
siPixelDigis.InputLabel = 'rawDataCollector'
from Configuration.ProcessModifiers.gpu_cff import gpu
(~gpu).toModify(siPixelDigis, InputLabel = 'rawDataCollector')
#false by default anyways ecalDigis.DoRegional = False
ecalDigis.InputLabel = 'rawDataCollector'
ecalPreshowerDigis.sourceTag = 'rawDataCollector'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,23 +123,19 @@
process.muonCSCDigis.InputObjects = cms.InputTag("rawDataRepacker")
process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker")
process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.muonGEMDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker")
process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker")
process.tcdsDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.tcdsRawToDigi.InputLabel = cms.InputTag("rawDataRepacker")
process.totemRPRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker")
process.totemTriggerRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker")
process.totemTimingRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker")
process.csctfDigis.producer = cms.InputTag("rawDataRepacker")
process.dttfDigis.DTTF_FED_Source = cms.InputTag("rawDataRepacker")
process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker")
process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker")
process.twinMuxStage2Digis.DTTM7_FED_Source = cms.InputTag("rawDataRepacker")
process.RPCTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker")
process.bmtfDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.omtfStage2Digis.inputLabel = cms.InputTag("rawDataRepacker")
process.emtfStage2Digis.InputLabel = cms.InputTag("rawDataRepacker")
process.gmtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker")
process.caloStage1Digis.InputLabel = cms.InputTag("rawDataRepacker")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,23 +119,19 @@
process.muonCSCDigis.InputObjects = cms.InputTag("rawDataRepacker")
process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker")
process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.muonGEMDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker")
process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker")
process.tcdsDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.tcdsRawToDigi.InputLabel = cms.InputTag("rawDataRepacker")
process.totemRPRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker")
process.totemTriggerRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker")
process.totemTimingRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker")
process.csctfDigis.producer = cms.InputTag("rawDataRepacker")
process.dttfDigis.DTTF_FED_Source = cms.InputTag("rawDataRepacker")
process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker")
process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker")
process.twinMuxStage2Digis.DTTM7_FED_Source = cms.InputTag("rawDataRepacker")
process.RPCTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker")
process.bmtfDigis.InputLabel = cms.InputTag("rawDataRepacker")
process.omtfStage2Digis.inputLabel = cms.InputTag("rawDataRepacker")
process.emtfStage2Digis.InputLabel = cms.InputTag("rawDataRepacker")
process.gmtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker")
process.caloStage1Digis.InputLabel = cms.InputTag("rawDataRepacker")
Expand Down
8 changes: 8 additions & 0 deletions EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import FWCore.ParameterSet.Config as cms
import EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi
import RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi

siPixelDigis = EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi.siPixelRawToDigi.clone()
siPixelDigis.Timing = cms.untracked.bool(False)
Expand All @@ -20,3 +21,10 @@

from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel
phase1Pixel.toModify(siPixelDigis, UsePhase1=True)

_siPixelDigis_gpu = RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi.siPixelDigiHeterogeneousConverter.clone()
_siPixelDigis_gpu.includeErrors = cms.bool(True)

from Configuration.ProcessModifiers.gpu_cff import gpu
gpu.toReplaceWith(siPixelDigis, _siPixelDigis_gpu)

Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ namespace phase1PixelTopology {

constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule)* uint32_t(numColsInModule);

constexpr uint32_t numberOfModules = 1856;

constexpr uint32_t layerStart[11] = {0,96,320,672,1184,1296,1408,1520,1632,1744,1856};
constexpr char const * layerName[10] = {"BL1","BL2","BL3","BL4",
"E+1", "E+2", "E+3",
"E-1", "E-2", "E-3"
};


// this is for the ROC n<512 (upgrade 1024)
constexpr inline
uint16_t divu52(uint16_t n) {
Expand Down
4 changes: 4 additions & 0 deletions RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
striptrackerlocalreco = cms.Sequence(siStripZeroSuppression*siStripClusters*siStripMatchedRecHits)
trackerlocalreco = cms.Sequence(pixeltrackerlocalreco*striptrackerlocalreco*clusterSummaryProducer)

from Configuration.ProcessModifiers.gpu_cff import gpu
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import siPixelRecHitHeterogeneous as _siPixelRecHitHeterogeneous
gpu.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitHeterogeneous)

from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import *
from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import *

Expand Down
11 changes: 10 additions & 1 deletion RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@
<use name="DataFormats/SiPixelDetId"/>
<use name="DataFormats/SiPixelCluster"/>
<use name="boost_serialization"/>
<use name="RecoLocalTracker/SiPixelClusterizer"/>
<use name="RecoTracker/Record"/>
<use name="CalibTracker/SiPixelESProducers"/>
<library file="*.cc" name="RecoLocalTrackerSiPixelClusterizerPlugins">
<use name="EventFilter/SiPixelRawToDigi"/>
<use name="HeterogeneousCore/Producer"/>
<use name="HeterogeneousCore/Product"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="cuda"/>
<use name="cuda-api-wrappers"/>
<use name="cub"/>
<library file="*.cc *.cu" name="RecoLocalTrackerSiPixelClusterizerPlugins">
<flags EDM_PLUGIN="1"/>
</library>
12 changes: 6 additions & 6 deletions RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ namespace gpuCalibPixel {


__global__ void calibDigis(uint16_t * id,
uint16_t const * x,
uint16_t const * y,
uint16_t const * __restrict__ x,
uint16_t const * __restrict__ y,
uint16_t * adc,
SiPixelGainForHLTonGPU const * ped,
SiPixelGainForHLTonGPU const * __restrict__ ped,
int numElements
)
{
Expand Down Expand Up @@ -55,11 +55,11 @@ namespace gpuCalibPixel {
}

__global__ void calibADCByModule(uint16_t * id,
uint16_t const * x,
uint16_t const * y,
uint16_t const * __restrict__ x,
uint16_t const * __restrict__ y,
uint16_t * adc,
uint32_t * moduleStart,
SiPixelGainForHLTonGPU const * ped,
SiPixelGainForHLTonGPU const * __restrict__ ped,
int numElements
)
{
Expand Down
93 changes: 60 additions & 33 deletions RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@

#include "gpuClusteringConstants.h"

#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"

namespace gpuClustering {

__global__ void countModules(uint16_t const * id,
uint32_t * moduleStart,
int32_t * clusterId,
__global__ void countModules(uint16_t const * __restrict__ id,
uint32_t * __restrict__ moduleStart,
int32_t * __restrict__ clusterId,
int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
Expand All @@ -30,15 +32,16 @@ namespace gpuClustering {
}
}

__global__ void findClus(uint16_t const * id, // module id of each pixel
uint16_t const * x, // local coordinates of each pixel
uint16_t const * y, //
uint32_t const * moduleStart, // index of the first pixel of each module
uint32_t * nClustersInModule, // output: number of clusters found in each module
uint32_t * moduleId, // output: module id of each module
int32_t * clusterId, // output: cluster id of each pixel
__global__ void findClus(uint16_t const * __restrict__ id, // module id of each pixel
uint16_t const * __restrict__ x, // local coordinates of each pixel
uint16_t const * __restrict__ y, //
uint32_t const * __restrict__ moduleStart, // index of the first pixel of each module
uint32_t * __restrict__ nClustersInModule, // output: number of clusters found in each module
uint32_t * __restrict__ moduleId, // output: module id of each module
int32_t * __restrict__ clusterId, // output: cluster id of each pixel
int numElements)
{

if (blockIdx.x >= moduleStart[0])
return;

Expand Down Expand Up @@ -72,12 +75,32 @@ namespace gpuClustering {
}
}

//init hist (ymax < 512)
__shared__ HistoContainer<uint16_t,8,4,9,uint16_t> hist;
hist.nspills = 0;
for (auto k = threadIdx.x; k<hist.nbins(); k+=blockDim.x) hist.n[k]=0;

__syncthreads();
assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId)));


assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId)));
assert(msize-firstPixel<64000);

// skip threads not assocoated to pixels in this module
active = (first < msize);

// __syncthreads();


// fill histo
if (active) {
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == InvId) // skip invalid pixels
continue;
hist.fill(y[i],i-firstPixel);
}
}

// assume that we can cover the whole module with up to 10 blockDim.x-wide iterations
constexpr int maxiter = 10;
if (active) {
Expand All @@ -88,6 +111,9 @@ namespace gpuClustering {
jmax[k] = msize;

__syncthreads();
if (threadIdx.x==0 && hist.fullSpill()) printf("histo overflow in det %d\n",thisModuleId);


// for each pixel, look at all the pixels until the end of the module;
// when two valid pixels within +/- 1 in x or y are found, set their id to the minimum;
// after the loop, all the pixel in each cluster should have the id equeal to the lowest
Expand All @@ -96,31 +122,38 @@ namespace gpuClustering {
while (not __syncthreads_and(done)) {
done = true;
if (active) {
for (int i = first, k = 0; i < msize; i += blockDim.x, ++k) {
for (int i = first, k = 0; i < msize; i += blockDim.x, ++k) {
if (id[i] == InvId) // skip invalid pixels
continue;
assert(id[i] == thisModuleId); // same module
auto js = i + 1;
auto jm = jmax[k];
jmax[k] = i + 1;
for (int j = js; j < jm; ++j) {
if (id[j] == InvId) // skip invalid pixels
continue;
if (std::abs(int(x[j]) - int(x[i])) > 1 or
std::abs(int(y[j]) - int(y[i])) > 1)
continue;
// loop to columns
auto bs = hist.bin(y[i]>0 ? y[i]-1 : 0);
auto be = hist.bin(y[i]+1)+1;
auto loop = [&](int j) {
j+=firstPixel;
if (i>=j or j>jm or
std::abs(int(x[j]) - int(x[i])) > 1 or
std::abs(int(y[j]) - int(y[i])) > 1) return;
auto old = atomicMin(&clusterId[j], clusterId[i]);
if (old != clusterId[i]) {
// end the loop only if no changes were applied
done = false;
}
atomicMin(&clusterId[i], old);
// update the loop boundary for the next iteration
jmax[k] = j + 1;
}
}
}
}
jmax[k] = std::max(j + 1,jmax[k]);
};
for (auto b=bs; b<be; ++b){
for (auto pj=hist.begin(b);pj<hist.end(b);++pj) {
loop(*pj);
}}
for (auto pj=hist.beginSpill();pj<hist.endSpill();++pj)
loop(*pj);
} // pixel loop
} // end active
} // end while

__shared__ int foundClusters;
foundClusters = 0;
Expand All @@ -129,11 +162,9 @@ namespace gpuClustering {
// find the number of different clusters, identified by a pixels with clus[i] == i;
// mark these pixels with a negative id.
if (active) {
for (int i = first; i < numElements; i += blockDim.x) {
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == InvId) // skip invalid pixels
continue;
if (id[i] != thisModuleId) // stop once in a different module
break;
if (clusterId[i] == i) {
auto old = atomicAdd(&foundClusters, 1);
clusterId[i] = -(old + 1);
Expand All @@ -144,11 +175,9 @@ namespace gpuClustering {

// propagate the negative id to all the pixels in the cluster.
if (active) {
for (int i = first; i < numElements; i += blockDim.x) {
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == InvId) // skip invalid pixels
continue;
if (id[i] != thisModuleId) // stop once in a different module
break;
if (clusterId[i] >= 0) {
// mark each pixel in a cluster with the same id as the first one
clusterId[i] = clusterId[clusterId[i]];
Expand All @@ -159,13 +188,11 @@ namespace gpuClustering {

// adjust the cluster id to be a positive value starting from 0
if (active) {
for (int i = first; i < numElements; i += blockDim.x) {
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == InvId) { // skip invalid pixels
clusterId[i] = -9999;
continue;
}
if (id[i] != thisModuleId) // stop once in a different module
break;
clusterId[i] = - clusterId[i] - 1;
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@

import FWCore.ParameterSet.Config as cms

#
from CondTools.SiPixel.SiPixelGainCalibrationService_cfi import *
from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizer_cfi import siPixelClusters as _siPixelClusters
siPixelClustersPreSplitting = _siPixelClusters.clone()

from Configuration.ProcessModifiers.gpu_cff import gpu
from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import siPixelClustersHeterogeneous as _siPixelClustersHeterogeneous
from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import *
from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import *
gpu.toReplaceWith(siPixelClustersPreSplitting, _siPixelClustersHeterogeneous.clone())
6 changes: 6 additions & 0 deletions RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@
<library file="Triplet.cc" name="Triplet">
<flags EDM_PLUGIN="1"/>
</library>

<bin file="gpuClustering.cu" name="gpuClustering_t">
<use name="cuda"/>
<use name="cuda-api-wrappers"/>
<flags CXXFLAGS="-g"/>
</bin>
4 changes: 4 additions & 0 deletions RecoLocalTracker/SiPixelRecHits/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
<use name="boost"/>
<use name="vdt_headers"/>

<use name="cuda"/>
<use name="cuda-api-wrappers"/>
<use name="HeterogeneousCore/CUDAServices"/>

<export>
<lib name="1"/>
</export>
Loading

0 comments on commit b4bbc39

Please sign in to comment.