diff --git a/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h b/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h index d462be2c5dd7b..e79a32c21daa0 100644 --- a/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h +++ b/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h @@ -16,7 +16,7 @@ class TrackSoAT { static constexpr int32_t stride() { return S; } using Quality = trackQuality::Quality; - using hindex_type = uint16_t; + using hindex_type = uint32_t; using HitContainer = cms::cuda::OneToManyAssoc; // Always check quality is at least loose! diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index 808feb2a4218f..6a83a66b60fbd 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -15,10 +15,12 @@ namespace pixelCPEforGPU { class TrackingRecHit2DSOAView { public: static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; } - using hindex_type = uint16_t; // if above is <=2^16 + using hindex_type = uint32_t; // if above is <=2^32 - using Hist = - cms::cuda::HistoContainer; + using PhiBinner = + cms::cuda::HistoContainer; + + using Hist = PhiBinner; // FIXME using AverageGeometry = phase1PixelTopology::AverageGeometry; @@ -93,9 +95,9 @@ class TrackingRecHit2DSOAView { uint32_t* m_hitsLayerStart; - Hist* m_hist; + PhiBinner* m_hist; // FIXME use a more descriptive name consistently uint32_t m_nHits; }; -#endif +#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h b/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h index 0ebbdf3ed3705..9eea4f528fcdb 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h @@ -44,7 +44,7 @@ namespace CAConstants { constexpr uint32_t maxTuples() { return maxNumberOfTuples(); } // types - using hindex_type = uint16_t; // FIXME from siPixelRecHitsHeterogeneousProduct + using hindex_type = uint32_t; // FIXME from siPixelRecHitsHeterogeneousProduct using tindex_type = uint16_t; // for tuples #ifndef ONLY_PHICUT diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc index 4d4791b87ad3b..1646cb503ff81 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc @@ -134,6 +134,7 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA * if (m_params.doStats_) { kernel_checkOverflows(tuples_d, device_tupleMultiplicity_.get(), + device_hitToTuple_.get(), device_hitTuple_apc_, device_theCells_.get(), device_nCells_, diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu index 8a213eee2f579..a8dac7992f4fa 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu @@ -125,21 +125,6 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA * cudaCheck(cudaGetLastError()); } - if (m_params.doStats_) { - numberOfBlocks = (std::max(nhits, m_params.maxNumberOfDoublets_) + blockSize - 1) / blockSize; - kernel_checkOverflows<<>>(tuples_d, - device_tupleMultiplicity_.get(), - device_hitTuple_apc_, - device_theCells_.get(), - device_nCells_, - device_theCellNeighbors_.get(), - device_theCellTracks_.get(), - device_isOuterHitOfCell_.get(), - nhits, - m_params.maxNumberOfDoublets_, - counters_); - cudaCheck(cudaGetLastError()); - } #ifdef GPU_DEBUG cudaDeviceSynchronize(); cudaCheck(cudaGetLastError()); @@ -278,6 +263,24 @@ void CAHitNtupletGeneratorKernelsGPU::classifyTuples(HitsOnCPU const &hh, TkSoA cudaCheck(cudaGetLastError()); } + if (m_params.doStats_) { + auto nhits = hh.nHits(); + numberOfBlocks = (std::max(nhits, m_params.maxNumberOfDoublets_) + blockSize - 1) / blockSize; + kernel_checkOverflows<<>>(tuples_d, + device_tupleMultiplicity_.get(), + device_hitToTuple_.get(), + device_hitTuple_apc_, + device_theCells_.get(), + device_nCells_, + device_theCellNeighbors_.get(), + device_theCellTracks_.get(), + device_isOuterHitOfCell_.get(), + nhits, + m_params.maxNumberOfDoublets_, + counters_); + cudaCheck(cudaGetLastError()); + } + if (m_params.doStats_) { // counters (add flag???) numberOfBlocks = (HitToTuple::capacity() + blockSize - 1) / blockSize; diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h index 691395887dddb..3a935efbe2b4b 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h @@ -30,7 +30,8 @@ using TkSoA = pixelTrack::TrackSoA; using HitContainer = pixelTrack::HitContainer; __global__ void kernel_checkOverflows(HitContainer const *foundNtuplets, - CAConstants::TupleMultiplicity *tupleMultiplicity, + CAConstants::TupleMultiplicity const *tupleMultiplicity, + CAHitNtupletGeneratorKernelsGPU::HitToTuple const *hitToTuple, cms::cuda::AtomicPairCounter *apc, GPUCACell const *__restrict__ cells, uint32_t const *__restrict__ nCells, @@ -95,7 +96,7 @@ __global__ void kernel_checkOverflows(HitContainer const *foundNtuplets, atomicAdd(&c.nKilledCells, 1); if (0 == thisCell.theUsed) atomicAdd(&c.nEmptyCells, 1); - if (thisCell.tracks().empty()) + if (0 == hitToTuple->size(thisCell.get_inner_hit_id()) && 0 == hitToTuple->size(thisCell.get_outer_hit_id())) atomicAdd(&c.nZeroTrackCells, 1); }