diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletHeterogeneousEDProducer.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletHeterogeneousEDProducer.cc index 52738a9e64e19..47417690d54b4 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletHeterogeneousEDProducer.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletHeterogeneousEDProducer.cc @@ -54,8 +54,8 @@ class CAHitNtupletHeterogeneousEDProducer private: edm::EDGetTokenT> regionToken_; - edm::EDGetTokenT tGpuHits; - edm::EDGetTokenT tCpuHits; + edm::EDGetTokenT gpuHits_; + edm::EDGetTokenT cpuHits_; edm::RunningAverage localRA_; CAHitQuadrupletGeneratorGPU GPUGenerator_; @@ -63,8 +63,9 @@ class CAHitNtupletHeterogeneousEDProducer bool emptyRegions = false; std::unique_ptr seedingHitSets_; - bool enableTransfer_; - bool enableConversion_; + const bool doRiemannFit_; + const bool enableConversion_; + const bool enableTransfer_; }; CAHitNtupletHeterogeneousEDProducer::CAHitNtupletHeterogeneousEDProducer( @@ -72,13 +73,13 @@ CAHitNtupletHeterogeneousEDProducer::CAHitNtupletHeterogeneousEDProducer( : HeterogeneousEDProducer(iConfig), regionToken_(consumes>( iConfig.getParameter("trackingRegions"))), - tGpuHits(consumesHeterogeneous(iConfig.getParameter("heterogeneousPixelRecHitSrc"))), - tCpuHits(consumes(iConfig.getParameter("heterogeneousPixelRecHitSrc"))), - GPUGenerator_(iConfig, consumesCollector()) { - - enableConversion_ = iConfig.getParameter("gpuEnableConversion"); - enableTransfer_ = enableConversion_ || iConfig.getParameter("gpuEnableTransfer"); - + gpuHits_(consumesHeterogeneous(iConfig.getParameter("heterogeneousPixelRecHitSrc"))), + cpuHits_(consumes(iConfig.getParameter("heterogeneousPixelRecHitSrc"))), + GPUGenerator_(iConfig, consumesCollector()), + doRiemannFit_(iConfig.getParameter("doRiemannFit")), + enableConversion_(iConfig.getParameter("gpuEnableConversion")), + enableTransfer_(enableConversion_ || iConfig.getParameter("gpuEnableTransfer")) +{ produces(); } @@ -88,9 +89,8 @@ void CAHitNtupletHeterogeneousEDProducer::fillDescriptions( desc.add("doublets", edm::InputTag(""))->setComment("Not really used, kept to keep the python parameters"); desc.add("trackingRegions", edm::InputTag("globalTrackingRegionFromBeamSpot")); - desc.add("heterogeneousPixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplitting")); - + desc.add("doRiemannFit", true); desc.add("gpuEnableTransfer", true); desc.add("gpuEnableConversion", true); @@ -123,13 +123,10 @@ void CAHitNtupletHeterogeneousEDProducer::acquireGPUCuda( const TrackingRegion ®ion = regions[0]; - edm::Handle gh; - iEvent.getByToken(tGpuHits, gh); + iEvent.getByToken(gpuHits_, gh); auto const & gHits = *gh; -// auto nhits = gHits.nHits; - // move inside hitNtuplets??? GPUGenerator_.buildDoublets(gHits,cudaStream.id()); seedingHitSets_->reserve(regions.size(), localRA_.upper()); @@ -139,8 +136,7 @@ void CAHitNtupletHeterogeneousEDProducer::acquireGPUCuda( << "Creating ntuplets for " << regions.size() << " regions"; - GPUGenerator_.hitNtuplets(region, gHits, iSetup, enableTransfer_, cudaStream.id()); - + GPUGenerator_.hitNtuplets(region, gHits, iSetup, doRiemannFit_, enableTransfer_, cudaStream.id()); } void CAHitNtupletHeterogeneousEDProducer::produceGPUCuda( @@ -155,7 +151,7 @@ void CAHitNtupletHeterogeneousEDProducer::produceGPUCuda( const auto ®ions = *hregions; edm::Handle gh; - iEvent.getByToken(tCpuHits, gh); + iEvent.getByToken(cpuHits_, gh); auto const & rechits = *gh; std::vector ntuplets(regions.size()); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cc index 708e7ec92a78b..add00788a1c07 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cc @@ -85,12 +85,13 @@ void CAHitQuadrupletGeneratorGPU::hitNtuplets( TrackingRegion const& region, HitsOnCPU const& hh, edm::EventSetup const& es, + bool doRiemannFit, bool transferToCPU, cudaStream_t cudaStream) { hitsOnCPU = &hh; int index = 0; - launchKernels(region, index, hh, transferToCPU, cudaStream); + launchKernels(region, index, hh, doRiemannFit, transferToCPU, cudaStream); } void CAHitQuadrupletGeneratorGPU::fillResults( diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu index e6a32d17d04d9..c6cd2419e5359 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.cu @@ -321,6 +321,7 @@ void CAHitQuadrupletGeneratorGPU::allocateOnGPU() void CAHitQuadrupletGeneratorGPU::launchKernels(const TrackingRegion ®ion, int regionIndex, HitsOnCPU const & hh, + bool doRiemannFit, bool transferToCPU, cudaStream_t cudaStream) { @@ -361,26 +362,28 @@ void CAHitQuadrupletGeneratorGPU::launchKernels(const TrackingRegion ®ion, // kernel_print_found_ntuplets<<<1, 1, 0, cudaStream>>>(d_foundNtupletsVec_[regionIndex], 10); - kernelFastFitAllHits<<>>( - d_foundNtupletsVec_[regionIndex], hh.gpu_d, 4, bField_, helix_fit_resultsGPU_, - hitsGPU_, hits_covGPU_, circle_fit_resultsGPU_, fast_fit_resultsGPU_, - line_fit_resultsGPU_); - cudaCheck(cudaGetLastError()); - - blockSize = 256; - numberOfBlocks = (maxNumberOfQuadruplets_ + blockSize - 1) / blockSize; - - kernelCircleFitAllHits<<>>( - d_foundNtupletsVec_[regionIndex], 4, bField_, helix_fit_resultsGPU_, - hitsGPU_, hits_covGPU_, circle_fit_resultsGPU_, fast_fit_resultsGPU_, - line_fit_resultsGPU_); - cudaCheck(cudaGetLastError()); - - kernelLineFitAllHits<<>>( - d_foundNtupletsVec_[regionIndex], bField_, helix_fit_resultsGPU_, - hitsGPU_, hits_covGPU_, circle_fit_resultsGPU_, fast_fit_resultsGPU_, - line_fit_resultsGPU_); - cudaCheck(cudaGetLastError()); + if (doRiemannFit) { + kernelFastFitAllHits<<>>( + d_foundNtupletsVec_[regionIndex], hh.gpu_d, 4, bField_, helix_fit_resultsGPU_, + hitsGPU_, hits_covGPU_, circle_fit_resultsGPU_, fast_fit_resultsGPU_, + line_fit_resultsGPU_); + cudaCheck(cudaGetLastError()); + + blockSize = 256; + numberOfBlocks = (maxNumberOfQuadruplets_ + blockSize - 1) / blockSize; + + kernelCircleFitAllHits<<>>( + d_foundNtupletsVec_[regionIndex], 4, bField_, helix_fit_resultsGPU_, + hitsGPU_, hits_covGPU_, circle_fit_resultsGPU_, fast_fit_resultsGPU_, + line_fit_resultsGPU_); + cudaCheck(cudaGetLastError()); + + kernelLineFitAllHits<<>>( + d_foundNtupletsVec_[regionIndex], bField_, helix_fit_resultsGPU_, + hitsGPU_, hits_covGPU_, circle_fit_resultsGPU_, fast_fit_resultsGPU_, + line_fit_resultsGPU_); + cudaCheck(cudaGetLastError()); + } if (transferToCPU) { cudaCheck(cudaMemcpyAsync(h_foundNtupletsVec_[regionIndex], d_foundNtupletsVec_[regionIndex], diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.h index fdceec9225d76..2feb95dca69a8 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitQuadrupletGeneratorGPU.h @@ -61,6 +61,7 @@ class CAHitQuadrupletGeneratorGPU { void hitNtuplets(const TrackingRegion ®ion, HitsOnCPU const & hh, const edm::EventSetup& es, + bool doRiemannFit, bool transferToCPU, cudaStream_t stream); void cleanup(cudaStream_t stream); @@ -139,10 +140,9 @@ class CAHitQuadrupletGeneratorGPU { const bool enabled_; }; - void launchKernels(const TrackingRegion &, int, HitsOnCPU const & hh, bool transferToCPU, cudaStream_t); + void launchKernels(const TrackingRegion &, int, HitsOnCPU const & hh, bool doRiemannFit, bool transferToCPU, cudaStream_t); std::vector> fetchKernelResult(int); - float bField_; const float extraHitRPhitolerance;