From 9e12c44babd1973492a7e430e278a238c7a928de Mon Sep 17 00:00:00 2001 From: Sushil Dubey Date: Wed, 15 Nov 2017 15:23:03 +0100 Subject: [PATCH 001/149] Implementation of the pixel raw to digi algorithm in CUDA --- EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index f92aa68373927..c8cd3e9d29e08 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,4 +1,5 @@ - - - + + + + From acdaa894708aa920065d686d9252d2053c90efeb Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 16 Nov 2017 18:19:13 +0100 Subject: [PATCH 002/149] Cleanup the CUDA code, and recover the CPU code - remove log files - rename CUDA plugins to avoid conflict with standard ones - recover non-GPU code --- EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index c8cd3e9d29e08..c5d495b7f4b9f 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,5 +1,8 @@ - - + + + + + From 835416c92bc24310d4e263a59b96c39c17d88a40 Mon Sep 17 00:00:00 2001 From: Cesare Calabria Date: Sun, 26 Nov 2017 15:09:34 +0100 Subject: [PATCH 003/149] Better integration in CMSSW, validation, cleanup and fixes - fill siPixel digi collection - other changes - add DQM validation - fix column binning --- .../python/SiPixelRawToDigi_cfi.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index 12ff657cefd8e..feb5089785f1d 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ 
b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -1,7 +1,42 @@ import FWCore.ParameterSet.Config as cms import EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi +import EventFilter.SiPixelRawToDigi.siPixelRawToDigiGPU_cfi siPixelDigis = EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi.siPixelRawToDigi.clone() +siPixelDigis.Timing = cms.untracked.bool(False) +siPixelDigis.IncludeErrors = cms.bool(True) +siPixelDigis.InputLabel = cms.InputTag("siPixelRawData") +siPixelDigis.UseQualityInfo = cms.bool(False) +## ErrorList: list of error codes used by tracking to invalidate modules +siPixelDigis.ErrorList = cms.vint32(29) +## UserErrorList: list of error codes used by Pixel experts for investigation +siPixelDigis.UserErrorList = cms.vint32(40) +## Use pilot blades +siPixelDigis.UsePilotBlade = cms.bool(False) +## Use phase1 +siPixelDigis.UsePhase1 = cms.bool(False) +## Empty Regions PSet means complete unpacking +siPixelDigis.Regions = cms.PSet( ) +siPixelDigis.CablingMapLabel = cms.string("") + +siPixelDigisGPU = EventFilter.SiPixelRawToDigi.siPixelRawToDigiGPU_cfi.siPixelRawToDigiGPU.clone() +siPixelDigisGPU.Timing = cms.untracked.bool(False) +siPixelDigisGPU.IncludeErrors = cms.bool(False) +siPixelDigisGPU.InputLabel = cms.InputTag("rawDataCollector") +siPixelDigisGPU.UseQualityInfo = cms.bool(False) +## ErrorList: list of error codes used by tracking to invalidate modules +siPixelDigisGPU.ErrorList = cms.vint32(29) +## UserErrorList: list of error codes used by Pixel experts for investigation +siPixelDigisGPU.UserErrorList = cms.vint32(40) +## Use pilot blades +siPixelDigisGPU.UsePilotBlade = cms.bool(False) +## Use phase1 +siPixelDigisGPU.UsePhase1 = cms.bool(False) +## Empty Regions PSet means complete unpacking +siPixelDigisGPU.Regions = cms.PSet( ) +siPixelDigisGPU.CablingMapLabel = cms.string("") from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toModify(siPixelDigis, UsePhase1=True) 
+phase1Pixel.toModify(siPixelDigisGPU, UsePhase1=True) + From 41db525911cc593f4cac0bdabb095c4104a47cc9 Mon Sep 17 00:00:00 2001 From: Sushil Dubey Date: Fri, 1 Dec 2017 13:01:16 +0100 Subject: [PATCH 004/149] Direct access to cabling map for GPU RawToDigi --- EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index c5d495b7f4b9f..46e82326f10af 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -2,7 +2,7 @@ - + From 39e1459cd89c8a65dbd817d3c7238a9344472d01 Mon Sep 17 00:00:00 2001 From: Cesare Calabria Date: Thu, 7 Dec 2017 16:42:12 +0100 Subject: [PATCH 005/149] Unpack errors, bad ROCs, improve validation, fixes and cleanup - changes for validation - add and unpack errors - add module to unpacking and bad rocs - map fixes --- EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index feb5089785f1d..aabe584943683 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -21,7 +21,7 @@ siPixelDigisGPU = EventFilter.SiPixelRawToDigi.siPixelRawToDigiGPU_cfi.siPixelRawToDigiGPU.clone() siPixelDigisGPU.Timing = cms.untracked.bool(False) -siPixelDigisGPU.IncludeErrors = cms.bool(False) +siPixelDigisGPU.IncludeErrors = cms.bool(True) siPixelDigisGPU.InputLabel = cms.InputTag("rawDataCollector") siPixelDigisGPU.UseQualityInfo = cms.bool(False) ## ErrorList: list of error codes used by tracking to invalidate modules From 87e0c99ead7d1a4895d485e41ccc5187585067ad Mon Sep 17 00:00:00 2001 From: Felice Date: Wed, 24 Jan 2018 19:52:20 +0100 Subject: [PATCH 006/149] 
Set CUDA optimization flags --- EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 46e82326f10af..dbdef7d428365 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -5,4 +5,5 @@ + From 27ad793d95ab3fd7c558dc842c17a2079b91bb3f Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 2 Feb 2018 20:08:20 +0100 Subject: [PATCH 007/149] Various fixes to GPU implementation o the pixel unpacker - avoid hanging; - reproduce CPU unpacker resultsalso for data. --- EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index dbdef7d428365..8a7d5cf5b1942 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -5,5 +5,5 @@ - + From 8813282b95cf85e33543d6f203b1d95294e9caed Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 15 Feb 2018 16:25:59 +0100 Subject: [PATCH 008/149] Digital calibrator, concurrent clusterizer (optimized), CPE and RecHIts GPU implementation of the pixel clusterizer, CPU and RecHit producer --- .../interface/SiPixelGainForHLTonGPU.h | 69 +++ .../SiPixelRawToDigi/plugins/BuildFile.xml | 1 + .../interface/phase1PixelTopology.h | 96 +-- .../test/phase1PixelTopology_t.cpp | 244 ++++---- .../plugins/gpuCalibPixel.h | 125 ++++ .../plugins/gpuClustering.h | 168 ++++++ .../SiPixelClusterizer/test/BuildFile.xml | 67 ++- RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 26 +- .../SiPixelRecHits/interface/PixelCPEBase.h | 450 +++++++------- .../SiPixelRecHits/interface/PixelCPEFast.h | 112 ++++ .../SiPixelRecHits/interface/pixelCPEforGPU.h | 206 +++++++ .../SiPixelRecHits/plugins/BuildFile.xml | 14 +- 
.../plugins/PixelCPEFastESProducer.cc | 103 ++++ .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 130 ++++ .../python/PixelCPEESProducers_cff.py | 17 +- .../SiPixelRecHits/src/PixelCPEFast.cc | 559 ++++++++++++++++++ 16 files changed, 1944 insertions(+), 443 deletions(-) create mode 100644 CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h create mode 100644 RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h create mode 100644 RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h create mode 100644 RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h new file mode 100644 index 0000000000000..12faeaaa9a845 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include + +struct SiPixelGainForHLTonGPU_DecodingStructure{ + uint8_t gain; + uint8_t ped; +}; + + +// copy of SiPixelGainCalibrationForHLT +class SiPixelGainForHLTonGPU { + + public: + + using DecodingStructure = SiPixelGainForHLTonGPU_DecodingStructure; + + using Range = std::pair; + + + inline __host__ __device__ + std::pair getPedAndGain(uint32_t moduleInd, int col, int row, bool& isDeadColumn, bool& isNoisyColumn ) const { + + + auto range = rangeAndCols[moduleInd].first; + auto nCols = rangeAndCols[moduleInd].second; + + // determine what averaged data block we are in (there should be 1 or 2 of these depending on if plaquette is 1 by X or 2 by X + unsigned int lengthOfColumnData = (range.second-range.first)/nCols; + unsigned int 
lengthOfAveragedDataInEachColumn = 2; // we always only have two values per column averaged block + unsigned int numberOfDataBlocksToSkip = row / numberOfRowsAveragedOver_; + + + auto offset = range.first + col*lengthOfColumnData + lengthOfAveragedDataInEachColumn*numberOfDataBlocksToSkip; + + assert(offset rangeAndCols[2000]; + + float minPed_, maxPed_, minGain_, maxGain_; + + float pedPrecision, gainPrecision; + + unsigned int numberOfRowsAveragedOver_; // this is 80!!!! + unsigned int nBinsToUseForEncoding_; + unsigned int deadFlag_; + unsigned int noisyFlag_; +}; + diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 8a7d5cf5b1942..1803bd76ef4ec 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -4,6 +4,7 @@ + diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index cefdbe4b3296a..ecc5889a28481 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -1,69 +1,79 @@ -#ifndef Geometry_TrackerGeometryBuilder_phase1PixelTopology_h -#define Geometry_TrackerGeometryBuilder_phase1PixelTopology_h +#pragma once -#include +#include namespace phase1PixelTopology { - constexpr uint16_t numRowsInRoc = 80; - constexpr uint16_t numColsInRoc = 52; - constexpr uint16_t lastRowInRoc = numRowsInRoc - 1; - constexpr uint16_t lastColInRoc = numColsInRoc - 1; + constexpr uint16_t numRowsInRoc = 80; + constexpr uint16_t numColsInRoc = 52; + constexpr uint16_t lastRowInRoc = 79; + constexpr uint16_t lastColInRoc = 51; - constexpr uint16_t numRowsInModule = 2 * numRowsInRoc; - constexpr uint16_t numColsInModule = 8 * numColsInRoc; - constexpr uint16_t lastRowInModule = numRowsInModule - 1; - constexpr uint16_t lastColInModule = numColsInModule - 1; + constexpr 
uint16_t numRowsInModule = 2*80; + constexpr uint16_t numColsInModule = 8*52; + constexpr uint16_t lastRowInModule = 2*80-1; + constexpr uint16_t lastColInModule = 8*52-1; constexpr int16_t xOffset = -81; - constexpr int16_t yOffset = -54 * 4; + constexpr int16_t yOffset = -54*4; + + + constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule)* uint32_t(numColsInModule); - constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule) * uint32_t(numColsInModule); // this is for the ROC n<512 (upgrade 1024) - constexpr inline uint16_t divu52(uint16_t n) { - n = n >> 2; - uint16_t q = (n >> 1) + (n >> 4); - q = q + (q >> 4) + (q >> 5); - q = q >> 3; - uint16_t r = n - q * 13; + constexpr inline + uint16_t divu52(uint16_t n) { + n = n>>2; + uint16_t q = (n>>1) + (n>>4); + q = q + (q>>4) + (q>>5); q = q >> 3; + uint16_t r = n - q*13; return q + ((r + 3) >> 4); + // return q + (r > 12); } - constexpr inline bool isEdgeX(uint16_t px) { return (px == 0) | (px == lastRowInModule); } - constexpr inline bool isEdgeY(uint16_t py) { return (py == 0) | (py == lastColInModule); } + constexpr inline + bool isEdgeX(uint16_t px) { return (px==0) | (px==lastRowInModule);} + constexpr inline + bool isEdgeY(uint16_t py) { return (py==0) | (py==lastColInModule);} - constexpr inline uint16_t toRocX(uint16_t px) { return (px < numRowsInRoc) ? 
px : px - numRowsInRoc; } - constexpr inline uint16_t toRocY(uint16_t py) { + + constexpr inline + uint16_t toRocX(uint16_t px) { return (px lastRowInRoc) - shift += 1; - if (px > numRowsInRoc) - shift += 1; - return px + shift; + if (px>lastRowInRoc) shift+=1; + if (px>numRowsInRoc) shift+=1; + return px+shift; } - constexpr inline uint16_t localY(uint16_t py) { + constexpr inline + uint16_t localY(uint16_t py) { auto roc = divu52(py); - auto shift = 2 * roc; - auto yInRoc = py - 52 * roc; - if (yInRoc > 0) - shift += 1; - return py + shift; + auto shift = 2*roc; + auto yInRoc = py - 52*roc; + if (yInRoc>0) shift+=1; + return py+shift; } + +} -} // namespace phase1PixelTopology - -#endif // Geometry_TrackerGeometryBuilder_phase1PixelTopology_h diff --git a/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp b/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp index 9a00efbff9a9a..293febbbc7143 100644 --- a/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp +++ b/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp @@ -1,146 +1,146 @@ -#include -#include -#include - #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include +#include namespace { // original code from CMSSW_4_4 - std::tuple localXori(int mpx) { - const float m_pitchx = 1.f; - int binoffx = int(mpx); // truncate to int - float local_pitchx = m_pitchx; // defaultpitch - - if (binoffx > 80) { // ROC 1 - handles x on edge cluster - binoffx = binoffx + 2; - } else if (binoffx == 80) { // ROC 1 - binoffx = binoffx + 1; - local_pitchx = 2 * m_pitchx; - - } else if (binoffx == 79) { // ROC 0 - binoffx = binoffx + 0; - local_pitchx = 2 * m_pitchx; - } else if (binoffx >= 0) { // ROC 0 - binoffx = binoffx + 0; - - } else { // too small - assert("binoffx too small" == 0); - } - - return std::make_tuple(binoffx, local_pitchx > m_pitchx); + std::tuple localXori(int mpx) { + const float m_pitchx=1.f; + int binoffx = int(mpx); // truncate to 
int + float local_pitchx = m_pitchx; // defaultpitch + + if (binoffx>80) { // ROC 1 - handles x on edge cluster + binoffx=binoffx+2; + } else if (binoffx==80) { // ROC 1 + binoffx=binoffx+1; + local_pitchx = 2 * m_pitchx; + + } else if (binoffx==79) { // ROC 0 + binoffx=binoffx+0; + local_pitchx = 2 * m_pitchx; + } else if (binoffx>=0) { // ROC 0 + binoffx=binoffx+0; + + } else { // too small + assert("binoffx too small"==0); + } + + return std::make_tuple(binoffx,local_pitchx>m_pitchx); } - std::tuple localYori(int mpy) { - const float m_pitchy = 1.f; - int binoffy = int(mpy); // truncate to int - float local_pitchy = m_pitchy; // defaultpitch - - if (binoffy > 416) { // ROC 8, not real ROC - binoffy = binoffy + 17; - } else if (binoffy == 416) { // ROC 8 - binoffy = binoffy + 16; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy == 415) { // ROC 7, last big pixel - binoffy = binoffy + 15; - local_pitchy = 2 * m_pitchy; - } else if (binoffy > 364) { // ROC 7 - binoffy = binoffy + 15; - } else if (binoffy == 364) { // ROC 7 - binoffy = binoffy + 14; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy == 363) { // ROC 6 - binoffy = binoffy + 13; - local_pitchy = 2 * m_pitchy; - } else if (binoffy > 312) { // ROC 6 - binoffy = binoffy + 13; - } else if (binoffy == 312) { // ROC 6 - binoffy = binoffy + 12; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy == 311) { // ROC 5 - binoffy = binoffy + 11; - local_pitchy = 2 * m_pitchy; - } else if (binoffy > 260) { // ROC 5 - binoffy = binoffy + 11; - } else if (binoffy == 260) { // ROC 5 - binoffy = binoffy + 10; - local_pitchy = 2 * m_pitchy; - } else if (binoffy == 259) { // ROC 4 - binoffy = binoffy + 9; - local_pitchy = 2 * m_pitchy; - } else if (binoffy > 208) { // ROC 4 - binoffy = binoffy + 9; - } else if (binoffy == 208) { // ROC 4 - binoffy = binoffy + 8; - local_pitchy = 2 * m_pitchy; + std::tuple localYori(int mpy) { + const float m_pitchy=1.f; + int binoffy = int(mpy); // truncate to int + float 
local_pitchy = m_pitchy; // defaultpitch + + if (binoffy>416) { // ROC 8, not real ROC + binoffy=binoffy+17; + } else if (binoffy==416) { // ROC 8 + binoffy=binoffy+16; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==415) { // ROC 7, last big pixel + binoffy=binoffy+15; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>364) { // ROC 7 + binoffy=binoffy+15; + } else if (binoffy==364) { // ROC 7 + binoffy=binoffy+14; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==363) { // ROC 6 + binoffy=binoffy+13; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>312) { // ROC 6 + binoffy=binoffy+13; + } else if (binoffy==312) { // ROC 6 + binoffy=binoffy+12; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==311) { // ROC 5 + binoffy=binoffy+11; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>260) { // ROC 5 + binoffy=binoffy+11; + } else if (binoffy==260) { // ROC 5 + binoffy=binoffy+10; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==259) { // ROC 4 + binoffy=binoffy+9; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>208) { // ROC 4 + binoffy=binoffy+9; + } else if (binoffy==208) { // ROC 4 + binoffy=binoffy+8; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==207) { // ROC 3 + binoffy=binoffy+7; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>156) { // ROC 3 + binoffy=binoffy+7; + } else if (binoffy==156) { // ROC 3 + binoffy=binoffy+6; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==155) { // ROC 2 + binoffy=binoffy+5; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>104) { // ROC 2 + binoffy=binoffy+5; + } else if (binoffy==104) { // ROC 2 + binoffy=binoffy+4; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==103) { // ROC 1 + binoffy=binoffy+3; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>52) { // ROC 1 + binoffy=binoffy+3; + } else if (binoffy==52) { // ROC 1 + binoffy=binoffy+2; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==51) { // ROC 0 + binoffy=binoffy+1; + local_pitchy = 2 * m_pitchy; + } 
else if (binoffy>0) { // ROC 0 + binoffy=binoffy+1; + } else if (binoffy==0) { // ROC 0 + binoffy=binoffy+0; + local_pitchy = 2 * m_pitchy; + } else { + assert("binoffy too small"==0); + } + + return std::make_tuple(binoffy,local_pitchy>m_pitchy); + } - } else if (binoffy == 207) { // ROC 3 - binoffy = binoffy + 7; - local_pitchy = 2 * m_pitchy; - } else if (binoffy > 156) { // ROC 3 - binoffy = binoffy + 7; - } else if (binoffy == 156) { // ROC 3 - binoffy = binoffy + 6; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy == 155) { // ROC 2 - binoffy = binoffy + 5; - local_pitchy = 2 * m_pitchy; - } else if (binoffy > 104) { // ROC 2 - binoffy = binoffy + 5; - } else if (binoffy == 104) { // ROC 2 - binoffy = binoffy + 4; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy == 103) { // ROC 1 - binoffy = binoffy + 3; - local_pitchy = 2 * m_pitchy; - } else if (binoffy > 52) { // ROC 1 - binoffy = binoffy + 3; - } else if (binoffy == 52) { // ROC 1 - binoffy = binoffy + 2; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy == 51) { // ROC 0 - binoffy = binoffy + 1; - local_pitchy = 2 * m_pitchy; - } else if (binoffy > 0) { // ROC 0 - binoffy = binoffy + 1; - } else if (binoffy == 0) { // ROC 0 - binoffy = binoffy + 0; - local_pitchy = 2 * m_pitchy; - } else { - assert("binoffy too small" == 0); - } - - return std::make_tuple(binoffy, local_pitchy > m_pitchy); - } - -} // namespace +} +#include int main() { - for (uint16_t ix = 0; ix < 80 * 2; ++ix) { + + for (uint16_t ix=0; ix<80*2; ++ix) { auto ori = localXori(ix); auto xl = phase1PixelTopology::localX(ix); auto bp = phase1PixelTopology::isBigPixX(ix); - if (std::get<0>(ori) != xl) - std::cout << "Error " << std::get<0>(ori) << "!=" << xl << std::endl; - assert(std::get<1>(ori) == bp); + if (std::get<0>(ori)!=xl) std::cout << "Error " << std::get<0>(ori) << "!=" << xl << std::endl; + assert(std::get<1>(ori)==bp); } - for (uint16_t iy = 0; iy < 52 * 8; ++iy) { + for (uint16_t iy=0; iy<52*8; ++iy) { auto ori = 
localYori(iy); auto yl = phase1PixelTopology::localY(iy); auto bp = phase1PixelTopology::isBigPixY(iy); - if (std::get<0>(ori) != yl) - std::cout << "Error " << std::get<0>(ori) << "!=" << yl << std::endl; - assert(std::get<1>(ori) == bp); + if (std::get<0>(ori)!=yl) std::cout << "Error " << std::get<0>(ori) << "!=" << yl << std::endl; + assert(std::get<1>(ori)==bp); } + return 0; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h new file mode 100644 index 0000000000000..fd2b28a4719e8 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -0,0 +1,125 @@ +#pragma once + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" + +#include +#include +#include + +namespace gpuCalibPixel { + + constexpr uint16_t InvId=9999; // must be > MaxNumModules + + constexpr float VCaltoElectronGain = 47; // L2-4: 47 +- 4.7 + constexpr float VCaltoElectronGain_L1 = 50; // L1: 49.6 +- 2.6 + constexpr float VCaltoElectronOffset = -60; // L2-4: -60 +- 130 + constexpr float VCaltoElectronOffset_L1 = -670; // L1: -670 +- 220 + + + __global__ void calibDigis(uint16_t * id, + uint16_t const * x, + uint16_t const * y, + uint16_t * adc, + SiPixelGainForHLTonGPU const * ped, + int numElements + ) +{ + + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i >= numElements) return; + if (InvId==id[i]) return; + + float conversionFactor = id[i]<96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; + float offset = id[i]<96 ? 
VCaltoElectronOffset_L1 : VCaltoElectronOffset; + + bool isDeadColumn=false, isNoisyColumn=false; + + int row = x[i]; + int col = y[i]; + auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); + float pedestal = ret.first; float gain = ret.second; + // float pedestal = 0; float gain = 1.; + if ( isDeadColumn | isNoisyColumn ) + { + id[i]=InvId; adc[i] =0; + printf("bad pixel at %d in %d\n",i,id[i]); + } + else { + float vcal = adc[i] * gain - pedestal*gain; + adc[i] = std::max(100, int( vcal * conversionFactor + offset)); + } + + // if (threadIdx.x==0) + // printf ("calibrated %d\n",id[i]); + + __syncthreads(); + +} + + __global__ void calibADCByModule(uint16_t * id, + uint16_t const * x, + uint16_t const * y, + uint16_t * adc, + uint32_t * moduleStart, + SiPixelGainForHLTonGPU const * ped, + int numElements + ) +{ + + + auto first = moduleStart[1 + blockIdx.x]; + + auto me = id[first]; + + assert(me<2000); + + /// depends on "me" + + float conversionFactor = me<96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; + float offset = me<96 ? VCaltoElectronOffset_L1 : VCaltoElectronOffset; + + +#ifdef GPU_DEBUG + if (me%100==1) + if (threadIdx.x==0) printf("start pixel calibration for module %d in block %d\n",me,blockIdx.x); +#endif + + first+=threadIdx.x; + + // __syncthreads(); + + float pedestal=0,gain=0; + bool isDeadColumn=false, isNoisyColumn=false; + int oldCol=-1, oldAveragedBlock=-1; + + for (int i=first; inumberOfRowsAveragedOver_; // 80.... ( row<80 will be faster...) 
+ if ( (col!=oldCol) | ( averagedBlock != oldAveragedBlock) ) { + oldCol=col; oldAveragedBlock= averagedBlock; + auto ret = ped->getPedAndGain(me,col, row, isDeadColumn, isNoisyColumn); + pedestal = ret.first; gain = ret.second; + } + if ( isDeadColumn | isNoisyColumn ) + { id[i]=InvId; adc[i] =0; } + else { + float vcal = adc[i] * gain - pedestal*gain; + adc[i] = std::max(100, int( vcal * conversionFactor + offset)); + } + } + + __syncthreads(); + //reset start + if(0==threadIdx.x) { + auto & k = moduleStart[1 + blockIdx.x]; + while (id[k]==InvId) ++k; + } + + + } + + +} diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h new file mode 100644 index 0000000000000..5ac7375e008cc --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -0,0 +1,168 @@ +#pragma once + +#include +#include +#include + +namespace gpuClustering { + + constexpr uint32_t MaxNumModules = 2000; + + constexpr uint32_t MaxNumPixels = 256*2000; // this does not mean maxPixelPerModule==256! + + constexpr uint16_t InvId=9999; // must be > MaxNumModules + + __global__ void countModules(uint16_t const * id, + uint32_t * moduleStart, + int32_t * clus, + int numElements){ + + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i >= numElements) return; + clus[i]=i; + if (InvId==id[i]) return; + auto j=i-1; + while(j>=0 && id[j]==InvId) --j; + if(j<0 || id[j]!=id[i]) { + // boundary... 
+ auto loc = atomicInc(moduleStart,MaxNumModules); + moduleStart[loc+1]=i; + } + } + + + __global__ void findClus(uint16_t const * id, + uint16_t const * x, + uint16_t const * y, + uint16_t const * adc, + uint32_t const * moduleStart, + uint32_t * clusInModule, uint32_t * moduleId, + int32_t * clus, uint32_t * debug, + int numElements){ + + __shared__ bool go; + __shared__ int nclus; + + __shared__ int msize; + + auto first = moduleStart[1 + blockIdx.x]; + + auto me = id[first]; + + assert(me=numElements) return; + + go=true; + nclus=0; + + msize=numElements; + __syncthreads(); + + for (int i=first; i=msize) return; + + int jmax[10]; + auto niter = (msize-first)/blockDim.x; + assert(niter<10); + for (int i=0; i1) continue; + if (std::abs(int(y[j])-int(y[i]))>1) continue; + auto old = atomicMin(&clus[j],clus[i]); + if (old!=clus[i]) go=true; + atomicMin(&clus[i],old); + jmax[k]=j+1; + } + } + assert (k<=niter); + __syncthreads(); + } + + /* + // fast count (nice but not much useful) + auto laneId = threadIdx.x & 0x1f; + + for (int i=first; i=0) clus[i]=clus[clus[i]]; + } + + __syncthreads(); + for (int i=first; i - - - - - - - - - - - - - - - - - - - - + + + + + + - - + + + + + + + + + + + + + + +# for tracks + + + + +# for lumi + + +# + + - - - + + - - - + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index d0f5f096dbb19..b9d484b351407 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,12 +1,18 @@ - - - - - - - - - + + + + + + + + + + + + + + + - + diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index 4a7ba119b0a5b..b7c04c98443d4 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -21,13 +21,13 @@ #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" #include 
"Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" #include "Geometry/CommonDetUnit/interface/GeomDetType.h" -#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" #include "Geometry/CommonTopologies/interface/PixelTopology.h" #include "Geometry/CommonTopologies/interface/Topology.h" //--- For the configuration: #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" + #include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementPoint.h" #include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementError.h" @@ -51,244 +51,236 @@ class RectangularPixelTopology; class MagneticField; -class PixelCPEBase : public PixelClusterParameterEstimator { +class PixelCPEBase : public PixelClusterParameterEstimator +{ public: - struct DetParam { - DetParam() {} - const PixelGeomDetUnit* theDet; - // gavril : replace RectangularPixelTopology with PixelTopology - const PixelTopology* theTopol; - const RectangularPixelTopology* theRecTopol; - - GeomDetType::SubDetector thePart; - Local3DPoint theOrigin; - float theThickness; - float thePitchX; - float thePitchY; - - float bz; // local Bz - float bx; // local Bx - LocalVector driftDirection; - float widthLAFractionX; // Width-LA to Offset-LA in X - float widthLAFractionY; // same in Y - float lorentzShiftInCmX; // a FULL shift, in cm - float lorentzShiftInCmY; // a FULL shift, in cm - int detTemplateId; // det if for templates & generic errors - int detTemplateId2D; // det if for 2D templates - }; - - struct ClusterParam { - ClusterParam(const SiPixelCluster& cl) : theCluster(&cl) {} - - virtual ~ClusterParam() = default; - - const SiPixelCluster* theCluster; - - //--- Cluster-level quantities (filled in computeAnglesFrom....) - float cotalpha; - float cotbeta; - - // G.Giurgiu (05/14/08) track local coordinates - // filled in computeAnglesFrom.... 
- float trk_lp_x; - float trk_lp_y; - - // ggiurgiu@jhu.edu (12/01/2010) : Needed for calling topology methods - // with track angles to handle surface deformations (bows/kinks) - // filled in computeAnglesFrom.... (btw redundant with the 4 above) - Topology::LocalTrackPred loc_trk_pred; - - //--- Probability (protected by hasFilledProb_) - float probabilityX_; - float probabilityY_; - float probabilityQ_; - int qBin_; // always filled by qbin - - bool isOnEdge_; // filled in setTheClu - bool hasBadPixels_ = false; // (never used in current code) - bool spansTwoROCs_; // filled in setTheClu - bool hasFilledProb_ = false; - // ggiurgiu@jhu.edu (10/18/2008) - bool with_track_angle; // filled in computeAnglesFrom.... - bool filled_from_2d = false; // - - // More detailed edge information (for CPE ClusterRepair, and elsewhere...) - int edgeTypeX_ = 0; // 0: not on edge, 1: low end on edge, 2: high end - int edgeTypeY_ = 0; // 0: not on edge, 1: low end on edge, 2: high end - }; - + struct DetParam + { + DetParam() {} + const PixelGeomDetUnit * theDet; + // gavril : replace RectangularPixelTopology with PixelTopology + const PixelTopology * theTopol; + const RectangularPixelTopology * theRecTopol; + + GeomDetType::SubDetector thePart; + Local3DPoint theOrigin; + float theThickness; + float thePitchX; + float thePitchY; + + float bz; // local Bz + float bx; // local Bx + LocalVector driftDirection; + float widthLAFractionX; // Width-LA to Offset-LA in X + float widthLAFractionY; // same in Y + float lorentzShiftInCmX; // a FULL shift, in cm + float lorentzShiftInCmY; // a FULL shift, in cm + int detTemplateId; // det if for templates & generic errors + }; + + struct ClusterParam + { + ClusterParam(const SiPixelCluster & cl) : theCluster(&cl) {} + + virtual ~ClusterParam() = default; + + const SiPixelCluster * theCluster; + + //--- Cluster-level quantities (filled in computeAnglesFrom....) 
+ float cotalpha; + float cotbeta; + + // G.Giurgiu (05/14/08) track local coordinates + // filled in computeAnglesFrom.... + float trk_lp_x; + float trk_lp_y; + + // ggiurgiu@jhu.edu (12/01/2010) : Needed for calling topology methods + // with track angles to handle surface deformations (bows/kinks) + // filled in computeAnglesFrom.... (btw redundant with the 4 above) + Topology::LocalTrackPred loc_trk_pred; + + //--- Probability (protected by hasFilledProb_) + float probabilityX_ ; + float probabilityY_ ; + float probabilityQ_ ; + int qBin_ ; // always filled by qbin + + bool isOnEdge_ ; // filled in setTheClu + bool hasBadPixels_ = false; // (never used in current code) + bool spansTwoROCs_ ; // filled in setTheClu + bool hasFilledProb_ =false; + // ggiurgiu@jhu.edu (10/18/2008) + bool with_track_angle; // filled in computeAnglesFrom.... + + }; + public: - PixelCPEBase(edm::ParameterSet const& conf, - const MagneticField* mag, - const TrackerGeometry& geom, - const TrackerTopology& ttopo, - const SiPixelLorentzAngle* lorentzAngle, - const SiPixelGenErrorDBObject* genErrorDBObject, - const SiPixelTemplateDBObject* templateDBobject, - const SiPixelLorentzAngle* lorentzAngleWidth, - int flag = 0 // flag=0 for generic, =1 for templates - ); // NEW - - static void fillPSetDescription(edm::ParameterSetDescription& desc); - - //-------------------------------------------------------------------------- - // Allow the magnetic field to be set/updated later. - //-------------------------------------------------------------------------- - //inline void setMagField(const MagneticField *mag) const { magfield_ = mag; } // Not used, AH - - //-------------------------------------------------------------------------- - // Obtain the angles from the position of the DetUnit. 
- //-------------------------------------------------------------------------- - - inline ReturnType getParameters(const SiPixelCluster& cl, const GeomDetUnit& det) const override { + PixelCPEBase(edm::ParameterSet const& conf, const MagneticField * mag, const TrackerGeometry& geom, const TrackerTopology& ttopo, + const SiPixelLorentzAngle * lorentzAngle, + const SiPixelGenErrorDBObject * genErrorDBObject, + const SiPixelTemplateDBObject * templateDBobject, + const SiPixelLorentzAngle * lorentzAngleWidth, + int flag=0 // flag=0 for generic, =1 for templates + ); // NEW + + //-------------------------------------------------------------------------- + // Allow the magnetic field to be set/updated later. + //-------------------------------------------------------------------------- + //inline void setMagField(const MagneticField *mag) const { magfield_ = mag; } // Not used, AH + + + //-------------------------------------------------------------------------- + // Obtain the angles from the position of the DetUnit. + //-------------------------------------------------------------------------- + + inline ReturnType getParameters(const SiPixelCluster & cl, + const GeomDetUnit & det ) const override + { #ifdef EDM_ML_DEBUG - nRecHitsTotal_++; - //std::cout<<" in PixelCPEBase:localParameters(all) - "< theClusterParam = createClusterParam(cl); - setTheClu(theDetParam, *theClusterParam); - computeAnglesFromDetPosition(theDetParam, *theClusterParam); - - // localPosition( cl, det ) must be called before localError( cl, det ) !!! 
- LocalPoint lp = localPosition(theDetParam, *theClusterParam); - LocalError le = localError(theDetParam, *theClusterParam); - SiPixelRecHitQuality::QualWordType rqw = rawQualityWord(*theClusterParam); - auto tuple = std::make_tuple(lp, le, rqw); - - //std::cout<<" in PixelCPEBase:localParameters(all) - "< theClusterParam = createClusterParam(cl); - setTheClu(theDetParam, *theClusterParam); - computeAnglesFromTrajectory(theDetParam, *theClusterParam, ltp); - - // localPosition( cl, det ) must be called before localError( cl, det ) !!! - LocalPoint lp = localPosition(theDetParam, *theClusterParam); - LocalError le = localError(theDetParam, *theClusterParam); - SiPixelRecHitQuality::QualWordType rqw = rawQualityWord(*theClusterParam); - auto tuple = std::make_tuple(lp, le, rqw); - - //std::cout<<" in PixelCPEBase:localParameters(on track) - "< createClusterParam(const SiPixelCluster& cl) const = 0; - - //-------------------------------------------------------------------------- - // This is where the action happens. - //-------------------------------------------------------------------------- - virtual LocalPoint localPosition(DetParam const& theDetParam, ClusterParam& theClusterParam) const = 0; - virtual LocalError localError(DetParam const& theDetParam, ClusterParam& theClusterParam) const = 0; - - void fillDetParams(); - - //----------------------------------------------------------------------------- - //! A convenience method to fill a whole SiPixelRecHitQuality word in one shot. - //! This way, we can keep the details of what is filled within the pixel - //! code and not expose the Transient SiPixelRecHit to it as well. The name - //! of this function is chosen to match the one in SiPixelRecHit. 
- //----------------------------------------------------------------------------- - SiPixelRecHitQuality::QualWordType rawQualityWord(ClusterParam& theClusterParam) const; - + virtual ClusterParam * createClusterParam(const SiPixelCluster & cl) const = 0; + + //-------------------------------------------------------------------------- + // This is where the action happens. + //-------------------------------------------------------------------------- + virtual LocalPoint localPosition(DetParam const & theDetParam, ClusterParam & theClusterParam) const = 0; + virtual LocalError localError (DetParam const & theDetParam, ClusterParam & theClusterParam) const = 0; + + void fillDetParams(); + + //----------------------------------------------------------------------------- + //! A convenience method to fill a whole SiPixelRecHitQuality word in one shot. + //! This way, we can keep the details of what is filled within the pixel + //! code and not expose the Transient SiPixelRecHit to it as well. The name + //! of this function is chosen to match the one in SiPixelRecHit. + //----------------------------------------------------------------------------- + SiPixelRecHitQuality::QualWordType rawQualityWord(ClusterParam & theClusterParam) const; + protected: - //--- All methods and data members are protected to facilitate (for now) - //--- access from derived classes. - - typedef GloballyPositioned Frame; - - //--------------------------------------------------------------------------- - // Data members - //--------------------------------------------------------------------------- - - //--- Counters + //--- All methods and data members are protected to facilitate (for now) + //--- access from derived classes. 
+ + typedef GloballyPositioned Frame; + + //--------------------------------------------------------------------------- + // Data members + //--------------------------------------------------------------------------- + + //--- Counters #ifdef EDM_ML_DEBUG - mutable std::atomic nRecHitsTotal_; //for debugging only - mutable std::atomic nRecHitsUsedEdge_; //for debugging only + mutable std::atomic nRecHitsTotal_ ; //for debugging only + mutable std::atomic nRecHitsUsedEdge_ ; //for debugging only #endif - - // Added new members - float lAOffset_; // la used to calculate the offset from configuration (for testing) - float lAWidthBPix_; // la used to calculate the cluster width from conf. - float lAWidthFPix_; // la used to calculate the cluster width from conf. - //bool useLAAlignmentOffsets_; // lorentz angle offsets detrmined by alignment - bool useLAOffsetFromConfig_; // lorentz angle used to calculate the offset - bool useLAWidthFromConfig_; // lorentz angle used to calculate the cluster width - bool useLAWidthFromDB_; // lorentz angle used to calculate the cluster width - - //--- Global quantities - int theVerboseLevel; // algorithm's verbosity - int theFlag_; // flag to recognice if we are in generic or templates - - const MagneticField* magfield_; // magnetic field - const TrackerGeometry& geom_; // geometry - const TrackerTopology& ttopo_; // Tracker Topology - - const SiPixelLorentzAngle* lorentzAngle_; - const SiPixelLorentzAngle* lorentzAngleWidth_; // for the charge width (generic) - - const SiPixelGenErrorDBObject* genErrorDBObject_; // NEW - //const SiPixelCPEGenericErrorParm * genErrorParm_; // OLD - - const SiPixelTemplateDBObject* templateDBobject_; - bool alpha2Order; // switch on/off E.B effect. - - bool DoLorentz_; - bool LoadTemplatesFromDB_; - - //errors for template reco for edge hits, based on observed residuals from - //studies likely done in 2011... 
- static constexpr float xEdgeXError_ = 23.0f; - static constexpr float xEdgeYError_ = 39.0f; - - static constexpr float yEdgeXError_ = 24.0f; - static constexpr float yEdgeYError_ = 96.0f; - - static constexpr float bothEdgeXError_ = 31.0f; - static constexpr float bothEdgeYError_ = 90.0f; - - static constexpr float clusterSplitMaxError_ = 7777.7f; - - //--------------------------------------------------------------------------- - // Geometrical services to subclasses. - //--------------------------------------------------------------------------- + + // Added new members + float lAOffset_; // la used to calculate the offset from configuration (for testing) + float lAWidthBPix_; // la used to calculate the cluster width from conf. + float lAWidthFPix_; // la used to calculate the cluster width from conf. + //bool useLAAlignmentOffsets_; // lorentz angle offsets detrmined by alignment + bool useLAOffsetFromConfig_; // lorentz angle used to calculate the offset + bool useLAWidthFromConfig_; // lorentz angle used to calculate the cluster width + bool useLAWidthFromDB_; // lorentz angle used to calculate the cluster width + + //--- Global quantities + int theVerboseLevel; // algorithm's verbosity + int theFlag_; // flag to recognice if we are in generic or templates + + const MagneticField * magfield_; // magnetic field + const TrackerGeometry & geom_; // geometry + const TrackerTopology & ttopo_; // Tracker Topology + + const SiPixelLorentzAngle * lorentzAngle_; + const SiPixelLorentzAngle * lorentzAngleWidth_; // for the charge width (generic) + + const SiPixelGenErrorDBObject * genErrorDBObject_; // NEW + //const SiPixelCPEGenericErrorParm * genErrorParm_; // OLD + + const SiPixelTemplateDBObject * templateDBobject_; + bool alpha2Order; // switch on/off E.B effect. + + bool DoLorentz_; + bool LoadTemplatesFromDB_; + + //--------------------------------------------------------------------------- + // Geometrical services to subclasses. 
+ //--------------------------------------------------------------------------- protected: - void computeAnglesFromDetPosition(DetParam const& theDetParam, ClusterParam& theClusterParam) const; - - void computeAnglesFromTrajectory(DetParam const& theDetParam, - ClusterParam& theClusterParam, - const LocalTrajectoryParameters& ltp) const; - - void setTheClu(DetParam const&, ClusterParam& theClusterParam) const; - - LocalVector driftDirection(DetParam& theDetParam, GlobalVector bfield) const; - LocalVector driftDirection(DetParam& theDetParam, LocalVector bfield) const; - void computeLorentzShifts(DetParam&) const; - - //--------------------------------------------------------------------------- - // Cluster-level services. - //--------------------------------------------------------------------------- - - DetParam const& detParam(const GeomDetUnit& det) const; - - using DetParams = std::vector; - - DetParams m_DetParams = DetParams(1440); + void computeAnglesFromDetPosition( DetParam const & theDetParam, ClusterParam & theClusterParam ) const; + + void computeAnglesFromTrajectory ( DetParam const & theDetParam, ClusterParam & theClusterParam, + const LocalTrajectoryParameters & ltp) const; + + void setTheClu( DetParam const &, ClusterParam & theClusterParam ) const ; + + LocalVector driftDirection (DetParam & theDetParam, GlobalVector bfield ) const ; + LocalVector driftDirection (DetParam & theDetParam, LocalVector bfield ) const ; + void computeLorentzShifts(DetParam &) const ; + + bool isFlipped(DetParam const & theDetParam) const; // is the det flipped or not? + + //--------------------------------------------------------------------------- + // Cluster-level services. 
+ //--------------------------------------------------------------------------- + + DetParam const & detParam(const GeomDetUnit & det) const; + + using DetParams=std::vector; + + DetParams m_DetParams=DetParams(1440); + }; #endif + + diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h new file mode 100644 index 0000000000000..74b1573423f85 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -0,0 +1,112 @@ +#pragma once + +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" + + +// The template header files +#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelTemplate.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelGenError.h" + + +#include +#include + + +class MagneticField; +class PixelCPEFast final : public PixelCPEBase +{ +public: + struct ClusterParamGeneric : ClusterParam + { + ClusterParamGeneric(const SiPixelCluster & cl) : ClusterParam(cl){} + // The truncation value pix_maximum is an angle-dependent cutoff on the + // individual pixel signals. It should be applied to all pixels in the + // cluster [signal_i = fminf(signal_i, pixmax)] before the column and row + // sums are made. 
Morris + int pixmx; + + // These are errors predicted by PIXELAV + float sigmay; // CPE Generic y-error for multi-pixel cluster + float sigmax; // CPE Generic x-error for multi-pixel cluster + float sy1 ; // CPE Generic y-error for single single-pixel + float sy2 ; // CPE Generic y-error for single double-pixel cluster + float sx1 ; // CPE Generic x-error for single single-pixel cluster + float sx2 ; // CPE Generic x-error for single double-pixel cluster + + }; + + PixelCPEFast(edm::ParameterSet const& conf, const MagneticField *, + const TrackerGeometry&, const TrackerTopology&, const SiPixelLorentzAngle *, + const SiPixelGenErrorDBObject *, const SiPixelLorentzAngle *); + + + ~PixelCPEFast(); +private: + ClusterParam * createClusterParam(const SiPixelCluster & cl) const override; + + LocalPoint localPosition (DetParam const & theDetParam, ClusterParam & theClusterParam) const override; + LocalError localError (DetParam const & theDetParam, ClusterParam & theClusterParam) const override; + + //-------------------------------------------------------------------- + // Methods. + //------------------------------------------------------------------ + static float + generic_position_formula( int size, //!< Size of this projection. + int Q_f, //!< Charge in the first pixel. + int Q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says. 
+ float lorentz_shift, //!< L-width + float theThickness, //detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big //!< true if the last is big + ); + + static void + collect_edge_charges(ClusterParam & theClusterParam, //!< input, the cluster + int & Q_f_X, //!< output, Q first in X + int & Q_l_X, //!< output, Q last in X + int & Q_f_Y, //!< output, Q first in Y + int & Q_l_Y, //!< output, Q last in Y + bool truncate + ); + + + bool UseErrorsFromTemplates_; + bool TruncatePixelCharge_; + + float EdgeClusterErrorX_; + float EdgeClusterErrorY_; + + std::vector xerr_barrel_l1_,yerr_barrel_l1_,xerr_barrel_ln_; + std::vector yerr_barrel_ln_,xerr_endcap_,yerr_endcap_; + float xerr_barrel_l1_def_, yerr_barrel_l1_def_,xerr_barrel_ln_def_; + float yerr_barrel_ln_def_, xerr_endcap_def_, yerr_endcap_def_; + + //--- DB Error Parametrization object, new light templates + std::vector< SiPixelGenErrorStore > thePixelGenError_; + + +public : + + void fillParamsForGpu(); + + // not needed if not used on CPU... + std::vector m_detParamsGPU; + pixelCPEforGPU::CommonParams m_commonParamsGPU; + + pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; + + pixelCPEforGPU::ParamsOnGPU * d_paramsOnGPU; // copy of the above on the Device + + +}; + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h new file mode 100644 index 0000000000000..900ae64969285 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -0,0 +1,206 @@ +#pragma once + +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include +#include + +#include + +namespace pixelCPEforGPU { + + using Frame = SOAFrame; + using Rotation = SOARotation; + + // all modules are identical! 
+ struct CommonParams { + float theThicknessB; + float theThicknessE; + float thePitchX; + float thePitchY; + }; + + struct DetParams { + + bool isBarrel; + bool isPosZ; + uint16_t layer; + uint16_t index; + uint32_t rawId; + + float shiftX; + float shiftY; + float chargeWidthX; + float chargeWidthY; + + float x0,y0,z0; // the vertex in the local coord of the detector + + Frame frame; + + }; + + + struct ParamsOnGPU { + CommonParams * m_commonParams; + DetParams * m_detParams; + + constexpr + CommonParams const & commonParams() const {return *m_commonParams;} + constexpr + DetParams const & detParams(int i) const {return m_detParams[i];} + + }; + + // SOA! (on device) + template + struct ClusParamsT { + uint32_t minRow[N]; + uint32_t maxRow[N]; + uint32_t minCol[N]; + uint32_t maxCol[N]; + + int32_t Q_f_X[N]; + int32_t Q_l_X[N]; + int32_t Q_f_Y[N]; + int32_t Q_l_Y[N]; + + int32_t charge[N]; + + float xpos[N]; + float ypos[N]; + }; + + + constexpr uint32_t MaxClusInModule=256; + using ClusParams = ClusParamsT<256>; + + constexpr inline + void computeAnglesFromDet(DetParams const & detParams, float const x, float const y, float & cotalpha, float & cotbeta) { + // x,y local position on det + auto gvx = x - detParams.x0; + auto gvy = y - detParams.y0; + auto gvz = -1.f/detParams.z0; + // normalization not required as only ratio used... + // calculate angles + cotalpha = gvx*gvz; + cotbeta = gvy*gvz; + } + + constexpr inline + float correction( + int sizeM1, + int Q_f, //!< Charge in the first pixel. + int Q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says. 
+ float lorentz_shift, //!< L-shift at half thickness + float theThickness, //detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big //!< true if the last is big + ) +{ + if (0==sizeM1) return 0; // size1 + float W_eff=0; + bool simple=true; + if (1==sizeM1) { // size 2 + //--- Width of the clusters minus the edge (first and last) pixels. + //--- In the note, they are denoted x_F and x_L (and y_F and y_L) + // assert(lower_edge_last_pix>=upper_edge_first_pix); + auto W_inner = pitch * float(lower_edge_last_pix-upper_edge_first_pix); // in cm + + //--- Predicted charge width from geometry + auto W_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! + + W_eff = std::abs( W_pred ) - W_inner; + + //--- If the observed charge width is inconsistent with the expectations + //--- based on the track, do *not* use W_pred-W_innner. Instead, replace + //--- it with an *average* effective charge width, which is the average + //--- length of the edge pixels. + // + simple = ( W_eff < 0.0f ) | ( W_eff > pitch ); // this produces "large" regressions for very small numeric differences... + + } + if (simple) { + //--- Total length of the two edge pixels (first+last) + float sum_of_edge = 2.0f; + if (first_is_big) sum_of_edge += 1.0f; + if (last_is_big) sum_of_edge += 1.0f; + W_eff = pitch * 0.5f * sum_of_edge; // ave. 
length of edge pixels (first+last) (cm) + } + + + //--- Finally, compute the position in this projection + float Qdiff = Q_l - Q_f; + float Qsum = Q_l + Q_f; + + //--- Temporary fix for clusters with both first and last pixel with charge = 0 + if(Qsum==0) Qsum=1.0f; + return 0.5f*(Qdiff/Qsum) * W_eff; + + } + + constexpr inline + void position(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) { + + //--- Upper Right corner of Lower Left pixel -- in measurement frame + uint16_t llx = cp.minRow[ic]+1; + uint16_t lly = cp.minCol[ic]+1; + + //--- Lower Left corner of Upper Right pixel -- in measurement frame + uint16_t urx = cp.maxRow[ic]; + uint16_t ury = cp.maxCol[ic]; + + auto llxl = phase1PixelTopology::localX(llx); + auto llyl = phase1PixelTopology::localY(lly); + auto urxl = phase1PixelTopology::localX(urx); + auto uryl = phase1PixelTopology::localY(ury); + + auto mx = llxl+urxl; + auto my = llyl+uryl; + + // apply the lorentz offset correction + auto xPos = detParams.shiftX + comParams.thePitchX*(0.5f*float(mx)+float(phase1PixelTopology::xOffset)); + auto yPos = detParams.shiftY + comParams.thePitchY*(0.5f*float(my)+float(phase1PixelTopology::yOffset)); + + float cotalpha=0, cotbeta=0; + + + computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); + + auto thickness = detParams.isBarrel ? 
comParams.theThicknessB : comParams.theThicknessE; + + auto xcorr = correction( + cp.maxRow[ic]-cp.minRow[ic], + cp.Q_f_X[ic], cp.Q_l_X[ic], + llxl, urxl, + detParams.chargeWidthX, // lorentz shift in cm + thickness, + cotalpha, + comParams.thePitchX, + phase1PixelTopology::isBigPixX( cp.minRow[ic] ), + phase1PixelTopology::isBigPixX( cp.maxRow[ic] ) + ); + + + auto ycorr = correction( + cp.maxCol[ic]-cp.minCol[ic], + cp.Q_f_Y[ic], cp.Q_l_Y[ic], + llyl, uryl, + detParams.chargeWidthY, // lorentz shift in cm + thickness, + cotbeta, + comParams.thePitchY, + phase1PixelTopology::isBigPixY( cp.minCol[ic] ), + phase1PixelTopology::isBigPixY( cp.maxCol[ic] ) + ); + + cp.xpos[ic]=xPos+xcorr; + cp.ypos[ic]=yPos+ycorr; + + } + +} diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index e02a0b722c1ae..526d837e1ec01 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,7 +1,9 @@ - - - - - - + + + + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc new file mode 100644 index 0000000000000..344625cba01b6 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc @@ -0,0 +1,103 @@ +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" + +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ModuleFactory.h" + +// new 
record +#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" + +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" +#include + +class PixelCPEFastESProducer: public edm::ESProducer{ + public: + PixelCPEFastESProducer(const edm::ParameterSet & p); + std::shared_ptr produce(const TkPixelCPERecord &); + private: + std::shared_ptr cpe_; + edm::ParameterSet pset_; + edm::ESInputTag magname_; + bool UseErrorsFromTemplates_; +}; + + +#include +#include + +using namespace edm; + + + + +PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet & p) +{ + std::string myname = p.getParameter("ComponentName"); + magname_ = p.existsAs("MagneticFieldRecord")? + p.getParameter("MagneticFieldRecord"):edm::ESInputTag(""); + UseErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); + + + pset_ = p; + setWhatProduced(this,myname); + + +} + + +std::shared_ptr +PixelCPEFastESProducer::produce(const TkPixelCPERecord & iRecord){ + + ESHandle magfield; + iRecord.getRecord().get( magname_, magfield ); + + edm::ESHandle pDD; + iRecord.getRecord().get( pDD ); + + edm::ESHandle hTT; + iRecord.getRecord().getRecord().get(hTT); + + // Lorant angle for offsets + ESHandle lorentzAngle; + iRecord.getRecord().get(lorentzAngle ); + + // add the new la width object + ESHandle lorentzAngleWidth; + const SiPixelLorentzAngle * lorentzAngleWidthProduct = nullptr; + iRecord.getRecord().get("forWidth",lorentzAngleWidth ); + lorentzAngleWidthProduct = lorentzAngleWidth.product(); + + const SiPixelGenErrorDBObject * genErrorDBObjectProduct = nullptr; + + // Errors take only from new GenError + ESHandle genErrorDBObject; + if(UseErrorsFromTemplates_) { // do only when generrors are needed + iRecord.getRecord().get(genErrorDBObject); + 
genErrorDBObjectProduct = genErrorDBObject.product(); + //} else { + //std::cout<<" pass an empty GenError pointer"<( + pset_,magfield.product(),*pDD.product(), + *hTT.product(),lorentzAngle.product(), + genErrorDBObjectProduct,lorentzAngleWidthProduct); + + return cpe_; +} + + +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" + +DEFINE_FWK_EVENTSETUP_MODULE(PixelCPEFastESProducer); + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h new file mode 100644 index 0000000000000..40ad6ac15fec7 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -0,0 +1,130 @@ +#pragma once + +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" + +#include +#include +#include +#include + + +namespace gpuPixelRecHits { + + // to be moved in common namespace... + constexpr uint16_t InvId=9999; // must be > MaxNumModules + + + constexpr uint32_t MaxClusInModule= pixelCPEforGPU::MaxClusInModule; + + using ClusParams = pixelCPEforGPU::ClusParams; + + + __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * cpeParams, + uint16_t const * id, + uint16_t const * x, + uint16_t const * y, + uint16_t const * adc, + uint32_t const * digiModuleStart, + uint32_t * const clusInModule, + uint32_t * const moduleId, + int32_t * const clus, + int numElements, + uint32_t const * hitsModuleStart, + int32_t * chargeh, + float * xh, float * yh, float * zh, + bool local // if true fill just x & y in local coord... + ){ + + // as usual one block per module + + __shared__ ClusParams clusParams; + + + auto first = digiModuleStart[1 + blockIdx.x]; + + auto me = id[first]; + assert (moduleId[blockIdx.x]==me); + + auto nclus = clusInModule[me]; + +#ifdef GPU_DEBUG + if (me%100==1) + if (threadIdx.x==0) printf("hitbuilder: %d clusters in module %d. 
will write at %d\n",nclus,me,hitsModuleStart[me]); +#endif + + assert(blockDim.x>=MaxClusInModule); + assert(nclus<=MaxClusInModule); + + auto ic = threadIdx.x; + + if (ic::max(); + clusParams.maxRow[ic] = 0; + clusParams.minCol[ic] = std::numeric_limits::max(); + clusParams.maxCol[ic] = 0; + + clusParams.charge[ic] = 0; + + clusParams.Q_f_X[ic] = 0; + clusParams.Q_l_X[ic] = 0; + clusParams.Q_f_Y[ic] = 0; + clusParams.Q_l_Y[ic] = 0; + } + + + first+=threadIdx.x; + + __syncthreads(); + + + // one thead per "digi" + + for (int i=first; i=nclus) return; + + first = hitsModuleStart[me]; + auto h = first+ic; // output index in global memory + + assert(h<2000*256); + + pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams,ic); + + chargeh[h] = clusParams.charge[ic]; + + if (local) { + xh[h]= clusParams.xpos[ic]; + yh[h]= clusParams.ypos[ic]; + } else { + cpeParams->detParams(me).frame.toGlobal(clusParams.xpos[ic],clusParams.ypos[ic], + xh[h],yh[h],zh[h] + ); + } + + } + +} + diff --git a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py index e349a515c69b3..affcb3638cb6f 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py +++ b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py @@ -3,16 +3,25 @@ # # Load all Pixel Cluster Position Estimator ESProducers # -# 1. Template algorithm +# +# 1. RecHits using angles from module position +# +from RecoLocalTracker.SiPixelRecHits.PixelCPEInitial_cfi import * +# +# 2. TrackingRechits using angles from tracks +# +from RecoLocalTracker.SiPixelRecHits.PixelCPEParmError_cfi import * +# +# 3. Template algorithm # from RecoLocalTracker.SiPixelRecHits.PixelCPETemplateReco_cfi import * # -# 2. Pixel Generic CPE +# 4. Pixel Generic CPE # from RecoLocalTracker.SiPixelRecHits.PixelCPEGeneric_cfi import * +from RecoLocalTracker.SiPixelRecHits.PixelCPEFast_cfi import * # -# 3. 
ESProducer for the Magnetic-field dependent template records +# 5. The new ESProducer for the Magnetic-field dependent template record # from CalibTracker.SiPixelESProducers.SiPixelTemplateDBObjectESProducer_cfi import * -from CalibTracker.SiPixelESProducers.SiPixel2DTemplateDBObjectESProducer_cfi import * diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc new file mode 100644 index 0000000000000..e2d238563ec43 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -0,0 +1,559 @@ +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" + +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" + +#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" +#include "Geometry/TrackerGeometryBuilder/interface/RectangularPixelTopology.h" + +// this is needed to get errors from templates +#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelTemplate.h" +#include "DataFormats/DetId/interface/DetId.h" + + +// Services +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "MagneticField/Engine/interface/MagneticField.h" + +#include +#include + +#include "EventFilter/SiPixelRawToDigi/plugins/cudaCheck.h" + +#include + + namespace { + + struct Stat { + Stat():c(0){} + ~Stat(){std::cout << "CPE stat " << c << ' ' << maxx << ' ' << maxd << std::endl;} + std::atomic c; + float maxx=0.f, maxd=0.0f; + }; + Stat statx, staty; + } + +namespace { + constexpr float micronsToCm = 1.0e-4; + const bool MYDEBUG = false; +} + +//----------------------------------------------------------------------------- +//! The constructor. 
+//----------------------------------------------------------------------------- +PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf, + const MagneticField * mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle * lorentzAngle, + const SiPixelGenErrorDBObject * genErrorDBObject, + const SiPixelLorentzAngle * lorentzAngleWidth) + : PixelCPEBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, nullptr,lorentzAngleWidth,0) { + + EdgeClusterErrorX_ = conf.getParameter("EdgeClusterErrorX"); + EdgeClusterErrorY_ = conf.getParameter("EdgeClusterErrorY"); + + + UseErrorsFromTemplates_ = conf.getParameter("UseErrorsFromTemplates"); + TruncatePixelCharge_ = conf.getParameter("TruncatePixelCharge"); + + + // Use errors from templates or from GenError + if ( UseErrorsFromTemplates_ ) { + if ( !SiPixelGenError::pushfile( *genErrorDBObject_, thePixelGenError_) ) + throw cms::Exception("InvalidCalibrationLoaded") + << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " + << ( *genErrorDBObject_ ).version(); + if(MYDEBUG) std::cout<<"Loaded genErrorDBObject v"<<( *genErrorDBObject_ ).version()<< std::endl; + } else { + if(MYDEBUG) std::cout<<" Use simple parametrised errors "<< std::endl; + } // if ( UseErrorsFromTemplates_ ) + + + // Rechit errors in case other, more correct, errors are not vailable + // This are constants. Maybe there is a more efficienct way to store them. 
+ xerr_barrel_l1_= {0.00115, 0.00120, 0.00088}; + xerr_barrel_l1_def_=0.01030; + yerr_barrel_l1_= {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; + yerr_barrel_l1_def_=0.00210; + xerr_barrel_ln_= {0.00115, 0.00120, 0.00088}; + xerr_barrel_ln_def_=0.01030; + yerr_barrel_ln_= {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; + yerr_barrel_ln_def_=0.00210; + xerr_endcap_= {0.0020, 0.0020}; + xerr_endcap_def_=0.0020; + yerr_endcap_= {0.00210}; + yerr_endcap_def_=0.00075; + + + + fillParamsForGpu(); + +} + +void PixelCPEFast::fillParamsForGpu() { + + + m_commonParamsGPU.theThicknessB = m_DetParams.front().theThickness; + m_commonParamsGPU.theThicknessE = m_DetParams.back().theThickness; + m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; + m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; + + uint32_t oldLayer = 0; + m_detParamsGPU.resize(m_DetParams.size()); + for (auto i=0U; iindex()==int(i)); + + assert(m_commonParamsGPU.thePitchY==p.thePitchY); + assert(m_commonParamsGPU.thePitchX==p.thePitchX); + // assert(m_commonParamsGPU.theThickness==p.theThickness); + + g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); + g.isPosZ = p.theDet->surface().position().z()>0; + g.layer = ttopo_.layer(p.theDet->geographicalId()); + g.index=i; // better be! + g.rawId = p.theDet->geographicalId(); + + assert( (g.isBarrel ?m_commonParamsGPU.theThicknessB : m_commonParamsGPU.theThicknessE) ==p.theThickness ); + + // if (m_commonParamsGPU.theThickness!=p.theThickness) + // std::cout << i << (g.isBarrel ? "B " : "E ") << m_commonParamsGPU.theThickness<<"!="<surface().position(); + auto rr = pixelCPEforGPU::Rotation(p.theDet->surface().rotation()); + g.frame = pixelCPEforGPU::Frame(vv.x(),vv.y(),vv.z(),rr); + + } + + // and now copy to device... 
+ cudaCheck(cudaMalloc((void**) & h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); + cudaCheck(cudaMalloc((void**) & h_paramsOnGPU.m_detParams, m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams))); + cudaCheck(cudaMalloc((void**) & d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); + + cudaCheck(cudaMemcpy(d_paramsOnGPU, &h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(h_paramsOnGPU.m_commonParams,&m_commonParamsGPU,sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(h_paramsOnGPU.m_detParams, m_detParamsGPU.data(), m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams), cudaMemcpyHostToDevice)); + cudaDeviceSynchronize(); +} + +PixelCPEFast::~PixelCPEFast() { + + cudaFree(h_paramsOnGPU.m_commonParams); + cudaFree(h_paramsOnGPU.m_detParams); + cudaFree(d_paramsOnGPU); + +} + + + +PixelCPEBase::ClusterParam* PixelCPEFast::createClusterParam(const SiPixelCluster & cl) const +{ + return new ClusterParamGeneric(cl); +} + + + +//----------------------------------------------------------------------------- +//! Hit position in the local frame (in cm). Unlike other CPE's, this +//! one converts everything from the measurement frame (in channel numbers) +//! into the local frame (in centimeters). 
+//----------------------------------------------------------------------------- +LocalPoint +PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClusterParamBase) const +{ + + ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); + + assert(!theClusterParam.with_track_angle); + + float chargeWidthX = (theDetParam.lorentzShiftInCmX * theDetParam.widthLAFractionX); + float chargeWidthY = (theDetParam.lorentzShiftInCmY * theDetParam.widthLAFractionY); + float shiftX = 0.5f*theDetParam.lorentzShiftInCmX; + float shiftY = 0.5f*theDetParam.lorentzShiftInCmY; + + if ( UseErrorsFromTemplates_ ) { + + float qclus = theClusterParam.theCluster->charge(); + float locBz = theDetParam.bz; + float locBx = theDetParam.bx; + //cout << "PixelCPEFast::localPosition(...) : locBz = " << locBz << endl; + + theClusterParam.pixmx = std::numeric_limits::max(); // max pixel charge for truncation of 2-D cluster + + theClusterParam.sigmay = -999.9; // CPE Generic y-error for multi-pixel cluster + theClusterParam.sigmax = -999.9; // CPE Generic x-error for multi-pixel cluster + theClusterParam.sy1 = -999.9; // CPE Generic y-error for single single-pixel + theClusterParam.sy2 = -999.9; // CPE Generic y-error for single double-pixel cluster + theClusterParam.sx1 = -999.9; // CPE Generic x-error for single single-pixel cluster + theClusterParam.sx2 = -999.9; // CPE Generic x-error for single double-pixel cluster + + float dummy; + + SiPixelGenError gtempl(thePixelGenError_); + int gtemplID_ = theDetParam.detTemplateId; + + theClusterParam.qBin_ = gtempl.qbin( gtemplID_, theClusterParam.cotalpha, theClusterParam.cotbeta, locBz, locBx, qclus, + false, + theClusterParam.pixmx, theClusterParam.sigmay, dummy, + theClusterParam.sigmax, dummy, theClusterParam.sy1, + dummy, theClusterParam.sy2, dummy, theClusterParam.sx1, + dummy, theClusterParam.sx2, dummy ); + + + theClusterParam.sigmax = theClusterParam.sigmax * micronsToCm; + theClusterParam.sx1 = 
theClusterParam.sx1 * micronsToCm; + theClusterParam.sx2 = theClusterParam.sx2 * micronsToCm; + + theClusterParam.sigmay = theClusterParam.sigmay * micronsToCm; + theClusterParam.sy1 = theClusterParam.sy1 * micronsToCm; + theClusterParam.sy2 = theClusterParam.sy2 * micronsToCm; + + } // if ( UseErrorsFromTemplates_ ) + else { + theClusterParam.qBin_ = 0; + } + + int Q_f_X; //!< Q of the first pixel in X + int Q_l_X; //!< Q of the last pixel in X + int Q_f_Y; //!< Q of the first pixel in Y + int Q_l_Y; //!< Q of the last pixel in Y + collect_edge_charges( theClusterParam, + Q_f_X, Q_l_X, + Q_f_Y, Q_l_Y, + UseErrorsFromTemplates_ && TruncatePixelCharge_ + ); + + //--- Find the inner widths along X and Y in one shot. We + //--- compute the upper right corner of the inner pixels + //--- (== lower left corner of upper right pixel) and + //--- the lower left corner of the inner pixels + //--- (== upper right corner of lower left pixel), and then + //--- subtract these two points in the formula. 
+ + //--- Upper Right corner of Lower Left pixel -- in measurement frame + uint16_t llx = theClusterParam.theCluster->minPixelRow()+1; + uint16_t lly = theClusterParam.theCluster->minPixelCol()+1; + + //--- Lower Left corner of Upper Right pixel -- in measurement frame + uint16_t urx = theClusterParam.theCluster->maxPixelRow(); + uint16_t ury = theClusterParam.theCluster->maxPixelCol(); + + auto llxl = phase1PixelTopology::localX(llx); + auto llyl = phase1PixelTopology::localY(lly); + auto urxl = phase1PixelTopology::localX(urx); + auto uryl = phase1PixelTopology::localY(ury); + + + float xPos = + generic_position_formula( theClusterParam.theCluster->sizeX(), + Q_f_X, Q_l_X, + llxl, urxl, + chargeWidthX, // lorentz shift in cm + theDetParam.theThickness, + theClusterParam.cotalpha, + theDetParam.thePitchX, + phase1PixelTopology::isBigPixX( theClusterParam.theCluster->minPixelRow() ), + phase1PixelTopology::isBigPixX( theClusterParam.theCluster->maxPixelRow() ) + ); + + // apply the lorentz offset correction + xPos = xPos + shiftX + theDetParam.thePitchX*float(phase1PixelTopology::xOffset); + + float yPos = + generic_position_formula( theClusterParam.theCluster->sizeY(), + Q_f_Y, Q_l_Y, + llyl, uryl, + chargeWidthY, // lorentz shift in cm + theDetParam.theThickness, + theClusterParam.cotbeta, + theDetParam.thePitchY, + phase1PixelTopology::isBigPixY( theClusterParam.theCluster->minPixelCol() ), + phase1PixelTopology::isBigPixY( theClusterParam.theCluster->maxPixelCol() ) + ); + // apply the lorentz offset correction + yPos = yPos + shiftY + theDetParam.thePitchY*float(phase1PixelTopology::yOffset); + + + { + // ok now do GPU like ... 
+ + pixelCPEforGPU::ClusParams cp; + + + cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); + cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); + cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); + cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); + + cp.Q_f_X[0] = Q_f_X; + cp.Q_l_X[0] = Q_l_X; + cp.Q_f_Y[0] = Q_f_Y; + cp.Q_l_Y[0] = Q_l_Y; + + auto ind = theDetParam.theDet->index(); + pixelCPEforGPU::position(m_commonParamsGPU, m_detParamsGPU[ind],cp,0); + auto xg = cp.xpos[0]; + auto yg = cp.ypos[0]; + + if(std::abs(xPos-xg)>0.001) {++statx.c; statx.maxx=std::max(statx.maxx,xPos);} + statx.maxd=std::max(std::abs(xPos-xg), statx.maxd); + if(std::abs(yPos-yg)>0.001) {++staty.c; staty.maxx=std::max(staty.maxx,yPos);} + staty.maxd=std::max(std::abs(yPos-yg), staty.maxd); + if(std::abs(xPos-xg)>0.001 || std::abs(yPos-yg)>0.001) + std::cout << (m_detParamsGPU[ind].isBarrel ? "B " : "E ") << xPos <<'/'<=upper_edge_first_pix); + float W_inner = pitch * float(lower_edge_last_pix-upper_edge_first_pix); // in cm + + //--- Predicted charge width from geometry + float W_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! + + W_eff = std::abs( W_pred ) - W_inner; + + //--- If the observed charge width is inconsistent with the expectations + //--- based on the track, do *not* use W_pred-W_innner. Instead, replace + //--- it with an *average* effective charge width, which is the average + //--- length of the edge pixels. + // + simple = ( W_eff < 0.0f ) | ( W_eff > pitch ); // this produces "large" regressions for very small numeric differences... + + } + if (simple) { + //--- Total length of the two edge pixels (first+last) + float sum_of_edge = 2.0f; + if (first_is_big) sum_of_edge += 1.0f; + if (last_is_big) sum_of_edge += 1.0f; + W_eff = pitch * 0.5f * sum_of_edge; // ave. 
length of edge pixels (first+last) (cm) + } + + + //--- Finally, compute the position in this projection + float Qdiff = Q_l - Q_f; + float Qsum = Q_l + Q_f; + + //--- Temporary fix for clusters with both first and last pixel with charge = 0 + if(Qsum==0) Qsum=1.0f; + float hit_pos = geom_center + 0.5f*(Qdiff/Qsum) * W_eff; + + return hit_pos; +} + + +//----------------------------------------------------------------------------- +//! Collect the edge charges in x and y, in a single pass over the pixel vector. +//! Calculate charge in the first and last pixel projected in x and y +//! and the inner cluster charge, projected in x and y. +//----------------------------------------------------------------------------- +void +PixelCPEFast:: +collect_edge_charges(ClusterParam & theClusterParamBase, //!< input, the cluster + int & Q_f_X, //!< output, Q first in X + int & Q_l_X, //!< output, Q last in X + int & Q_f_Y, //!< output, Q first in Y + int & Q_l_Y, //!< output, Q last in Y + bool truncate +) +{ + ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); + + // Initialize return variables. + Q_f_X = Q_l_X = 0; + Q_f_Y = Q_l_Y = 0; + + + // Obtain boundaries in index units + int xmin = theClusterParam.theCluster->minPixelRow(); + int xmax = theClusterParam.theCluster->maxPixelRow(); + int ymin = theClusterParam.theCluster->minPixelCol(); + int ymax = theClusterParam.theCluster->maxPixelCol(); + + + // Iterate over the pixels. 
+ int isize = theClusterParam.theCluster->size(); + for (int i = 0; i != isize; ++i) + { + auto const & pixel = theClusterParam.theCluster->pixel(i); + // ggiurgiu@fnal.gov: add pixel charge truncation + int pix_adc = pixel.adc; + if ( truncate ) + pix_adc = std::min(pix_adc, theClusterParam.pixmx ); + + // + // X projection + if ( pixel.x == xmin ) Q_f_X += pix_adc; + if ( pixel.x == xmax ) Q_l_X += pix_adc; + // + // Y projection + if ( pixel.y == ymin ) Q_f_Y += pix_adc; + if ( pixel.y == ymax ) Q_l_Y += pix_adc; + } +} + + +//============== INFLATED ERROR AND ERRORS FROM DB BELOW ================ + +//------------------------------------------------------------------------- +// Hit error in the local frame +//------------------------------------------------------------------------- +LocalError +PixelCPEFast::localError(DetParam const & theDetParam, ClusterParam & theClusterParamBase) const +{ + + ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); + + // Default errors are the maximum error used for edge clusters. + // These are determined by looking at residuals for edge clusters + float xerr = EdgeClusterErrorX_ * micronsToCm; + float yerr = EdgeClusterErrorY_ * micronsToCm; + + + // Find if cluster is at the module edge. + int maxPixelCol = theClusterParam.theCluster->maxPixelCol(); + int maxPixelRow = theClusterParam.theCluster->maxPixelRow(); + int minPixelCol = theClusterParam.theCluster->minPixelCol(); + int minPixelRow = theClusterParam.theCluster->minPixelRow(); + + bool edgex = phase1PixelTopology::isEdgeX(minPixelRow) | phase1PixelTopology::isEdgeX(maxPixelRow); + bool edgey = phase1PixelTopology::isEdgeY(minPixelCol) | phase1PixelTopology::isEdgeY(maxPixelCol); + + unsigned int sizex = theClusterParam.theCluster->sizeX(); + unsigned int sizey = theClusterParam.theCluster->sizeY(); + + // Find if cluster contains double (big) pixels. 
+ bool bigInX = theDetParam.theRecTopol->containsBigPixelInX( minPixelRow, maxPixelRow ); + bool bigInY = theDetParam.theRecTopol->containsBigPixelInY( minPixelCol, maxPixelCol ); + + if (UseErrorsFromTemplates_ ) { + // + // Use template errors + + if ( !edgex ) { // Only use this for non-edge clusters + if ( sizex == 1 ) { + if ( !bigInX ) {xerr = theClusterParam.sx1;} + else {xerr = theClusterParam.sx2;} + } else {xerr = theClusterParam.sigmax;} + } + + if ( !edgey ) { // Only use for non-edge clusters + if ( sizey == 1 ) { + if ( !bigInY ) {yerr = theClusterParam.sy1;} + else {yerr = theClusterParam.sy2;} + } else {yerr = theClusterParam.sigmay;} + } + + } else { // simple errors + + // This are the simple errors, hardcoded in the code + //cout << "Track angles are not known " << endl; + //cout << "Default angle estimation which assumes track from PV (0,0,0) does not work." << endl; + + if ( GeomDetEnumerators::isTrackerPixel(theDetParam.thePart) ) { + if(GeomDetEnumerators::isBarrel(theDetParam.thePart)) { + + DetId id = (theDetParam.theDet->geographicalId()); + int layer=ttopo_.layer(id); + if ( layer==1 ) { + if ( !edgex ) { + if ( sizex<=xerr_barrel_l1_.size() ) xerr=xerr_barrel_l1_[sizex-1]; + else xerr=xerr_barrel_l1_def_; + } + + if ( !edgey ) { + if ( sizey<=yerr_barrel_l1_.size() ) yerr=yerr_barrel_l1_[sizey-1]; + else yerr=yerr_barrel_l1_def_; + } + } else{ // layer 2,3 + if ( !edgex ) { + if ( sizex<=xerr_barrel_ln_.size() ) xerr=xerr_barrel_ln_[sizex-1]; + else xerr=xerr_barrel_ln_def_; + } + + if ( !edgey ) { + if ( sizey<=yerr_barrel_ln_.size() ) yerr=yerr_barrel_ln_[sizey-1]; + else yerr=yerr_barrel_ln_def_; + } + } + + } else { // EndCap + + if ( !edgex ) { + if ( sizex<=xerr_endcap_.size() ) xerr=xerr_endcap_[sizex-1]; + else xerr=xerr_endcap_def_; + } + + if ( !edgey ) { + if ( sizey<=yerr_endcap_.size() ) yerr=yerr_endcap_[sizey-1]; + else yerr=yerr_endcap_def_; + } + } // end endcap + } + + } // end + + auto xerr_sq = xerr*xerr; + auto 
yerr_sq = yerr*yerr; + + return LocalError( xerr_sq, 0, yerr_sq ); + +} From 1b0c0faf7f6c6f06631d7999f03aa75373205816 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 15 Feb 2018 18:14:35 +0100 Subject: [PATCH 009/149] Move cudaCheck.h under HeterogeneousCore/CUDAUtilities --- RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index e2d238563ec43..dcca841e6d156 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -17,7 +17,7 @@ #include #include -#include "EventFilter/SiPixelRawToDigi/plugins/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include From 1dfe4f22c35737ae7c9827d66678a49ff6ed7935 Mon Sep 17 00:00:00 2001 From: Cesare Calabria Date: Tue, 20 Feb 2018 15:51:36 +0100 Subject: [PATCH 010/149] R2D: use GPU::SimpleVector for the error unpacking (cms-patatrack#14) --- EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index aabe584943683..68ae0c1706807 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -35,6 +35,7 @@ ## Empty Regions PSet means complete unpacking siPixelDigisGPU.Regions = cms.PSet( ) siPixelDigisGPU.CablingMapLabel = cms.string("") +siPixelDigisGPU.enableErrorDebug = cms.bool(False) from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toModify(siPixelDigis, UsePhase1=True) From 78b9a6105aec2f775366aafddd70f242666e775c Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 28 Feb 2018 14:41:10 +0100 Subject: [PATCH 011/149] Remove debug messages (cms-patatrack#21) --- 
.../SiPixelRecHits/src/PixelCPEFast.cc | 50 +------------------ 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index dcca841e6d156..3a466be6d57c4 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -21,20 +21,8 @@ #include - namespace { - - struct Stat { - Stat():c(0){} - ~Stat(){std::cout << "CPE stat " << c << ' ' << maxx << ' ' << maxd << std::endl;} - std::atomic c; - float maxx=0.f, maxd=0.0f; - }; - Stat statx, staty; - } - namespace { constexpr float micronsToCm = 1.0e-4; - const bool MYDEBUG = false; } //----------------------------------------------------------------------------- @@ -63,11 +51,7 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf, throw cms::Exception("InvalidCalibrationLoaded") << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " << ( *genErrorDBObject_ ).version(); - if(MYDEBUG) std::cout<<"Loaded genErrorDBObject v"<<( *genErrorDBObject_ ).version()<< std::endl; - } else { - if(MYDEBUG) std::cout<<" Use simple parametrised errors "<< std::endl; - } // if ( UseErrorsFromTemplates_ ) - + } // Rechit errors in case other, more correct, errors are not vailable // This are constants. Maybe there is a more efficienct way to store them. @@ -289,38 +273,6 @@ PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClus // apply the lorentz offset correction yPos = yPos + shiftY + theDetParam.thePitchY*float(phase1PixelTopology::yOffset); - - { - // ok now do GPU like ... 
- - pixelCPEforGPU::ClusParams cp; - - - cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); - cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); - cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); - cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); - - cp.Q_f_X[0] = Q_f_X; - cp.Q_l_X[0] = Q_l_X; - cp.Q_f_Y[0] = Q_f_Y; - cp.Q_l_Y[0] = Q_l_Y; - - auto ind = theDetParam.theDet->index(); - pixelCPEforGPU::position(m_commonParamsGPU, m_detParamsGPU[ind],cp,0); - auto xg = cp.xpos[0]; - auto yg = cp.ypos[0]; - - if(std::abs(xPos-xg)>0.001) {++statx.c; statx.maxx=std::max(statx.maxx,xPos);} - statx.maxd=std::max(std::abs(xPos-xg), statx.maxd); - if(std::abs(yPos-yg)>0.001) {++staty.c; staty.maxx=std::max(staty.maxx,yPos);} - staty.maxd=std::max(std::abs(yPos-yg), staty.maxd); - if(std::abs(xPos-xg)>0.001 || std::abs(yPos-yg)>0.001) - std::cout << (m_detParamsGPU[ind].isBarrel ? "B " : "E ") << xPos <<'/'< Date: Wed, 28 Feb 2018 17:50:03 +0100 Subject: [PATCH 012/149] Add workflows for Riemann fit and GPU (cms-patatrack#20) * add `riemannFit` and `gpu` modifiers and workflows for Riemann fit and GPU modules * switch GPU modules with `gpu` modifier --- .../SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py | 5 +++++ .../SiPixelRecHits/python/SiPixelRecHits_cfi.py | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index 68ae0c1706807..538fd3dc97588 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -41,3 +41,8 @@ phase1Pixel.toModify(siPixelDigis, UsePhase1=True) phase1Pixel.toModify(siPixelDigisGPU, UsePhase1=True) +# In principle I would like to hide the name 'siPixelDigisGPU', but it +# is used in test/runRawToDigi_GPU_phase1.py which I also don't want +# to break +from Configuration.ProcessModifiers.gpu_cff 
import gpu +gpu.toReplaceWith(siPixelDigis, siPixelDigisGPU) diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 465aa0bb346ce..495d6a5c84f55 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -1,11 +1,16 @@ import FWCore.ParameterSet.Config as cms +from RecoLocalTracker.SiPixelRecHits.SiPixelRecHitsGPU_cfi import siPixelRecHits as _siPixelRecHitsGPU siPixelRecHits = cms.EDProducer("SiPixelRecHitConverter", src = cms.InputTag("siPixelClusters"), CPE = cms.string('PixelCPEGeneric'), - VerboseLevel = cms.untracked.int32(0) + VerboseLevel = cms.untracked.int32(0), + ) +from Configuration.ProcessModifiers.gpu_cff import gpu +gpu.toReplaceWith(siPixelRecHits, _siPixelRecHitsGPU) + siPixelRecHitsPreSplitting = siPixelRecHits.clone( src = 'siPixelClustersPreSplitting' ) From 400d3088cf9f00fbc4e00b81dd311189febe3473 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 28 Feb 2018 19:41:17 +0100 Subject: [PATCH 013/149] Move the import next to the GPU modifier --- RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 495d6a5c84f55..c19315cbc66c9 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -1,6 +1,5 @@ import FWCore.ParameterSet.Config as cms -from RecoLocalTracker.SiPixelRecHits.SiPixelRecHitsGPU_cfi import siPixelRecHits as _siPixelRecHitsGPU siPixelRecHits = cms.EDProducer("SiPixelRecHitConverter", src = cms.InputTag("siPixelClusters"), CPE = cms.string('PixelCPEGeneric'), @@ -8,6 +7,7 @@ ) +from RecoLocalTracker.SiPixelRecHits.SiPixelRecHitsGPU_cfi import siPixelRecHits as _siPixelRecHitsGPU 
from Configuration.ProcessModifiers.gpu_cff import gpu gpu.toReplaceWith(siPixelRecHits, _siPixelRecHitsGPU) From e8cc5ff22c2f0bb59df9cfe078d15f39006efd17 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 1 Mar 2018 14:28:26 +0100 Subject: [PATCH 014/149] Remove -O2/-O3 from CUDA flags (cms-patatrack#29) After cms-sw/cmsdist#3786 the default CUDA flags are set to `-O3 -std=c++14 --expt-relaxed-constexpr --expt-extended-lambda` . Since `nvcc` does not support multiple `-On` options on the command line, remove them from the `CUDA_FLAGS` set in the BuildFile.xml . --- .../SiPixelRawToDigi/plugins/BuildFile.xml | 15 ++-- .../SiPixelClusterizer/test/BuildFile.xml | 71 +++++++++---------- .../SiPixelRecHits/plugins/BuildFile.xml | 15 ++-- 3 files changed, 48 insertions(+), 53 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 1803bd76ef4ec..790b772fc6feb 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,10 +1,9 @@ - - - + + + - - - - - + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml index beefe8560e345..3fc830883ca58 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml @@ -1,42 +1,39 @@ - - - - - - + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - -# for tracks - - - - -# for lumi - - -# - - + + - - + + - - + + - - + + + + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 526d837e1ec01..4f8efa0a80be0 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,9 +1,8 @@ - - - - - - - - + + + + + + + From fce61c46065092ed62a939b7fa992d5ee3f00e8c Mon Sep 17 00:00:00 2001 From: 
Vincenzo Innocente Date: Thu, 1 Mar 2018 20:49:57 +0100 Subject: [PATCH 015/149] GPU2CPU for clusters and RecHIts (cms-patatrack#18) --- .../SiPixelRecHits/interface/PixelCPEFast.h | 17 --- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 52 +++++++ .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 8 +- .../SiPixelRecHits/src/PixelCPEFast.cc | 144 +++--------------- 4 files changed, 77 insertions(+), 144 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 74b1573423f85..ed1ae37eb1324 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -49,23 +49,6 @@ class PixelCPEFast final : public PixelCPEBase LocalPoint localPosition (DetParam const & theDetParam, ClusterParam & theClusterParam) const override; LocalError localError (DetParam const & theDetParam, ClusterParam & theClusterParam) const override; - //-------------------------------------------------------------------- - // Methods. - //------------------------------------------------------------------ - static float - generic_position_formula( int size, //!< Size of this projection. - int Q_f, //!< Charge in the first pixel. - int Q_l, //!< Charge in the last pixel. - uint16_t upper_edge_first_pix, //!< As the name says. - uint16_t lower_edge_last_pix, //!< As the name says. 
- float lorentz_shift, //!< L-width - float theThickness, //detector thickness - float cot_angle, //!< cot of alpha_ or beta_ - float pitch, //!< thePitchX or thePitchY - bool first_is_big, //!< true if the first is big - bool last_is_big //!< true if the last is big - ); - static void collect_edge_charges(ClusterParam & theClusterParam, //!< input, the cluster int & Q_f_X, //!< output, Q first in X diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 900ae64969285..d25e5649bcc74 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -4,6 +4,7 @@ #include "DataFormats/GeometrySurface/interface/SOARotation.h" #include #include +#include #include @@ -68,6 +69,9 @@ namespace pixelCPEforGPU { float xpos[N]; float ypos[N]; + + float xerr[N]; + float yerr[N]; }; @@ -203,4 +207,52 @@ namespace pixelCPEforGPU { } + // FIXME these are errors form Run1 + constexpr inline + void error(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) { + // Edge cluster errors + cp.xerr[ic]= 0.0050; + cp.yerr[ic]= 0.0085; + + + constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; + constexpr float xerr_barrel_l1_def = 0.01030; + constexpr float yerr_barrel_l1[] = {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; + constexpr float yerr_barrel_l1_def=0.00210; + constexpr float xerr_barrel_ln[]= {0.00115, 0.00120, 0.00088}; + constexpr float xerr_barrel_ln_def=0.01030; + constexpr float yerr_barrel_ln[]= {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; + constexpr float yerr_barrel_ln_def=0.00210; + constexpr float xerr_endcap[]= {0.0020, 0.0020}; + constexpr float xerr_endcap_def=0.0020; + constexpr float yerr_endcap[]= {0.00210}; + constexpr float yerr_endcap_def=0.00210; + + // is edgy? 
+ bool isEdgeX = cp.minRow[ic]==0 || cp.maxRow[ic]==phase1PixelTopology::lastRowInModule; + bool isEdgeY = cp.minCol[ic]==0 || cp.maxCol[ic]==phase1PixelTopology::lastColInModule; + + if (!isEdgeX) { + auto sx = cp.maxRow[ic]-cp.minRow[ic]; + if (!detParams.isBarrel ) { + cp.xerr[ic] = sx commonParams(), cpeParams->detParams(me), clusParams,ic); - + pixelCPEforGPU::error(cpeParams->commonParams(), cpeParams->detParams(me), clusParams,ic); + chargeh[h] = clusParams.charge[ic]; if (local) { @@ -123,7 +125,9 @@ namespace gpuPixelRecHits { xh[h],yh[h],zh[h] ); } - + xe[h]= clusParams.xerr[ic]; + ye[h]= clusParams.yerr[ic]; + mr[h]= clusParams.minRow[ic]; } } diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 3a466be6d57c4..abd6dd1bb6925 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -166,11 +166,6 @@ PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClus assert(!theClusterParam.with_track_angle); - float chargeWidthX = (theDetParam.lorentzShiftInCmX * theDetParam.widthLAFractionX); - float chargeWidthY = (theDetParam.lorentzShiftInCmY * theDetParam.widthLAFractionY); - float shiftX = 0.5f*theDetParam.lorentzShiftInCmX; - float shiftY = 0.5f*theDetParam.lorentzShiftInCmY; - if ( UseErrorsFromTemplates_ ) { float qclus = theClusterParam.theCluster->charge(); @@ -223,135 +218,34 @@ PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClus UseErrorsFromTemplates_ && TruncatePixelCharge_ ); - //--- Find the inner widths along X and Y in one shot. We - //--- compute the upper right corner of the inner pixels - //--- (== lower left corner of upper right pixel) and - //--- the lower left corner of the inner pixels - //--- (== upper right corner of lower left pixel), and then - //--- subtract these two points in the formula. 
- - //--- Upper Right corner of Lower Left pixel -- in measurement frame - uint16_t llx = theClusterParam.theCluster->minPixelRow()+1; - uint16_t lly = theClusterParam.theCluster->minPixelCol()+1; - - //--- Lower Left corner of Upper Right pixel -- in measurement frame - uint16_t urx = theClusterParam.theCluster->maxPixelRow(); - uint16_t ury = theClusterParam.theCluster->maxPixelCol(); - - auto llxl = phase1PixelTopology::localX(llx); - auto llyl = phase1PixelTopology::localY(lly); - auto urxl = phase1PixelTopology::localX(urx); - auto uryl = phase1PixelTopology::localY(ury); - - float xPos = - generic_position_formula( theClusterParam.theCluster->sizeX(), - Q_f_X, Q_l_X, - llxl, urxl, - chargeWidthX, // lorentz shift in cm - theDetParam.theThickness, - theClusterParam.cotalpha, - theDetParam.thePitchX, - phase1PixelTopology::isBigPixX( theClusterParam.theCluster->minPixelRow() ), - phase1PixelTopology::isBigPixX( theClusterParam.theCluster->maxPixelRow() ) - ); - - // apply the lorentz offset correction - xPos = xPos + shiftX + theDetParam.thePitchX*float(phase1PixelTopology::xOffset); - - float yPos = - generic_position_formula( theClusterParam.theCluster->sizeY(), - Q_f_Y, Q_l_Y, - llyl, uryl, - chargeWidthY, // lorentz shift in cm - theDetParam.theThickness, - theClusterParam.cotbeta, - theDetParam.thePitchY, - phase1PixelTopology::isBigPixY( theClusterParam.theCluster->minPixelCol() ), - phase1PixelTopology::isBigPixY( theClusterParam.theCluster->maxPixelCol() ) - ); - // apply the lorentz offset correction - yPos = yPos + shiftY + theDetParam.thePitchY*float(phase1PixelTopology::yOffset); - - //--- Now put the two together - LocalPoint pos_in_local( xPos, yPos ); - return pos_in_local; -} + // do GPU like ... 
+ pixelCPEforGPU::ClusParams cp; + + cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); + cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); + cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); + cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); -//----------------------------------------------------------------------------- -//! A generic version of the position formula. Since it works for both -//! X and Y, in the interest of the simplicity of the code, all parameters -//! are passed by the caller. The only class variable used by this method -//! is the theThickness, since that's common for both X and Y. -//----------------------------------------------------------------------------- -float -PixelCPEFast:: -generic_position_formula( int size, //!< Size of this projection. - int Q_f, //!< Charge in the first pixel. - int Q_l, //!< Charge in the last pixel. - uint16_t upper_edge_first_pix, //!< As the name says. - uint16_t lower_edge_last_pix, //!< As the name says. - float lorentz_shift, //!< L-shift at half thickness - float theThickness, //detector thickness - float cot_angle, //!< cot of alpha_ or beta_ - float pitch, //!< thePitchX or thePitchY - bool first_is_big, //!< true if the first is big - bool last_is_big //!< true if the last is big - ) -{ - - float geom_center = 0.5f * pitch*float( upper_edge_first_pix + lower_edge_last_pix ); - - //--- The case of only one pixel in this projection is separate. Note that - //--- here first_pix == last_pix, so the average of the two is still the - //--- center of the pixel. - if ( size == 1 ) {return geom_center;} - - float W_eff; // the compiler detects the logic below (and warns if buggy!!!!0 - bool simple=true; - if (size==2) { - //--- Width of the clusters minus the edge (first and last) pixels. 
- //--- In the note, they are denoted x_F and x_L (and y_F and y_L) - assert(lower_edge_last_pix>=upper_edge_first_pix); - float W_inner = pitch * float(lower_edge_last_pix-upper_edge_first_pix); // in cm - - //--- Predicted charge width from geometry - float W_pred = theThickness * cot_angle // geometric correction (in cm) - - lorentz_shift; // (in cm) &&& check fpix! - - W_eff = std::abs( W_pred ) - W_inner; + cp.Q_f_X[0] = Q_f_X; + cp.Q_l_X[0] = Q_l_X; + cp.Q_f_Y[0] = Q_f_Y; + cp.Q_l_Y[0] = Q_l_Y; - //--- If the observed charge width is inconsistent with the expectations - //--- based on the track, do *not* use W_pred-W_innner. Instead, replace - //--- it with an *average* effective charge width, which is the average - //--- length of the edge pixels. - // - simple = ( W_eff < 0.0f ) | ( W_eff > pitch ); // this produces "large" regressions for very small numeric differences... + auto ind = theDetParam.theDet->index(); + pixelCPEforGPU::position(m_commonParamsGPU, m_detParamsGPU[ind],cp,0); + auto xPos = cp.xpos[0]; + auto yPos = cp.ypos[0]; - } - if (simple) { - //--- Total length of the two edge pixels (first+last) - float sum_of_edge = 2.0f; - if (first_is_big) sum_of_edge += 1.0f; - if (last_is_big) sum_of_edge += 1.0f; - W_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) - } - - - //--- Finally, compute the position in this projection - float Qdiff = Q_l - Q_f; - float Qsum = Q_l + Q_f; - - //--- Temporary fix for clusters with both first and last pixel with charge = 0 - if(Qsum==0) Qsum=1.0f; - float hit_pos = geom_center + 0.5f*(Qdiff/Qsum) * W_eff; - - return hit_pos; + //--- Now put the two together + LocalPoint pos_in_local( xPos, yPos ); + return pos_in_local; } + //----------------------------------------------------------------------------- //! Collect the edge charges in x and y, in a single pass over the pixel vector. //! 
Calculate charge in the first and last pixel projected in x and y From b518a250c165c3a574b5b4acfcf160fb6a780142 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 1 Mar 2018 20:57:16 +0100 Subject: [PATCH 016/149] Use the `gpu` modifier to read the pixel clusters from the unpacker (cms-patatrack#31) When running the GPU algorithms, the pixel unpacker is responsible for providing both the digis and the cluster. These changes make use of the unpacker label to access the clusters, conditionally on the presence of the `gpu` process modifier. --- .../python/SiPixelClusterizerPreSplitting_cfi.py | 5 +++-- RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py b/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py index ba8d492c5f610..4e6ff591fb78a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py @@ -1,7 +1,8 @@ - import FWCore.ParameterSet.Config as cms -# from CondTools.SiPixel.SiPixelGainCalibrationService_cfi import * from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizer_cfi import siPixelClusters as _siPixelClusters siPixelClustersPreSplitting = _siPixelClusters.clone() + +# In principle we could remove `siPixelClustersPreSplitting` from the `pixeltrackerlocalreco` +# sequence when the `gpu` modufier is active; for the time being we keep it for simplicity.
diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index c19315cbc66c9..b13fd64d41565 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -1,16 +1,16 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.gpu_cff import gpu siPixelRecHits = cms.EDProducer("SiPixelRecHitConverter", src = cms.InputTag("siPixelClusters"), CPE = cms.string('PixelCPEGeneric'), VerboseLevel = cms.untracked.int32(0), - ) from RecoLocalTracker.SiPixelRecHits.SiPixelRecHitsGPU_cfi import siPixelRecHits as _siPixelRecHitsGPU -from Configuration.ProcessModifiers.gpu_cff import gpu gpu.toReplaceWith(siPixelRecHits, _siPixelRecHitsGPU) siPixelRecHitsPreSplitting = siPixelRecHits.clone( src = 'siPixelClustersPreSplitting' ) +gpu.toModify(siPixelRecHitsPreSplitting, src = 'siPixelDigis') From c1d8e149607d850eb87b6df87f722c2db832c9ef Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 13 Mar 2018 17:22:15 +0100 Subject: [PATCH 017/149] Use CUDA Unified Addressing Update all calls to `cudaMemcpy` and `cudaMemcpyAsync` to pass `cudaMemcpyDefault` instead of explicitly specifying `cudaMemcpyHostToDevice`, `cudaMemcpyDeviceToHost`, etc. 
--- RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index abd6dd1bb6925..54948d6e43bf6 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -130,9 +130,9 @@ void PixelCPEFast::fillParamsForGpu() { cudaCheck(cudaMalloc((void**) & h_paramsOnGPU.m_detParams, m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams))); cudaCheck(cudaMalloc((void**) & d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); - cudaCheck(cudaMemcpy(d_paramsOnGPU, &h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy(h_paramsOnGPU.m_commonParams,&m_commonParamsGPU,sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy(h_paramsOnGPU.m_detParams, m_detParamsGPU.data(), m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams), cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(d_paramsOnGPU, &h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault)); + cudaCheck(cudaMemcpy(h_paramsOnGPU.m_commonParams,&m_commonParamsGPU,sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault)); + cudaCheck(cudaMemcpy(h_paramsOnGPU.m_detParams, m_detParamsGPU.data(), m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault)); cudaDeviceSynchronize(); } From b1473c708fbf04c888397e02aae967d77fb33a1e Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 15 Mar 2018 10:54:19 +0100 Subject: [PATCH 018/149] Introduce an STL-compatible allocator for CUDA host memory --- .../SiPixelRecHits/interface/PixelCPEFast.h | 14 ++------ .../SiPixelRecHits/src/PixelCPEFast.cc | 34 +++---------------- 2 files changed, 8 insertions(+), 40 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h 
b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index ed1ae37eb1324..d845cddd6702e 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -9,6 +9,7 @@ #include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelTemplate.h" #include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelGenError.h" +#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" #include #include @@ -43,6 +44,7 @@ class PixelCPEFast final : public PixelCPEBase ~PixelCPEFast(); + private: ClusterParam * createClusterParam(const SiPixelCluster & cl) const override; @@ -75,21 +77,11 @@ class PixelCPEFast final : public PixelCPEBase public : - void fillParamsForGpu(); // not needed if not used on CPU... - std::vector m_detParamsGPU; + std::vector> m_detParamsGPU; pixelCPEforGPU::CommonParams m_commonParamsGPU; - pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; - pixelCPEforGPU::ParamsOnGPU * d_paramsOnGPU; // copy of the above on the Device - - }; - - - - - diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 54948d6e43bf6..f40fb759b1557 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -34,17 +34,15 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf, const TrackerTopology& ttopo, const SiPixelLorentzAngle * lorentzAngle, const SiPixelGenErrorDBObject * genErrorDBObject, - const SiPixelLorentzAngle * lorentzAngleWidth) - : PixelCPEBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, nullptr,lorentzAngleWidth,0) { - + const SiPixelLorentzAngle * lorentzAngleWidth) : + PixelCPEBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, nullptr, lorentzAngleWidth, 0) +{ EdgeClusterErrorX_ = conf.getParameter("EdgeClusterErrorX"); EdgeClusterErrorY_ = conf.getParameter("EdgeClusterErrorY"); - UseErrorsFromTemplates_ = 
conf.getParameter("UseErrorsFromTemplates"); TruncatePixelCharge_ = conf.getParameter("TruncatePixelCharge"); - // Use errors from templates or from GenError if ( UseErrorsFromTemplates_ ) { if ( !SiPixelGenError::pushfile( *genErrorDBObject_, thePixelGenError_) ) @@ -68,15 +66,10 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf, yerr_endcap_= {0.00210}; yerr_endcap_def_=0.00075; - - fillParamsForGpu(); - } void PixelCPEFast::fillParamsForGpu() { - - m_commonParamsGPU.theThicknessB = m_DetParams.front().theThickness; m_commonParamsGPU.theThicknessE = m_DetParams.back().theThickness; m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; @@ -89,7 +82,6 @@ void PixelCPEFast::fillParamsForGpu() { auto & g=m_detParamsGPU[i]; assert(p.theDet->index()==int(i)); - assert(m_commonParamsGPU.thePitchY==p.thePitchY); assert(m_commonParamsGPU.thePitchX==p.thePitchX); // assert(m_commonParamsGPU.theThickness==p.theThickness); @@ -121,8 +113,7 @@ void PixelCPEFast::fillParamsForGpu() { auto vv = p.theDet->surface().position(); auto rr = pixelCPEforGPU::Rotation(p.theDet->surface().rotation()); - g.frame = pixelCPEforGPU::Frame(vv.x(),vv.y(),vv.z(),rr); - + g.frame = pixelCPEforGPU::Frame(vv.x(),vv.y(),vv.z(),rr); } // and now copy to device... 
@@ -131,28 +122,22 @@ void PixelCPEFast::fillParamsForGpu() { cudaCheck(cudaMalloc((void**) & d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); cudaCheck(cudaMemcpy(d_paramsOnGPU, &h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault)); - cudaCheck(cudaMemcpy(h_paramsOnGPU.m_commonParams,&m_commonParamsGPU,sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault)); + cudaCheck(cudaMemcpy(h_paramsOnGPU.m_commonParams, &m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault)); cudaCheck(cudaMemcpy(h_paramsOnGPU.m_detParams, m_detParamsGPU.data(), m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault)); cudaDeviceSynchronize(); } PixelCPEFast::~PixelCPEFast() { - cudaFree(h_paramsOnGPU.m_commonParams); cudaFree(h_paramsOnGPU.m_detParams); cudaFree(d_paramsOnGPU); - } - - PixelCPEBase::ClusterParam* PixelCPEFast::createClusterParam(const SiPixelCluster & cl) const { return new ClusterParamGeneric(cl); } - - //----------------------------------------------------------------------------- //! Hit position in the local frame (in cm). Unlike other CPE's, this //! 
one converts everything from the measurement frame (in channel numbers) @@ -161,7 +146,6 @@ PixelCPEBase::ClusterParam* PixelCPEFast::createClusterParam(const SiPixelCluste LocalPoint PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClusterParamBase) const { - ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); assert(!theClusterParam.with_track_angle); @@ -194,7 +178,6 @@ PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClus dummy, theClusterParam.sy2, dummy, theClusterParam.sx1, dummy, theClusterParam.sx2, dummy ); - theClusterParam.sigmax = theClusterParam.sigmax * micronsToCm; theClusterParam.sx1 = theClusterParam.sx1 * micronsToCm; theClusterParam.sx2 = theClusterParam.sx2 * micronsToCm; @@ -218,11 +201,8 @@ PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClus UseErrorsFromTemplates_ && TruncatePixelCharge_ ); - // do GPU like ... - pixelCPEforGPU::ClusParams cp; - cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); @@ -244,8 +224,6 @@ PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClus return pos_in_local; } - - //----------------------------------------------------------------------------- //! Collect the edge charges in x and y, in a single pass over the pixel vector. //! Calculate charge in the first and last pixel projected in x and y @@ -267,14 +245,12 @@ collect_edge_charges(ClusterParam & theClusterParamBase, //!< input, the cluste Q_f_X = Q_l_X = 0; Q_f_Y = Q_l_Y = 0; - // Obtain boundaries in index units int xmin = theClusterParam.theCluster->minPixelRow(); int xmax = theClusterParam.theCluster->maxPixelRow(); int ymin = theClusterParam.theCluster->minPixelCol(); int ymax = theClusterParam.theCluster->maxPixelCol(); - // Iterate over the pixels. 
int isize = theClusterParam.theCluster->size(); for (int i = 0; i != isize; ++i) From e5c8f7e9dcb2ab47ad694211f5c6bd7367fc2fe6 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 4 Apr 2018 17:00:48 +0200 Subject: [PATCH 019/149] Remove #pragma once --- .../interface/phase1PixelTopology.h | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index ecc5889a28481..455de58ce3408 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -1,25 +1,24 @@ -#pragma once +#ifndef Geometry_TrackerGeometryBuilder_phase1PixelTopology_h +#define Geometry_TrackerGeometryBuilder_phase1PixelTopology_h -#include +#include namespace phase1PixelTopology { constexpr uint16_t numRowsInRoc = 80; constexpr uint16_t numColsInRoc = 52; - constexpr uint16_t lastRowInRoc = 79; - constexpr uint16_t lastColInRoc = 51; + constexpr uint16_t lastRowInRoc = numRowsInRoc - 1; + constexpr uint16_t lastColInRoc = numColsInRoc - 1; - constexpr uint16_t numRowsInModule = 2*80; - constexpr uint16_t numColsInModule = 8*52; - constexpr uint16_t lastRowInModule = 2*80-1; - constexpr uint16_t lastColInModule = 8*52-1; + constexpr uint16_t numRowsInModule = 2 * numRowsInRoc; + constexpr uint16_t numColsInModule = 8 * numColsInRoc; + constexpr uint16_t lastRowInModule = numRowsInModule - 1; + constexpr uint16_t lastColInModule = numColsInModule - 1; constexpr int16_t xOffset = -81; constexpr int16_t yOffset = -54*4; - - - constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule)* uint32_t(numColsInModule); + constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule)* uint32_t(numColsInModule); // this is for the ROC n<512 (upgrade 1024) constexpr inline @@ -29,19 +28,18 @@ namespace phase1PixelTopology { q = q + (q>>4) + (q>>5); q = q >> 3; 
uint16_t r = n - q*13; return q + ((r + 3) >> 4); - // return q + (r > 12); } constexpr inline bool isEdgeX(uint16_t px) { return (px==0) | (px==lastRowInModule);} constexpr inline - bool isEdgeY(uint16_t py) { return (py==0) | (py==lastColInModule);} + bool isEdgeY(uint16_t py) { return (py==0) | (py==lastColInModule);} + - constexpr inline uint16_t toRocX(uint16_t px) { return (px0) shift+=1; return py+shift; } - + } +#endif // Geometry_TrackerGeometryBuilder_phase1PixelTopology_h From cc65b5267a282e9cce1b29f853567a12d3c60c76 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 23 Apr 2018 11:07:35 +0200 Subject: [PATCH 020/149] Synchronise with CMSSW_10_2_0_pre1 --- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h | 3 +++ .../SiPixelRecHits/python/PixelCPEESProducers_cff.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index b7c04c98443d4..758880251f9f1 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -113,6 +113,9 @@ class PixelCPEBase : public PixelClusterParameterEstimator // ggiurgiu@jhu.edu (10/18/2008) bool with_track_angle; // filled in computeAnglesFrom.... + // More detailed edge information (for CPE ClusterRepair, and elsewhere...) 
+ int edgeTypeX_ = 0; // 0: not on edge, 1: low end on edge, 2: high end + int edgeTypeY_ = 0; // 0: not on edge, 1: low end on edge, 2: high end }; public: diff --git a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py index affcb3638cb6f..8e28bbb175181 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py +++ b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py @@ -21,7 +21,8 @@ from RecoLocalTracker.SiPixelRecHits.PixelCPEGeneric_cfi import * from RecoLocalTracker.SiPixelRecHits.PixelCPEFast_cfi import * # -# 5. The new ESProducer for the Magnetic-field dependent template record +# 5. ESProducer for the Magnetic-field dependent template records # from CalibTracker.SiPixelESProducers.SiPixelTemplateDBObjectESProducer_cfi import * +from CalibTracker.SiPixelESProducers.SiPixel2DTemplateDBObjectESProducer_cfi import * From ae971f3d97239c6d550226276c59287f58ca00eb Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 26 Apr 2018 19:19:28 +0200 Subject: [PATCH 021/149] #include ".../cuda_cxx17.h" for std::size() --- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index d25e5649bcc74..a76ca821e70da 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -1,12 +1,14 @@ -#pragma once +#ifndef RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h +#define RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h -#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" -#include "DataFormats/GeometrySurface/interface/SOARotation.h" -#include +#include #include +#include #include -#include +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include 
"Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_cxx17.h" namespace pixelCPEforGPU { @@ -256,3 +258,5 @@ namespace pixelCPEforGPU { } } + +#endif // RecoLocalTracker_SiPixelRecHits_pixelCPEforGPU_h From 67762f4fe9efe038aa595331b614e134ce2bd7ec Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 15 May 2018 16:46:03 +0200 Subject: [PATCH 022/149] Remove the use of #pragma once (cms-patatrack#47) Replace `#pragma once` with classical include guards. --- .../interface/SiPixelGainForHLTonGPU.h | 12 +++++----- .../plugins/gpuCalibPixel.h | 11 ++++++---- .../plugins/gpuClustering.h | 6 +++-- .../SiPixelRecHits/interface/PixelCPEFast.h | 22 +++++++++---------- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 9 ++++---- 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h index 12faeaaa9a845..8cf5451f91b93 100644 --- a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -1,9 +1,10 @@ -#pragma once +#ifndef CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h +#define CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h -#include -#include -#include -#include +#include +#include +#include +#include struct SiPixelGainForHLTonGPU_DecodingStructure{ uint8_t gain; @@ -67,3 +68,4 @@ class SiPixelGainForHLTonGPU { unsigned int noisyFlag_; }; +#endif // CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index fd2b28a4719e8..5e1c2a7486b56 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -1,10 +1,11 @@ -#pragma once - -#include 
"CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h +#include #include #include -#include + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" namespace gpuCalibPixel { @@ -123,3 +124,5 @@ namespace gpuCalibPixel { } + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 5ac7375e008cc..672a4dbe97450 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -1,8 +1,9 @@ -#pragma once +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h +#include #include #include -#include namespace gpuClustering { @@ -166,3 +167,4 @@ namespace gpuClustering { } //namespace gpuClustering +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index d845cddd6702e..a283cb1fa74ce 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -1,19 +1,15 @@ -#pragma once - -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" - - -// The template header files -#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelTemplate.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelGenError.h" - -#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" +#ifndef 
RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h +#define RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h #include #include +#include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" +#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelGenError.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelTemplate.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" class MagneticField; class PixelCPEFast final : public PixelCPEBase @@ -85,3 +81,5 @@ public : pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; pixelCPEforGPU::ParamsOnGPU * d_paramsOnGPU; // copy of the above on the Device }; + +#endif // RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 2a6f2d0c3d926..14e4f5f26aaa1 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -1,12 +1,12 @@ -#pragma once - -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" +#ifndef RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h +#define RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h +#include #include #include #include -#include +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" namespace gpuPixelRecHits { @@ -132,3 +132,4 @@ namespace gpuPixelRecHits { } +#endif // RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h From 1da9842ea2a15fff23184c6f429aca22a6372d7e Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 23 May 2018 16:21:20 +0200 Subject: [PATCH 023/149] Clean up some GPU- and non-GPU-related pixel tracking code (cms-patatrack#49) - clean up GPU- and non-GPU-related pixel tracking code and build files - drop obsolete or unused classes - 
rename RawToDigiGPU to SiPixelRawToDigiGPUKernel - move the definitions in SiPixelRawToDigiGPUKernel to the `pixelgpudetails` namespace --- .../SiPixelRawToDigi/plugins/BuildFile.xml | 4 +- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 317 +++++++++--------- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 96 +++--- 3 files changed, 199 insertions(+), 218 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 790b772fc6feb..3e047e2f90b57 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,8 +1,8 @@ - + - + diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index a76ca821e70da..6add3a78b96e6 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -19,12 +19,11 @@ namespace pixelCPEforGPU { struct CommonParams { float theThicknessB; float theThicknessE; - float thePitchX; + float thePitchX; float thePitchY; }; struct DetParams { - bool isBarrel; bool isPosZ; uint16_t layer; @@ -39,7 +38,6 @@ namespace pixelCPEforGPU { float x0,y0,z0; // the vertex in the local coord of the detector Frame frame; - }; @@ -51,10 +49,9 @@ namespace pixelCPEforGPU { CommonParams const & commonParams() const {return *m_commonParams;} constexpr DetParams const & detParams(int i) const {return m_detParams[i];} - }; - // SOA! 
(on device) + // SOA (on device) template struct ClusParamsT { uint32_t minRow[N]; @@ -66,12 +63,12 @@ namespace pixelCPEforGPU { int32_t Q_l_X[N]; int32_t Q_f_Y[N]; int32_t Q_l_Y[N]; - + int32_t charge[N]; float xpos[N]; float ypos[N]; - + float xerr[N]; float yerr[N]; }; @@ -85,177 +82,171 @@ namespace pixelCPEforGPU { // x,y local position on det auto gvx = x - detParams.x0; auto gvy = y - detParams.y0; - auto gvz = -1.f/detParams.z0; - // normalization not required as only ratio used... + auto gvz = -1.f / detParams.z0; + // normalization not required as only ratio used... // calculate angles - cotalpha = gvx*gvz; - cotbeta = gvy*gvz; + cotalpha = gvx * gvz; + cotbeta = gvy * gvz; } constexpr inline - float correction( - int sizeM1, - int Q_f, //!< Charge in the first pixel. - int Q_l, //!< Charge in the last pixel. - uint16_t upper_edge_first_pix, //!< As the name says. - uint16_t lower_edge_last_pix, //!< As the name says. - float lorentz_shift, //!< L-shift at half thickness - float theThickness, //detector thickness - float cot_angle, //!< cot of alpha_ or beta_ - float pitch, //!< thePitchX or thePitchY - bool first_is_big, //!< true if the first is big - bool last_is_big //!< true if the last is big - ) -{ - if (0==sizeM1) return 0; // size1 - float W_eff=0; - bool simple=true; - if (1==sizeM1) { // size 2 - //--- Width of the clusters minus the edge (first and last) pixels. - //--- In the note, they are denoted x_F and x_L (and y_F and y_L) - // assert(lower_edge_last_pix>=upper_edge_first_pix); - auto W_inner = pitch * float(lower_edge_last_pix-upper_edge_first_pix); // in cm - - //--- Predicted charge width from geometry - auto W_pred = theThickness * cot_angle // geometric correction (in cm) - - lorentz_shift; // (in cm) &&& check fpix! - - W_eff = std::abs( W_pred ) - W_inner; - - //--- If the observed charge width is inconsistent with the expectations - //--- based on the track, do *not* use W_pred-W_innner. 
Instead, replace - //--- it with an *average* effective charge width, which is the average - //--- length of the edge pixels. - // - simple = ( W_eff < 0.0f ) | ( W_eff > pitch ); // this produces "large" regressions for very small numeric differences... - - } - if (simple) { - //--- Total length of the two edge pixels (first+last) - float sum_of_edge = 2.0f; - if (first_is_big) sum_of_edge += 1.0f; - if (last_is_big) sum_of_edge += 1.0f; - W_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) - } - - - //--- Finally, compute the position in this projection - float Qdiff = Q_l - Q_f; - float Qsum = Q_l + Q_f; - - //--- Temporary fix for clusters with both first and last pixel with charge = 0 - if(Qsum==0) Qsum=1.0f; - return 0.5f*(Qdiff/Qsum) * W_eff; - + float correction( + int sizeM1, + int Q_f, //!< Charge in the first pixel. + int Q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says. + float lorentz_shift, //!< L-shift at half thickness + float theThickness, //detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big ) //!< true if the last is big + { + if (0 == sizeM1) // size 1 + return 0; + + float W_eff = 0; + bool simple = true; + if (1 == sizeM1) { // size 2 + //--- Width of the clusters minus the edge (first and last) pixels. + //--- In the note, they are denoted x_F and x_L (and y_F and y_L) + // assert(lower_edge_last_pix >= upper_edge_first_pix); + auto W_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm + + //--- Predicted charge width from geometry + auto W_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! 
+ + W_eff = std::abs(W_pred) - W_inner; + + //--- If the observed charge width is inconsistent with the expectations + //--- based on the track, do *not* use W_pred-W_inner. Instead, replace + //--- it with an *average* effective charge width, which is the average + //--- length of the edge pixels. + simple = (W_eff < 0.0f) | (W_eff > pitch); // this produces "large" regressions for very small numeric differences... + } + + if (simple) { + //--- Total length of the two edge pixels (first+last) + float sum_of_edge = 2.0f; + if (first_is_big) sum_of_edge += 1.0f; + if (last_is_big) sum_of_edge += 1.0f; + W_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) + } + + //--- Finally, compute the position in this projection + float Qdiff = Q_l - Q_f; + float Qsum = Q_l + Q_f; + + //--- Temporary fix for clusters with both first and last pixel with charge = 0 + if (Qsum == 0) + Qsum = 1.0f; + + return 0.5f * (Qdiff/Qsum) * W_eff; } constexpr inline void position(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) { - //--- Upper Right corner of Lower Left pixel -- in measurement frame - uint16_t llx = cp.minRow[ic]+1; - uint16_t lly = cp.minCol[ic]+1; - - //--- Lower Left corner of Upper Right pixel -- in measurement frame - uint16_t urx = cp.maxRow[ic]; - uint16_t ury = cp.maxCol[ic]; - - auto llxl = phase1PixelTopology::localX(llx); - auto llyl = phase1PixelTopology::localY(lly); - auto urxl = phase1PixelTopology::localX(urx); - auto uryl = phase1PixelTopology::localY(ury); - - auto mx = llxl+urxl; - auto my = llyl+uryl; - - // apply the lorentz offset correction - auto xPos = detParams.shiftX + comParams.thePitchX*(0.5f*float(mx)+float(phase1PixelTopology::xOffset)); - auto yPos = detParams.shiftY + comParams.thePitchY*(0.5f*float(my)+float(phase1PixelTopology::yOffset)); - - float cotalpha=0, cotbeta=0; - - - computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); - - auto thickness = 
detParams.isBarrel ? comParams.theThicknessB : comParams.theThicknessE; - - auto xcorr = correction( - cp.maxRow[ic]-cp.minRow[ic], - cp.Q_f_X[ic], cp.Q_l_X[ic], - llxl, urxl, - detParams.chargeWidthX, // lorentz shift in cm - thickness, - cotalpha, - comParams.thePitchX, - phase1PixelTopology::isBigPixX( cp.minRow[ic] ), - phase1PixelTopology::isBigPixX( cp.maxRow[ic] ) - ); - - - auto ycorr = correction( - cp.maxCol[ic]-cp.minCol[ic], - cp.Q_f_Y[ic], cp.Q_l_Y[ic], - llyl, uryl, - detParams.chargeWidthY, // lorentz shift in cm - thickness, - cotbeta, - comParams.thePitchY, - phase1PixelTopology::isBigPixY( cp.minCol[ic] ), - phase1PixelTopology::isBigPixY( cp.maxCol[ic] ) - ); - - cp.xpos[ic]=xPos+xcorr; - cp.ypos[ic]=yPos+ycorr; - + //--- Upper Right corner of Lower Left pixel -- in measurement frame + uint16_t llx = cp.minRow[ic]+1; + uint16_t lly = cp.minCol[ic]+1; + + //--- Lower Left corner of Upper Right pixel -- in measurement frame + uint16_t urx = cp.maxRow[ic]; + uint16_t ury = cp.maxCol[ic]; + + auto llxl = phase1PixelTopology::localX(llx); + auto llyl = phase1PixelTopology::localY(lly); + auto urxl = phase1PixelTopology::localX(urx); + auto uryl = phase1PixelTopology::localY(ury); + + auto mx = llxl+urxl; + auto my = llyl+uryl; + + // apply the lorentz offset correction + auto xPos = detParams.shiftX + comParams.thePitchX*(0.5f*float(mx)+float(phase1PixelTopology::xOffset)); + auto yPos = detParams.shiftY + comParams.thePitchY*(0.5f*float(my)+float(phase1PixelTopology::yOffset)); + + float cotalpha=0, cotbeta=0; + + computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); + + auto thickness = detParams.isBarrel ? 
comParams.theThicknessB : comParams.theThicknessE; + + auto xcorr = correction( + cp.maxRow[ic]-cp.minRow[ic], + cp.Q_f_X[ic], cp.Q_l_X[ic], + llxl, urxl, + detParams.chargeWidthX, // lorentz shift in cm + thickness, + cotalpha, + comParams.thePitchX, + phase1PixelTopology::isBigPixX(cp.minRow[ic]), + phase1PixelTopology::isBigPixX(cp.maxRow[ic]) ); + + auto ycorr = correction( + cp.maxCol[ic]-cp.minCol[ic], + cp.Q_f_Y[ic], cp.Q_l_Y[ic], + llyl, uryl, + detParams.chargeWidthY, // lorentz shift in cm + thickness, + cotbeta, + comParams.thePitchY, + phase1PixelTopology::isBigPixY(cp.minCol[ic]), + phase1PixelTopology::isBigPixY(cp.maxCol[ic]) ); + + cp.xpos[ic]=xPos+xcorr; + cp.ypos[ic]=yPos+ycorr; } - // FIXME these are errors form Run1 constexpr inline void error(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) { - // Edge cluster errors - cp.xerr[ic]= 0.0050; - cp.yerr[ic]= 0.0085; - - - constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; - constexpr float xerr_barrel_l1_def = 0.01030; - constexpr float yerr_barrel_l1[] = {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; - constexpr float yerr_barrel_l1_def=0.00210; - constexpr float xerr_barrel_ln[]= {0.00115, 0.00120, 0.00088}; - constexpr float xerr_barrel_ln_def=0.01030; - constexpr float yerr_barrel_ln[]= {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; - constexpr float yerr_barrel_ln_def=0.00210; - constexpr float xerr_endcap[]= {0.0020, 0.0020}; - constexpr float xerr_endcap_def=0.0020; - constexpr float yerr_endcap[]= {0.00210}; - constexpr float yerr_endcap_def=0.00210; - - // is edgy? 
- bool isEdgeX = cp.minRow[ic]==0 || cp.maxRow[ic]==phase1PixelTopology::lastRowInModule; - bool isEdgeY = cp.minCol[ic]==0 || cp.maxCol[ic]==phase1PixelTopology::lastColInModule; - - if (!isEdgeX) { - auto sx = cp.maxRow[ic]-cp.minRow[ic]; - if (!detParams.isBarrel ) { - cp.xerr[ic] = sx MaxNumModules - - constexpr uint32_t MaxClusInModule= pixelCPEforGPU::MaxClusInModule; + + constexpr uint32_t MaxClusInModule = pixelCPEforGPU::MaxClusInModule; using ClusParams = pixelCPEforGPU::ClusParams; @@ -33,101 +33,91 @@ namespace gpuPixelRecHits { int32_t * chargeh, float * xh, float * yh, float * zh, float * xe, float * ye, uint16_t * mr, - bool local // if true fill just x & y in local coord... - ){ - + bool local) // if true fill just x & y in local coord + { // as usual one block per module - __shared__ ClusParams clusParams; - - auto first = digiModuleStart[1 + blockIdx.x]; - + auto first = digiModuleStart[1 + blockIdx.x]; auto me = id[first]; - assert (moduleId[blockIdx.x]==me); - + assert(moduleId[blockIdx.x] == me); auto nclus = clusInModule[me]; #ifdef GPU_DEBUG if (me%100==1) - if (threadIdx.x==0) printf("hitbuilder: %d clusters in module %d. will write at %d\n",nclus,me,hitsModuleStart[me]); + if (threadIdx.x==0) printf("hitbuilder: %d clusters in module %d. 
will write at %d\n", nclus, me, hitsModuleStart[me]); #endif - assert(blockDim.x>=MaxClusInModule); - assert(nclus<=MaxClusInModule); + assert(blockDim.x >= MaxClusInModule); + assert(nclus <= MaxClusInModule); auto ic = threadIdx.x; - - if (ic::max(); clusParams.maxRow[ic] = 0; clusParams.minCol[ic] = std::numeric_limits::max(); clusParams.maxCol[ic] = 0; - clusParams.charge[ic] = 0; - clusParams.Q_f_X[ic] = 0; clusParams.Q_l_X[ic] = 0; clusParams.Q_f_Y[ic] = 0; clusParams.Q_l_Y[ic] = 0; } - - first+=threadIdx.x; - - __syncthreads(); + first += threadIdx.x; + __syncthreads(); // one thead per "digi" - - for (int i=first; i=nclus) return; + if (ic >= nclus) return; first = hitsModuleStart[me]; auto h = first+ic; // output index in global memory - assert(h<2000*256); + assert(h < 2000*256); + + pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + pixelCPEforGPU::error(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams,ic); - pixelCPEforGPU::error(cpeParams->commonParams(), cpeParams->detParams(me), clusParams,ic); - chargeh[h] = clusParams.charge[ic]; - if (local) { - xh[h]= clusParams.xpos[ic]; - yh[h]= clusParams.ypos[ic]; + if (local) { + xh[h] = clusParams.xpos[ic]; + yh[h] = clusParams.ypos[ic]; } else { - cpeParams->detParams(me).frame.toGlobal(clusParams.xpos[ic],clusParams.ypos[ic], - xh[h],yh[h],zh[h] - ); + cpeParams->detParams(me).frame.toGlobal(clusParams.xpos[ic], clusParams.ypos[ic], + xh[h], yh[h], zh[h] ); } - xe[h]= clusParams.xerr[ic]; - ye[h]= clusParams.yerr[ic]; - mr[h]= clusParams.minRow[ic]; + xe[h] = clusParams.xerr[ic]; + ye[h] = clusParams.yerr[ic]; + mr[h] = clusParams.minRow[ic]; } } From ae104ec8eb01d2901f227259d1c41836140fe58b Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Mon, 4 Jun 2018 11:26:15 +0200 Subject: [PATCH 024/149] Implement a Heterogeneous version of Raw2Cluster 
and RecHit (cms-patatrack#62) - reorganize `SiPixelRawToDigi` as `SiPixelRawToDigiHeterogeneous` using `HeterogeneousEDProducer` - output a `HeterogeneousEvent` - use `PixelThresholdClusterizer` - add `SiPixelDigiHeterogeneousConverter` - make cabling and gain transfers asynchronous - reorganize `SiPixelRecHits` as `SiPixelRecHitHeterogeneous` - move `PixelThresholdClusterizer` (back?) to interface+src in order to use it outside of RecoLocalTracker/SiPixelClusterizer - replace __host__ __device__ with constexpr to avoid weird compilation failures - split clusters to their own converter --- .../StandardSequences/python/RawToDigi_cff.py | 11 +- .../SiPixelRawToDigi/plugins/BuildFile.xml | 7 +- .../python/SiPixelRawToDigi_cfi.py | 30 +- .../SiPixelClusterizer/BuildFile.xml | 9 + .../SiPixelClusterizer/plugins/BuildFile.xml | 11 +- .../plugins/PixelClusterizerBase.h | 76 ----- .../plugins/SiPixelClusterProducer.cc | 314 +++++++++--------- .../plugins/gpuClustering.h | 9 +- .../SiPixelRecHits/plugins/BuildFile.xml | 4 + .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 6 +- .../python/SiPixelRecHits_cfi.py | 5 +- 11 files changed, 209 insertions(+), 273 deletions(-) create mode 100644 RecoLocalTracker/SiPixelClusterizer/BuildFile.xml delete mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h diff --git a/Configuration/StandardSequences/python/RawToDigi_cff.py b/Configuration/StandardSequences/python/RawToDigi_cff.py index d0af52de00a47..ec2e0d1d71997 100644 --- a/Configuration/StandardSequences/python/RawToDigi_cff.py +++ b/Configuration/StandardSequences/python/RawToDigi_cff.py @@ -1,9 +1,11 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.gpu_cff import gpu # This object is used to selectively make changes for different running # scenarios. In this case it makes changes for Run 2. 
from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import * +from EventFilter.SiPixelRawToDigi.siPixelDigisHeterogeneous_cfi import * from EventFilter.SiStripRawToDigi.SiStripDigis_cfi import * @@ -60,10 +62,14 @@ ) RawToDigi = cms.Sequence(RawToDigiTask) +_RawToDigi_gpu = RawToDigi.copy() +_RawToDigi_gpu.replace(siPixelDigis, siPixelDigisHeterogeneous + siPixelDigis) +gpu.toReplaceWith(RawToDigi, _RawToDigi_gpu) + RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigis, siStripDigis]) RawToDigi_noTk = cms.Sequence(RawToDigiTask_noTk) -RawToDigiTask_pixelOnly = cms.Task(siPixelDigis) +RawToDigiTask_pixelOnly = cms.Task(siPixelDigisHeterogeneous + siPixelDigis) RawToDigi_pixelOnly = cms.Sequence(RawToDigiTask_pixelOnly) RawToDigiTask_ecalOnly = cms.Task(ecalDigisTask, ecalPreshowerDigis, scalersRawToDigi) @@ -73,7 +79,8 @@ RawToDigi_hcalOnly = cms.Sequence(RawToDigiTask_hcalOnly) scalersRawToDigi.scalersInputTag = 'rawDataCollector' -siPixelDigis.InputLabel = 'rawDataCollector' +siPixelDigisHeterogeneous.InputLabel = 'rawDataCollector' +(~gpu).toModify(siPixelDigis, InputLabel = 'rawDataCollector') ecalDigis.InputLabel = 'rawDataCollector' ecalPreshowerDigis.sourceTag = 'rawDataCollector' hcalDigis.InputLabel = 'rawDataCollector' diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 3e047e2f90b57..0f46385a6e608 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -2,8 +2,13 @@ - + + + + + + diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index 538fd3dc97588..6567e35a24704 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -1,6 +1,6 @@ import FWCore.ParameterSet.Config as cms import EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi -import 
EventFilter.SiPixelRawToDigi.siPixelRawToDigiGPU_cfi +import EventFilter.SiPixelRawToDigi.siPixelDigiHeterogeneousConverter_cfi siPixelDigis = EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi.siPixelRawToDigi.clone() siPixelDigis.Timing = cms.untracked.bool(False) @@ -19,30 +19,12 @@ siPixelDigis.Regions = cms.PSet( ) siPixelDigis.CablingMapLabel = cms.string("") -siPixelDigisGPU = EventFilter.SiPixelRawToDigi.siPixelRawToDigiGPU_cfi.siPixelRawToDigiGPU.clone() -siPixelDigisGPU.Timing = cms.untracked.bool(False) -siPixelDigisGPU.IncludeErrors = cms.bool(True) -siPixelDigisGPU.InputLabel = cms.InputTag("rawDataCollector") -siPixelDigisGPU.UseQualityInfo = cms.bool(False) -## ErrorList: list of error codes used by tracking to invalidate modules -siPixelDigisGPU.ErrorList = cms.vint32(29) -## UserErrorList: list of error codes used by Pixel experts for investigation -siPixelDigisGPU.UserErrorList = cms.vint32(40) -## Use pilot blades -siPixelDigisGPU.UsePilotBlade = cms.bool(False) -## Use phase1 -siPixelDigisGPU.UsePhase1 = cms.bool(False) -## Empty Regions PSet means complete unpacking -siPixelDigisGPU.Regions = cms.PSet( ) -siPixelDigisGPU.CablingMapLabel = cms.string("") -siPixelDigisGPU.enableErrorDebug = cms.bool(False) - from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toModify(siPixelDigis, UsePhase1=True) -phase1Pixel.toModify(siPixelDigisGPU, UsePhase1=True) -# In principle I would like to hide the name 'siPixelDigisGPU', but it -# is used in test/runRawToDigi_GPU_phase1.py which I also don't want -# to break +_siPixelDigis_gpu = EventFilter.SiPixelRawToDigi.siPixelDigiHeterogeneousConverter_cfi.siPixelDigiHeterogeneousConverter.clone() +_siPixelDigis_gpu.includeErrors = cms.bool(True) + from Configuration.ProcessModifiers.gpu_cff import gpu -gpu.toReplaceWith(siPixelDigis, siPixelDigisGPU) +gpu.toReplaceWith(siPixelDigis, _siPixelDigis_gpu) + diff --git a/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml 
b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml new file mode 100644 index 0000000000000..b79cf8d4329c4 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index c7b16a6ef4ee2..ec51e90aa3065 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,8 +1,5 @@ - - - - - - - + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h b/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h deleted file mode 100644 index 9e3aad606851c..0000000000000 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef RecoLocalTracker_SiPixelClusterizer_PixelClusterizerBase_H -#define RecoLocalTracker_SiPixelClusterizer_PixelClusterizerBase_H - -#include "DataFormats/Common/interface/DetSetVector.h" -#include "DataFormats/Common/interface/DetSetVectorNew.h" -#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" -#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationServiceBase.h" -#include - -class PixelGeomDetUnit; - -/** - * Abstract interface for Pixel Clusterizers - */ -class PixelClusterizerBase { -public: - typedef edm::DetSet::const_iterator DigiIterator; - typedef edmNew::DetSet::const_iterator ClusterIterator; - - struct AccretionCluster { - typedef unsigned short UShort; - static constexpr UShort MAXSIZE = 256; - UShort adc[MAXSIZE]; - UShort x[MAXSIZE]; - UShort y[MAXSIZE]; - UShort xmin = 16000; - UShort ymin = 16000; - unsigned int isize = 0; - unsigned int curr = 0; - - // stack interface (unsafe ok for use below) - UShort top() const { return curr; } - 
void pop() { ++curr; } - bool empty() { return curr == isize; } - - bool add(SiPixelCluster::PixelPos const& p, UShort const iadc) { - if (isize == MAXSIZE) - return false; - xmin = std::min(xmin, (unsigned short)(p.row())); - ymin = std::min(ymin, (unsigned short)(p.col())); - adc[isize] = iadc; - x[isize] = p.row(); - y[isize++] = p.col(); - return true; - } - }; - - // Virtual destructor, this is a base class. - virtual ~PixelClusterizerBase() {} - - // Build clusters in a DetUnit. Both digi and cluster stored in a DetSet - - virtual void clusterizeDetUnit(const edm::DetSet& input, - const PixelGeomDetUnit* pixDet, - const TrackerTopology* tTopo, - const std::vector& badChannels, - edmNew::DetSetVector::FastFiller& output) = 0; - - virtual void clusterizeDetUnit(const edmNew::DetSet& input, - const PixelGeomDetUnit* pixDet, - const TrackerTopology* tTopo, - const std::vector& badChannels, - edmNew::DetSetVector::FastFiller& output) = 0; - - // Configure gain calibration service - void setSiPixelGainCalibrationService(SiPixelGainCalibrationServiceBase* in) { - theSiPixelGainCalibrationService_ = in; - } - -protected: - SiPixelGainCalibrationServiceBase* theSiPixelGainCalibrationService_; -}; - -#endif diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc index d35e04ebe16f7..d9da8d77031ee 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc @@ -13,10 +13,11 @@ // Our own stuff #include "SiPixelClusterProducer.h" -#include "PixelThresholdClusterizer.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/PixelThresholdClusterizer.h" // Geometry -#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" // Data 
Formats #include "DataFormats/Common/interface/DetSetVector.h" @@ -31,7 +32,6 @@ // Framework #include "DataFormats/Common/interface/Handle.h" #include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" // STL #include @@ -42,164 +42,178 @@ // MessageLogger #include "FWCore/MessageLogger/interface/MessageLogger.h" -//--------------------------------------------------------------------------- -//! Constructor: set the ParameterSet and defer all thinking to setupClusterizer(). -//--------------------------------------------------------------------------- -SiPixelClusterProducer::SiPixelClusterProducer(edm::ParameterSet const& conf) - : tPutPixelClusters(produces()), - clusterMode_(conf.getParameter("ClusterMode")), - maxTotalClusters_(conf.getParameter("maxNumberOfClusters")) { - if (clusterMode_ == "PixelThresholdReclusterizer") - tPixelClusters = consumes(conf.getParameter("src")); - else - tPixelDigi = consumes>(conf.getParameter("src")); - - trackerTopoToken_ = esConsumes(); - trackerGeomToken_ = esConsumes(); - - const auto& payloadType = conf.getParameter("payloadType"); - if (payloadType == "HLT") - theSiPixelGainCalibration_ = std::make_unique(conf); - else if (payloadType == "Offline") - theSiPixelGainCalibration_ = std::make_unique(conf); - else if (payloadType == "Full") - theSiPixelGainCalibration_ = std::make_unique(conf); - - //--- Make the algorithm(s) according to what the user specified - //--- in the ParameterSet. 
- setupClusterizer(conf); -} - -// Destructor -SiPixelClusterProducer::~SiPixelClusterProducer() = default; - -void SiPixelClusterProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("src", edm::InputTag("siPixelDigis")); - desc.add("ClusterMode", "PixelThresholdClusterizer"); - desc.add("maxNumberOfClusters", -1)->setComment("-1 means no limit"); - desc.add("payloadType", "Offline") - ->setComment("Options: HLT - column granularity, Offline - gain:col/ped:pix"); - - PixelThresholdClusterizer::fillPSetDescription(desc); - SiPixelGainCalibrationServiceBase::fillPSetDescription(desc); // no-op, but in principle the structures are there... - - descriptions.add("SiPixelClusterizerDefault", desc); -} - -//--------------------------------------------------------------------------- -//! The "Event" entrypoint: gets called by framework for every event -//--------------------------------------------------------------------------- -void SiPixelClusterProducer::produce(edm::Event& e, const edm::EventSetup& es) { - //Setup gain calibration service - theSiPixelGainCalibration_->setESObjects(es); - - // Step A.1: get input data - edm::Handle inputClusters; - edm::Handle> inputDigi; - if (clusterMode_ == "PixelThresholdReclusterizer") - e.getByToken(tPixelClusters, inputClusters); - else - e.getByToken(tPixelDigi, inputDigi); - - // Step A.2: get event setup - edm::ESHandle geom = es.getHandle(trackerGeomToken_); - - edm::ESHandle trackerTopologyHandle = es.getHandle(trackerTopoToken_); - tTopo_ = trackerTopologyHandle.product(); - - // Step B: create the final output collection - auto output = std::make_unique(); - //FIXME: put a reserve() here - - // Step C: Iterate over DetIds and invoke the pixel clusterizer algorithm - // on each DetUnit - if (clusterMode_ == "PixelThresholdReclusterizer") - run(*inputClusters, geom, *output); - else - run(*inputDigi, geom, *output); - - // Step D: write output to file - 
output->shrink_to_fit(); - e.put(tPutPixelClusters, std::move(output)); -} - -//--------------------------------------------------------------------------- -//! Set up the specific algorithm we are going to use. -//! TO DO: in the future, we should allow for a different algorithm for -//! each detector subset (e.g. barrel vs forward, per layer, etc). -//--------------------------------------------------------------------------- -void SiPixelClusterProducer::setupClusterizer(const edm::ParameterSet& conf) { - if (clusterMode_ == "PixelThresholdReclusterizer" || clusterMode_ == "PixelThresholdClusterizer") { - clusterizer_ = std::make_unique(conf); - clusterizer_->setSiPixelGainCalibrationService(theSiPixelGainCalibration_.get()); - } else { - throw cms::Exception("Configuration") << "[SiPixelClusterProducer]:" - << " choice " << clusterMode_ << " is invalid.\n" - << "Possible choices:\n" - << " PixelThresholdClusterizer"; + + //--------------------------------------------------------------------------- + //! Constructor: set the ParameterSet and defer all thinking to setupClusterizer(). + //--------------------------------------------------------------------------- + SiPixelClusterProducer::SiPixelClusterProducer(edm::ParameterSet const& conf) + : + theSiPixelGainCalibration_(nullptr), + clusterMode_( conf.getUntrackedParameter("ClusterMode","PixelThresholdClusterizer") ), + clusterizer_(nullptr), // the default, in case we fail to make one + readyToCluster_(false), // since we obviously aren't + maxTotalClusters_( conf.getParameter( "maxNumberOfClusters" ) ), + payloadType_( conf.getParameter( "payloadType" ) ) + { + if ( clusterMode_ == "PixelThresholdReclusterizer" ) + tPixelClusters = consumes( conf.getParameter("src") ); + else + tPixelDigi = consumes>( conf.getParameter("src") ); + //--- Declare to the EDM what kind of collections we will be making. 
+ produces(); + + if (strcmp(payloadType_.c_str(), "HLT") == 0) + theSiPixelGainCalibration_ = new SiPixelGainCalibrationForHLTService(conf); + else if (strcmp(payloadType_.c_str(), "Offline") == 0) + theSiPixelGainCalibration_ = new SiPixelGainCalibrationOfflineService(conf); + else if (strcmp(payloadType_.c_str(), "Full") == 0) + theSiPixelGainCalibration_ = new SiPixelGainCalibrationService(conf); + + //--- Make the algorithm(s) according to what the user specified + //--- in the ParameterSet. + setupClusterizer(conf); + + } + + // Destructor + SiPixelClusterProducer::~SiPixelClusterProducer() { + delete clusterizer_; + delete theSiPixelGainCalibration_; + } + + + //--------------------------------------------------------------------------- + //! The "Event" entrypoint: gets called by framework for every event + //--------------------------------------------------------------------------- + void SiPixelClusterProducer::produce(edm::Event& e, const edm::EventSetup& es) + { + + //Setup gain calibration service + theSiPixelGainCalibration_->setESObjects( es ); + + // Step A.1: get input data + edm::Handle< SiPixelClusterCollectionNew > inputClusters; + edm::Handle< edm::DetSetVector > inputDigi; + if ( clusterMode_ == "PixelThresholdReclusterizer" ) + e.getByToken(tPixelClusters, inputClusters); + else + e.getByToken(tPixelDigi, inputDigi); + + // Step A.2: get event setup + edm::ESHandle geom; + es.get().get( geom ); + + edm::ESHandle trackerTopologyHandle; + es.get().get(trackerTopologyHandle); + tTopo_ = trackerTopologyHandle.product(); + + // Step B: create the final output collection + auto output = std::make_unique< SiPixelClusterCollectionNew>(); + //FIXME: put a reserve() here + + // Step C: Iterate over DetIds and invoke the pixel clusterizer algorithm + // on each DetUnit + if ( clusterMode_ == "PixelThresholdReclusterizer" ) + run(*inputClusters, geom, *output ); + else + run(*inputDigi, geom, *output ); + + // Step D: write output to file + 
output->shrink_to_fit(); + e.put(std::move(output)); + } -} - -//--------------------------------------------------------------------------- -//! Iterate over DetUnits, and invoke the PixelClusterizer on each. -//--------------------------------------------------------------------------- -template -void SiPixelClusterProducer::run(const T& input, - const edm::ESHandle& geom, - edmNew::DetSetVector& output) { - int numberOfDetUnits = 0; - int numberOfClusters = 0; - - // Iterate on detector units - typename T::const_iterator DSViter = input.begin(); - for (; DSViter != input.end(); DSViter++) { - ++numberOfDetUnits; - - // LogDebug takes very long time, get rid off. - //LogDebug("SiStripClusterizer") << "[SiPixelClusterProducer::run] DetID" << DSViter->id; - - std::vector badChannels; - DetId detIdObject(DSViter->detId()); - - // Comment: At the moment the clusterizer depends on geometry - // to access information as the pixel topology (number of columns - // and rows in a detector module). - // In the future the geometry service will be replaced with - // a ES service. - const GeomDetUnit* geoUnit = geom->idToDetUnit(detIdObject); - const PixelGeomDetUnit* pixDet = dynamic_cast(geoUnit); - if (!pixDet) { - // Fatal error! TO DO: throw an exception! - assert(0); + + //--------------------------------------------------------------------------- + //! Set up the specific algorithm we are going to use. + //! TO DO: in the future, we should allow for a different algorithm for + //! each detector subset (e.g. barrel vs forward, per layer, etc). 
+ //--------------------------------------------------------------------------- + void SiPixelClusterProducer::setupClusterizer(const edm::ParameterSet& conf) { + + if ( clusterMode_ == "PixelThresholdReclusterizer" || clusterMode_ == "PixelThresholdClusterizer" ) { + clusterizer_ = new PixelThresholdClusterizer(conf); + clusterizer_->setSiPixelGainCalibrationService(theSiPixelGainCalibration_); + readyToCluster_ = true; + } + else { + edm::LogError("SiPixelClusterProducer") << "[SiPixelClusterProducer]:" + <<" choice " << clusterMode_ << " is invalid.\n" + << "Possible choices:\n" + << " PixelThresholdClusterizer"; + readyToCluster_ = false; + } + } + + + //--------------------------------------------------------------------------- + //! Iterate over DetUnits, and invoke the PixelClusterizer on each. + //--------------------------------------------------------------------------- + template + void SiPixelClusterProducer::run(const T & input, + const edm::ESHandle & geom, + edmNew::DetSetVector & output) { + if ( ! readyToCluster_ ) { + edm::LogError("SiPixelClusterProducer") + <<" at least one clusterizer is not ready -- can't run!" ; + // TO DO: throw an exception here? The user may want to know... + return; // clusterizer is invalid, bail out } - { - // Produce clusters for this DetUnit and store them in + + int numberOfDetUnits = 0; + int numberOfClusters = 0; + + // Iterate on detector units + typename T::const_iterator DSViter = input.begin(); + for( ; DSViter != input.end(); DSViter++) { + ++numberOfDetUnits; + + // LogDebug takes very long time, get rid off. + //LogDebug("SiStripClusterizer") << "[SiPixelClusterProducer::run] DetID" << DSViter->id; + + std::vector badChannels; + DetId detIdObject(DSViter->detId()); + + // Comment: At the moment the clusterizer depends on geometry + // to access information as the pixel topology (number of columns + // and rows in a detector module). 
+ // In the future the geometry service will be replaced with + // a ES service. + const GeomDetUnit * geoUnit = geom->idToDetUnit( detIdObject ); + const PixelGeomDetUnit * pixDet = dynamic_cast(geoUnit); + if (! pixDet) { + // Fatal error! TO DO: throw an exception! + assert(0); + } + { + // Produce clusters for this DetUnit and store them in // a DetSet edmNew::DetSetVector::FastFiller spc(output, DSViter->detId()); clusterizer_->clusterizeDetUnit(*DSViter, pixDet, tTopo_, badChannels, spc); - if (spc.empty()) { + if ( spc.empty() ) { spc.abort(); } else { - numberOfClusters += spc.size(); + numberOfClusters += spc.size(); } - } // spc is not deleted and detsetvector updated - if ((maxTotalClusters_ >= 0) && (numberOfClusters > maxTotalClusters_)) { - edm::LogError("TooManyClusters") - << "Limit on the number of clusters exceeded. An empty cluster collection will be produced instead.\n"; - edmNew::DetSetVector empty; - empty.swap(output); - break; - } - } // end of DetUnit loop + } // spc is not deleted and detsetvector updated + if ((maxTotalClusters_ >= 0) && (numberOfClusters > maxTotalClusters_)) { + edm::LogError("TooManyClusters") << "Limit on the number of clusters exceeded. 
An empty cluster collection will be produced instead.\n"; + edmNew::DetSetVector empty; + empty.swap(output); + break; + } + } // end of DetUnit loop + + //LogDebug ("SiPixelClusterProducer") << " Executing " + // << clusterMode_ << " resulted in " << numberOfClusters + // << " SiPixelClusters in " << numberOfDetUnits << " DetUnits."; + } + + - //LogDebug ("SiPixelClusterProducer") << " Executing " - // << clusterMode_ << " resulted in " << numberOfClusters - // << " SiPixelClusters in " << numberOfDetUnits << " DetUnits."; -} #include "FWCore/PluginManager/interface/ModuleDef.h" #include "FWCore/Framework/interface/MakerMacros.h" DEFINE_FWK_MODULE(SiPixelClusterProducer); + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 672a4dbe97450..df3caeb690090 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -1,18 +1,13 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h +#include "gpuClusteringConstants.h" + #include #include #include namespace gpuClustering { - - constexpr uint32_t MaxNumModules = 2000; - - constexpr uint32_t MaxNumPixels = 256*2000; // this does not mean maxPixelPerModule==256! 
- - constexpr uint16_t InvId=9999; // must be > MaxNumModules - __global__ void countModules(uint16_t const * id, uint32_t * moduleStart, int32_t * clus, diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 4f8efa0a80be0..a8af0c8a7c4f9 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,8 +1,12 @@ + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 51cbb54dcb8e3..98a5198232591 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -25,9 +25,9 @@ namespace gpuPixelRecHits { uint16_t const * y, uint16_t const * adc, uint32_t const * digiModuleStart, - uint32_t * const clusInModule, - uint32_t * const moduleId, - int32_t * const clus, + uint32_t const * clusInModule, + uint32_t const * moduleId, + int32_t const * clus, int numElements, uint32_t const * hitsModuleStart, int32_t * chargeh, diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index b13fd64d41565..87a0d3118554b 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -7,10 +7,9 @@ VerboseLevel = cms.untracked.int32(0), ) -from RecoLocalTracker.SiPixelRecHits.SiPixelRecHitsGPU_cfi import siPixelRecHits as _siPixelRecHitsGPU -gpu.toReplaceWith(siPixelRecHits, _siPixelRecHitsGPU) +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import siPixelRecHitHeterogeneous as _siPixelRecHitHeterogeneous +gpu.toReplaceWith(siPixelRecHits, _siPixelRecHitHeterogeneous) siPixelRecHitsPreSplitting = siPixelRecHits.clone( src = 'siPixelClustersPreSplitting' ) -gpu.toModify(siPixelRecHitsPreSplitting, src = 
'siPixelDigis') From 999ec32d4105c7d655b9e0376b88c54d9afc550a Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Mon, 4 Jun 2018 14:50:37 +0200 Subject: [PATCH 025/149] Faster clustering, now does not requires to know number of modules (cms-patatrack#68) --- .../plugins/gpuClustering.h | 230 +++++++++--------- 1 file changed, 117 insertions(+), 113 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index df3caeb690090..12f205742433e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -1,165 +1,169 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h -#include "gpuClusteringConstants.h" - #include #include #include +#include "gpuClusteringConstants.h" + namespace gpuClustering { + __global__ void countModules(uint16_t const * id, - uint32_t * moduleStart, - int32_t * clus, - int numElements){ - + uint32_t * moduleStart, + int32_t * clus, + int numElements) + { int i = blockDim.x * blockIdx.x + threadIdx.x; - if (i >= numElements) return; - clus[i]=i; - if (InvId==id[i]) return; - auto j=i-1; - while(j>=0 && id[j]==InvId) --j; - if(j<0 || id[j]!=id[i]) { + if (i >= numElements) + return; + clus[i] = i; + if (InvId == id[i]) + return; + auto j = i - 1; + while (j >= 0 and id[j] == InvId) + --j; + if (j < 0 or id[j] != id[i]) { // boundary... 
- auto loc = atomicInc(moduleStart,MaxNumModules); - moduleStart[loc+1]=i; + auto loc = atomicInc(moduleStart, MaxNumModules); + moduleStart[loc + 1] = i; } } - - + __global__ void findClus(uint16_t const * id, - uint16_t const * x, - uint16_t const * y, - uint16_t const * adc, - uint32_t const * moduleStart, - uint32_t * clusInModule, uint32_t * moduleId, - int32_t * clus, uint32_t * debug, - int numElements){ - + uint16_t const * x, + uint16_t const * y, + uint16_t const * adc, + uint32_t const * moduleStart, + uint32_t * clusInModule, uint32_t * moduleId, + int32_t * clus, uint32_t * debug, + int numElements) + { __shared__ bool go; __shared__ int nclus; + __shared__ int msize; - __shared__ int msize; + if (blockIdx.x >= moduleStart[0]) + return; - auto first = moduleStart[1 + blockIdx.x]; - + auto first = moduleStart[1 + blockIdx.x]; auto me = id[first]; - assert(me=numElements) return; - - go=true; - nclus=0; + first += threadIdx.x; + if (first>= numElements) + return; + + go = true; + nclus = 0; - msize=numElements; + msize = numElements; __syncthreads(); - for (int i=first; i=msize) return; + assert(msize<= numElements); + if (first>= msize) + return; int jmax[10]; - auto niter = (msize-first)/blockDim.x; - assert(niter<10); - for (int i=0; i1) continue; - if (std::abs(int(y[j])-int(y[i]))>1) continue; - auto old = atomicMin(&clus[j],clus[i]); - if (old!=clus[i]) go=true; - atomicMin(&clus[i],old); - jmax[k]=j+1; - } + jmax[k] = i + 1; + for (int j = js; j < jm; ++j) { + if (id[j] == InvId) // not valid + continue; + if (std::abs(int(x[j]) - int(x[i])) > 1 | + std::abs(int(y[j]) - int(y[i])) > 1) + continue; + auto old = atomicMin(&clus[j], clus[i]); + if (old != clus[i]) go = true; + atomicMin(&clus[i], old); + jmax[k] = j + 1; + } } - assert (k<=niter); - __syncthreads(); - } - - /* - // fast count (nice but not much useful) - auto laneId = threadIdx.x & 0x1f; - - for (int i=first; i=0) clus[i]=clus[clus[i]]; + for (int i = first; i < numElements; i += 
blockDim.x) { + if (id[i] == InvId) // not valid + continue; + if (id[i] != me) // end of module + break; + if (clus[i]>= 0) clus[i] = clus[clus[i]]; } - + __syncthreads(); - for (int i=first; i Date: Tue, 29 Dec 2020 15:09:29 +0100 Subject: [PATCH 026/149] Prototype for EventSetup data on GPUs (cms-patatrack#77) Adds a prototype for dealing with EventSetup data on GPUs. The prototype is applied to the ES data used by Raw2Cluster (cabling map etc, gains) and RecHits (CPE). Now it is the `ESProduct` who owns the GPU memory. Currently each of the affected `ESProducts` have a method `getGPUProductAsync(cuda::stream_t<>&)` that will allocate the memory on the current GPU device and transfer the data there asynchronously, if the data is not there yet. The functionality of bookkeeping which devices have the data already, and necessary synchronization between multiple threads (only one thread may do the transfer per device) are abstracted to a helper template in `HeterogeneousCore/CUDACore/interface/CUDAESProduct.h`. 
Technical changes: - `EventSetup`-based implementation for GPU cabling map, gains, etc - add support for multiple devices to `PixelCPEFast` - abstract the `EventSetup` GPU transfer - move `malloc` and transfer to the lambda - move `cudaFree` outside of the `nullptr` check - move files (back) to the plugins directory - rename `siPixelDigisHeterogeneous` to `siPixelClustersHeterogeneous` --- .../SiPixelGainCalibrationForHLTGPURcd.h | 14 + .../src/SiPixelGainCalibrationForHLTGPURcd.cc | 5 + CalibTracker/SiPixelESProducers/BuildFile.xml | 21 +- .../SiPixelGainCalibrationForHLTGPU.h | 32 + .../SiPixelESProducers/plugins/BuildFile.xml | 24 +- ...PixelGainCalibrationForHLTGPUESProducer.cc | 47 ++ .../src/ES_SiPixelGainCalibrationForHLTGPU.cc | 4 + .../src/SiPixelGainCalibrationForHLTGPU.cc | 98 +++ .../StandardSequences/python/RawToDigi_cff.py | 8 +- .../SiPixelRawToDigi/plugins/BuildFile.xml | 12 +- .../python/SiPixelRawToDigi_cfi.py | 4 +- .../python/RecoLocalTracker_cff.py | 40 +- .../SiPixelClusterizer/BuildFile.xml | 8 +- .../SiPixelClusterizer/plugins/BuildFile.xml | 14 +- .../plugins/PixelClusterizerBase.h | 76 ++ .../plugins/SiPixelClusterProducer.cc | 2 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 718 ++++++++++++++++++ .../plugins/SiPixelRawToClusterGPUKernel.h | 268 +++++++ RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 2 + .../SiPixelRecHits/interface/PixelCPEFast.h | 25 +- .../SiPixelRecHits/src/PixelCPEFast.cc | 37 +- 21 files changed, 1380 insertions(+), 79 deletions(-) create mode 100644 CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h create mode 100644 CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc create mode 100644 CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h create mode 100644 CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc create mode 100644 CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc create mode 100644
CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h diff --git a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h new file mode 100644 index 0000000000000..afb682e5d451f --- /dev/null +++ b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h @@ -0,0 +1,14 @@ +#ifndef CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h +#define CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h + +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" + +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +#include "boost/mpl/vector.hpp" + +class SiPixelGainCalibrationForHLTGPURcd : public edm::eventsetup::DependentRecordImplementation > {}; + +#endif diff --git a/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc new file mode 100644 index 0000000000000..e6020eca80b1f --- /dev/null +++ b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc @@ -0,0 +1,5 @@ +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(SiPixelGainCalibrationForHLTGPURcd); diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml index 6efeef5ca0d1c..69d258da21ed1 100644 --- 
a/CalibTracker/SiPixelESProducers/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/BuildFile.xml @@ -1,10 +1,15 @@ - - - - - - - + + + + + + + + + + + + - + diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h new file mode 100644 index 0000000000000..96989c8a2c3b2 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -0,0 +1,32 @@ +#ifndef CalibTracker_SiPixelESProducers_SiPixelGainCalibrationForHLTGPU_H +#define CalibTracker_SiPixelESProducers_SiPixelGainCalibrationForHLTGPU_H + +#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" + +#include + +class SiPixelGainCalibrationForHLT; +class SiPixelGainForHLTonGPU; +struct SiPixelGainForHLTonGPU_DecodingStructure; +class TrackerGeometry; + +class SiPixelGainCalibrationForHLTGPU { +public: + explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom); + ~SiPixelGainCalibrationForHLTGPU(); + + const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const; + +private: + const SiPixelGainCalibrationForHLT *gains_ = nullptr; + SiPixelGainForHLTonGPU *gainForHLTonHost_ = nullptr; + struct GPUData { + ~GPUData(); + SiPixelGainForHLTonGPU *gainForHLTonGPU = nullptr; + SiPixelGainForHLTonGPU_DecodingStructure *gainDataOnGPU = nullptr; + }; + CUDAESProduct gpuData_; +}; + +#endif diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index 5380c9d7d346b..b33657e273036 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -1,13 +1,13 @@ - - - - - - - - - - - - + + + + + + + + + + + + diff --git 
a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc new file mode 100644 index 0000000000000..186bb2d72c3f3 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc @@ -0,0 +1,47 @@ +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +#include + +class SiPixelGainCalibrationForHLTGPUESProducer: public edm::ESProducer { +public: + explicit SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig); + std::unique_ptr produce(const SiPixelGainCalibrationForHLTGPURcd& iRecord); + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); +private: +}; + +SiPixelGainCalibrationForHLTGPUESProducer::SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig) { + setWhatProduced(this); +} + +void SiPixelGainCalibrationForHLTGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.add("siPixelGainCalibrationForHLTGPU", desc); +} + +std::unique_ptr SiPixelGainCalibrationForHLTGPUESProducer::produce(const SiPixelGainCalibrationForHLTGPURcd& iRecord) { + edm::ESHandle gains; + iRecord.getRecord().get(gains); + + 
edm::ESHandle geom; + iRecord.getRecord().get(geom); + + return std::make_unique(*gains, *geom); +} + +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" + +DEFINE_FWK_EVENTSETUP_MODULE(SiPixelGainCalibrationForHLTGPUESProducer); diff --git a/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc new file mode 100644 index 0000000000000..80932fb468f71 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc @@ -0,0 +1,4 @@ +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(SiPixelGainCalibrationForHLTGPU); diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc new file mode 100644 index 0000000000000..3aef3f44c8f67 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -0,0 +1,98 @@ +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +#include + +SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom): + gains_(&gains) +{ + // bizzarre logic (looking for fist strip-det) don't ask + auto const & dus = geom.detUnits(); + unsigned m_detectors = dus.size(); + for(unsigned int i=1;i<7;++i) { + 
if(geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) != dus.size() && + dus[geom.offsetDU(GeomDetEnumerators::tkDetEnum[i])]->type().isTrackerStrip()) { + if(geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) < m_detectors) m_detectors = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]); + } + } + + /* + std::cout << "caching calibs for " << m_detectors << " pixel detectors of size " << gains.data().size() << std::endl; + std::cout << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure) << std::endl; + */ + + cudaCheck(cudaMallocHost((void**) & gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU))); + //gainForHLTonHost_->v_pedestals = gainDataOnGPU_; // how to do this? + + // do not read back from the (possibly write-combined) memory buffer + auto minPed = gains.getPedLow(); + auto maxPed = gains.getPedHigh(); + auto minGain = gains.getGainLow(); + auto maxGain = gains.getGainHigh(); + auto nBinsToUseForEncoding = 253; + + // we will simplify later (not everything is needed....) 
+ gainForHLTonHost_->minPed_ = minPed; + gainForHLTonHost_->maxPed_ = maxPed; + gainForHLTonHost_->minGain_= minGain; + gainForHLTonHost_->maxGain_= maxGain; + + gainForHLTonHost_->numberOfRowsAveragedOver_ = 80; + gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding; + gainForHLTonHost_->deadFlag_ = 255; + gainForHLTonHost_->noisyFlag_ = 254; + + gainForHLTonHost_->pedPrecision = static_cast(maxPed - minPed) / nBinsToUseForEncoding; + gainForHLTonHost_->gainPrecision = static_cast(maxGain - minGain) / nBinsToUseForEncoding; + + /* + std::cout << "precisions g " << gainForHLTonHost_->pedPrecision << ' ' << gainForHLTonHost_->gainPrecision << std::endl; + */ + + // fill the index map + auto const & ind = gains.getIndexes(); + /* + std::cout << ind.size() << " " << m_detectors << std::endl; + */ + + for (auto i=0U; igeographicalId().rawId(),SiPixelGainCalibrationForHLT::StrictWeakOrdering()); + assert (p!=ind.end() && p->detid==dus[i]->geographicalId()); + assert(p->iend<=gains.data().size()); + assert(p->iend>=p->ibegin); + assert(0==p->ibegin%2); + assert(0==p->iend%2); + assert(p->ibegin!=p->iend); + assert(p->ncols>0); + gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin,p->iend), p->ncols); + // if (ind[i].detid!=dus[i]->geographicalId()) std::cout << ind[i].detid<<"!="<geographicalId() << std::endl; + // gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols); + } + +} + +SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() { + cudaCheck(cudaFreeHost(gainForHLTonHost_)); +} + +SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() { + cudaCheck(cudaFree(gainForHLTonGPU)); + cudaCheck(cudaFree(gainDataOnGPU)); +} + +const SiPixelGainForHLTonGPU *SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, 
cuda::stream_t<>& stream) { + cudaCheck(cudaMalloc((void**) & data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU))); + cudaCheck(cudaMalloc((void**) & data.gainDataOnGPU, this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<> + // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory + cudaCheck(cudaMemcpyAsync(data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id())); + + cudaCheck(cudaMemcpyAsync(data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), &(data.gainDataOnGPU), sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), cudaMemcpyDefault, stream.id())); + }); + return data.gainForHLTonGPU; +} diff --git a/Configuration/StandardSequences/python/RawToDigi_cff.py b/Configuration/StandardSequences/python/RawToDigi_cff.py index ec2e0d1d71997..605f9ea4c29bc 100644 --- a/Configuration/StandardSequences/python/RawToDigi_cff.py +++ b/Configuration/StandardSequences/python/RawToDigi_cff.py @@ -5,7 +5,6 @@ # scenarios. In this case it makes changes for Run 2. 
from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import * -from EventFilter.SiPixelRawToDigi.siPixelDigisHeterogeneous_cfi import * from EventFilter.SiStripRawToDigi.SiStripDigis_cfi import * @@ -62,14 +61,10 @@ ) RawToDigi = cms.Sequence(RawToDigiTask) -_RawToDigi_gpu = RawToDigi.copy() -_RawToDigi_gpu.replace(siPixelDigis, siPixelDigisHeterogeneous + siPixelDigis) -gpu.toReplaceWith(RawToDigi, _RawToDigi_gpu) - RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigis, siStripDigis]) RawToDigi_noTk = cms.Sequence(RawToDigiTask_noTk) -RawToDigiTask_pixelOnly = cms.Task(siPixelDigisHeterogeneous + siPixelDigis) +RawToDigiTask_pixelOnly = cms.Task(siPixelDigis) RawToDigi_pixelOnly = cms.Sequence(RawToDigiTask_pixelOnly) RawToDigiTask_ecalOnly = cms.Task(ecalDigisTask, ecalPreshowerDigis, scalersRawToDigi) @@ -79,7 +74,6 @@ RawToDigi_hcalOnly = cms.Sequence(RawToDigiTask_hcalOnly) scalersRawToDigi.scalersInputTag = 'rawDataCollector' -siPixelDigisHeterogeneous.InputLabel = 'rawDataCollector' (~gpu).toModify(siPixelDigis, InputLabel = 'rawDataCollector') ecalDigis.InputLabel = 'rawDataCollector' ecalPreshowerDigis.sourceTag = 'rawDataCollector' diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 0f46385a6e608..f92aa68373927 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,14 +1,4 @@ - - - - - - - - - - - + diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index 6567e35a24704..528ffa2683fa8 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -1,6 +1,6 @@ import FWCore.ParameterSet.Config as cms import EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi -import EventFilter.SiPixelRawToDigi.siPixelDigiHeterogeneousConverter_cfi +import 
RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi siPixelDigis = EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi.siPixelRawToDigi.clone() siPixelDigis.Timing = cms.untracked.bool(False) @@ -22,7 +22,7 @@ from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toModify(siPixelDigis, UsePhase1=True) -_siPixelDigis_gpu = EventFilter.SiPixelRawToDigi.siPixelDigiHeterogeneousConverter_cfi.siPixelDigiHeterogeneousConverter.clone() +_siPixelDigis_gpu = RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi.siPixelDigiHeterogeneousConverter.clone() _siPixelDigis_gpu.includeErrors = cms.bool(True) from Configuration.ProcessModifiers.gpu_cff import gpu diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index 3cae176059b3b..b601e310db645 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -13,19 +13,37 @@ from RecoLocalTracker.SiPixelRecHits.SiPixelRecHits_cfi import * from RecoLocalTracker.SubCollectionProducers.clustersummaryproducer_cfi import * -pixeltrackerlocalrecoTask = cms.Task(siPixelClustersPreSplitting,siPixelRecHitsPreSplitting) -striptrackerlocalrecoTask = cms.Task(siStripZeroSuppression,siStripClusters,siStripMatchedRecHits) -trackerlocalrecoTask = cms.Task(pixeltrackerlocalrecoTask,striptrackerlocalrecoTask,clusterSummaryProducer) +pixeltrackerlocalreco = cms.Sequence(siPixelClustersPreSplitting*siPixelRecHitsPreSplitting) +striptrackerlocalreco = cms.Sequence(siStripZeroSuppression*siStripClusters*siStripMatchedRecHits) +trackerlocalreco = cms.Sequence(pixeltrackerlocalreco*striptrackerlocalreco*clusterSummaryProducer) + +from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import * +from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import * +from 
CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import * + +from Configuration.ProcessModifiers.gpu_cff import gpu +_pixeltrackerlocalreco_gpu = pixeltrackerlocalreco.copy() +_pixeltrackerlocalreco_gpu.replace(siPixelClustersPreSplitting, siPixelClustersHeterogeneous+siPixelClustersPreSplitting) +gpu.toReplaceWith(pixeltrackerlocalreco, _pixeltrackerlocalreco_gpu) -pixeltrackerlocalreco = cms.Sequence(pixeltrackerlocalrecoTask) -striptrackerlocalreco = cms.Sequence(striptrackerlocalrecoTask) -trackerlocalreco = cms.Sequence(trackerlocalrecoTask) from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import * from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import * -from RecoLocalTracker.SiPhase2VectorHitBuilder.siPhase2RecHitMatcher_cfi import * -_pixeltrackerlocalrecoTask_phase2 = pixeltrackerlocalrecoTask.copy() -_pixeltrackerlocalrecoTask_phase2.add(siPhase2Clusters) -phase2_tracker.toReplaceWith(pixeltrackerlocalrecoTask, _pixeltrackerlocalrecoTask_phase2) -phase2_tracker.toReplaceWith(trackerlocalrecoTask, trackerlocalrecoTask.copyAndExclude([striptrackerlocalrecoTask])) +from Configuration.Eras.Modifier_phase2_tracker_cff import phase2_tracker +phase2_tracker.toReplaceWith(pixeltrackerlocalreco, + cms.Sequence( + siPhase2Clusters + + siPixelClustersPreSplitting + + siPixelRecHitsPreSplitting + ) +) +phase2_tracker.toModify(clusterSummaryProducer, + doStrips = False, + stripClusters = '' +) +phase2_tracker.toReplaceWith(trackerlocalreco, + cms.Sequence( + pixeltrackerlocalreco*clusterSummaryProducer + ) +) diff --git a/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml index b79cf8d4329c4..74e76ab6ff3e2 100644 --- a/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml @@ -1,8 +1,8 @@ - - - - + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml 
b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index ec51e90aa3065..1dc69b4dd7b73 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,5 +1,17 @@ + + + + - + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h b/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h new file mode 100644 index 0000000000000..0c15107816754 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h @@ -0,0 +1,76 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_PixelClusterizerBase_H +#define RecoLocalTracker_SiPixelClusterizer_PixelClusterizerBase_H + +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationServiceBase.h" +#include + +class PixelGeomDetUnit; + +/** + * Abstract interface for Pixel Clusterizers + */ +class PixelClusterizerBase { +public: + typedef edm::DetSet::const_iterator DigiIterator; + typedef edmNew::DetSet::const_iterator ClusterIterator; + + struct AccretionCluster { + typedef unsigned short UShort; + static constexpr UShort MAXSIZE = 256; + UShort adc[MAXSIZE]; + UShort x[MAXSIZE]; + UShort y[MAXSIZE]; + UShort xmin=16000; + UShort ymin=16000; + unsigned int isize=0; + unsigned int curr=0; + + // stack interface (unsafe ok for use below) + UShort top() const { return curr;} + void pop() { ++curr;} + bool empty() { return curr==isize;} + + bool add(SiPixelCluster::PixelPos const & p, UShort const iadc) { + if (isize==MAXSIZE) return false; + xmin=std::min(xmin,(unsigned short)(p.row())); + ymin=std::min(ymin,(unsigned short)(p.col())); + adc[isize]=iadc; + 
x[isize]=p.row(); + y[isize++]=p.col(); + return true; + } + }; + + // Virtual destructor, this is a base class. + virtual ~PixelClusterizerBase() {} + + // Build clusters in a DetUnit. Both digi and cluster stored in a DetSet + + virtual void clusterizeDetUnit( const edm::DetSet & input, + const PixelGeomDetUnit * pixDet, + const TrackerTopology* tTopo, + const std::vector& badChannels, + edmNew::DetSetVector::FastFiller& output) = 0; + + virtual void clusterizeDetUnit( const edmNew::DetSet & input, + const PixelGeomDetUnit * pixDet, + const TrackerTopology* tTopo, + const std::vector& badChannels, + edmNew::DetSetVector::FastFiller& output) = 0; + + // Configure gain calibration service + void setSiPixelGainCalibrationService( SiPixelGainCalibrationServiceBase* in){ + theSiPixelGainCalibrationService_=in; + } + + protected: + SiPixelGainCalibrationServiceBase* theSiPixelGainCalibrationService_; + +}; + +#endif diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc index d9da8d77031ee..45ca9be5fd6c3 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc @@ -13,7 +13,7 @@ // Our own stuff #include "SiPixelClusterProducer.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/PixelThresholdClusterizer.h" +#include "PixelThresholdClusterizer.h" // Geometry #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu new file mode 100644 index 0000000000000..fdf7455475197 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -0,0 +1,718 @@ +/* Sushil Dubey, Shashi Dugad, TIFR, July 2017 + * + * File Name: RawToClusterGPU.cu + * Description: It 
converts Raw data into Digi Format on GPU + * then it converts adc -> electron and + * applies the adc threshold to needed for clustering + * Finaly the Output of RawToDigi data is given to pixelClusterizer + * +**/ + +// C++ includes +#include +#include +#include +#include +#include +#include +#include +#include + +// CUDA includes +#include +#include +#include +#include +#include +#include +#include + +// CMSSW includes +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h" + +// local includes +#include "SiPixelRawToClusterGPUKernel.h" + +namespace pixelgpudetails { + + SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel() { + int WSIZE = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD * sizeof(unsigned int); + cudaMallocHost(&word, sizeof(unsigned int)*WSIZE); + cudaMallocHost(&fedId_h, sizeof(unsigned char)*WSIZE); + + // to store the output of RawToDigi + cudaMallocHost(&pdigi_h, sizeof(uint32_t)*WSIZE); + cudaMallocHost(&rawIdArr_h, sizeof(uint32_t)*WSIZE); + + cudaMallocHost(&adc_h, sizeof(uint16_t)*WSIZE); + cudaMallocHost(&clus_h, sizeof(int32_t)*WSIZE); + + constexpr uint32_t vsize = sizeof(GPU::SimpleVector); + constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); + cudaCheck(cudaMallocHost(&error_h, vsize)); + cudaCheck(cudaMallocHost(&error_h_tmp, vsize)); + cudaCheck(cudaMallocHost(&data_h, MAX_FED*pixelgpudetails::MAX_WORD*esize)); + + new (error_h) GPU::SimpleVector(MAX_FED*pixelgpudetails::MAX_WORD, data_h); + new (error_h_tmp) GPU::SimpleVector(MAX_FED*pixelgpudetails::MAX_WORD, data_d); + assert(error_h->size() == 0); + assert(error_h->capacity() == static_cast(MAX_FED*pixelgpudetails::MAX_WORD)); + assert(error_h_tmp->size() == 0); + assert(error_h_tmp->capacity() == 
static_cast(MAX_FED*pixelgpudetails::MAX_WORD)); + + // allocate memory for RawToDigi on GPU + using namespace gpuClustering; + + // Number of words for all the feds + constexpr uint32_t MAX_WORD08_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint8_t); + constexpr uint32_t MAX_WORD32_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint32_t); + constexpr uint32_t MAX_WORD16_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint16_t); + constexpr uint32_t MAX_ERROR_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * esize; + + cudaCheck(cudaMalloc((void**) & word_d, MAX_WORD32_SIZE)); + cudaCheck(cudaMalloc((void**) & fedId_d, MAX_WORD08_SIZE)); + cudaCheck(cudaMalloc((void**) & pdigi_d, MAX_WORD32_SIZE)); // to store thepacked digi + cudaCheck(cudaMalloc((void**) & xx_d, MAX_WORD16_SIZE)); // to store the x and y coordinate + cudaCheck(cudaMalloc((void**) & yy_d, MAX_WORD16_SIZE)); + cudaCheck(cudaMalloc((void**) & adc_d, MAX_WORD16_SIZE)); + + cudaCheck(cudaMalloc((void**) & moduleInd_d, MAX_WORD16_SIZE)); + cudaCheck(cudaMalloc((void**) & rawIdArr_d, MAX_WORD32_SIZE)); + cudaCheck(cudaMalloc((void**) & error_d, vsize)); + cudaCheck(cudaMalloc((void**) & data_d, MAX_ERROR_SIZE)); + + // for the clusterizer + cudaCheck(cudaMalloc((void**) & clus_d, MAX_WORD32_SIZE)); // cluser index in module + + cudaCheck(cudaMalloc((void**) & moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); + cudaCheck(cudaMalloc((void**) & clusInModule_d,(MaxNumModules)*sizeof(uint32_t) )); + cudaCheck(cudaMalloc((void**) & moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); + + cudaCheck(cudaMalloc((void**) & debug_d, MAX_WORD32_SIZE)); + } + + + SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { + // free device memory used for RawToDigi on GPU + // free the GPU memory + cudaCheck(cudaFree(word_d)); + cudaCheck(cudaFree(fedId_d)); + cudaCheck(cudaFree(pdigi_d)); + cudaCheck(cudaFree(xx_d)); + cudaCheck(cudaFree(yy_d)); + cudaCheck(cudaFree(adc_d)); + 
cudaCheck(cudaFree(moduleInd_d)); + cudaCheck(cudaFree(rawIdArr_d)); + cudaCheck(cudaFree(error_d)); + cudaCheck(cudaFree(data_d)); + + // these are for the clusterizer + cudaCheck(cudaFree(moduleStart_d)); + cudaCheck(cudaFree(clus_d)); + cudaCheck(cudaFree(clusInModule_d)); + cudaCheck(cudaFree(moduleId_d)); + cudaCheck(cudaFree(debug_d)); + } + + void SiPixelRawToClusterGPUKernel::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { + std::memcpy(word+wordCounterGPU, src, sizeof(cms_uint32_t)*length); + std::memset(fedId_h+wordCounterGPU/2, fedId - 1200, length/2); + } + + + //////////////////// + + __device__ uint32_t getLink(uint32_t ww) { + return ((ww >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask); + } + + + __device__ uint32_t getRoc(uint32_t ww) { + return ((ww >> pixelgpudetails::ROC_shift ) & pixelgpudetails::ROC_mask); + } + + + __device__ uint32_t getADC(uint32_t ww) { + return ((ww >> pixelgpudetails::ADC_shift) & pixelgpudetails::ADC_mask); + } + + + __device__ bool isBarrel(uint32_t rawId) { + return (1==((rawId>>25)&0x7)); + } + + + + __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * Map, uint32_t fed, uint32_t link, uint32_t roc) { + uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; + pixelgpudetails::DetIdGPU detId = { Map->RawId[index], Map->rocInDet[index], Map->moduleId[index] }; + return detId; + } + + + //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html + //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 + // Convert local pixel to pixelgpudetails::global pixel + __device__ pixelgpudetails::Pixel frameConversion(bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, pixelgpudetails::Pixel local) { + + int slopeRow = 0, slopeCol = 0; + int rowOffset = 0, colOffset = 0; + + if (bpix) { + + if (side == -1 && layer != 1) 
{ // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + } + else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; + } // if roc + } + else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 + if (rocIdInDetUnit < 8) { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + colOffset = rocIdInDetUnit * pixelgpudetails::numColsInRoc; + } + else { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (16-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + } + } + + } + else { // fpix + if (side==-1) { // pannel 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + } + else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; + } + } + else { // pannel 2 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + } + else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; + } + + } // side + + } + + uint32_t gRow = rowOffset+slopeRow*local.row; + uint32_t gCol = colOffset+slopeCol*local.col; + //printf("Inside frameConversion row: %u, column: %u\n",gRow, gCol); + pixelgpudetails::Pixel global = {gRow, gCol}; + return global; + } + + + __device__ uint32_t conversionError(uint32_t fedId, uint32_t status, bool debug = false) + { + + uint32_t errorType = 0; + + // debug = true; + + switch (status) { + case(1) : { + if (debug) printf("Error in Fed: %i, invalid channel Id (errorType = 
35\n)", fedId ); + errorType = 35; + break; + } + case(2) : { + if (debug) printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); + errorType = 36; + break; + } + case(3) : { + if (debug) printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); + errorType = 37; + break; + } + case(4) : { + if (debug) printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); + errorType = 38; + break; + } + default: if (debug) printf("Cabling check returned unexpected result, status = %i\n", status); + }; + + return errorType; + + } + + + __device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) + { + uint32_t numRowsInRoc = 80; + uint32_t numColsInRoc = 52; + + /// row and collumn in ROC representation + return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc)); + } + + + __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) + { + return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); + } + + + __device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *Map, bool debug = false) + { + + int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; + if (errorType < 25) return false; + bool errorFound = false; + + switch (errorType) { + case(25) : { + errorFound = true; + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1; + if (index > 1 && index <= Map->size){ + if (!(link == Map->link[index] && 1 == Map->roc[index])) errorFound = false; + } + if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); + break; + } + case(26) : { + if (debug) printf("Gap word found (errorType = 26)\n"); + errorFound = true; + break; + } + case(27) : { + if (debug) printf("Dummy word found (errorType = 27)\n"); + errorFound = true; + break; + } + case(28) : { + if (debug) printf("Error fifo nearly full (errorType = 28)\n"); + errorFound = true; + break; + } + case(29) : { + if (debug) printf("Timeout on a channel 
(errorType = 29)\n"); + if ((errorWord >> pixelgpudetails::OMIT_ERR_shift) & pixelgpudetails::OMIT_ERR_mask) { + if (debug) printf("...first errorType=29 error, this gets masked out\n"); + } + errorFound = true; + break; + } + case(30) : { + if (debug) printf("TBM error trailer (errorType = 30)\n"); + int StateMatch_bits = 4; + int StateMatch_shift = 8; + uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); + int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; + if ( StateMatch != 1 && StateMatch != 8 ) { + if (debug) printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); + } + if ( StateMatch == 1 ) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + errorFound = true; + break; + } + case(31) : { + if (debug) printf("Event number error (errorType = 31)\n"); + errorFound = true; + break; + } + default: errorFound = false; + + }; + + return errorFound? errorType : 0; + + } + + + __device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *Map, bool debug = false) + { + + uint32_t rID = 0xffffffff; + + switch (errorType) { + case 25 : case 30 : case 31 : case 36 : case 40 : { + //set dummy values for cabling just to get detId from link + //cabling.dcol = 0; + //cabling.pxid = 2; + uint32_t roc = 1; + uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; + + uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + if(rID_temp != 9999) rID = rID_temp; + break; + } + case 29 : { + int chanNmbr = 0; + const int DB0_shift = 0; + const int DB1_shift = DB0_shift + 1; + const int DB2_shift = DB1_shift + 1; + const int DB3_shift = DB2_shift + 1; + const int DB4_shift = DB3_shift + 1; + const uint32_t DataBit_mask = ~(~uint32_t(0) << 1); + + int CH1 = (errWord >> DB0_shift) & DataBit_mask; + int CH2 = (errWord >> DB1_shift) & DataBit_mask; + int CH3 = (errWord >> DB2_shift) & DataBit_mask; + int CH4 = (errWord >> DB3_shift) & 
DataBit_mask; + int CH5 = (errWord >> DB4_shift) & DataBit_mask; + int BLOCK_bits = 3; + int BLOCK_shift = 8; + uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits); + int BLOCK = (errWord >> BLOCK_shift) & BLOCK_mask; + int localCH = 1*CH1+2*CH2+3*CH3+4*CH4+5*CH5; + if (BLOCK%2==0) chanNmbr=(BLOCK/2)*9+localCH; + else chanNmbr = ((BLOCK-1)/2)*9+4+localCH; + if ((chanNmbr < 1)||(chanNmbr > 36)) break; // signifies unexpected result + + // set dummy values for cabling just to get detId from link if in Barrel + //cabling.dcol = 0; + //cabling.pxid = 2; + uint32_t roc = 1; + uint32_t link = chanNmbr; + uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + if(rID_temp != 9999) rID = rID_temp; + break; + } + case 37 : case 38: { + //cabling.dcol = 0; + //cabling.pxid = 2; + uint32_t roc = (errWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask; + uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; + uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + if(rID_temp != 9999) rID = rID_temp; + break; + } + + default : break; + + }; + + return rID; + + } + + + /*---------- + * Name: applyADCthreshold_kernel() + * Desc: converts adc count to electrons and then applies the + * threshold on each channel. + * make pixel to 0 if it is below the threshold + * Input: xx_d[], yy_d[], layer_d[], wordCounter, adc[], ADCThreshold + *----------- + * Output: xx_adc[], yy_adc[] with pixel threshold applied + */ + // kernel to apply adc threshold on the channels + + + // Felice: gains and pedestals are not the same for each pixel. 
This code should be rewritten to take + // in account local gains/pedestals + // __global__ void applyADCthreshold_kernel(const uint32_t *xx_d, const uint32_t *yy_d, const uint32_t *layer_d, uint32_t *adc, const uint32_t wordCounter, + // const ADCThreshold adcThreshold, uint32_t *xx_adc, uint32_t *yy_adc ) { + // int tid = threadIdx.x; + // int gIndex = blockDim.x*blockIdx.x+tid; + // if (gIndex=adcThreshold.theFirstStack_) { + // if (adcThreshold.theStackADC_==1 && adcOld==1) { + // adcNew = int(255*135); // Arbitrarily use overflow value. + // } + // if (adcThreshold.theStackADC_ >1 && adcThreshold.theStackADC_!=255 && adcOld>=1){ + // adcNew = int((adcOld-1) * gain * 255/float(adcThreshold.theStackADC_-1)); + // } + // } + // + // if (adcNew >adcThreshold.thePixelThreshold ) { + // xx_adc[gIndex]=xx_d[gIndex]; + // yy_adc[gIndex]=yy_d[gIndex]; + // } + // else { + // xx_adc[gIndex]=0; // 0: dead pixel + // yy_adc[gIndex]=0; + // } + // adc[gIndex] = adcNew; + // } + // } + + + // Kernel to perform Raw to Digi conversion + __global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *Map, const unsigned char *modToUnp, + const uint32_t wordCounter, const uint32_t *Word, const uint8_t *fedIds, + uint16_t * XX, uint16_t * YY, uint16_t * ADC, + uint32_t * pdigi, uint32_t *rawIdArr, uint16_t * moduleId, + GPU::SimpleVector *err, + bool useQualityInfo, bool includeErrors, bool debug) + { + uint32_t blockId = blockIdx.x; + uint32_t threadId = threadIdx.x; + + bool skipROC = false; + //if (threadId==0) printf("Event: %u blockId: %u start: %u end: %u\n", eventno, blockId, begin, end); + + for (int aaa=0; aaa<1; ++aaa) { // too many coninue below.... 
(to be fixed) + auto gIndex = threadId + blockId*blockDim.x; + if (gIndex < wordCounter) { + + uint32_t fedId = fedIds[gIndex/2]; // +1200; + + // initialize (too many coninue below) + pdigi[gIndex] = 0; + rawIdArr[gIndex] = 0; + moduleId[gIndex] = 9999; + + uint32_t ww = Word[gIndex]; // Array containing 32 bit raw data + if (ww == 0) { + //noise and dead channels are ignored + XX[gIndex] = 0; // 0 is an indicator of a noise/dead channel + YY[gIndex] = 0; // skip these pixels during clusterization + ADC[gIndex] = 0; + continue ; // 0: bad word + } + + uint32_t link = getLink(ww); // Extract link + uint32_t roc = getRoc(ww); // Extract Roc in link + pixelgpudetails::DetIdGPU detId = getRawId(Map, fedId, link, roc); + + uint32_t errorType = checkROC(ww, fedId, link, Map, debug); + skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); + if (includeErrors and skipROC) + { + uint32_t rID = getErrRawID(fedId, ww, errorType, Map, debug); + err->emplace_back(rID, ww, errorType, fedId); + continue; + } + + uint32_t rawId = detId.RawId; + uint32_t rocIdInDetUnit = detId.rocInDet; + bool barrel = isBarrel(rawId); + + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; + if (useQualityInfo) { + + skipROC = Map->badRocs[index]; + if (skipROC) continue; + + } + skipROC = modToUnp[index]; + if (skipROC) continue; + + uint32_t layer = 0;//, ladder =0; + int side = 0, panel = 0, module = 0;//disk = 0,blade = 0 + + if (barrel) + { + layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask; + module = (rawId >> pixelgpudetails::moduleStartBit) & pixelgpudetails::moduleMask; + side = (module < 5)? -1 : 1; + } + else { + // endcap ids + layer = 0; + panel = (rawId >> pixelgpudetails::panelStartBit) & pixelgpudetails::panelMask; + //disk = (rawId >> diskStartBit_) & diskMask_ ; + side = (panel == 1)? 
-1 : 1; + //blade = (rawId>>bladeStartBit_) & bladeMask_; + } + + // ***special case of layer to 1 be handled here + pixelgpudetails::Pixel localPix; + if (layer == 1) { + uint32_t col = (ww >> pixelgpudetails::COL_shift) & pixelgpudetails::COL_mask; + uint32_t row = (ww >> pixelgpudetails::ROW_shift) & pixelgpudetails::ROW_mask; + localPix.row = row; + localPix.col = col; + if (includeErrors) { + if (not rocRowColIsValid(row, col)) { + uint32_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays + err->emplace_back(rawId, ww, error, fedId); + if(debug) printf("BPIX1 Error status: %i\n", error); + continue; + } + } + } else { + // ***conversion rules for dcol and pxid + uint32_t dcol = (ww >> pixelgpudetails::DCOL_shift) & pixelgpudetails::DCOL_mask; + uint32_t pxid = (ww >> pixelgpudetails::PXID_shift) & pixelgpudetails::PXID_mask; + uint32_t row = pixelgpudetails::numRowsInRoc - pxid/2; + uint32_t col = dcol*2 + pxid%2; + localPix.row = row; + localPix.col = col; + if (includeErrors and not dcolIsValid(dcol, pxid)) { + uint32_t error = conversionError(fedId, 3, debug); + err->emplace_back(rawId, ww, error, fedId); + if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); + continue; + } + } + + pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); + XX[gIndex] = globalPix.row ; // origin shifting by 1 0-159 + YY[gIndex] = globalPix.col ; // origin shifting by 1 0-415 + ADC[gIndex] = getADC(ww); + pdigi[gIndex] = pixelgpudetails::pack(globalPix.row,globalPix.col,ADC[gIndex]); + moduleId[gIndex] = detId.moduleId; + rawIdArr[gIndex] = rawId; + } // end of if (gIndex < end) + } // end fake loop + } // end of Raw to Digi kernel + + + // Interface to outside + void SiPixelRawToClusterGPUKernel::makeClustersAsync( + const SiPixelFedCablingMapGPU *cablingMap, + const unsigned char *modToUnp, + const SiPixelGainForHLTonGPU *gains, + const uint32_t wordCounter, const 
uint32_t fedCounter, + bool convertADCtoElectrons, + bool useQualityInfo, bool includeErrors, bool debug, + cuda::stream_t<>& stream) + { + nDigis = wordCounter; + + const int threadsPerBlock = 512; + const int blocks = (wordCounter + threadsPerBlock-1) /threadsPerBlock; // fill it all + + + assert(0 == wordCounter%2); + // wordCounter is the total no of words in each event to be trasfered on device + cudaCheck(cudaMemcpyAsync(&word_d[0], &word[0], wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(&fedId_d[0], &fedId_h[0], wordCounter*sizeof(uint8_t)/2, cudaMemcpyDefault, stream.id())); + + constexpr uint32_t vsize = sizeof(GPU::SimpleVector); + constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); + cudaCheck(cudaMemcpyAsync(error_d, error_h_tmp, vsize, cudaMemcpyDefault, stream.id())); + + // Launch rawToDigi kernel + RawToDigi_kernel<<>>( + cablingMap, + modToUnp, + wordCounter, + word_d, + fedId_d, + xx_d, yy_d, adc_d, + pdigi_d, + rawIdArr_d, + moduleInd_d, + error_d, + useQualityInfo, + includeErrors, + debug); + cudaCheck(cudaGetLastError()); + + // copy data to host variable + + cudaCheck(cudaMemcpyAsync(pdigi_h, pdigi_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(rawIdArr_h, rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + if (includeErrors) { + cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id())); + cudaStreamSynchronize(stream.id()); + error_h->set_data(data_h); + int size = error_h->size(); + cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id())); + } + // End of Raw2Digi and passing data for cluserisation + + { + // clusterizer ... 
+ using namespace gpuClustering; + int threadsPerBlock = 256; + int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; + + + gpuCalibPixel::calibDigis<<>>( + moduleInd_d, + xx_d, yy_d, adc_d, + gains, + wordCounter + ); + + cudaCheck(cudaGetLastError()); + + // calibrated adc + cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + /* + std::cout + << "CUDA countModules kernel launch with " << blocks + << " blocks of " << threadsPerBlock << " threads\n"; + */ + + nModulesActive = 0; + cudaCheck(cudaMemcpyAsync(moduleStart_d, &nModulesActive, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + countModules<<>>(moduleInd_d, moduleStart_d, clus_d, wordCounter); + cudaCheck(cudaGetLastError()); + + cudaCheck(cudaMemcpyAsync(&nModulesActive, moduleStart_d, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + // std::cout << "found " << nModulesActive << " Modules active" << std::endl; + + // TODO: I suspect we need a cudaStreamSynchronize before using nModules below + // In order to avoid the cudaStreamSynchronize, create a new kernel which launches countModules and findClus. 
+ + threadsPerBlock = 256; + blocks = nModulesActive; + + /* + std::cout + << "CUDA findClus kernel launch with " << blocks + << " blocks of " << threadsPerBlock << " threads\n"; + */ + + cudaCheck(cudaMemsetAsync(clusInModule_d, 0, (MaxNumModules)*sizeof(uint32_t), stream.id())); + + findClus<<>>( + moduleInd_d, + xx_d, yy_d, adc_d, + moduleStart_d, + clusInModule_d, moduleId_d, + clus_d, + debug_d, + wordCounter + ); + + // clusters + cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + + cudaStreamSynchronize(stream.id()); + cudaCheck(cudaGetLastError()); + + } // end clusterizer scope + + } + +} diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h new file mode 100644 index 0000000000000..ccc9e85ff3d24 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -0,0 +1,268 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h + +#include +#include +#include "cuda/api_wrappers.h" + +#include "FWCore/Utilities/interface/typedefs.h" +#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" +#include "siPixelRawToClusterHeterogeneousProduct.h" + +class SiPixelFedCablingMapGPU; +class SiPixelGainForHLTonGPU; + +namespace pixelgpudetails { + + // Phase 1 geometry constants + const uint32_t layerStartBit = 20; + const uint32_t ladderStartBit = 12; + const uint32_t moduleStartBit = 2; + + const uint32_t panelStartBit = 10; + const uint32_t diskStartBit = 18; + const uint32_t bladeStartBit = 12; + + const uint32_t layerMask = 0xF; + const uint32_t ladderMask = 0xFF; + const uint32_t moduleMask = 0x3FF; + const uint32_t panelMask = 0x3; + const uint32_t diskMask = 0xF; + const uint32_t bladeMask = 0x3F; + + const uint32_t LINK_bits = 6; + 
const uint32_t ROC_bits = 5; + const uint32_t DCOL_bits = 5; + const uint32_t PXID_bits = 8; + const uint32_t ADC_bits = 8; + + // special for layer 1 + const uint32_t LINK_bits_l1 = 6; + const uint32_t ROC_bits_l1 = 5; + const uint32_t COL_bits_l1 = 6; + const uint32_t ROW_bits_l1 = 7; + const uint32_t OMIT_ERR_bits = 1; + + const uint32_t maxROCIndex = 8; + const uint32_t numRowsInRoc = 80; + const uint32_t numColsInRoc = 52; + + const uint32_t MAX_WORD = 2000; + + const uint32_t ADC_shift = 0; + const uint32_t PXID_shift = ADC_shift + ADC_bits; + const uint32_t DCOL_shift = PXID_shift + PXID_bits; + const uint32_t ROC_shift = DCOL_shift + DCOL_bits; + const uint32_t LINK_shift = ROC_shift + ROC_bits_l1; + // special for layer 1 ROC + const uint32_t ROW_shift = ADC_shift + ADC_bits; + const uint32_t COL_shift = ROW_shift + ROW_bits_l1; + const uint32_t OMIT_ERR_shift = 20; + + const uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits_l1); + const uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1); + const uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1); + const uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits); + const uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits); + const uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits); + const uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits); + + struct DetIdGPU { + uint32_t RawId; + uint32_t rocInDet; + uint32_t moduleId; + }; + + struct Pixel { + uint32_t row; + uint32_t col; + }; + + class Packing { + public: + using PackedDigiType = uint32_t; + + // Constructor: pre-computes masks and shifts from field widths + __host__ __device__ + inline + constexpr Packing(unsigned int row_w, unsigned int column_w, + unsigned int time_w, unsigned int adc_w) : + row_width(row_w), + column_width(column_w), + adc_width(adc_w), + row_shift(0), + column_shift(row_shift + row_w), + time_shift(column_shift + 
column_w), + adc_shift(time_shift + time_w), + row_mask(~(~0U << row_w)), + column_mask( ~(~0U << column_w)), + time_mask(~(~0U << time_w)), + adc_mask(~(~0U << adc_w)), + rowcol_mask(~(~0U << (column_w+row_w))), + max_row(row_mask), + max_column(column_mask), + max_adc(adc_mask) + { } + + uint32_t row_width; + uint32_t column_width; + uint32_t adc_width; + + uint32_t row_shift; + uint32_t column_shift; + uint32_t time_shift; + uint32_t adc_shift; + + PackedDigiType row_mask; + PackedDigiType column_mask; + PackedDigiType time_mask; + PackedDigiType adc_mask; + PackedDigiType rowcol_mask; + + uint32_t max_row; + uint32_t max_column; + uint32_t max_adc; + }; + + __host__ __device__ + inline + constexpr Packing packing() { + return Packing(11, 11, 0, 10); + } + + + __host__ __device__ + inline + uint32_t pack(uint32_t row, uint32_t col, uint32_t adc) { + constexpr Packing thePacking = packing(); + adc = std::min(adc, thePacking.max_adc); + + return (row << thePacking.row_shift) | + (col << thePacking.column_shift) | + (adc << thePacking.adc_shift); + } + + using error_obj = siPixelRawToClusterHeterogeneousProduct::error_obj; + + + class SiPixelRawToClusterGPUKernel { + public: + SiPixelRawToClusterGPUKernel(); + ~SiPixelRawToClusterGPUKernel(); + + + SiPixelRawToClusterGPUKernel(const SiPixelRawToClusterGPUKernel&) = delete; + SiPixelRawToClusterGPUKernel(SiPixelRawToClusterGPUKernel&&) = delete; + SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete; + SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete; + + void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length); + + // Not really very async yet... 
+ void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, + const SiPixelGainForHLTonGPU *gains, + const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, + bool useQualityInfo, bool includeErrors, bool debug, + cuda::stream_t<>& stream); + + auto getProduct() const { + return siPixelRawToClusterHeterogeneousProduct::GPUProduct{ + pdigi_h, rawIdArr_h, clus_h, adc_h, error_h, + nDigis, nModulesActive, + xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d + }; + } + + private: + // input + unsigned int *word = nullptr; // to hold input for rawtodigi + unsigned char *fedId_h = nullptr; // to hold fed index for each word + + // output + uint32_t *pdigi_h = nullptr, *rawIdArr_h = nullptr; // host copy of output + uint16_t *adc_h = nullptr; int32_t *clus_h = nullptr; // host copy of calib&clus output + pixelgpudetails::error_obj *data_h = nullptr; + GPU::SimpleVector *error_h = nullptr; + GPU::SimpleVector *error_h_tmp = nullptr; + + uint32_t nDigis = 0; + uint32_t nModulesActive = 0; + + // scratch memory buffers + uint32_t * word_d; + uint8_t * fedId_d; + uint32_t * pdigi_d; + uint16_t * xx_d; + uint16_t * yy_d; + uint16_t * adc_d; + uint16_t * moduleInd_d; + uint32_t * rawIdArr_d; + + GPU::SimpleVector * error_d; + error_obj * data_d; + + // these are for the clusterizer (to be moved) + uint32_t * moduleStart_d; + int32_t * clus_d; + uint32_t * clusInModule_d; + uint32_t * moduleId_d; + uint32_t * debug_d; + }; + + + // configuration and memory buffers alocated on the GPU + struct context { + uint32_t * word_d; + uint8_t * fedId_d; + uint32_t * pdigi_d; + uint16_t * xx_d; + uint16_t * yy_d; + uint16_t * adc_d; + uint16_t * moduleInd_d; + uint32_t * rawIdArr_d; + + GPU::SimpleVector * error_d; + error_obj * data_d; + + // these are for the clusterizer (to be moved) + uint32_t * moduleStart_d; + int32_t * clus_d; + uint32_t * clusInModule_d; + uint32_t * moduleId_d; + 
uint32_t * debug_d; + }; + + // wrapper function to call RawToDigi on the GPU from host side + void RawToDigi_wrapper(context &, const SiPixelFedCablingMapGPU* cablingMapDevice, + SiPixelGainForHLTonGPU * const ped, + const uint32_t wordCounter, uint32_t *word, + const uint32_t fedCounter, uint8_t *fedId_h, + bool convertADCtoElectrons, uint32_t * pdigi_h, + uint32_t *rawIdArr_h, GPU::SimpleVector *error_h, + GPU::SimpleVector *error_h_tmp, error_obj *data_h, + uint16_t * adc_h, int32_t * clus_h, + bool useQualityInfo, bool includeErrors, bool debug, + uint32_t & nModulesActive); + + // void initCablingMap(); + context initDeviceMemory(); + void freeMemory(context &); + + // see RecoLocalTracker/SiPixelClusterizer + // all are runtime const, should be specified in python _cfg.py + struct ADCThreshold { + const int thePixelThreshold = 1000; // default Pixel threshold in electrons + const int theSeedThreshold = 1000; // seed thershold in electrons not used in our algo + const float theClusterThreshold = 4000; // cluster threshold in electron + const int ConversionFactor = 65; // adc to electron conversion factor + + const int theStackADC_ = 255; // the maximum adc count for stack layer + const int theFirstStack_ = 5; // the index of the fits stack layer + const double theElectronPerADCGain_ = 600; // ADC to electron conversion + }; + +} + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index b9d484b351407..ede4584176e59 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -12,6 +12,8 @@ + + diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index a283cb1fa74ce..0066361483318 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ 
b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -2,15 +2,17 @@ #define RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h #include -#include #include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" #include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelGenError.h" #include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelTemplate.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" +#include + class MagneticField; class PixelCPEFast final : public PixelCPEBase { @@ -41,6 +43,10 @@ class PixelCPEFast final : public PixelCPEBase ~PixelCPEFast(); + // The return value can only be used safely in kernels launched on + // the same cudaStream, or after cudaStreamSynchronize. + const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const; + private: ClusterParam * createClusterParam(const SiPixelCluster & cl) const override; @@ -71,15 +77,18 @@ class PixelCPEFast final : public PixelCPEBase //--- DB Error Parametrization object, new light templates std::vector< SiPixelGenErrorStore > thePixelGenError_; - -public : - void fillParamsForGpu(); - - // not needed if not used on CPU... std::vector> m_detParamsGPU; pixelCPEforGPU::CommonParams m_commonParamsGPU; - pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; - pixelCPEforGPU::ParamsOnGPU * d_paramsOnGPU; // copy of the above on the Device + + struct GPUData { + ~GPUData(); + // not needed if not used on CPU... 
+ pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; + pixelCPEforGPU::ParamsOnGPU * d_paramsOnGPU = nullptr; // copy of the above on the Device + }; + CUDAESProduct gpuData_; + + void fillParamsForGpu(); }; #endif // RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index f40fb759b1557..e4f588aab14f9 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -18,6 +18,7 @@ #include #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAServices/interface/numberOfCUDADevices.h" #include @@ -69,6 +70,20 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf, fillParamsForGpu(); } +const pixelCPEforGPU::ParamsOnGPU *PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) { + // and now copy to device... 
+ cudaCheck(cudaMalloc((void**) & data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); + cudaCheck(cudaMalloc((void**) & data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams))); + cudaCheck(cudaMalloc((void**) & data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); + + cudaCheck(cudaMemcpyAsync(data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_commonParams, &this->m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.data(), this->m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault, stream.id())); + }); + return data.d_paramsOnGPU; +} + void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.theThicknessB = m_DetParams.front().theThickness; m_commonParamsGPU.theThicknessE = m_DetParams.back().theThickness; @@ -115,22 +130,16 @@ void PixelCPEFast::fillParamsForGpu() { auto rr = pixelCPEforGPU::Rotation(p.theDet->surface().rotation()); g.frame = pixelCPEforGPU::Frame(vv.x(),vv.y(),vv.z(),rr); } - - // and now copy to device... 
- cudaCheck(cudaMalloc((void**) & h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); - cudaCheck(cudaMalloc((void**) & h_paramsOnGPU.m_detParams, m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams))); - cudaCheck(cudaMalloc((void**) & d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); - - cudaCheck(cudaMemcpy(d_paramsOnGPU, &h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault)); - cudaCheck(cudaMemcpy(h_paramsOnGPU.m_commonParams, &m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault)); - cudaCheck(cudaMemcpy(h_paramsOnGPU.m_detParams, m_detParamsGPU.data(), m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault)); - cudaDeviceSynchronize(); } -PixelCPEFast::~PixelCPEFast() { - cudaFree(h_paramsOnGPU.m_commonParams); - cudaFree(h_paramsOnGPU.m_detParams); - cudaFree(d_paramsOnGPU); +PixelCPEFast::~PixelCPEFast() {} + +PixelCPEFast::GPUData::~GPUData() { + if(d_paramsOnGPU != nullptr) { + cudaFree(h_paramsOnGPU.m_commonParams); + cudaFree(h_paramsOnGPU.m_detParams); + cudaFree(d_paramsOnGPU); + } } PixelCPEBase::ClusterParam* PixelCPEFast::createClusterParam(const SiPixelCluster & cl) const From 7501972a56ac28835c6762fd653ae86f1193dd4d Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Fri, 29 Jun 2018 10:21:35 +0200 Subject: [PATCH 027/149] Migrated PixelRecHit to Heterogeneous producer (cms-patatrack#81) Migrate PixelRecHit EDProducer to HeterogeneousEDProducer, including the cpu product. Data structures on gpu now include everything needed for Doublets, CA and fit. Layer splitting done: phi sorting (or partial sorting) requires #69. Includes some cleanup and bug fixes. 
--- .../interface/phase1PixelTopology.h | 9 ++++ .../python/RecoLocalTracker_cff.py | 9 +++- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 45 +++++++++++++------ .../python/SiPixelRecHits_cfi.py | 3 -- 4 files changed, 48 insertions(+), 18 deletions(-) diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index 455de58ce3408..37c97a92a3eaa 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -20,6 +20,15 @@ namespace phase1PixelTopology { constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule)* uint32_t(numColsInModule); + constexpr uint32_t numberOfModules = 1856; + + constexpr uint32_t layerStart[11] = {0,96,320,672,1184,1296,1408,1520,1632,1744,1856}; + constexpr char const * layerName[10] = {"BL1","BL2","BL3","BL4", + "E+1", "E+2", "E+3", + "E-1", "E-2", "E-3" + }; + + // this is for the ROC n<512 (upgrade 1024) constexpr inline uint16_t divu52(uint16_t n) { diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index b601e310db645..6d803d40bc870 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -17,16 +17,23 @@ striptrackerlocalreco = cms.Sequence(siStripZeroSuppression*siStripClusters*siStripMatchedRecHits) trackerlocalreco = cms.Sequence(pixeltrackerlocalreco*striptrackerlocalreco*clusterSummaryProducer) + from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import * from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import * from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import * +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import * +from 
RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneousConverter_cfi import siPixelRecHitHeterogeneousConverter as _siPixelRecHitHeterogeneousConverter +gpu.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitHeterogeneousConverter.clone()) + + + from Configuration.ProcessModifiers.gpu_cff import gpu _pixeltrackerlocalreco_gpu = pixeltrackerlocalreco.copy() _pixeltrackerlocalreco_gpu.replace(siPixelClustersPreSplitting, siPixelClustersHeterogeneous+siPixelClustersPreSplitting) +_pixeltrackerlocalreco_gpu.replace(siPixelRecHitsPreSplitting, siPixelRecHitHeterogeneous+siPixelRecHitsPreSplitting) gpu.toReplaceWith(pixeltrackerlocalreco, _pixeltrackerlocalreco_gpu) - from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import * from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import * diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 98a5198232591..c0e7841658e93 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -6,10 +6,14 @@ #include #include +#include "DataFormats/Math/interface/approx_atan2.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" namespace gpuPixelRecHits { + + + // to be moved in common namespace... 
constexpr uint16_t InvId=9999; // must be > MaxNumModules @@ -20,6 +24,7 @@ namespace gpuPixelRecHits { __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * cpeParams, + float const * bs, uint16_t const * id, uint16_t const * x, uint16_t const * y, @@ -27,13 +32,15 @@ namespace gpuPixelRecHits { uint32_t const * digiModuleStart, uint32_t const * clusInModule, uint32_t const * moduleId, - int32_t const * clus, + int32_t const * clus, int numElements, uint32_t const * hitsModuleStart, int32_t * chargeh, - float * xh, float * yh, float * zh, - float * xe, float * ye, uint16_t * mr, - bool local) // if true fill just x & y in local coord + uint16_t * detInd, + float * xg, float * yg, float * zg, float * rg, int16_t * iph, + float * xl, float * yl, + float * xe, float * ye, + uint16_t * mr, uint16_t * mc) { // as usual one block per module __shared__ ClusParams clusParams; @@ -108,16 +115,26 @@ namespace gpuPixelRecHits { chargeh[h] = clusParams.charge[ic]; - if (local) { - xh[h] = clusParams.xpos[ic]; - yh[h] = clusParams.ypos[ic]; - } else { - cpeParams->detParams(me).frame.toGlobal(clusParams.xpos[ic], clusParams.ypos[ic], - xh[h], yh[h], zh[h] ); - } - xe[h] = clusParams.xerr[ic]; - ye[h] = clusParams.yerr[ic]; - mr[h] = clusParams.minRow[ic]; + detInd[h] = me; + + xl[h]= clusParams.xpos[ic]; + yl[h]= clusParams.ypos[ic]; + + xe[h]= clusParams.xerr[ic]; + ye[h]= clusParams.yerr[ic]; + mr[h]= clusParams.minRow[ic]; + mc[h]= clusParams.minCol[ic]; + + // to global and compute phi... + cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]); + // here correct for the beamspot... 
+ xg[h]-=bs[0]; + yg[h]-=bs[1]; + zg[h]-=bs[2]; + + rg[h] = std::sqrt(xg[h]*xg[h]+yg[h]*yg[h]); + iph[h] = unsafe_atan2s<7>(yg[h],xg[h]); + } } diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 87a0d3118554b..5844235e29596 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -7,9 +7,6 @@ VerboseLevel = cms.untracked.int32(0), ) -from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import siPixelRecHitHeterogeneous as _siPixelRecHitHeterogeneous -gpu.toReplaceWith(siPixelRecHits, _siPixelRecHitHeterogeneous) - siPixelRecHitsPreSplitting = siPixelRecHits.clone( src = 'siPixelClustersPreSplitting' ) From 59c32aae1231aacf968e4cbc97c0546bb01ee1cd Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 29 Jun 2018 20:34:48 +0200 Subject: [PATCH 028/149] Various fixes and cleanup (cms-patatrack#87) - replace `exclusive_scan` with `memset` + `inclusive_scan` to avoid an invalid read - fix memory sizes in allocations and copies - add a missing stream synchronize - set `recordWatcherUpdatedSinceLastTransfer_` to avoid spurious copies --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index fdf7455475197..991e82ccd2491 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -39,7 +39,7 @@ namespace pixelgpudetails { SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel() { - int WSIZE = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD * sizeof(unsigned int); + int WSIZE = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; 
cudaMallocHost(&word, sizeof(unsigned int)*WSIZE); cudaMallocHost(&fedId_h, sizeof(unsigned char)*WSIZE); @@ -680,8 +680,8 @@ namespace pixelgpudetails { // std::cout << "found " << nModulesActive << " Modules active" << std::endl; - // TODO: I suspect we need a cudaStreamSynchronize before using nModules below // In order to avoid the cudaStreamSynchronize, create a new kernel which launches countModules and findClus. + cudaStreamSynchronize(stream.id()); threadsPerBlock = 256; blocks = nModulesActive; From bfbbaac382385e06e454d3ba8c031f71cf3afec7 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 4 Jul 2018 23:47:58 +0200 Subject: [PATCH 029/149] Synchronise with CMSSW_10_2_0_pre6 --- RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index ede4584176e59..c52545a601341 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -8,6 +8,7 @@ + From c44ba3eb45d001ba0c53cd55868a23c22205c65a Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sun, 8 Jul 2018 12:16:12 +0200 Subject: [PATCH 030/149] Update the PixelCPEFast code following the reorganisation in #23571 --- .../SiPixelRecHits/interface/PixelCPEFast.h | 22 +++---- .../SiPixelRecHits/src/PixelCPEFast.cc | 66 +++++++++---------- 2 files changed, 42 insertions(+), 46 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 0066361483318..35a3e6bf2a82c 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -3,16 +3,16 @@ #include +#include + #include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" +#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" +#include 
"CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" #include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" #include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelGenError.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelTemplate.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -#include - class MagneticField; class PixelCPEFast final : public PixelCPEBase { @@ -29,10 +29,10 @@ class PixelCPEFast final : public PixelCPEBase // These are errors predicted by PIXELAV float sigmay; // CPE Generic y-error for multi-pixel cluster float sigmax; // CPE Generic x-error for multi-pixel cluster - float sy1 ; // CPE Generic y-error for single single-pixel - float sy2 ; // CPE Generic y-error for single double-pixel cluster - float sx1 ; // CPE Generic x-error for single single-pixel cluster - float sx2 ; // CPE Generic x-error for single double-pixel cluster + float sy1; // CPE Generic y-error for single single-pixel + float sy2; // CPE Generic y-error for single double-pixel cluster + float sx1; // CPE Generic x-error for single single-pixel cluster + float sx2; // CPE Generic x-error for single double-pixel cluster }; @@ -69,9 +69,9 @@ class PixelCPEFast final : public PixelCPEBase float EdgeClusterErrorX_; float EdgeClusterErrorY_; - std::vector xerr_barrel_l1_,yerr_barrel_l1_,xerr_barrel_ln_; - std::vector yerr_barrel_ln_,xerr_endcap_,yerr_endcap_; - float xerr_barrel_l1_def_, yerr_barrel_l1_def_,xerr_barrel_ln_def_; + std::vector xerr_barrel_l1_, yerr_barrel_l1_, xerr_barrel_ln_; + std::vector yerr_barrel_ln_, xerr_endcap_, yerr_endcap_; + float xerr_barrel_l1_def_, yerr_barrel_l1_def_, xerr_barrel_ln_def_; float yerr_barrel_ln_def_, xerr_endcap_def_, yerr_endcap_def_; //--- DB Error Parametrization object, new light templates diff --git 
a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index e4f588aab14f9..af7dd7337084e 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -1,26 +1,22 @@ -#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" - -#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include -#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" -#include "Geometry/TrackerGeometryBuilder/interface/RectangularPixelTopology.h" +#include +#include -// this is needed to get errors from templates -#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelTemplate.h" +#include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" #include "DataFormats/DetId/interface/DetId.h" - - -// Services #include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" +#include "Geometry/TrackerGeometryBuilder/interface/RectangularPixelTopology.h" +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include "HeterogeneousCore/CUDAServices/interface/numberOfCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "MagneticField/Engine/interface/MagneticField.h" -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAServices/interface/numberOfCUDADevices.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" -#include +// Services +// this is needed to get errors from templates namespace { constexpr float micronsToCm = 1.0e-4; @@ -54,18 +50,18 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf, // Rechit errors in case other, more correct, errors are not vailable // This are constants. Maybe there is a more efficienct way to store them. 
- xerr_barrel_l1_= {0.00115, 0.00120, 0.00088}; - xerr_barrel_l1_def_=0.01030; - yerr_barrel_l1_= {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; - yerr_barrel_l1_def_=0.00210; - xerr_barrel_ln_= {0.00115, 0.00120, 0.00088}; - xerr_barrel_ln_def_=0.01030; - yerr_barrel_ln_= {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; - yerr_barrel_ln_def_=0.00210; - xerr_endcap_= {0.0020, 0.0020}; - xerr_endcap_def_=0.0020; - yerr_endcap_= {0.00210}; - yerr_endcap_def_=0.00075; + xerr_barrel_l1_ = { 0.00115, 0.00120, 0.00088 }; + xerr_barrel_l1_def_ = 0.01030; + yerr_barrel_l1_ = { 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240 }; + yerr_barrel_l1_def_ = 0.00210; + xerr_barrel_ln_ = { 0.00115, 0.00120, 0.00088}; + xerr_barrel_ln_def_ = 0.01030; + yerr_barrel_ln_ = { 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240 }; + yerr_barrel_ln_def_ = 0.00210; + xerr_endcap_ = { 0.0020, 0.0020 }; + xerr_endcap_def_ = 0.0020; + yerr_endcap_ = { 0.00210 }; + yerr_endcap_def_ = 0.00075; fillParamsForGpu(); } @@ -90,7 +86,7 @@ void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; - uint32_t oldLayer = 0; + //uint32_t oldLayer = 0; m_detParamsGPU.resize(m_DetParams.size()); for (auto i=0U; iindex()==int(i)); assert(m_commonParamsGPU.thePitchY==p.thePitchY); assert(m_commonParamsGPU.thePitchX==p.thePitchX); - // assert(m_commonParamsGPU.theThickness==p.theThickness); + //assert(m_commonParamsGPU.theThickness==p.theThickness); g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); g.isPosZ = p.theDet->surface().position().z()>0; @@ -109,13 +105,13 @@ void PixelCPEFast::fillParamsForGpu() { assert( (g.isBarrel ?m_commonParamsGPU.theThicknessB : m_commonParamsGPU.theThicknessE) ==p.theThickness ); - // if (m_commonParamsGPU.theThickness!=p.theThickness) + //if 
(m_commonParamsGPU.theThickness!=p.theThickness) // std::cout << i << (g.isBarrel ? "B " : "E ") << m_commonParamsGPU.theThickness<<"!="< Date: Tue, 24 Jul 2018 12:56:38 +0200 Subject: [PATCH 031/149] Various fixes and cleanup (cms-patatrack#101) Fix errors found by cuda-memcheck: - properly initialise device memory - fix various cudaMemcpy calls Remove unused debug variables and function declarations, and #ifdef some debug printouts. Call cudaDeviceReset() before exiting, via the destructor of CUDAService. This explicitly destroys and cleans up all resources associated with the current device, and is useful to check for memory leaks with cuda-memcheck --tool memcheck --leak-check full. --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 8 +------- .../plugins/SiPixelRawToClusterGPUKernel.h | 14 -------------- .../SiPixelClusterizer/plugins/gpuClustering.h | 3 +-- 3 files changed, 2 insertions(+), 23 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 991e82ccd2491..65d3fc3626137 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -90,10 +90,7 @@ namespace pixelgpudetails { cudaCheck(cudaMalloc((void**) & moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); cudaCheck(cudaMalloc((void**) & clusInModule_d,(MaxNumModules)*sizeof(uint32_t) )); cudaCheck(cudaMalloc((void**) & moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); - - cudaCheck(cudaMalloc((void**) & debug_d, MAX_WORD32_SIZE)); } - SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { // free device memory used for RawToDigi on GPU @@ -114,7 +111,6 @@ namespace pixelgpudetails { cudaCheck(cudaFree(clus_d)); cudaCheck(cudaFree(clusInModule_d)); cudaCheck(cudaFree(moduleId_d)); - cudaCheck(cudaFree(debug_d)); } void 
SiPixelRawToClusterGPUKernel::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { @@ -662,7 +658,7 @@ namespace pixelgpudetails { cudaCheck(cudaGetLastError()); // calibrated adc - cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); /* std::cout @@ -700,14 +696,12 @@ namespace pixelgpudetails { moduleStart_d, clusInModule_d, moduleId_d, clus_d, - debug_d, wordCounter ); // clusters cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaStreamSynchronize(stream.id()); cudaCheck(cudaGetLastError()); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index ccc9e85ff3d24..2b0b205c9f536 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -208,9 +208,7 @@ namespace pixelgpudetails { int32_t * clus_d; uint32_t * clusInModule_d; uint32_t * moduleId_d; - uint32_t * debug_d; }; - // configuration and memory buffers alocated on the GPU struct context { @@ -234,18 +232,6 @@ namespace pixelgpudetails { uint32_t * debug_d; }; - // wrapper function to call RawToDigi on the GPU from host side - void RawToDigi_wrapper(context &, const SiPixelFedCablingMapGPU* cablingMapDevice, - SiPixelGainForHLTonGPU * const ped, - const uint32_t wordCounter, uint32_t *word, - const uint32_t fedCounter, uint8_t *fedId_h, - bool convertADCtoElectrons, uint32_t * pdigi_h, - uint32_t *rawIdArr_h, GPU::SimpleVector *error_h, - GPU::SimpleVector *error_h_tmp, error_obj *data_h, - uint16_t * adc_h, int32_t * clus_h, - bool useQualityInfo, bool includeErrors, bool debug, - uint32_t & nModulesActive); - // void 
initCablingMap(); context initDeviceMemory(); void freeMemory(context &); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 12f205742433e..60be134a4ee46 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -36,7 +36,7 @@ namespace gpuClustering { uint16_t const * adc, uint32_t const * moduleStart, uint32_t * clusInModule, uint32_t * moduleId, - int32_t * clus, uint32_t * debug, + int32_t * clus, int numElements) { __shared__ bool go; @@ -98,7 +98,6 @@ namespace gpuClustering { if (id[i] == InvId) // not valid continue; assert(id[i] == me); // break; // end of module - ++debug[i]; auto js = i + 1; auto jm = jmax[k]; jmax[k] = i + 1; From 4dc680151b3db6e5429032e21f85e15072957eb2 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 25 Jul 2018 22:10:18 +0200 Subject: [PATCH 032/149] Fix synchronisation problems in the clusterizer (cms-patatrack#102) Fix the use of `__syncthreads()` in the `calibDigis` and `findClus` kernels (possibly fixes #84). Improve `SiPixelRawToClusterGPUKernel::makeClustersAsync()` to avoid calls to `cudaStreamSynchronize` (fixes #66). Improve the documentation. 
--- .../plugins/SiPixelRawToClusterGPUKernel.cu | 371 ++++++++---------- .../plugins/gpuCalibPixel.h | 3 - .../plugins/gpuClustering.h | 211 +++++----- 3 files changed, 285 insertions(+), 300 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 65d3fc3626137..29e5e82049b5c 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -91,7 +91,7 @@ namespace pixelgpudetails { cudaCheck(cudaMalloc((void**) & clusInModule_d,(MaxNumModules)*sizeof(uint32_t) )); cudaCheck(cudaMalloc((void**) & moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); } - + SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { // free device memory used for RawToDigi on GPU // free the GPU memory @@ -118,7 +118,6 @@ namespace pixelgpudetails { std::memset(fedId_h+wordCounterGPU/2, fedId - 1200, length/2); } - //////////////////// __device__ uint32_t getLink(uint32_t ww) { @@ -140,15 +139,12 @@ namespace pixelgpudetails { return (1==((rawId>>25)&0x7)); } - - __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * Map, uint32_t fed, uint32_t link, uint32_t roc) { uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; pixelgpudetails::DetIdGPU detId = { Map->RawId[index], Map->rocInDet[index], Map->moduleId[index] }; return detId; } - //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 // Convert local pixel to pixelgpudetails::global pixel @@ -232,141 +228,132 @@ namespace pixelgpudetails { __device__ uint32_t conversionError(uint32_t fedId, uint32_t status, bool debug = false) { - uint32_t errorType = 0; // debug = true; switch (status) { - case(1) : { - if 
(debug) printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId ); - errorType = 35; - break; - } - case(2) : { - if (debug) printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); - errorType = 36; - break; - } - case(3) : { - if (debug) printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); - errorType = 37; - break; - } - case(4) : { - if (debug) printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); - errorType = 38; - break; - } - default: if (debug) printf("Cabling check returned unexpected result, status = %i\n", status); + case(1) : { + if (debug) printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId ); + errorType = 35; + break; + } + case(2) : { + if (debug) printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); + errorType = 36; + break; + } + case(3) : { + if (debug) printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); + errorType = 37; + break; + } + case(4) : { + if (debug) printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); + errorType = 38; + break; + } + default: + if (debug) printf("Cabling check returned unexpected result, status = %i\n", status); }; return errorType; - } - __device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) { - uint32_t numRowsInRoc = 80; - uint32_t numColsInRoc = 52; + uint32_t numRowsInRoc = 80; + uint32_t numColsInRoc = 52; - /// row and collumn in ROC representation - return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc)); + /// row and collumn in ROC representation + return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc)); } - __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) { - return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); + return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } - __device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *Map, bool debug = false) { 
+ int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; + if (errorType < 25) return false; + bool errorFound = false; - int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; - if (errorType < 25) return false; - bool errorFound = false; - - switch (errorType) { + switch (errorType) { case(25) : { - errorFound = true; - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1; - if (index > 1 && index <= Map->size){ - if (!(link == Map->link[index] && 1 == Map->roc[index])) errorFound = false; - } - if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); - break; - } - case(26) : { - if (debug) printf("Gap word found (errorType = 26)\n"); - errorFound = true; - break; - } - case(27) : { - if (debug) printf("Dummy word found (errorType = 27)\n"); - errorFound = true; - break; - } - case(28) : { - if (debug) printf("Error fifo nearly full (errorType = 28)\n"); - errorFound = true; - break; - } - case(29) : { - if (debug) printf("Timeout on a channel (errorType = 29)\n"); - if ((errorWord >> pixelgpudetails::OMIT_ERR_shift) & pixelgpudetails::OMIT_ERR_mask) { - if (debug) printf("...first errorType=29 error, this gets masked out\n"); - } - errorFound = true; - break; - } - case(30) : { - if (debug) printf("TBM error trailer (errorType = 30)\n"); - int StateMatch_bits = 4; - int StateMatch_shift = 8; - uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); - int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; - if ( StateMatch != 1 && StateMatch != 8 ) { - if (debug) printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); - } - if ( StateMatch == 1 ) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 - errorFound = true; - break; - } - case(31) : { - if (debug) printf("Event number error (errorType = 31)\n"); - errorFound = true; - break; - } - default: errorFound = false; - - }; - - return errorFound? 
errorType : 0; + errorFound = true; + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1; + if (index > 1 && index <= Map->size) { + if (!(link == Map->link[index] && 1 == Map->roc[index])) errorFound = false; + } + if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); + break; + } + case(26) : { + if (debug) printf("Gap word found (errorType = 26)\n"); + errorFound = true; + break; + } + case(27) : { + if (debug) printf("Dummy word found (errorType = 27)\n"); + errorFound = true; + break; + } + case(28) : { + if (debug) printf("Error fifo nearly full (errorType = 28)\n"); + errorFound = true; + break; + } + case(29) : { + if (debug) printf("Timeout on a channel (errorType = 29)\n"); + if ((errorWord >> pixelgpudetails::OMIT_ERR_shift) & pixelgpudetails::OMIT_ERR_mask) { + if (debug) printf("...first errorType=29 error, this gets masked out\n"); + } + errorFound = true; + break; + } + case(30) : { + if (debug) printf("TBM error trailer (errorType = 30)\n"); + int StateMatch_bits = 4; + int StateMatch_shift = 8; + uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); + int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; + if ( StateMatch != 1 && StateMatch != 8 ) { + if (debug) printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); + } + if ( StateMatch == 1 ) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + errorFound = true; + break; + } + case(31) : { + if (debug) printf("Event number error (errorType = 31)\n"); + errorFound = true; + break; + } + default: + errorFound = false; + }; + return errorFound? 
errorType : 0; } - __device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *Map, bool debug = false) { - uint32_t rID = 0xffffffff; switch (errorType) { - case 25 : case 30 : case 31 : case 36 : case 40 : { + case 25 : case 30 : case 31 : case 36 : case 40 : { //set dummy values for cabling just to get detId from link //cabling.dcol = 0; //cabling.pxid = 2; uint32_t roc = 1; uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; - uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; - if(rID_temp != 9999) rID = rID_temp; + if (rID_temp != 9999) rID = rID_temp; break; } - case 29 : { + case 29 : { int chanNmbr = 0; const int DB0_shift = 0; const int DB1_shift = DB0_shift + 1; @@ -398,7 +385,7 @@ namespace pixelgpudetails { if(rID_temp != 9999) rID = rID_temp; break; } - case 37 : case 38: { + case 37 : case 38: { //cabling.dcol = 0; //cabling.pxid = 2; uint32_t roc = (errWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask; @@ -407,25 +394,22 @@ namespace pixelgpudetails { if(rID_temp != 9999) rID = rID_temp; break; } - - default : break; - + default: + break; }; return rID; - } - /*---------- - * Name: applyADCthreshold_kernel() - * Desc: converts adc count to electrons and then applies the - * threshold on each channel. - * make pixel to 0 if it is below the threshold - * Input: xx_d[], yy_d[], layer_d[], wordCounter, adc[], ADCThreshold - *----------- - * Output: xx_adc[], yy_adc[] with pixel threshold applied - */ + * Name: applyADCthreshold_kernel() + * Desc: converts adc count to electrons and then applies the + * threshold on each channel. 
+ * make pixel to 0 if it is below the threshold + * Input: xx_d[], yy_d[], layer_d[], wordCounter, adc[], ADCThreshold + *----------- + * Output: xx_adc[], yy_adc[] with pixel threshold applied + */ // kernel to apply adc threshold on the channels @@ -465,28 +449,28 @@ namespace pixelgpudetails { // Kernel to perform Raw to Digi conversion __global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *Map, const unsigned char *modToUnp, - const uint32_t wordCounter, const uint32_t *Word, const uint8_t *fedIds, - uint16_t * XX, uint16_t * YY, uint16_t * ADC, - uint32_t * pdigi, uint32_t *rawIdArr, uint16_t * moduleId, - GPU::SimpleVector *err, - bool useQualityInfo, bool includeErrors, bool debug) + const uint32_t wordCounter, const uint32_t *Word, const uint8_t *fedIds, + uint16_t * XX, uint16_t * YY, uint16_t * ADC, + uint32_t * pdigi, uint32_t *rawIdArr, uint16_t * moduleId, + GPU::SimpleVector *err, + bool useQualityInfo, bool includeErrors, bool debug) { uint32_t blockId = blockIdx.x; uint32_t threadId = threadIdx.x; bool skipROC = false; //if (threadId==0) printf("Event: %u blockId: %u start: %u end: %u\n", eventno, blockId, begin, end); - + for (int aaa=0; aaa<1; ++aaa) { // too many coninue below.... 
(to be fixed) auto gIndex = threadId + blockId*blockDim.x; if (gIndex < wordCounter) { - - uint32_t fedId = fedIds[gIndex/2]; // +1200; + + uint32_t fedId = fedIds[gIndex/2]; // +1200; // initialize (too many coninue below) pdigi[gIndex] = 0; rawIdArr[gIndex] = 0; - moduleId[gIndex] = 9999; + moduleId[gIndex] = 9999; uint32_t ww = Word[gIndex]; // Array containing 32 bit raw data if (ww == 0) { @@ -517,8 +501,8 @@ namespace pixelgpudetails { uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; if (useQualityInfo) { - skipROC = Map->badRocs[index]; - if (skipROC) continue; + skipROC = Map->badRocs[index]; + if (skipROC) continue; } skipROC = modToUnp[index]; @@ -581,17 +565,16 @@ namespace pixelgpudetails { moduleId[gIndex] = detId.moduleId; rawIdArr[gIndex] = rawId; } // end of if (gIndex < end) - } // end fake loop + } // end fake loop } // end of Raw to Digi kernel - // Interface to outside void SiPixelRawToClusterGPUKernel::makeClustersAsync( const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const uint32_t wordCounter, const uint32_t fedCounter, - bool convertADCtoElectrons, + bool convertADCtoElectrons, bool useQualityInfo, bool includeErrors, bool debug, cuda::stream_t<>& stream) { @@ -605,11 +588,11 @@ namespace pixelgpudetails { // wordCounter is the total no of words in each event to be trasfered on device cudaCheck(cudaMemcpyAsync(&word_d[0], &word[0], wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); cudaCheck(cudaMemcpyAsync(&fedId_d[0], &fedId_h[0], wordCounter*sizeof(uint8_t)/2, cudaMemcpyDefault, stream.id())); - + constexpr uint32_t vsize = sizeof(GPU::SimpleVector); constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); cudaCheck(cudaMemcpyAsync(error_d, error_h_tmp, vsize, cudaMemcpyDefault, stream.id())); - + // Launch rawToDigi kernel RawToDigi_kernel<<>>( cablingMap, @@ -633,80 +616,64 @@ namespace pixelgpudetails { 
cudaCheck(cudaMemcpyAsync(rawIdArr_h, rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); if (includeErrors) { - cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id())); - cudaStreamSynchronize(stream.id()); - error_h->set_data(data_h); - int size = error_h->size(); - cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id())); + cudaCheck(cudaStreamSynchronize(stream.id())); + error_h->set_data(data_h); + int size = error_h->size(); + cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id())); } // End of Raw2Digi and passing data for cluserisation - { - // clusterizer ... - using namespace gpuClustering; - int threadsPerBlock = 256; - int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; - - - gpuCalibPixel::calibDigis<<>>( - moduleInd_d, - xx_d, yy_d, adc_d, - gains, - wordCounter - ); - - cudaCheck(cudaGetLastError()); - - // calibrated adc - cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); - - /* - std::cout - << "CUDA countModules kernel launch with " << blocks - << " blocks of " << threadsPerBlock << " threads\n"; - */ - - nModulesActive = 0; - cudaCheck(cudaMemcpyAsync(moduleStart_d, &nModulesActive, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - - countModules<<>>(moduleInd_d, moduleStart_d, clus_d, wordCounter); - cudaCheck(cudaGetLastError()); - - cudaCheck(cudaMemcpyAsync(&nModulesActive, moduleStart_d, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - - // std::cout << "found " << nModulesActive << " Modules active" << std::endl; - - // In order to avoid the cudaStreamSynchronize, create a new kernel which launches countModules and findClus. 
- cudaStreamSynchronize(stream.id()); - - threadsPerBlock = 256; - blocks = nModulesActive; - - /* - std::cout - << "CUDA findClus kernel launch with " << blocks - << " blocks of " << threadsPerBlock << " threads\n"; - */ - - cudaCheck(cudaMemsetAsync(clusInModule_d, 0, (MaxNumModules)*sizeof(uint32_t), stream.id())); - - findClus<<>>( - moduleInd_d, - xx_d, yy_d, adc_d, - moduleStart_d, - clusInModule_d, moduleId_d, - clus_d, - wordCounter - ); - - // clusters - cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - - cudaStreamSynchronize(stream.id()); - cudaCheck(cudaGetLastError()); - - } // end clusterizer scope - + { + // clusterizer ... + using namespace gpuClustering; + int threadsPerBlock = 256; + int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; + + + gpuCalibPixel::calibDigis<<>>( + moduleInd_d, + xx_d, yy_d, adc_d, + gains, + wordCounter); + cudaCheck(cudaGetLastError()); + + // calibrated adc + cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); + + /* + std::cout + << "CUDA countModules kernel launch with " << blocks + << " blocks of " << threadsPerBlock << " threads\n"; + */ + + cudaCheck(cudaMemsetAsync(moduleStart_d, 0x00, sizeof(uint32_t), stream.id())); + + countModules<<>>(moduleInd_d, moduleStart_d, clus_d, wordCounter); + cudaCheck(cudaGetLastError()); + + // read the number of modules into a data member, used by getProduct()) + cudaCheck(cudaMemcpyAsync(&nModulesActive, moduleStart_d, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + threadsPerBlock = 256; + blocks = MaxNumModules; + /* + std::cout << "CUDA findClus kernel launch with " << blocks + << " blocks of " << threadsPerBlock << " threads\n"; + */ + cudaCheck(cudaMemsetAsync(clusInModule_d, 0, (MaxNumModules)*sizeof(uint32_t), stream.id())); + findClus<<>>( + moduleInd_d, + xx_d, yy_d, + moduleStart_d, + clusInModule_d, moduleId_d, + clus_d, + wordCounter); 
+ cudaCheck(cudaGetLastError()); + + // clusters + cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + } // end clusterizer scope } } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 5e1c2a7486b56..c096afd5f44fc 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -52,9 +52,6 @@ namespace gpuCalibPixel { // if (threadIdx.x==0) // printf ("calibrated %d\n",id[i]); - - __syncthreads(); - } __global__ void calibADCByModule(uint16_t * id, diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 60be134a4ee46..d9a98437b52cf 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -11,13 +11,13 @@ namespace gpuClustering { __global__ void countModules(uint16_t const * id, uint32_t * moduleStart, - int32_t * clus, + int32_t * clusterId, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i >= numElements) return; - clus[i] = i; + clusterId[i] = i; if (InvId == id[i]) return; auto j = i - 1; @@ -30,137 +30,158 @@ namespace gpuClustering { } } - __global__ void findClus(uint16_t const * id, - uint16_t const * x, - uint16_t const * y, - uint16_t const * adc, - uint32_t const * moduleStart, - uint32_t * clusInModule, uint32_t * moduleId, - int32_t * clus, + __global__ void findClus(uint16_t const * id, // module id of each pixel + uint16_t const * x, // local coordinates of each pixel + uint16_t const * y, // + uint32_t const * moduleStart, // index of the first pixel of each module + uint32_t * nClustersInModule, // output: number of clusters found in each module + uint32_t * moduleId, // output: module id of each module + int32_t * clusterId, // output: 
cluster id of each pixel int numElements) { - __shared__ bool go; - __shared__ int nclus; - __shared__ int msize; - if (blockIdx.x >= moduleStart[0]) return; - auto first = moduleStart[1 + blockIdx.x]; - auto me = id[first]; - - assert(me < MaxNumModules); + auto firstPixel = moduleStart[1 + blockIdx.x]; + auto thisModuleId = id[firstPixel]; + assert(thisModuleId < MaxNumModules); #ifdef GPU_DEBUG - if (me%100 == 1) + if (thisModuleId % 100 == 1) if (threadIdx.x == 0) - printf("start clusterizer for module %d in block %d\n", me, blockIdx.x); + printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x); #endif - first += threadIdx.x; - if (first>= numElements) - return; - - go = true; - nclus = 0; + auto first = firstPixel + threadIdx.x; + // find the index of the first pixel not belonging to this module (or invalid) + __shared__ int msize; msize = numElements; __syncthreads(); - for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) // not valid - continue; - if (id[i] != me) { // end of module - atomicMin(&msize, i); - break; + // skip threads not associated to an existing pixel + bool active = (first < numElements); + if (active) { + for (int i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + if (id[i] != thisModuleId) { // find the first pixel in a different module + atomicMin(&msize, i); + break; + } } } + __syncthreads(); + assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId))); - assert(msize<= numElements); - if (first>= msize) - return; + // skip threads not assocoated to pixels in this module + active = (first < msize); - int jmax[10]; - auto niter = (msize - first) / blockDim.x; - assert(niter < 10); - for (int k = 0; k < niter + 1; ++k) + // assume that we can cover the whole module with up to 10 blockDim.x-wide iterations + constexpr int maxiter = 10; + if (active) { + assert(((msize - first) / blockDim.x) <= maxiter); 
+ } + int jmax[maxiter]; + for (int k = 0; k < maxiter; ++k) jmax[k] = msize; - while (go) { - __syncthreads(); - go = false; - - __syncthreads(); - int k = -1; - for (int i = first; i < msize; i += blockDim.x) { - ++k; - if (id[i] == InvId) // not valid - continue; - assert(id[i] == me); // break; // end of module - auto js = i + 1; - auto jm = jmax[k]; - jmax[k] = i + 1; - for (int j = js; j < jm; ++j) { - if (id[j] == InvId) // not valid - continue; - if (std::abs(int(x[j]) - int(x[i])) > 1 | - std::abs(int(y[j]) - int(y[i])) > 1) + __syncthreads(); + // for each pixel, look at all the pixels until the end of the module; + // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; + // after the loop, all the pixel in each cluster should have the id equeal to the lowest + // pixel in the cluster ( clus[i] == i ). + bool done = false; + while (not __syncthreads_and(done)) { + done = true; + if (active) { + for (int i = first, k = 0; i < msize; i += blockDim.x, ++k) { + if (id[i] == InvId) // skip invalid pixels continue; - auto old = atomicMin(&clus[j], clus[i]); - if (old != clus[i]) go = true; - atomicMin(&clus[i], old); - jmax[k] = j + 1; + assert(id[i] == thisModuleId); // same module + auto js = i + 1; + auto jm = jmax[k]; + jmax[k] = i + 1; + for (int j = js; j < jm; ++j) { + if (id[j] == InvId) // skip invalid pixels + continue; + if (std::abs(int(x[j]) - int(x[i])) > 1 or + std::abs(int(y[j]) - int(y[i])) > 1) + continue; + auto old = atomicMin(&clusterId[j], clusterId[i]); + if (old != clusterId[i]) { + // end the loop only if no changes were applied + done = false; + } + atomicMin(&clusterId[i], old); + // update the loop boundary for the next iteration + jmax[k] = j + 1; + } } } - assert (k<= niter); - __syncthreads(); } - nclus = 0; + __shared__ int foundClusters; + foundClusters = 0; __syncthreads(); - for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) // not valid - continue; - if (id[i] != me) 
// end of module - break; - if (clus[i] == i) { - auto old = atomicAdd(&nclus, 1); - clus[i] = -(old + 1); + + // find the number of different clusters, identified by a pixels with clus[i] == i; + // mark these pixels with a negative id. + if (active) { + for (int i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + if (id[i] != thisModuleId) // stop once in a different module + break; + if (clusterId[i] == i) { + auto old = atomicAdd(&foundClusters, 1); + clusterId[i] = -(old + 1); + } } } - __syncthreads(); - for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) // not valid - continue; - if (id[i] != me) // end of module - break; - if (clus[i]>= 0) clus[i] = clus[clus[i]]; - } - __syncthreads(); - for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) { // not valid - clus[i] = -9999; - continue; + // propagate the negative id to all the pixels in the cluster. + if (active) { + for (int i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + if (id[i] != thisModuleId) // stop once in a different module + break; + if (clusterId[i] >= 0) { + // mark each pixel in a cluster with the same id as the first one + clusterId[i] = clusterId[clusterId[i]]; + } } - if (id[i] != me) // end of module - break; - clus[i] = - clus[i] - 1; } - __syncthreads(); - if (threadIdx.x == 0) { - clusInModule[me] = nclus; - moduleId[blockIdx.x] = me; + + // adjust the cluster id to be a positive value starting from 0 + if (active) { + for (int i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) { // skip invalid pixels + clusterId[i] = -9999; + continue; + } + if (id[i] != thisModuleId) // stop once in a different module + break; + clusterId[i] = - clusterId[i] - 1; + } } + __syncthreads(); + if (active) { + if (threadIdx.x == 0) { + nClustersInModule[thisModuleId] = foundClusters; + moduleId[blockIdx.x] = thisModuleId; + } 
#ifdef GPU_DEBUG - if (me % 100 == 1) - if (threadIdx.x == 0) - printf("%d clusters in module %d\n", nclus, me); + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("%d clusters in module %d\n", foundClusters, thisModuleId); #endif + } } } // namespace gpuClustering From 2dbefb43bdedb080be6c5fc53563ce28fac084f2 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Tue, 31 Jul 2018 12:03:15 +0200 Subject: [PATCH 033/149] Heterogeneous ClusterTPAssociation (cms-patatrack#105) Implement a heterogeneous Cluster-to-TrackingParticle associator running on the GPU. --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 21 +++++++----- .../plugins/SiPixelRawToClusterGPUKernel.h | 32 ++++++++++++++----- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 29e5e82049b5c..7eb90cffa2d77 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -38,7 +38,7 @@ namespace pixelgpudetails { - SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel() { + SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel(cuda::stream_t<>& cudaStream) { int WSIZE = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; cudaMallocHost(&word, sizeof(unsigned int)*WSIZE); cudaMallocHost(&fedId_h, sizeof(unsigned char)*WSIZE); @@ -90,6 +90,12 @@ namespace pixelgpudetails { cudaCheck(cudaMalloc((void**) & moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); cudaCheck(cudaMalloc((void**) & clusInModule_d,(MaxNumModules)*sizeof(uint32_t) )); cudaCheck(cudaMalloc((void**) & moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); + + cudaCheck(cudaMalloc((void**) & gpuProduct_d, sizeof(GPUProduct))); + gpuProduct = getProduct(); + assert(xx_d==gpuProduct.xx_d); + + cudaCheck(cudaMemcpyAsync(gpuProduct_d, 
&gpuProduct, sizeof(GPUProduct), cudaMemcpyDefault,cudaStream.id())); } SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { @@ -111,6 +117,7 @@ namespace pixelgpudetails { cudaCheck(cudaFree(clus_d)); cudaCheck(cudaFree(clusInModule_d)); cudaCheck(cudaFree(moduleId_d)); + cudaCheck(cudaFree(gpuProduct_d)); } void SiPixelRawToClusterGPUKernel::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { @@ -478,7 +485,7 @@ namespace pixelgpudetails { XX[gIndex] = 0; // 0 is an indicator of a noise/dead channel YY[gIndex] = 0; // skip these pixels during clusterization ADC[gIndex] = 0; - continue ; // 0: bad word + continue; // 0: bad word } uint32_t link = getLink(ww); // Extract link @@ -521,9 +528,9 @@ namespace pixelgpudetails { // endcap ids layer = 0; panel = (rawId >> pixelgpudetails::panelStartBit) & pixelgpudetails::panelMask; - //disk = (rawId >> diskStartBit_) & diskMask_ ; + //disk = (rawId >> diskStartBit_) & diskMask_; side = (panel == 1)? 
-1 : 1; - //blade = (rawId>>bladeStartBit_) & bladeMask_; + //blade = (rawId >> bladeStartBit_) & bladeMask_; } // ***special case of layer to 1 be handled here @@ -558,8 +565,8 @@ namespace pixelgpudetails { } pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); - XX[gIndex] = globalPix.row ; // origin shifting by 1 0-159 - YY[gIndex] = globalPix.col ; // origin shifting by 1 0-415 + XX[gIndex] = globalPix.row; // origin shifting by 1 0-159 + YY[gIndex] = globalPix.col; // origin shifting by 1 0-415 ADC[gIndex] = getADC(ww); pdigi[gIndex] = pixelgpudetails::pack(globalPix.row,globalPix.col,ADC[gIndex]); moduleId[gIndex] = detId.moduleId; @@ -583,7 +590,6 @@ namespace pixelgpudetails { const int threadsPerBlock = 512; const int blocks = (wordCounter + threadsPerBlock-1) /threadsPerBlock; // fill it all - assert(0 == wordCounter%2); // wordCounter is the total no of words in each event to be trasfered on device cudaCheck(cudaMemcpyAsync(&word_d[0], &word[0], wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); @@ -630,7 +636,6 @@ namespace pixelgpudetails { int threadsPerBlock = 256; int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; - gpuCalibPixel::calibDigis<<>>( moduleInd_d, xx_d, yy_d, adc_d, diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 2b0b205c9f536..2f7436052902b 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -83,7 +83,7 @@ namespace pixelgpudetails { class Packing { public: using PackedDigiType = uint32_t; - + // Constructor: pre-computes masks and shifts from field widths __host__ __device__ inline @@ -144,22 +144,32 @@ namespace pixelgpudetails { (adc << thePacking.adc_shift); } + constexpr + uint32_t pixelToChannel( int row, int col) { 
+ constexpr Packing thePacking = packing(); + return (row << thePacking.column_width) | col; + } + + using error_obj = siPixelRawToClusterHeterogeneousProduct::error_obj; class SiPixelRawToClusterGPUKernel { public: - SiPixelRawToClusterGPUKernel(); + + using GPUProduct = siPixelRawToClusterHeterogeneousProduct::GPUProduct; + + SiPixelRawToClusterGPUKernel(cuda::stream_t<>& cudaStream); ~SiPixelRawToClusterGPUKernel(); - + SiPixelRawToClusterGPUKernel(const SiPixelRawToClusterGPUKernel&) = delete; SiPixelRawToClusterGPUKernel(SiPixelRawToClusterGPUKernel&&) = delete; SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete; SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete; void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length); - + // Not really very async yet... void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, @@ -170,8 +180,9 @@ namespace pixelgpudetails { auto getProduct() const { return siPixelRawToClusterHeterogeneousProduct::GPUProduct{ pdigi_h, rawIdArr_h, clus_h, adc_h, error_h, - nDigis, nModulesActive, - xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d + gpuProduct_d, + xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d, + nDigis, nModulesActive }; } @@ -181,6 +192,11 @@ namespace pixelgpudetails { unsigned char *fedId_h = nullptr; // to hold fed index for each word // output + GPUProduct gpuProduct; + GPUProduct * gpuProduct_d; + + // FIXME cleanup all these are in the gpuProduct above... 
+ uint32_t *pdigi_h = nullptr, *rawIdArr_h = nullptr; // host copy of output uint16_t *adc_h = nullptr; int32_t *clus_h = nullptr; // host copy of calib&clus output pixelgpudetails::error_obj *data_h = nullptr; @@ -209,7 +225,7 @@ namespace pixelgpudetails { uint32_t * clusInModule_d; uint32_t * moduleId_d; }; - + // configuration and memory buffers alocated on the GPU struct context { uint32_t * word_d; @@ -223,7 +239,7 @@ namespace pixelgpudetails { GPU::SimpleVector * error_d; error_obj * data_d; - + // these are for the clusterizer (to be moved) uint32_t * moduleStart_d; int32_t * clus_d; From 911dc731841ce2ce1dc55fe968828bec139713cf Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Tue, 31 Jul 2018 22:17:08 +0200 Subject: [PATCH 034/149] Remove all remaining calls to cudaStreamSynchronize() (cms-patatrack#109) Remove all of the remaining calls to cudaStreamSynchronize() from the pixel "raw to cluster" workflow. Replace thrust::inclusive_scan with cub::DeviceScan::InclusiveSum to avoid implicit cudaStreamSynchronize and per-event buffer allocations Avoid a data dependency on the number of hits: - in raw2cluster, always transfer the errors for the maximum number of modules. - in rechits, replace the calculation of the total number of hits with the total number of clusters Copy the phase1PixelTopology::layerStart array to the GPU to avoid an extra copy back and forth from the CPU. 
--- .../SiPixelClusterizer/plugins/BuildFile.xml | 1 + .../plugins/SiPixelRawToClusterGPUKernel.cu | 45 +++++++++++++++++-- .../plugins/SiPixelRawToClusterGPUKernel.h | 13 ++++-- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 1dc69b4dd7b73..9db4a46f367b3 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -12,6 +12,7 @@ + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 7eb90cffa2d77..839cb6c6202a3 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -27,6 +27,9 @@ #include #include +// cub includes +#include + // CMSSW includes #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" @@ -96,6 +99,12 @@ namespace pixelgpudetails { assert(xx_d==gpuProduct.xx_d); cudaCheck(cudaMemcpyAsync(gpuProduct_d, &gpuProduct, sizeof(GPUProduct), cudaMemcpyDefault,cudaStream.id())); + + // originally from rechits + cudaCheck(cudaMalloc((void**) & clusModuleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); + uint32_t *tmp = nullptr; + cudaCheck(cub::DeviceScan::InclusiveSum(nullptr, tempScanStorageSize, tmp, tmp, MaxNumModules)); + cudaCheck(cudaMalloc(&tempScanStorage_d, tempScanStorageSize)); } SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { @@ -118,6 +127,10 @@ namespace pixelgpudetails { cudaCheck(cudaFree(clusInModule_d)); cudaCheck(cudaFree(moduleId_d)); cudaCheck(cudaFree(gpuProduct_d)); + + // originally from rechits + cudaCheck(cudaFree(tempScanStorage_d)); + cudaCheck(cudaFree(clusModuleStart_d)); } void 
SiPixelRawToClusterGPUKernel::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { @@ -623,10 +636,19 @@ namespace pixelgpudetails { if (includeErrors) { cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id())); - cudaCheck(cudaStreamSynchronize(stream.id())); - error_h->set_data(data_h); - int size = error_h->size(); - cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(data_h, data_d, MAX_FED*pixelgpudetails::MAX_WORD*esize, cudaMemcpyDefault, stream.id())); + // If we want to transfer only the minimal amount of data, we + // need a synchronization point. A single ExternalWork (of + // SiPixelRawToClusterHeterogeneous) does not help because it is + // already used to synchronize the data movement. So we'd need + // two ExternalWorks (or explicit use of TBB tasks). The + // prototype of #100 would allow this easily (as there would be + // two ExternalWorks). 
+ // + //error_h->set_data(data_h); + //cudaCheck(cudaStreamSynchronize(stream.id())); + //int size = error_h->size(); + //cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id())); } // End of Raw2Digi and passing data for cluserisation @@ -676,6 +698,21 @@ namespace pixelgpudetails { wordCounter); cudaCheck(cudaGetLastError()); + // count the module start indices already here (instead of + // rechits) so that the number of clusters/hits can be made + // available in the rechit producer without additional points of + // synchronization/ExternalWork + // + // Set first the first element to 0 + cudaCheck(cudaMemsetAsync(clusModuleStart_d, 0, sizeof(uint32_t), stream.id())); + // Then use inclusive_scan to get the partial sum to the rest + cudaCheck(cub::DeviceScan::InclusiveSum(tempScanStorage_d, tempScanStorageSize, + clusInModule_d, &clusModuleStart_d[1], gpuClustering::MaxNumModules, + stream.id())); + // last element holds the number of all clusters + cudaCheck(cudaMemcpyAsync(&nClusters, clusModuleStart_d+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + // clusters cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); } // end clusterizer scope diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 2f7436052902b..2f8a51901eaab 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -170,19 +170,20 @@ namespace pixelgpudetails { void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length); - // Not really very async yet... 
void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, bool useQualityInfo, bool includeErrors, bool debug, cuda::stream_t<>& stream); - auto getProduct() const { + auto getProduct() { + error_h->set_data(data_h); return siPixelRawToClusterHeterogeneousProduct::GPUProduct{ pdigi_h, rawIdArr_h, clus_h, adc_h, error_h, gpuProduct_d, xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d, - nDigis, nModulesActive + clusModuleStart_d, + nDigis, nModulesActive, nClusters }; } @@ -205,6 +206,7 @@ namespace pixelgpudetails { uint32_t nDigis = 0; uint32_t nModulesActive = 0; + uint32_t nClusters = 0; // scratch memory buffers uint32_t * word_d; @@ -224,6 +226,11 @@ namespace pixelgpudetails { int32_t * clus_d; uint32_t * clusInModule_d; uint32_t * moduleId_d; + + // originally in rechit, moved here + uint32_t *clusModuleStart_d = nullptr; + void *tempScanStorage_d = nullptr; + size_t tempScanStorageSize = 0; }; // configuration and memory buffers alocated on the GPU From a0c3d1974986b48a13563de2fc7c074013422b98 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Wed, 1 Aug 2018 17:07:14 +0200 Subject: [PATCH 035/149] Make all device->host cudaMemcpyAsync transfers use pinned memory (cms-patatrack#112) Use pinned host memory as target for device-to-host transfers: - cudaMemcpyAsync to pinned memory in raw2cluster; - cudaMemcpyAsync to pinned memory in rechits . 
--- .../plugins/SiPixelRawToClusterGPUKernel.cu | 21 +++++++++++++++++-- .../plugins/SiPixelRawToClusterGPUKernel.h | 6 +++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 839cb6c6202a3..2496fb40114f0 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -66,6 +66,10 @@ namespace pixelgpudetails { assert(error_h_tmp->size() == 0); assert(error_h_tmp->capacity() == static_cast(MAX_FED*pixelgpudetails::MAX_WORD)); + // Need these in pinned memory to be truly asynchronous + cudaCheck(cudaMallocHost(&nModulesActive, sizeof(uint32_t))); + cudaCheck(cudaMallocHost(&nClusters, sizeof(uint32_t))); + // allocate memory for RawToDigi on GPU using namespace gpuClustering; @@ -108,6 +112,19 @@ namespace pixelgpudetails { } SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { + // free the host memory + cudaCheck(cudaFreeHost(word)); + cudaCheck(cudaFreeHost(fedId_h)); + cudaCheck(cudaFreeHost(pdigi_h)); + cudaCheck(cudaFreeHost(rawIdArr_h)); + cudaCheck(cudaFreeHost(adc_h)); + cudaCheck(cudaFreeHost(clus_h)); + cudaCheck(cudaFreeHost(error_h)); + cudaCheck(cudaFreeHost(error_h_tmp)); + cudaCheck(cudaFreeHost(data_h)); + cudaCheck(cudaFreeHost(nModulesActive)); + cudaCheck(cudaFreeHost(nClusters)); + // free device memory used for RawToDigi on GPU // free the GPU memory cudaCheck(cudaFree(word_d)); @@ -680,7 +697,7 @@ namespace pixelgpudetails { cudaCheck(cudaGetLastError()); // read the number of modules into a data member, used by getProduct()) - cudaCheck(cudaMemcpyAsync(&nModulesActive, moduleStart_d, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(nModulesActive, moduleStart_d, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); 
threadsPerBlock = 256; blocks = MaxNumModules; @@ -710,7 +727,7 @@ namespace pixelgpudetails { clusInModule_d, &clusModuleStart_d[1], gpuClustering::MaxNumModules, stream.id())); // last element holds the number of all clusters - cudaCheck(cudaMemcpyAsync(&nClusters, clusModuleStart_d+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(nClusters, clusModuleStart_d+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); // clusters diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 2f8a51901eaab..9cff737140a90 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -183,7 +183,7 @@ namespace pixelgpudetails { gpuProduct_d, xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d, clusModuleStart_d, - nDigis, nModulesActive, nClusters + nDigis, *nModulesActive, *nClusters }; } @@ -205,8 +205,8 @@ namespace pixelgpudetails { GPU::SimpleVector *error_h_tmp = nullptr; uint32_t nDigis = 0; - uint32_t nModulesActive = 0; - uint32_t nClusters = 0; + uint32_t *nModulesActive = nullptr; + uint32_t *nClusters = nullptr; // scratch memory buffers uint32_t * word_d; From 400cebf891deaac306e9ebf35657962e98acad48 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 2 Aug 2018 17:50:05 +0200 Subject: [PATCH 036/149] Fix memory initialisation problems in the clusterizer (again) (cms-patatrack#114) --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 73 +++++++++---------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 2496fb40114f0..0ab7682911f1c 100644 --- 
a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -41,43 +41,32 @@ namespace pixelgpudetails { - SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel(cuda::stream_t<>& cudaStream) { - int WSIZE = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; - cudaMallocHost(&word, sizeof(unsigned int)*WSIZE); - cudaMallocHost(&fedId_h, sizeof(unsigned char)*WSIZE); + // data structures size + constexpr uint32_t vsize = sizeof(GPU::SimpleVector); + constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); - // to store the output of RawToDigi - cudaMallocHost(&pdigi_h, sizeof(uint32_t)*WSIZE); - cudaMallocHost(&rawIdArr_h, sizeof(uint32_t)*WSIZE); + // number of words for all the FEDs + constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; + constexpr uint32_t MAX_WORD08_SIZE = MAX_FED_WORDS * sizeof(uint8_t); + constexpr uint32_t MAX_WORD32_SIZE = MAX_FED_WORDS * sizeof(uint32_t); + constexpr uint32_t MAX_WORD16_SIZE = MAX_FED_WORDS * sizeof(uint16_t); + constexpr uint32_t MAX_ERROR_SIZE = MAX_FED_WORDS * esize; - cudaMallocHost(&adc_h, sizeof(uint16_t)*WSIZE); - cudaMallocHost(&clus_h, sizeof(int32_t)*WSIZE); + SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel(cuda::stream_t<>& cudaStream) { - constexpr uint32_t vsize = sizeof(GPU::SimpleVector); - constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); - cudaCheck(cudaMallocHost(&error_h, vsize)); - cudaCheck(cudaMallocHost(&error_h_tmp, vsize)); - cudaCheck(cudaMallocHost(&data_h, MAX_FED*pixelgpudetails::MAX_WORD*esize)); + cudaCheck(cudaMallocHost(&word, MAX_FED_WORDS * sizeof(unsigned int))); + cudaCheck(cudaMallocHost(&fedId_h, MAX_FED_WORDS * sizeof(unsigned char))); - new (error_h) GPU::SimpleVector(MAX_FED*pixelgpudetails::MAX_WORD, data_h); - new (error_h_tmp) GPU::SimpleVector(MAX_FED*pixelgpudetails::MAX_WORD, data_d); - 
assert(error_h->size() == 0); - assert(error_h->capacity() == static_cast(MAX_FED*pixelgpudetails::MAX_WORD)); - assert(error_h_tmp->size() == 0); - assert(error_h_tmp->capacity() == static_cast(MAX_FED*pixelgpudetails::MAX_WORD)); - - // Need these in pinned memory to be truly asynchronous - cudaCheck(cudaMallocHost(&nModulesActive, sizeof(uint32_t))); - cudaCheck(cudaMallocHost(&nClusters, sizeof(uint32_t))); + // to store the output of RawToDigi + cudaCheck(cudaMallocHost(&pdigi_h, MAX_FED_WORDS * sizeof(uint32_t))); + cudaCheck(cudaMallocHost(&rawIdArr_h, MAX_FED_WORDS * sizeof(uint32_t))); - // allocate memory for RawToDigi on GPU - using namespace gpuClustering; + cudaCheck(cudaMallocHost(&adc_h, MAX_FED_WORDS * sizeof(uint16_t))); + cudaCheck(cudaMallocHost(&clus_h, MAX_FED_WORDS * sizeof(int32_t))); - // Number of words for all the feds - constexpr uint32_t MAX_WORD08_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint8_t); - constexpr uint32_t MAX_WORD32_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint32_t); - constexpr uint32_t MAX_WORD16_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint16_t); - constexpr uint32_t MAX_ERROR_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * esize; + cudaCheck(cudaMallocHost(&error_h, vsize)); + cudaCheck(cudaMallocHost(&error_h_tmp, vsize)); + cudaCheck(cudaMallocHost(&data_h, MAX_ERROR_SIZE)); cudaCheck(cudaMalloc((void**) & word_d, MAX_WORD32_SIZE)); cudaCheck(cudaMalloc((void**) & fedId_d, MAX_WORD08_SIZE)); @@ -90,14 +79,27 @@ namespace pixelgpudetails { cudaCheck(cudaMalloc((void**) & rawIdArr_d, MAX_WORD32_SIZE)); cudaCheck(cudaMalloc((void**) & error_d, vsize)); cudaCheck(cudaMalloc((void**) & data_d, MAX_ERROR_SIZE)); + cudaCheck(cudaMemset(data_d, 0x00, MAX_ERROR_SIZE)); // for the clusterizer cudaCheck(cudaMalloc((void**) & clus_d, MAX_WORD32_SIZE)); // cluser index in module + using namespace gpuClustering; cudaCheck(cudaMalloc((void**) & moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); 
cudaCheck(cudaMalloc((void**) & clusInModule_d,(MaxNumModules)*sizeof(uint32_t) )); cudaCheck(cudaMalloc((void**) & moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); + new (error_h) GPU::SimpleVector(MAX_FED_WORDS, data_h); + new (error_h_tmp) GPU::SimpleVector(MAX_FED_WORDS, data_d); + assert(error_h->size() == 0); + assert(error_h->capacity() == static_cast(MAX_FED_WORDS)); + assert(error_h_tmp->size() == 0); + assert(error_h_tmp->capacity() == static_cast(MAX_FED_WORDS)); + + // Need these in pinned memory to be truly asynchronous + cudaCheck(cudaMallocHost(&nModulesActive, sizeof(uint32_t))); + cudaCheck(cudaMallocHost(&nClusters, sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**) & gpuProduct_d, sizeof(GPUProduct))); gpuProduct = getProduct(); assert(xx_d==gpuProduct.xx_d); @@ -622,11 +624,8 @@ namespace pixelgpudetails { assert(0 == wordCounter%2); // wordCounter is the total no of words in each event to be trasfered on device - cudaCheck(cudaMemcpyAsync(&word_d[0], &word[0], wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(&fedId_d[0], &fedId_h[0], wordCounter*sizeof(uint8_t)/2, cudaMemcpyDefault, stream.id())); - - constexpr uint32_t vsize = sizeof(GPU::SimpleVector); - constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); + cudaCheck(cudaMemcpyAsync(&word_d[0], &word[0], wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(&fedId_d[0], &fedId_h[0], wordCounter*sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id())); cudaCheck(cudaMemcpyAsync(error_d, error_h_tmp, vsize, cudaMemcpyDefault, stream.id())); // Launch rawToDigi kernel @@ -653,7 +652,7 @@ namespace pixelgpudetails { if (includeErrors) { cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data_h, data_d, MAX_FED*pixelgpudetails::MAX_WORD*esize, cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(data_h, data_d, MAX_ERROR_SIZE, 
cudaMemcpyDefault, stream.id())); // If we want to transfer only the minimal amount of data, we // need a synchronization point. A single ExternalWork (of // SiPixelRawToClusterHeterogeneous) does not help because it is From 81b6d79416728b6674f54dd060c7f32423e43363 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 14 Aug 2018 17:30:35 +0200 Subject: [PATCH 037/149] Clean up some non-GPU-related pixel tracking code Cleanup whitespaces and indentation, plugin definitions, includes, and file names. Backport from the Patatrack fork: cms-patatrack#49, cms-patatrack#122, cms-patatrack#134. --- RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 5844235e29596..465aa0bb346ce 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -1,10 +1,9 @@ import FWCore.ParameterSet.Config as cms -from Configuration.ProcessModifiers.gpu_cff import gpu siPixelRecHits = cms.EDProducer("SiPixelRecHitConverter", src = cms.InputTag("siPixelClusters"), CPE = cms.string('PixelCPEGeneric'), - VerboseLevel = cms.untracked.int32(0), + VerboseLevel = cms.untracked.int32(0) ) siPixelRecHitsPreSplitting = siPixelRecHits.clone( From adf1e9229fe45169fd290aa7bd182887ce15a24d Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 17 Aug 2018 13:18:10 +0200 Subject: [PATCH 038/149] Cleanup after merging with CMSSW 10.2.2 (cms-patatrack#134) Clean up unnecessary changes, whitespaces, defines and include directives.
--- .../Configuration/python/RecoLocalTracker_cff.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index 6d803d40bc870..9fd64188802fc 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -17,18 +17,15 @@ striptrackerlocalreco = cms.Sequence(siStripZeroSuppression*siStripClusters*siStripMatchedRecHits) trackerlocalreco = cms.Sequence(pixeltrackerlocalreco*striptrackerlocalreco*clusterSummaryProducer) - from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import * from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import * from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import * +from Configuration.ProcessModifiers.gpu_cff import gpu from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import * from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneousConverter_cfi import siPixelRecHitHeterogeneousConverter as _siPixelRecHitHeterogeneousConverter gpu.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitHeterogeneousConverter.clone()) - - -from Configuration.ProcessModifiers.gpu_cff import gpu _pixeltrackerlocalreco_gpu = pixeltrackerlocalreco.copy() _pixeltrackerlocalreco_gpu.replace(siPixelClustersPreSplitting, siPixelClustersHeterogeneous+siPixelClustersPreSplitting) _pixeltrackerlocalreco_gpu.replace(siPixelRecHitsPreSplitting, siPixelRecHitHeterogeneous+siPixelRecHitsPreSplitting) From 6b9ac389544692f3ff3df0393d6ffcc7e6f21920 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 17 Aug 2018 16:16:42 +0200 Subject: [PATCH 039/149] Add optional flags to disable SOA->legacy conversion and GPU->CPU transfer (cms-patatrack#132) Always produce the CPU cluster and rechit collections, since they are needed anyway. 
Add transfer and conversion flags to clusterizer, rechits and CA. Add a skeleton for the future pixel track producer. Add customize functions to disable conversions to legacy formats, and to disable unnecessary GPU->CPU transfers. --- .../python/RecoLocalTracker_cff.py | 14 +----- .../plugins/SiPixelRawToClusterGPUKernel.cu | 49 ++++++++++--------- .../plugins/SiPixelRawToClusterGPUKernel.h | 2 +- .../SiPixelClusterizerPreSplitting_cfi.py | 7 ++- 4 files changed, 35 insertions(+), 37 deletions(-) diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index 9fd64188802fc..ad975fa183566 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -17,19 +17,9 @@ striptrackerlocalreco = cms.Sequence(siStripZeroSuppression*siStripClusters*siStripMatchedRecHits) trackerlocalreco = cms.Sequence(pixeltrackerlocalreco*striptrackerlocalreco*clusterSummaryProducer) -from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import * -from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import * -from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import * - from Configuration.ProcessModifiers.gpu_cff import gpu -from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import * -from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneousConverter_cfi import siPixelRecHitHeterogeneousConverter as _siPixelRecHitHeterogeneousConverter -gpu.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitHeterogeneousConverter.clone()) - -_pixeltrackerlocalreco_gpu = pixeltrackerlocalreco.copy() -_pixeltrackerlocalreco_gpu.replace(siPixelClustersPreSplitting, siPixelClustersHeterogeneous+siPixelClustersPreSplitting) -_pixeltrackerlocalreco_gpu.replace(siPixelRecHitsPreSplitting, siPixelRecHitHeterogeneous+siPixelRecHitsPreSplitting) 
-gpu.toReplaceWith(pixeltrackerlocalreco, _pixeltrackerlocalreco_gpu) +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import siPixelRecHitHeterogeneous as _siPixelRecHitHeterogeneous +gpu.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitHeterogeneous) from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import * from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import * diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 0ab7682911f1c..f3242a11d7ae6 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -614,7 +614,7 @@ namespace pixelgpudetails { const SiPixelGainForHLTonGPU *gains, const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, - bool useQualityInfo, bool includeErrors, bool debug, + bool useQualityInfo, bool includeErrors, bool transferToCPU, bool debug, cuda::stream_t<>& stream) { nDigis = wordCounter; @@ -646,25 +646,26 @@ namespace pixelgpudetails { cudaCheck(cudaGetLastError()); // copy data to host variable - - cudaCheck(cudaMemcpyAsync(pdigi_h, pdigi_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(rawIdArr_h, rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - - if (includeErrors) { - cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data_h, data_d, MAX_ERROR_SIZE, cudaMemcpyDefault, stream.id())); - // If we want to transfer only the minimal amount of data, we - // need a synchronization point. A single ExternalWork (of - // SiPixelRawToClusterHeterogeneous) does not help because it is - // already used to synchronize the data movement. 
So we'd need - // two ExternalWorks (or explicit use of TBB tasks). The - // prototype of #100 would allow this easily (as there would be - // two ExternalWorks). - // - //error_h->set_data(data_h); - //cudaCheck(cudaStreamSynchronize(stream.id())); - //int size = error_h->size(); - //cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id())); + if(transferToCPU) { + cudaCheck(cudaMemcpyAsync(pdigi_h, pdigi_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(rawIdArr_h, rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + if (includeErrors) { + cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(data_h, data_d, MAX_ERROR_SIZE, cudaMemcpyDefault, stream.id())); + // If we want to transfer only the minimal amount of data, we + // need a synchronization point. A single ExternalWork (of + // SiPixelRawToClusterHeterogeneous) does not help because it is + // already used to synchronize the data movement. So we'd need + // two ExternalWorks (or explicit use of TBB tasks). The + // prototype of #100 would allow this easily (as there would be + // two ExternalWorks). 
+ // + //error_h->set_data(data_h); + //cudaCheck(cudaStreamSynchronize(stream.id())); + //int size = error_h->size(); + //cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id())); + } } // End of Raw2Digi and passing data for cluserisation @@ -682,7 +683,9 @@ namespace pixelgpudetails { cudaCheck(cudaGetLastError()); // calibrated adc - cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); + if(transferToCPU) { + cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); + } /* std::cout @@ -730,7 +733,9 @@ namespace pixelgpudetails { // clusters - cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + if(transferToCPU) { + cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + } } // end clusterizer scope } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 9cff737140a90..ca8bd73106c2c 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -173,7 +173,7 @@ namespace pixelgpudetails { void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, - bool useQualityInfo, bool includeErrors, bool debug, + bool useQualityInfo, bool includeErrors, bool transferToCPU_, bool debug, cuda::stream_t<>& stream); auto getProduct() { diff --git a/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py b/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py index 4e6ff591fb78a..bb0bb85697a99 100644 --- 
a/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py @@ -4,5 +4,8 @@ from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizer_cfi import siPixelClusters as _siPixelClusters siPixelClustersPreSplitting = _siPixelClusters.clone() -# In principle we could remove `siPixelClustersPreSplitting` from the `pixeltrackerlocalreco` -# sequence when the `gpu` modufier is active; for the time being we keep it for simplicity. +from Configuration.ProcessModifiers.gpu_cff import gpu +from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import siPixelClustersHeterogeneous as _siPixelClustersHeterogeneous +from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import * +from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import * +gpu.toReplaceWith(siPixelClustersPreSplitting, _siPixelClustersHeterogeneous.clone()) From f011f7d9478638265c896e16e901b8319e17d62a Mon Sep 17 00:00:00 2001 From: Thomas Reis Date: Tue, 11 Sep 2018 10:55:40 +0200 Subject: [PATCH 040/149] Add new unpackers for HI run configuration. 
--- .../l1tstage2_dqm_sourceclient-live_cfg.py | 37 +++++-------------- ...tage2emulator_dqm_sourceclient-live_cfg.py | 36 ++++-------------- 2 files changed, 18 insertions(+), 55 deletions(-) diff --git a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py index c5788228cac1f..bee554d849a4e 100644 --- a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py @@ -1,27 +1,16 @@ import FWCore.ParameterSet.Config as cms -import sys -from Configuration.Eras.Era_Run3_cff import Run3 -process = cms.Process("L1TStage2DQM", Run3) - -unitTest = False -if 'unitTest=True' in sys.argv: - unitTest=True +from Configuration.StandardSequences.Eras import eras +process = cms.Process("L1TStage2DQM", eras.Run2_2018) #-------------------------------------------------- # Event Source and Condition -if unitTest: - process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options -else: - # Live Online DQM in P5 - process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options +# Live Online DQM in P5 +process.load("DQM.Integration.config.inputsource_cfi") # # Testing in lxplus # process.load("DQM.Integration.config.fileinputsource_cfi") -# from DQM.Integration.config.fileinputsource_cfi import options # process.load("FWCore.MessageLogger.MessageLogger_cfi") # process.MessageLogger.cerr.FwkReport.reportEvery = 1 @@ -42,11 +31,9 @@ process.dqmEnv.subSystemFolder = "L1T" process.dqmSaver.tag = "L1T" -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = "L1T" -process.dqmSaverPB.runNumber = options.runNumber +process.DQMStore.referenceFileName = "/dqmdata/dqm/reference/l1t_reference.root" -process.dqmEndPath = cms.EndPath(process.dqmEnv * process.dqmSaver * process.dqmSaverPB) 
+process.dqmEndPath = cms.EndPath(process.dqmEnv * process.dqmSaver) #-------------------------------------------------- # Standard Unpacking Path @@ -115,6 +102,7 @@ # Cosmic run if (process.runType.getRunType() == process.runType.cosmic_run): + process.DQMStore.referenceFileName = "/dqmdata/dqm/reference/l1t_reference_cosmic.root" # Remove Quality Tests for L1T Muon Subsystems since they are not optimized yet for cosmics process.l1tStage2MonitorClient.remove(process.l1TStage2uGMTQualityTests) process.l1tStage2MonitorClient.remove(process.l1TStage2EMTFQualityTests) @@ -125,11 +113,10 @@ # Heavy-Ion run if (process.runType.getRunType() == process.runType.hi_run): - process.onlineMetaDataDigis.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") - process.onlineMetaDataRawToDigi.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") + process.DQMStore.referenceFileName = "/dqmdata/dqm/reference/l1t_reference_hi.root" process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ctppsDiamondRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker") - process.ctppsPixelDigis.inputLabel = cms.InputTag("rawDataRepacker") + process.ctppsPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ecalDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataRepacker") process.hcalDigis.InputLabel = cms.InputTag("rawDataRepacker") @@ -150,22 +137,18 @@ process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker") process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker") process.twinMuxStage2Digis.DTTM7_FED_Source = cms.InputTag("rawDataRepacker") + process.RPCTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker") process.bmtfDigis.InputLabel = cms.InputTag("rawDataRepacker") process.omtfStage2Digis.inputLabel = cms.InputTag("rawDataRepacker") process.emtfStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.gmtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") - 
process.caloLayer1Digis.InputLabel = cms.InputTag("rawDataRepacker") process.caloStage1Digis.InputLabel = cms.InputTag("rawDataRepacker") process.caloStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.gtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.l1tStage2CaloLayer1.fedRawDataLabel = cms.InputTag("rawDataRepacker") process.l1tStage2uGMTZeroSupp.rawData = cms.InputTag("rawDataRepacker") process.l1tStage2uGMTZeroSuppFatEvts.rawData = cms.InputTag("rawDataRepacker") - process.l1tStage2BmtfZeroSupp.rawData = cms.InputTag("rawDataRepacker") - process.l1tStage2BmtfZeroSuppFatEvts.rawData = cms.InputTag("rawDataRepacker") process.selfFatEventFilter.rawInput = cms.InputTag("rawDataRepacker") - process.rpcTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker") - process.rpcCPPFRawToDigi.inputTag = cms.InputTag("rawDataRepacker") #-------------------------------------------------- # L1T Online DQM Schedule diff --git a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py index 91f1f564366d0..81a0ae82e1c32 100644 --- a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py @@ -1,27 +1,16 @@ import FWCore.ParameterSet.Config as cms -import sys -from Configuration.Eras.Era_Run3_cff import Run3 -process = cms.Process("L1TStage2EmulatorDQM", Run3) - -unitTest = False -if 'unitTest=True' in sys.argv: - unitTest=True +from Configuration.StandardSequences.Eras import eras +process = cms.Process("L1TStage2EmulatorDQM", eras.Run2_2018) #-------------------------------------------------- # Event Source and Condition -if unitTest: - process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options -else: - # Live Online DQM in P5 - 
process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options +# Live Online DQM in P5 +process.load("DQM.Integration.config.inputsource_cfi") # Testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options # Required to load Global Tag process.load("DQM.Integration.config.FrontierCondition_GT_cfi") @@ -38,14 +27,11 @@ process.dqmEnv.subSystemFolder = "L1TEMU" process.dqmSaver.tag = "L1TEMU" -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = "L1TEMU" -process.dqmSaverPB.runNumber = options.runNumber +process.DQMStore.referenceFileName = "/dqmdata/dqm/reference/l1temu_reference.root" process.dqmEndPath = cms.EndPath( process.dqmEnv * - process.dqmSaver * - process.dqmSaverPB + process.dqmSaver ) #-------------------------------------------------- @@ -124,11 +110,9 @@ # Heavy-Ion run if (process.runType.getRunType() == process.runType.hi_run): - process.onlineMetaDataDigis.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") - process.onlineMetaDataRawToDigi.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ctppsDiamondRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker") - process.ctppsPixelDigis.inputLabel = cms.InputTag("rawDataRepacker") + process.ctppsPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ecalDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataRepacker") process.hcalDigis.InputLabel = cms.InputTag("rawDataRepacker") @@ -149,21 +133,17 @@ process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker") process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker") process.twinMuxStage2Digis.DTTM7_FED_Source = cms.InputTag("rawDataRepacker") + process.RPCTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker") 
process.bmtfDigis.InputLabel = cms.InputTag("rawDataRepacker") - process.valBmtfAlgoSel.feds = cms.InputTag("rawDataRepacker") process.omtfStage2Digis.inputLabel = cms.InputTag("rawDataRepacker") process.emtfStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.gmtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") - process.caloLayer1Digis.InputLabel = cms.InputTag("rawDataRepacker") process.caloStage1Digis.InputLabel = cms.InputTag("rawDataRepacker") process.caloStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.simHcalTriggerPrimitiveDigis.InputTagFEDRaw = cms.InputTag("rawDataRepacker") process.l1tdeStage2CaloLayer1.fedRawDataLabel = cms.InputTag("rawDataRepacker") process.gtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.selfFatEventFilter.rawInput = cms.InputTag("rawDataRepacker") - process.rpcTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker") - process.rpcCPPFRawToDigi.inputTag = cms.InputTag("rawDataRepacker") - process.hltFatEventFilter.HLTPaths.append('HLT_HIPhysics_v*') #-------------------------------------------------- # L1T Emulator Online DQM Schedule From cf35be09885c378f76f6ee053d171515b5af0a5c Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Wed, 12 Sep 2018 09:47:46 +0200 Subject: [PATCH 041/149] Tune and speed up doublet algo (cms-patatrack#158) Tune and speed up the pixel doublet algorithm, and take advantage of GPU read-only memory for a further speedup. Includes a python notebook to tune the cuts for doublets and triplets.
--- .../interface/SiPixelGainForHLTonGPU.h | 3 +- .../plugins/gpuCalibPixel.h | 12 +-- .../plugins/gpuClustering.h | 93 ++++++++++++------- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 16 +++- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 22 ++--- 5 files changed, 90 insertions(+), 56 deletions(-) diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h index 8cf5451f91b93..48302c7517583 100644 --- a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -41,7 +41,8 @@ class SiPixelGainForHLTonGPU { assert(offset<3088384); assert(0==offset%2); - auto s = v_pedestals[offset/2]; + DecodingStructure const * __restrict__ lp = v_pedestals; + auto s = lp[offset/2]; isDeadColumn = (s.ped & 0xFF) == deadFlag_; isNoisyColumn = (s.ped & 0xFF) == noisyFlag_; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index c096afd5f44fc..3b19268382c6a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -18,10 +18,10 @@ namespace gpuCalibPixel { __global__ void calibDigis(uint16_t * id, - uint16_t const * x, - uint16_t const * y, + uint16_t const * __restrict__ x, + uint16_t const * __restrict__ y, uint16_t * adc, - SiPixelGainForHLTonGPU const * ped, + SiPixelGainForHLTonGPU const * __restrict__ ped, int numElements ) { @@ -55,11 +55,11 @@ namespace gpuCalibPixel { } __global__ void calibADCByModule(uint16_t * id, - uint16_t const * x, - uint16_t const * y, + uint16_t const * __restrict__ x, + uint16_t const * __restrict__ y, uint16_t * adc, uint32_t * moduleStart, - SiPixelGainForHLTonGPU const * ped, + SiPixelGainForHLTonGPU const * __restrict__ ped, int numElements ) { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h 
b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index d9a98437b52cf..16df3abdf8df9 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -7,11 +7,13 @@ #include "gpuClusteringConstants.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" + namespace gpuClustering { - __global__ void countModules(uint16_t const * id, - uint32_t * moduleStart, - int32_t * clusterId, + __global__ void countModules(uint16_t const * __restrict__ id, + uint32_t * __restrict__ moduleStart, + int32_t * __restrict__ clusterId, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; @@ -30,15 +32,16 @@ namespace gpuClustering { } } - __global__ void findClus(uint16_t const * id, // module id of each pixel - uint16_t const * x, // local coordinates of each pixel - uint16_t const * y, // - uint32_t const * moduleStart, // index of the first pixel of each module - uint32_t * nClustersInModule, // output: number of clusters found in each module - uint32_t * moduleId, // output: module id of each module - int32_t * clusterId, // output: cluster id of each pixel + __global__ void findClus(uint16_t const * __restrict__ id, // module id of each pixel + uint16_t const * __restrict__ x, // local coordinates of each pixel + uint16_t const * __restrict__ y, // + uint32_t const * __restrict__ moduleStart, // index of the first pixel of each module + uint32_t * __restrict__ nClustersInModule, // output: number of clusters found in each module + uint32_t * __restrict__ moduleId, // output: module id of each module + int32_t * __restrict__ clusterId, // output: cluster id of each pixel int numElements) { + if (blockIdx.x >= moduleStart[0]) return; @@ -72,12 +75,32 @@ namespace gpuClustering { } } + //init hist (ymax < 512) + __shared__ HistoContainer hist; + hist.nspills = 0; + for (auto k = threadIdx.x; k 1 or - std::abs(int(y[j]) - int(y[i])) > 1) - continue; + // loop to 
columns + auto bs = hist.bin(y[i]>0 ? y[i]-1 : 0); + auto be = hist.bin(y[i]+1)+1; + auto loop = [&](int j) { + j+=firstPixel; + if (i>=j or j>jm or + std::abs(int(x[j]) - int(x[i])) > 1 or + std::abs(int(y[j]) - int(y[i])) > 1) return; auto old = atomicMin(&clusterId[j], clusterId[i]); if (old != clusterId[i]) { // end the loop only if no changes were applied @@ -116,11 +143,17 @@ namespace gpuClustering { } atomicMin(&clusterId[i], old); // update the loop boundary for the next iteration - jmax[k] = j + 1; - } - } - } - } + jmax[k] = std::max(j + 1,jmax[k]); + }; + for (auto b=bs; b= 0) { // mark each pixel in a cluster with the same id as the first one clusterId[i] = clusterId[clusterId[i]]; @@ -159,13 +188,11 @@ namespace gpuClustering { // adjust the cluster id to be a positive value starting from 0 if (active) { - for (int i = first; i < numElements; i += blockDim.x) { + for (int i = first; i < msize; i += blockDim.x) { if (id[i] == InvId) { // skip invalid pixels clusterId[i] = -9999; continue; } - if (id[i] != thisModuleId) // stop once in a different module - break; clusterId[i] = - clusterId[i] - 1; } } diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 6add3a78b96e6..9697470ffb0be 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -46,9 +46,15 @@ namespace pixelCPEforGPU { DetParams * m_detParams; constexpr - CommonParams const & commonParams() const {return *m_commonParams;} + CommonParams const & __restrict__ commonParams() const { + CommonParams const * __restrict__ l = m_commonParams; + return *l; + } constexpr - DetParams const & detParams(int i) const {return m_detParams[i];} + DetParams const & __restrict__ detParams(int i) const { + DetParams const * __restrict__ l = m_detParams; + return l[i]; + } }; // SOA (on device) @@ -78,7 +84,7 @@ namespace pixelCPEforGPU { using 
ClusParams = ClusParamsT<256>; constexpr inline - void computeAnglesFromDet(DetParams const & detParams, float const x, float const y, float & cotalpha, float & cotbeta) { + void computeAnglesFromDet(DetParams const & __restrict__ detParams, float const x, float const y, float & cotalpha, float & cotbeta) { // x,y local position on det auto gvx = x - detParams.x0; auto gvy = y - detParams.y0; @@ -147,7 +153,7 @@ namespace pixelCPEforGPU { } constexpr inline - void position(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) { + void position(CommonParams const & __restrict__ comParams, DetParams const & __restrict__ detParams, ClusParams & cp, uint32_t ic) { //--- Upper Right corner of Lower Left pixel -- in measurement frame uint16_t llx = cp.minRow[ic]+1; @@ -202,7 +208,7 @@ namespace pixelCPEforGPU { } constexpr inline - void error(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) { + void error(CommonParams const & __restrict__ comParams, DetParams const & __restrict__ detParams, ClusParams & cp, uint32_t ic) { // Edge cluster errors cp.xerr[ic]= 0.0050; cp.yerr[ic]= 0.0085; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index c0e7841658e93..2ee4a10c6fc99 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -23,18 +23,18 @@ namespace gpuPixelRecHits { using ClusParams = pixelCPEforGPU::ClusParams; - __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * cpeParams, - float const * bs, - uint16_t const * id, - uint16_t const * x, - uint16_t const * y, - uint16_t const * adc, - uint32_t const * digiModuleStart, - uint32_t const * clusInModule, - uint32_t const * moduleId, - int32_t const * clus, + __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams, + float const * __restrict__ bs, + 
uint16_t const * __restrict__ id, + uint16_t const * __restrict__ x, + uint16_t const * __restrict__ y, + uint16_t const * __restrict__ adc, + uint32_t const * __restrict__ digiModuleStart, + uint32_t const * __restrict__ clusInModule, + uint32_t const * __restrict__ moduleId, + int32_t const * __restrict__ clus, int numElements, - uint32_t const * hitsModuleStart, + uint32_t const * __restrict__ hitsModuleStart, int32_t * chargeh, uint16_t * detInd, float * xg, float * yg, float * zg, float * rg, int16_t * iph, From 5f1d5a576c46be2a75fa185d9effdaf394c5ecff Mon Sep 17 00:00:00 2001 From: Thomas Reis Date: Thu, 13 Sep 2018 14:52:35 +0200 Subject: [PATCH 042/149] Add missing HI rawDataRepacker changes. --- .../python/clients/l1tstage2_dqm_sourceclient-live_cfg.py | 7 ++++++- .../clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py index bee554d849a4e..bb57bd794d842 100644 --- a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py @@ -114,9 +114,11 @@ # Heavy-Ion run if (process.runType.getRunType() == process.runType.hi_run): process.DQMStore.referenceFileName = "/dqmdata/dqm/reference/l1t_reference_hi.root" + process.onlineMetaDataDigis.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") + process.onlineMetaDataRawToDigi.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ctppsDiamondRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker") - process.ctppsPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.ctppsPixelDigis.inputLabel = cms.InputTag("rawDataRepacker") process.ecalDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ecalPreshowerDigis.sourceTag = 
cms.InputTag("rawDataRepacker") process.hcalDigis.InputLabel = cms.InputTag("rawDataRepacker") @@ -142,12 +144,15 @@ process.omtfStage2Digis.inputLabel = cms.InputTag("rawDataRepacker") process.emtfStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.gmtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") + process.caloLayer1Digis.InputLabel = cms.InputTag("rawDataRepacker") process.caloStage1Digis.InputLabel = cms.InputTag("rawDataRepacker") process.caloStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.gtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.l1tStage2CaloLayer1.fedRawDataLabel = cms.InputTag("rawDataRepacker") process.l1tStage2uGMTZeroSupp.rawData = cms.InputTag("rawDataRepacker") process.l1tStage2uGMTZeroSuppFatEvts.rawData = cms.InputTag("rawDataRepacker") + process.l1tStage2BmtfZeroSupp.rawData = cms.InputTag("rawDataRepacker") + process.l1tStage2BmtfZeroSuppFatEvts.rawData = cms.InputTag("rawDataRepacker") process.selfFatEventFilter.rawInput = cms.InputTag("rawDataRepacker") #-------------------------------------------------- diff --git a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py index 81a0ae82e1c32..735e5d534a9e5 100644 --- a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py @@ -110,9 +110,11 @@ # Heavy-Ion run if (process.runType.getRunType() == process.runType.hi_run): + process.onlineMetaDataDigis.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") + process.onlineMetaDataRawToDigi.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ctppsDiamondRawToDigi.rawDataTag = cms.InputTag("rawDataRepacker") - process.ctppsPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + 
process.ctppsPixelDigis.inputLabel = cms.InputTag("rawDataRepacker") process.ecalDigis.InputLabel = cms.InputTag("rawDataRepacker") process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataRepacker") process.hcalDigis.InputLabel = cms.InputTag("rawDataRepacker") @@ -138,6 +140,7 @@ process.omtfStage2Digis.inputLabel = cms.InputTag("rawDataRepacker") process.emtfStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.gmtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") + process.caloLayer1Digis.InputLabel = cms.InputTag("rawDataRepacker") process.caloStage1Digis.InputLabel = cms.InputTag("rawDataRepacker") process.caloStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.simHcalTriggerPrimitiveDigis.InputTagFEDRaw = cms.InputTag("rawDataRepacker") From e82d9e81e83a4a410186eefa7977b66cfb0e5cce Mon Sep 17 00:00:00 2001 From: Mauro Date: Mon, 17 Sep 2018 22:05:32 +0200 Subject: [PATCH 043/149] Tested new configuration that works for HI runs --- .../beampixel_dqm_sourceclient-live_cfg.py | 130 +++++++++--------- 1 file changed, 64 insertions(+), 66 deletions(-) diff --git a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py index 75f0545a5c5ba..b5c92413c1cf5 100644 --- a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py @@ -1,27 +1,16 @@ -from __future__ import print_function import FWCore.ParameterSet.Config as cms +from Configuration.StandardSequences.Eras import eras -import sys -from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 -process = cms.Process("BeamPixel", Run2_2018) - -unitTest = False -if 'unitTest=True' in sys.argv: - unitTest = True +process = cms.Process("BeamPixel", eras.Run2_2018) #---------------------------- # Common for PP and HI running #---------------------------- -if unitTest == True: - 
process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options -else: - process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options # Use this to run locally (for testing purposes) #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options +# Otherwise use this +process.load("DQM.Integration.config.inputsource_cfi") #---------------------------- @@ -37,9 +26,7 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = "BeamPixel" process.dqmSaver.tag = "BeamPixel" -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'BeamPixel' -process.dqmSaverPB.runNumber = options.runNumber + #---------------------------- # Conditions @@ -63,7 +50,7 @@ #---------------------------- # Define Sequences #---------------------------- -process.dqmModules = cms.Sequence(process.dqmEnv + process.dqmSaver + process.dqmSaverPB) +process.dqmModules = cms.Sequence(process.dqmEnv + process.dqmSaver) process.physTrigger = cms.Sequence(process.hltTriggerTypeFilter) @@ -76,8 +63,23 @@ #---------------------------- -# Tracking General Configuration +# Tracking Configuration #---------------------------- +process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") +process.csctfDigis.producer = cms.InputTag("rawDataRepacker") +process.dttfDigis.DTTF_FED_Source = cms.InputTag("rawDataRepacker") +process.ecalDigis.InputLabel = cms.InputTag("rawDataRepacker") +process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataRepacker") +process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker") +process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker") +process.hcalDigis.InputLabel = cms.InputTag("rawDataRepacker") +process.muonCSCDigis.InputObjects = cms.InputTag("rawDataRepacker") +process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") 
+process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") +process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") +process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") +process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") + process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") process.load("RecoLocalTracker.Configuration.RecoLocalTracker_cff") process.load("TrackingTools.TransientTrack.TransientTrackBuilder_cfi") @@ -91,11 +93,11 @@ process.load("RecoLocalTracker.SiPixelRecHits.PixelCPEGeneric_cfi") process.load("RecoPixelVertexing.Configuration.RecoPixelVertexing_cff") process.pixelVertices.TkFilterParameters.minPt = process.pixelTracksTrackingRegions.RegionPSet.ptMin -process.pixelTracksTrackingRegions.RegionPSet.originRadius = cms.double(0.4) -process.pixelTracksTrackingRegions.RegionPSet.originHalfLength = cms.double(15.) -process.pixelTracksTrackingRegions.RegionPSet.originXPos = cms.double(0.08) -process.pixelTracksTrackingRegions.RegionPSet.originYPos = cms.double(-0.03) -process.pixelTracksTrackingRegions.RegionPSet.originZPos = cms.double(0.) +process.pixelTracksTrackingRegions.RegionPSet.originRadius = 0.4 +process.pixelTracksTrackingRegions.RegionPSet.originHalfLength = 15. +process.pixelTracksTrackingRegions.RegionPSet.originXPos = 0.08 +process.pixelTracksTrackingRegions.RegionPSet.originYPos = -0.03 +process.pixelTracksTrackingRegions.RegionPSet.originZPos = 0. 
#---------------------------- @@ -104,28 +106,9 @@ if (process.runType.getRunType() == process.runType.pp_run or process.runType.getRunType() == process.runType.pp_run_stage1 or process.runType.getRunType() == process.runType.cosmic_run or process.runType.getRunType() == process.runType.cosmic_run_stage1 or process.runType.getRunType() == process.runType.hpu_run): - print("[beampixel_dqm_sourceclient-live_cfg]::running pp") + print "[beampixel_dqm_sourceclient-live_cfg]::running pp" - #---------------------------- - # Tracking Configuration - #---------------------------- - process.castorDigis.InputLabel = cms.InputTag("rawDataCollector") - process.csctfDigis.producer = cms.InputTag("rawDataCollector") - process.dttfDigis.DTTF_FED_Source = cms.InputTag("rawDataCollector") - process.ecalDigis.InputLabel = cms.InputTag("rawDataCollector") - process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataCollector") - process.gctDigis.inputLabel = cms.InputTag("rawDataCollector") - process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataCollector") - process.hcalDigis.InputLabel = cms.InputTag("rawDataCollector") - process.muonCSCDigis.InputObjects = cms.InputTag("rawDataCollector") - process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") - process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") - process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") - process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") - - #---------------------------- # pixelVertexDQM Config #---------------------------- @@ -157,26 +140,7 @@ # Heavy Ion Specific Section #---------------------------- if (process.runType.getRunType() == process.runType.hi_run): - print("[beampixel_dqm_sourceclient-live_cfg]::running HI") - - - #---------------------------- - # Tracking Configuration - #---------------------------- - process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") 
- process.csctfDigis.producer = cms.InputTag("rawDataRepacker") - process.dttfDigis.DTTF_FED_Source = cms.InputTag("rawDataRepacker") - process.ecalDigis.InputLabel = cms.InputTag("rawDataRepacker") - process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataRepacker") - process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker") - process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker") - process.hcalDigis.InputLabel = cms.InputTag("rawDataRepacker") - process.muonCSCDigis.InputObjects = cms.InputTag("rawDataRepacker") - process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") - process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") - process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") - process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") + print "[beampixel_dqm_sourceclient-live_cfg]::running HI" #---------------------------- @@ -206,6 +170,40 @@ fileName = cms.string("/nfshome0/dqmdev/BeamMonitorDQM/BeamPixelResults.txt")) + #---------------------------- + # Pixel-Tracks&Vertices Config + #---------------------------- + from RecoVertex.PrimaryVertexProducer.TkClusParameters_cff import DA_vectParameters + offlinePrimaryVertices = cms.EDProducer( + "PrimaryVertexProducer", + verbose = cms.untracked.bool(False), + TrackLabel = cms.InputTag("generalTracks"), + beamSpotLabel = cms.InputTag("offlineBeamSpot"), + TkFilterParameters = cms.PSet( + algorithm = cms.string('filter'), + maxNormalizedChi2 = cms.double(10.0), + minPixelLayersWithHits = cms.int32(2), + minSiliconLayersWithHits = cms.int32(5), + maxD0Significance = cms.double(4.0), + minPt = cms.double(0.0), + maxEta = cms.double(2.4), + trackQuality = cms.string("any")), + TkClusParameters = DA_vectParameters, + vertexCollections = cms.VPSet( + [cms.PSet(label = cms.string(""), + algorithm = cms.string("AdaptiveVertexFitter"), + chi2cutoff = cms.double(2.5), + minNdof = 
cms.double(0.0), + useBeamConstraint = cms.bool(False), + maxDistanceToBeam = cms.double(1.0)), + cms.PSet(label = cms.string("WithBS"), + algorithm = cms.string('AdaptiveVertexFitter'), + chi2cutoff = cms.double(2.5), + minNdof = cms.double(2.0), + useBeamConstraint = cms.bool(True), + maxDistanceToBeam = cms.double(1.0))])) + + #---------------------------- # File to save beamspot info #---------------------------- @@ -213,7 +211,7 @@ process.pixelVertexDQM.fileName = cms.string("/nfshome0/dqmpro/BeamMonitorDQM/BeamPixelResults.txt") else: process.pixelVertexDQM.fileName = cms.string("/nfshome0/dqmdev/BeamMonitorDQM/BeamPixelResults.txt") -print("[beampixel_dqm_sourceclient-live_cfg]::saving DIP file into " + str(process.pixelVertexDQM.fileName)) +print "[beampixel_dqm_sourceclient-live_cfg]::saving DIP file into " + str(process.pixelVertexDQM.fileName) #---------------------------- From 8c1bc817d45e40e68eae9ac17b159fde5b524599 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Wed, 26 Sep 2018 17:53:25 +0200 Subject: [PATCH 044/149] Introduce Cluster Charge Cut, optimize Histogram (bucket sorting) (cms-patatrack#171) --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 23 ++- .../plugins/gpuClusterChargeCut.h | 97 +++++++++++ .../plugins/gpuClustering.h | 151 +++++++++++------- .../SiPixelClusterizer/test/BuildFile.xml | 9 ++ RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 3 +- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 41 +++-- 6 files changed, 247 insertions(+), 77 deletions(-) create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index f3242a11d7ae6..7bd6eac473cc7 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -34,6 +34,7 @@ #include 
"HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h" // local includes @@ -687,11 +688,11 @@ namespace pixelgpudetails { cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); } - /* - std::cout +#ifdef GPU_DEBUG + std::cout << "CUDA countModules kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; - */ +#endif cudaCheck(cudaMemsetAsync(moduleStart_d, 0x00, sizeof(uint32_t), stream.id())); @@ -703,10 +704,10 @@ namespace pixelgpudetails { threadsPerBlock = 256; blocks = MaxNumModules; - /* +#ifdef GPU_DEBUG std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; - */ +#endif cudaCheck(cudaMemsetAsync(clusInModule_d, 0, (MaxNumModules)*sizeof(uint32_t), stream.id())); findClus<<>>( moduleInd_d, @@ -717,6 +718,18 @@ namespace pixelgpudetails { wordCounter); cudaCheck(cudaGetLastError()); + // apply charge cut + clusterChargeCut<<>>( + moduleInd_d, + adc_d, + moduleStart_d, + clusInModule_d, moduleId_d, + clus_d, + wordCounter); + cudaCheck(cudaGetLastError()); + + + // count the module start indices already here (instead of // rechits) so that the number of clusters/hits can be made // available in the rechit producer without additional points of diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h new file mode 100644 index 0000000000000..0284a378ecd39 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -0,0 +1,97 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h +#define 
RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h + +#include +#include +#include + +#include "gpuClusteringConstants.h" + +#include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h" + +namespace gpuClustering { + + __global__ void clusterChargeCut( + uint16_t * __restrict__ id, // module id of each pixel (modified if bad cluster) + uint16_t const * __restrict__ adc, // charge of each pixel + uint32_t const * __restrict__ moduleStart, // index of the first pixel of each module + uint32_t * __restrict__ nClustersInModule, // modified: number of clusters found in each module + uint32_t const * __restrict__ moduleId, // module id of each module + int32_t * __restrict__ clusterId, // modified: cluster id of each pixel + int numElements) + { + + if (blockIdx.x >= moduleStart[0]) + return; + + auto firstPixel = moduleStart[1 + blockIdx.x]; + auto thisModuleId = id[firstPixel]; + assert(thisModuleId < MaxNumModules); + assert(thisModuleId==moduleId[blockIdx.x]); + + auto nclus = nClustersInModule[thisModuleId]; + if (nclus==0) return; + + assert(nclus<=MaxNumClustersPerModules); + +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x); +#endif + + auto first = firstPixel + threadIdx.x; + + __shared__ int32_t charge[MaxNumClustersPerModules]; + for (int i=threadIdx.x; ichargeCut ? 
1 : 0; + } + + __syncthreads(); + + // renumber + __shared__ uint16_t ws[32]; + blockPrefixScan(newclusId, nclus, ws); + + assert(nclus>=newclusId[nclus-1]); + + if(nclus==newclusId[nclus-1]) return; + + nClustersInModule[thisModuleId] = newclusId[nclus-1]; + __syncthreads(); + + // mark bad cluster again + for (int i=threadIdx.x; i #include #include #include "gpuClusteringConstants.h" +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" + namespace gpuClustering { __global__ void countModules(uint16_t const * __restrict__ id, @@ -32,7 +37,9 @@ namespace gpuClustering { } } - __global__ void findClus(uint16_t const * __restrict__ id, // module id of each pixel + __global__ +// __launch_bounds__(256,4) + void findClus(uint16_t const * __restrict__ id, // module id of each pixel uint16_t const * __restrict__ x, // local coordinates of each pixel uint16_t const * __restrict__ y, // uint32_t const * __restrict__ moduleStart, // index of the first pixel of each module @@ -63,8 +70,6 @@ namespace gpuClustering { __syncthreads(); // skip threads not associated to an existing pixel - bool active = (first < numElements); - if (active) { for (int i = first; i < numElements; i += blockDim.x) { if (id[i] == InvId) // skip invalid pixels continue; @@ -73,95 +78,132 @@ namespace gpuClustering { break; } } - } - - //init hist (ymax < 512) - __shared__ HistoContainer hist; - hist.nspills = 0; - for (auto k = threadIdx.x; k; + constexpr auto wss = Hist::totbins(); + __shared__ Hist hist; + __shared__ typename Hist::Counter ws[wss]; + for (auto j=threadIdx.x; j0 ? 
y[i]-1 : 0); - auto be = hist.bin(y[i]+1)+1; - auto loop = [&](int j) { - j+=firstPixel; - if (i>=j or j>jm or - std::abs(int(x[j]) - int(x[i])) > 1 or - std::abs(int(y[j]) - int(y[i])) > 1) return; - auto old = atomicMin(&clusterId[j], clusterId[i]); + auto loop = [&](uint16_t const * kk) { + auto m = (*kk)+firstPixel; +#ifdef GPU_DEBUG + assert(m!=i); +#endif + if (std::abs(int(x[m]) - int(x[i])) > 1) return; + // if (std::abs(int(y[m]) - int(y[i])) > 1) return; // binssize is 1 + auto old = atomicMin(&clusterId[m], clusterId[i]); if (old != clusterId[i]) { // end the loop only if no changes were applied - done = false; + more = true; } atomicMin(&clusterId[i], old); +#ifdef CLUS_LIMIT_LOOP // update the loop boundary for the next iteration - jmax[k] = std::max(j + 1,jmax[k]); + jmax[k] = std::max(kk + 1,jmax[k]); +#endif }; - for (auto b=bs; b + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index c52545a601341..7918c7a4f4d9a 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -14,7 +14,8 @@ - + + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 2ee4a10c6fc99..3f92e4833bc22 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -14,15 +14,6 @@ namespace gpuPixelRecHits { - // to be moved in common namespace... 
- constexpr uint16_t InvId=9999; // must be > MaxNumModules - - - constexpr uint32_t MaxClusInModule = pixelCPEforGPU::MaxClusInModule; - - using ClusParams = pixelCPEforGPU::ClusParams; - - __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams, float const * __restrict__ bs, uint16_t const * __restrict__ id, @@ -42,21 +33,44 @@ namespace gpuPixelRecHits { float * xe, float * ye, uint16_t * mr, uint16_t * mc) { + + // to be moved in common namespace... + constexpr uint16_t InvId=9999; // must be > MaxNumModules + constexpr uint32_t MaxClusInModule = pixelCPEforGPU::MaxClusInModule; + + using ClusParams = pixelCPEforGPU::ClusParams; + + // as usual one block per module __shared__ ClusParams clusParams; auto first = digiModuleStart[1 + blockIdx.x]; - auto me = id[first]; - assert(moduleId[blockIdx.x] == me); + auto me = moduleId[blockIdx.x]; auto nclus = clusInModule[me]; + if (0==nclus) return; + +#ifdef GPU_DEBUG + if (threadIdx.x==0) { + auto k=first; + while (id[k]==InvId) ++k; + assert(id[k]==me); + } +#endif + #ifdef GPU_DEBUG if (me%100==1) if (threadIdx.x==0) printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, hitsModuleStart[me]); #endif assert(blockDim.x >= MaxClusInModule); - assert(nclus <= MaxClusInModule); + + if (threadIdx.x==0 && nclus > MaxClusInModule) { + printf("WARNING: too many clusters %d in Module %d. 
Only first %d processed\n", nclus,me,MaxClusInModule); + // zero charge: do not bother to do it in parallel + for (auto d=MaxClusInModule; d= nclus) continue; atomicMin(&clusParams.minRow[clus[i]], x[i]); atomicMax(&clusParams.maxRow[clus[i]], x[i]); atomicMin(&clusParams.minCol[clus[i]], y[i]); @@ -93,6 +107,7 @@ namespace gpuPixelRecHits { for (int i = first; i < numElements; i += blockDim.x) { if (id[i] == InvId) continue; // not valid if (id[i] != me) break; // end of module + if (clus[i] >= nclus) continue; atomicAdd(&clusParams.charge[clus[i]], adc[i]); if (clusParams.minRow[clus[i]]==x[i]) atomicAdd(&clusParams.Q_f_X[clus[i]], adc[i]); if (clusParams.maxRow[clus[i]]==x[i]) atomicAdd(&clusParams.Q_l_X[clus[i]], adc[i]); From e8430161cd004bfe3779ff2811c152e4335985a7 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sun, 30 Sep 2018 21:48:13 +0200 Subject: [PATCH 045/149] Work around atomicAdd synchronisation problem on Volta (cms-patatrack#180) --- RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 163378c6adbf0..50a49634dd276 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -198,7 +198,7 @@ namespace gpuClustering { printf("# loops %d\n",nloops); #endif - __shared__ int foundClusters; + __shared__ unsigned int foundClusters; foundClusters = 0; __syncthreads(); @@ -208,7 +208,7 @@ namespace gpuClustering { if (id[i] == InvId) // skip invalid pixels continue; if (clusterId[i] == i) { - auto old = atomicAdd(&foundClusters, 1); + auto old = atomicInc(&foundClusters, 0xffffffff); clusterId[i] = -(old + 1); } } From db82536ef99d92f97ae740be3de9913cec072fb2 Mon Sep 17 00:00:00 2001 From: Mauro Date: Thu, 4 Oct 2018 15:32:19 +0200 Subject: [PATCH 046/149] Revert back from 
oversight about rawDataRepacker and rawDataCollector --- .../beampixel_dqm_sourceclient-live_cfg.py | 113 ++++++++---------- 1 file changed, 51 insertions(+), 62 deletions(-) diff --git a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py index b5c92413c1cf5..7ee2b9147981d 100644 --- a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py @@ -62,29 +62,6 @@ from DQMServices.Core.DQMEDAnalyzer import DQMEDAnalyzer -#---------------------------- -# Tracking Configuration -#---------------------------- -process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") -process.csctfDigis.producer = cms.InputTag("rawDataRepacker") -process.dttfDigis.DTTF_FED_Source = cms.InputTag("rawDataRepacker") -process.ecalDigis.InputLabel = cms.InputTag("rawDataRepacker") -process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataRepacker") -process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker") -process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker") -process.hcalDigis.InputLabel = cms.InputTag("rawDataRepacker") -process.muonCSCDigis.InputObjects = cms.InputTag("rawDataRepacker") -process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") -process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") -process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") -process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") - -process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") -process.load("RecoLocalTracker.Configuration.RecoLocalTracker_cff") -process.load("TrackingTools.TransientTrack.TransientTrackBuilder_cfi") - - #---------------------------- # Pixel-Tracks&Vertices Config #---------------------------- @@ -93,11 +70,11 @@ 
process.load("RecoLocalTracker.SiPixelRecHits.PixelCPEGeneric_cfi") process.load("RecoPixelVertexing.Configuration.RecoPixelVertexing_cff") process.pixelVertices.TkFilterParameters.minPt = process.pixelTracksTrackingRegions.RegionPSet.ptMin -process.pixelTracksTrackingRegions.RegionPSet.originRadius = 0.4 -process.pixelTracksTrackingRegions.RegionPSet.originHalfLength = 15. -process.pixelTracksTrackingRegions.RegionPSet.originXPos = 0.08 -process.pixelTracksTrackingRegions.RegionPSet.originYPos = -0.03 -process.pixelTracksTrackingRegions.RegionPSet.originZPos = 0. +process.pixelTracksTrackingRegions.RegionPSet.originRadius = cms.double(0.4) +process.pixelTracksTrackingRegions.RegionPSet.originHalfLength = cms.double(15.) +process.pixelTracksTrackingRegions.RegionPSet.originXPos = cms.double(0.08) +process.pixelTracksTrackingRegions.RegionPSet.originYPos = cms.double(-0.03) +process.pixelTracksTrackingRegions.RegionPSet.originZPos = cms.double(0.) #---------------------------- @@ -109,6 +86,29 @@ print "[beampixel_dqm_sourceclient-live_cfg]::running pp" + #---------------------------- + # Tracking Configuration + #---------------------------- + process.castorDigis.InputLabel = cms.InputTag("rawDataCollector") + process.csctfDigis.producer = cms.InputTag("rawDataCollector") + process.dttfDigis.DTTF_FED_Source = cms.InputTag("rawDataCollector") + process.ecalDigis.InputLabel = cms.InputTag("rawDataCollector") + process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataCollector") + process.gctDigis.inputLabel = cms.InputTag("rawDataCollector") + process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataCollector") + process.hcalDigis.InputLabel = cms.InputTag("rawDataCollector") + process.muonCSCDigis.InputObjects = cms.InputTag("rawDataCollector") + process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") + process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") + process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") + 
process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") + process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") + + process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") + process.load("RecoLocalTracker.Configuration.RecoLocalTracker_cff") + process.load("TrackingTools.TransientTrack.TransientTrackBuilder_cfi") + + #---------------------------- # pixelVertexDQM Config #---------------------------- @@ -143,6 +143,29 @@ print "[beampixel_dqm_sourceclient-live_cfg]::running HI" + #---------------------------- + # Tracking Configuration + #---------------------------- + process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.csctfDigis.producer = cms.InputTag("rawDataRepacker") + process.dttfDigis.DTTF_FED_Source = cms.InputTag("rawDataRepacker") + process.ecalDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.ecalPreshowerDigis.sourceTag = cms.InputTag("rawDataRepacker") + process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker") + process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker") + process.hcalDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.muonCSCDigis.InputObjects = cms.InputTag("rawDataRepacker") + process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") + process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") + process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") + + process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") + process.load("RecoLocalTracker.Configuration.RecoLocalTracker_cff") + process.load("TrackingTools.TransientTrack.TransientTrackBuilder_cfi") + + #---------------------------- # pixelVertexDQM Config #---------------------------- @@ -170,40 +193,6 @@ fileName = cms.string("/nfshome0/dqmdev/BeamMonitorDQM/BeamPixelResults.txt")) - #---------------------------- - # 
Pixel-Tracks&Vertices Config - #---------------------------- - from RecoVertex.PrimaryVertexProducer.TkClusParameters_cff import DA_vectParameters - offlinePrimaryVertices = cms.EDProducer( - "PrimaryVertexProducer", - verbose = cms.untracked.bool(False), - TrackLabel = cms.InputTag("generalTracks"), - beamSpotLabel = cms.InputTag("offlineBeamSpot"), - TkFilterParameters = cms.PSet( - algorithm = cms.string('filter'), - maxNormalizedChi2 = cms.double(10.0), - minPixelLayersWithHits = cms.int32(2), - minSiliconLayersWithHits = cms.int32(5), - maxD0Significance = cms.double(4.0), - minPt = cms.double(0.0), - maxEta = cms.double(2.4), - trackQuality = cms.string("any")), - TkClusParameters = DA_vectParameters, - vertexCollections = cms.VPSet( - [cms.PSet(label = cms.string(""), - algorithm = cms.string("AdaptiveVertexFitter"), - chi2cutoff = cms.double(2.5), - minNdof = cms.double(0.0), - useBeamConstraint = cms.bool(False), - maxDistanceToBeam = cms.double(1.0)), - cms.PSet(label = cms.string("WithBS"), - algorithm = cms.string('AdaptiveVertexFitter'), - chi2cutoff = cms.double(2.5), - minNdof = cms.double(2.0), - useBeamConstraint = cms.bool(True), - maxDistanceToBeam = cms.double(1.0))])) - - #---------------------------- # File to save beamspot info #---------------------------- From 7d03457ca956a18d546501d66522f1f05f3d6ddb Mon Sep 17 00:00:00 2001 From: Mauro Date: Thu, 4 Oct 2018 15:57:46 +0200 Subject: [PATCH 047/149] Update beampixel_dqm_sourceclient-live_cfg.py --- .../beampixel_dqm_sourceclient-live_cfg.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py index 7ee2b9147981d..e7722a9046f36 100644 --- a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py @@ -62,6 +62,14 @@ from 
DQMServices.Core.DQMEDAnalyzer import DQMEDAnalyzer +#---------------------------- +# Tracking General Configuration +#---------------------------- +process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") +process.load("RecoLocalTracker.Configuration.RecoLocalTracker_cff") +process.load("TrackingTools.TransientTrack.TransientTrackBuilder_cfi") + + #---------------------------- # Pixel-Tracks&Vertices Config #---------------------------- @@ -104,10 +112,6 @@ process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") - process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") - process.load("RecoLocalTracker.Configuration.RecoLocalTracker_cff") - process.load("TrackingTools.TransientTrack.TransientTrackBuilder_cfi") - #---------------------------- # pixelVertexDQM Config @@ -161,10 +165,6 @@ process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") - process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") - process.load("RecoLocalTracker.Configuration.RecoLocalTracker_cff") - process.load("TrackingTools.TransientTrack.TransientTrackBuilder_cfi") - #---------------------------- # pixelVertexDQM Config From 6ed56a5504b910cbba7777291a80a821feb224ae Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 6 Oct 2018 14:29:06 +0200 Subject: [PATCH 048/149] Suppress asserts in the GPU code, unless GPU_DEBUG is defined (cms-patatrack#186) --- .../SiPixelObjects/interface/SiPixelGainForHLTonGPU.h | 3 ++- RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h | 2 +- .../SiPixelClusterizer/plugins/gpuClusterChargeCut.h | 6 +++--- RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h | 6 ++---- RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h | 2 +- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h 
b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h index 48302c7517583..931ee7e65f295 100644 --- a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -1,11 +1,12 @@ #ifndef CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h #define CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h -#include #include #include #include +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" + struct SiPixelGainForHLTonGPU_DecodingStructure{ uint8_t gain; uint8_t ped; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 3b19268382c6a..5a681e791f94f 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -1,11 +1,11 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h -#include #include #include #include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" namespace gpuCalibPixel { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index 0284a378ecd39..855216960d659 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -1,14 +1,14 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h -#include #include #include -#include "gpuClusteringConstants.h" - +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h" +#include "gpuClusteringConstants.h" + namespace gpuClustering { __global__ 
void clusterChargeCut( diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 50a49634dd276..26bb4f9244c6a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -1,18 +1,16 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h - // #define CLUS_LIMIT_LOOP -#include #include #include -#include "gpuClusteringConstants.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" - #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "gpuClusteringConstants.h" namespace gpuClustering { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 3f92e4833bc22..4a8c9d945e5f1 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -1,12 +1,12 @@ #ifndef RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h #define RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h -#include #include #include #include #include "DataFormats/Math/interface/approx_atan2.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" namespace gpuPixelRecHits { From a19245d3f820d5610d56c61d781909765e2d0822 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 14 Nov 2018 23:56:26 +0100 Subject: [PATCH 049/149] Synchronise with CMSSW_10_4_0_pre2 --- .../Configuration/python/RecoLocalTracker_cff.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index 
ad975fa183566..b001700d8f369 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -24,7 +24,6 @@ from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import * from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import * -from Configuration.Eras.Modifier_phase2_tracker_cff import phase2_tracker phase2_tracker.toReplaceWith(pixeltrackerlocalreco, cms.Sequence( siPhase2Clusters + @@ -32,10 +31,6 @@ siPixelRecHitsPreSplitting ) ) -phase2_tracker.toModify(clusterSummaryProducer, - doStrips = False, - stripClusters = '' -) phase2_tracker.toReplaceWith(trackerlocalreco, cms.Sequence( pixeltrackerlocalreco*clusterSummaryProducer From 028882679c5da3783c92da1a0b5dedd41d9f30b0 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Tue, 27 Nov 2018 09:26:57 -0600 Subject: [PATCH 050/149] Add infrastructure around cub CachingDeviceAllocator, and use it in SiPixelRawToCluster (cms-patatrack#172) Add infrastructure around cub CachingDeviceAllocator for device memory allocations, and CachingHostAllocator for pinned (or managed) host memory. CUDAService uses the CachingHostAllocator to allocate requested GPU->CPU/CPU->GPU buffers and data products. Configuration options can be used to request: - to print all memory (re)allocations and frees; - to preallocate device and host buffers. SiPixelRawToCluster uses the CachingDeviceAllocator for temporary buffers and data products. Fix a memory problem with SiPixelFedCablingMapGPUWrapper::ModulesToUnpack. 
--- CUDADataFormats/SiPixelCluster/BuildFile.xml | 8 + .../interface/SiPixelClustersCUDA.h | 73 +++++ .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 24 ++ CUDADataFormats/SiPixelDigi/BuildFile.xml | 7 + .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 65 ++++ .../SiPixelDigi/src/SiPixelDigisCUDA.cc | 24 ++ .../SiPixelClusterizer/plugins/BuildFile.xml | 2 + .../plugins/SiPixelRawToClusterGPUKernel.cu | 289 +++++++----------- .../plugins/SiPixelRawToClusterGPUKernel.h | 108 +++---- 9 files changed, 375 insertions(+), 225 deletions(-) create mode 100644 CUDADataFormats/SiPixelCluster/BuildFile.xml create mode 100644 CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h create mode 100644 CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc create mode 100644 CUDADataFormats/SiPixelDigi/BuildFile.xml create mode 100644 CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h create mode 100644 CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml new file mode 100644 index 0000000000000..21c527e7b2f0d --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h new file mode 100644 index 0000000000000..22d9ff9d103ba --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -0,0 +1,73 @@ +#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h +#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h + +#include "CUDADataFormats/Common/interface/device_unique_ptr.h" + +#include + +class SiPixelClustersCUDA { +public: + SiPixelClustersCUDA() = default; + explicit SiPixelClustersCUDA(size_t feds, size_t nelements, cuda::stream_t<>& stream); + ~SiPixelClustersCUDA() = default; + + SiPixelClustersCUDA(const SiPixelClustersCUDA&) = 
delete; + SiPixelClustersCUDA& operator=(const SiPixelClustersCUDA&) = delete; + SiPixelClustersCUDA(SiPixelClustersCUDA&&) = default; + SiPixelClustersCUDA& operator=(SiPixelClustersCUDA&&) = default; + + uint32_t *moduleStart() { return moduleStart_d.get(); } + int32_t *clus() { return clus_d.get(); } + uint32_t *clusInModule() { return clusInModule_d.get(); } + uint32_t *moduleId() { return moduleId_d.get(); } + uint32_t *clusModuleStart() { return clusModuleStart_d.get(); } + + uint32_t const *moduleStart() const { return moduleStart_d.get(); } + int32_t const *clus() const { return clus_d.get(); } + uint32_t const *clusInModule() const { return clusInModule_d.get(); } + uint32_t const *moduleId() const { return moduleId_d.get(); } + uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); } + + uint32_t const *c_moduleStart() const { return moduleStart_d.get(); } + int32_t const *c_clus() const { return clus_d.get(); } + uint32_t const *c_clusInModule() const { return clusInModule_d.get(); } + uint32_t const *c_moduleId() const { return moduleId_d.get(); } + uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); } + + class DeviceConstView { + public: + DeviceConstView() = default; + +#ifdef __CUDACC__ + __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_+i); } + __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_+i); } + __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_+i); } + __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_+i); } + __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_+i); } +#endif + + friend SiPixelClustersCUDA; + + private: + uint32_t const *moduleStart_ = nullptr; + int32_t const *clus_ = nullptr; + uint32_t const *clusInModule_ = nullptr; + uint32_t const *moduleId_ = nullptr; + uint32_t const *clusModuleStart_ = nullptr; + 
}; + + DeviceConstView *view() const { return view_d.get(); } + +private: + edm::cuda::device::unique_ptr moduleStart_d; // index of the first pixel of each module + edm::cuda::device::unique_ptr clus_d; // cluster id of each pixel + edm::cuda::device::unique_ptr clusInModule_d; // number of clusters found in each module + edm::cuda::device::unique_ptr moduleId_d; // module id of each module + + // originally from rechits + edm::cuda::device::unique_ptr clusModuleStart_d; + + edm::cuda::device::unique_ptr view_d; // "me" pointer +}; + +#endif diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc new file mode 100644 index 0000000000000..7363c2fd364af --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -0,0 +1,24 @@ +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" + +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" + +SiPixelClustersCUDA::SiPixelClustersCUDA(size_t feds, size_t nelements, cuda::stream_t<>& stream) { + edm::Service cs; + + moduleStart_d = cs->make_device_unique(nelements+1, stream); + clus_d = cs->make_device_unique< int32_t[]>(feds, stream); + clusInModule_d = cs->make_device_unique(nelements, stream); + moduleId_d = cs->make_device_unique(nelements, stream); + clusModuleStart_d = cs->make_device_unique(nelements+1, stream); + + auto view = cs->make_host_unique(stream); + view->moduleStart_ = moduleStart_d.get(); + view->clus_ = clus_d.get(); + view->clusInModule_ = clusInModule_d.get(); + view->moduleId_ = moduleId_d.get(); + view->clusModuleStart_ = clusModuleStart_d.get(); + + view_d = cs->make_device_unique(stream); + cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id()); +} diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml new file mode 100644 index 
0000000000000..259aa9f08d054 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h new file mode 100644 index 0000000000000..25e8b54a743c2 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -0,0 +1,65 @@ +#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h +#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h + +#include "CUDADataFormats/Common/interface/device_unique_ptr.h" +#include "FWCore/Utilities/interface/propagate_const.h" + +#include + +class SiPixelDigisCUDA { +public: + SiPixelDigisCUDA() = default; + explicit SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream); + ~SiPixelDigisCUDA() = default; + + SiPixelDigisCUDA(const SiPixelDigisCUDA&) = delete; + SiPixelDigisCUDA& operator=(const SiPixelDigisCUDA&) = delete; + SiPixelDigisCUDA(SiPixelDigisCUDA&&) = default; + SiPixelDigisCUDA& operator=(SiPixelDigisCUDA&&) = default; + + uint16_t * xx() { return xx_d.get(); } + uint16_t * yy() { return yy_d.get(); } + uint16_t * adc() { return adc_d.get(); } + uint16_t * moduleInd() { return moduleInd_d.get(); } + + uint16_t const *xx() const { return xx_d.get(); } + uint16_t const *yy() const { return yy_d.get(); } + uint16_t const *adc() const { return adc_d.get(); } + uint16_t const *moduleInd() const { return moduleInd_d.get(); } + + uint16_t const *c_xx() const { return xx_d.get(); } + uint16_t const *c_yy() const { return yy_d.get(); } + uint16_t const *c_adc() const { return adc_d.get(); } + uint16_t const *c_moduleInd() const { return moduleInd_d.get(); } + + class DeviceConstView { + public: + DeviceConstView() = default; + +#ifdef __CUDACC__ + __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_+i); } + __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_+i); } + __device__ __forceinline__ 
uint16_t adc(int i) const { return __ldg(adc_+i); } + __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_+i); } +#endif + + friend class SiPixelDigisCUDA; + + private: + uint16_t const *xx_ = nullptr; + uint16_t const *yy_ = nullptr; + uint16_t const *adc_ = nullptr; + uint16_t const *moduleInd_ = nullptr; + }; + + const DeviceConstView *view() const { return view_d.get(); } + +private: + edm::cuda::device::unique_ptr xx_d; // local coordinates of each pixel + edm::cuda::device::unique_ptr yy_d; // + edm::cuda::device::unique_ptr adc_d; // ADC of each pixel + edm::cuda::device::unique_ptr moduleInd_d; // module id of each pixel + edm::cuda::device::unique_ptr view_d; // "me" pointer +}; + +#endif diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc new file mode 100644 index 0000000000000..5ba2e920e9b04 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -0,0 +1,24 @@ +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" + +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" + +#include + +SiPixelDigisCUDA::SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream) { + edm::Service cs; + + xx_d = cs->make_device_unique(nelements, stream); + yy_d = cs->make_device_unique(nelements, stream); + adc_d = cs->make_device_unique(nelements, stream); + moduleInd_d = cs->make_device_unique(nelements, stream); + + auto view = cs->make_host_unique(stream); + view->xx_ = xx_d.get(); + view->yy_ = yy_d.get(); + view->adc_ = adc_d.get(); + view->moduleInd_ = moduleInd_d.get(); + + view_d = cs->make_device_unique(stream); + cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id()); +} diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 
9db4a46f367b3..40a489f763397 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -7,6 +7,8 @@ + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 7bd6eac473cc7..dc768ce8f643d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -31,6 +31,8 @@ #include // CMSSW includes +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" @@ -48,114 +50,17 @@ namespace pixelgpudetails { // number of words for all the FEDs constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; - constexpr uint32_t MAX_WORD08_SIZE = MAX_FED_WORDS * sizeof(uint8_t); - constexpr uint32_t MAX_WORD32_SIZE = MAX_FED_WORDS * sizeof(uint32_t); - constexpr uint32_t MAX_WORD16_SIZE = MAX_FED_WORDS * sizeof(uint16_t); constexpr uint32_t MAX_ERROR_SIZE = MAX_FED_WORDS * esize; - SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel(cuda::stream_t<>& cudaStream) { - - cudaCheck(cudaMallocHost(&word, MAX_FED_WORDS * sizeof(unsigned int))); - cudaCheck(cudaMallocHost(&fedId_h, MAX_FED_WORDS * sizeof(unsigned char))); - - // to store the output of RawToDigi - cudaCheck(cudaMallocHost(&pdigi_h, MAX_FED_WORDS * sizeof(uint32_t))); - cudaCheck(cudaMallocHost(&rawIdArr_h, MAX_FED_WORDS * sizeof(uint32_t))); - - cudaCheck(cudaMallocHost(&adc_h, MAX_FED_WORDS * sizeof(uint16_t))); - cudaCheck(cudaMallocHost(&clus_h, MAX_FED_WORDS * sizeof(int32_t))); - - 
cudaCheck(cudaMallocHost(&error_h, vsize)); - cudaCheck(cudaMallocHost(&error_h_tmp, vsize)); - cudaCheck(cudaMallocHost(&data_h, MAX_ERROR_SIZE)); - - cudaCheck(cudaMalloc((void**) & word_d, MAX_WORD32_SIZE)); - cudaCheck(cudaMalloc((void**) & fedId_d, MAX_WORD08_SIZE)); - cudaCheck(cudaMalloc((void**) & pdigi_d, MAX_WORD32_SIZE)); // to store thepacked digi - cudaCheck(cudaMalloc((void**) & xx_d, MAX_WORD16_SIZE)); // to store the x and y coordinate - cudaCheck(cudaMalloc((void**) & yy_d, MAX_WORD16_SIZE)); - cudaCheck(cudaMalloc((void**) & adc_d, MAX_WORD16_SIZE)); - - cudaCheck(cudaMalloc((void**) & moduleInd_d, MAX_WORD16_SIZE)); - cudaCheck(cudaMalloc((void**) & rawIdArr_d, MAX_WORD32_SIZE)); - cudaCheck(cudaMalloc((void**) & error_d, vsize)); - cudaCheck(cudaMalloc((void**) & data_d, MAX_ERROR_SIZE)); - cudaCheck(cudaMemset(data_d, 0x00, MAX_ERROR_SIZE)); - - // for the clusterizer - cudaCheck(cudaMalloc((void**) & clus_d, MAX_WORD32_SIZE)); // cluser index in module - - using namespace gpuClustering; - cudaCheck(cudaMalloc((void**) & moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); - cudaCheck(cudaMalloc((void**) & clusInModule_d,(MaxNumModules)*sizeof(uint32_t) )); - cudaCheck(cudaMalloc((void**) & moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); - - new (error_h) GPU::SimpleVector(MAX_FED_WORDS, data_h); - new (error_h_tmp) GPU::SimpleVector(MAX_FED_WORDS, data_d); - assert(error_h->size() == 0); - assert(error_h->capacity() == static_cast(MAX_FED_WORDS)); - assert(error_h_tmp->size() == 0); - assert(error_h_tmp->capacity() == static_cast(MAX_FED_WORDS)); - - // Need these in pinned memory to be truly asynchronous - cudaCheck(cudaMallocHost(&nModulesActive, sizeof(uint32_t))); - cudaCheck(cudaMallocHost(&nClusters, sizeof(uint32_t))); - - cudaCheck(cudaMalloc((void**) & gpuProduct_d, sizeof(GPUProduct))); - gpuProduct = getProduct(); - assert(xx_d==gpuProduct.xx_d); - - cudaCheck(cudaMemcpyAsync(gpuProduct_d, &gpuProduct, sizeof(GPUProduct), 
cudaMemcpyDefault,cudaStream.id())); - - // originally from rechits - cudaCheck(cudaMalloc((void**) & clusModuleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); - uint32_t *tmp = nullptr; - cudaCheck(cub::DeviceScan::InclusiveSum(nullptr, tempScanStorageSize, tmp, tmp, MaxNumModules)); - cudaCheck(cudaMalloc(&tempScanStorage_d, tempScanStorageSize)); + SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender(cuda::stream_t<>& cudaStream) { + edm::Service cs; + word_ = cs->make_host_unique(MAX_FED_WORDS, cudaStream); + fedId_ = cs->make_host_unique(MAX_FED_WORDS, cudaStream); } - SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() { - // free the host memory - cudaCheck(cudaFreeHost(word)); - cudaCheck(cudaFreeHost(fedId_h)); - cudaCheck(cudaFreeHost(pdigi_h)); - cudaCheck(cudaFreeHost(rawIdArr_h)); - cudaCheck(cudaFreeHost(adc_h)); - cudaCheck(cudaFreeHost(clus_h)); - cudaCheck(cudaFreeHost(error_h)); - cudaCheck(cudaFreeHost(error_h_tmp)); - cudaCheck(cudaFreeHost(data_h)); - cudaCheck(cudaFreeHost(nModulesActive)); - cudaCheck(cudaFreeHost(nClusters)); - - // free device memory used for RawToDigi on GPU - // free the GPU memory - cudaCheck(cudaFree(word_d)); - cudaCheck(cudaFree(fedId_d)); - cudaCheck(cudaFree(pdigi_d)); - cudaCheck(cudaFree(xx_d)); - cudaCheck(cudaFree(yy_d)); - cudaCheck(cudaFree(adc_d)); - cudaCheck(cudaFree(moduleInd_d)); - cudaCheck(cudaFree(rawIdArr_d)); - cudaCheck(cudaFree(error_d)); - cudaCheck(cudaFree(data_d)); - - // these are for the clusterizer - cudaCheck(cudaFree(moduleStart_d)); - cudaCheck(cudaFree(clus_d)); - cudaCheck(cudaFree(clusInModule_d)); - cudaCheck(cudaFree(moduleId_d)); - cudaCheck(cudaFree(gpuProduct_d)); - - // originally from rechits - cudaCheck(cudaFree(tempScanStorage_d)); - cudaCheck(cudaFree(clusModuleStart_d)); - } - - void SiPixelRawToClusterGPUKernel::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { - std::memcpy(word+wordCounterGPU, 
src, sizeof(cms_uint32_t)*length); - std::memset(fedId_h+wordCounterGPU/2, fedId - 1200, length/2); + void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { + std::memcpy(word_.get()+wordCounterGPU, src, sizeof(cms_uint32_t)*length); + std::memset(fedId_.get()+wordCounterGPU/2, fedId - 1200, length/2); } //////////////////// @@ -613,6 +518,7 @@ namespace pixelgpudetails { const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, + const WordFedAppender& wordFed, const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, bool useQualityInfo, bool includeErrors, bool transferToCPU, bool debug, @@ -620,52 +526,82 @@ namespace pixelgpudetails { { nDigis = wordCounter; - const int threadsPerBlock = 512; - const int blocks = (wordCounter + threadsPerBlock-1) /threadsPerBlock; // fill it all - - assert(0 == wordCounter%2); - // wordCounter is the total no of words in each event to be trasfered on device - cudaCheck(cudaMemcpyAsync(&word_d[0], &word[0], wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(&fedId_d[0], &fedId_h[0], wordCounter*sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(error_d, error_h_tmp, vsize, cudaMemcpyDefault, stream.id())); - - // Launch rawToDigi kernel - RawToDigi_kernel<<>>( - cablingMap, - modToUnp, - wordCounter, - word_d, - fedId_d, - xx_d, yy_d, adc_d, - pdigi_d, - rawIdArr_d, - moduleInd_d, - error_d, - useQualityInfo, - includeErrors, - debug); - cudaCheck(cudaGetLastError()); - - // copy data to host variable - if(transferToCPU) { - cudaCheck(cudaMemcpyAsync(pdigi_h, pdigi_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(rawIdArr_h, rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - - if (includeErrors) { - 
cudaCheck(cudaMemcpyAsync(error_h, error_d, vsize, cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data_h, data_d, MAX_ERROR_SIZE, cudaMemcpyDefault, stream.id())); - // If we want to transfer only the minimal amount of data, we - // need a synchronization point. A single ExternalWork (of - // SiPixelRawToClusterHeterogeneous) does not help because it is - // already used to synchronize the data movement. So we'd need - // two ExternalWorks (or explicit use of TBB tasks). The - // prototype of #100 would allow this easily (as there would be - // two ExternalWorks). - // - //error_h->set_data(data_h); - //cudaCheck(cudaStreamSynchronize(stream.id())); - //int size = error_h->size(); - //cudaCheck(cudaMemcpyAsync(data_h, data_d, size*esize, cudaMemcpyDefault, stream.id())); + constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; + digis_d = SiPixelDigisCUDA(MAX_FED_WORDS, stream); + clusters_d = SiPixelClustersCUDA(MAX_FED_WORDS, gpuClustering::MaxNumModules, stream); + + edm::Service cs; + digis_clusters_h.nModules_Clusters = cs->make_host_unique(2, stream); + + { + const int threadsPerBlock = 512; + const int blocks = (wordCounter + threadsPerBlock-1) /threadsPerBlock; // fill it all + + assert(0 == wordCounter%2); + // wordCounter is the total no of words in each event to be trasfered on device + auto word_d = cs->make_device_unique(wordCounter, stream); + auto fedId_d = cs->make_device_unique(wordCounter, stream); + + auto error_d = cs->make_device_unique>(stream); + auto data_d = cs->make_device_unique(MAX_FED_WORDS, stream); + cudaCheck(cudaMemsetAsync(data_d.get(), 0x00, MAX_ERROR_SIZE, stream.id())); + auto error_h_tmp = cs->make_host_unique>(stream); + new (error_h_tmp.get()) GPU::SimpleVector(MAX_FED_WORDS, data_d.get()); // should make_host_unique() call the constructor as well? 
note that even if std::make_unique does that, we can't do that in make_device_unique + assert(error_h_tmp->size() == 0); + assert(error_h_tmp->capacity() == static_cast(MAX_FED_WORDS)); + + cudaCheck(cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(fedId_d.get(), wordFed.fedId(), wordCounter*sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(error_d.get(), error_h_tmp.get(), vsize, cudaMemcpyDefault, stream.id())); + + auto pdigi_d = cs->make_device_unique(wordCounter, stream); + auto rawIdArr_d = cs->make_device_unique(wordCounter, stream); + + // Launch rawToDigi kernel + RawToDigi_kernel<<>>( + cablingMap, + modToUnp, + wordCounter, + word_d.get(), + fedId_d.get(), + digis_d.xx(), digis_d.yy(), digis_d.adc(), + pdigi_d.get(), + rawIdArr_d.get(), + digis_d.moduleInd(), + error_d.get(), + useQualityInfo, + includeErrors, + debug); + cudaCheck(cudaGetLastError()); + + // copy data to host variable + if(transferToCPU) { + digis_clusters_h.pdigi = cs->make_host_unique(MAX_FED_WORDS, stream); + digis_clusters_h.rawIdArr = cs->make_host_unique(MAX_FED_WORDS, stream); + cudaCheck(cudaMemcpyAsync(digis_clusters_h.pdigi.get(), pdigi_d.get(), wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(digis_clusters_h.rawIdArr.get(), rawIdArr_d.get(), wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + if (includeErrors) { + digis_clusters_h.data = cs->make_host_unique(MAX_FED_WORDS, stream); + digis_clusters_h.error = cs->make_host_unique>(stream); + new (digis_clusters_h.error.get()) GPU::SimpleVector(MAX_FED_WORDS, digis_clusters_h.data.get()); + assert(digis_clusters_h.error->size() == 0); + assert(digis_clusters_h.error->capacity() == static_cast(MAX_FED_WORDS)); + + cudaCheck(cudaMemcpyAsync(digis_clusters_h.error.get(), error_d.get(), vsize, cudaMemcpyDefault, stream.id())); + 
cudaCheck(cudaMemcpyAsync(digis_clusters_h.data.get(), data_d.get(), MAX_ERROR_SIZE, cudaMemcpyDefault, stream.id())); + // If we want to transfer only the minimal amount of data, we + // need a synchronization point. A single ExternalWork (of + // SiPixelRawToClusterHeterogeneous) does not help because it is + // already used to synchronize the data movement. So we'd need + // two ExternalWorks (or explicit use of TBB tasks). The + // prototype of #100 would allow this easily (as there would be + // two ExternalWorks). + // + //cudaCheck(cudaStreamSynchronize(stream.id())); + //int size = digis_clusters_h.error->size(); + //cudaCheck(cudaMemcpyAsync(digis_clusters_h.data.get(), data_d.get(), size*esize, cudaMemcpyDefault, stream.id())); + } } } // End of Raw2Digi and passing data for cluserisation @@ -677,15 +613,16 @@ namespace pixelgpudetails { int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; gpuCalibPixel::calibDigis<<>>( - moduleInd_d, - xx_d, yy_d, adc_d, + digis_d.moduleInd(), + digis_d.c_xx(), digis_d.c_yy(), digis_d.adc(), gains, wordCounter); cudaCheck(cudaGetLastError()); // calibrated adc if(transferToCPU) { - cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); + digis_clusters_h.adc = cs->make_host_unique(MAX_FED_WORDS, stream); + cudaCheck(cudaMemcpyAsync(digis_clusters_h.adc.get(), digis_d.adc(), wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); } #ifdef GPU_DEBUG @@ -694,13 +631,13 @@ namespace pixelgpudetails { << " blocks of " << threadsPerBlock << " threads\n"; #endif - cudaCheck(cudaMemsetAsync(moduleStart_d, 0x00, sizeof(uint32_t), stream.id())); + cudaCheck(cudaMemsetAsync(clusters_d.moduleStart(), 0x00, sizeof(uint32_t), stream.id())); - countModules<<>>(moduleInd_d, moduleStart_d, clus_d, wordCounter); + countModules<<>>(digis_d.c_moduleInd(), clusters_d.moduleStart(), clusters_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); // read the number of 
modules into a data member, used by getProduct()) - cudaCheck(cudaMemcpyAsync(nModulesActive, moduleStart_d, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(&(digis_clusters_h.nModules_Clusters[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream.id())); threadsPerBlock = 256; blocks = MaxNumModules; @@ -708,23 +645,23 @@ namespace pixelgpudetails { std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; #endif - cudaCheck(cudaMemsetAsync(clusInModule_d, 0, (MaxNumModules)*sizeof(uint32_t), stream.id())); + cudaCheck(cudaMemsetAsync(clusters_d.clusInModule(), 0, (MaxNumModules)*sizeof(uint32_t), stream.id())); findClus<<>>( - moduleInd_d, - xx_d, yy_d, - moduleStart_d, - clusInModule_d, moduleId_d, - clus_d, + digis_d.c_moduleInd(), + digis_d.c_xx(), digis_d.c_yy(), + clusters_d.c_moduleStart(), + clusters_d.clusInModule(), clusters_d.moduleId(), + clusters_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); // apply charge cut clusterChargeCut<<>>( - moduleInd_d, - adc_d, - moduleStart_d, - clusInModule_d, moduleId_d, - clus_d, + digis_d.moduleInd(), + digis_d.c_adc(), + clusters_d.c_moduleStart(), + clusters_d.clusInModule(), clusters_d.c_moduleId(), + clusters_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); @@ -735,19 +672,27 @@ namespace pixelgpudetails { // available in the rechit producer without additional points of // synchronization/ExternalWork // + // Temporary storage + size_t tempScanStorageSize = 0; + { + uint32_t *tmp = nullptr; + cudaCheck(cub::DeviceScan::InclusiveSum(nullptr, tempScanStorageSize, tmp, tmp, MaxNumModules)); + } + auto tempScanStorage_d = cs->make_device_unique(tempScanStorageSize, stream); // Set first the first element to 0 - cudaCheck(cudaMemsetAsync(clusModuleStart_d, 0, sizeof(uint32_t), stream.id())); + cudaCheck(cudaMemsetAsync(clusters_d.clusModuleStart(), 0, sizeof(uint32_t), stream.id())); // Then use 
inclusive_scan to get the partial sum to the rest - cudaCheck(cub::DeviceScan::InclusiveSum(tempScanStorage_d, tempScanStorageSize, - clusInModule_d, &clusModuleStart_d[1], gpuClustering::MaxNumModules, + cudaCheck(cub::DeviceScan::InclusiveSum(tempScanStorage_d.get(), tempScanStorageSize, + clusters_d.c_clusInModule(), &clusters_d.clusModuleStart()[1], gpuClustering::MaxNumModules, stream.id())); // last element holds the number of all clusters - cudaCheck(cudaMemcpyAsync(nClusters, clusModuleStart_d+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(&(digis_clusters_h.nModules_Clusters[1]), clusters_d.clusModuleStart()+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); // clusters if(transferToCPU) { - cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + digis_clusters_h.clus = cs->make_host_unique(MAX_FED_WORDS, stream); + cudaCheck(cudaMemcpyAsync(digis_clusters_h.clus.get(), clusters_d.clus(), wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); } } // end clusterizer scope } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index ca8bd73106c2c..a2d9cdda92573 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -5,6 +5,7 @@ #include #include "cuda/api_wrappers.h" +#include "CUDADataFormats/Common/interface/host_unique_ptr.h" #include "FWCore/Utilities/interface/typedefs.h" #include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" #include "siPixelRawToClusterHeterogeneousProduct.h" @@ -159,8 +160,43 @@ namespace pixelgpudetails { using GPUProduct = siPixelRawToClusterHeterogeneousProduct::GPUProduct; - SiPixelRawToClusterGPUKernel(cuda::stream_t<>& cudaStream); - 
~SiPixelRawToClusterGPUKernel(); + struct CPUData { + CPUData() = default; + ~CPUData() = default; + + CPUData(const CPUData&) = delete; + CPUData& operator=(const CPUData&) = delete; + CPUData(CPUData&&) = default; + CPUData& operator=(CPUData&&) = default; + + edm::cuda::host::unique_ptr nModules_Clusters; // These should really be part of the GPU product + + edm::cuda::host::unique_ptr data; + edm::cuda::host::unique_ptr> error; + + edm::cuda::host::unique_ptr pdigi; + edm::cuda::host::unique_ptr rawIdArr; + edm::cuda::host::unique_ptr adc; + edm::cuda::host::unique_ptr clus; + }; + + class WordFedAppender { + public: + WordFedAppender(cuda::stream_t<>& cudaStream); + ~WordFedAppender() = default; + + void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length); + + const unsigned int *word() const { return word_.get(); } + const unsigned char *fedId() const { return fedId_.get(); } + + private: + edm::cuda::host::unique_ptr word_; + edm::cuda::host::unique_ptr fedId_; + }; + + SiPixelRawToClusterGPUKernel() = default; + ~SiPixelRawToClusterGPUKernel() = default; SiPixelRawToClusterGPUKernel(const SiPixelRawToClusterGPUKernel&) = delete; @@ -168,69 +204,35 @@ namespace pixelgpudetails { SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete; SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete; - void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length); - void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, + const WordFedAppender& wordFed, const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, bool useQualityInfo, bool includeErrors, bool transferToCPU_, bool debug, cuda::stream_t<>& stream); - auto getProduct() { - error_h->set_data(data_h); - return 
siPixelRawToClusterHeterogeneousProduct::GPUProduct{ - pdigi_h, rawIdArr_h, clus_h, adc_h, error_h, - gpuProduct_d, - xx_d, yy_d, adc_d, moduleInd_d, moduleStart_d,clus_d, clusInModule_d, moduleId_d, - clusModuleStart_d, - nDigis, *nModulesActive, *nClusters - }; + siPixelRawToClusterHeterogeneousProduct::GPUProduct getProduct() { + return siPixelRawToClusterHeterogeneousProduct::GPUProduct( + std::move(digis_d), std::move(clusters_d), + nDigis, + digis_clusters_h.nModules_Clusters[0], + digis_clusters_h.nModules_Clusters[1] + ); } - private: - // input - unsigned int *word = nullptr; // to hold input for rawtodigi - unsigned char *fedId_h = nullptr; // to hold fed index for each word - - // output - GPUProduct gpuProduct; - GPUProduct * gpuProduct_d; - - // FIXME cleanup all these are in the gpuProduct above... - - uint32_t *pdigi_h = nullptr, *rawIdArr_h = nullptr; // host copy of output - uint16_t *adc_h = nullptr; int32_t *clus_h = nullptr; // host copy of calib&clus output - pixelgpudetails::error_obj *data_h = nullptr; - GPU::SimpleVector *error_h = nullptr; - GPU::SimpleVector *error_h_tmp = nullptr; + CPUData&& getCPUData() { + return std::move(digis_clusters_h); + } + private: uint32_t nDigis = 0; - uint32_t *nModulesActive = nullptr; - uint32_t *nClusters = nullptr; - - // scratch memory buffers - uint32_t * word_d; - uint8_t * fedId_d; - uint32_t * pdigi_d; - uint16_t * xx_d; - uint16_t * yy_d; - uint16_t * adc_d; - uint16_t * moduleInd_d; - uint32_t * rawIdArr_d; - GPU::SimpleVector * error_d; - error_obj * data_d; - - // these are for the clusterizer (to be moved) - uint32_t * moduleStart_d; - int32_t * clus_d; - uint32_t * clusInModule_d; - uint32_t * moduleId_d; + // CPU data + CPUData digis_clusters_h; - // originally in rechit, moved here - uint32_t *clusModuleStart_d = nullptr; - void *tempScanStorage_d = nullptr; - size_t tempScanStorageSize = 0; + // Data to be put in the event + SiPixelDigisCUDA digis_d; + SiPixelClustersCUDA clusters_d; }; // 
configuration and memory buffers alocated on the GPU From 49ab2726f61ab0c31054734235130121961132d6 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 28 Nov 2018 18:06:28 +0100 Subject: [PATCH 051/149] Migrate tracker local reconstruction and pixel tracking to Tasks (backport #25163) (cms-patatrack#202) Backport "Migrate tracker local reconstruction and pixel tracking to Tasks" (#25163) to the Patatrack branch: - migrate RecoLocalTracker_cff to Tasks; - migrate RecoPixelVertexing_cff to Tasks; - keeping sequences to avoid massive migration (for now). --- .../python/RecoLocalTracker_cff.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index b001700d8f369..b75e75e000d48 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -13,9 +13,13 @@ from RecoLocalTracker.SiPixelRecHits.SiPixelRecHits_cfi import * from RecoLocalTracker.SubCollectionProducers.clustersummaryproducer_cfi import * -pixeltrackerlocalreco = cms.Sequence(siPixelClustersPreSplitting*siPixelRecHitsPreSplitting) -striptrackerlocalreco = cms.Sequence(siStripZeroSuppression*siStripClusters*siStripMatchedRecHits) -trackerlocalreco = cms.Sequence(pixeltrackerlocalreco*striptrackerlocalreco*clusterSummaryProducer) +pixeltrackerlocalrecoTask = cms.Task(siPixelClustersPreSplitting,siPixelRecHitsPreSplitting) +striptrackerlocalrecoTask = cms.Task(siStripZeroSuppression,siStripClusters,siStripMatchedRecHits) +trackerlocalrecoTask = cms.Task(pixeltrackerlocalrecoTask,striptrackerlocalrecoTask,clusterSummaryProducer) + +pixeltrackerlocalreco = cms.Sequence(pixeltrackerlocalrecoTask) +striptrackerlocalreco = cms.Sequence(striptrackerlocalrecoTask) +trackerlocalreco = cms.Sequence(trackerlocalrecoTask) from Configuration.ProcessModifiers.gpu_cff import gpu 
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import siPixelRecHitHeterogeneous as _siPixelRecHitHeterogeneous @@ -24,15 +28,7 @@ from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import * from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import * -phase2_tracker.toReplaceWith(pixeltrackerlocalreco, - cms.Sequence( - siPhase2Clusters + - siPixelClustersPreSplitting + - siPixelRecHitsPreSplitting - ) -) -phase2_tracker.toReplaceWith(trackerlocalreco, - cms.Sequence( - pixeltrackerlocalreco*clusterSummaryProducer - ) -) +_pixeltrackerlocalrecoTask_phase2 = pixeltrackerlocalrecoTask.copy() +_pixeltrackerlocalrecoTask_phase2.add(siPhase2Clusters) +phase2_tracker.toReplaceWith(pixeltrackerlocalrecoTask, _pixeltrackerlocalrecoTask_phase2) +phase2_tracker.toReplaceWith(trackerlocalrecoTask, trackerlocalrecoTask.copyAndExclude([striptrackerlocalrecoTask])) From 45f8115699442745734cdb0473be1cc56b417242 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 28 Nov 2018 18:41:33 +0100 Subject: [PATCH 052/149] Address code style and quality issues (cms-patatrack#203) Cleaned up by clang-tidy 7.0.0. Enabled checks: - boost-use-to-string - misc-uniqueptr-reset-release - modernize-deprecated-headers - modernize-make-shared - modernize-use-bool-literals - modernize-use-equals-delete - modernize-use-nullptr - modernize-use-override - performance-unnecessary-copy-initialization - readability-container-size-empty - readability-redundant-string-cstr - readability-static-definition-in-anonymous-namespace - readability-uniqueptr-delete-release See http://releases.llvm.org/7.0.0/tools/clang/tools/extra/docs/clang-tidy/index.html for details. 
--- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 35a3e6bf2a82c..5e206d9d5bd0b 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -41,7 +41,7 @@ class PixelCPEFast final : public PixelCPEBase const SiPixelGenErrorDBObject *, const SiPixelLorentzAngle *); - ~PixelCPEFast(); + ~PixelCPEFast() override; // The return value can only be used safely in kernels launched on // the same cudaStream, or after cudaStreamSynchronize. From b10314ca8d50df149bcca4a7fcd11c871e09ed89 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 29 Nov 2018 11:52:08 +0100 Subject: [PATCH 053/149] Fix access to uninitialised memory in RawToDigi_kernel (cms-patatrack#206) Reported by cuda-memcheck --tool initcheck: CUDA-MEMCHECK Host API memory access error at host access to 0x7fe311800000 of size 112660 bytes Uninitialized access at 0x7fe311811720 on access by cudaMemcopy source. Saved host backtrace up to driver entry point at error ... Host Frame:.../pluginRecoLocalTrackerSiPixelClusterizerPlugins.so (pixelgpudetails::SiPixelRawToClusterGPUKernel::makeClustersAsync(SiPixelFedCablingMapGPU const*, unsigned char const*, SiPixelGainForHLTonGPU const*, pixelgpudetails::SiPixelRawToClusterGPUKernel::WordFedAppender const&, unsigned int, unsigned int, bool, bool, bool, bool, bool, cuda::stream_t&) + 0x1d87) [0x6e827] Host Frame:.../pluginRecoLocalTrackerSiPixelClusterizerPlugins.so (SiPixelRawToClusterHeterogeneous::acquireGPUCuda(edm::HeterogeneousEvent const&, edm::EventSetup const&, cuda::stream_t&) + 0x768) [0x58618] ... 
--- .../SiPixelDigi/src/SiPixelDigisCUDA.cc | 3 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 73 +++++++++---------- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index 5ba2e920e9b04..7e3d876ac8bdc 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -2,6 +2,7 @@ #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include @@ -20,5 +21,5 @@ SiPixelDigisCUDA::SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream) { view->moduleInd_ = moduleInd_d.get(); view_d = cs->make_device_unique(stream); - cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id()); + cudaCheck(cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id())); } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index dc768ce8f643d..d39662f5ee955 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -84,9 +84,9 @@ namespace pixelgpudetails { return (1==((rawId>>25)&0x7)); } - __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * Map, uint32_t fed, uint32_t link, uint32_t roc) { + __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * cablingMap, uint32_t fed, uint32_t link, uint32_t roc) { uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; - pixelgpudetails::DetIdGPU detId = { Map->RawId[index], Map->rocInDet[index], Map->moduleId[index] }; + pixelgpudetails::DetIdGPU detId = { 
cablingMap->RawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index] }; return detId; } @@ -165,7 +165,7 @@ namespace pixelgpudetails { uint32_t gRow = rowOffset+slopeRow*local.row; uint32_t gCol = colOffset+slopeCol*local.col; - //printf("Inside frameConversion row: %u, column: %u\n",gRow, gCol); + //printf("Inside frameConversion row: %u, column: %u\n", gRow, gCol); pixelgpudetails::Pixel global = {gRow, gCol}; return global; } @@ -219,7 +219,7 @@ namespace pixelgpudetails { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } - __device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *Map, bool debug = false) + __device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) { int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; if (errorType < 25) return false; @@ -229,8 +229,8 @@ namespace pixelgpudetails { case(25) : { errorFound = true; uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1; - if (index > 1 && index <= Map->size) { - if (!(link == Map->link[index] && 1 == Map->roc[index])) errorFound = false; + if (index > 1 && index <= cablingMap->size) { + if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) errorFound = false; } if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); break; @@ -283,7 +283,7 @@ namespace pixelgpudetails { return errorFound? 
errorType : 0; } - __device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *Map, bool debug = false) + __device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) { uint32_t rID = 0xffffffff; @@ -294,7 +294,7 @@ namespace pixelgpudetails { //cabling.pxid = 2; uint32_t roc = 1; uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; - uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; if (rID_temp != 9999) rID = rID_temp; break; } @@ -326,7 +326,7 @@ namespace pixelgpudetails { //cabling.pxid = 2; uint32_t roc = 1; uint32_t link = chanNmbr; - uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; if(rID_temp != 9999) rID = rID_temp; break; } @@ -335,7 +335,7 @@ namespace pixelgpudetails { //cabling.pxid = 2; uint32_t roc = (errWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask; uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; - uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; if(rID_temp != 9999) rID = rID_temp; break; } @@ -366,7 +366,7 @@ namespace pixelgpudetails { // int gIndex = blockDim.x*blockIdx.x+tid; // if (gIndex *err, bool useQualityInfo, bool includeErrors, bool debug) { - uint32_t blockId = blockIdx.x; - uint32_t threadId = threadIdx.x; + //if (threadIdx.x==0) printf("Event: %u blockIdx.x: %u start: %u end: %u\n", eventno, blockIdx.x, begin, end); + auto gIndex = threadIdx.x + blockIdx.x * blockDim.x; + xx[gIndex] = 0; + yy[gIndex] = 0; + adc[gIndex] = 0; bool skipROC = false; - //if (threadId==0) printf("Event: %u blockId: %u start: %u end: %u\n", eventno, blockId, begin, end); - for (int aaa=0; aaa<1; 
++aaa) { // too many coninue below.... (to be fixed) - auto gIndex = threadId + blockId*blockDim.x; + do { // too many coninue below.... (to be fixed) if (gIndex < wordCounter) { - uint32_t fedId = fedIds[gIndex/2]; // +1200; // initialize (too many coninue below) @@ -417,24 +417,21 @@ namespace pixelgpudetails { rawIdArr[gIndex] = 0; moduleId[gIndex] = 9999; - uint32_t ww = Word[gIndex]; // Array containing 32 bit raw data + uint32_t ww = word[gIndex]; // Array containing 32 bit raw data if (ww == 0) { - //noise and dead channels are ignored - XX[gIndex] = 0; // 0 is an indicator of a noise/dead channel - YY[gIndex] = 0; // skip these pixels during clusterization - ADC[gIndex] = 0; - continue; // 0: bad word + // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization + continue; } uint32_t link = getLink(ww); // Extract link uint32_t roc = getRoc(ww); // Extract Roc in link - pixelgpudetails::DetIdGPU detId = getRawId(Map, fedId, link, roc); + pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); - uint32_t errorType = checkROC(ww, fedId, link, Map, debug); + uint32_t errorType = checkROC(ww, fedId, link, cablingMap, debug); skipROC = (roc < pixelgpudetails::maxROCIndex) ? 
false : (errorType != 0); if (includeErrors and skipROC) { - uint32_t rID = getErrRawID(fedId, ww, errorType, Map, debug); + uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); err->emplace_back(rID, ww, errorType, fedId); continue; } @@ -445,16 +442,14 @@ namespace pixelgpudetails { uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; if (useQualityInfo) { - - skipROC = Map->badRocs[index]; + skipROC = cablingMap->badRocs[index]; if (skipROC) continue; - } skipROC = modToUnp[index]; if (skipROC) continue; uint32_t layer = 0;//, ladder =0; - int side = 0, panel = 0, module = 0;//disk = 0,blade = 0 + int side = 0, panel = 0, module = 0;//disk = 0, blade = 0 if (barrel) { @@ -503,14 +498,14 @@ namespace pixelgpudetails { } pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); - XX[gIndex] = globalPix.row; // origin shifting by 1 0-159 - YY[gIndex] = globalPix.col; // origin shifting by 1 0-415 - ADC[gIndex] = getADC(ww); - pdigi[gIndex] = pixelgpudetails::pack(globalPix.row,globalPix.col,ADC[gIndex]); + xx[gIndex] = globalPix.row; // origin shifting by 1 0-159 + yy[gIndex] = globalPix.col; // origin shifting by 1 0-415 + adc[gIndex] = getADC(ww); + pdigi[gIndex] = pixelgpudetails::pack(globalPix.row, globalPix.col, adc[gIndex]); moduleId[gIndex] = detId.moduleId; rawIdArr[gIndex] = rawId; } // end of if (gIndex < end) - } // end fake loop + } while (false); // end fake loop } // end of Raw to Digi kernel // Interface to outside From f90a242655cd27bfce24c42da8e21e9173dd1f55 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 7 Dec 2018 10:40:34 -0600 Subject: [PATCH 054/149] Require allocated type to have only a trivial constructor for make_device_unique and make_host_unique (cms-patatrack#204) --- .../SiPixelCluster/interface/SiPixelClustersCUDA.h | 10 +++++----- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 8 ++++---- .../plugins/SiPixelRawToClusterGPUKernel.cu | 10 +++++----- 
3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index 22d9ff9d103ba..ca8a75d178b6c 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -49,11 +49,11 @@ class SiPixelClustersCUDA { friend SiPixelClustersCUDA; private: - uint32_t const *moduleStart_ = nullptr; - int32_t const *clus_ = nullptr; - uint32_t const *clusInModule_ = nullptr; - uint32_t const *moduleId_ = nullptr; - uint32_t const *clusModuleStart_ = nullptr; + uint32_t const *moduleStart_; + int32_t const *clus_; + uint32_t const *clusInModule_; + uint32_t const *moduleId_; + uint32_t const *clusModuleStart_; }; DeviceConstView *view() const { return view_d.get(); } diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 25e8b54a743c2..66ca680effd19 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -46,10 +46,10 @@ class SiPixelDigisCUDA { friend class SiPixelDigisCUDA; private: - uint16_t const *xx_ = nullptr; - uint16_t const *yy_ = nullptr; - uint16_t const *adc_ = nullptr; - uint16_t const *moduleInd_ = nullptr; + uint16_t const *xx_; + uint16_t const *yy_; + uint16_t const *adc_; + uint16_t const *moduleInd_; }; const DeviceConstView *view() const { return view_d.get(); } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index d39662f5ee955..c3f4292d4c4c4 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -432,7 +432,7 @@ namespace pixelgpudetails { if (includeErrors 
and skipROC) { uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); - err->emplace_back(rID, ww, errorType, fedId); + err->push_back(pixelgpudetails::error_obj{rID, ww, errorType, fedId}); continue; } @@ -476,7 +476,7 @@ namespace pixelgpudetails { if (includeErrors) { if (not rocRowColIsValid(row, col)) { uint32_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays - err->emplace_back(rawId, ww, error, fedId); + err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId}); if(debug) printf("BPIX1 Error status: %i\n", error); continue; } @@ -491,7 +491,7 @@ namespace pixelgpudetails { localPix.col = col; if (includeErrors and not dcolIsValid(dcol, pxid)) { uint32_t error = conversionError(fedId, 3, debug); - err->emplace_back(rawId, ww, error, fedId); + err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId}); if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); continue; } @@ -541,7 +541,7 @@ namespace pixelgpudetails { auto data_d = cs->make_device_unique(MAX_FED_WORDS, stream); cudaCheck(cudaMemsetAsync(data_d.get(), 0x00, MAX_ERROR_SIZE, stream.id())); auto error_h_tmp = cs->make_host_unique>(stream); - new (error_h_tmp.get()) GPU::SimpleVector(MAX_FED_WORDS, data_d.get()); // should make_host_unique() call the constructor as well? 
note that even if std::make_unique does that, we can't do that in make_device_unique + GPU::make_SimpleVector(error_h_tmp.get(), MAX_FED_WORDS, data_d.get()); assert(error_h_tmp->size() == 0); assert(error_h_tmp->capacity() == static_cast(MAX_FED_WORDS)); @@ -579,7 +579,7 @@ namespace pixelgpudetails { if (includeErrors) { digis_clusters_h.data = cs->make_host_unique(MAX_FED_WORDS, stream); digis_clusters_h.error = cs->make_host_unique>(stream); - new (digis_clusters_h.error.get()) GPU::SimpleVector(MAX_FED_WORDS, digis_clusters_h.data.get()); + GPU::make_SimpleVector(digis_clusters_h.error.get(), MAX_FED_WORDS, digis_clusters_h.data.get()); assert(digis_clusters_h.error->size() == 0); assert(digis_clusters_h.error->capacity() == static_cast(MAX_FED_WORDS)); From 64c573c19694ad818a79cce00c10e9508db83beb Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Mon, 7 Jan 2019 17:25:36 -0600 Subject: [PATCH 055/149] Set the data pointer of error's SimpleVector (cms-patatrack#236) --- .../SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index a2d9cdda92573..b0151055ed7f1 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -221,6 +221,8 @@ namespace pixelgpudetails { } CPUData&& getCPUData() { + // Set the vector data pointer to point to CPU + digis_clusters_h.error->set_data(digis_clusters_h.data.get()); return std::move(digis_clusters_h); } From f2a46487141105aa97c5784401777c4ecc24ebe5 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Tue, 8 Jan 2019 18:29:08 +0100 Subject: [PATCH 056/149] Full workflow from raw data to pixel tracks and vertices on GPUs (cms-patatrack#216) Port and optimise the full workflow from pixel raw data to pixel 
tracks and vertices to GPUs. Clean the pixel n-tuplets with the "fishbone" algorithm (only on GPUs). Other changes: - recover the Riemann fit updates lost during the merge with CMSSW 10.4.x; - speed up clustering and track fitting; - minor bug fix to avoid trivial regression with the optimized fit. --- .../interface/phase1PixelTopology.h | 61 +++++ .../test/phase1PixelTopology_t.cpp | 231 +++++++++--------- .../plugins/gpuClustering.h | 46 +++- .../SiPixelRecHits/interface/PixelCPEBase.h | 5 +- .../SiPixelRecHits/interface/PixelCPEFast.h | 4 + .../SiPixelRecHits/interface/pixelCPEforGPU.h | 44 +++- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 6 +- .../SiPixelRecHits/src/PixelCPEFast.cc | 130 ++++++++-- 8 files changed, 366 insertions(+), 161 deletions(-) diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index 37c97a92a3eaa..68fb60361d40d 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -2,6 +2,7 @@ #define Geometry_TrackerGeometryBuilder_phase1PixelTopology_h #include +#include namespace phase1PixelTopology { @@ -29,6 +30,66 @@ namespace phase1PixelTopology { }; + template + constexpr auto map_to_array_helper(Function f, std::index_sequence) + -> std::array::type, sizeof...(Indices)> + { + return {{ f(Indices)... 
}}; + } + + template + constexpr auto map_to_array(Function f) + -> std::array::type, N> + { + return map_to_array_helper(f, std::make_index_sequence{}); + } + + + constexpr uint32_t findMaxModuleStride() { + bool go = true; + int n=2; + while (go) { + for (uint8_t i=1; i<11; ++i) { + if (layerStart[i]%n !=0) {go=false; break;} + } + if(!go) break; + n*=2; + } + return n/2; + } + + constexpr uint32_t maxModuleStride = findMaxModuleStride(); + + + constexpr uint8_t findLayer(uint32_t detId) { + for (uint8_t i=0; i<11; ++i) if (detId layer = map_to_array(findLayerFromCompact); + + constexpr bool validateLayerIndex() { + bool res=true; + for (auto i=0U; i=layerStart[layer[j]]); + res &=(i +#include +#include + #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" -#include -#include namespace { // original code from CMSSW_4_4 std::tuple localXori(int mpx) { - const float m_pitchx=1.f; - int binoffx = int(mpx); // truncate to int - float local_pitchx = m_pitchx; // defaultpitch - - if (binoffx>80) { // ROC 1 - handles x on edge cluster - binoffx=binoffx+2; - } else if (binoffx==80) { // ROC 1 - binoffx=binoffx+1; - local_pitchx = 2 * m_pitchx; - - } else if (binoffx==79) { // ROC 0 - binoffx=binoffx+0; - local_pitchx = 2 * m_pitchx; - } else if (binoffx>=0) { // ROC 0 - binoffx=binoffx+0; - - } else { // too small - assert("binoffx too small"==0); - } - - return std::make_tuple(binoffx,local_pitchx>m_pitchx); - } + const float m_pitchx=1.f; + int binoffx = int(mpx); // truncate to int + float local_pitchx = m_pitchx; // defaultpitch + + if (binoffx>80) { // ROC 1 - handles x on edge cluster + binoffx=binoffx+2; + } else if (binoffx==80) { // ROC 1 + binoffx=binoffx+1; + local_pitchx = 2 * m_pitchx; + + } else if (binoffx==79) { // ROC 0 + binoffx=binoffx+0; + local_pitchx = 2 * m_pitchx; + } else if (binoffx>=0) { // ROC 0 + binoffx=binoffx+0; + + } else { // too small + assert("binoffx too small"==0); + } + return 
std::make_tuple(binoffx,local_pitchx>m_pitchx); + } std::tuple localYori(int mpy) { - const float m_pitchy=1.f; - int binoffy = int(mpy); // truncate to int - float local_pitchy = m_pitchy; // defaultpitch - - if (binoffy>416) { // ROC 8, not real ROC - binoffy=binoffy+17; - } else if (binoffy==416) { // ROC 8 - binoffy=binoffy+16; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy==415) { // ROC 7, last big pixel - binoffy=binoffy+15; - local_pitchy = 2 * m_pitchy; - } else if (binoffy>364) { // ROC 7 - binoffy=binoffy+15; - } else if (binoffy==364) { // ROC 7 - binoffy=binoffy+14; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy==363) { // ROC 6 - binoffy=binoffy+13; - local_pitchy = 2 * m_pitchy; - } else if (binoffy>312) { // ROC 6 - binoffy=binoffy+13; - } else if (binoffy==312) { // ROC 6 - binoffy=binoffy+12; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy==311) { // ROC 5 - binoffy=binoffy+11; - local_pitchy = 2 * m_pitchy; - } else if (binoffy>260) { // ROC 5 - binoffy=binoffy+11; - } else if (binoffy==260) { // ROC 5 - binoffy=binoffy+10; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy==259) { // ROC 4 - binoffy=binoffy+9; - local_pitchy = 2 * m_pitchy; - } else if (binoffy>208) { // ROC 4 - binoffy=binoffy+9; - } else if (binoffy==208) { // ROC 4 - binoffy=binoffy+8; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy==207) { // ROC 3 - binoffy=binoffy+7; - local_pitchy = 2 * m_pitchy; - } else if (binoffy>156) { // ROC 3 - binoffy=binoffy+7; - } else if (binoffy==156) { // ROC 3 - binoffy=binoffy+6; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy==155) { // ROC 2 - binoffy=binoffy+5; - local_pitchy = 2 * m_pitchy; - } else if (binoffy>104) { // ROC 2 - binoffy=binoffy+5; - } else if (binoffy==104) { // ROC 2 - binoffy=binoffy+4; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy==103) { // ROC 1 - binoffy=binoffy+3; - local_pitchy = 2 * m_pitchy; - } else if (binoffy>52) { // ROC 1 - binoffy=binoffy+3; - } else if 
(binoffy==52) { // ROC 1 - binoffy=binoffy+2; - local_pitchy = 2 * m_pitchy; - - } else if (binoffy==51) { // ROC 0 - binoffy=binoffy+1; - local_pitchy = 2 * m_pitchy; - } else if (binoffy>0) { // ROC 0 - binoffy=binoffy+1; - } else if (binoffy==0) { // ROC 0 - binoffy=binoffy+0; - local_pitchy = 2 * m_pitchy; - } else { - assert("binoffy too small"==0); - } - - return std::make_tuple(binoffy,local_pitchy>m_pitchy); - } + const float m_pitchy=1.f; + int binoffy = int(mpy); // truncate to int + float local_pitchy = m_pitchy; // defaultpitch + + if (binoffy>416) { // ROC 8, not real ROC + binoffy=binoffy+17; + } else if (binoffy==416) { // ROC 8 + binoffy=binoffy+16; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==415) { // ROC 7, last big pixel + binoffy=binoffy+15; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>364) { // ROC 7 + binoffy=binoffy+15; + } else if (binoffy==364) { // ROC 7 + binoffy=binoffy+14; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==363) { // ROC 6 + binoffy=binoffy+13; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>312) { // ROC 6 + binoffy=binoffy+13; + } else if (binoffy==312) { // ROC 6 + binoffy=binoffy+12; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==311) { // ROC 5 + binoffy=binoffy+11; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>260) { // ROC 5 + binoffy=binoffy+11; + } else if (binoffy==260) { // ROC 5 + binoffy=binoffy+10; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==259) { // ROC 4 + binoffy=binoffy+9; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>208) { // ROC 4 + binoffy=binoffy+9; + } else if (binoffy==208) { // ROC 4 + binoffy=binoffy+8; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==207) { // ROC 3 + binoffy=binoffy+7; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>156) { // ROC 3 + binoffy=binoffy+7; + } else if (binoffy==156) { // ROC 3 + binoffy=binoffy+6; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==155) { // ROC 2 + binoffy=binoffy+5; + 
local_pitchy = 2 * m_pitchy; + } else if (binoffy>104) { // ROC 2 + binoffy=binoffy+5; + } else if (binoffy==104) { // ROC 2 + binoffy=binoffy+4; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==103) { // ROC 1 + binoffy=binoffy+3; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>52) { // ROC 1 + binoffy=binoffy+3; + } else if (binoffy==52) { // ROC 1 + binoffy=binoffy+2; + local_pitchy = 2 * m_pitchy; + + } else if (binoffy==51) { // ROC 0 + binoffy=binoffy+1; + local_pitchy = 2 * m_pitchy; + } else if (binoffy>0) { // ROC 0 + binoffy=binoffy+1; + } else if (binoffy==0) { // ROC 0 + binoffy=binoffy+0; + local_pitchy = 2 * m_pitchy; + } else { + assert("binoffy too small"==0); + } + + return std::make_tuple(binoffy,local_pitchy>m_pitchy); + } } -#include int main() { for (uint16_t ix=0; ix<80*2; ++ix) { @@ -141,6 +141,13 @@ int main() { assert(std::get<1>(ori)==bp); } + using namespace phase1PixelTopology; + for (auto i=0U; i=layerStart[layer[i]]); + assert(i; - constexpr auto wss = Hist::totbins(); __shared__ Hist hist; - __shared__ typename Hist::Counter ws[wss]; - for (auto j=threadIdx.x; j60) atomicAdd(&n60,1); + if(hist.size(j)>40) atomicAdd(&n40,1); + } + __syncthreads(); + if (0==threadIdx.x) { + if (n60>0) printf("columns with more than 60 px %d in %d\n",n60,thisModuleId); + else if (n40>0) printf("columns with more than 40 px %d in %d\n",n40,thisModuleId); + } + __syncthreads(); +#endif + // for each pixel, look at all the pixels until the end of the module; // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; // after the loop, all the pixel in each cluster should have the id equeal to the lowest // pixel in the cluster ( clus[i] == i ). 
bool more = true; while (__syncthreads_or(more)) { - more = false; + if (1==nloops%2) { + for (int j=threadIdx.x, k = 0; j 1) return; // if (std::abs(int(y[m]) - int(y[i])) > 1) return; // binssize is 1 auto old = atomicMin(&clusterId[m], clusterId[i]); @@ -185,9 +206,8 @@ namespace gpuClustering { ++p; for (;pcommonParams(), cpeParams->detParams(me), clusParams, ic); - pixelCPEforGPU::error(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); chargeh[h] = clusParams.charge[ic]; @@ -135,8 +135,8 @@ namespace gpuPixelRecHits { xl[h]= clusParams.xpos[ic]; yl[h]= clusParams.ypos[ic]; - xe[h]= clusParams.xerr[ic]; - ye[h]= clusParams.yerr[ic]; + xe[h]= clusParams.xerr[ic]*clusParams.xerr[ic]; + ye[h]= clusParams.yerr[ic]*clusParams.yerr[ic]; mr[h]= clusParams.minRow[ic]; mc[h]= clusParams.minCol[ic]; diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index af7dd7337084e..eb51dd5a2eaeb 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -125,7 +125,84 @@ void PixelCPEFast::fillParamsForGpu() { auto vv = p.theDet->surface().position(); auto rr = pixelCPEforGPU::Rotation(p.theDet->surface().rotation()); g.frame = pixelCPEforGPU::Frame(vv.x(),vv.y(),vv.z(),rr); - } + + + // errors ..... + ClusterParamGeneric cp; + auto gvx = p.theOrigin.x() + 40.f*m_commonParamsGPU.thePitchX; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f/p.theOrigin.z(); + //--- Note that the normalization is not required as only the ratio used + + // calculate angles + cp.cotalpha = gvx*gvz; + cp.cotbeta = gvy*gvz; + + cp.with_track_angle = false; + + auto lape = p.theDet->localAlignmentError(); + if ( lape.invalid() ) lape = LocalError(); // zero.... 
+ +#ifdef DUMP_ERRORS + auto m=10000.f; + for (float qclus = 15000; qclus<35000; qclus+=15000){ + errorFromTemplates(p,cp,qclus); + + std::cout << i << ' ' << qclus << ' ' << cp.pixmx + << ' ' << m*cp.sigmax << ' ' << m*cp.sx1 << ' ' << m*cp.sx2 + << ' ' << m*cp.sigmay << ' ' << m*cp.sy1 << ' ' << m*cp.sy2 + << std::endl; + } + std::cout << i << ' ' << m*std::sqrt(lape.xx()) <<' '<< m*std::sqrt(lape.yy()) << std::endl; +#endif + + + errorFromTemplates(p,cp,20000.f); + g.sx[0] = cp.sigmax; + g.sx[1] = cp.sx1; + g.sx[2] = cp.sx2; + + g.sy[0] = cp.sigmay; + g.sy[1] = cp.sy1; + g.sy[2] = cp.sy2; + + + /* + // from run1?? + if (i<96) { + g.sx[0] = 0.00120; + g.sx[1] = 0.00115; + g.sx[2] = 0.0050; + + g.sy[0] = 0.00210; + g.sy[1] = 0.00375; + g.sy[2] = 0.0085; + } else if (g.isBarrel) { + g.sx[0] = 0.00120; + g.sx[1] = 0.00115; + g.sx[2] = 0.0050; + + g.sy[0] = 0.00210; + g.sy[1] = 0.00375; + g.sy[2] = 0.0085; + } else { + g.sx[0] = 0.0020; + g.sx[1] = 0.0020; + g.sx[2] = 0.0050; + + g.sy[0] = 0.0021; + g.sy[1] = 0.0021; + g.sy[2] = 0.0085; + } + */ + + + for (int i=0; i<3; ++i) { + g.sx[i] = std::sqrt(g.sx[i]*g.sx[i]+lape.xx()); + g.sy[i] = std::sqrt(g.sy[i]*g.sy[i]+lape.yy()); + } + + } } PixelCPEFast::~PixelCPEFast() {} @@ -143,25 +220,15 @@ PixelCPEBase::ClusterParam* PixelCPEFast::createClusterParam(const SiPixelCluste return new ClusterParamGeneric(cl); } -//----------------------------------------------------------------------------- -//! Hit position in the local frame (in cm). Unlike other CPE's, this -//! one converts everything from the measurement frame (in channel numbers) -//! into the local frame (in centimeters). 
-//----------------------------------------------------------------------------- -LocalPoint -PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClusterParamBase) const -{ - ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); - assert(!theClusterParam.with_track_angle); - - if ( UseErrorsFromTemplates_ ) { - - float qclus = theClusterParam.theCluster->charge(); + +void +PixelCPEFast::errorFromTemplates(DetParam const & theDetParam, ClusterParamGeneric & theClusterParam, float qclus) const +{ float locBz = theDetParam.bz; float locBx = theDetParam.bx; //cout << "PixelCPEFast::localPosition(...) : locBz = " << locBz << endl; - + theClusterParam.pixmx = std::numeric_limits::max(); // max pixel charge for truncation of 2-D cluster theClusterParam.sigmay = -999.9; // CPE Generic y-error for multi-pixel cluster @@ -170,28 +237,43 @@ PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClus theClusterParam.sy2 = -999.9; // CPE Generic y-error for single double-pixel cluster theClusterParam.sx1 = -999.9; // CPE Generic x-error for single single-pixel cluster theClusterParam.sx2 = -999.9; // CPE Generic x-error for single double-pixel cluster - + float dummy; - + SiPixelGenError gtempl(thePixelGenError_); int gtemplID_ = theDetParam.detTemplateId; - - theClusterParam.qBin_ = gtempl.qbin( gtemplID_, theClusterParam.cotalpha, theClusterParam.cotbeta, locBz, locBx, qclus, + + theClusterParam.qBin_ = gtempl.qbin( gtemplID_, theClusterParam.cotalpha, theClusterParam.cotbeta, locBz, locBx, qclus, false, theClusterParam.pixmx, theClusterParam.sigmay, dummy, theClusterParam.sigmax, dummy, theClusterParam.sy1, dummy, theClusterParam.sy2, dummy, theClusterParam.sx1, dummy, theClusterParam.sx2, dummy ); - + theClusterParam.sigmax = theClusterParam.sigmax * micronsToCm; theClusterParam.sx1 = theClusterParam.sx1 * micronsToCm; theClusterParam.sx2 = theClusterParam.sx2 * micronsToCm; - + theClusterParam.sigmay = 
theClusterParam.sigmay * micronsToCm; theClusterParam.sy1 = theClusterParam.sy1 * micronsToCm; theClusterParam.sy2 = theClusterParam.sy2 * micronsToCm; - - } // if ( UseErrorsFromTemplates_ ) +} + +//----------------------------------------------------------------------------- +//! Hit position in the local frame (in cm). Unlike other CPE's, this +//! one converts everything from the measurement frame (in channel numbers) +//! into the local frame (in centimeters). +//----------------------------------------------------------------------------- +LocalPoint +PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClusterParamBase) const +{ + ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); + + assert(!theClusterParam.with_track_angle); + + if ( UseErrorsFromTemplates_ ) { + errorFromTemplates(theDetParam, theClusterParam, theClusterParam.theCluster->charge()); + } else { theClusterParam.qBin_ = 0; } From 4de304ffe98ce74846d5c10a8d268a06f6879db5 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 9 Jan 2019 20:50:14 +0100 Subject: [PATCH 057/149] Limit the pixel clusteriser to nearest neighbours (cms-patatrack#241) The clusteriser is now limited to the nearest neighbours; this is faster for large occupancy and/or many isolated pixels. 
--- .../plugins/gpuClustering.h | 78 ++++++++++--------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index dba9fe12f5492..5c21a39302d70 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -1,8 +1,6 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h -// #define CLUS_LIMIT_LOOP - #include #include @@ -87,8 +85,8 @@ namespace gpuClustering { __syncthreads(); assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId))); - assert(msize-firstPixel60) atomicAdd(&n60,1); if(hist.size(j)>40) atomicAdd(&n40,1); } @@ -156,11 +150,31 @@ namespace gpuClustering { __syncthreads(); #endif + // fill NN + for (int j=threadIdx.x, k = 0; j 1) continue; + auto l = nnn[k]++; + assert(l<5); + nn[k][l]=*p; + } + } + // for each pixel, look at all the pixels until the end of the module; // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; // after the loop, all the pixel in each cluster should have the id equeal to the lowest // pixel in the cluster ( clus[i] == i ). 
bool more = true; + int nloops=0; while (__syncthreads_or(more)) { if (1==nloops%2) { for (int j=threadIdx.x, k = 0; j 1) return; - // if (std::abs(int(y[m]) - int(y[i])) > 1) return; // binssize is 1 auto old = atomicMin(&clusterId[m], clusterId[i]); if (old != clusterId[i]) { // end the loop only if no changes were applied more = true; } atomicMin(&clusterId[i], old); -#ifdef CLUS_LIMIT_LOOP - // update the loop boundary for the next iteration - jmax[k] = std::max(kk + 1,jmax[k]); -#endif - }; - ++p; - for (;p Date: Mon, 14 Jan 2019 13:33:34 +0100 Subject: [PATCH 058/149] Synchronise with CMSSW_10_4_0 --- EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index 528ffa2683fa8..587325a3d9ef5 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -1,6 +1,5 @@ import FWCore.ParameterSet.Config as cms import EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi -import RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi siPixelDigis = EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi.siPixelRawToDigi.clone() siPixelDigis.Timing = cms.untracked.bool(False) @@ -21,10 +20,12 @@ from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toModify(siPixelDigis, UsePhase1=True) +from Configuration.ProcessModifiers.premix_stage2_cff import premix_stage2 +premix_stage2.toModify(siPixelDigis, BadPixelFEDChannelsInputLabel = "mixData") +import RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi _siPixelDigis_gpu = RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi.siPixelDigiHeterogeneousConverter.clone() _siPixelDigis_gpu.includeErrors = cms.bool(True) from Configuration.ProcessModifiers.gpu_cff import gpu 
gpu.toReplaceWith(siPixelDigis, _siPixelDigis_gpu) - From 5fca7f1e4559e2dd1b29f330dcfacc2ea1b516a8 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 17 Jan 2019 09:44:22 +0100 Subject: [PATCH 059/149] Fix invalid narrowing conversion from "unsigned int" to "unsigned char" (cms-patatrack#250) --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index c3f4292d4c4c4..1388ed4852b25 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -84,7 +84,7 @@ namespace pixelgpudetails { return (1==((rawId>>25)&0x7)); } - __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * cablingMap, uint32_t fed, uint32_t link, uint32_t roc) { + __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * cablingMap, uint8_t fed, uint32_t link, uint32_t roc) { uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; pixelgpudetails::DetIdGPU detId = { cablingMap->RawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index] }; return detId; @@ -171,9 +171,9 @@ namespace pixelgpudetails { } - __device__ uint32_t conversionError(uint32_t fedId, uint32_t status, bool debug = false) + __device__ uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) { - uint32_t errorType = 0; + uint8_t errorType = 0; // debug = true; @@ -219,10 +219,10 @@ namespace pixelgpudetails { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } - __device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) + __device__ uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const 
SiPixelFedCablingMapGPU *cablingMap, bool debug = false) { - int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; - if (errorType < 25) return false; + uint8_t errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; + if (errorType < 25) return 0; bool errorFound = false; switch (errorType) { @@ -232,7 +232,7 @@ namespace pixelgpudetails { if (index > 1 && index <= cablingMap->size) { if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) errorFound = false; } - if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); + if (debug and errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); break; } case(26) : { @@ -267,7 +267,7 @@ namespace pixelgpudetails { if ( StateMatch != 1 && StateMatch != 8 ) { if (debug) printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); } - if ( StateMatch == 1 ) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + if (StateMatch == 1) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 errorFound = true; break; } @@ -280,10 +280,10 @@ namespace pixelgpudetails { errorFound = false; }; - return errorFound? errorType : 0; + return errorFound ? errorType : 0; } - __device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) + __device__ uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) { uint32_t rID = 0xffffffff; @@ -410,7 +410,7 @@ namespace pixelgpudetails { do { // too many coninue below.... 
(to be fixed) if (gIndex < wordCounter) { - uint32_t fedId = fedIds[gIndex/2]; // +1200; + uint8_t fedId = fedIds[gIndex/2]; // +1200; // initialize (too many coninue below) pdigi[gIndex] = 0; @@ -427,7 +427,7 @@ namespace pixelgpudetails { uint32_t roc = getRoc(ww); // Extract Roc in link pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); - uint32_t errorType = checkROC(ww, fedId, link, cablingMap, debug); + uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); if (includeErrors and skipROC) { @@ -475,7 +475,7 @@ namespace pixelgpudetails { localPix.col = col; if (includeErrors) { if (not rocRowColIsValid(row, col)) { - uint32_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays + uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId}); if(debug) printf("BPIX1 Error status: %i\n", error); continue; @@ -490,7 +490,7 @@ namespace pixelgpudetails { localPix.row = row; localPix.col = col; if (includeErrors and not dcolIsValid(dcol, pxid)) { - uint32_t error = conversionError(fedId, 3, debug); + uint8_t error = conversionError(fedId, 3, debug); err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId}); if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); continue; From 1a307c411aca10f89c46c6848dc7933cbe408578 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 17 Jan 2019 15:50:11 +0100 Subject: [PATCH 060/149] Skip CUDA-related tests if no GPU is present (cms-patatrack#252) Make unit tests that require a CUDA device skip the test and exit succesfully if the CUDA runtime is not available, or no CUDA devices are available. 
--- RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml index 335591b583b58..66a87291221f2 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml @@ -36,6 +36,7 @@ + @@ -43,6 +44,7 @@ + From 255ec7111ac612988b970efaa565ef8faeeff01c Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 18 Jan 2019 17:04:41 +0100 Subject: [PATCH 061/149] Fix or disable failing unit tests (cms-patatrack#253) --- .../interface/phase1PixelTopology.h | 36 ++++++++++--------- .../test/phase1PixelTopology_t.cpp | 14 +++++--- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index 68fb60361d40d..05e6b01e96c24 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -22,12 +22,18 @@ namespace phase1PixelTopology { constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule)* uint32_t(numColsInModule); constexpr uint32_t numberOfModules = 1856; - - constexpr uint32_t layerStart[11] = {0,96,320,672,1184,1296,1408,1520,1632,1744,1856}; - constexpr char const * layerName[10] = {"BL1","BL2","BL3","BL4", - "E+1", "E+2", "E+3", - "E-1", "E-2", "E-3" - }; + constexpr uint32_t numberOfLayers = 10; + constexpr uint32_t layerStart[numberOfLayers + 1] = { + 0, 96, 320, 672, // barrel + 1184, 1296, 1408, // positive endcap + 1520, 1632, 1744, // negative endcap + numberOfModules + }; + constexpr char const * layerName[numberOfLayers] = { + "BL1", "BL2", "BL3", "BL4", // barrel + "E+1", "E+2", "E+3", // positive endcap + "E-1", "E-2", "E-3" // negative endcap + }; template @@ -60,7 +66,6 @@ namespace phase1PixelTopology { constexpr 
uint32_t maxModuleStride = findMaxModuleStride(); - constexpr uint8_t findLayer(uint32_t detId) { for (uint8_t i=0; i<11; ++i) if (detId layer = map_to_array(findLayerFromCompact); + constexpr uint32_t layerIndexSize = numberOfModules / maxModuleStride; + constexpr std::array layer = map_to_array(findLayerFromCompact); constexpr bool validateLayerIndex() { bool res=true; @@ -87,12 +91,11 @@ namespace phase1PixelTopology { return res; } - static_assert(validateLayerIndex(),"layer from detIndex algo is buggy"); + static_assert(validateLayerIndex(), "layer from detIndex algo is buggy"); - // this is for the ROC n<512 (upgrade 1024) constexpr inline - uint16_t divu52(uint16_t n) { + uint16_t divu52(uint16_t n) { n = n>>2; uint16_t q = (n>>1) + (n>>4); q = q + (q>>4) + (q>>5); q = q >> 3; @@ -101,13 +104,14 @@ namespace phase1PixelTopology { } constexpr inline - bool isEdgeX(uint16_t px) { return (px==0) | (px==lastRowInModule);} - constexpr inline - bool isEdgeY(uint16_t py) { return (py==0) | (py==lastColInModule);} + bool isEdgeX(uint16_t px) { return (px==0) | (px==lastRowInModule); } + constexpr inline + bool isEdgeY(uint16_t py) { return (py==0) | (py==lastColInModule); } constexpr inline uint16_t toRocX(uint16_t px) { return (px(ori)==bp); } - using namespace phase1PixelTopology; - for (auto i=0U; i=layerStart[layer[i]]); - assert(i= phase1PixelTopology::layerStart[layer]); + assert(i < phase1PixelTopology::layerStart[layer+1]); + } return 0; } From 2392b516e3bc7d2502059c91c67ee37b79e49b1b Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 18 Jan 2019 17:21:05 -0600 Subject: [PATCH 062/149] Fix warnings reported by clang (cms-patatrack#255) Forward-declare `SiPixelFedCablingMapGPU` as `struct`. Remove `std::move` in `ClusterTPAssociationHeterogeneous::produceCPU` that prevents copy elision. 
--- .../SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index b0151055ed7f1..44bed9abc1e68 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -10,7 +10,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" #include "siPixelRawToClusterHeterogeneousProduct.h" -class SiPixelFedCablingMapGPU; +struct SiPixelFedCablingMapGPU; class SiPixelGainForHLTonGPU; namespace pixelgpudetails { From 60a9c68c904ced9395a339816d38ea3243d3ed9d Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 21 Feb 2019 18:44:46 +0100 Subject: [PATCH 063/149] Synchronise with CMSSW_10_5_0_pre2 --- .../python/SiPixelRawToDigi_cfi.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index 587325a3d9ef5..c2479af1f60bd 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -2,26 +2,9 @@ import EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi siPixelDigis = EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi.siPixelRawToDigi.clone() -siPixelDigis.Timing = cms.untracked.bool(False) -siPixelDigis.IncludeErrors = cms.bool(True) -siPixelDigis.InputLabel = cms.InputTag("siPixelRawData") -siPixelDigis.UseQualityInfo = cms.bool(False) -## ErrorList: list of error codes used by tracking to invalidate modules -siPixelDigis.ErrorList = cms.vint32(29) -## UserErrorList: list of error codes used by Pixel experts for investigation -siPixelDigis.UserErrorList = cms.vint32(40) -## Use pilot blades -siPixelDigis.UsePilotBlade = 
cms.bool(False) -## Use phase1 -siPixelDigis.UsePhase1 = cms.bool(False) -## Empty Regions PSet means complete unpacking -siPixelDigis.Regions = cms.PSet( ) -siPixelDigis.CablingMapLabel = cms.string("") from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toModify(siPixelDigis, UsePhase1=True) -from Configuration.ProcessModifiers.premix_stage2_cff import premix_stage2 -premix_stage2.toModify(siPixelDigis, BadPixelFEDChannelsInputLabel = "mixData") import RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi _siPixelDigis_gpu = RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi.siPixelDigiHeterogeneousConverter.clone() From d3cb26323d3a777aec7a29bef3a1faaf1aa795db Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Wed, 13 Mar 2019 10:04:32 -0500 Subject: [PATCH 064/149] Next prototype of the framework integration (cms-patatrack#100) Provide a mechanism for a chain of modules to share a resource, that can be e.g. CUDA device memory or a CUDA stream. Minimize data movements between the CPU and the device, and support multiple devices. Allow the same job configuration to be used on all hardware combinations. See HeterogeneousCore/CUDACore/README.md for a more detailed description and examples. 
--- CUDADataFormats/Common/BuildFile.xml | 11 +- CUDADataFormats/SiPixelCluster/BuildFile.xml | 1 + .../interface/SiPixelClustersCUDA.h | 29 ++- .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 15 +- CUDADataFormats/SiPixelCluster/src/classes.h | 8 + .../SiPixelCluster/src/classes_def.xml | 4 + CUDADataFormats/SiPixelDigi/BuildFile.xml | 2 + .../interface/SiPixelDigiErrorsCUDA.h | 40 +++ .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 50 +++- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 44 ++++ .../SiPixelDigi/src/SiPixelDigisCUDA.cc | 49 +++- CUDADataFormats/SiPixelDigi/src/classes.h | 9 + .../SiPixelDigi/src/classes_def.xml | 7 + .../StandardSequences/python/RawToDigi_cff.py | 11 +- .../SiPixelDigi/interface/SiPixelDigisSoA.h | 32 +++ .../SiPixelDigi/src/SiPixelDigisSoA.cc | 12 + DataFormats/SiPixelDigi/src/classes.h | 6 +- DataFormats/SiPixelDigi/src/classes_def.xml | 6 + .../SiPixelRawToDigi/plugins/BuildFile.xml | 3 + .../plugins/SiPixelDigiErrorsFromSoA.cc | 183 +++++++++++++ .../plugins/SiPixelDigiErrorsSoAFromCUDA.cc | 75 ++++++ .../plugins/SiPixelDigisSoAFromCUDA.cc | 81 ++++++ .../python/SiPixelRawToDigi_cfi.py | 26 +- .../python/siPixelDigis_cff.py | 30 +++ .../python/RecoLocalTracker_cff.py | 4 +- .../plugins/SiPixelDigisClustersFromSoA.cc | 158 ++++++++++++ .../plugins/SiPixelRawToClusterCUDA.cc | 243 ++++++++++++++++++ .../plugins/SiPixelRawToClusterGPUKernel.cu | 99 ++----- .../plugins/SiPixelRawToClusterGPUKernel.h | 95 ++----- .../SiPixelClusterizerPreSplitting_cfi.py | 16 +- .../python/siPixelClustersPreSplitting_cff.py | 21 ++ 31 files changed, 1157 insertions(+), 213 deletions(-) create mode 100644 CUDADataFormats/SiPixelCluster/src/classes.h create mode 100644 CUDADataFormats/SiPixelCluster/src/classes_def.xml create mode 100644 CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h create mode 100644 CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc create mode 100644 CUDADataFormats/SiPixelDigi/src/classes.h create mode 100644 
CUDADataFormats/SiPixelDigi/src/classes_def.xml create mode 100644 DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h create mode 100644 DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc create mode 100644 EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc create mode 100644 EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc create mode 100644 EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc create mode 100644 EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc create mode 100644 RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py diff --git a/CUDADataFormats/Common/BuildFile.xml b/CUDADataFormats/Common/BuildFile.xml index b990c1295e31a..1046b76eef0f7 100644 --- a/CUDADataFormats/Common/BuildFile.xml +++ b/CUDADataFormats/Common/BuildFile.xml @@ -1,6 +1,7 @@ - - - + + + + + - - + diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml index 21c527e7b2f0d..d34658faa2573 100644 --- a/CUDADataFormats/SiPixelCluster/BuildFile.xml +++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml @@ -1,6 +1,7 @@ + diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index ca8a75d178b6c..f25a8a25f0808 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -1,14 +1,15 @@ #ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h #define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h -#include "CUDADataFormats/Common/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include 
"HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include class SiPixelClustersCUDA { public: SiPixelClustersCUDA() = default; - explicit SiPixelClustersCUDA(size_t feds, size_t nelements, cuda::stream_t<>& stream); + explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream); ~SiPixelClustersCUDA() = default; SiPixelClustersCUDA(const SiPixelClustersCUDA&) = delete; @@ -16,20 +17,23 @@ class SiPixelClustersCUDA { SiPixelClustersCUDA(SiPixelClustersCUDA&&) = default; SiPixelClustersCUDA& operator=(SiPixelClustersCUDA&&) = default; + void setNClusters(uint32_t nClusters) { + nClusters_h = nClusters; + } + + uint32_t nClusters() const { return nClusters_h; } + uint32_t *moduleStart() { return moduleStart_d.get(); } - int32_t *clus() { return clus_d.get(); } uint32_t *clusInModule() { return clusInModule_d.get(); } uint32_t *moduleId() { return moduleId_d.get(); } uint32_t *clusModuleStart() { return clusModuleStart_d.get(); } uint32_t const *moduleStart() const { return moduleStart_d.get(); } - int32_t const *clus() const { return clus_d.get(); } uint32_t const *clusInModule() const { return clusInModule_d.get(); } uint32_t const *moduleId() const { return moduleId_d.get(); } uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); } uint32_t const *c_moduleStart() const { return moduleStart_d.get(); } - int32_t const *c_clus() const { return clus_d.get(); } uint32_t const *c_clusInModule() const { return clusInModule_d.get(); } uint32_t const *c_moduleId() const { return moduleId_d.get(); } uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); } @@ -40,7 +44,6 @@ class SiPixelClustersCUDA { #ifdef __CUDACC__ __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_+i); } - __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_+i); } __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_+i); } __device__ 
__forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_+i); } __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_+i); } @@ -50,7 +53,6 @@ class SiPixelClustersCUDA { private: uint32_t const *moduleStart_; - int32_t const *clus_; uint32_t const *clusInModule_; uint32_t const *moduleId_; uint32_t const *clusModuleStart_; @@ -59,15 +61,16 @@ class SiPixelClustersCUDA { DeviceConstView *view() const { return view_d.get(); } private: - edm::cuda::device::unique_ptr moduleStart_d; // index of the first pixel of each module - edm::cuda::device::unique_ptr clus_d; // cluster id of each pixel - edm::cuda::device::unique_ptr clusInModule_d; // number of clusters found in each module - edm::cuda::device::unique_ptr moduleId_d; // module id of each module + cudautils::device::unique_ptr moduleStart_d; // index of the first pixel of each module + cudautils::device::unique_ptr clusInModule_d; // number of clusters found in each module + cudautils::device::unique_ptr moduleId_d; // module id of each module // originally from rechits - edm::cuda::device::unique_ptr clusModuleStart_d; + cudautils::device::unique_ptr clusModuleStart_d; + + cudautils::device::unique_ptr view_d; // "me" pointer - edm::cuda::device::unique_ptr view_d; // "me" pointer + uint32_t nClusters_h; }; #endif diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index 7363c2fd364af..d88a1b0a6370b 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -2,23 +2,22 @@ #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -SiPixelClustersCUDA::SiPixelClustersCUDA(size_t feds, size_t nelements, cuda::stream_t<>& stream) { +SiPixelClustersCUDA::SiPixelClustersCUDA(size_t 
maxClusters, cuda::stream_t<>& stream) { edm::Service cs; - moduleStart_d = cs->make_device_unique(nelements+1, stream); - clus_d = cs->make_device_unique< int32_t[]>(feds, stream); - clusInModule_d = cs->make_device_unique(nelements, stream); - moduleId_d = cs->make_device_unique(nelements, stream); - clusModuleStart_d = cs->make_device_unique(nelements+1, stream); + moduleStart_d = cs->make_device_unique(maxClusters+1, stream); + clusInModule_d = cs->make_device_unique(maxClusters, stream); + moduleId_d = cs->make_device_unique(maxClusters, stream); + clusModuleStart_d = cs->make_device_unique(maxClusters+1, stream); auto view = cs->make_host_unique(stream); view->moduleStart_ = moduleStart_d.get(); - view->clus_ = clus_d.get(); view->clusInModule_ = clusInModule_d.get(); view->moduleId_ = moduleId_d.get(); view->clusModuleStart_ = clusModuleStart_d.get(); view_d = cs->make_device_unique(stream); - cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id()); + cudautils::copyAsync(view_d, view, stream); } diff --git a/CUDADataFormats/SiPixelCluster/src/classes.h b/CUDADataFormats/SiPixelCluster/src/classes.h new file mode 100644 index 0000000000000..08d46244adc7d --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/src/classes.h @@ -0,0 +1,8 @@ +#ifndef CUDADataFormats_SiPixelCluster_classes_h +#define CUDADataFormats_SiPixelCluster_classes_h + +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif diff --git a/CUDADataFormats/SiPixelCluster/src/classes_def.xml b/CUDADataFormats/SiPixelCluster/src/classes_def.xml new file mode 100644 index 0000000000000..ba0706ac4b8aa --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/src/classes_def.xml @@ -0,0 +1,4 @@ + + + + diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml index 
259aa9f08d054..29ec13098819c 100644 --- a/CUDADataFormats/SiPixelDigi/BuildFile.xml +++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml @@ -1,6 +1,8 @@ + + diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h new file mode 100644 index 0000000000000..e9c8c0f644722 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -0,0 +1,40 @@ +#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h +#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h + +#include "DataFormats/SiPixelDigi/interface/PixelErrors.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" + +#include + +class SiPixelDigiErrorsCUDA { +public: + SiPixelDigiErrorsCUDA() = default; + explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream); + ~SiPixelDigiErrorsCUDA() = default; + + SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete; + SiPixelDigiErrorsCUDA& operator=(const SiPixelDigiErrorsCUDA&) = delete; + SiPixelDigiErrorsCUDA(SiPixelDigiErrorsCUDA&&) = default; + SiPixelDigiErrorsCUDA& operator=(SiPixelDigiErrorsCUDA&&) = default; + + const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; } + + GPU::SimpleVector *error() { return error_d.get(); } + GPU::SimpleVector const *error() const { return error_d.get(); } + GPU::SimpleVector const *c_error() const { return error_d.get(); } + + using HostDataError = std::pair, cudautils::host::unique_ptr>; + HostDataError dataErrorToHostAsync(cuda::stream_t<>& stream) const; + + void copyErrorToHostAsync(cuda::stream_t<>& stream); + +private: + cudautils::device::unique_ptr data_d; + cudautils::device::unique_ptr> error_d; + cudautils::host::unique_ptr> error_h; + 
PixelFormatterErrors formatterErrors_h; +}; + +#endif diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 66ca680effd19..6a52545483eb8 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -1,15 +1,15 @@ #ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h #define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h -#include "CUDADataFormats/Common/interface/device_unique_ptr.h" -#include "FWCore/Utilities/interface/propagate_const.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include class SiPixelDigisCUDA { public: SiPixelDigisCUDA() = default; - explicit SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream); + explicit SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream); ~SiPixelDigisCUDA() = default; SiPixelDigisCUDA(const SiPixelDigisCUDA&) = delete; @@ -17,20 +17,42 @@ class SiPixelDigisCUDA { SiPixelDigisCUDA(SiPixelDigisCUDA&&) = default; SiPixelDigisCUDA& operator=(SiPixelDigisCUDA&&) = default; + void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { + nModules_h = nModules; + nDigis_h = nDigis; + } + + uint32_t nModules() const { return nModules_h; } + uint32_t nDigis() const { return nDigis_h; } + uint16_t * xx() { return xx_d.get(); } uint16_t * yy() { return yy_d.get(); } uint16_t * adc() { return adc_d.get(); } uint16_t * moduleInd() { return moduleInd_d.get(); } + int32_t * clus() { return clus_d.get(); } + uint32_t * pdigi() { return pdigi_d.get(); } + uint32_t * rawIdArr() { return rawIdArr_d.get(); } uint16_t const *xx() const { return xx_d.get(); } uint16_t const *yy() const { return yy_d.get(); } uint16_t const *adc() const { return adc_d.get(); } uint16_t const *moduleInd() const { return moduleInd_d.get(); } + int32_t const *clus() const { 
return clus_d.get(); } + uint32_t const *pdigi() const { return pdigi_d.get(); } + uint32_t const *rawIdArr() const { return rawIdArr_d.get(); } uint16_t const *c_xx() const { return xx_d.get(); } uint16_t const *c_yy() const { return yy_d.get(); } uint16_t const *c_adc() const { return adc_d.get(); } uint16_t const *c_moduleInd() const { return moduleInd_d.get(); } + int32_t const *c_clus() const { return clus_d.get(); } + uint32_t const *c_pdigi() const { return pdigi_d.get(); } + uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); } + + cudautils::host::unique_ptr adcToHostAsync(cuda::stream_t<>& stream) const; + cudautils::host::unique_ptr< int32_t[]> clusToHostAsync(cuda::stream_t<>& stream) const; + cudautils::host::unique_ptr pdigiToHostAsync(cuda::stream_t<>& stream) const; + cudautils::host::unique_ptr rawIdArrToHostAsync(cuda::stream_t<>& stream) const; class DeviceConstView { public: @@ -41,6 +63,7 @@ class SiPixelDigisCUDA { __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_+i); } __device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_+i); } __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_+i); } + __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_+i); } #endif friend class SiPixelDigisCUDA; @@ -50,16 +73,27 @@ class SiPixelDigisCUDA { uint16_t const *yy_; uint16_t const *adc_; uint16_t const *moduleInd_; + int32_t const *clus_; }; const DeviceConstView *view() const { return view_d.get(); } private: - edm::cuda::device::unique_ptr xx_d; // local coordinates of each pixel - edm::cuda::device::unique_ptr yy_d; // - edm::cuda::device::unique_ptr adc_d; // ADC of each pixel - edm::cuda::device::unique_ptr moduleInd_d; // module id of each pixel - edm::cuda::device::unique_ptr view_d; // "me" pointer + // These are consumed by downstream device code + cudautils::device::unique_ptr xx_d; // local coordinates of each pixel + cudautils::device::unique_ptr 
yy_d; // + cudautils::device::unique_ptr adc_d; // ADC of each pixel + cudautils::device::unique_ptr moduleInd_d; // module id of each pixel + cudautils::device::unique_ptr clus_d; // cluster id of each pixel + cudautils::device::unique_ptr view_d; // "me" pointer + + // These are for CPU output; should we (eventually) place them to a + // separate product? + cudautils::device::unique_ptr pdigi_d; + cudautils::device::unique_ptr rawIdArr_d; + + uint32_t nModules_h = 0; + uint32_t nDigis_h = 0; }; #endif diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc new file mode 100644 index 0000000000000..92aab1ec9d578 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -0,0 +1,44 @@ +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" + +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/memsetAsync.h" + +SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream): + formatterErrors_h(std::move(errors)) +{ + edm::Service cs; + + error_d = cs->make_device_unique>(stream); + data_d = cs->make_device_unique(maxFedWords, stream); + + cudautils::memsetAsync(data_d, 0x00, maxFedWords, stream); + + error_h = cs->make_host_unique>(stream); + GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); + assert(error_h->size() == 0); + assert(error_h->capacity() == static_cast(maxFedWords)); + + cudautils::copyAsync(error_d, error_h, stream); +} + +void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cuda::stream_t<>& stream) { + cudautils::copyAsync(error_h, error_d, stream); +} + +SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cuda::stream_t<>& stream) const { + edm::Service cs; 
+ // On one hand size() could be sufficient. On the other hand, if + // someone copies the SimpleVector<>, (s)he might expect the data + // buffer to actually have space for capacity() elements. + auto data = cs->make_host_unique(error_h->capacity(), stream); + + // but transfer only the required amount + if(error_h->size() > 0) { + cudautils::copyAsync(data, data_d, error_h->size(), stream); + } + auto err = *error_h; + err.set_data(data.get()); + return HostDataError(std::move(err), std::move(data)); +} diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index 7e3d876ac8bdc..ef13ed9612dbf 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -2,24 +2,55 @@ #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -#include - -SiPixelDigisCUDA::SiPixelDigisCUDA(size_t nelements, cuda::stream_t<>& stream) { +SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) { edm::Service cs; - xx_d = cs->make_device_unique(nelements, stream); - yy_d = cs->make_device_unique(nelements, stream); - adc_d = cs->make_device_unique(nelements, stream); - moduleInd_d = cs->make_device_unique(nelements, stream); + xx_d = cs->make_device_unique(maxFedWords, stream); + yy_d = cs->make_device_unique(maxFedWords, stream); + adc_d = cs->make_device_unique(maxFedWords, stream); + moduleInd_d = cs->make_device_unique(maxFedWords, stream); + clus_d = cs->make_device_unique< int32_t[]>(maxFedWords, stream); + + pdigi_d = cs->make_device_unique(maxFedWords, stream); + rawIdArr_d = cs->make_device_unique(maxFedWords, stream); auto view = cs->make_host_unique(stream); view->xx_ = xx_d.get(); view->yy_ = yy_d.get(); view->adc_ = adc_d.get(); 
view->moduleInd_ = moduleInd_d.get(); + view->clus_ = clus_d.get(); view_d = cs->make_device_unique(stream); - cudaCheck(cudaMemcpyAsync(view_d.get(), view.get(), sizeof(DeviceConstView), cudaMemcpyDefault, stream.id())); + cudautils::copyAsync(view_d, view, stream); +} + +cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cuda::stream_t<>& stream) const { + edm::Service cs; + auto ret = cs->make_host_unique(nDigis(), stream); + cudautils::copyAsync(ret, adc_d, nDigis(), stream); + return ret; +} + +cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cuda::stream_t<>& stream) const { + edm::Service cs; + auto ret = cs->make_host_unique(nDigis(), stream); + cudautils::copyAsync(ret, clus_d, nDigis(), stream); + return ret; +} + +cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cuda::stream_t<>& stream) const { + edm::Service cs; + auto ret = cs->make_host_unique(nDigis(), stream); + cudautils::copyAsync(ret, pdigi_d, nDigis(), stream); + return ret; +} + +cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cuda::stream_t<>& stream) const { + edm::Service cs; + auto ret = cs->make_host_unique(nDigis(), stream); + cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream); + return ret; } diff --git a/CUDADataFormats/SiPixelDigi/src/classes.h b/CUDADataFormats/SiPixelDigi/src/classes.h new file mode 100644 index 0000000000000..41b135640b883 --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/src/classes.h @@ -0,0 +1,9 @@ +#ifndef CUDADataFormats_SiPixelDigi_classes_h +#define CUDADataFormats_SiPixelDigi_classes_h + +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif diff --git a/CUDADataFormats/SiPixelDigi/src/classes_def.xml b/CUDADataFormats/SiPixelDigi/src/classes_def.xml new file mode 100644 index 
0000000000000..9d6816ed3b14c --- /dev/null +++ b/CUDADataFormats/SiPixelDigi/src/classes_def.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/Configuration/StandardSequences/python/RawToDigi_cff.py b/Configuration/StandardSequences/python/RawToDigi_cff.py index 605f9ea4c29bc..7484638f218e0 100644 --- a/Configuration/StandardSequences/python/RawToDigi_cff.py +++ b/Configuration/StandardSequences/python/RawToDigi_cff.py @@ -1,10 +1,9 @@ import FWCore.ParameterSet.Config as cms -from Configuration.ProcessModifiers.gpu_cff import gpu # This object is used to selectively make changes for different running # scenarios. In this case it makes changes for Run 2. -from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import * +from EventFilter.SiPixelRawToDigi.siPixelDigis_cff import * from EventFilter.SiStripRawToDigi.SiStripDigis_cfi import * @@ -46,7 +45,7 @@ from EventFilter.CTPPSRawToDigi.ctppsRawToDigi_cff import * RawToDigiTask = cms.Task(L1TRawToDigiTask, - siPixelDigis, + siPixelDigisTask, siStripDigis, ecalDigisTask, ecalPreshowerDigis, @@ -61,10 +60,10 @@ ) RawToDigi = cms.Sequence(RawToDigiTask) -RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigis, siStripDigis]) +RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigisTask, siStripDigis]) RawToDigi_noTk = cms.Sequence(RawToDigiTask_noTk) -RawToDigiTask_pixelOnly = cms.Task(siPixelDigis) +RawToDigiTask_pixelOnly = cms.Task(siPixelDigisTask) RawToDigi_pixelOnly = cms.Sequence(RawToDigiTask_pixelOnly) RawToDigiTask_ecalOnly = cms.Task(ecalDigisTask, ecalPreshowerDigis, scalersRawToDigi) @@ -74,7 +73,7 @@ RawToDigi_hcalOnly = cms.Sequence(RawToDigiTask_hcalOnly) scalersRawToDigi.scalersInputTag = 'rawDataCollector' -(~gpu).toModify(siPixelDigis, InputLabel = 'rawDataCollector') +siPixelDigis.cpu.InputLabel = 'rawDataCollector' ecalDigis.InputLabel = 'rawDataCollector' ecalPreshowerDigis.sourceTag = 'rawDataCollector' hcalDigis.InputLabel = 'rawDataCollector' diff --git 
a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h new file mode 100644 index 0000000000000..df249a3790cd2 --- /dev/null +++ b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h @@ -0,0 +1,32 @@ +#ifndef DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h +#define DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h + +#include +#include + +class SiPixelDigisSoA { +public: + SiPixelDigisSoA() = default; + explicit SiPixelDigisSoA(size_t nDigis, const uint32_t *pdigi, const uint32_t *rawIdArr, const uint16_t *adc, const int32_t *clus); + ~SiPixelDigisSoA() = default; + + auto size() const { return pdigi_.size(); } + + uint32_t pdigi(size_t i) const { return pdigi_[i]; } + uint32_t rawIdArr(size_t i) const { return rawIdArr_[i]; } + uint16_t adc(size_t i) const { return adc_[i]; } + int32_t clus(size_t i) const { return clus_[i]; } + + const std::vector& pdigiVector() const { return pdigi_; } + const std::vector& rawIdArrVector() const { return rawIdArr_; } + const std::vector& adcVector() const { return adc_; } + const std::vector& clusVector() const { return clus_; } + +private: + std::vector pdigi_; + std::vector rawIdArr_; + std::vector adc_; + std::vector clus_; +}; + +#endif diff --git a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc new file mode 100644 index 0000000000000..ebc8ba2055f78 --- /dev/null +++ b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc @@ -0,0 +1,12 @@ +#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" + +#include + +SiPixelDigisSoA::SiPixelDigisSoA(size_t nDigis, const uint32_t *pdigi, const uint32_t *rawIdArr, const uint16_t *adc, const int32_t *clus): + pdigi_(pdigi, pdigi+nDigis), + rawIdArr_(rawIdArr, rawIdArr+nDigis), + adc_(adc, adc+nDigis), + clus_(clus, clus+nDigis) +{ + assert(pdigi_.size() == nDigis); +} diff --git a/DataFormats/SiPixelDigi/src/classes.h b/DataFormats/SiPixelDigi/src/classes.h index 
2f36b72ca7df8..256ca41ad1867 100644 --- a/DataFormats/SiPixelDigi/src/classes.h +++ b/DataFormats/SiPixelDigi/src/classes.h @@ -5,9 +5,13 @@ #include "DataFormats/SiPixelDigi/interface/PixelDigiCollection.h" #include "DataFormats/SiPixelDigi/interface/SiPixelCalibDigi.h" #include "DataFormats/SiPixelDigi/interface/SiPixelCalibDigiError.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h" #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "boost/cstdint.hpp" #include -#endif // SIPIXELDIGI_CLASSES_H + +#endif // SIPIXELDIGI_CLASSES_H diff --git a/DataFormats/SiPixelDigi/src/classes_def.xml b/DataFormats/SiPixelDigi/src/classes_def.xml index de7779a5c00ea..8cabbd3f3f06e 100755 --- a/DataFormats/SiPixelDigi/src/classes_def.xml +++ b/DataFormats/SiPixelDigi/src/classes_def.xml @@ -49,4 +49,10 @@ + + + + + + diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index f92aa68373927..4d2b5ebf45542 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,4 +1,7 @@ + + + diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc new file mode 100644 index 0000000000000..9e998b92fc403 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc @@ -0,0 +1,183 @@ +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetIdCollection.h" 
+#include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include + +class SiPixelDigiErrorsFromSoA: public edm::stream::EDProducer<> { +public: + explicit SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig); + ~SiPixelDigiErrorsFromSoA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + edm::EDGetTokenT digiErrorSoAGetToken_; + + edm::EDPutTokenT> errorPutToken_; + edm::EDPutTokenT tkErrorPutToken_; + edm::EDPutTokenT userErrorPutToken_; + edm::EDPutTokenT> disabledChannelPutToken_; + + edm::ESWatcher cablingWatcher_; + std::unique_ptr cabling_; + const std::string cablingMapLabel_; + + const std::vector tkerrorlist_; + const std::vector usererrorlist_; + + const bool usePhase1_; +}; + +SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig): + digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, + errorPutToken_{produces>()}, + tkErrorPutToken_{produces()}, + userErrorPutToken_{produces("UserErrorModules")}, + disabledChannelPutToken_{produces>()}, + cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), + 
tkerrorlist_(iConfig.getParameter>("ErrorList")), + usererrorlist_(iConfig.getParameter>("UserErrorList")), + usePhase1_(iConfig.getParameter ("UsePhase1")) +{} + +void SiPixelDigiErrorsFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("digiErrorSoASrc", edm::InputTag("siPixelDigiErrorsSoA")); + desc.add("CablingMapLabel","")->setComment("CablingMap label"); + desc.add("UsePhase1",false)->setComment("## Use phase1"); + desc.add >("ErrorList", std::vector{29})->setComment("## ErrorList: list of error codes used by tracking to invalidate modules"); + desc.add >("UserErrorList", std::vector{40})->setComment("## UserErrorList: list of error codes used by Pixel experts for investigation"); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // pack errors into collection + + // initialize cabling map or update if necessary + if (cablingWatcher_.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + edm::ESTransientHandle cablingMap; + iSetup.get().get(cablingMapLabel_, cablingMap); + cabling_ = cablingMap->cablingTree(); + LogDebug("map version:")<< cabling_->version(); + } + + const auto& digiErrors = iEvent.get(digiErrorSoAGetToken_); + + + edm::DetSetVector errorcollection{}; + DetIdCollection tkerror_detidcollection{}; + DetIdCollection usererror_detidcollection{}; + edmNew::DetSetVector disabled_channelcollection{}; + + PixelDataFormatter formatter(cabling_.get(), usePhase1_); // for phase 1 & 0 + const PixelDataFormatter::Errors *formatterErrors = digiErrors.formatterErrors(); + assert(formatterErrors != nullptr); + auto errors = *formatterErrors; // make a copy + PixelDataFormatter::DetErrors nodeterrors; + + auto size = digiErrors.size(); + for (auto i = 0U; i < size; i++) { + PixelErrorCompact err = digiErrors.error(i); + if 
(err.errorType != 0) { + SiPixelRawDataError error(err.word, err.errorType, err.fedId + 1200); + errors[err.rawId].push_back(error); + } + } + + constexpr uint32_t dummydetid = 0xffffffff; + typedef PixelDataFormatter::Errors::iterator IE; + for (IE is = errors.begin(); is != errors.end(); is++) { + + uint32_t errordetid = is->first; + if (errordetid == dummydetid) {// errors given dummy detId must be sorted by Fed + nodeterrors.insert( nodeterrors.end(), errors[errordetid].begin(), errors[errordetid].end() ); + } + else { + edm::DetSet& errorDetSet = errorcollection.find_or_insert(errordetid); + errorDetSet.data.insert(errorDetSet.data.end(), is->second.begin(), is->second.end()); + // Fill detid of the detectors where there is error AND the error number is listed + // in the configurable error list in the job option cfi. + // Code needs to be here, because there can be a set of errors for each + // entry in the for loop over PixelDataFormatter::Errors + + std::vector disabledChannelsDetSet; + + for (auto const& aPixelError : errorDetSet) { + // For the time being, we extend the error handling functionality with ErrorType 25 + // In the future, we should sort out how the usage of tkerrorlist can be generalized + if (aPixelError.getType() == 25) { + int fedId = aPixelError.getFedId(); + const sipixelobjects::PixelFEDCabling* fed = cabling_->fed(fedId); + if (fed) { + cms_uint32_t linkId = formatter.linkId(aPixelError.getWord32()); + const sipixelobjects::PixelFEDLink* link = fed->link(linkId); + if (link) { + // The "offline" 0..15 numbering is fixed by definition, also, the FrameConversion depends on it + // in contrast, the ROC-in-channel numbering is determined by hardware --> better to use the "offline" scheme + PixelFEDChannel ch = {fed->id(), linkId, 25, 0}; + for (unsigned int iRoc = 1; iRoc <= link->numberOfROCs(); iRoc++) { + const sipixelobjects::PixelROC * roc = link->roc(iRoc); + if (roc->idInDetUnit() < ch.roc_first) ch.roc_first = roc->idInDetUnit(); 
+ if (roc->idInDetUnit() > ch.roc_last) ch.roc_last = roc->idInDetUnit(); + } + if (ch.roc_first& errorDetSet = errorcollection.find_or_insert(dummydetid); + errorDetSet.data = nodeterrors; + + iEvent.emplace(errorPutToken_, std::move(errorcollection)); + iEvent.emplace(tkErrorPutToken_, std::move(tkerror_detidcollection)); + iEvent.emplace(userErrorPutToken_, std::move(usererror_detidcollection)); + iEvent.emplace(disabledChannelPutToken_, std::move(disabled_channelcollection)); +} + +DEFINE_FWK_MODULE(SiPixelDigiErrorsFromSoA); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc new file mode 100644 index 0000000000000..d47542528ed86 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -0,0 +1,75 @@ +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +class SiPixelDigiErrorsSoAFromCUDA: public edm::stream::EDProducer { +public: + explicit SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig); + ~SiPixelDigiErrorsSoAFromCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, 
edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + edm::EDGetTokenT> digiErrorGetToken_; + edm::EDPutTokenT digiErrorPutToken_; + + cudautils::host::unique_ptr data_; + GPU::SimpleVector error_; + const PixelFormatterErrors *formatterErrors_ = nullptr; +}; + +SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig): + digiErrorGetToken_(consumes>(iConfig.getParameter("src"))), + digiErrorPutToken_(produces()) +{} + +void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelClustersCUDA")); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + // Do the transfer in a CUDA stream parallel to the computation CUDA stream + CUDAScopedContext ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + + const auto& gpuDigiErrors = ctx.get(iEvent, digiErrorGetToken_); + + auto tmp = gpuDigiErrors.dataErrorToHostAsync(ctx.stream()); + error_ = std::move(tmp.first); + data_ = std::move(tmp.second); + formatterErrors_ = &(gpuDigiErrors.formatterErrors()); +} + +void SiPixelDigiErrorsSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // The following line copies the data from the pinned host memory to + // regular host memory. In principle that feels unnecessary (why not + // just use the pinned host memory?). 
There are a few arguments for + // doing it though + // - Now can release the pinned host memory back to the (caching) allocator + // * if we'd like to keep the pinned memory, we'd need to also + // keep the CUDA stream around as long as that, or allow pinned + // host memory to be allocated without a CUDA stream + // - What if a CPU algorithm would produce the same SoA? We can't + // use cudaMallocHost without a GPU... + iEvent.emplace(digiErrorPutToken_, error_.size(), error_.data(), formatterErrors_); + + error_ = GPU::make_SimpleVector(0, nullptr); + data_.reset(); + formatterErrors_ = nullptr; +} + +// define as framework plugin +DEFINE_FWK_MODULE(SiPixelDigiErrorsSoAFromCUDA); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc new file mode 100644 index 0000000000000..068701f0bcf07 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc @@ -0,0 +1,81 @@ +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + + +class SiPixelDigisSoAFromCUDA: public edm::stream::EDProducer { +public: + explicit SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig); + ~SiPixelDigisSoAFromCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& 
descriptions); + +private: + void acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + edm::EDGetTokenT> digiGetToken_; + edm::EDPutTokenT digiPutToken_; + + cudautils::host::unique_ptr pdigi_; + cudautils::host::unique_ptr rawIdArr_; + cudautils::host::unique_ptr adc_; + cudautils::host::unique_ptr< int32_t[]> clus_; + + int nDigis_; +}; + +SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig): + digiGetToken_(consumes>(iConfig.getParameter("src"))), + digiPutToken_(produces()) +{} + +void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelClustersCUDA")); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + // Do the transfer in a CUDA stream parallel to the computation CUDA stream + CUDAScopedContext ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + + const auto& gpuDigis = ctx.get(iEvent, digiGetToken_); + + nDigis_ = gpuDigis.nDigis(); + pdigi_ = gpuDigis.pdigiToHostAsync(ctx.stream()); + rawIdArr_ = gpuDigis.rawIdArrToHostAsync(ctx.stream()); + adc_ = gpuDigis.adcToHostAsync(ctx.stream()); + clus_ = gpuDigis.clusToHostAsync(ctx.stream()); +} + +void SiPixelDigisSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // The following line copies the data from the pinned host memory to + // regular host memory. In principle that feels unnecessary (why not + // just use the pinned host memory?). 
There are a few arguments for + // doing it though + // - Now can release the pinned host memory back to the (caching) allocator + // * if we'd like to keep the pinned memory, we'd need to also + // keep the CUDA stream around as long as that, or allow pinned + // host memory to be allocated without a CUDA stream + // - What if a CPU algorithm would produce the same SoA? We can't + // use cudaMallocHost without a GPU... + iEvent.emplace(digiPutToken_, nDigis_, pdigi_.get(), rawIdArr_.get(), adc_.get(), clus_.get()); + + pdigi_.reset(); + rawIdArr_.reset(); + adc_.reset(); + clus_.reset(); +} + +// define as framework plugin +DEFINE_FWK_MODULE(SiPixelDigisSoAFromCUDA); diff --git a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py index c2479af1f60bd..50c8f0fcabd3c 100644 --- a/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py +++ b/EventFilter/SiPixelRawToDigi/python/SiPixelRawToDigi_cfi.py @@ -1,14 +1,24 @@ import FWCore.ParameterSet.Config as cms -import EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi +from EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi import siPixelRawToDigi as _siPixelRawToDigi -siPixelDigis = EventFilter.SiPixelRawToDigi.siPixelRawToDigi_cfi.siPixelRawToDigi.clone() +from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA +siPixelDigis = SwitchProducerCUDA( + cpu = _siPixelRawToDigi.clone() +) from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel -phase1Pixel.toModify(siPixelDigis, UsePhase1=True) - -import RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi -_siPixelDigis_gpu = RecoLocalTracker.SiPixelClusterizer.siPixelDigiHeterogeneousConverter_cfi.siPixelDigiHeterogeneousConverter.clone() -_siPixelDigis_gpu.includeErrors = cms.bool(True) +phase1Pixel.toModify(siPixelDigis.cpu, UsePhase1=True) from Configuration.ProcessModifiers.gpu_cff import gpu -gpu.toReplaceWith(siPixelDigis, _siPixelDigis_gpu) 
+gpu.toModify(siPixelDigis, + cuda = cms.EDAlias( + siPixelDigiErrors = cms.VPSet( + cms.PSet(type = cms.string("DetIdedmEDCollection")), + cms.PSet(type = cms.string("SiPixelRawDataErroredmDetSetVector")), + cms.PSet(type = cms.string("PixelFEDChanneledmNewDetSetVector")) + ), + siPixelDigisClustersPreSplitting = cms.VPSet( + cms.PSet(type = cms.string("PixelDigiedmDetSetVector")) + ) + ) +) diff --git a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py new file mode 100644 index 0000000000000..31ba8596bddc6 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py @@ -0,0 +1,30 @@ +import FWCore.ParameterSet.Config as cms + +from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis +from EventFilter.SiPixelRawToDigi.siPixelDigisSoAFromCUDA_cfi import siPixelDigisSoAFromCUDA as _siPixelDigisSoAFromCUDA +from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsSoAFromCUDA_cfi import siPixelDigiErrorsSoAFromCUDA as _siPixelDigiErrorsSoAFromCUDA +from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsFromSoA_cfi import siPixelDigiErrorsFromSoA as _siPixelDigiErrorsFromSoA + +siPixelDigisTask = cms.Task(siPixelDigis) + +siPixelDigisSoA = _siPixelDigisSoAFromCUDA.clone( + src = "siPixelClustersCUDAPreSplitting" +) +siPixelDigiErrorsSoA = _siPixelDigiErrorsSoAFromCUDA.clone( + src = "siPixelClustersCUDAPreSplitting" +) +siPixelDigiErrors = _siPixelDigiErrorsFromSoA.clone() + +from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel +phase1Pixel.toModify(siPixelDigiErrors, UsePhase1=True) + +siPixelDigisTaskCUDA = cms.Task( + siPixelDigisSoA, + siPixelDigiErrorsSoA, + siPixelDigiErrors +) + +from Configuration.ProcessModifiers.gpu_cff import gpu +_siPixelDigisTask_gpu = siPixelDigisTask.copy() +_siPixelDigisTask_gpu.add(siPixelDigisTaskCUDA) +gpu.toReplaceWith(siPixelDigisTask, _siPixelDigisTask_gpu) diff --git 
a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index b75e75e000d48..a486a83d178f4 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -9,11 +9,11 @@ from RecoLocalTracker.SiStripRecHitConverter.StripCPEfromTrackAngle_cfi import * from RecoLocalTracker.SiStripZeroSuppression.SiStripZeroSuppression_cfi import * from RecoLocalTracker.SiStripClusterizer.SiStripClusterizer_cfi import * -from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import * +from RecoLocalTracker.SiPixelClusterizer.siPixelClustersPreSplitting_cff import * from RecoLocalTracker.SiPixelRecHits.SiPixelRecHits_cfi import * from RecoLocalTracker.SubCollectionProducers.clustersummaryproducer_cfi import * -pixeltrackerlocalrecoTask = cms.Task(siPixelClustersPreSplitting,siPixelRecHitsPreSplitting) +pixeltrackerlocalrecoTask = cms.Task(siPixelClustersPreSplittingTask,siPixelRecHitsPreSplitting) striptrackerlocalrecoTask = cms.Task(siStripZeroSuppression,siStripClusters,siStripMatchedRecHits) trackerlocalrecoTask = cms.Task(pixeltrackerlocalrecoTask,striptrackerlocalrecoTask,clusterSummaryProducer) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc new file mode 100644 index 0000000000000..4c405a8c85afd --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -0,0 +1,158 @@ +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" +#include 
"DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" + +namespace { + struct AccretionCluster { + typedef unsigned short UShort; + static constexpr UShort MAXSIZE = 256; + UShort adc[MAXSIZE]; + UShort x[MAXSIZE]; + UShort y[MAXSIZE]; + UShort xmin=16000; + UShort ymin=16000; + unsigned int isize=0; + int charge=0; + + void clear() { + isize=0; + charge=0; + xmin=16000; + ymin=16000; + } + + bool add(SiPixelCluster::PixelPos const & p, UShort const iadc) { + if (isize==MAXSIZE) return false; + xmin=std::min(xmin,(unsigned short)(p.row())); + ymin=std::min(ymin,(unsigned short)(p.col())); + adc[isize]=iadc; + x[isize]=p.row(); + y[isize++]=p.col(); + charge+=iadc; + return true; + } + }; + + constexpr uint32_t dummydetid = 0xffffffff; +} + +class SiPixelDigisClustersFromSoA: public edm::global::EDProducer<> { +public: + explicit SiPixelDigisClustersFromSoA(const edm::ParameterSet& iConfig); + ~SiPixelDigisClustersFromSoA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + edm::EDGetTokenT digiGetToken_; + + edm::EDPutTokenT> digiPutToken_; + edm::EDPutTokenT clusterPutToken_; + +}; + +SiPixelDigisClustersFromSoA::SiPixelDigisClustersFromSoA(const edm::ParameterSet& iConfig): + digiGetToken_(consumes(iConfig.getParameter("src"))), + digiPutToken_(produces>()), + clusterPutToken_(produces()) +{} + +void 
SiPixelDigisClustersFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelDigisSoA")); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + const auto& digis = iEvent.get(digiGetToken_); + + edm::ESHandle trackerTopologyHandle; + iSetup.get().get(trackerTopologyHandle); + const auto& ttopo = *trackerTopologyHandle; + + auto collection = std::make_unique>(); + auto outputClusters = std::make_unique(); + + const uint32_t nDigis = digis.size(); + edm::DetSet * detDigis=nullptr; + for (uint32_t i = 0; i < nDigis; i++) { + if (digis.pdigi(i)==0) continue; + detDigis = &collection->find_or_insert(digis.rawIdArr(i)); + if ( (*detDigis).empty() ) (*detDigis).data.reserve(32); // avoid the first relocations + break; + } + + int32_t nclus=-1; + std::vector aclusters(1024); + auto totCluseFilled=0; + + auto fillClusters = [&](uint32_t detId){ + if (nclus<0) return; // this in reality should never happen + edmNew::DetSetVector::FastFiller spc(*outputClusters, detId); + auto layer = (DetId(detId).subdetId()==1) ? ttopo.pxbLayer(detId) : 0; + auto clusterThreshold = (layer==1) ? 2000 : 4000; + for (int32_t ic=0; ic9000) continue; // not in cluster; TODO add an assert for the size + assert(digis.rawIdArr(i) > 109999); + if ( (*detDigis).detId() != digis.rawIdArr(i)) + { + fillClusters((*detDigis).detId()); + assert(nclus==-1); + detDigis = &collection->find_or_insert(digis.rawIdArr(i)); + if ( (*detDigis).empty() ) + (*detDigis).data.reserve(32); // avoid the first relocations + else { std::cout << "Problem det present twice in input! 
" << (*detDigis).detId() << std::endl; } + } + (*detDigis).data.emplace_back(digis.pdigi(i)); + auto const & dig = (*detDigis).data.back(); + // fill clusters + assert(digis.clus(i)>=0); + assert(digis.clus(i)<1024); + nclus = std::max(digis.clus(i),nclus); + auto row = dig.row(); + auto col = dig.column(); + SiPixelCluster::PixelPos pix(row,col); + aclusters[digis.clus(i)].add(pix, digis.adc(i)); + } + + // fill final clusters + fillClusters((*detDigis).detId()); + //std::cout << "filled " << totCluseFilled << " clusters" << std::endl; + + iEvent.put(digiPutToken_, std::move(collection)); + iEvent.put(clusterPutToken_, std::move(outputClusters)); +} + +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoA); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc new file mode 100644 index 0000000000000..5dc04009f4832 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -0,0 +1,243 @@ +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/FEDRawData/interface/FEDRawData.h" +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include 
"EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h" +#include "RecoTracker/Record/interface/CkfComponentsRecord.h" + +#include "SiPixelRawToClusterGPUKernel.h" + +#include +#include +#include + +class SiPixelRawToClusterCUDA: public edm::stream::EDProducer { +public: + explicit SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig); + ~SiPixelRawToClusterCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + edm::EDGetTokenT rawGetToken_; + + edm::EDPutTokenT> digiPutToken_; + edm::EDPutTokenT> digiErrorPutToken_; + edm::EDPutTokenT> clusterPutToken_; + + CUDAContextToken ctxTmp_; + + edm::ESWatcher recordWatcher; + + std::string cablingMapLabel_; + std::unique_ptr cabling_; + std::vector fedIds_; + const SiPixelFedCablingMap *cablingMap_ = nullptr; + std::unique_ptr regions_; + + pixelgpudetails::SiPixelRawToClusterGPUKernel gpuAlgo_; + 
PixelDataFormatter::Errors errors_; + + const bool includeErrors_; + const bool useQuality_; + const bool usePilotBlade_; + const bool convertADCtoElectrons_; +}; + +SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig): + rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), + digiPutToken_(produces>()), + clusterPutToken_(produces>()), + cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), + includeErrors_(iConfig.getParameter("IncludeErrors")), + useQuality_(iConfig.getParameter("UseQualityInfo")), + usePilotBlade_(iConfig.getParameter ("UsePilotBlade")), // Control the usage of pilot-blade data, FED=40 + convertADCtoElectrons_(iConfig.getParameter("ConvertADCtoElectrons")) +{ + if(includeErrors_) { + digiErrorPutToken_ = produces>(); + } + + // regions + if(!iConfig.getParameter("Regions").getParameterNames().empty()) { + regions_ = std::make_unique(iConfig, consumesCollector()); + } + + if(usePilotBlade_) edm::LogInfo("SiPixelRawToCluster") << " Use pilot blade data (FED 40)"; +} + +void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("IncludeErrors",true); + desc.add("UseQualityInfo",false); + desc.add("UsePilotBlade",false)->setComment("## Use pilot blades"); + desc.add("ConvertADCtoElectrons", false)->setComment("## do the calibration ADC-> Electron and apply the threshold, requried for clustering"); + desc.add("InputLabel",edm::InputTag("rawDataCollector")); + { + edm::ParameterSetDescription psd0; + psd0.addOptional>("inputs"); + psd0.addOptional>("deltaPhi"); + psd0.addOptional>("maxZ"); + psd0.addOptional("beamSpot"); + desc.add("Regions",psd0)->setComment("## Empty Regions PSet means complete unpacking"); + } + desc.add("CablingMapLabel","")->setComment("CablingMap label"); //Tav + descriptions.addWithDefaultLabel(desc); +} + + +void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, 
edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + CUDAScopedContext ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + + edm::ESHandle hgpuMap; + iSetup.get().get(hgpuMap); + if(hgpuMap->hasQuality() != useQuality_) { + throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality_ << ") differs the one from SiPixelFedCablingMapGPUWrapper. Please fix your configuration."; + } + // get the GPU product already here so that the async transfer can begin + const auto *gpuMap = hgpuMap->getGPUProductAsync(ctx.stream()); + + edm::ESHandle hgains; + iSetup.get().get(hgains); + // get the GPU product already here so that the async transfer can begin + const auto *gpuGains = hgains->getGPUProductAsync(ctx.stream()); + + cudautils::device::unique_ptr modulesToUnpackRegional; + const unsigned char *gpuModulesToUnpack; + + if(regions_) { + regions_->run(iEvent, iSetup); + LogDebug("SiPixelRawToCluster") << "region2unpack #feds: "<nFEDs(); + LogDebug("SiPixelRawToCluster") << "region2unpack #modules (BPIX,EPIX,total): "<nBarrelModules()<<" "<nForwardModules()<<" "<nModules(); + modulesToUnpackRegional = hgpuMap->getModToUnpRegionalAsync(*(regions_->modulesToUnpack()), ctx.stream()); + gpuModulesToUnpack = modulesToUnpackRegional.get(); + } + else { + gpuModulesToUnpack = hgpuMap->getModToUnpAllAsync(ctx.stream()); + } + + // initialize cabling map or update if necessary + if (recordWatcher.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + edm::ESTransientHandle cablingMap; + iSetup.get().get(cablingMapLabel_, cablingMap); //Tav + cablingMap_ = cablingMap.product(); + fedIds_ = cablingMap->fedIds(); + cabling_ = cablingMap->cablingTree(); + LogDebug("map version:")<< cabling_->version(); + } + + const auto& buffers = iEvent.get(rawGetToken_); + + errors_.clear(); + + // GPU specific: Data extraction for RawToDigi GPU + unsigned int wordCounterGPU = 0; + unsigned int fedCounter 
= 0; + bool errorsInEvent = false; + + // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() + ErrorChecker errorcheck; + auto wordFedAppender = pixelgpudetails::SiPixelRawToClusterGPUKernel::WordFedAppender(ctx.stream()); + for(int fedId: fedIds_) { + if (!usePilotBlade_ && (fedId==40) ) continue; // skip pilot blade data + if (regions_ && !regions_->mayUnpackFED(fedId)) continue; + + // for GPU + // first 150 index stores the fedId and next 150 will store the + // start index of word in that fed + assert(fedId>=1200); + fedCounter++; + + // get event data for this fed + const FEDRawData& rawData = buffers.FEDData( fedId ); + + // GPU specific + int nWords = rawData.size()/sizeof(cms_uint64_t); + if (nWords == 0) { + continue; + } + + // check CRC bit + const cms_uint64_t* trailer = reinterpret_cast(rawData.data())+(nWords-1); + if (not errorcheck.checkCRC(errorsInEvent, fedId, trailer, errors_)) { + continue; + } + + // check headers + const cms_uint64_t* header = reinterpret_cast(rawData.data()); header--; + bool moreHeaders = true; + while (moreHeaders) { + header++; + bool headerStatus = errorcheck.checkHeader(errorsInEvent, fedId, header, errors_); + moreHeaders = headerStatus; + } + + // check trailers + bool moreTrailers = true; + trailer++; + while (moreTrailers) { + trailer--; + bool trailerStatus = errorcheck.checkTrailer(errorsInEvent, fedId, nWords, trailer, errors_); + moreTrailers = trailerStatus; + } + + const cms_uint32_t * bw = (const cms_uint32_t *)(header+1); + const cms_uint32_t * ew = (const cms_uint32_t *)(trailer); + + assert(0 == (ew-bw)%2); + wordFedAppender.initializeWordFed(fedId, wordCounterGPU, bw, (ew-bw)); + wordCounterGPU+=(ew-bw); + + } // end of for loop + + gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack, gpuGains, + wordFedAppender, + std::move(errors_), + wordCounterGPU, fedCounter, convertADCtoElectrons_, + useQuality_, includeErrors_, + edm::MessageDrop::instance()->debugEnabled, + 
ctx.stream()); + + ctxTmp_ = ctx.toToken(); +} + +void SiPixelRawToClusterCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + CUDAScopedContext ctx{std::move(ctxTmp_)}; + + auto tmp = gpuAlgo_.getResults(); + ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first)); + ctx.emplace(iEvent, clusterPutToken_, std::move(tmp.second)); + if(includeErrors_) { + ctx.emplace(iEvent, digiErrorPutToken_, gpuAlgo_.getErrors()); + } +} + +// define as framework plugin +DEFINE_FWK_MODULE(SiPixelRawToClusterCUDA); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 1388ed4852b25..fead8e59a0db3 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -44,13 +44,8 @@ namespace pixelgpudetails { - // data structures size - constexpr uint32_t vsize = sizeof(GPU::SimpleVector); - constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); - // number of words for all the FEDs constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; - constexpr uint32_t MAX_ERROR_SIZE = MAX_FED_WORDS * esize; SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender(cuda::stream_t<>& cudaStream) { edm::Service cs; @@ -397,7 +392,7 @@ namespace pixelgpudetails { const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds, uint16_t *xx, uint16_t *yy, uint16_t *adc, uint32_t *pdigi, uint32_t *rawIdArr, uint16_t *moduleId, - GPU::SimpleVector *err, + GPU::SimpleVector *err, bool useQualityInfo, bool includeErrors, bool debug) { //if (threadIdx.x==0) printf("Event: %u blockIdx.x: %u start: %u end: %u\n", eventno, blockIdx.x, begin, end); @@ -432,7 +427,7 @@ namespace pixelgpudetails { if (includeErrors and skipROC) { uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); - 
err->push_back(pixelgpudetails::error_obj{rID, ww, errorType, fedId}); + err->push_back(PixelErrorCompact{rID, ww, errorType, fedId}); continue; } @@ -476,7 +471,7 @@ namespace pixelgpudetails { if (includeErrors) { if (not rocRowColIsValid(row, col)) { uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays - err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId}); + err->push_back(PixelErrorCompact{rawId, ww, error, fedId}); if(debug) printf("BPIX1 Error status: %i\n", error); continue; } @@ -491,7 +486,7 @@ namespace pixelgpudetails { localPix.col = col; if (includeErrors and not dcolIsValid(dcol, pxid)) { uint8_t error = conversionError(fedId, 3, debug); - err->push_back(pixelgpudetails::error_obj{rawId, ww, error, fedId}); + err->push_back(PixelErrorCompact{rawId, ww, error, fedId}); if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); continue; } @@ -514,19 +509,22 @@ namespace pixelgpudetails { const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const WordFedAppender& wordFed, + PixelFormatterErrors&& errors, const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, - bool useQualityInfo, bool includeErrors, bool transferToCPU, bool debug, + bool useQualityInfo, bool includeErrors, bool debug, cuda::stream_t<>& stream) { nDigis = wordCounter; - constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; - digis_d = SiPixelDigisCUDA(MAX_FED_WORDS, stream); - clusters_d = SiPixelClustersCUDA(MAX_FED_WORDS, gpuClustering::MaxNumModules, stream); + digis_d = SiPixelDigisCUDA(pixelgpudetails::MAX_FED_WORDS, stream); + if(includeErrors) { + digiErrors_d = SiPixelDigiErrorsCUDA(pixelgpudetails::MAX_FED_WORDS, std::move(errors), stream); + } + clusters_d = SiPixelClustersCUDA(gpuClustering::MaxNumModules, stream); edm::Service cs; - digis_clusters_h.nModules_Clusters = cs->make_host_unique(2, stream); + 
nModules_Clusters_h = cs->make_host_unique(2, stream); { const int threadsPerBlock = 512; @@ -537,20 +535,8 @@ namespace pixelgpudetails { auto word_d = cs->make_device_unique(wordCounter, stream); auto fedId_d = cs->make_device_unique(wordCounter, stream); - auto error_d = cs->make_device_unique>(stream); - auto data_d = cs->make_device_unique(MAX_FED_WORDS, stream); - cudaCheck(cudaMemsetAsync(data_d.get(), 0x00, MAX_ERROR_SIZE, stream.id())); - auto error_h_tmp = cs->make_host_unique>(stream); - GPU::make_SimpleVector(error_h_tmp.get(), MAX_FED_WORDS, data_d.get()); - assert(error_h_tmp->size() == 0); - assert(error_h_tmp->capacity() == static_cast(MAX_FED_WORDS)); - cudaCheck(cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); cudaCheck(cudaMemcpyAsync(fedId_d.get(), wordFed.fedId(), wordCounter*sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(error_d.get(), error_h_tmp.get(), vsize, cudaMemcpyDefault, stream.id())); - - auto pdigi_d = cs->make_device_unique(wordCounter, stream); - auto rawIdArr_d = cs->make_device_unique(wordCounter, stream); // Launch rawToDigi kernel RawToDigi_kernel<<>>( @@ -560,43 +546,17 @@ namespace pixelgpudetails { word_d.get(), fedId_d.get(), digis_d.xx(), digis_d.yy(), digis_d.adc(), - pdigi_d.get(), - rawIdArr_d.get(), + digis_d.pdigi(), + digis_d.rawIdArr(), digis_d.moduleInd(), - error_d.get(), + digiErrors_d.error(), // returns nullptr if default-constructed useQualityInfo, includeErrors, debug); cudaCheck(cudaGetLastError()); - // copy data to host variable - if(transferToCPU) { - digis_clusters_h.pdigi = cs->make_host_unique(MAX_FED_WORDS, stream); - digis_clusters_h.rawIdArr = cs->make_host_unique(MAX_FED_WORDS, stream); - cudaCheck(cudaMemcpyAsync(digis_clusters_h.pdigi.get(), pdigi_d.get(), wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(digis_clusters_h.rawIdArr.get(), rawIdArr_d.get(), 
wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - - if (includeErrors) { - digis_clusters_h.data = cs->make_host_unique(MAX_FED_WORDS, stream); - digis_clusters_h.error = cs->make_host_unique>(stream); - GPU::make_SimpleVector(digis_clusters_h.error.get(), MAX_FED_WORDS, digis_clusters_h.data.get()); - assert(digis_clusters_h.error->size() == 0); - assert(digis_clusters_h.error->capacity() == static_cast(MAX_FED_WORDS)); - - cudaCheck(cudaMemcpyAsync(digis_clusters_h.error.get(), error_d.get(), vsize, cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(digis_clusters_h.data.get(), data_d.get(), MAX_ERROR_SIZE, cudaMemcpyDefault, stream.id())); - // If we want to transfer only the minimal amount of data, we - // need a synchronization point. A single ExternalWork (of - // SiPixelRawToClusterHeterogeneous) does not help because it is - // already used to synchronize the data movement. So we'd need - // two ExternalWorks (or explicit use of TBB tasks). The - // prototype of #100 would allow this easily (as there would be - // two ExternalWorks). 
- // - //cudaCheck(cudaStreamSynchronize(stream.id())); - //int size = digis_clusters_h.error->size(); - //cudaCheck(cudaMemcpyAsync(digis_clusters_h.data.get(), data_d.get(), size*esize, cudaMemcpyDefault, stream.id())); - } + if(includeErrors) { + digiErrors_d.copyErrorToHostAsync(stream); } } // End of Raw2Digi and passing data for cluserisation @@ -614,12 +574,6 @@ namespace pixelgpudetails { wordCounter); cudaCheck(cudaGetLastError()); - // calibrated adc - if(transferToCPU) { - digis_clusters_h.adc = cs->make_host_unique(MAX_FED_WORDS, stream); - cudaCheck(cudaMemcpyAsync(digis_clusters_h.adc.get(), digis_d.adc(), wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id())); - } - #ifdef GPU_DEBUG std::cout << "CUDA countModules kernel launch with " << blocks @@ -628,11 +582,11 @@ namespace pixelgpudetails { cudaCheck(cudaMemsetAsync(clusters_d.moduleStart(), 0x00, sizeof(uint32_t), stream.id())); - countModules<<>>(digis_d.c_moduleInd(), clusters_d.moduleStart(), clusters_d.clus(), wordCounter); + countModules<<>>(digis_d.c_moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); // read the number of modules into a data member, used by getProduct()) - cudaCheck(cudaMemcpyAsync(&(digis_clusters_h.nModules_Clusters[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream.id())); threadsPerBlock = 256; blocks = MaxNumModules; @@ -646,7 +600,7 @@ namespace pixelgpudetails { digis_d.c_xx(), digis_d.c_yy(), clusters_d.c_moduleStart(), clusters_d.clusInModule(), clusters_d.moduleId(), - clusters_d.clus(), + digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); @@ -656,12 +610,11 @@ namespace pixelgpudetails { digis_d.c_adc(), clusters_d.c_moduleStart(), clusters_d.clusInModule(), clusters_d.c_moduleId(), - clusters_d.clus(), + digis_d.clus(), wordCounter); 
cudaCheck(cudaGetLastError()); - // count the module start indices already here (instead of // rechits) so that the number of clusters/hits can be made // available in the rechit producer without additional points of @@ -681,15 +634,7 @@ namespace pixelgpudetails { clusters_d.c_clusInModule(), &clusters_d.clusModuleStart()[1], gpuClustering::MaxNumModules, stream.id())); // last element holds the number of all clusters - cudaCheck(cudaMemcpyAsync(&(digis_clusters_h.nModules_Clusters[1]), clusters_d.clusModuleStart()+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - - - // clusters - if(transferToCPU) { - digis_clusters_h.clus = cs->make_host_unique(MAX_FED_WORDS, stream); - cudaCheck(cudaMemcpyAsync(digis_clusters_h.clus.get(), clusters_d.clus(), wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - } + cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), clusters_d.clusModuleStart()+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); } // end clusterizer scope } - } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 44bed9abc1e68..1ab8bc3fa5998 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -5,10 +5,13 @@ #include #include "cuda/api_wrappers.h" -#include "CUDADataFormats/Common/interface/host_unique_ptr.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "FWCore/Utilities/interface/typedefs.h" #include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" -#include "siPixelRawToClusterHeterogeneousProduct.h" +#include 
"HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "DataFormats/SiPixelDigi/interface/PixelErrors.h" struct SiPixelFedCablingMapGPU; class SiPixelGainForHLTonGPU; @@ -152,34 +155,8 @@ namespace pixelgpudetails { } - using error_obj = siPixelRawToClusterHeterogeneousProduct::error_obj; - - class SiPixelRawToClusterGPUKernel { public: - - using GPUProduct = siPixelRawToClusterHeterogeneousProduct::GPUProduct; - - struct CPUData { - CPUData() = default; - ~CPUData() = default; - - CPUData(const CPUData&) = delete; - CPUData& operator=(const CPUData&) = delete; - CPUData(CPUData&&) = default; - CPUData& operator=(CPUData&&) = default; - - edm::cuda::host::unique_ptr nModules_Clusters; // These should really be part of the GPU product - - edm::cuda::host::unique_ptr data; - edm::cuda::host::unique_ptr> error; - - edm::cuda::host::unique_ptr pdigi; - edm::cuda::host::unique_ptr rawIdArr; - edm::cuda::host::unique_ptr adc; - edm::cuda::host::unique_ptr clus; - }; - class WordFedAppender { public: WordFedAppender(cuda::stream_t<>& cudaStream); @@ -191,8 +168,8 @@ namespace pixelgpudetails { const unsigned char *fedId() const { return fedId_.get(); } private: - edm::cuda::host::unique_ptr word_; - edm::cuda::host::unique_ptr fedId_; + cudautils::host::unique_ptr word_; + cudautils::host::unique_ptr fedId_; }; SiPixelRawToClusterGPUKernel() = default; @@ -207,62 +184,38 @@ namespace pixelgpudetails { void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const WordFedAppender& wordFed, + PixelFormatterErrors&& errors, const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, - bool useQualityInfo, bool includeErrors, bool transferToCPU_, bool debug, + bool useQualityInfo, bool includeErrors, bool debug, cuda::stream_t<>& stream); - siPixelRawToClusterHeterogeneousProduct::GPUProduct getProduct() { - return 
siPixelRawToClusterHeterogeneousProduct::GPUProduct( - std::move(digis_d), std::move(clusters_d), - nDigis, - digis_clusters_h.nModules_Clusters[0], - digis_clusters_h.nModules_Clusters[1] - ); + std::pair getResults() { + digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis); + clusters_d.setNClusters(nModules_Clusters_h[1]); + // need to explicitly deallocate while the associated CUDA + // stream is still alive + // + // technically the statement above is not true anymore now that + // the CUDA streams are cached within the CUDAService, but it is + // still better to release as early as possible + nModules_Clusters_h.reset(); + return std::make_pair(std::move(digis_d), std::move(clusters_d)); } - CPUData&& getCPUData() { - // Set the vector data pointer to point to CPU - digis_clusters_h.error->set_data(digis_clusters_h.data.get()); - return std::move(digis_clusters_h); + SiPixelDigiErrorsCUDA&& getErrors() { + return std::move(digiErrors_d); } private: uint32_t nDigis = 0; - // CPU data - CPUData digis_clusters_h; - // Data to be put in the event + cudautils::host::unique_ptr nModules_Clusters_h; SiPixelDigisCUDA digis_d; SiPixelClustersCUDA clusters_d; + SiPixelDigiErrorsCUDA digiErrors_d; }; - // configuration and memory buffers alocated on the GPU - struct context { - uint32_t * word_d; - uint8_t * fedId_d; - uint32_t * pdigi_d; - uint16_t * xx_d; - uint16_t * yy_d; - uint16_t * adc_d; - uint16_t * moduleInd_d; - uint32_t * rawIdArr_d; - - GPU::SimpleVector * error_d; - error_obj * data_d; - - // these are for the clusterizer (to be moved) - uint32_t * moduleStart_d; - int32_t * clus_d; - uint32_t * clusInModule_d; - uint32_t * moduleId_d; - uint32_t * debug_d; - }; - - // void initCablingMap(); - context initDeviceMemory(); - void freeMemory(context &); - // see RecoLocalTracker/SiPixelClusterizer // all are runtime const, should be specified in python _cfg.py struct ADCThreshold { diff --git 
a/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py b/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py index bb0bb85697a99..b9c6862b015bf 100644 --- a/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/SiPixelClusterizerPreSplitting_cfi.py @@ -2,10 +2,16 @@ from CondTools.SiPixel.SiPixelGainCalibrationService_cfi import * from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizer_cfi import siPixelClusters as _siPixelClusters -siPixelClustersPreSplitting = _siPixelClusters.clone() +from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA +siPixelClustersPreSplitting = SwitchProducerCUDA( + cpu = _siPixelClusters.clone() +) from Configuration.ProcessModifiers.gpu_cff import gpu -from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import siPixelClustersHeterogeneous as _siPixelClustersHeterogeneous -from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import * -from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import * -gpu.toReplaceWith(siPixelClustersPreSplitting, _siPixelClustersHeterogeneous.clone()) +gpu.toModify(siPixelClustersPreSplitting, + cuda = cms.EDAlias( + siPixelDigisClustersPreSplitting = cms.VPSet( + cms.PSet(type = cms.string("SiPixelClusteredmNewDetSetVector")) + ) + ) +) diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py new file mode 100644 index 0000000000000..c80f3b16b3a43 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py @@ -0,0 +1,21 @@ +import FWCore.ParameterSet.Config as cms + +from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting +from 
RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDA_cfi import siPixelRawToClusterCUDA as _siPixelRawToClusterCUDA +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoA_cfi import siPixelDigisClustersFromSoA as _siPixelDigisClustersFromSoA +from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import * +from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import * + +siPixelClustersPreSplittingTask = cms.Task(siPixelClustersPreSplitting) + +siPixelClustersCUDAPreSplitting = _siPixelRawToClusterCUDA.clone() +siPixelDigisClustersPreSplitting = _siPixelDigisClustersFromSoA.clone() +siPixelClustersPreSplittingTaskCUDA = cms.Task( + siPixelClustersCUDAPreSplitting, + siPixelDigisClustersPreSplitting, +) + +from Configuration.ProcessModifiers.gpu_cff import gpu +_siPixelClustersPreSplittingTask_gpu = siPixelClustersPreSplittingTask.copy() +_siPixelClustersPreSplittingTask_gpu.add(siPixelClustersPreSplittingTaskCUDA) +gpu.toReplaceWith(siPixelClustersPreSplittingTask, _siPixelClustersPreSplittingTask_gpu) From 9ccfd893e1bb9c0d829310c80d775046c10857d3 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 15 Mar 2019 09:26:12 -0500 Subject: [PATCH 065/149] Various updates to pixel track/vertex DQM and MTV (cms-patatrack#285) * Add DQM for pixel vertices * Add pT>0.9GeV pixel track collections to MTV * Add dzPV0p1, Pt0to1, Pt1 variants of pixel track DQM --- Configuration/StandardSequences/python/RawToDigi_cff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Configuration/StandardSequences/python/RawToDigi_cff.py b/Configuration/StandardSequences/python/RawToDigi_cff.py index 7484638f218e0..dd3bf675faf0d 100644 --- a/Configuration/StandardSequences/python/RawToDigi_cff.py +++ b/Configuration/StandardSequences/python/RawToDigi_cff.py @@ -63,7 +63,7 @@ RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigisTask, siStripDigis]) RawToDigi_noTk = cms.Sequence(RawToDigiTask_noTk) 
-RawToDigiTask_pixelOnly = cms.Task(siPixelDigisTask) +RawToDigiTask_pixelOnly = cms.Task(siPixelDigisTask, scalersRawToDigi) RawToDigi_pixelOnly = cms.Sequence(RawToDigiTask_pixelOnly) RawToDigiTask_ecalOnly = cms.Task(ecalDigisTask, ecalPreshowerDigis, scalersRawToDigi) From 9dbaa0ba8d3a8d26e0c73bbffa4869c420daf160 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 21 Mar 2019 19:27:12 +0100 Subject: [PATCH 066/149] Make GPU-CPU cluster matching deterministic (cms-patatrack#294) Makes the GPU-CPU cluster matching deterministic by intrusively marking CPU clusters with the cluster index. Reuse existing padding space to store the extra transient field, so that the size of SiPixelCluster does not increase. There is still a warning in case of mismatch of the content of the cluster (based on charge comparison), that can eventually be downgraded to a debug message. Properly rewrite the loop in the RawToDigi_kernel . Remove obsolete code (comments and configuration parameters) in SiPixelRawToClusterCUDA and SiPixelRawToClusterGPUKernel. --- .../SiPixelCluster/interface/SiPixelCluster.h | 229 ++++++++++-------- .../SiPixelCluster/src/classes_def.xml | 1 + .../plugins/SiPixelDigisClustersFromSoA.cc | 1 + .../plugins/SiPixelRawToClusterCUDA.cc | 7 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 67 +---- .../plugins/SiPixelRawToClusterGPUKernel.h | 2 +- 6 files changed, 139 insertions(+), 168 deletions(-) diff --git a/DataFormats/SiPixelCluster/interface/SiPixelCluster.h b/DataFormats/SiPixelCluster/interface/SiPixelCluster.h index ab4ae1add2132..ba75447e945bb 100644 --- a/DataFormats/SiPixelCluster/interface/SiPixelCluster.h +++ b/DataFormats/SiPixelCluster/interface/SiPixelCluster.h @@ -8,7 +8,7 @@ //! Class to contain and store all the topological information of pixel clusters: //! charge, global size, size and the barycenter in x and y //! local directions. It builds a vector of SiPixel (which is -//! an inner class) and a container of channels. +//! 
an inner class) and a container of channels. //! //! March 2007: Edge methods moved to RectangularPixelTopology class (V.Chiochia) //! Feb 2008: Modify the Pixel class from float to shorts @@ -21,158 +21,166 @@ #include #include #include +#include class PixelDigi; class SiPixelCluster { public: + class Pixel { public: - constexpr Pixel() : x(0), y(0), adc(0) {} // for root - constexpr Pixel(int pix_x, int pix_y, int pix_adc) : x(pix_x), y(pix_y), adc(pix_adc) {} - uint16_t x; + constexpr Pixel() : x(0), y(0), adc(0){} // for root + constexpr Pixel(int pix_x, int pix_y, int pix_adc) : + x(pix_x), y(pix_y), adc(pix_adc) {} + uint16_t x; uint16_t y; uint16_t adc; }; - + //--- Integer shift in x and y directions. class Shift { public: - constexpr Shift(int dx, int dy) : dx_(dx), dy_(dy) {} + constexpr Shift( int dx, int dy) : dx_(dx), dy_(dy) {} constexpr Shift() : dx_(0), dy_(0) {} - constexpr int dx() const { return dx_; } - constexpr int dy() const { return dy_; } - + constexpr int dx() const { return dx_;} + constexpr int dy() const { return dy_;} private: int dx_; int dy_; }; - + //--- Position of a SiPixel class PixelPos { public: constexpr PixelPos() : row_(0), col_(0) {} - constexpr PixelPos(int row, int col) : row_(row), col_(col) {} - constexpr int row() const { return row_; } - constexpr int col() const { return col_; } - constexpr PixelPos operator+(const Shift& shift) const { return PixelPos(row() + shift.dx(), col() + shift.dy()); } - + constexpr PixelPos(int row, int col) : row_(row) , col_(col) {} + constexpr int row() const { return row_;} + constexpr int col() const { return col_;} + constexpr PixelPos operator+( const Shift& shift) const { + return PixelPos( row() + shift.dx(), col() + shift.dy()); + } private: int row_; int col_; }; - - typedef std::vector::const_iterator PixelDigiIter; - typedef std::pair PixelDigiRange; - - static constexpr unsigned int MAXSPAN = 255; - static constexpr unsigned int MAXPOS = 2047; - + + typedef 
std::vector::const_iterator PixelDigiIter; + typedef std::pair PixelDigiRange; + + + static constexpr unsigned int MAXSPAN=255; + static constexpr unsigned int MAXPOS=2047; + /** Construct from a range of digis that form a cluster and from * a DetID. The range is assumed to be non-empty. */ - + SiPixelCluster() {} - - SiPixelCluster(unsigned int isize, - uint16_t const* adcs, - uint16_t const* xpos, - uint16_t const* ypos, - uint16_t const xmin, - uint16_t const ymin) - : thePixelOffset(2 * isize), thePixelADC(adcs, adcs + isize) { + + SiPixelCluster(unsigned int isize, uint16_t const * adcs, + uint16_t const * xpos, uint16_t const * ypos, + uint16_t const xmin, uint16_t const ymin) : + thePixelOffset(2*isize), thePixelADC(adcs,adcs+isize) { uint16_t maxCol = 0; uint16_t maxRow = 0; - for (unsigned int i = 0; i != isize; ++i) { - uint16_t xoffset = xpos[i] - xmin; - uint16_t yoffset = ypos[i] - ymin; - thePixelOffset[i * 2] = std::min(uint16_t(MAXSPAN), xoffset); - thePixelOffset[i * 2 + 1] = std::min(uint16_t(MAXSPAN), yoffset); - if (xoffset > maxRow) - maxRow = xoffset; - if (yoffset > maxCol) - maxCol = yoffset; + for (unsigned int i=0; i!=isize; ++i) { + uint16_t xoffset = xpos[i]-xmin; + uint16_t yoffset = ypos[i]-ymin; + thePixelOffset[i*2] = std::min(uint16_t(MAXSPAN),xoffset); + thePixelOffset[i*2+1] = std::min(uint16_t(MAXSPAN),yoffset); + if (xoffset > maxRow) maxRow = xoffset; + if (yoffset > maxCol) maxCol = yoffset; } - packRow(xmin, maxRow); - packCol(ymin, maxCol); + packRow(xmin,maxRow); + packCol(ymin,maxCol); } - + + // obsolete (only for regression tests) - SiPixelCluster(const PixelPos& pix, int adc); - void add(const PixelPos& pix, int adc); - - // Analog linear average position (barycenter) + SiPixelCluster( const PixelPos& pix, int adc); + void add( const PixelPos& pix, int adc); + + // Analog linear average position (barycenter) float x() const { float qm = 0.0; int isize = thePixelADC.size(); - for (int i = 0; i < isize; ++i) - qm += 
float(thePixelADC[i]) * (thePixelOffset[i * 2] + minPixelRow() + 0.5f); - return qm / charge(); + for (int i=0; i& pixelOffset() const { return thePixelOffset; } - const std::vector& pixelADC() const { return thePixelADC; } - + } // Return total cluster charge. + + inline int minPixelRow() const { return theMinPixelRow;} // The min x index. + inline int maxPixelRow() const { return minPixelRow() + rowSpan();} // The max x index. + inline int minPixelCol() const { return theMinPixelCol;} // The min y index. + inline int maxPixelCol() const { return minPixelCol() + colSpan();} // The max y index. + + + const std::vector & pixelOffset() const { return thePixelOffset;} + const std::vector & pixelADC() const { return thePixelADC;} + // obsolete, use single pixel access below const std::vector pixels() const { std::vector oldPixVector; int isize = thePixelADC.size(); - oldPixVector.reserve(isize); - for (int i = 0; i < isize; ++i) { + oldPixVector.reserve(isize); + for(int i=0; i thePixelOffset; + + std::vector thePixelOffset; std::vector thePixelADC; - - uint16_t theMinPixelRow = MAXPOS; // Minimum pixel index in the x direction (low edge). - uint16_t theMinPixelCol = MAXPOS; // Minimum pixel index in the y direction (left edge). - uint8_t thePixelRowSpan = 0; // Span pixel index in the x direction (low edge). - uint8_t thePixelColSpan = 0; // Span pixel index in the y direction (left edge). - - float err_x = -99999.9f; - float err_y = -99999.9f; + + + uint16_t theMinPixelRow=MAXPOS; // Minimum pixel index in the x direction (low edge). + uint16_t theMinPixelCol=MAXPOS; // Minimum pixel index in the y direction (left edge). + uint8_t thePixelRowSpan=0; // Span pixel index in the x direction (low edge). + uint8_t thePixelColSpan=0; // Span pixel index in the y direction (left edge). 
+ + uint16_t theOriginalClusterId=std::numeric_limits::max(); + + float err_x=-99999.9f; + float err_y=-99999.9f; + }; + // Comparison operators (needed by DetSetVector) -inline bool operator<(const SiPixelCluster& one, const SiPixelCluster& other) { - if (one.minPixelRow() < other.minPixelRow()) { +inline bool operator<( const SiPixelCluster& one, const SiPixelCluster& other) { + if ( one.minPixelRow() < other.minPixelRow() ) { return true; - } else if (one.minPixelRow() > other.minPixelRow()) { + } else if ( one.minPixelRow() > other.minPixelRow() ) { return false; - } else if (one.minPixelCol() < other.minPixelCol()) { + } else if ( one.minPixelCol() < other.minPixelCol() ) { return true; } else { return false; } } + #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" #include "DataFormats/Common/interface/Ref.h" @@ -227,4 +246,4 @@ typedef edm::RefProd SiPixelClusterRefProd; typedef edmNew::DetSetVector SiPixelClusterCollectionNew; typedef edm::Ref SiPixelClusterRefNew; -#endif +#endif diff --git a/DataFormats/SiPixelCluster/src/classes_def.xml b/DataFormats/SiPixelCluster/src/classes_def.xml index 55c9fd8538417..d43f062877eb0 100644 --- a/DataFormats/SiPixelCluster/src/classes_def.xml +++ b/DataFormats/SiPixelCluster/src/classes_def.xml @@ -4,6 +4,7 @@ + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index 4c405a8c85afd..2c7da14cf72af 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -109,6 +109,7 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con auto const & acluster = aclusters[ic]; if ( acluster.charge < clusterThreshold) continue; SiPixelCluster cluster(acluster.isize,acluster.adc, acluster.x,acluster.y, 
acluster.xmin,acluster.ymin); + cluster.setOriginalId(ic); ++totCluseFilled; // std::cout << "putting in this cluster " << ic << " " << cluster.charge() << " " << cluster.pixelADC().size() << endl; // sort by row (x) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index 5dc04009f4832..b23faad9e78d3 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -67,7 +67,6 @@ class SiPixelRawToClusterCUDA: public edm::stream::EDProducer const bool includeErrors_; const bool useQuality_; const bool usePilotBlade_; - const bool convertADCtoElectrons_; }; SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig): @@ -77,8 +76,7 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), includeErrors_(iConfig.getParameter("IncludeErrors")), useQuality_(iConfig.getParameter("UseQualityInfo")), - usePilotBlade_(iConfig.getParameter ("UsePilotBlade")), // Control the usage of pilot-blade data, FED=40 - convertADCtoElectrons_(iConfig.getParameter("ConvertADCtoElectrons")) + usePilotBlade_(iConfig.getParameter ("UsePilotBlade")) // Control the usage of pilot-blade data, FED=40 { if(includeErrors_) { digiErrorPutToken_ = produces>(); @@ -97,7 +95,6 @@ void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& d desc.add("IncludeErrors",true); desc.add("UseQualityInfo",false); desc.add("UsePilotBlade",false)->setComment("## Use pilot blades"); - desc.add("ConvertADCtoElectrons", false)->setComment("## do the calibration ADC-> Electron and apply the threshold, requried for clustering"); desc.add("InputLabel",edm::InputTag("rawDataCollector")); { edm::ParameterSetDescription psd0; @@ -220,7 +217,7 @@ void SiPixelRawToClusterCUDA::acquire(const 
edm::Event& iEvent, const edm::Event gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack, gpuGains, wordFedAppender, std::move(errors_), - wordCounterGPU, fedCounter, convertADCtoElectrons_, + wordCounterGPU, fedCounter, useQuality_, includeErrors_, edm::MessageDrop::instance()->debugEnabled, ctx.stream()); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index fead8e59a0db3..3d4e377eb8221 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -2,8 +2,6 @@ * * File Name: RawToClusterGPU.cu * Description: It converts Raw data into Digi Format on GPU - * then it converts adc -> electron and - * applies the adc threshold to needed for clustering * Finaly the Output of RawToDigi data is given to pixelClusterizer * **/ @@ -341,51 +339,6 @@ namespace pixelgpudetails { return rID; } - /*---------- - * Name: applyADCthreshold_kernel() - * Desc: converts adc count to electrons and then applies the - * threshold on each channel. - * make pixel to 0 if it is below the threshold - * Input: xx_d[], yy_d[], layer_d[], wordCounter, adc[], ADCThreshold - *----------- - * Output: xx_adc[], yy_adc[] with pixel threshold applied - */ - // kernel to apply adc threshold on the channels - - - // Felice: gains and pedestals are not the same for each pixel. 
This code should be rewritten to take - // in account local gains/pedestals - // __global__ void applyADCthreshold_kernel(const uint32_t *xx_d, const uint32_t *yy_d, const uint32_t *layer_d, uint32_t *adc, const uint32_t wordCounter, - // const ADCThreshold adcThreshold, uint32_t *xx_adc, uint32_t *yy_adc ) { - // int tid = threadIdx.x; - // int gIndex = blockDim.x*blockIdx.x+tid; - // if (gIndex=adcThreshold.theFirstStack_) { - // if (adcThreshold.theStackADC_==1 && adcOld==1) { - // adcNew = int(255*135); // Arbitrarily use overflow value. - // } - // if (adcThreshold.theStackADC_ >1 && adcThreshold.theStackADC_!=255 && adcOld>=1){ - // adcNew = int((adcOld-1) * gain * 255/float(adcThreshold.theStackADC_-1)); - // } - // } - // - // if (adcNew >adcThreshold.thePixelThreshold ) { - // xx_adc[gIndex]=xx_d[gIndex]; - // yy_adc[gIndex]=yy_d[gIndex]; - // } - // else { - // xx_adc[gIndex]=0; // 0: dead pixel - // yy_adc[gIndex]=0; - // } - // adc[gIndex] = adcNew; - // } - // } - // Kernel to perform Raw to Digi conversion __global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, @@ -397,14 +350,15 @@ namespace pixelgpudetails { { //if (threadIdx.x==0) printf("Event: %u blockIdx.x: %u start: %u end: %u\n", eventno, blockIdx.x, begin, end); - auto gIndex = threadIdx.x + blockIdx.x * blockDim.x; - xx[gIndex] = 0; - yy[gIndex] = 0; - adc[gIndex] = 0; - bool skipROC = false; + int32_t first = threadIdx.x + blockIdx.x*blockDim.x; + for (int32_t iloop=first, nend=wordCounter; iloop& stream) { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 1ab8bc3fa5998..a0f89dc241c64 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -185,7 +185,7 @@ namespace pixelgpudetails { const 
SiPixelGainForHLTonGPU *gains, const WordFedAppender& wordFed, PixelFormatterErrors&& errors, - const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons, + const uint32_t wordCounter, const uint32_t fedCounter, bool useQualityInfo, bool includeErrors, bool debug, cuda::stream_t<>& stream); From 18ccfc792b44dd9a902717fa82a7af3848cd2924 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 9 Apr 2019 14:28:12 +0200 Subject: [PATCH 067/149] Apply code checks (cms-patatrack#315) --- CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index 92aab1ec9d578..4d574ff0cd6c1 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -17,7 +17,7 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterE error_h = cs->make_host_unique>(stream); GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); - assert(error_h->size() == 0); + assert(error_h->empty()); assert(error_h->capacity() == static_cast(maxFedWords)); cudautils::copyAsync(error_d, error_h, stream); @@ -35,7 +35,7 @@ SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync auto data = cs->make_host_unique(error_h->capacity(), stream); // but transfer only the required amount - if(error_h->size() > 0) { + if (not error_h->empty()) { cudautils::copyAsync(data, data_d, error_h->size(), stream); } auto err = *error_h; From 665867895b566b04518eb4b3903f3c4ebf998a71 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Tue, 23 Apr 2019 08:18:43 -0500 Subject: [PATCH 068/149] Move BeamSpot transfer to GPU to its own producer (cms-patatrack#318) Implement a non-caching host allocator, useful for host-to-device copy buffers: - not bound to any CUDA stream to allow use in EDM 
beginStream(); - with the possibility to pass flags to cudaHostAlloc(), e.g. cudaHostAllocWriteCombined. Add perfect forwarding overload for CUDAProduct constructor, enabling the use of CUDAScopedContext::emplace() in BeamSpotToCUDA::produce(). Move the BeamSpot host-to-device transfer to its own EDProducer, making use of beginStream()-allocated write-combined memory for the transfer. --- .../plugins/SiPixelRawToClusterCUDA.cc | 13 ++++++++++--- .../plugins/SiPixelRawToClusterGPUKernel.cu | 7 +++---- .../plugins/SiPixelRawToClusterGPUKernel.h | 7 ++++--- .../SiPixelRecHits/plugins/BuildFile.xml | 1 + .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 9 +++++---- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index b23faad9e78d3..f2dacd5fbc415 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -24,7 +24,9 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h" #include "RecoTracker/Record/interface/CkfComponentsRecord.h" @@ -62,6 +64,7 @@ class SiPixelRawToClusterCUDA: public edm::stream::EDProducer std::unique_ptr regions_; pixelgpudetails::SiPixelRawToClusterGPUKernel gpuAlgo_; + std::unique_ptr wordFedAppender_; PixelDataFormatter::Errors errors_; const bool includeErrors_; @@ -88,6 +91,11 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi } 
if(usePilotBlade_) edm::LogInfo("SiPixelRawToCluster") << " Use pilot blade data (FED 40)"; + + edm::Service cs; + if(cs->enabled()) { + wordFedAppender_ = std::make_unique(); + } } void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -161,7 +169,6 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::Event // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() ErrorChecker errorcheck; - auto wordFedAppender = pixelgpudetails::SiPixelRawToClusterGPUKernel::WordFedAppender(ctx.stream()); for(int fedId: fedIds_) { if (!usePilotBlade_ && (fedId==40) ) continue; // skip pilot blade data if (regions_ && !regions_->mayUnpackFED(fedId)) continue; @@ -209,13 +216,13 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::Event const cms_uint32_t * ew = (const cms_uint32_t *)(trailer); assert(0 == (ew-bw)%2); - wordFedAppender.initializeWordFed(fedId, wordCounterGPU, bw, (ew-bw)); + wordFedAppender_->initializeWordFed(fedId, wordCounterGPU, bw, (ew-bw)); wordCounterGPU+=(ew-bw); } // end of for loop gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack, gpuGains, - wordFedAppender, + *wordFedAppender_, std::move(errors_), wordCounterGPU, fedCounter, useQuality_, includeErrors_, diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 3d4e377eb8221..8fdb2ed8c90d5 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -45,10 +45,9 @@ namespace pixelgpudetails { // number of words for all the FEDs constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; - SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender(cuda::stream_t<>& cudaStream) { - edm::Service cs; - word_ = 
cs->make_host_unique(MAX_FED_WORDS, cudaStream); - fedId_ = cs->make_host_unique(MAX_FED_WORDS, cudaStream); + SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender() { + word_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); + fedId_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); } void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index a0f89dc241c64..0d2b6a8c7fc65 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -11,6 +11,7 @@ #include "FWCore/Utilities/interface/typedefs.h" #include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" #include "DataFormats/SiPixelDigi/interface/PixelErrors.h" struct SiPixelFedCablingMapGPU; @@ -159,7 +160,7 @@ namespace pixelgpudetails { public: class WordFedAppender { public: - WordFedAppender(cuda::stream_t<>& cudaStream); + WordFedAppender(); ~WordFedAppender() = default; void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length); @@ -168,8 +169,8 @@ namespace pixelgpudetails { const unsigned char *fedId() const { return fedId_.get(); } private: - cudautils::host::unique_ptr word_; - cudautils::host::unique_ptr fedId_; + cudautils::host::noncached::unique_ptr word_; + cudautils::host::noncached::unique_ptr fedId_; }; SiPixelRawToClusterGPUKernel() = default; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml 
b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index a8af0c8a7c4f9..27ee3af86e102 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,3 +1,4 @@ + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 6864a046bf1dc..cbd354e71143e 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -5,6 +5,7 @@ #include #include +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" #include "DataFormats/Math/interface/approx_atan2.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" @@ -15,7 +16,7 @@ namespace gpuPixelRecHits { __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams, - float const * __restrict__ bs, + BeamSpotCUDA::Data const * __restrict__ bs, uint16_t const * __restrict__ id, uint16_t const * __restrict__ x, uint16_t const * __restrict__ y, @@ -143,9 +144,9 @@ namespace gpuPixelRecHits { // to global and compute phi... cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]); // here correct for the beamspot... 
- xg[h]-=bs[0]; - yg[h]-=bs[1]; - zg[h]-=bs[2]; + xg[h]-=bs->x; + yg[h]-=bs->y; + zg[h]-=bs->z; rg[h] = std::sqrt(xg[h]*xg[h]+yg[h]*yg[h]); iph[h] = unsafe_atan2s<7>(yg[h],xg[h]); From b0ede70c9972a0756f3ec73ba0f62c5ac6bc15c9 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sat, 16 Mar 2019 18:07:59 +0100 Subject: [PATCH 069/149] Make SiPixelGainCalibrationForHLTGPU available on the CPU (cms-patatrack#338) --- .../interface/SiPixelGainCalibrationForHLTGPU.h | 2 ++ .../SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index 96989c8a2c3b2..85768e37d8b02 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -17,6 +17,8 @@ class SiPixelGainCalibrationForHLTGPU { ~SiPixelGainCalibrationForHLTGPU(); const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const; + const SiPixelGainForHLTonGPU *getCPUProduct() const { return gainForHLTonHost_;} + const SiPixelGainCalibrationForHLT *getOriginalProduct() { return gains_;} private: const SiPixelGainCalibrationForHLT *gains_ = nullptr; diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc index 3aef3f44c8f67..59e0d4115583f 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -26,7 +26,7 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa */ cudaCheck(cudaMallocHost((void**) & gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU))); - //gainForHLTonHost_->v_pedestals = gainDataOnGPU_; // how to do this? 
+ gainForHLTonHost_->v_pedestals = (SiPixelGainForHLTonGPU_DecodingStructure*)this->gains_->data().data(); // so it can be used on CPU as well... // do not read back from the (possibly write-combined) memory buffer auto minPed = gains.getPedLow(); From 9163a102bd0248567a2076167edef1bf2235b981 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 14 Feb 2019 17:29:35 +0100 Subject: [PATCH 070/149] Rework the GPU pixel track clusterizer and vertex finder (cms-patatrack#338) Add two alternative (faster) track clusterizers: one based on DBSCAN, and one "by density"; use the latter by default. Allow all pixel triplets, but protect the vertex from triplets. Use a larger-than-needed nearest neighbours array to allow for possible duplicate pixels, as a pixel can appear more than once in the same event. Use a separate workspace for temporary data. --- .../plugins/gpuClusterChargeCut.h | 12 +- .../plugins/gpuClustering.h | 45 +- .../SiPixelClusterizer/test/BuildFile.xml | 15 +- .../test/cpuClustering_t.cpp | 1 + .../test/gpuClustering_t.cu | 1 + .../SiPixelClusterizer/test/gpuClustering_t.h | 391 ++++++++++++++++++ 6 files changed, 437 insertions(+), 28 deletions(-) create mode 100644 RecoLocalTracker/SiPixelClusterizer/test/cpuClustering_t.cpp create mode 100644 RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.cu create mode 100644 RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index 855216960d659..cf94b1f43094b 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -18,7 +18,7 @@ namespace gpuClustering { uint32_t * __restrict__ nClustersInModule, // modified: number of clusters found in each module uint32_t const * __restrict__ moduleId, // module id of each module int32_t * __restrict__ clusterId, // 
modified: cluster id of each pixel - int numElements) + uint32_t numElements) { if (blockIdx.x >= moduleStart[0]) @@ -43,12 +43,12 @@ namespace gpuClustering { auto first = firstPixel + threadIdx.x; __shared__ int32_t charge[MaxNumClustersPerModules]; - for (int i=threadIdx.x; ichargeCut ? 1 : 0; } @@ -76,13 +76,13 @@ namespace gpuClustering { __syncthreads(); // mark bad cluster again - for (int i=threadIdx.x; i= numElements) - return; - clusterId[i] = i; - if (InvId == id[i]) - return; - auto j = i - 1; - while (j >= 0 and id[j] == InvId) - --j; - if (j < 0 or id[j] != id[i]) { - // boundary... - auto loc = atomicInc(moduleStart, MaxNumModules); - moduleStart[loc + 1] = i; + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int i = first; i < numElements; i += gridDim.x*blockDim.x) { + clusterId[i] = i; + if (InvId == id[i]) + continue; + auto j = i - 1; + while (j >= 0 and id[j] == InvId) + --j; + if (j < 0 or id[j] != id[i]) { + // boundary... + auto loc = atomicInc(moduleStart, MaxNumModules); + moduleStart[loc + 1] = i; + } } } @@ -120,15 +120,20 @@ namespace gpuClustering { hist.fill(y[i],i-firstPixel); } +#ifdef __CUDA_ARCH__ // assume that we can cover the whole module with up to 10 blockDim.x-wide iterations constexpr int maxiter = 10; +#else + auto maxiter = hist.size(); +#endif + constexpr int maxNeighbours = 10; // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event if (threadIdx.x==0) { assert((hist.size()/ blockDim.x) <= maxiter); } // nearest neighbour - uint16_t nn[maxiter][5]; + uint16_t nn[maxiter][maxNeighbours]; uint8_t nnn[maxiter]; // number of nn - for (int k = 0; k < maxiter; ++k) + for (uint32_t k = 0; k < maxiter; ++k) nnn[k] = 0; __syncthreads(); // for hit filling! 
@@ -151,7 +156,7 @@ namespace gpuClustering { #endif // fill NN - for (int j=threadIdx.x, k = 0; j 1) continue; auto l = nnn[k]++; - assert(l<5); + assert(l < maxNeighbours); nn[k][l]=*p; } - } + } // for each pixel, look at all the pixels until the end of the module; // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; @@ -177,7 +182,7 @@ namespace gpuClustering { int nloops=0; while (__syncthreads_or(more)) { if (1==nloops%2) { - for (int j=threadIdx.x, k = 0; j - + @@ -40,7 +40,7 @@ - + @@ -48,3 +48,14 @@ + + + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/test/cpuClustering_t.cpp b/RecoLocalTracker/SiPixelClusterizer/test/cpuClustering_t.cpp new file mode 100644 index 0000000000000..19a3b8d014c9c --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/test/cpuClustering_t.cpp @@ -0,0 +1 @@ +#include "gpuClustering_t.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.cu b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.cu new file mode 100644 index 0000000000000..19a3b8d014c9c --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.cu @@ -0,0 +1 @@ +#include "gpuClustering_t.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h new file mode 100644 index 0000000000000..add45a98088c3 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -0,0 +1,391 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __CUDACC__ +#include +#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" +#endif +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" + +int main(void) +{ +#ifdef __CUDACC__ + exitSansCUDADevices(); + + if (cuda::device::count() == 0) { + std::cerr << "No CUDA devices on this 
system" << "\n"; + exit(EXIT_FAILURE); + } +#endif + + using namespace gpuClustering; + + int numElements = MaxNumPixels; + // these in reality are already on GPU + auto h_id = std::make_unique(numElements); + auto h_x = std::make_unique(numElements); + auto h_y = std::make_unique(numElements); + auto h_adc = std::make_unique(numElements); + + auto h_clus = std::make_unique(numElements); + +#ifdef __CUDACC__ + auto current_device = cuda::device::current::get(); + auto d_id = cuda::memory::device::make_unique(current_device, numElements); + auto d_x = cuda::memory::device::make_unique(current_device, numElements); + auto d_y = cuda::memory::device::make_unique(current_device, numElements); + auto d_adc = cuda::memory::device::make_unique(current_device, numElements); + + auto d_clus = cuda::memory::device::make_unique(current_device, numElements); + + auto d_moduleStart = cuda::memory::device::make_unique(current_device, MaxNumModules+1); + + auto d_clusInModule = cuda::memory::device::make_unique(current_device, MaxNumModules); + auto d_moduleId = cuda::memory::device::make_unique(current_device, MaxNumModules); +#else + + auto h_moduleStart = std::make_unique(MaxNumModules+1); + auto h_clusInModule = std::make_unique(MaxNumModules); + auto h_moduleId = std::make_unique(MaxNumModules); + +#endif + + // later random number + int n=0; + int ncl=0; + int y[10]={5,7,9,1,3,0,4,8,2,6}; + + auto generateClusters = [&](int kn) { + auto addBigNoise = 1==kn%2; + if (addBigNoise) { + constexpr int MaxPixels = 1000; + int id = 666; + for (int x=0; x<140; x+=3) { + for (int yy=0; yy<400; yy+=3) { + h_id[n]=id; + h_x[n]=x; + h_y[n]=yy; + h_adc[n]=1000; + ++n; ++ncl; + if (MaxPixels<=ncl) break; + } + if (MaxPixels<=ncl) break; + } + } + + { + // isolated + int id = 42; + int x = 10; + ++ncl; + h_id[n]=id; + h_x[n]=x; + h_y[n]=x; + h_adc[n]= kn==0 ? 
100 : 5000; + ++n; + + // first column + ++ncl; + h_id[n]=id; + h_x[n]=x; + h_y[n]=0; + h_adc[n]= 5000; + ++n; + // first columns + ++ncl; + h_id[n]=id; + h_x[n]=x+80; + h_y[n]=2; + h_adc[n]= 5000; + ++n; + h_id[n]=id; + h_x[n]=x+80; + h_y[n]=1; + h_adc[n]= 5000; + ++n; + + // last column + ++ncl; + h_id[n]=id; + h_x[n]=x; + h_y[n]=415; + h_adc[n]= 5000; + ++n; + // last columns + ++ncl; + h_id[n]=id; + h_x[n]=x+80; + h_y[n]=415; + h_adc[n]= 2500; + ++n; + h_id[n]=id; + h_x[n]=x+80; + h_y[n]=414; + h_adc[n]= 2500; + ++n; + + // diagonal + ++ncl; + for (int x=20; x<25; ++x) { + h_id[n]=id; + h_x[n]=x; + h_y[n]=x; + h_adc[n]=1000; + ++n; + } + ++ncl; + // reversed + for (int x=45; x>40; --x) { + h_id[n]=id; + h_x[n]=x; + h_y[n]=x; + h_adc[n]=1000; + ++n; + } + ++ncl; + h_id[n++]=InvId; // error + // messy + int xx[5] = {21,25,23,24,22}; + for (int k=0; k<5; ++k) { + h_id[n]=id; + h_x[n]=xx[k]; + h_y[n]=20+xx[k]; + h_adc[n]=1000; + ++n; + } + // holes + ++ncl; + for (int k=0; k<5; ++k) { + h_id[n]=id; + h_x[n]=xx[k]; + h_y[n]=100; + h_adc[n]= kn==2 ? 100 : 1000; + ++n; + if (xx[k]%2==0) { + h_id[n]=id; + h_x[n]=xx[k]; + h_y[n]=101; + h_adc[n]=1000; + ++n; + } + } + } + { + // id == 0 (make sure it works! + int id = 0; + int x = 10; + ++ncl; + h_id[n]=id; + h_x[n]=x; + h_y[n]=x; + h_adc[n]=5000; + ++n; + } + // all odd id + for(int id=11; id<=1800; id+=2) { + if ( (id/20)%2) h_id[n++]=InvId; // error + for (int x=0; x<40; x+=4) { + ++ncl; + if ((id/10)%2) { + for (int k=0; k<10; ++k) { + h_id[n]=id; + h_x[n]=x; + h_y[n]=x+y[k]; + h_adc[n]=100; + ++n; + h_id[n]=id; + h_x[n]=x+1; + h_y[n]=x+y[k]+2; + h_adc[n]=1000; + ++n; + } + } else { + for (int k=0; k<10; ++k) { + h_id[n]=id; + h_x[n]=x; + h_y[n]=x+y[9-k]; + h_adc[n]= kn==2 ? 10 : 1000; + ++n; + if (y[k]==3) continue; // hole + if (id==51) {h_id[n++]=InvId; h_id[n++]=InvId; }// error + h_id[n]=id; + h_x[n]=x+1; + h_y[n]=x+y[k]+2; + h_adc[n]= kn==2 ? 
10 : 1000; + ++n; + } + } + } + } + }; // end lambda + for (auto kkk=0; kkk<5; ++kkk) { + n=0; ncl=0; + generateClusters(kkk); + + std::cout << "created " << n << " digis in " << ncl << " clusters" << std::endl; + assert(n<=numElements); + + + uint32_t nModules=0; +#ifdef __CUDACC__ + size_t size32 = n * sizeof(unsigned int); + size_t size16 = n * sizeof(unsigned short); + // size_t size8 = n * sizeof(uint8_t); + + cuda::memory::copy(d_moduleStart.get(),&nModules,sizeof(uint32_t)); + + cuda::memory::copy(d_id.get(), h_id.get(), size16); + cuda::memory::copy(d_x.get(), h_x.get(), size16); + cuda::memory::copy(d_y.get(), h_y.get(), size16); + cuda::memory::copy(d_adc.get(), h_adc.get(), size16); + // Launch CUDA Kernels + int threadsPerBlock = (kkk==5) ? 512 : ((kkk==3) ? 128 : 256); + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout + << "CUDA countModules kernel launch with " << blocksPerGrid + << " blocks of " << threadsPerBlock << " threads\n"; + + cuda::launch( + countModules, + { blocksPerGrid, threadsPerBlock }, + d_id.get(), d_moduleStart.get() ,d_clus.get(),n + ); + + blocksPerGrid = MaxNumModules; //nModules; + + std::cout + << "CUDA findModules kernel launch with " << blocksPerGrid + << " blocks of " << threadsPerBlock << " threads\n"; + + cuda::memory::device::zero(d_clusInModule.get(),MaxNumModules*sizeof(uint32_t)); + + cuda::launch( + findClus, + { blocksPerGrid, threadsPerBlock }, + d_id.get(), d_x.get(), d_y.get(), + d_moduleStart.get(), + d_clusInModule.get(), d_moduleId.get(), + d_clus.get(), + n + ); + cudaDeviceSynchronize(); + + cuda::memory::copy(&nModules,d_moduleStart.get(),sizeof(uint32_t)); + + uint32_t nclus[MaxNumModules], moduleId[nModules]; + + cuda::memory::copy(&nclus,d_clusInModule.get(),MaxNumModules*sizeof(uint32_t)); + + std::cout << "before charge cut found " << std::accumulate(nclus,nclus+MaxNumModules,0) << " clusters" << std::endl; + for (auto i=MaxNumModules; i>0; i--) if (nclus[i-1]>0) 
{std::cout << "last module is " << i-1 << ' ' << nclus[i-1] << std::endl; break;} + if (ncl!=std::accumulate(nclus,nclus+MaxNumModules,0)) std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl; + + cuda::launch( + clusterChargeCut, + { blocksPerGrid, threadsPerBlock }, + d_id.get(), d_adc.get(), + d_moduleStart.get(), + d_clusInModule.get(), d_moduleId.get(), + d_clus.get(), + n + ); + + + cudaDeviceSynchronize(); +#else + h_moduleStart[0]= nModules; + countModules(h_id.get(), h_moduleStart.get() ,h_clus.get(),n); + memset(h_clusInModule.get(),0,MaxNumModules*sizeof(uint32_t)); + gridDim.x = MaxNumModules; //not needed in the kernel for this specific case; + assert(blockIdx.x==0); + for (;blockIdx.x0; i--) if (nclus[i-1]>0) {std::cout << "last module is " << i-1 << ' ' << nclus[i-1] << std::endl; break;} + if (ncl!=std::accumulate(nclus,nclus+MaxNumModules,0)) std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl; + + gridDim.x = MaxNumModules; // no needed in the kernel for in this specific case + assert(blockIdx.x==0); + for (;blockIdx.x clids; + for (int i=0; i=0); + assert(h_clus[i]0; i--) if (nclus[i-1]>0) {std::cout << "last module is " << i-1 << ' ' << nclus[i-1] << std::endl; break;} + // << " and " << seeds.size() << " seeds" << std::endl; + } /// end loop kkk + return 0; +} From 4c81ba3c6f37808e709b68550bf885b0bc03989b Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Mon, 25 Feb 2019 11:46:16 +0100 Subject: [PATCH 071/149] Improve pixel doublets and CA, and extend debugging functionality (cms-patatrack#338) Improve pixel doublets and CA: - add pixel cluster size and shape cuts in doublets; - add triplet cleaner; - improved cluster size studies - implement layer-dependent cuts in the CA. Add counters in GPU code and possibility to test full doublet combinatorics. Update python notebook and include z0 resolution. 
--- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 26 +++++++++++++++++++ .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 9 ++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index fa326865ced73..5776e054fd330 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -79,6 +79,10 @@ namespace pixelCPEforGPU { float xerr[N]; float yerr[N]; + + int16_t xsize[N]; // clipped at 127 if negative is edge.... + int16_t ysize[N]; + }; @@ -173,6 +177,28 @@ namespace pixelCPEforGPU { auto mx = llxl+urxl; auto my = llyl+uryl; + auto xsize = int(urxl)+2-int(llxl); + auto ysize = int(uryl)+2-int(llyl); + assert(xsize>=0); // 0 if bixpix... + assert(ysize>=0); + + if(phase1PixelTopology::isBigPixX(cp.minRow[ic])) ++xsize; + if(phase1PixelTopology::isBigPixX(cp.maxRow[ic])) ++xsize; + if(phase1PixelTopology::isBigPixY(cp.minCol[ic])) ++ysize; + if(phase1PixelTopology::isBigPixY(cp.maxCol[ic])) ++ysize; + + int unbalanceX = 8.*std::abs(float(cp.Q_f_X[ic]-cp.Q_l_X[ic]))/float(cp.Q_f_X[ic]+cp.Q_l_X[ic]); + int unbalanceY = 8.*std::abs(float(cp.Q_f_Y[ic]-cp.Q_l_Y[ic]))/float(cp.Q_f_Y[ic]+cp.Q_l_Y[ic]); + xsize = 8*xsize - unbalanceX; + ysize = 8*ysize - unbalanceY; + + cp.xsize[ic] = std::min(xsize,1023); + cp.ysize[ic] = std::min(ysize,1023); + + if(cp.minRow[ic]==0 || cp.maxRow[ic]==phase1PixelTopology::lastRowInModule) cp.xsize[ic] = -cp.xsize[ic]; + if(cp.minCol[ic]==0 || cp.maxCol[ic]==phase1PixelTopology::lastColInModule) cp.ysize[ic] = -cp.ysize[ic]; + + // apply the lorentz offset correction auto xPos = detParams.shiftX + comParams.thePitchX*(0.5f*float(mx)+float(phase1PixelTopology::xOffset)); auto yPos = detParams.shiftY + comParams.thePitchY*(0.5f*float(my)+float(phase1PixelTopology::yOffset)); diff --git 
a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index cbd354e71143e..caf58c0615dbb 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -32,7 +32,9 @@ namespace gpuPixelRecHits { float * xg, float * yg, float * zg, float * rg, int16_t * iph, float * xl, float * yl, float * xe, float * ye, - uint16_t * mr, uint16_t * mc) + uint16_t * mr, uint16_t * mc, + int16_t * xs, int16_t * ys + ) { // to be moved in common namespace... @@ -136,10 +138,15 @@ namespace gpuPixelRecHits { xl[h]= clusParams.xpos[ic]; yl[h]= clusParams.ypos[ic]; + xs[h]= clusParams.xsize[ic]; + ys[h]= clusParams.ysize[ic]; + + xe[h]= clusParams.xerr[ic]*clusParams.xerr[ic]; ye[h]= clusParams.yerr[ic]*clusParams.yerr[ic]; mr[h]= clusParams.minRow[ic]; mc[h]= clusParams.minCol[ic]; + // to global and compute phi... cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]); From f14c0f49a4e8f295fab04a3772074a1d43594a53 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Fri, 29 Mar 2019 06:50:23 -0400 Subject: [PATCH 072/149] Migrate the pixel rechits producer and CA to the new heterogeneous framework (cms-patatrack#338) Use cleaned hits. Use pixel layer and ladders geometry, and use pixel triplets in the gaps. Optimise GPU memory usage: - reduce the number of memory allocations - fix the size of the cub workspace - allocate memory per event via the caching allocator - use constant memory for geometry and parameters - use shared memory where the content is the same for every thread Optimise kernel launches, and add a protection for empty events and overflows. 
--- .../python/RecoLocalTracker_cff.py | 6 +- .../plugins/SiPixelDigisClustersFromSoA.cc | 2 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 85 +++++++++--- .../plugins/gpuCalibPixel.h | 123 +++++------------- .../SiPixelRecHits/interface/PixelCPEFast.h | 1 + .../SiPixelRecHits/interface/pixelCPEforGPU.h | 22 +++- .../SiPixelRecHits/plugins/BuildFile.xml | 1 + .../plugins/SiPixelRecHitCUDA.cc | 118 +++++++++++++++++ .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 74 ++++++----- .../python/SiPixelRecHits_cfi.py | 33 ++++- .../SiPixelRecHits/src/PixelCPEFast.cc | 49 ++++++- 11 files changed, 353 insertions(+), 161 deletions(-) create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index a486a83d178f4..08f871e45f8d7 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -13,7 +13,7 @@ from RecoLocalTracker.SiPixelRecHits.SiPixelRecHits_cfi import * from RecoLocalTracker.SubCollectionProducers.clustersummaryproducer_cfi import * -pixeltrackerlocalrecoTask = cms.Task(siPixelClustersPreSplittingTask,siPixelRecHitsPreSplitting) +pixeltrackerlocalrecoTask = cms.Task(siPixelClustersPreSplittingTask,siPixelRecHitsPreSplittingTask) striptrackerlocalrecoTask = cms.Task(siStripZeroSuppression,siStripClusters,siStripMatchedRecHits) trackerlocalrecoTask = cms.Task(pixeltrackerlocalrecoTask,striptrackerlocalrecoTask,clusterSummaryProducer) @@ -21,10 +21,6 @@ striptrackerlocalreco = cms.Sequence(striptrackerlocalrecoTask) trackerlocalreco = cms.Sequence(trackerlocalrecoTask) -from Configuration.ProcessModifiers.gpu_cff import gpu -from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import siPixelRecHitHeterogeneous as _siPixelRecHitHeterogeneous -gpu.toReplaceWith(siPixelRecHitsPreSplitting, 
_siPixelRecHitHeterogeneous) - from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import * from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import * diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index 2c7da14cf72af..ba184d766feaf 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -149,7 +149,7 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con } // fill final clusters - fillClusters((*detDigis).detId()); + if (detDigis) fillClusters((*detDigis).detId()); //std::cout << "filled " << totCluseFilled << " clusters" << std::endl; iEvent.put(digiPutToken_, std::move(collection)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 8fdb2ed8c90d5..6a832128c1cc2 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -36,6 +36,7 @@ #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h" +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" // local includes #include "SiPixelRawToClusterGPUKernel.h" @@ -456,6 +457,54 @@ namespace pixelgpudetails { } // end of Raw to Digi kernel + + __global__ + void fillHitsModuleStart(uint32_t const * __restrict__ cluStart, uint32_t * __restrict__ moduleStart) { + + assert(gpuClustering::MaxNumModules<2048); // easy to extend at least till 32*1024 + assert(1==gridDim.x); + 
assert(0==blockIdx.x); + + int first = threadIdx.x; + + // limit to MaxHitsInModule; + for (int i=first, iend=gpuClustering::MaxNumModules; i=moduleStart[1023]); + assert(moduleStart[1025]>=moduleStart[1024]); + assert(moduleStart[gpuClustering::MaxNumModules]>=moduleStart[1025]); + + for (int i=first, iend=gpuClustering::MaxNumModules+1; i=moduleStart[i-i]); + // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] + // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] + if (i==96 || i==1184 || i==1744 || i==gpuClustering::MaxNumModules) printf("moduleStart %d %d\n",i,moduleStart[i]); + } +#endif + + // avoid overflow + constexpr auto MAX_HITS = gpuClustering::MaxNumClusters; + for (int i=first, iend=gpuClustering::MaxNumModules+1; i MAX_HITS) moduleStart[i] = MAX_HITS; + } + } + + // Interface to outside void SiPixelRawToClusterGPUKernel::makeClustersAsync( const SiPixelFedCablingMapGPU *cablingMap, @@ -478,6 +527,7 @@ namespace pixelgpudetails { edm::Service cs; nModules_Clusters_h = cs->make_host_unique(2, stream); + if (wordCounter) // protect in case of empty event.... { const int threadsPerBlock = 512; const int blocks = (wordCounter + threadsPerBlock-1) /threadsPerBlock; // fill it all @@ -511,19 +561,24 @@ namespace pixelgpudetails { digiErrors_d.copyErrorToHostAsync(stream); } } - // End of Raw2Digi and passing data for cluserisation + // End of Raw2Digi and passing data for clustering { // clusterizer ... 
using namespace gpuClustering; int threadsPerBlock = 256; - int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; + int blocks = (std::max(int(wordCounter),int(gpuClustering::MaxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; + gpuCalibPixel::calibDigis<<>>( digis_d.moduleInd(), digis_d.c_xx(), digis_d.c_yy(), digis_d.adc(), gains, - wordCounter); + wordCounter, + clusters_d.moduleStart(), + clusters_d.clusInModule(), + clusters_d.clusModuleStart() + ); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG @@ -532,8 +587,6 @@ namespace pixelgpudetails { << " blocks of " << threadsPerBlock << " threads\n"; #endif - cudaCheck(cudaMemsetAsync(clusters_d.moduleStart(), 0x00, sizeof(uint32_t), stream.id())); - countModules<<>>(digis_d.c_moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); @@ -546,7 +599,6 @@ namespace pixelgpudetails { std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; #endif - cudaCheck(cudaMemsetAsync(clusters_d.clusInModule(), 0, (MaxNumModules)*sizeof(uint32_t), stream.id())); findClus<<>>( digis_d.c_moduleInd(), digis_d.c_xx(), digis_d.c_yy(), @@ -567,26 +619,19 @@ namespace pixelgpudetails { cudaCheck(cudaGetLastError()); + // count the module start indices already here (instead of // rechits) so that the number of clusters/hits can be made // available in the rechit producer without additional points of // synchronization/ExternalWork - // - // Temporary storage - size_t tempScanStorageSize = 0; - { - uint32_t *tmp = nullptr; - cudaCheck(cub::DeviceScan::InclusiveSum(nullptr, tempScanStorageSize, tmp, tmp, MaxNumModules)); - } - auto tempScanStorage_d = cs->make_device_unique(tempScanStorageSize, stream); - // Set first the first element to 0 - cudaCheck(cudaMemsetAsync(clusters_d.clusModuleStart(), 0, sizeof(uint32_t), stream.id())); - // Then use inclusive_scan to get the partial sum to the rest - 
cudaCheck(cub::DeviceScan::InclusiveSum(tempScanStorage_d.get(), tempScanStorageSize, - clusters_d.c_clusInModule(), &clusters_d.clusModuleStart()[1], gpuClustering::MaxNumModules, - stream.id())); + + // MUST be ONE block + fillHitsModuleStart<<<1, 1024, 0, stream.id()>>>(clusters_d.c_clusInModule(),clusters_d.clusModuleStart()); + // last element holds the number of all clusters cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), clusters_d.clusModuleStart()+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + + } // end clusterizer scope } } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 5a681e791f94f..5087516fa009d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -5,6 +5,9 @@ #include #include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" + +#include "gpuClusteringConstants.h" + #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" namespace gpuCalibPixel { @@ -22,104 +25,46 @@ namespace gpuCalibPixel { uint16_t const * __restrict__ y, uint16_t * adc, SiPixelGainForHLTonGPU const * __restrict__ ped, - int numElements + int numElements, + uint32_t * __restrict__ moduleStart, // just to zero first + uint32_t * __restrict__ nClustersInModule, // just to zero them + uint32_t * __restrict__ clusModuleStart // just to zero first ) { - int i = blockDim.x * blockIdx.x + threadIdx.x; - if (i >= numElements) return; - if (InvId==id[i]) return; + int first = blockDim.x * blockIdx.x + threadIdx.x; - float conversionFactor = id[i]<96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; - float offset = id[i]<96 ? 
VCaltoElectronOffset_L1 : VCaltoElectronOffset; - - bool isDeadColumn=false, isNoisyColumn=false; - - int row = x[i]; - int col = y[i]; - auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); - float pedestal = ret.first; float gain = ret.second; - // float pedestal = 0; float gain = 1.; - if ( isDeadColumn | isNoisyColumn ) - { - id[i]=InvId; adc[i] =0; - printf("bad pixel at %d in %d\n",i,id[i]); - } - else { - float vcal = adc[i] * gain - pedestal*gain; - adc[i] = std::max(100, int( vcal * conversionFactor + offset)); + // zero for next kernels... + if (0==first) clusModuleStart[0] = moduleStart[0]=0; + for (int i = first; i < gpuClustering::MaxNumModules; i += gridDim.x*blockDim.x) { + nClustersInModule[i]=0; } - // if (threadIdx.x==0) - // printf ("calibrated %d\n",id[i]); -} - - __global__ void calibADCByModule(uint16_t * id, - uint16_t const * __restrict__ x, - uint16_t const * __restrict__ y, - uint16_t * adc, - uint32_t * moduleStart, - SiPixelGainForHLTonGPU const * __restrict__ ped, - int numElements - ) -{ - - - auto first = moduleStart[1 + blockIdx.x]; - - auto me = id[first]; - - assert(me<2000); + for (int i = first; i < numElements; i += gridDim.x*blockDim.x) { + if (InvId==id[i]) continue; - /// depends on "me" + float conversionFactor = id[i]<96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; + float offset = id[i]<96 ? VCaltoElectronOffset_L1 : VCaltoElectronOffset; - float conversionFactor = me<96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; - float offset = me<96 ? VCaltoElectronOffset_L1 : VCaltoElectronOffset; + bool isDeadColumn=false, isNoisyColumn=false; - -#ifdef GPU_DEBUG - if (me%100==1) - if (threadIdx.x==0) printf("start pixel calibration for module %d in block %d\n",me,blockIdx.x); -#endif - - first+=threadIdx.x; - - // __syncthreads(); - - float pedestal=0,gain=0; - bool isDeadColumn=false, isNoisyColumn=false; - int oldCol=-1, oldAveragedBlock=-1; - - for (int i=first; inumberOfRowsAveragedOver_; // 80.... 
( row<80 will be faster...) - if ( (col!=oldCol) | ( averagedBlock != oldAveragedBlock) ) { - oldCol=col; oldAveragedBlock= averagedBlock; - auto ret = ped->getPedAndGain(me,col, row, isDeadColumn, isNoisyColumn); - pedestal = ret.first; gain = ret.second; - } - if ( isDeadColumn | isNoisyColumn ) - { id[i]=InvId; adc[i] =0; } - else { - float vcal = adc[i] * gain - pedestal*gain; - adc[i] = std::max(100, int( vcal * conversionFactor + offset)); - } - } - - __syncthreads(); - //reset start - if(0==threadIdx.x) { - auto & k = moduleStart[1 + blockIdx.x]; - while (id[k]==InvId) ++k; + int row = x[i]; + int col = y[i]; + auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); + float pedestal = ret.first; float gain = ret.second; + // float pedestal = 0; float gain = 1.; + if ( isDeadColumn | isNoisyColumn ) + { + id[i]=InvId; adc[i] =0; + printf("bad pixel at %d in %d\n",i,id[i]); + } + else { + float vcal = adc[i] * gain - pedestal*gain; + adc[i] = std::max(100, int( vcal * conversionFactor + offset)); + } } - - - } - - + + } } -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h +#endif diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 9b8924988e848..82d71ce19f01e 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -83,6 +83,7 @@ class PixelCPEFast final : public PixelCPEBase std::vector> m_detParamsGPU; pixelCPEforGPU::CommonParams m_commonParamsGPU; + pixelCPEforGPU::LayerGeometry m_layerGeometry; struct GPUData { ~GPUData(); diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 5776e054fd330..c33ccc85b16cb 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -6,9 +6,11 @@ #include #include 
+#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "DataFormats/GeometrySurface/interface/SOARotation.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_cxx17.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" namespace pixelCPEforGPU { @@ -43,9 +45,16 @@ namespace pixelCPEforGPU { }; - struct ParamsOnGPU { + struct LayerGeometry { + uint32_t layerStart[phase1PixelTopology::numberOfLayers + 1]; + uint8_t layer[phase1PixelTopology::layerIndexSize]; + }; + + struct ParamsOnGPU { + CommonParams * m_commonParams; DetParams * m_detParams; + LayerGeometry * m_layerGeometry; constexpr CommonParams const & __restrict__ commonParams() const { @@ -57,6 +66,13 @@ namespace pixelCPEforGPU { DetParams const * __restrict__ l = m_detParams; return l[i]; } + constexpr + LayerGeometry const & __restrict__ layerGeometry() const { + return *m_layerGeometry; + } + + __device__ uint8_t layer(uint16_t id) const { return __ldg(m_layerGeometry->layer+id/phase1PixelTopology::maxModuleStride);}; + }; // SOA (on device) @@ -86,8 +102,8 @@ namespace pixelCPEforGPU { }; - constexpr uint32_t MaxClusInModule=256; - using ClusParams = ClusParamsT<256>; + constexpr uint32_t MaxHitsInModule = gpuClustering::MaxHitsInModule; + using ClusParams = ClusParamsT; constexpr inline void computeAnglesFromDet(DetParams const & __restrict__ detParams, float const x, float const y, float & cotalpha, float & cotbeta) { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 27ee3af86e102..9385896a5e287 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,4 +1,5 @@ + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc new file mode 100644 
index 0000000000000..7e17f6c029ac2 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -0,0 +1,118 @@ +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" + + +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" + +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" + +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" + +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" + +#include "PixelRecHits.h" // TODO : spit product from kernel + +#include + +class SiPixelRecHitCUDA : public edm::global::EDProducer<> { + +public: + + + explicit SiPixelRecHitCUDA(const edm::ParameterSet& iConfig); + ~SiPixelRecHitCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + // The mess with 
inputs will be cleaned up when migrating to the new framework + edm::EDGetTokenT> tBeamSpot; + edm::EDGetTokenT> token_; + edm::EDGetTokenT> tokenDigi_; + + edm::EDPutTokenT> tokenHit_; + + std::string cpeName_; + + pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_; + +}; + +SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig): + tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), + token_(consumes>(iConfig.getParameter("src"))), + tokenDigi_(consumes>(iConfig.getParameter("src"))), + tokenHit_(produces>()), + cpeName_(iConfig.getParameter("CPE")) +{} + +void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpotCUDA")); + desc.add("src", edm::InputTag("siPixelClustersCUDAPreSplitting")); + desc.add("CPE", "PixelCPEFast"); + descriptions.add("siPixelRecHitCUDA",desc); +} + + +void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { + + // const TrackerGeometry *geom_ = nullptr; + const PixelClusterParameterEstimator *cpe_ = nullptr; + + /* + edm::ESHandle geom; + es.get().get( geom ); + geom_ = geom.product(); + */ + + edm::ESHandle hCPE; + es.get().get(cpeName_, hCPE); + cpe_ = dynamic_cast< const PixelCPEBase* >(hCPE.product()); + + PixelCPEFast const * fcpe = dynamic_cast(cpe_); + if (!fcpe) { + throw cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; + } + + edm::Handle> hclusters; + iEvent.getByToken(token_, hclusters); + + CUDAScopedContext ctx{*hclusters}; + auto const& clusters = ctx.get(*hclusters); + + edm::Handle> hdigis; + iEvent.getByToken(tokenDigi_, hdigis); + auto const& digis = ctx.get(*hdigis); + + edm::Handle> hbs; + iEvent.getByToken(tBeamSpot, hbs); + auto const& bs = ctx.get(*hbs); + + ctx.emplace(iEvent,tokenHit_, + std::move( + gpuAlgo_.makeHitsAsync(digis, clusters, bs, 
fcpe->getGPUProductAsync(ctx.stream()), ctx.stream()) + )); +} + +DEFINE_FWK_MODULE(SiPixelRecHitCUDA); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index caf58c0615dbb..2874df10c16c3 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -6,15 +6,13 @@ #include #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "DataFormats/Math/interface/approx_atan2.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" - namespace gpuPixelRecHits { - - __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams, BeamSpotCUDA::Data const * __restrict__ bs, uint16_t const * __restrict__ id, @@ -27,19 +25,15 @@ namespace gpuPixelRecHits { int32_t const * __restrict__ clus, int numElements, uint32_t const * __restrict__ hitsModuleStart, - int32_t * chargeh, - uint16_t * detInd, - float * xg, float * yg, float * zg, float * rg, int16_t * iph, - float * xl, float * yl, - float * xe, float * ye, - uint16_t * mr, uint16_t * mc, - int16_t * xs, int16_t * ys - ) - { + TrackingRecHit2DSOAView * phits + ) +{ + + auto & hits = *phits; // to be moved in common namespace... constexpr uint16_t InvId=9999; // must be > MaxNumModules - constexpr uint32_t MaxClusInModule = pixelCPEforGPU::MaxClusInModule; + constexpr uint32_t MaxHitsInModule = pixelCPEforGPU::MaxHitsInModule; using ClusParams = pixelCPEforGPU::ClusParams; @@ -66,14 +60,14 @@ namespace gpuPixelRecHits { if (threadIdx.x==0) printf("hitbuilder: %d clusters in module %d. 
will write at %d\n", nclus, me, hitsModuleStart[me]); #endif - assert(blockDim.x >= MaxClusInModule); + assert(blockDim.x >= MaxHitsInModule); - if (threadIdx.x==0 && nclus > MaxClusInModule) { - printf("WARNING: too many clusters %d in Module %d. Only first %d processed\n", nclus,me,MaxClusInModule); + if (threadIdx.x==0 && nclus > MaxHitsInModule) { + printf("WARNING: too many clusters %d in Module %d. Only first %d processed\n", nclus,me,MaxHitsInModule); // zero charge: do not bother to do it in parallel - for (auto d=MaxClusInModule; d= nclus) return; first = hitsModuleStart[me]; auto h = first+ic; // output index in global memory - assert(h < 2000*256); + if (h >= TrackingRecHit2DSOAView::maxHits()) return; // overflow... pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - chargeh[h] = clusParams.charge[ic]; - detInd[h] = me; + // store it + + hits.charge(h) = clusParams.charge[ic]; - xl[h]= clusParams.xpos[ic]; - yl[h]= clusParams.ypos[ic]; + hits.detectorIndex(h) = me; - xs[h]= clusParams.xsize[ic]; - ys[h]= clusParams.ysize[ic]; + float xl,yl; + hits.xLocal(h) = xl = clusParams.xpos[ic]; + hits.yLocal(h) = yl = clusParams.ypos[ic]; + hits.clusterSizeX(h) = clusParams.xsize[ic]; + hits.clusterSizeY(h) = clusParams.ysize[ic]; - xe[h]= clusParams.xerr[ic]*clusParams.xerr[ic]; - ye[h]= clusParams.yerr[ic]*clusParams.yerr[ic]; - mr[h]= clusParams.minRow[ic]; - mc[h]= clusParams.minCol[ic]; - + hits.xerrLocal(h) = clusParams.xerr[ic]*clusParams.xerr[ic]; + hits.yerrLocal(h) = clusParams.yerr[ic]*clusParams.yerr[ic]; + + // keep it local for computations + float xg,yg,zg; // to global and compute phi... - cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]); + cpeParams->detParams(me).frame.toGlobal(xl,yl, xg,yg,zg); // here correct for the beamspot... 
- xg[h]-=bs->x; - yg[h]-=bs->y; - zg[h]-=bs->z; + xg-=bs->x; + yg-=bs->y; + zg-=bs->z; + + hits.xGlobal(h) = xg; + hits.yGlobal(h) = yg; + hits.zGlobal(h) = zg; - rg[h] = std::sqrt(xg[h]*xg[h]+yg[h]*yg[h]); - iph[h] = unsafe_atan2s<7>(yg[h],xg[h]); + hits.rGlobal(h) = std::sqrt(xg*xg+yg*yg); + hits.iphi(h) = unsafe_atan2s<7>(yg,xg); } diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 465aa0bb346ce..8995471470f37 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -6,6 +6,37 @@ VerboseLevel = cms.untracked.int32(0) ) -siPixelRecHitsPreSplitting = siPixelRecHits.clone( +_siPixelRecHitsPreSplitting = siPixelRecHits.clone( src = 'siPixelClustersPreSplitting' ) + +from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA +siPixelRecHitsPreSplitting = SwitchProducerCUDA( + cpu = _siPixelRecHitsPreSplitting.clone() +) + + + +from Configuration.ProcessModifiers.gpu_cff import gpu +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitCUDA_cfi import siPixelRecHitCUDA as _siPixelRecHitCUDA +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSOA_cfi import siPixelRecHitFromSOA as _siPixelRecHitFromSOA + +gpu.toModify(siPixelRecHitsPreSplitting, + cuda = _siPixelRecHitFromSOA.clone() +) + + +siPixelRecHitsPreSplittingTask = cms.Task(siPixelRecHitsPreSplitting) + +siPixelRecHitsCUDAPreSplitting = _siPixelRecHitCUDA.clone() +siPixelRecHitsLegacyPreSplitting = _siPixelRecHitFromSOA.clone() +siPixelRecHitsPreSplittingTaskCUDA = cms.Task( + siPixelRecHitsCUDAPreSplitting, + siPixelRecHitsLegacyPreSplitting, +) + +from Configuration.ProcessModifiers.gpu_cff import gpu +_siPixelRecHitsPreSplittingTask_gpu = siPixelRecHitsPreSplittingTask.copy() +_siPixelRecHitsPreSplittingTask_gpu.add(siPixelRecHitsPreSplittingTaskCUDA) +gpu.toReplaceWith(siPixelRecHitsPreSplittingTask, 
_siPixelRecHitsPreSplittingTask_gpu) + diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index eb51dd5a2eaeb..1111e4866a8d2 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -22,6 +22,8 @@ namespace { constexpr float micronsToCm = 1.0e-4; } + + //----------------------------------------------------------------------------- //! The constructor. //----------------------------------------------------------------------------- @@ -68,13 +70,16 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf, const pixelCPEforGPU::ParamsOnGPU *PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) { + // and now copy to device... cudaCheck(cudaMalloc((void**) & data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); cudaCheck(cudaMalloc((void**) & data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams))); + cudaCheck(cudaMalloc((void**) & data.h_paramsOnGPU.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); cudaCheck(cudaMalloc((void**) & data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); cudaCheck(cudaMemcpyAsync(data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream.id())); cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_commonParams, &this->m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_layerGeometry, &this->m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), cudaMemcpyDefault, stream.id())); cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.data(), this->m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault, stream.id())); }); return 
data.d_paramsOnGPU; @@ -86,7 +91,13 @@ void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; - //uint32_t oldLayer = 0; + uint32_t oldLayer = 0; + uint32_t oldLadder=0; + float rl=0; + float zl = 0; + float miz = 90, mxz=0; + float pl = 0; + int nl=0; m_detParamsGPU.resize(m_DetParams.size()); for (auto i=0U; isurface().rotation()); g.frame = pixelCPEforGPU::Frame(vv.x(),vv.y(),vv.z(),rr); + zl+=vv.z(); + miz = std::min(miz,std::abs(vv.z())); + mxz = std::max(mxz,std::abs(vv.z())); + rl+=vv.perp(); + pl+=vv.phi(); // (not obvious) // errors ..... ClusterParamGeneric cp; @@ -196,13 +227,19 @@ void PixelCPEFast::fillParamsForGpu() { } */ - for (int i=0; i<3; ++i) { g.sx[i] = std::sqrt(g.sx[i]*g.sx[i]+lape.xx()); g.sy[i] = std::sqrt(g.sy[i]*g.sy[i]+lape.yy()); } } + + // fill Layer and ladders geometry + memcpy(m_layerGeometry.layerStart, phase1PixelTopology::layerStart, sizeof(phase1PixelTopology::layerStart)); + memcpy(m_layerGeometry.layer, phase1PixelTopology::layer.data(), phase1PixelTopology::layer.size()); + + + } PixelCPEFast::~PixelCPEFast() {} From 2cdc2cfa010a20493aa74578a360fd00a4476a5a Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 14 May 2019 23:31:50 +0200 Subject: [PATCH 073/149] Clean up by clang-format (cms-patatrack#338) --- .../SiPixelGainCalibrationForHLTGPU.h | 20 +- .../src/SiPixelGainCalibrationForHLTGPU.cc | 96 +-- .../plugins/SiPixelDigisClustersFromSoA.cc | 135 ++-- .../plugins/SiPixelRawToClusterGPUKernel.cu | 677 ++++++++-------- .../plugins/gpuCalibPixel.h | 79 +- .../plugins/gpuClusterChargeCut.h | 70 +- .../plugins/gpuClustering.h | 256 +++--- .../SiPixelClusterizer/test/gpuClustering_t.h | 600 +++++++------- .../SiPixelRecHits/interface/PixelCPEFast.h | 153 ++-- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 289 +++---- .../plugins/SiPixelRecHitCUDA.cc | 59 +- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 133 ++-- 
.../SiPixelRecHits/src/PixelCPEFast.cc | 743 +++++++++--------- 13 files changed, 1705 insertions(+), 1605 deletions(-) diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index 85768e37d8b02..d6d2e1a262dc8 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -1,11 +1,11 @@ -#ifndef CalibTracker_SiPixelESProducers_SiPixelGainCalibrationForHLTGPU_H -#define CalibTracker_SiPixelESProducers_SiPixelGainCalibrationForHLTGPU_H - -#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#ifndef CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h +#define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h #include +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" + class SiPixelGainCalibrationForHLT; class SiPixelGainForHLTonGPU; struct SiPixelGainForHLTonGPU_DecodingStructure; @@ -13,12 +13,12 @@ class TrackerGeometry; class SiPixelGainCalibrationForHLTGPU { public: - explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom); + explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT &gains, const TrackerGeometry &geom); ~SiPixelGainCalibrationForHLTGPU(); - const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const; - const SiPixelGainForHLTonGPU *getCPUProduct() const { return gainForHLTonHost_;} - const SiPixelGainCalibrationForHLT *getOriginalProduct() { return gains_;} + const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const; + const SiPixelGainForHLTonGPU *getCPUProduct() const { 
return gainForHLTonHost_; } + const SiPixelGainCalibrationForHLT *getOriginalProduct() { return gains_; } private: const SiPixelGainCalibrationForHLT *gains_ = nullptr; @@ -31,4 +31,4 @@ class SiPixelGainCalibrationForHLTGPU { CUDAESProduct gpuData_; }; -#endif +#endif // CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc index 59e0d4115583f..d94e9f1959190 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -1,22 +1,23 @@ +#include + #include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" #include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" #include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include - -SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom): - gains_(&gains) -{ +SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, + const TrackerGeometry& geom) + : gains_(&gains) { // bizzarre logic (looking for fist strip-det) don't ask - auto const & dus = geom.detUnits(); + auto const& dus = geom.detUnits(); unsigned m_detectors = dus.size(); - for(unsigned int i=1;i<7;++i) { - if(geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) != dus.size() && + for (unsigned int i = 1; i < 7; ++i) { + if (geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) != dus.size() && 
dus[geom.offsetDU(GeomDetEnumerators::tkDetEnum[i])]->type().isTrackerStrip()) { - if(geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) < m_detectors) m_detectors = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]); + if (geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) < m_detectors) + m_detectors = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]); } } @@ -25,12 +26,13 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa std::cout << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure) << std::endl; */ - cudaCheck(cudaMallocHost((void**) & gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU))); - gainForHLTonHost_->v_pedestals = (SiPixelGainForHLTonGPU_DecodingStructure*)this->gains_->data().data(); // so it can be used on CPU as well... + cudaCheck(cudaMallocHost((void**)&gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU))); + gainForHLTonHost_->v_pedestals = + (SiPixelGainForHLTonGPU_DecodingStructure*)this->gains_->data().data(); // so it can be used on CPU as well... // do not read back from the (possibly write-combined) memory buffer - auto minPed = gains.getPedLow(); - auto maxPed = gains.getPedHigh(); + auto minPed = gains.getPedLow(); + auto maxPed = gains.getPedHigh(); auto minGain = gains.getGainLow(); auto maxGain = gains.getGainHigh(); auto nBinsToUseForEncoding = 253; @@ -38,15 +40,15 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa // we will simplify later (not everything is needed....) 
gainForHLTonHost_->minPed_ = minPed; gainForHLTonHost_->maxPed_ = maxPed; - gainForHLTonHost_->minGain_= minGain; - gainForHLTonHost_->maxGain_= maxGain; + gainForHLTonHost_->minGain_ = minGain; + gainForHLTonHost_->maxGain_ = maxGain; gainForHLTonHost_->numberOfRowsAveragedOver_ = 80; - gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding; - gainForHLTonHost_->deadFlag_ = 255; - gainForHLTonHost_->noisyFlag_ = 254; + gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding; + gainForHLTonHost_->deadFlag_ = 255; + gainForHLTonHost_->noisyFlag_ = 254; - gainForHLTonHost_->pedPrecision = static_cast(maxPed - minPed) / nBinsToUseForEncoding; + gainForHLTonHost_->pedPrecision = static_cast(maxPed - minPed) / nBinsToUseForEncoding; gainForHLTonHost_->gainPrecision = static_cast(maxGain - minGain) / nBinsToUseForEncoding; /* @@ -54,45 +56,51 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa */ // fill the index map - auto const & ind = gains.getIndexes(); + auto const& ind = gains.getIndexes(); /* std::cout << ind.size() << " " << m_detectors << std::endl; */ - for (auto i=0U; igeographicalId().rawId(),SiPixelGainCalibrationForHLT::StrictWeakOrdering()); - assert (p!=ind.end() && p->detid==dus[i]->geographicalId()); - assert(p->iend<=gains.data().size()); - assert(p->iend>=p->ibegin); - assert(0==p->ibegin%2); - assert(0==p->iend%2); - assert(p->ibegin!=p->iend); - assert(p->ncols>0); - gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin,p->iend), p->ncols); + for (auto i = 0U; i < m_detectors; ++i) { + auto p = std::lower_bound( + ind.begin(), ind.end(), dus[i]->geographicalId().rawId(), SiPixelGainCalibrationForHLT::StrictWeakOrdering()); + assert(p != ind.end() && p->detid == dus[i]->geographicalId()); + assert(p->iend <= gains.data().size()); + assert(p->iend >= p->ibegin); + assert(0 == p->ibegin % 2); + assert(0 == p->iend % 2); + assert(p->ibegin != p->iend); + 
assert(p->ncols > 0); + gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin, p->iend), p->ncols); // if (ind[i].detid!=dus[i]->geographicalId()) std::cout << ind[i].detid<<"!="<geographicalId() << std::endl; // gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols); } - } -SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() { - cudaCheck(cudaFreeHost(gainForHLTonHost_)); -} +SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() { cudaCheck(cudaFreeHost(gainForHLTonHost_)); } SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() { cudaCheck(cudaFree(gainForHLTonGPU)); cudaCheck(cudaFree(gainDataOnGPU)); } -const SiPixelGainForHLTonGPU *SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { +const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) { - cudaCheck(cudaMalloc((void**) & data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU))); - cudaCheck(cudaMalloc((void**) & data.gainDataOnGPU, this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<> - // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory - cudaCheck(cudaMemcpyAsync(data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id())); - - cudaCheck(cudaMemcpyAsync(data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), &(data.gainDataOnGPU), sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), cudaMemcpyDefault, stream.id())); - }); + cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, 
sizeof(SiPixelGainForHLTonGPU))); + cudaCheck( + cudaMalloc((void**)&data.gainDataOnGPU, + this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<> + // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory + cudaCheck(cudaMemcpyAsync( + data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id())); + + cudaCheck(cudaMemcpyAsync( + data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), + &(data.gainDataOnGPU), + sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), + cudaMemcpyDefault, + stream.id())); + }); return data.gainForHLTonGPU; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index ba184d766feaf..c0c78b29ec4a5 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -5,13 +5,13 @@ #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" #include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/Framework/interface/global/EDProducer.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "Geometry/Records/interface/TrackerTopologyRcd.h" namespace { @@ -21,34 +21,35 @@ 
namespace { UShort adc[MAXSIZE]; UShort x[MAXSIZE]; UShort y[MAXSIZE]; - UShort xmin=16000; - UShort ymin=16000; - unsigned int isize=0; - int charge=0; + UShort xmin = 16000; + UShort ymin = 16000; + unsigned int isize = 0; + int charge = 0; void clear() { - isize=0; - charge=0; - xmin=16000; - ymin=16000; + isize = 0; + charge = 0; + xmin = 16000; + ymin = 16000; } - bool add(SiPixelCluster::PixelPos const & p, UShort const iadc) { - if (isize==MAXSIZE) return false; - xmin=std::min(xmin,(unsigned short)(p.row())); - ymin=std::min(ymin,(unsigned short)(p.col())); - adc[isize]=iadc; - x[isize]=p.row(); - y[isize++]=p.col(); - charge+=iadc; + bool add(SiPixelCluster::PixelPos const& p, UShort const iadc) { + if (isize == MAXSIZE) + return false; + xmin = std::min(xmin, (unsigned short)(p.row())); + ymin = std::min(ymin, (unsigned short)(p.col())); + adc[isize] = iadc; + x[isize] = p.row(); + y[isize++] = p.col(); + charge += iadc; return true; } }; constexpr uint32_t dummydetid = 0xffffffff; -} +} // namespace -class SiPixelDigisClustersFromSoA: public edm::global::EDProducer<> { +class SiPixelDigisClustersFromSoA : public edm::global::EDProducer<> { public: explicit SiPixelDigisClustersFromSoA(const edm::ParameterSet& iConfig); ~SiPixelDigisClustersFromSoA() override = default; @@ -62,14 +63,12 @@ class SiPixelDigisClustersFromSoA: public edm::global::EDProducer<> { edm::EDPutTokenT> digiPutToken_; edm::EDPutTokenT clusterPutToken_; - }; -SiPixelDigisClustersFromSoA::SiPixelDigisClustersFromSoA(const edm::ParameterSet& iConfig): - digiGetToken_(consumes(iConfig.getParameter("src"))), - digiPutToken_(produces>()), - clusterPutToken_(produces()) -{} +SiPixelDigisClustersFromSoA::SiPixelDigisClustersFromSoA(const edm::ParameterSet& iConfig) + : digiGetToken_(consumes(iConfig.getParameter("src"))), + digiPutToken_(produces>()), + clusterPutToken_(produces()) {} void SiPixelDigisClustersFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { 
edm::ParameterSetDescription desc; @@ -79,7 +78,7 @@ void SiPixelDigisClustersFromSoA::fillDescriptions(edm::ConfigurationDescription void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { const auto& digis = iEvent.get(digiGetToken_); - + edm::ESHandle trackerTopologyHandle; iSetup.get().get(trackerTopologyHandle); const auto& ttopo = *trackerTopologyHandle; @@ -88,68 +87,82 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con auto outputClusters = std::make_unique(); const uint32_t nDigis = digis.size(); - edm::DetSet * detDigis=nullptr; + edm::DetSet* detDigis = nullptr; for (uint32_t i = 0; i < nDigis; i++) { - if (digis.pdigi(i)==0) continue; + if (digis.pdigi(i) == 0) + continue; detDigis = &collection->find_or_insert(digis.rawIdArr(i)); - if ( (*detDigis).empty() ) (*detDigis).data.reserve(32); // avoid the first relocations + if ((*detDigis).empty()) + (*detDigis).data.reserve(32); // avoid the first relocations break; } - int32_t nclus=-1; + int32_t nclus = -1; std::vector aclusters(1024); - auto totCluseFilled=0; + auto totCluseFilled = 0; - auto fillClusters = [&](uint32_t detId){ - if (nclus<0) return; // this in reality should never happen + auto fillClusters = [&](uint32_t detId) { + if (nclus < 0) + return; // this in reality should never happen edmNew::DetSetVector::FastFiller spc(*outputClusters, detId); - auto layer = (DetId(detId).subdetId()==1) ? ttopo.pxbLayer(detId) : 0; - auto clusterThreshold = (layer==1) ? 
2000 : 4000; - for (int32_t ic=0; ic9000) continue; // not in cluster; TODO add an assert for the size + if (digis.pdigi(i) == 0) + continue; + if (digis.clus(i) > 9000) + continue; // not in cluster; TODO add an assert for the size assert(digis.rawIdArr(i) > 109999); - if ( (*detDigis).detId() != digis.rawIdArr(i)) - { - fillClusters((*detDigis).detId()); - assert(nclus==-1); - detDigis = &collection->find_or_insert(digis.rawIdArr(i)); - if ( (*detDigis).empty() ) - (*detDigis).data.reserve(32); // avoid the first relocations - else { std::cout << "Problem det present twice in input! " << (*detDigis).detId() << std::endl; } + if ((*detDigis).detId() != digis.rawIdArr(i)) { + fillClusters((*detDigis).detId()); + assert(nclus == -1); + detDigis = &collection->find_or_insert(digis.rawIdArr(i)); + if ((*detDigis).empty()) + (*detDigis).data.reserve(32); // avoid the first relocations + else { + std::cout << "Problem det present twice in input! " << (*detDigis).detId() << std::endl; } + } (*detDigis).data.emplace_back(digis.pdigi(i)); - auto const & dig = (*detDigis).data.back(); + auto const& dig = (*detDigis).data.back(); // fill clusters - assert(digis.clus(i)>=0); - assert(digis.clus(i)<1024); - nclus = std::max(digis.clus(i),nclus); + assert(digis.clus(i) >= 0); + assert(digis.clus(i) < 1024); + nclus = std::max(digis.clus(i), nclus); auto row = dig.row(); auto col = dig.column(); - SiPixelCluster::PixelPos pix(row,col); + SiPixelCluster::PixelPos pix(row, col); aclusters[digis.clus(i)].add(pix, digis.adc(i)); } // fill final clusters - if (detDigis) fillClusters((*detDigis).detId()); + if (detDigis) + fillClusters((*detDigis).detId()); //std::cout << "filled " << totCluseFilled << " clusters" << std::endl; iEvent.put(digiPutToken_, std::move(collection)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 6a832128c1cc2..8a5119d68487c 100644 
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -19,24 +19,24 @@ // CUDA includes #include #include -#include #include +#include +#include #include #include -#include // cub includes #include // CMSSW includes +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h" -#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" // local includes #include "SiPixelRawToClusterGPUKernel.h" @@ -44,162 +44,153 @@ namespace pixelgpudetails { // number of words for all the FEDs - constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; + constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender() { word_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); fedId_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); } - void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) { - std::memcpy(word_.get()+wordCounterGPU, src, sizeof(cms_uint32_t)*length); - std::memset(fedId_.get()+wordCounterGPU/2, fedId - 
1200, length/2); + void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId, + unsigned int wordCounterGPU, + const cms_uint32_t *src, + unsigned int length) { + std::memcpy(word_.get() + wordCounterGPU, src, sizeof(cms_uint32_t) * length); + std::memset(fedId_.get() + wordCounterGPU / 2, fedId - 1200, length / 2); } //////////////////// - __device__ uint32_t getLink(uint32_t ww) { + __device__ uint32_t getLink(uint32_t ww) { return ((ww >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask); } + __device__ uint32_t getRoc(uint32_t ww) { return ((ww >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask); } - __device__ uint32_t getRoc(uint32_t ww) { - return ((ww >> pixelgpudetails::ROC_shift ) & pixelgpudetails::ROC_mask); - } - + __device__ uint32_t getADC(uint32_t ww) { return ((ww >> pixelgpudetails::ADC_shift) & pixelgpudetails::ADC_mask); } - __device__ uint32_t getADC(uint32_t ww) { - return ((ww >> pixelgpudetails::ADC_shift) & pixelgpudetails::ADC_mask); - } + __device__ bool isBarrel(uint32_t rawId) { return (1 == ((rawId >> 25) & 0x7)); } - - __device__ bool isBarrel(uint32_t rawId) { - return (1==((rawId>>25)&0x7)); - } - - __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * cablingMap, uint8_t fed, uint32_t link, uint32_t roc) { - uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; - pixelgpudetails::DetIdGPU detId = { cablingMap->RawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index] }; + __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU *cablingMap, + uint8_t fed, + uint32_t link, + uint32_t roc) { + uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; + pixelgpudetails::DetIdGPU detId = { + cablingMap->RawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index]}; return detId; } //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html 
//http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 // Convert local pixel to pixelgpudetails::global pixel - __device__ pixelgpudetails::Pixel frameConversion(bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, pixelgpudetails::Pixel local) { - - int slopeRow = 0, slopeCol = 0; + __device__ pixelgpudetails::Pixel frameConversion( + bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, pixelgpudetails::Pixel local) { + int slopeRow = 0, slopeCol = 0; int rowOffset = 0, colOffset = 0; if (bpix) { - - if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 + if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 if (rocIdInDetUnit < 8) { slopeRow = 1; slopeCol = -1; rowOffset = 0; - colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; - } - else { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2*pixelgpudetails::numRowsInRoc-1; - colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; - } // if roc - } - else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 + colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; + } // if roc + } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 if (rocIdInDetUnit < 8) { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; colOffset = rocIdInDetUnit * pixelgpudetails::numColsInRoc; - } - else { - slopeRow = 1; - slopeCol = -1; + } else { + slopeRow = 1; + slopeCol = -1; rowOffset = 0; - colOffset = (16-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + colOffset = (16 - rocIdInDetUnit) * 
pixelgpudetails::numColsInRoc - 1; } } - } - else { // fpix - if (side==-1) { // pannel 1 + } else { // fpix + if (side == -1) { // pannel 1 if (rocIdInDetUnit < 8) { slopeRow = 1; slopeCol = -1; rowOffset = 0; - colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; - } - else { + colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; + } else { slopeRow = -1; slopeCol = 1; - rowOffset = 2*pixelgpudetails::numRowsInRoc-1; - colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; + rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; } - } - else { // pannel 2 + } else { // pannel 2 if (rocIdInDetUnit < 8) { slopeRow = 1; slopeCol = -1; rowOffset = 0; - colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; - } - else { + colOffset = (8 - rocIdInDetUnit) * pixelgpudetails::numColsInRoc - 1; + } else { slopeRow = -1; slopeCol = 1; - rowOffset = 2*pixelgpudetails::numRowsInRoc-1; - colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; + rowOffset = 2 * pixelgpudetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * pixelgpudetails::numColsInRoc; } - } // side - + } // side } - uint32_t gRow = rowOffset+slopeRow*local.row; - uint32_t gCol = colOffset+slopeCol*local.col; + uint32_t gRow = rowOffset + slopeRow * local.row; + uint32_t gCol = colOffset + slopeCol * local.col; //printf("Inside frameConversion row: %u, column: %u\n", gRow, gCol); pixelgpudetails::Pixel global = {gRow, gCol}; return global; } - - __device__ uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) - { + __device__ uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) { uint8_t errorType = 0; // debug = true; switch (status) { - case(1) : { - if (debug) printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId ); + case (1): { + if (debug) + printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId); 
errorType = 35; break; } - case(2) : { - if (debug) printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); + case (2): { + if (debug) + printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); errorType = 36; break; } - case(3) : { - if (debug) printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); + case (3): { + if (debug) + printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); errorType = 37; break; } - case(4) : { - if (debug) printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); + case (4): { + if (debug) + printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); errorType = 38; break; } default: - if (debug) printf("Cabling check returned unexpected result, status = %i\n", status); + if (debug) + printf("Cabling check returned unexpected result, status = %i\n", status); }; return errorType; } - __device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) - { + __device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) { uint32_t numRowsInRoc = 80; uint32_t numColsInRoc = 52; @@ -207,65 +198,74 @@ namespace pixelgpudetails { return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc)); } - __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) - { - return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); - } + __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } - __device__ uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) - { + __device__ uint8_t checkROC( + uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) { uint8_t errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; - if (errorType < 25) return 0; + if (errorType < 25) + return 0; bool errorFound = false; switch (errorType) { - 
case(25) : { + case (25): { errorFound = true; - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1; + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + 1; if (index > 1 && index <= cablingMap->size) { - if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) errorFound = false; + if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) + errorFound = false; } - if (debug and errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); + if (debug and errorFound) + printf("Invalid ROC = 25 found (errorType = 25)\n"); break; } - case(26) : { - if (debug) printf("Gap word found (errorType = 26)\n"); + case (26): { + if (debug) + printf("Gap word found (errorType = 26)\n"); errorFound = true; break; } - case(27) : { - if (debug) printf("Dummy word found (errorType = 27)\n"); + case (27): { + if (debug) + printf("Dummy word found (errorType = 27)\n"); errorFound = true; break; } - case(28) : { - if (debug) printf("Error fifo nearly full (errorType = 28)\n"); + case (28): { + if (debug) + printf("Error fifo nearly full (errorType = 28)\n"); errorFound = true; break; } - case(29) : { - if (debug) printf("Timeout on a channel (errorType = 29)\n"); + case (29): { + if (debug) + printf("Timeout on a channel (errorType = 29)\n"); if ((errorWord >> pixelgpudetails::OMIT_ERR_shift) & pixelgpudetails::OMIT_ERR_mask) { - if (debug) printf("...first errorType=29 error, this gets masked out\n"); + if (debug) + printf("...first errorType=29 error, this gets masked out\n"); } errorFound = true; break; } - case(30) : { - if (debug) printf("TBM error trailer (errorType = 30)\n"); + case (30): { + if (debug) + printf("TBM error trailer (errorType = 30)\n"); int StateMatch_bits = 4; int StateMatch_shift = 8; uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; - if ( StateMatch != 1 && StateMatch != 8 ) { - if (debug) printf("FED error 30 
with unexpected State Bits (errorType = 30)\n"); + if (StateMatch != 1 && StateMatch != 8) { + if (debug) + printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); } - if (StateMatch == 1) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + if (StateMatch == 1) + errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 errorFound = true; break; } - case(31) : { - if (debug) printf("Event number error (errorType = 31)\n"); + case (31): { + if (debug) + printf("Event number error (errorType = 31)\n"); errorFound = true; break; } @@ -276,22 +276,30 @@ namespace pixelgpudetails { return errorFound ? errorType : 0; } - __device__ uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) - { + __device__ uint32_t getErrRawID(uint8_t fedId, + uint32_t errWord, + uint32_t errorType, + const SiPixelFedCablingMapGPU *cablingMap, + bool debug = false) { uint32_t rID = 0xffffffff; switch (errorType) { - case 25 : case 30 : case 31 : case 36 : case 40 : { + case 25: + case 30: + case 31: + case 36: + case 40: { //set dummy values for cabling just to get detId from link //cabling.dcol = 0; //cabling.pxid = 2; - uint32_t roc = 1; + uint32_t roc = 1; uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; - if (rID_temp != 9999) rID = rID_temp; + if (rID_temp != 9999) + rID = rID_temp; break; } - case 29 : { + case 29: { int chanNmbr = 0; const int DB0_shift = 0; const int DB1_shift = DB0_shift + 1; @@ -305,31 +313,37 @@ namespace pixelgpudetails { int CH3 = (errWord >> DB2_shift) & DataBit_mask; int CH4 = (errWord >> DB3_shift) & DataBit_mask; int CH5 = (errWord >> DB4_shift) & DataBit_mask; - int BLOCK_bits = 3; - int BLOCK_shift = 8; + int BLOCK_bits = 3; + int BLOCK_shift = 8; uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits); int BLOCK = (errWord >> BLOCK_shift) & 
BLOCK_mask; - int localCH = 1*CH1+2*CH2+3*CH3+4*CH4+5*CH5; - if (BLOCK%2==0) chanNmbr=(BLOCK/2)*9+localCH; - else chanNmbr = ((BLOCK-1)/2)*9+4+localCH; - if ((chanNmbr < 1)||(chanNmbr > 36)) break; // signifies unexpected result + int localCH = 1 * CH1 + 2 * CH2 + 3 * CH3 + 4 * CH4 + 5 * CH5; + if (BLOCK % 2 == 0) + chanNmbr = (BLOCK / 2) * 9 + localCH; + else + chanNmbr = ((BLOCK - 1) / 2) * 9 + 4 + localCH; + if ((chanNmbr < 1) || (chanNmbr > 36)) + break; // signifies unexpected result // set dummy values for cabling just to get detId from link if in Barrel //cabling.dcol = 0; //cabling.pxid = 2; - uint32_t roc = 1; + uint32_t roc = 1; uint32_t link = chanNmbr; uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; - if(rID_temp != 9999) rID = rID_temp; + if (rID_temp != 9999) + rID = rID_temp; break; } - case 37 : case 38: { + case 37: + case 38: { //cabling.dcol = 0; //cabling.pxid = 2; - uint32_t roc = (errWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask; + uint32_t roc = (errWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask; uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; - if(rID_temp != 9999) rID = rID_temp; + if (rID_temp != 9999) + rID = rID_temp; break; } default: @@ -339,187 +353,194 @@ namespace pixelgpudetails { return rID; } - // Kernel to perform Raw to Digi conversion - __global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, - const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds, - uint16_t *xx, uint16_t *yy, uint16_t *adc, - uint32_t *pdigi, uint32_t *rawIdArr, uint16_t *moduleId, - GPU::SimpleVector *err, - bool useQualityInfo, bool includeErrors, bool debug) - { + __global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *cablingMap, + const unsigned char *modToUnp, + const uint32_t wordCounter, + const uint32_t *word, + const 
uint8_t *fedIds, + uint16_t *xx, + uint16_t *yy, + uint16_t *adc, + uint32_t *pdigi, + uint32_t *rawIdArr, + uint16_t *moduleId, + GPU::SimpleVector *err, + bool useQualityInfo, + bool includeErrors, + bool debug) { //if (threadIdx.x==0) printf("Event: %u blockIdx.x: %u start: %u end: %u\n", eventno, blockIdx.x, begin, end); - int32_t first = threadIdx.x + blockIdx.x*blockDim.x; - for (int32_t iloop=first, nend=wordCounter; ilooppush_back(PixelErrorCompact{rID, ww, errorType, fedId}); + continue; + } - uint32_t link = getLink(ww); // Extract link - uint32_t roc = getRoc(ww); // Extract Roc in link - pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); + uint32_t rawId = detId.RawId; + uint32_t rocIdInDetUnit = detId.rocInDet; + bool barrel = isBarrel(rawId); - uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); - skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); - if (includeErrors and skipROC) - { - uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); - err->push_back(PixelErrorCompact{rID, ww, errorType, fedId}); + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; + if (useQualityInfo) { + skipROC = cablingMap->badRocs[index]; + if (skipROC) continue; - } - - uint32_t rawId = detId.RawId; - uint32_t rocIdInDetUnit = detId.rocInDet; - bool barrel = isBarrel(rawId); - - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; - if (useQualityInfo) { - skipROC = cablingMap->badRocs[index]; - if (skipROC) continue; - } - skipROC = modToUnp[index]; - if (skipROC) continue; - - uint32_t layer = 0;//, ladder =0; - int side = 0, panel = 0, module = 0;//disk = 0, blade = 0 - - if (barrel) - { - layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask; - module = (rawId >> pixelgpudetails::moduleStartBit) & pixelgpudetails::moduleMask; - side = (module < 5)? 
-1 : 1; - } - else { - // endcap ids - layer = 0; - panel = (rawId >> pixelgpudetails::panelStartBit) & pixelgpudetails::panelMask; - //disk = (rawId >> diskStartBit_) & diskMask_; - side = (panel == 1)? -1 : 1; - //blade = (rawId >> bladeStartBit_) & bladeMask_; - } + } + skipROC = modToUnp[index]; + if (skipROC) + continue; + + uint32_t layer = 0; //, ladder =0; + int side = 0, panel = 0, module = 0; //disk = 0, blade = 0 + + if (barrel) { + layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask; + module = (rawId >> pixelgpudetails::moduleStartBit) & pixelgpudetails::moduleMask; + side = (module < 5) ? -1 : 1; + } else { + // endcap ids + layer = 0; + panel = (rawId >> pixelgpudetails::panelStartBit) & pixelgpudetails::panelMask; + //disk = (rawId >> diskStartBit_) & diskMask_; + side = (panel == 1) ? -1 : 1; + //blade = (rawId >> bladeStartBit_) & bladeMask_; + } - // ***special case of layer to 1 be handled here - pixelgpudetails::Pixel localPix; - if (layer == 1) { - uint32_t col = (ww >> pixelgpudetails::COL_shift) & pixelgpudetails::COL_mask; - uint32_t row = (ww >> pixelgpudetails::ROW_shift) & pixelgpudetails::ROW_mask; - localPix.row = row; - localPix.col = col; - if (includeErrors) { - if (not rocRowColIsValid(row, col)) { - uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays - err->push_back(PixelErrorCompact{rawId, ww, error, fedId}); - if(debug) printf("BPIX1 Error status: %i\n", error); - continue; - } - } - } else { - // ***conversion rules for dcol and pxid - uint32_t dcol = (ww >> pixelgpudetails::DCOL_shift) & pixelgpudetails::DCOL_mask; - uint32_t pxid = (ww >> pixelgpudetails::PXID_shift) & pixelgpudetails::PXID_mask; - uint32_t row = pixelgpudetails::numRowsInRoc - pxid/2; - uint32_t col = dcol*2 + pxid%2; - localPix.row = row; - localPix.col = col; - if (includeErrors and not dcolIsValid(dcol, pxid)) { - uint8_t error = conversionError(fedId, 3, debug); + // ***special case 
of layer to 1 be handled here + pixelgpudetails::Pixel localPix; + if (layer == 1) { + uint32_t col = (ww >> pixelgpudetails::COL_shift) & pixelgpudetails::COL_mask; + uint32_t row = (ww >> pixelgpudetails::ROW_shift) & pixelgpudetails::ROW_mask; + localPix.row = row; + localPix.col = col; + if (includeErrors) { + if (not rocRowColIsValid(row, col)) { + uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays err->push_back(PixelErrorCompact{rawId, ww, error, fedId}); - if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); + if (debug) + printf("BPIX1 Error status: %i\n", error); continue; } } + } else { + // ***conversion rules for dcol and pxid + uint32_t dcol = (ww >> pixelgpudetails::DCOL_shift) & pixelgpudetails::DCOL_mask; + uint32_t pxid = (ww >> pixelgpudetails::PXID_shift) & pixelgpudetails::PXID_mask; + uint32_t row = pixelgpudetails::numRowsInRoc - pxid / 2; + uint32_t col = dcol * 2 + pxid % 2; + localPix.row = row; + localPix.col = col; + if (includeErrors and not dcolIsValid(dcol, pxid)) { + uint8_t error = conversionError(fedId, 3, debug); + err->push_back(PixelErrorCompact{rawId, ww, error, fedId}); + if (debug) + printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); + continue; + } + } - pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); - xx[gIndex] = globalPix.row; // origin shifting by 1 0-159 - yy[gIndex] = globalPix.col; // origin shifting by 1 0-415 - adc[gIndex] = getADC(ww); - pdigi[gIndex] = pixelgpudetails::pack(globalPix.row, globalPix.col, adc[gIndex]); - moduleId[gIndex] = detId.moduleId; - rawIdArr[gIndex] = rawId; - } // end of loop (gIndex < end) - - } // end of Raw to Digi kernel + pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); + xx[gIndex] = globalPix.row; // origin shifting by 1 0-159 + yy[gIndex] = globalPix.col; // origin shifting by 1 0-415 + 
adc[gIndex] = getADC(ww); + pdigi[gIndex] = pixelgpudetails::pack(globalPix.row, globalPix.col, adc[gIndex]); + moduleId[gIndex] = detId.moduleId; + rawIdArr[gIndex] = rawId; + } // end of loop (gIndex < end) + } // end of Raw to Digi kernel - __global__ - void fillHitsModuleStart(uint32_t const * __restrict__ cluStart, uint32_t * __restrict__ moduleStart) { - - assert(gpuClustering::MaxNumModules<2048); // easy to extend at least till 32*1024 - assert(1==gridDim.x); - assert(0==blockIdx.x); + __global__ void fillHitsModuleStart(uint32_t const *__restrict__ cluStart, uint32_t *__restrict__ moduleStart) { + assert(gpuClustering::MaxNumModules < 2048); // easy to extend at least till 32*1024 + assert(1 == gridDim.x); + assert(0 == blockIdx.x); - int first = threadIdx.x; + int first = threadIdx.x; - // limit to MaxHitsInModule; - for (int i=first, iend=gpuClustering::MaxNumModules; i=moduleStart[1023]); - assert(moduleStart[1025]>=moduleStart[1024]); - assert(moduleStart[gpuClustering::MaxNumModules]>=moduleStart[1025]); - - for (int i=first, iend=gpuClustering::MaxNumModules+1; i=moduleStart[i-i]); - // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] - // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] - if (i==96 || i==1184 || i==1744 || i==gpuClustering::MaxNumModules) printf("moduleStart %d %d\n",i,moduleStart[i]); - } + assert(0 == moduleStart[0]); + auto c0 = std::min(gpuClustering::maxHitsInModule(), cluStart[0]); + assert(c0 == moduleStart[1]); + assert(moduleStart[1024] >= moduleStart[1023]); + assert(moduleStart[1025] >= moduleStart[1024]); + assert(moduleStart[gpuClustering::MaxNumModules] >= moduleStart[1025]); + + for (int i = first, iend = gpuClustering::MaxNumModules + 1; i < iend; i += blockDim.x) { + if (0 != i) + assert(moduleStart[i] >= moduleStart[i - i]); + // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] + // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] + if (i == 96 || i == 
1184 || i == 1744 || i == gpuClustering::MaxNumModules) + printf("moduleStart %d %d\n", i, moduleStart[i]); + } #endif - // avoid overflow - constexpr auto MAX_HITS = gpuClustering::MaxNumClusters; - for (int i=first, iend=gpuClustering::MaxNumModules+1; i MAX_HITS) moduleStart[i] = MAX_HITS; - } + // avoid overflow + constexpr auto MAX_HITS = gpuClustering::MaxNumClusters; + for (int i = first, iend = gpuClustering::MaxNumModules + 1; i < iend; i += blockDim.x) { + if (moduleStart[i] > MAX_HITS) + moduleStart[i] = MAX_HITS; + } } - // Interface to outside - void SiPixelRawToClusterGPUKernel::makeClustersAsync( - const SiPixelFedCablingMapGPU *cablingMap, - const unsigned char *modToUnp, - const SiPixelGainForHLTonGPU *gains, - const WordFedAppender& wordFed, - PixelFormatterErrors&& errors, - const uint32_t wordCounter, const uint32_t fedCounter, - bool useQualityInfo, bool includeErrors, bool debug, - cuda::stream_t<>& stream) - { + void SiPixelRawToClusterGPUKernel::makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, + const unsigned char *modToUnp, + const SiPixelGainForHLTonGPU *gains, + const WordFedAppender &wordFed, + PixelFormatterErrors &&errors, + const uint32_t wordCounter, + const uint32_t fedCounter, + bool useQualityInfo, + bool includeErrors, + bool debug, + cuda::stream_t<> &stream) { nDigis = wordCounter; digis_d = SiPixelDigisCUDA(pixelgpudetails::MAX_FED_WORDS, stream); - if(includeErrors) { + if (includeErrors) { digiErrors_d = SiPixelDigiErrorsCUDA(pixelgpudetails::MAX_FED_WORDS, std::move(errors), stream); } clusters_d = SiPixelClustersCUDA(gpuClustering::MaxNumModules, stream); @@ -527,18 +548,20 @@ namespace pixelgpudetails { edm::Service cs; nModules_Clusters_h = cs->make_host_unique(2, stream); - if (wordCounter) // protect in case of empty event.... + if (wordCounter) // protect in case of empty event.... 
{ const int threadsPerBlock = 512; - const int blocks = (wordCounter + threadsPerBlock-1) /threadsPerBlock; // fill it all + const int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; // fill it all - assert(0 == wordCounter%2); + assert(0 == wordCounter % 2); // wordCounter is the total no of words in each event to be trasfered on device auto word_d = cs->make_device_unique(wordCounter, stream); auto fedId_d = cs->make_device_unique(wordCounter, stream); - cudaCheck(cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(fedId_d.get(), wordFed.fedId(), wordCounter*sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync( + word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync( + fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id())); // Launch rawToDigi kernel RawToDigi_kernel<<>>( @@ -547,17 +570,19 @@ namespace pixelgpudetails { wordCounter, word_d.get(), fedId_d.get(), - digis_d.xx(), digis_d.yy(), digis_d.adc(), + digis_d.xx(), + digis_d.yy(), + digis_d.adc(), digis_d.pdigi(), digis_d.rawIdArr(), digis_d.moduleInd(), - digiErrors_d.error(), // returns nullptr if default-constructed + digiErrors_d.error(), // returns nullptr if default-constructed useQualityInfo, includeErrors, debug); cudaCheck(cudaGetLastError()); - if(includeErrors) { + if (includeErrors) { digiErrors_d.copyErrorToHostAsync(stream); } } @@ -567,71 +592,73 @@ namespace pixelgpudetails { // clusterizer ... 
using namespace gpuClustering; int threadsPerBlock = 256; - int blocks = (std::max(int(wordCounter),int(gpuClustering::MaxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; - - - gpuCalibPixel::calibDigis<<>>( - digis_d.moduleInd(), - digis_d.c_xx(), digis_d.c_yy(), digis_d.adc(), - gains, - wordCounter, - clusters_d.moduleStart(), - clusters_d.clusInModule(), - clusters_d.clusModuleStart() - ); + int blocks = + (std::max(int(wordCounter), int(gpuClustering::MaxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; + + gpuCalibPixel::calibDigis<<>>(digis_d.moduleInd(), + digis_d.c_xx(), + digis_d.c_yy(), + digis_d.adc(), + gains, + wordCounter, + clusters_d.moduleStart(), + clusters_d.clusInModule(), + clusters_d.clusModuleStart()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG - std::cout - << "CUDA countModules kernel launch with " << blocks - << " blocks of " << threadsPerBlock << " threads\n"; + std::cout << "CUDA countModules kernel launch with " << blocks << " blocks of " << threadsPerBlock + << " threads\n"; #endif - countModules<<>>(digis_d.c_moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); + countModules<<>>( + digis_d.c_moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); // read the number of modules into a data member, used by getProduct()) - cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync( + &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream.id())); threadsPerBlock = 256; blocks = MaxNumModules; #ifdef GPU_DEBUG - std::cout << "CUDA findClus kernel launch with " << blocks - << " blocks of " << threadsPerBlock << " threads\n"; + std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; #endif - findClus<<>>( - digis_d.c_moduleInd(), - digis_d.c_xx(), digis_d.c_yy(), - 
clusters_d.c_moduleStart(), - clusters_d.clusInModule(), clusters_d.moduleId(), - digis_d.clus(), - wordCounter); + findClus<<>>(digis_d.c_moduleInd(), + digis_d.c_xx(), + digis_d.c_yy(), + clusters_d.c_moduleStart(), + clusters_d.clusInModule(), + clusters_d.moduleId(), + digis_d.clus(), + wordCounter); cudaCheck(cudaGetLastError()); // apply charge cut - clusterChargeCut<<>>( - digis_d.moduleInd(), - digis_d.c_adc(), - clusters_d.c_moduleStart(), - clusters_d.clusInModule(), clusters_d.c_moduleId(), - digis_d.clus(), - wordCounter); + clusterChargeCut<<>>(digis_d.moduleInd(), + digis_d.c_adc(), + clusters_d.c_moduleStart(), + clusters_d.clusInModule(), + clusters_d.c_moduleId(), + digis_d.clus(), + wordCounter); cudaCheck(cudaGetLastError()); - - // count the module start indices already here (instead of // rechits) so that the number of clusters/hits can be made // available in the rechit producer without additional points of // synchronization/ExternalWork - // MUST be ONE block - fillHitsModuleStart<<<1, 1024, 0, stream.id()>>>(clusters_d.c_clusInModule(),clusters_d.clusModuleStart()); + // MUST be ONE block + fillHitsModuleStart<<<1, 1024, 0, stream.id()>>>(clusters_d.c_clusInModule(), clusters_d.clusModuleStart()); // last element holds the number of all clusters - cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), clusters_d.clusModuleStart()+gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - + cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), + clusters_d.clusModuleStart() + gpuClustering::MaxNumModules, + sizeof(uint32_t), + cudaMemcpyDefault, + stream.id())); - } // end clusterizer scope + } // end clusterizer scope } -} +} // namespace pixelgpudetails diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 5087516fa009d..41e028b3c4595 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ 
b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -5,66 +5,63 @@ #include #include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "gpuClusteringConstants.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - namespace gpuCalibPixel { - constexpr uint16_t InvId=9999; // must be > MaxNumModules - - constexpr float VCaltoElectronGain = 47; // L2-4: 47 +- 4.7 - constexpr float VCaltoElectronGain_L1 = 50; // L1: 49.6 +- 2.6 - constexpr float VCaltoElectronOffset = -60; // L2-4: -60 +- 130 - constexpr float VCaltoElectronOffset_L1 = -670; // L1: -670 +- 220 + constexpr uint16_t InvId = 9999; // must be > MaxNumModules + constexpr float VCaltoElectronGain = 47; // L2-4: 47 +- 4.7 + constexpr float VCaltoElectronGain_L1 = 50; // L1: 49.6 +- 2.6 + constexpr float VCaltoElectronOffset = -60; // L2-4: -60 +- 130 + constexpr float VCaltoElectronOffset_L1 = -670; // L1: -670 +- 220 - __global__ void calibDigis(uint16_t * id, - uint16_t const * __restrict__ x, - uint16_t const * __restrict__ y, - uint16_t * adc, - SiPixelGainForHLTonGPU const * __restrict__ ped, - int numElements, - uint32_t * __restrict__ moduleStart, // just to zero first - uint32_t * __restrict__ nClustersInModule, // just to zero them - uint32_t * __restrict__ clusModuleStart // just to zero first - ) -{ - + __global__ void calibDigis(uint16_t* id, + uint16_t const* __restrict__ x, + uint16_t const* __restrict__ y, + uint16_t* adc, + SiPixelGainForHLTonGPU const* __restrict__ ped, + int numElements, + uint32_t* __restrict__ moduleStart, // just to zero first + uint32_t* __restrict__ nClustersInModule, // just to zero them + uint32_t* __restrict__ clusModuleStart // just to zero first + ) { int first = blockDim.x * blockIdx.x + threadIdx.x; // zero for next kernels... 
- if (0==first) clusModuleStart[0] = moduleStart[0]=0; - for (int i = first; i < gpuClustering::MaxNumModules; i += gridDim.x*blockDim.x) { - nClustersInModule[i]=0; + if (0 == first) + clusModuleStart[0] = moduleStart[0] = 0; + for (int i = first; i < gpuClustering::MaxNumModules; i += gridDim.x * blockDim.x) { + nClustersInModule[i] = 0; } - for (int i = first; i < numElements; i += gridDim.x*blockDim.x) { - if (InvId==id[i]) continue; + for (int i = first; i < numElements; i += gridDim.x * blockDim.x) { + if (InvId == id[i]) + continue; + + float conversionFactor = id[i] < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; + float offset = id[i] < 96 ? VCaltoElectronOffset_L1 : VCaltoElectronOffset; - float conversionFactor = id[i]<96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; - float offset = id[i]<96 ? VCaltoElectronOffset_L1 : VCaltoElectronOffset; + bool isDeadColumn = false, isNoisyColumn = false; - bool isDeadColumn=false, isNoisyColumn=false; - int row = x[i]; int col = y[i]; auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); - float pedestal = ret.first; float gain = ret.second; + float pedestal = ret.first; + float gain = ret.second; // float pedestal = 0; float gain = 1.; - if ( isDeadColumn | isNoisyColumn ) - { - id[i]=InvId; adc[i] =0; - printf("bad pixel at %d in %d\n",i,id[i]); - } - else { - float vcal = adc[i] * gain - pedestal*gain; - adc[i] = std::max(100, int( vcal * conversionFactor + offset)); + if (isDeadColumn | isNoisyColumn) { + id[i] = InvId; + adc[i] = 0; + printf("bad pixel at %d in %d\n", i, id[i]); + } else { + float vcal = adc[i] * gain - pedestal * gain; + adc[i] = std::max(100, int(vcal * conversionFactor + offset)); } } - } -} +} // namespace gpuCalibPixel -#endif +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuCalibPixel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index 
cf94b1f43094b..b81752cf2823a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -11,28 +11,27 @@ namespace gpuClustering { - __global__ void clusterChargeCut( - uint16_t * __restrict__ id, // module id of each pixel (modified if bad cluster) - uint16_t const * __restrict__ adc, // charge of each pixel - uint32_t const * __restrict__ moduleStart, // index of the first pixel of each module - uint32_t * __restrict__ nClustersInModule, // modified: number of clusters found in each module - uint32_t const * __restrict__ moduleId, // module id of each module - int32_t * __restrict__ clusterId, // modified: cluster id of each pixel - uint32_t numElements) - { - + __global__ void clusterChargeCut( + uint16_t* __restrict__ id, // module id of each pixel (modified if bad cluster) + uint16_t const* __restrict__ adc, // charge of each pixel + uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module + uint32_t* __restrict__ nClustersInModule, // modified: number of clusters found in each module + uint32_t const* __restrict__ moduleId, // module id of each module + int32_t* __restrict__ clusterId, // modified: cluster id of each pixel + uint32_t numElements) { if (blockIdx.x >= moduleStart[0]) return; auto firstPixel = moduleStart[1 + blockIdx.x]; auto thisModuleId = id[firstPixel]; assert(thisModuleId < MaxNumModules); - assert(thisModuleId==moduleId[blockIdx.x]); + assert(thisModuleId == moduleId[blockIdx.x]); auto nclus = nClustersInModule[thisModuleId]; - if (nclus==0) return; + if (nclus == 0) + return; - assert(nclus<=MaxNumClustersPerModules); + assert(nclus <= MaxNumClustersPerModules); #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) @@ -43,23 +42,25 @@ namespace gpuClustering { auto first = firstPixel + threadIdx.x; __shared__ int32_t charge[MaxNumClustersPerModules]; - for (auto i=threadIdx.x; ichargeCut ? 
1 : 0; + for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { + newclusId[i] = ok[i] = charge[i] > chargeCut ? 1 : 0; } __syncthreads(); @@ -68,30 +69,35 @@ namespace gpuClustering { __shared__ uint16_t ws[32]; blockPrefixScan(newclusId, nclus, ws); - assert(nclus>=newclusId[nclus-1]); - - if(nclus==newclusId[nclus-1]) return; + assert(nclus >= newclusId[nclus - 1]); - nClustersInModule[thisModuleId] = newclusId[nclus-1]; + if (nclus == newclusId[nclus - 1]) + return; + + nClustersInModule[thisModuleId] = newclusId[nclus - 1]; __syncthreads(); // mark bad cluster again - for (auto i=threadIdx.x; i= moduleStart[0]) return; @@ -66,59 +64,61 @@ namespace gpuClustering { __syncthreads(); // skip threads not associated to an existing pixel - for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels - continue; - if (id[i] != thisModuleId) { // find the first pixel in a different module - atomicMin(&msize, i); - break; - } + for (int i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + if (id[i] != thisModuleId) { // find the first pixel in a different module + atomicMin(&msize, i); + break; } + } - //init hist (ymax=416 < 512 : 9bits) - constexpr uint32_t maxPixInModule = 4000; - constexpr auto nbins = phase1PixelTopology::numColsInModule + 2; //2+2; - using Hist = HistoContainer; + //init hist (ymax=416 < 512 : 9bits) + constexpr uint32_t maxPixInModule = 4000; + constexpr auto nbins = phase1PixelTopology::numColsInModule + 2; //2+2; + using Hist = HistoContainer; __shared__ Hist hist; __shared__ typename Hist::Counter ws[32]; - for (auto j=threadIdx.x; j60) atomicAdd(&n60,1); - if(hist.size(j)>40) atomicAdd(&n40,1); - } + for (auto j = threadIdx.x; j < Hist::nbins(); j += blockDim.x) { + if (hist.size(j) > 60) + atomicAdd(&n60, 1); + if (hist.size(j) > 40) + atomicAdd(&n40, 1); + } __syncthreads(); - if (0==threadIdx.x) { - if (n60>0) printf("columns with more than 60 
px %d in %d\n",n60,thisModuleId); - else if (n40>0) printf("columns with more than 40 px %d in %d\n",n40,thisModuleId); + if (0 == threadIdx.x) { + if (n60 > 0) + printf("columns with more than 60 px %d in %d\n", n60, thisModuleId); + else if (n40 > 0) + printf("columns with more than 40 px %d in %d\n", n40, thisModuleId); } __syncthreads(); #endif // fill NN - for (auto j=threadIdx.x, k = 0U; j 1) continue; - auto l = nnn[k]++; - assert(l < maxNeighbours); - nn[k][l]=*p; - } + for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { + auto p = hist.begin() + j; + auto i = *p + firstPixel; + assert(id[i] != InvId); + assert(id[i] == thisModuleId); // same module + int be = Hist::bin(y[i] + 1); + auto e = hist.end(be); + ++p; + for (; p < e; ++p) { + auto m = (*p) + firstPixel; + assert(m != i); + if (std::abs(int(x[m]) - int(x[i])) > 1) + continue; + auto l = nnn[k]++; + assert(l < maxNeighbours); + nn[k][l] = *p; + } } // for each pixel, look at all the pixels until the end of the module; @@ -179,48 +185,50 @@ namespace gpuClustering { // after the loop, all the pixel in each cluster should have the id equeal to the lowest // pixel in the cluster ( clus[i] == i ). 
bool more = true; - int nloops=0; + int nloops = 0; while (__syncthreads_or(more)) { - if (1==nloops%2) { - for (auto j=threadIdx.x, k = 0U; j= 0) { - // mark each pixel in a cluster with the same id as the first one - clusterId[i] = clusterId[clusterId[i]]; - } + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + if (clusterId[i] >= 0) { + // mark each pixel in a cluster with the same id as the first one + clusterId[i] = clusterId[clusterId[i]]; } + } __syncthreads(); // adjust the cluster id to be a positive value starting from 0 - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) { // skip invalid pixels - clusterId[i] = -9999; - continue; - } - clusterId[i] = - clusterId[i] - 1; + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == InvId) { // skip invalid pixels + clusterId[i] = -9999; + continue; } + clusterId[i] = -clusterId[i] - 1; + } __syncthreads(); - if (threadIdx.x == 0) { - nClustersInModule[thisModuleId] = foundClusters; - moduleId[blockIdx.x] = thisModuleId; + if (threadIdx.x == 0) { + nClustersInModule[thisModuleId] = foundClusters; + moduleId[blockIdx.x] = thisModuleId; #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) if (threadIdx.x == 0) @@ -271,6 +279,6 @@ namespace gpuClustering { } } -} // namespace gpuClustering +} // namespace gpuClustering -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index add45a98088c3..c92ecf0d805da 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -11,18 +11,20 @@ #ifdef __CUDACC__ #include + #include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" #endif + #include 
"RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" -int main(void) -{ +int main(void) { #ifdef __CUDACC__ exitSansCUDADevices(); if (cuda::device::count() == 0) { - std::cerr << "No CUDA devices on this system" << "\n"; + std::cerr << "No CUDA devices on this system" + << "\n"; exit(EXIT_FAILURE); } #endif @@ -47,345 +49,363 @@ int main(void) auto d_clus = cuda::memory::device::make_unique(current_device, numElements); - auto d_moduleStart = cuda::memory::device::make_unique(current_device, MaxNumModules+1); + auto d_moduleStart = cuda::memory::device::make_unique(current_device, MaxNumModules + 1); auto d_clusInModule = cuda::memory::device::make_unique(current_device, MaxNumModules); auto d_moduleId = cuda::memory::device::make_unique(current_device, MaxNumModules); #else - auto h_moduleStart = std::make_unique(MaxNumModules+1); + auto h_moduleStart = std::make_unique(MaxNumModules + 1); auto h_clusInModule = std::make_unique(MaxNumModules); auto h_moduleId = std::make_unique(MaxNumModules); #endif // later random number - int n=0; - int ncl=0; - int y[10]={5,7,9,1,3,0,4,8,2,6}; + int n = 0; + int ncl = 0; + int y[10] = {5, 7, 9, 1, 3, 0, 4, 8, 2, 6}; auto generateClusters = [&](int kn) { - auto addBigNoise = 1==kn%2; - if (addBigNoise) { - constexpr int MaxPixels = 1000; - int id = 666; - for (int x=0; x<140; x+=3) { - for (int yy=0; yy<400; yy+=3) { - h_id[n]=id; - h_x[n]=x; - h_y[n]=yy; - h_adc[n]=1000; - ++n; ++ncl; - if (MaxPixels<=ncl) break; - } - if (MaxPixels<=ncl) break; + auto addBigNoise = 1 == kn % 2; + if (addBigNoise) { + constexpr int MaxPixels = 1000; + int id = 666; + for (int x = 0; x < 140; x += 3) { + for (int yy = 0; yy < 400; yy += 3) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = yy; + h_adc[n] = 1000; + ++n; + ++ncl; + if (MaxPixels <= ncl) + break; + } + if (MaxPixels <= ncl) + break; + } } - } - { - // isolated - int id = 42; - int x = 10; - ++ncl; - 
h_id[n]=id; - h_x[n]=x; - h_y[n]=x; - h_adc[n]= kn==0 ? 100 : 5000; - ++n; - - // first column - ++ncl; - h_id[n]=id; - h_x[n]=x; - h_y[n]=0; - h_adc[n]= 5000; - ++n; - // first columns - ++ncl; - h_id[n]=id; - h_x[n]=x+80; - h_y[n]=2; - h_adc[n]= 5000; - ++n; - h_id[n]=id; - h_x[n]=x+80; - h_y[n]=1; - h_adc[n]= 5000; - ++n; - - // last column - ++ncl; - h_id[n]=id; - h_x[n]=x; - h_y[n]=415; - h_adc[n]= 5000; - ++n; - // last columns - ++ncl; - h_id[n]=id; - h_x[n]=x+80; - h_y[n]=415; - h_adc[n]= 2500; - ++n; - h_id[n]=id; - h_x[n]=x+80; - h_y[n]=414; - h_adc[n]= 2500; - ++n; - - // diagonal - ++ncl; - for (int x=20; x<25; ++x) { - h_id[n]=id; - h_x[n]=x; - h_y[n]=x; - h_adc[n]=1000; + { + // isolated + int id = 42; + int x = 10; + ++ncl; + h_id[n] = id; + h_x[n] = x; + h_y[n] = x; + h_adc[n] = kn == 0 ? 100 : 5000; ++n; - } - ++ncl; - // reversed - for (int x=45; x>40; --x) { - h_id[n]=id; - h_x[n]=x; - h_y[n]=x; - h_adc[n]=1000; + + // first column + ++ncl; + h_id[n] = id; + h_x[n] = x; + h_y[n] = 0; + h_adc[n] = 5000; ++n; - } - ++ncl; - h_id[n++]=InvId; // error - // messy - int xx[5] = {21,25,23,24,22}; - for (int k=0; k<5; ++k) { - h_id[n]=id; - h_x[n]=xx[k]; - h_y[n]=20+xx[k]; - h_adc[n]=1000; + // first columns + ++ncl; + h_id[n] = id; + h_x[n] = x + 80; + h_y[n] = 2; + h_adc[n] = 5000; ++n; - } - // holes - ++ncl; - for (int k=0; k<5; ++k) { - h_id[n]=id; - h_x[n]=xx[k]; - h_y[n]=100; - h_adc[n]= kn==2 ? 
100 : 1000; + h_id[n] = id; + h_x[n] = x + 80; + h_y[n] = 1; + h_adc[n] = 5000; + ++n; + + // last column + ++ncl; + h_id[n] = id; + h_x[n] = x; + h_y[n] = 415; + h_adc[n] = 5000; + ++n; + // last columns + ++ncl; + h_id[n] = id; + h_x[n] = x + 80; + h_y[n] = 415; + h_adc[n] = 2500; ++n; - if (xx[k]%2==0) { - h_id[n]=id; - h_x[n]=xx[k]; - h_y[n]=101; - h_adc[n]=1000; + h_id[n] = id; + h_x[n] = x + 80; + h_y[n] = 414; + h_adc[n] = 2500; ++n; + + // diagonal + ++ncl; + for (int x = 20; x < 25; ++x) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = x; + h_adc[n] = 1000; + ++n; } - } - } - { - // id == 0 (make sure it works! - int id = 0; - int x = 10; - ++ncl; - h_id[n]=id; - h_x[n]=x; - h_y[n]=x; - h_adc[n]=5000; - ++n; - } - // all odd id - for(int id=11; id<=1800; id+=2) { - if ( (id/20)%2) h_id[n++]=InvId; // error - for (int x=0; x<40; x+=4) { ++ncl; - if ((id/10)%2) { - for (int k=0; k<10; ++k) { - h_id[n]=id; - h_x[n]=x; - h_y[n]=x+y[k]; - h_adc[n]=100; - ++n; - h_id[n]=id; - h_x[n]=x+1; - h_y[n]=x+y[k]+2; - h_adc[n]=1000; + // reversed + for (int x = 45; x > 40; --x) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = x; + h_adc[n] = 1000; + ++n; + } + ++ncl; + h_id[n++] = InvId; // error + // messy + int xx[5] = {21, 25, 23, 24, 22}; + for (int k = 0; k < 5; ++k) { + h_id[n] = id; + h_x[n] = xx[k]; + h_y[n] = 20 + xx[k]; + h_adc[n] = 1000; + ++n; + } + // holes + ++ncl; + for (int k = 0; k < 5; ++k) { + h_id[n] = id; + h_x[n] = xx[k]; + h_y[n] = 100; + h_adc[n] = kn == 2 ? 100 : 1000; + ++n; + if (xx[k] % 2 == 0) { + h_id[n] = id; + h_x[n] = xx[k]; + h_y[n] = 101; + h_adc[n] = 1000; ++n; } - } else { - for (int k=0; k<10; ++k) { - h_id[n]=id; - h_x[n]=x; - h_y[n]=x+y[9-k]; - h_adc[n]= kn==2 ? 10 : 1000; - ++n; - if (y[k]==3) continue; // hole - if (id==51) {h_id[n++]=InvId; h_id[n++]=InvId; }// error - h_id[n]=id; - h_x[n]=x+1; - h_y[n]=x+y[k]+2; - h_adc[n]= kn==2 ? 10 : 1000; - ++n; + } + } + { + // id == 0 (make sure it works! 
+ int id = 0; + int x = 10; + ++ncl; + h_id[n] = id; + h_x[n] = x; + h_y[n] = x; + h_adc[n] = 5000; + ++n; + } + // all odd id + for (int id = 11; id <= 1800; id += 2) { + if ((id / 20) % 2) + h_id[n++] = InvId; // error + for (int x = 0; x < 40; x += 4) { + ++ncl; + if ((id / 10) % 2) { + for (int k = 0; k < 10; ++k) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = x + y[k]; + h_adc[n] = 100; + ++n; + h_id[n] = id; + h_x[n] = x + 1; + h_y[n] = x + y[k] + 2; + h_adc[n] = 1000; + ++n; + } + } else { + for (int k = 0; k < 10; ++k) { + h_id[n] = id; + h_x[n] = x; + h_y[n] = x + y[9 - k]; + h_adc[n] = kn == 2 ? 10 : 1000; + ++n; + if (y[k] == 3) + continue; // hole + if (id == 51) { + h_id[n++] = InvId; + h_id[n++] = InvId; + } // error + h_id[n] = id; + h_x[n] = x + 1; + h_y[n] = x + y[k] + 2; + h_adc[n] = kn == 2 ? 10 : 1000; + ++n; + } } } } - } - }; // end lambda - for (auto kkk=0; kkk<5; ++kkk) { - n=0; ncl=0; - generateClusters(kkk); + }; // end lambda + for (auto kkk = 0; kkk < 5; ++kkk) { + n = 0; + ncl = 0; + generateClusters(kkk); - std::cout << "created " << n << " digis in " << ncl << " clusters" << std::endl; - assert(n<=numElements); + std::cout << "created " << n << " digis in " << ncl << " clusters" << std::endl; + assert(n <= numElements); - - uint32_t nModules=0; + uint32_t nModules = 0; #ifdef __CUDACC__ - size_t size32 = n * sizeof(unsigned int); - size_t size16 = n * sizeof(unsigned short); - // size_t size8 = n * sizeof(uint8_t); - - cuda::memory::copy(d_moduleStart.get(),&nModules,sizeof(uint32_t)); - - cuda::memory::copy(d_id.get(), h_id.get(), size16); - cuda::memory::copy(d_x.get(), h_x.get(), size16); - cuda::memory::copy(d_y.get(), h_y.get(), size16); - cuda::memory::copy(d_adc.get(), h_adc.get(), size16); - // Launch CUDA Kernels - int threadsPerBlock = (kkk==5) ? 512 : ((kkk==3) ? 
128 : 256); - int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; - std::cout - << "CUDA countModules kernel launch with " << blocksPerGrid - << " blocks of " << threadsPerBlock << " threads\n"; - - cuda::launch( - countModules, - { blocksPerGrid, threadsPerBlock }, - d_id.get(), d_moduleStart.get() ,d_clus.get(),n - ); - - blocksPerGrid = MaxNumModules; //nModules; - - std::cout - << "CUDA findModules kernel launch with " << blocksPerGrid - << " blocks of " << threadsPerBlock << " threads\n"; - - cuda::memory::device::zero(d_clusInModule.get(),MaxNumModules*sizeof(uint32_t)); - - cuda::launch( - findClus, - { blocksPerGrid, threadsPerBlock }, - d_id.get(), d_x.get(), d_y.get(), - d_moduleStart.get(), - d_clusInModule.get(), d_moduleId.get(), - d_clus.get(), - n - ); + size_t size32 = n * sizeof(unsigned int); + size_t size16 = n * sizeof(unsigned short); + // size_t size8 = n * sizeof(uint8_t); + + cuda::memory::copy(d_moduleStart.get(), &nModules, sizeof(uint32_t)); + + cuda::memory::copy(d_id.get(), h_id.get(), size16); + cuda::memory::copy(d_x.get(), h_x.get(), size16); + cuda::memory::copy(d_y.get(), h_y.get(), size16); + cuda::memory::copy(d_adc.get(), h_adc.get(), size16); + // Launch CUDA Kernels + int threadsPerBlock = (kkk == 5) ? 512 : ((kkk == 3) ? 
128 : 256); + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA countModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock + << " threads\n"; + + cuda::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); + + blocksPerGrid = MaxNumModules; //nModules; + + std::cout << "CUDA findModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock + << " threads\n"; + + cuda::memory::device::zero(d_clusInModule.get(), MaxNumModules * sizeof(uint32_t)); + + cuda::launch(findClus, + {blocksPerGrid, threadsPerBlock}, + d_id.get(), + d_x.get(), + d_y.get(), + d_moduleStart.get(), + d_clusInModule.get(), + d_moduleId.get(), + d_clus.get(), + n); cudaDeviceSynchronize(); - cuda::memory::copy(&nModules,d_moduleStart.get(),sizeof(uint32_t)); + cuda::memory::copy(&nModules, d_moduleStart.get(), sizeof(uint32_t)); uint32_t nclus[MaxNumModules], moduleId[nModules]; - cuda::memory::copy(&nclus,d_clusInModule.get(),MaxNumModules*sizeof(uint32_t)); - - std::cout << "before charge cut found " << std::accumulate(nclus,nclus+MaxNumModules,0) << " clusters" << std::endl; - for (auto i=MaxNumModules; i>0; i--) if (nclus[i-1]>0) {std::cout << "last module is " << i-1 << ' ' << nclus[i-1] << std::endl; break;} - if (ncl!=std::accumulate(nclus,nclus+MaxNumModules,0)) std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; - - cuda::launch( - clusterChargeCut, - { blocksPerGrid, threadsPerBlock }, - d_id.get(), d_adc.get(), - d_moduleStart.get(), - d_clusInModule.get(), d_moduleId.get(), - d_clus.get(), - n - ); + cuda::memory::copy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t)); + std::cout << "before charge cut found " << std::accumulate(nclus, nclus + MaxNumModules, 0) << " clusters" + << std::endl; + for (auto i = MaxNumModules; i > 0; i--) + if (nclus[i - 1] > 0) { + std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; + break; + } + if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) + std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl; + + cuda::launch(clusterChargeCut, + {blocksPerGrid, threadsPerBlock}, + d_id.get(), + d_adc.get(), + d_moduleStart.get(), + d_clusInModule.get(), + d_moduleId.get(), + d_clus.get(), + n); cudaDeviceSynchronize(); #else - h_moduleStart[0]= nModules; - countModules(h_id.get(), h_moduleStart.get() ,h_clus.get(),n); - memset(h_clusInModule.get(),0,MaxNumModules*sizeof(uint32_t)); - gridDim.x = MaxNumModules; //not needed in the kernel for this specific case; - assert(blockIdx.x==0); - for (;blockIdx.x0; i--) if (nclus[i-1]>0) {std::cout << "last module is " << i-1 << ' ' << nclus[i-1] << std::endl; break;} - if (ncl!=std::accumulate(nclus,nclus+MaxNumModules,0)) std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl; - - gridDim.x = MaxNumModules; // no needed in the kernel for in this specific case - assert(blockIdx.x==0); - for (;blockIdx.x 0; i--) + if (nclus[i - 1] > 0) { + std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; + break; + } + if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) + std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; + + gridDim.x = MaxNumModules; // no needed in the kernel for in this specific case + assert(blockIdx.x == 0); + for (; blockIdx.x < gridDim.x; ++blockIdx.x) + clusterChargeCut( + h_id.get(), h_adc.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); + resetGrid(); #endif - std::cout << "found " << nModules << " Modules active" << std::endl; + std::cout << "found " << nModules << " Modules active" << std::endl; #ifdef __CUDACC__ - cuda::memory::copy(h_id.get(), d_id.get(), size16); - cuda::memory::copy(h_clus.get(), d_clus.get(), size32); - cuda::memory::copy(&nclus,d_clusInModule.get(),MaxNumModules*sizeof(uint32_t)); - cuda::memory::copy(&moduleId,d_moduleId.get(),nModules*sizeof(uint32_t)); + cuda::memory::copy(h_id.get(), d_id.get(), size16); + cuda::memory::copy(h_clus.get(), d_clus.get(), size32); + cuda::memory::copy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t)); + cuda::memory::copy(&moduleId, d_moduleId.get(), nModules * sizeof(uint32_t)); #endif - std::set clids; - for (int i=0; i=0); - assert(h_clus[i] clids; + for (int i = 0; i < n; ++i) { + assert(h_id[i] != 666); // only noise + if (h_id[i] == InvId) + continue; + assert(h_clus[i] >= 0); + assert(h_clus[i] < int(nclus[h_id[i]])); + clids.insert(h_id[i] * 1000 + h_clus[i]); + // clids.insert(h_clus[i]); + } - // verify no hole in numbering - auto p = clids.begin(); - auto cmid = (*p)/1000; - assert (0==(*p)%1000); - auto c= p; ++c; - std::cout << "first clusters " << *p << ' ' << *c << ' ' << nclus[cmid] << ' ' << nclus[(*c)/1000] << std::endl; - std::cout << "last cluster " << *clids.rbegin() << ' ' << nclus[(*clids.rbegin())/1000] << std::endl; - for(;c!=clids.end(); ++c) { - auto cc = *c; - auto pp = *p; - auto mid = cc/1000; - auto pnc = pp%1000; - auto nc = cc%1000; - if(mid!=cmid) { - assert (0==cc%1000); - assert (nclus[cmid]-1 == pp%1000); - // if (nclus[cmid]-1 != pp%1000) std::cout << "error size " 
<< mid << ": " << nclus[mid] << ' ' << pp << std::endl; - cmid=mid; - p=c; - continue; - } - p=c; - // assert(nc==pnc+1); - if (nc!=pnc+1) std::cout << "error " << mid << ": " << nc << ' ' << pnc << std::endl; - } + // verify no hole in numbering + auto p = clids.begin(); + auto cmid = (*p) / 1000; + assert(0 == (*p) % 1000); + auto c = p; + ++c; + std::cout << "first clusters " << *p << ' ' << *c << ' ' << nclus[cmid] << ' ' << nclus[(*c) / 1000] << std::endl; + std::cout << "last cluster " << *clids.rbegin() << ' ' << nclus[(*clids.rbegin()) / 1000] << std::endl; + for (; c != clids.end(); ++c) { + auto cc = *c; + auto pp = *p; + auto mid = cc / 1000; + auto pnc = pp % 1000; + auto nc = cc % 1000; + if (mid != cmid) { + assert(0 == cc % 1000); + assert(nclus[cmid] - 1 == pp % 1000); + // if (nclus[cmid]-1 != pp%1000) std::cout << "error size " << mid << ": " << nclus[mid] << ' ' << pp << std::endl; + cmid = mid; + p = c; + continue; + } + p = c; + // assert(nc==pnc+1); + if (nc != pnc + 1) + std::cout << "error " << mid << ": " << nc << ' ' << pnc << std::endl; + } - std::cout << "found " << std::accumulate(nclus,nclus+MaxNumModules,0) << ' ' << clids.size() << " clusters" << std::endl; - for(auto i=MaxNumModules; i>0; i--) if (nclus[i-1]>0) {std::cout << "last module is " << i-1 << ' ' << nclus[i-1] << std::endl; break;} - // << " and " << seeds.size() << " seeds" << std::endl; - } /// end loop kkk + std::cout << "found " << std::accumulate(nclus, nclus + MaxNumModules, 0) << ' ' << clids.size() << " clusters" + << std::endl; + for (auto i = MaxNumModules; i > 0; i--) + if (nclus[i - 1] > 0) { + std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; + break; + } + // << " and " << seeds.size() << " seeds" << std::endl; + } /// end loop kkk return 0; } diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 82d71ce19f01e..f0ed4f2574528 100644 --- 
a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -14,86 +14,83 @@ #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" class MagneticField; -class PixelCPEFast final : public PixelCPEBase -{ +class PixelCPEFast final : public PixelCPEBase { public: - struct ClusterParamGeneric : ClusterParam - { - ClusterParamGeneric() {} - ClusterParamGeneric(const SiPixelCluster & cl) : ClusterParam(cl){} - - // The truncation value pix_maximum is an angle-dependent cutoff on the - // individual pixel signals. It should be applied to all pixels in the - // cluster [signal_i = fminf(signal_i, pixmax)] before the column and row - // sums are made. Morris - int pixmx; - - // These are errors predicted by PIXELAV - float sigmay; // CPE Generic y-error for multi-pixel cluster - float sigmax; // CPE Generic x-error for multi-pixel cluster - float sy1; // CPE Generic y-error for single single-pixel - float sy2; // CPE Generic y-error for single double-pixel cluster - float sx1; // CPE Generic x-error for single single-pixel cluster - float sx2; // CPE Generic x-error for single double-pixel cluster - - }; - - PixelCPEFast(edm::ParameterSet const& conf, const MagneticField *, - const TrackerGeometry&, const TrackerTopology&, const SiPixelLorentzAngle *, - const SiPixelGenErrorDBObject *, const SiPixelLorentzAngle *); - - - ~PixelCPEFast() override; - - // The return value can only be used safely in kernels launched on - // the same cudaStream, or after cudaStreamSynchronize. - const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const; + struct ClusterParamGeneric : ClusterParam { + ClusterParamGeneric() {} + ClusterParamGeneric(const SiPixelCluster &cl) : ClusterParam(cl) {} + + // The truncation value pix_maximum is an angle-dependent cutoff on the + // individual pixel signals. 
It should be applied to all pixels in the + // cluster [signal_i = fminf(signal_i, pixmax)] before the column and row + // sums are made. Morris + int pixmx; + + // These are errors predicted by PIXELAV + float sigmay; // CPE Generic y-error for multi-pixel cluster + float sigmax; // CPE Generic x-error for multi-pixel cluster + float sy1; // CPE Generic y-error for single single-pixel + float sy2; // CPE Generic y-error for single double-pixel cluster + float sx1; // CPE Generic x-error for single single-pixel cluster + float sx2; // CPE Generic x-error for single double-pixel cluster + }; + + PixelCPEFast(edm::ParameterSet const &conf, + const MagneticField *, + const TrackerGeometry &, + const TrackerTopology &, + const SiPixelLorentzAngle *, + const SiPixelGenErrorDBObject *, + const SiPixelLorentzAngle *); + + ~PixelCPEFast() override; + + // The return value can only be used safely in kernels launched on + // the same cudaStream, or after cudaStreamSynchronize. + const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const; private: - ClusterParam * createClusterParam(const SiPixelCluster & cl) const override; - - LocalPoint localPosition (DetParam const & theDetParam, ClusterParam & theClusterParam) const override; - LocalError localError (DetParam const & theDetParam, ClusterParam & theClusterParam) const override; - - void errorFromTemplates(DetParam const & theDetParam, ClusterParamGeneric & theClusterParam, float qclus) const; - - static void - collect_edge_charges(ClusterParam & theClusterParam, //!< input, the cluster - int & Q_f_X, //!< output, Q first in X - int & Q_l_X, //!< output, Q last in X - int & Q_f_Y, //!< output, Q first in Y - int & Q_l_Y, //!< output, Q last in Y - bool truncate - ); - - - bool UseErrorsFromTemplates_; - bool TruncatePixelCharge_; - - float EdgeClusterErrorX_; - float EdgeClusterErrorY_; - - std::vector xerr_barrel_l1_, yerr_barrel_l1_, xerr_barrel_ln_; - std::vector yerr_barrel_ln_, 
xerr_endcap_, yerr_endcap_; - float xerr_barrel_l1_def_, yerr_barrel_l1_def_, xerr_barrel_ln_def_; - float yerr_barrel_ln_def_, xerr_endcap_def_, yerr_endcap_def_; - - //--- DB Error Parametrization object, new light templates - std::vector< SiPixelGenErrorStore > thePixelGenError_; - - std::vector> m_detParamsGPU; - pixelCPEforGPU::CommonParams m_commonParamsGPU; - pixelCPEforGPU::LayerGeometry m_layerGeometry; - - struct GPUData { - ~GPUData(); - // not needed if not used on CPU... - pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; - pixelCPEforGPU::ParamsOnGPU * d_paramsOnGPU = nullptr; // copy of the above on the Device - }; - CUDAESProduct gpuData_; - - void fillParamsForGpu(); + ClusterParam *createClusterParam(const SiPixelCluster &cl) const override; + + LocalPoint localPosition(DetParam const &theDetParam, ClusterParam &theClusterParam) const override; + LocalError localError(DetParam const &theDetParam, ClusterParam &theClusterParam) const override; + + void errorFromTemplates(DetParam const &theDetParam, ClusterParamGeneric &theClusterParam, float qclus) const; + + static void collect_edge_charges(ClusterParam &theClusterParam, //!< input, the cluster + int &Q_f_X, //!< output, Q first in X + int &Q_l_X, //!< output, Q last in X + int &Q_f_Y, //!< output, Q first in Y + int &Q_l_Y, //!< output, Q last in Y + bool truncate); + + bool UseErrorsFromTemplates_; + bool TruncatePixelCharge_; + + float EdgeClusterErrorX_; + float EdgeClusterErrorY_; + + std::vector xerr_barrel_l1_, yerr_barrel_l1_, xerr_barrel_ln_; + std::vector yerr_barrel_ln_, xerr_endcap_, yerr_endcap_; + float xerr_barrel_l1_def_, yerr_barrel_l1_def_, xerr_barrel_ln_def_; + float yerr_barrel_ln_def_, xerr_endcap_def_, yerr_endcap_def_; + + //--- DB Error Parametrization object, new light templates + std::vector thePixelGenError_; + + std::vector> m_detParamsGPU; + pixelCPEforGPU::CommonParams m_commonParamsGPU; + pixelCPEforGPU::LayerGeometry m_layerGeometry; + + struct GPUData { + ~GPUData(); + 
// not needed if not used on CPU... + pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; + pixelCPEforGPU::ParamsOnGPU *d_paramsOnGPU = nullptr; // copy of the above on the Device + }; + CUDAESProduct gpuData_; + + void fillParamsForGpu(); }; -#endif // RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h +#endif // RecoLocalTracker_SiPixelRecHits_PixelCPEFast_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index c33ccc85b16cb..5c1a14a63fc65 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -9,8 +9,8 @@ #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "DataFormats/GeometrySurface/interface/SOARotation.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_cxx17.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_cxx17.h" namespace pixelCPEforGPU { @@ -37,46 +37,40 @@ namespace pixelCPEforGPU { float chargeWidthX; float chargeWidthY; - float x0,y0,z0; // the vertex in the local coord of the detector + float x0, y0, z0; // the vertex in the local coord of the detector - float sx[3], sy[3]; // the errors... + float sx[3], sy[3]; // the errors... 
Frame frame; }; - struct LayerGeometry { uint32_t layerStart[phase1PixelTopology::numberOfLayers + 1]; - uint8_t layer[phase1PixelTopology::layerIndexSize]; + uint8_t layer[phase1PixelTopology::layerIndexSize]; }; - struct ParamsOnGPU { - - CommonParams * m_commonParams; - DetParams * m_detParams; - LayerGeometry * m_layerGeometry; + struct ParamsOnGPU { + CommonParams* m_commonParams; + DetParams* m_detParams; + LayerGeometry* m_layerGeometry; - constexpr - CommonParams const & __restrict__ commonParams() const { - CommonParams const * __restrict__ l = m_commonParams; - return *l; + constexpr CommonParams const& __restrict__ commonParams() const { + CommonParams const* __restrict__ l = m_commonParams; + return *l; } - constexpr - DetParams const & __restrict__ detParams(int i) const { - DetParams const * __restrict__ l = m_detParams; - return l[i]; + constexpr DetParams const& __restrict__ detParams(int i) const { + DetParams const* __restrict__ l = m_detParams; + return l[i]; } - constexpr - LayerGeometry const & __restrict__ layerGeometry() const { - return *m_layerGeometry; - } - - __device__ uint8_t layer(uint16_t id) const { return __ldg(m_layerGeometry->layer+id/phase1PixelTopology::maxModuleStride);}; + constexpr LayerGeometry const& __restrict__ layerGeometry() const { return *m_layerGeometry; } + __device__ uint8_t layer(uint16_t id) const { + return __ldg(m_layerGeometry->layer + id / phase1PixelTopology::maxModuleStride); + }; }; // SOA (on device) - template + template struct ClusParamsT { uint32_t minRow[N]; uint32_t maxRow[N]; @@ -96,17 +90,15 @@ namespace pixelCPEforGPU { float xerr[N]; float yerr[N]; - int16_t xsize[N]; // clipped at 127 if negative is edge.... + int16_t xsize[N]; // clipped at 127 if negative is edge.... 
int16_t ysize[N]; - }; - constexpr uint32_t MaxHitsInModule = gpuClustering::MaxHitsInModule; using ClusParams = ClusParamsT; - constexpr inline - void computeAnglesFromDet(DetParams const & __restrict__ detParams, float const x, float const y, float & cotalpha, float & cotbeta) { + constexpr inline void computeAnglesFromDet( + DetParams const& __restrict__ detParams, float const x, float const y, float& cotalpha, float& cotbeta) { // x,y local position on det auto gvx = x - detParams.x0; auto gvy = y - detParams.y0; @@ -114,24 +106,22 @@ namespace pixelCPEforGPU { // normalization not required as only ratio used... // calculate angles cotalpha = gvx * gvz; - cotbeta = gvy * gvz; + cotbeta = gvy * gvz; } - constexpr inline - float correction( - int sizeM1, - int Q_f, //!< Charge in the first pixel. - int Q_l, //!< Charge in the last pixel. - uint16_t upper_edge_first_pix, //!< As the name says. - uint16_t lower_edge_last_pix, //!< As the name says. - float lorentz_shift, //!< L-shift at half thickness - float theThickness, //detector thickness - float cot_angle, //!< cot of alpha_ or beta_ - float pitch, //!< thePitchX or thePitchY - bool first_is_big, //!< true if the first is big - bool last_is_big ) //!< true if the last is big + constexpr inline float correction(int sizeM1, + int Q_f, //!< Charge in the first pixel. + int Q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says. + float lorentz_shift, //!< L-shift at half thickness + float theThickness, //detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big) //!< true if the last is big { - if (0 == sizeM1) // size 1 + if (0 == sizeM1) // size 1 return 0; float W_eff = 0; @@ -140,11 +130,11 @@ namespace pixelCPEforGPU { //--- Width of the clusters minus the edge (first and last) pixels. 
//--- In the note, they are denoted x_F and x_L (and y_F and y_L) // assert(lower_edge_last_pix >= upper_edge_first_pix); - auto W_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm + auto W_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm //--- Predicted charge width from geometry - auto W_pred = theThickness * cot_angle // geometric correction (in cm) - - lorentz_shift; // (in cm) &&& check fpix! + auto W_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! W_eff = std::abs(W_pred) - W_inner; @@ -152,34 +142,38 @@ namespace pixelCPEforGPU { //--- based on the track, do *not* use W_pred-W_inner. Instead, replace //--- it with an *average* effective charge width, which is the average //--- length of the edge pixels. - simple = (W_eff < 0.0f) | (W_eff > pitch); // this produces "large" regressions for very small numeric differences... + simple = + (W_eff < 0.0f) | (W_eff > pitch); // this produces "large" regressions for very small numeric differences... } if (simple) { //--- Total length of the two edge pixels (first+last) float sum_of_edge = 2.0f; - if (first_is_big) sum_of_edge += 1.0f; - if (last_is_big) sum_of_edge += 1.0f; - W_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) + if (first_is_big) + sum_of_edge += 1.0f; + if (last_is_big) + sum_of_edge += 1.0f; + W_eff = pitch * 0.5f * sum_of_edge; // ave. 
length of edge pixels (first+last) (cm) } //--- Finally, compute the position in this projection float Qdiff = Q_l - Q_f; - float Qsum = Q_l + Q_f; + float Qsum = Q_l + Q_f; //--- Temporary fix for clusters with both first and last pixel with charge = 0 if (Qsum == 0) Qsum = 1.0f; - return 0.5f * (Qdiff/Qsum) * W_eff; + return 0.5f * (Qdiff / Qsum) * W_eff; } - constexpr inline - void position(CommonParams const & __restrict__ comParams, DetParams const & __restrict__ detParams, ClusParams & cp, uint32_t ic) { - + constexpr inline void position(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { //--- Upper Right corner of Lower Left pixel -- in measurement frame - uint16_t llx = cp.minRow[ic]+1; - uint16_t lly = cp.minCol[ic]+1; + uint16_t llx = cp.minRow[ic] + 1; + uint16_t lly = cp.minCol[ic] + 1; //--- Lower Left corner of Upper Right pixel -- in measurement frame uint16_t urx = cp.maxRow[ic]; @@ -190,85 +184,96 @@ namespace pixelCPEforGPU { auto urxl = phase1PixelTopology::localX(urx); auto uryl = phase1PixelTopology::localY(ury); - auto mx = llxl+urxl; - auto my = llyl+uryl; - - auto xsize = int(urxl)+2-int(llxl); - auto ysize = int(uryl)+2-int(llyl); - assert(xsize>=0); // 0 if bixpix... - assert(ysize>=0); + auto mx = llxl + urxl; + auto my = llyl + uryl; - if(phase1PixelTopology::isBigPixX(cp.minRow[ic])) ++xsize; - if(phase1PixelTopology::isBigPixX(cp.maxRow[ic])) ++xsize; - if(phase1PixelTopology::isBigPixY(cp.minCol[ic])) ++ysize; - if(phase1PixelTopology::isBigPixY(cp.maxCol[ic])) ++ysize; + auto xsize = int(urxl) + 2 - int(llxl); + auto ysize = int(uryl) + 2 - int(llyl); + assert(xsize >= 0); // 0 if bixpix... 
+ assert(ysize >= 0); - int unbalanceX = 8.*std::abs(float(cp.Q_f_X[ic]-cp.Q_l_X[ic]))/float(cp.Q_f_X[ic]+cp.Q_l_X[ic]); - int unbalanceY = 8.*std::abs(float(cp.Q_f_Y[ic]-cp.Q_l_Y[ic]))/float(cp.Q_f_Y[ic]+cp.Q_l_Y[ic]); - xsize = 8*xsize - unbalanceX; - ysize = 8*ysize - unbalanceY; + if (phase1PixelTopology::isBigPixX(cp.minRow[ic])) + ++xsize; + if (phase1PixelTopology::isBigPixX(cp.maxRow[ic])) + ++xsize; + if (phase1PixelTopology::isBigPixY(cp.minCol[ic])) + ++ysize; + if (phase1PixelTopology::isBigPixY(cp.maxCol[ic])) + ++ysize; - cp.xsize[ic] = std::min(xsize,1023); - cp.ysize[ic] = std::min(ysize,1023); + int unbalanceX = 8. * std::abs(float(cp.Q_f_X[ic] - cp.Q_l_X[ic])) / float(cp.Q_f_X[ic] + cp.Q_l_X[ic]); + int unbalanceY = 8. * std::abs(float(cp.Q_f_Y[ic] - cp.Q_l_Y[ic])) / float(cp.Q_f_Y[ic] + cp.Q_l_Y[ic]); + xsize = 8 * xsize - unbalanceX; + ysize = 8 * ysize - unbalanceY; - if(cp.minRow[ic]==0 || cp.maxRow[ic]==phase1PixelTopology::lastRowInModule) cp.xsize[ic] = -cp.xsize[ic]; - if(cp.minCol[ic]==0 || cp.maxCol[ic]==phase1PixelTopology::lastColInModule) cp.ysize[ic] = -cp.ysize[ic]; + cp.xsize[ic] = std::min(xsize, 1023); + cp.ysize[ic] = std::min(ysize, 1023); + if (cp.minRow[ic] == 0 || cp.maxRow[ic] == phase1PixelTopology::lastRowInModule) + cp.xsize[ic] = -cp.xsize[ic]; + if (cp.minCol[ic] == 0 || cp.maxCol[ic] == phase1PixelTopology::lastColInModule) + cp.ysize[ic] = -cp.ysize[ic]; // apply the lorentz offset correction - auto xPos = detParams.shiftX + comParams.thePitchX*(0.5f*float(mx)+float(phase1PixelTopology::xOffset)); - auto yPos = detParams.shiftY + comParams.thePitchY*(0.5f*float(my)+float(phase1PixelTopology::yOffset)); + auto xPos = detParams.shiftX + comParams.thePitchX * (0.5f * float(mx) + float(phase1PixelTopology::xOffset)); + auto yPos = detParams.shiftY + comParams.thePitchY * (0.5f * float(my) + float(phase1PixelTopology::yOffset)); - float cotalpha=0, cotbeta=0; + float cotalpha = 0, cotbeta = 0; - 
computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); + computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); auto thickness = detParams.isBarrel ? comParams.theThicknessB : comParams.theThicknessE; - auto xcorr = correction( - cp.maxRow[ic]-cp.minRow[ic], - cp.Q_f_X[ic], cp.Q_l_X[ic], - llxl, urxl, - detParams.chargeWidthX, // lorentz shift in cm - thickness, - cotalpha, - comParams.thePitchX, - phase1PixelTopology::isBigPixX(cp.minRow[ic]), - phase1PixelTopology::isBigPixX(cp.maxRow[ic]) ); - - auto ycorr = correction( - cp.maxCol[ic]-cp.minCol[ic], - cp.Q_f_Y[ic], cp.Q_l_Y[ic], - llyl, uryl, - detParams.chargeWidthY, // lorentz shift in cm - thickness, - cotbeta, - comParams.thePitchY, - phase1PixelTopology::isBigPixY(cp.minCol[ic]), - phase1PixelTopology::isBigPixY(cp.maxCol[ic]) ); - - cp.xpos[ic]=xPos+xcorr; - cp.ypos[ic]=yPos+ycorr; + auto xcorr = correction(cp.maxRow[ic] - cp.minRow[ic], + cp.Q_f_X[ic], + cp.Q_l_X[ic], + llxl, + urxl, + detParams.chargeWidthX, // lorentz shift in cm + thickness, + cotalpha, + comParams.thePitchX, + phase1PixelTopology::isBigPixX(cp.minRow[ic]), + phase1PixelTopology::isBigPixX(cp.maxRow[ic])); + + auto ycorr = correction(cp.maxCol[ic] - cp.minCol[ic], + cp.Q_f_Y[ic], + cp.Q_l_Y[ic], + llyl, + uryl, + detParams.chargeWidthY, // lorentz shift in cm + thickness, + cotbeta, + comParams.thePitchY, + phase1PixelTopology::isBigPixY(cp.minCol[ic]), + phase1PixelTopology::isBigPixY(cp.maxCol[ic])); + + cp.xpos[ic] = xPos + xcorr; + cp.ypos[ic] = yPos + ycorr; } - constexpr inline - void errorFromSize(CommonParams const & __restrict__ comParams, DetParams const & __restrict__ detParams, ClusParams & cp, uint32_t ic) { + constexpr inline void errorFromSize(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { // Edge cluster errors - cp.xerr[ic]= 0.0050; - cp.yerr[ic]= 0.0085; + cp.xerr[ic] = 0.0050; + cp.yerr[ic] = 0.0085; // FIXME these are 
errors form Run1 - constexpr float xerr_barrel_l1[] = { 0.00115, 0.00120, 0.00088 }; - constexpr float xerr_barrel_l1_def = 0.00200; // 0.01030; - constexpr float yerr_barrel_l1[] = { 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240 }; + constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; + constexpr float xerr_barrel_l1_def = 0.00200; // 0.01030; + constexpr float yerr_barrel_l1[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; constexpr float yerr_barrel_l1_def = 0.00210; - constexpr float xerr_barrel_ln[] = { 0.00115, 0.00120, 0.00088 }; - constexpr float xerr_barrel_ln_def = 0.00200; // 0.01030; - constexpr float yerr_barrel_ln[] = { 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240 }; + constexpr float xerr_barrel_ln[] = {0.00115, 0.00120, 0.00088}; + constexpr float xerr_barrel_ln_def = 0.00200; // 0.01030; + constexpr float yerr_barrel_ln[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; constexpr float yerr_barrel_ln_def = 0.00210; - constexpr float xerr_endcap[] = { 0.0020, 0.0020 }; + constexpr float xerr_endcap[] = {0.0020, 0.0020}; constexpr float xerr_endcap_def = 0.0020; - constexpr float yerr_endcap[] = { 0.00210 }; + constexpr float yerr_endcap[] = {0.00210}; constexpr float yerr_endcap_def = 0.00210; auto sx = cp.maxRow[ic] - cp.minRow[ic]; @@ -278,37 +283,37 @@ namespace pixelCPEforGPU { bool isEdgeX = cp.minRow[ic] == 0 or cp.maxRow[ic] == phase1PixelTopology::lastRowInModule; bool isEdgeY = cp.minCol[ic] == 0 or cp.maxCol[ic] == phase1PixelTopology::lastColInModule; // is one and big? 
- bool isBig1X = (0==sx) && phase1PixelTopology::isBigPixX(cp.minRow[ic]); - bool isBig1Y = (0==sy) && phase1PixelTopology::isBigPixY(cp.minCol[ic]); + bool isBig1X = (0 == sx) && phase1PixelTopology::isBigPixX(cp.minRow[ic]); + bool isBig1Y = (0 == sy) && phase1PixelTopology::isBigPixY(cp.minCol[ic]); - - if (!isEdgeX && !isBig1X ) { + if (!isEdgeX && !isBig1X) { if (not detParams.isBarrel) { - cp.xerr[ic] = sx + #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/CUDAProduct.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" - - #include "DataFormats/Common/interface/Handle.h" #include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/Utilities/interface/InputTag.h" #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" - -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" - -#include "CUDADataFormats/Common/interface/CUDAProduct.h" #include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" - +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include 
"RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" -#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" #include "PixelRecHits.h" // TODO : spit product from kernel -#include - class SiPixelRecHitCUDA : public edm::global::EDProducer<> { - public: - - explicit SiPixelRecHitCUDA(const edm::ParameterSet& iConfig); ~SiPixelRecHitCUDA() override = default; static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; // The mess with inputs will be cleaned up when migrating to the new framework edm::EDGetTokenT> tBeamSpot; edm::EDGetTokenT> token_; edm::EDGetTokenT> tokenDigi_; - + edm::EDPutTokenT> tokenHit_; std::string cpeName_; pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_; - }; -SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig): - tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), - token_(consumes>(iConfig.getParameter("src"))), - tokenDigi_(consumes>(iConfig.getParameter("src"))), - tokenHit_(produces>()), - cpeName_(iConfig.getParameter("CPE")) -{} +SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig) + : tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), + token_(consumes>(iConfig.getParameter("src"))), + tokenDigi_(consumes>(iConfig.getParameter("src"))), + tokenHit_(produces>()), + cpeName_(iConfig.getParameter("CPE")) {} void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -71,14 +60,12 @@ void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descrip desc.add("beamSpot", edm::InputTag("offlineBeamSpotCUDA")); desc.add("src", edm::InputTag("siPixelClustersCUDAPreSplitting")); desc.add("CPE", "PixelCPEFast"); - descriptions.add("siPixelRecHitCUDA",desc); + descriptions.add("siPixelRecHitCUDA", desc); } - void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& 
iEvent, const edm::EventSetup& es) const { - // const TrackerGeometry *geom_ = nullptr; - const PixelClusterParameterEstimator *cpe_ = nullptr; + const PixelClusterParameterEstimator* cpe_ = nullptr; /* edm::ESHandle geom; @@ -88,9 +75,9 @@ void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, cons edm::ESHandle hCPE; es.get().get(cpeName_, hCPE); - cpe_ = dynamic_cast< const PixelCPEBase* >(hCPE.product()); + cpe_ = dynamic_cast(hCPE.product()); - PixelCPEFast const * fcpe = dynamic_cast(cpe_); + PixelCPEFast const* fcpe = dynamic_cast(cpe_); if (!fcpe) { throw cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; } @@ -109,10 +96,10 @@ void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, cons iEvent.getByToken(tBeamSpot, hbs); auto const& bs = ctx.get(*hbs); - ctx.emplace(iEvent,tokenHit_, - std::move( - gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe->getGPUProductAsync(ctx.stream()), ctx.stream()) - )); + ctx.emplace( + iEvent, + tokenHit_, + std::move(gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe->getGPUProductAsync(ctx.stream()), ctx.stream()))); } DEFINE_FWK_MODULE(SiPixelRecHitCUDA); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 2874df10c16c3..ff3abdce0997e 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -10,34 +10,30 @@ #include "DataFormats/Math/interface/approx_atan2.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -namespace gpuPixelRecHits { +namespace gpuPixelRecHits { - __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams, - BeamSpotCUDA::Data const * __restrict__ bs, - uint16_t const * __restrict__ id, - uint16_t const * __restrict__ x, - uint16_t const * 
__restrict__ y, - uint16_t const * __restrict__ adc, - uint32_t const * __restrict__ digiModuleStart, - uint32_t const * __restrict__ clusInModule, - uint32_t const * __restrict__ moduleId, - int32_t const * __restrict__ clus, - int numElements, - uint32_t const * __restrict__ hitsModuleStart, - TrackingRecHit2DSOAView * phits - ) -{ - - auto & hits = *phits; + __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams, + BeamSpotCUDA::Data const* __restrict__ bs, + uint16_t const* __restrict__ id, + uint16_t const* __restrict__ x, + uint16_t const* __restrict__ y, + uint16_t const* __restrict__ adc, + uint32_t const* __restrict__ digiModuleStart, + uint32_t const* __restrict__ clusInModule, + uint32_t const* __restrict__ moduleId, + int32_t const* __restrict__ clus, + int numElements, + uint32_t const* __restrict__ hitsModuleStart, + TrackingRecHit2DSOAView* phits) { + auto& hits = *phits; // to be moved in common namespace... - constexpr uint16_t InvId=9999; // must be > MaxNumModules + constexpr uint16_t InvId = 9999; // must be > MaxNumModules constexpr uint32_t MaxHitsInModule = pixelCPEforGPU::MaxHitsInModule; using ClusParams = pixelCPEforGPU::ClusParams; - // as usual one block per module __shared__ ClusParams clusParams; @@ -45,27 +41,33 @@ namespace gpuPixelRecHits { auto me = moduleId[blockIdx.x]; auto nclus = clusInModule[me]; - if (0==nclus) return; + if (0 == nclus) + return; #ifdef GPU_DEBUG - if (threadIdx.x==0) { - auto k=first; - while (id[k]==InvId) ++k; - assert(id[k]==me); + if (threadIdx.x == 0) { + auto k = first; + while (id[k] == InvId) + ++k; + assert(id[k] == me); } #endif #ifdef GPU_DEBUG - if (me%100==1) - if (threadIdx.x==0) printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, hitsModuleStart[me]); + if (me % 100 == 1) + if (threadIdx.x == 0) + printf("hitbuilder: %d clusters in module %d. 
will write at %d\n", nclus, me, hitsModuleStart[me]); #endif assert(blockDim.x >= MaxHitsInModule); - if (threadIdx.x==0 && nclus > MaxHitsInModule) { - printf("WARNING: too many clusters %d in Module %d. Only first %d processed\n", nclus,me,MaxHitsInModule); + if (threadIdx.x == 0 && nclus > MaxHitsInModule) { + printf("WARNING: too many clusters %d in Module %d. Only first %d processed\n", nclus, me, MaxHitsInModule); // zero charge: do not bother to do it in parallel - for (auto d=MaxHitsInModule; d= nclus) continue; + if (id[i] == InvId) + continue; // not valid + if (id[i] != me) + break; // end of module + if (clus[i] >= nclus) + continue; atomicMin(&clusParams.minRow[clus[i]], x[i]); atomicMax(&clusParams.maxRow[clus[i]], x[i]); atomicMin(&clusParams.minCol[clus[i]], y[i]); @@ -102,66 +107,72 @@ namespace gpuPixelRecHits { __syncthreads(); for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) continue; // not valid - if (id[i] != me) break; // end of module - if (clus[i] >= nclus) continue; + if (id[i] == InvId) + continue; // not valid + if (id[i] != me) + break; // end of module + if (clus[i] >= nclus) + continue; atomicAdd(&clusParams.charge[clus[i]], adc[i]); - if (clusParams.minRow[clus[i]]==x[i]) atomicAdd(&clusParams.Q_f_X[clus[i]], adc[i]); - if (clusParams.maxRow[clus[i]]==x[i]) atomicAdd(&clusParams.Q_l_X[clus[i]], adc[i]); - if (clusParams.minCol[clus[i]]==y[i]) atomicAdd(&clusParams.Q_f_Y[clus[i]], adc[i]); - if (clusParams.maxCol[clus[i]]==y[i]) atomicAdd(&clusParams.Q_l_Y[clus[i]], adc[i]); + if (clusParams.minRow[clus[i]] == x[i]) + atomicAdd(&clusParams.Q_f_X[clus[i]], adc[i]); + if (clusParams.maxRow[clus[i]] == x[i]) + atomicAdd(&clusParams.Q_l_X[clus[i]], adc[i]); + if (clusParams.minCol[clus[i]] == y[i]) + atomicAdd(&clusParams.Q_f_Y[clus[i]], adc[i]); + if (clusParams.maxCol[clus[i]] == y[i]) + atomicAdd(&clusParams.Q_l_Y[clus[i]], adc[i]); } __syncthreads(); // next one cluster per thread... 
- if (ic >= nclus) return; + if (ic >= nclus) + return; first = hitsModuleStart[me]; - auto h = first+ic; // output index in global memory + auto h = first + ic; // output index in global memory - if (h >= TrackingRecHit2DSOAView::maxHits()) return; // overflow... + if (h >= TrackingRecHit2DSOAView::maxHits()) + return; // overflow... pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - // store it hits.charge(h) = clusParams.charge[ic]; hits.detectorIndex(h) = me; - float xl,yl; - hits.xLocal(h) = xl = clusParams.xpos[ic]; - hits.yLocal(h) = yl = clusParams.ypos[ic]; + float xl, yl; + hits.xLocal(h) = xl = clusParams.xpos[ic]; + hits.yLocal(h) = yl = clusParams.ypos[ic]; hits.clusterSizeX(h) = clusParams.xsize[ic]; hits.clusterSizeY(h) = clusParams.ysize[ic]; - - hits.xerrLocal(h) = clusParams.xerr[ic]*clusParams.xerr[ic]; - hits.yerrLocal(h) = clusParams.yerr[ic]*clusParams.yerr[ic]; + hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic]; + hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic]; // keep it local for computations - float xg,yg,zg; - // to global and compute phi... - cpeParams->detParams(me).frame.toGlobal(xl,yl, xg,yg,zg); + float xg, yg, zg; + // to global and compute phi... + cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); // here correct for the beamspot... 
- xg-=bs->x; - yg-=bs->y; - zg-=bs->z; + xg -= bs->x; + yg -= bs->y; + zg -= bs->z; hits.xGlobal(h) = xg; hits.yGlobal(h) = yg; hits.zGlobal(h) = zg; - hits.rGlobal(h) = std::sqrt(xg*xg+yg*yg); - hits.iphi(h) = unsafe_atan2s<7>(yg,xg); - + hits.rGlobal(h) = std::sqrt(xg * xg + yg * yg); + hits.iphi(h) = unsafe_atan2s<7>(yg, xg); } -} +} // namespace gpuPixelRecHits -#endif // RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h +#endif // RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 1111e4866a8d2..3c36598f06b89 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -12,76 +12,85 @@ #include "HeterogeneousCore/CUDAServices/interface/numberOfCUDADevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "MagneticField/Engine/interface/MagneticField.h" - #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" // Services // this is needed to get errors from templates namespace { - constexpr float micronsToCm = 1.0e-4; + constexpr float micronsToCm = 1.0e-4; } - - //----------------------------------------------------------------------------- //! The constructor. 
//----------------------------------------------------------------------------- -PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf, - const MagneticField * mag, - const TrackerGeometry& geom, - const TrackerTopology& ttopo, - const SiPixelLorentzAngle * lorentzAngle, - const SiPixelGenErrorDBObject * genErrorDBObject, - const SiPixelLorentzAngle * lorentzAngleWidth) : - PixelCPEBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, nullptr, lorentzAngleWidth, 0) -{ - EdgeClusterErrorX_ = conf.getParameter("EdgeClusterErrorX"); - EdgeClusterErrorY_ = conf.getParameter("EdgeClusterErrorY"); - - UseErrorsFromTemplates_ = conf.getParameter("UseErrorsFromTemplates"); - TruncatePixelCharge_ = conf.getParameter("TruncatePixelCharge"); - - // Use errors from templates or from GenError - if ( UseErrorsFromTemplates_ ) { - if ( !SiPixelGenError::pushfile( *genErrorDBObject_, thePixelGenError_) ) - throw cms::Exception("InvalidCalibrationLoaded") - << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " - << ( *genErrorDBObject_ ).version(); - } - - // Rechit errors in case other, more correct, errors are not vailable - // This are constants. Maybe there is a more efficienct way to store them. 
- xerr_barrel_l1_ = { 0.00115, 0.00120, 0.00088 }; - xerr_barrel_l1_def_ = 0.01030; - yerr_barrel_l1_ = { 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240 }; - yerr_barrel_l1_def_ = 0.00210; - xerr_barrel_ln_ = { 0.00115, 0.00120, 0.00088}; - xerr_barrel_ln_def_ = 0.01030; - yerr_barrel_ln_ = { 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240 }; - yerr_barrel_ln_def_ = 0.00210; - xerr_endcap_ = { 0.0020, 0.0020 }; - xerr_endcap_def_ = 0.0020; - yerr_endcap_ = { 0.00210 }; - yerr_endcap_def_ = 0.00075; - - fillParamsForGpu(); +PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelLorentzAngle* lorentzAngleWidth) + : PixelCPEBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, nullptr, lorentzAngleWidth, 0) { + EdgeClusterErrorX_ = conf.getParameter("EdgeClusterErrorX"); + EdgeClusterErrorY_ = conf.getParameter("EdgeClusterErrorY"); + + UseErrorsFromTemplates_ = conf.getParameter("UseErrorsFromTemplates"); + TruncatePixelCharge_ = conf.getParameter("TruncatePixelCharge"); + + // Use errors from templates or from GenError + if (UseErrorsFromTemplates_) { + if (!SiPixelGenError::pushfile(*genErrorDBObject_, thePixelGenError_)) + throw cms::Exception("InvalidCalibrationLoaded") + << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " + << (*genErrorDBObject_).version(); + } + + // Rechit errors in case other, more correct, errors are not vailable + // This are constants. Maybe there is a more efficienct way to store them. 
+ xerr_barrel_l1_ = {0.00115, 0.00120, 0.00088}; + xerr_barrel_l1_def_ = 0.01030; + yerr_barrel_l1_ = {0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + yerr_barrel_l1_def_ = 0.00210; + xerr_barrel_ln_ = {0.00115, 0.00120, 0.00088}; + xerr_barrel_ln_def_ = 0.01030; + yerr_barrel_ln_ = {0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + yerr_barrel_ln_def_ = 0.00210; + xerr_endcap_ = {0.0020, 0.0020}; + xerr_endcap_def_ = 0.0020; + yerr_endcap_ = {0.00210}; + yerr_endcap_def_ = 0.00075; + + fillParamsForGpu(); } -const pixelCPEforGPU::ParamsOnGPU *PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { +const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) { - - // and now copy to device... - cudaCheck(cudaMalloc((void**) & data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); - cudaCheck(cudaMalloc((void**) & data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams))); - cudaCheck(cudaMalloc((void**) & data.h_paramsOnGPU.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); - cudaCheck(cudaMalloc((void**) & data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); - - cudaCheck(cudaMemcpyAsync(data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_commonParams, &this->m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_layerGeometry, &this->m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.data(), 
this->m_detParamsGPU.size()*sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault, stream.id())); - }); + // and now copy to device... + cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); + cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_detParams, + this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams))); + cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); + cudaCheck(cudaMalloc((void**)&data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); + + cudaCheck(cudaMemcpyAsync( + data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_commonParams, + &this->m_commonParamsGPU, + sizeof(pixelCPEforGPU::CommonParams), + cudaMemcpyDefault, + stream.id())); + cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_layerGeometry, + &this->m_layerGeometry, + sizeof(pixelCPEforGPU::LayerGeometry), + cudaMemcpyDefault, + stream.id())); + cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_detParams, + this->m_detParamsGPU.data(), + this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams), + cudaMemcpyDefault, + stream.id())); + }); return data.d_paramsOnGPU; } @@ -92,31 +101,31 @@ void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; uint32_t oldLayer = 0; - uint32_t oldLadder=0; - float rl=0; + uint32_t oldLadder = 0; + float rl = 0; float zl = 0; - float miz = 90, mxz=0; + float miz = 90, mxz = 0; float pl = 0; - int nl=0; + int nl = 0; m_detParamsGPU.resize(m_DetParams.size()); - for (auto i=0U; iindex()==int(i)); - assert(m_commonParamsGPU.thePitchY==p.thePitchY); - assert(m_commonParamsGPU.thePitchX==p.thePitchX); + assert(p.theDet->index() == int(i)); + assert(m_commonParamsGPU.thePitchY == p.thePitchY); + assert(m_commonParamsGPU.thePitchX == p.thePitchX); 
//assert(m_commonParamsGPU.theThickness==p.theThickness); g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); - g.isPosZ = p.theDet->surface().position().z()>0; + g.isPosZ = p.theDet->surface().position().z() > 0; g.layer = ttopo_.layer(p.theDet->geographicalId()); - g.index=i; // better be! + g.index = i; // better be! g.rawId = p.theDet->geographicalId(); - - assert( (g.isBarrel ?m_commonParamsGPU.theThicknessB : m_commonParamsGPU.theThicknessE) ==p.theThickness ); - //if (m_commonParamsGPU.theThickness!=p.theThickness) + assert((g.isBarrel ? m_commonParamsGPU.theThicknessB : m_commonParamsGPU.theThicknessE) == p.theThickness); + + //if (m_commonParamsGPU.theThickness!=p.theThickness) // std::cout << i << (g.isBarrel ? "B " : "E ") << m_commonParamsGPU.theThickness<<"!="<geographicalId()); @@ -124,23 +133,22 @@ void PixelCPEFast::fillParamsForGpu() { oldLayer = g.layer; // std::cout << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) << g.layer << " starting at " << g.rawId << std::endl; // std::cout << "old layer had " << nl << " ladders" << std::endl; - nl=0; + nl = 0; } if (oldLadder != ladder) { oldLadder = ladder; // std::cout << "new ladder at " << i << (g.isBarrel ? " B " : (g.isPosZ ? 
" E+ " : " E- ")) << ladder << " starting at " << g.rawId << std::endl; // std::cout << "old ladder ave z,r,p mz " << zl/8.f << " " << rl/8.f << " " << pl/8.f << ' ' << miz << ' ' << mxz << std::endl; - rl=0; + rl = 0; zl = 0; pl = 0; - miz=90; mxz=0; + miz = 90; + mxz = 0; nl++; } - - - g.shiftX = 0.5f*p.lorentzShiftInCmX; - g.shiftY = 0.5f*p.lorentzShiftInCmY; + g.shiftX = 0.5f * p.lorentzShiftInCmX; + g.shiftY = 0.5f * p.lorentzShiftInCmY; g.chargeWidthX = p.lorentzShiftInCmX * p.widthLAFractionX; g.chargeWidthY = p.lorentzShiftInCmY * p.widthLAFractionY; @@ -150,55 +158,52 @@ void PixelCPEFast::fillParamsForGpu() { auto vv = p.theDet->surface().position(); auto rr = pixelCPEforGPU::Rotation(p.theDet->surface().rotation()); - g.frame = pixelCPEforGPU::Frame(vv.x(),vv.y(),vv.z(),rr); + g.frame = pixelCPEforGPU::Frame(vv.x(), vv.y(), vv.z(), rr); - zl+=vv.z(); - miz = std::min(miz,std::abs(vv.z())); - mxz = std::max(mxz,std::abs(vv.z())); - rl+=vv.perp(); - pl+=vv.phi(); // (not obvious) + zl += vv.z(); + miz = std::min(miz, std::abs(vv.z())); + mxz = std::max(mxz, std::abs(vv.z())); + rl += vv.perp(); + pl += vv.phi(); // (not obvious) // errors ..... - ClusterParamGeneric cp; - auto gvx = p.theOrigin.x() + 40.f*m_commonParamsGPU.thePitchX; - auto gvy = p.theOrigin.y(); - auto gvz = 1.f/p.theOrigin.z(); - //--- Note that the normalization is not required as only the ratio used - - // calculate angles - cp.cotalpha = gvx*gvz; - cp.cotbeta = gvy*gvz; - - cp.with_track_angle = false; - - auto lape = p.theDet->localAlignmentError(); - if ( lape.invalid() ) lape = LocalError(); // zero.... 
- -#ifdef DUMP_ERRORS - auto m=10000.f; - for (float qclus = 15000; qclus<35000; qclus+=15000){ - errorFromTemplates(p,cp,qclus); - - std::cout << i << ' ' << qclus << ' ' << cp.pixmx - << ' ' << m*cp.sigmax << ' ' << m*cp.sx1 << ' ' << m*cp.sx2 - << ' ' << m*cp.sigmay << ' ' << m*cp.sy1 << ' ' << m*cp.sy2 - << std::endl; - } - std::cout << i << ' ' << m*std::sqrt(lape.xx()) <<' '<< m*std::sqrt(lape.yy()) << std::endl; -#endif - - - errorFromTemplates(p,cp,20000.f); - g.sx[0] = cp.sigmax; - g.sx[1] = cp.sx1; - g.sx[2] = cp.sx2; - - g.sy[0] = cp.sigmay; - g.sy[1] = cp.sy1; - g.sy[2] = cp.sy2; - - - /* + ClusterParamGeneric cp; + auto gvx = p.theOrigin.x() + 40.f * m_commonParamsGPU.thePitchX; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + //--- Note that the normalization is not required as only the ratio used + + // calculate angles + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + + cp.with_track_angle = false; + + auto lape = p.theDet->localAlignmentError(); + if (lape.invalid()) + lape = LocalError(); // zero.... + +#ifdef DUMP_ERRORS + auto m = 10000.f; + for (float qclus = 15000; qclus < 35000; qclus += 15000) { + errorFromTemplates(p, cp, qclus); + + std::cout << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' + << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2 << std::endl; + } + std::cout << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()) << std::endl; +#endif + + errorFromTemplates(p, cp, 20000.f); + g.sx[0] = cp.sigmax; + g.sx[1] = cp.sx1; + g.sx[2] = cp.sx2; + + g.sy[0] = cp.sigmay; + g.sy[1] = cp.sy1; + g.sy[2] = cp.sy2; + + /* // from run1?? 
if (i<96) { g.sx[0] = 0.00120; @@ -226,74 +231,81 @@ void PixelCPEFast::fillParamsForGpu() { g.sy[2] = 0.0085; } */ - - for (int i=0; i<3; ++i) { - g.sx[i] = std::sqrt(g.sx[i]*g.sx[i]+lape.xx()); - g.sy[i] = std::sqrt(g.sy[i]*g.sy[i]+lape.yy()); - } - - } - - // fill Layer and ladders geometry - memcpy(m_layerGeometry.layerStart, phase1PixelTopology::layerStart, sizeof(phase1PixelTopology::layerStart)); - memcpy(m_layerGeometry.layer, phase1PixelTopology::layer.data(), phase1PixelTopology::layer.size()); - + for (int i = 0; i < 3; ++i) { + g.sx[i] = std::sqrt(g.sx[i] * g.sx[i] + lape.xx()); + g.sy[i] = std::sqrt(g.sy[i] * g.sy[i] + lape.yy()); + } + } + // fill Layer and ladders geometry + memcpy(m_layerGeometry.layerStart, phase1PixelTopology::layerStart, sizeof(phase1PixelTopology::layerStart)); + memcpy(m_layerGeometry.layer, phase1PixelTopology::layer.data(), phase1PixelTopology::layer.size()); } PixelCPEFast::~PixelCPEFast() {} PixelCPEFast::GPUData::~GPUData() { - if(d_paramsOnGPU != nullptr) { + if (d_paramsOnGPU != nullptr) { cudaFree(h_paramsOnGPU.m_commonParams); cudaFree(h_paramsOnGPU.m_detParams); cudaFree(d_paramsOnGPU); } } -PixelCPEBase::ClusterParam* PixelCPEFast::createClusterParam(const SiPixelCluster & cl) const -{ - return new ClusterParamGeneric(cl); +PixelCPEBase::ClusterParam* PixelCPEFast::createClusterParam(const SiPixelCluster& cl) const { + return new ClusterParamGeneric(cl); } - - -void -PixelCPEFast::errorFromTemplates(DetParam const & theDetParam, ClusterParamGeneric & theClusterParam, float qclus) const -{ - float locBz = theDetParam.bz; - float locBx = theDetParam.bx; - //cout << "PixelCPEFast::localPosition(...) 
: locBz = " << locBz << endl; - - theClusterParam.pixmx = std::numeric_limits::max(); // max pixel charge for truncation of 2-D cluster - - theClusterParam.sigmay = -999.9; // CPE Generic y-error for multi-pixel cluster - theClusterParam.sigmax = -999.9; // CPE Generic x-error for multi-pixel cluster - theClusterParam.sy1 = -999.9; // CPE Generic y-error for single single-pixel - theClusterParam.sy2 = -999.9; // CPE Generic y-error for single double-pixel cluster - theClusterParam.sx1 = -999.9; // CPE Generic x-error for single single-pixel cluster - theClusterParam.sx2 = -999.9; // CPE Generic x-error for single double-pixel cluster - - float dummy; - - SiPixelGenError gtempl(thePixelGenError_); - int gtemplID_ = theDetParam.detTemplateId; - - theClusterParam.qBin_ = gtempl.qbin( gtemplID_, theClusterParam.cotalpha, theClusterParam.cotbeta, locBz, locBx, qclus, - false, - theClusterParam.pixmx, theClusterParam.sigmay, dummy, - theClusterParam.sigmax, dummy, theClusterParam.sy1, - dummy, theClusterParam.sy2, dummy, theClusterParam.sx1, - dummy, theClusterParam.sx2, dummy ); - - theClusterParam.sigmax = theClusterParam.sigmax * micronsToCm; - theClusterParam.sx1 = theClusterParam.sx1 * micronsToCm; - theClusterParam.sx2 = theClusterParam.sx2 * micronsToCm; - - theClusterParam.sigmay = theClusterParam.sigmay * micronsToCm; - theClusterParam.sy1 = theClusterParam.sy1 * micronsToCm; - theClusterParam.sy2 = theClusterParam.sy2 * micronsToCm; +void PixelCPEFast::errorFromTemplates(DetParam const& theDetParam, + ClusterParamGeneric& theClusterParam, + float qclus) const { + float locBz = theDetParam.bz; + float locBx = theDetParam.bx; + //cout << "PixelCPEFast::localPosition(...) 
: locBz = " << locBz << endl; + + theClusterParam.pixmx = std::numeric_limits::max(); // max pixel charge for truncation of 2-D cluster + + theClusterParam.sigmay = -999.9; // CPE Generic y-error for multi-pixel cluster + theClusterParam.sigmax = -999.9; // CPE Generic x-error for multi-pixel cluster + theClusterParam.sy1 = -999.9; // CPE Generic y-error for single single-pixel + theClusterParam.sy2 = -999.9; // CPE Generic y-error for single double-pixel cluster + theClusterParam.sx1 = -999.9; // CPE Generic x-error for single single-pixel cluster + theClusterParam.sx2 = -999.9; // CPE Generic x-error for single double-pixel cluster + + float dummy; + + SiPixelGenError gtempl(thePixelGenError_); + int gtemplID_ = theDetParam.detTemplateId; + + theClusterParam.qBin_ = gtempl.qbin(gtemplID_, + theClusterParam.cotalpha, + theClusterParam.cotbeta, + locBz, + locBx, + qclus, + false, + theClusterParam.pixmx, + theClusterParam.sigmay, + dummy, + theClusterParam.sigmax, + dummy, + theClusterParam.sy1, + dummy, + theClusterParam.sy2, + dummy, + theClusterParam.sx1, + dummy, + theClusterParam.sx2, + dummy); + + theClusterParam.sigmax = theClusterParam.sigmax * micronsToCm; + theClusterParam.sx1 = theClusterParam.sx1 * micronsToCm; + theClusterParam.sx2 = theClusterParam.sx2 * micronsToCm; + + theClusterParam.sigmay = theClusterParam.sigmay * micronsToCm; + theClusterParam.sy1 = theClusterParam.sy1 * micronsToCm; + theClusterParam.sy2 = theClusterParam.sy2 * micronsToCm; } //----------------------------------------------------------------------------- @@ -301,51 +313,44 @@ PixelCPEFast::errorFromTemplates(DetParam const & theDetParam, ClusterParamGener //! one converts everything from the measurement frame (in channel numbers) //! into the local frame (in centimeters). 
//----------------------------------------------------------------------------- -LocalPoint -PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClusterParamBase) const -{ - ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); - - assert(!theClusterParam.with_track_angle); - - if ( UseErrorsFromTemplates_ ) { - errorFromTemplates(theDetParam, theClusterParam, theClusterParam.theCluster->charge()); - } - else { - theClusterParam.qBin_ = 0; - } - - int Q_f_X; //!< Q of the first pixel in X - int Q_l_X; //!< Q of the last pixel in X - int Q_f_Y; //!< Q of the first pixel in Y - int Q_l_Y; //!< Q of the last pixel in Y - collect_edge_charges( theClusterParam, - Q_f_X, Q_l_X, - Q_f_Y, Q_l_Y, - UseErrorsFromTemplates_ && TruncatePixelCharge_ - ); - - // do GPU like ... - pixelCPEforGPU::ClusParams cp; - - cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); - cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); - cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); - cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); - - cp.Q_f_X[0] = Q_f_X; - cp.Q_l_X[0] = Q_l_X; - cp.Q_f_Y[0] = Q_f_Y; - cp.Q_l_Y[0] = Q_l_Y; - - auto ind = theDetParam.theDet->index(); - pixelCPEforGPU::position(m_commonParamsGPU, m_detParamsGPU[ind],cp,0); - auto xPos = cp.xpos[0]; - auto yPos = cp.ypos[0]; - - //--- Now put the two together - LocalPoint pos_in_local( xPos, yPos ); - return pos_in_local; +LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, ClusterParam& theClusterParamBase) const { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + assert(!theClusterParam.with_track_angle); + + if (UseErrorsFromTemplates_) { + errorFromTemplates(theDetParam, theClusterParam, theClusterParam.theCluster->charge()); + } else { + theClusterParam.qBin_ = 0; + } + + int Q_f_X; //!< Q of the first pixel in X + int Q_l_X; //!< Q of the last pixel in X + int Q_f_Y; //!< Q of the first pixel in Y + int 
Q_l_Y; //!< Q of the last pixel in Y + collect_edge_charges(theClusterParam, Q_f_X, Q_l_X, Q_f_Y, Q_l_Y, UseErrorsFromTemplates_ && TruncatePixelCharge_); + + // do GPU like ... + pixelCPEforGPU::ClusParams cp; + + cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); + cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); + cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); + cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); + + cp.Q_f_X[0] = Q_f_X; + cp.Q_l_X[0] = Q_l_X; + cp.Q_f_Y[0] = Q_f_Y; + cp.Q_l_Y[0] = Q_l_Y; + + auto ind = theDetParam.theDet->index(); + pixelCPEforGPU::position(m_commonParamsGPU, m_detParamsGPU[ind], cp, 0); + auto xPos = cp.xpos[0]; + auto yPos = cp.ypos[0]; + + //--- Now put the two together + LocalPoint pos_in_local(xPos, yPos); + return pos_in_local; } //----------------------------------------------------------------------------- @@ -353,153 +358,167 @@ PixelCPEFast::localPosition(DetParam const & theDetParam, ClusterParam & theClus //! Calculate charge in the first and last pixel projected in x and y //! and the inner cluster charge, projected in x and y. //----------------------------------------------------------------------------- -void -PixelCPEFast:: -collect_edge_charges(ClusterParam & theClusterParamBase, //!< input, the cluster - int & Q_f_X, //!< output, Q first in X - int & Q_l_X, //!< output, Q last in X - int & Q_f_Y, //!< output, Q first in Y - int & Q_l_Y, //!< output, Q last in Y - bool truncate -) -{ - ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); - - // Initialize return variables. - Q_f_X = Q_l_X = 0; - Q_f_Y = Q_l_Y = 0; - - // Obtain boundaries in index units - int xmin = theClusterParam.theCluster->minPixelRow(); - int xmax = theClusterParam.theCluster->maxPixelRow(); - int ymin = theClusterParam.theCluster->minPixelCol(); - int ymax = theClusterParam.theCluster->maxPixelCol(); - - // Iterate over the pixels. 
- int isize = theClusterParam.theCluster->size(); - for (int i = 0; i != isize; ++i) - { - auto const & pixel = theClusterParam.theCluster->pixel(i); - // ggiurgiu@fnal.gov: add pixel charge truncation - int pix_adc = pixel.adc; - if ( truncate ) - pix_adc = std::min(pix_adc, theClusterParam.pixmx ); - - // - // X projection - if ( pixel.x == xmin ) Q_f_X += pix_adc; - if ( pixel.x == xmax ) Q_l_X += pix_adc; - // - // Y projection - if ( pixel.y == ymin ) Q_f_Y += pix_adc; - if ( pixel.y == ymax ) Q_l_Y += pix_adc; - } +void PixelCPEFast::collect_edge_charges(ClusterParam& theClusterParamBase, //!< input, the cluster + int& Q_f_X, //!< output, Q first in X + int& Q_l_X, //!< output, Q last in X + int& Q_f_Y, //!< output, Q first in Y + int& Q_l_Y, //!< output, Q last in Y + bool truncate) { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + // Initialize return variables. + Q_f_X = Q_l_X = 0; + Q_f_Y = Q_l_Y = 0; + + // Obtain boundaries in index units + int xmin = theClusterParam.theCluster->minPixelRow(); + int xmax = theClusterParam.theCluster->maxPixelRow(); + int ymin = theClusterParam.theCluster->minPixelCol(); + int ymax = theClusterParam.theCluster->maxPixelCol(); + + // Iterate over the pixels. 
+ int isize = theClusterParam.theCluster->size(); + for (int i = 0; i != isize; ++i) { + auto const& pixel = theClusterParam.theCluster->pixel(i); + // ggiurgiu@fnal.gov: add pixel charge truncation + int pix_adc = pixel.adc; + if (truncate) + pix_adc = std::min(pix_adc, theClusterParam.pixmx); + + // + // X projection + if (pixel.x == xmin) + Q_f_X += pix_adc; + if (pixel.x == xmax) + Q_l_X += pix_adc; + // + // Y projection + if (pixel.y == ymin) + Q_f_Y += pix_adc; + if (pixel.y == ymax) + Q_l_Y += pix_adc; + } } - //============== INFLATED ERROR AND ERRORS FROM DB BELOW ================ //------------------------------------------------------------------------- // Hit error in the local frame //------------------------------------------------------------------------- -LocalError -PixelCPEFast::localError(DetParam const & theDetParam, ClusterParam & theClusterParamBase) const -{ - - ClusterParamGeneric & theClusterParam = static_cast(theClusterParamBase); - - // Default errors are the maximum error used for edge clusters. - // These are determined by looking at residuals for edge clusters - float xerr = EdgeClusterErrorX_ * micronsToCm; - float yerr = EdgeClusterErrorY_ * micronsToCm; - - - // Find if cluster is at the module edge. - int maxPixelCol = theClusterParam.theCluster->maxPixelCol(); - int maxPixelRow = theClusterParam.theCluster->maxPixelRow(); - int minPixelCol = theClusterParam.theCluster->minPixelCol(); - int minPixelRow = theClusterParam.theCluster->minPixelRow(); - - bool edgex = phase1PixelTopology::isEdgeX(minPixelRow) | phase1PixelTopology::isEdgeX(maxPixelRow); - bool edgey = phase1PixelTopology::isEdgeY(minPixelCol) | phase1PixelTopology::isEdgeY(maxPixelCol); - - unsigned int sizex = theClusterParam.theCluster->sizeX(); - unsigned int sizey = theClusterParam.theCluster->sizeY(); - - // Find if cluster contains double (big) pixels. 
- bool bigInX = theDetParam.theRecTopol->containsBigPixelInX( minPixelRow, maxPixelRow ); - bool bigInY = theDetParam.theRecTopol->containsBigPixelInY( minPixelCol, maxPixelCol ); - - if (UseErrorsFromTemplates_ ) { - // - // Use template errors - - if ( !edgex ) { // Only use this for non-edge clusters - if ( sizex == 1 ) { - if ( !bigInX ) {xerr = theClusterParam.sx1;} - else {xerr = theClusterParam.sx2;} - } else {xerr = theClusterParam.sigmax;} +LocalError PixelCPEFast::localError(DetParam const& theDetParam, ClusterParam& theClusterParamBase) const { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + // Default errors are the maximum error used for edge clusters. + // These are determined by looking at residuals for edge clusters + float xerr = EdgeClusterErrorX_ * micronsToCm; + float yerr = EdgeClusterErrorY_ * micronsToCm; + + // Find if cluster is at the module edge. + int maxPixelCol = theClusterParam.theCluster->maxPixelCol(); + int maxPixelRow = theClusterParam.theCluster->maxPixelRow(); + int minPixelCol = theClusterParam.theCluster->minPixelCol(); + int minPixelRow = theClusterParam.theCluster->minPixelRow(); + + bool edgex = phase1PixelTopology::isEdgeX(minPixelRow) | phase1PixelTopology::isEdgeX(maxPixelRow); + bool edgey = phase1PixelTopology::isEdgeY(minPixelCol) | phase1PixelTopology::isEdgeY(maxPixelCol); + + unsigned int sizex = theClusterParam.theCluster->sizeX(); + unsigned int sizey = theClusterParam.theCluster->sizeY(); + + // Find if cluster contains double (big) pixels. 
+ bool bigInX = theDetParam.theRecTopol->containsBigPixelInX(minPixelRow, maxPixelRow); + bool bigInY = theDetParam.theRecTopol->containsBigPixelInY(minPixelCol, maxPixelCol); + + if (UseErrorsFromTemplates_) { + // + // Use template errors + + if (!edgex) { // Only use this for non-edge clusters + if (sizex == 1) { + if (!bigInX) { + xerr = theClusterParam.sx1; + } else { + xerr = theClusterParam.sx2; + } + } else { + xerr = theClusterParam.sigmax; } - - if ( !edgey ) { // Only use for non-edge clusters - if ( sizey == 1 ) { - if ( !bigInY ) {yerr = theClusterParam.sy1;} - else {yerr = theClusterParam.sy2;} - } else {yerr = theClusterParam.sigmay;} - } - - } else { // simple errors - - // This are the simple errors, hardcoded in the code - //cout << "Track angles are not known " << endl; - //cout << "Default angle estimation which assumes track from PV (0,0,0) does not work." << endl; - - if ( GeomDetEnumerators::isTrackerPixel(theDetParam.thePart) ) { - if(GeomDetEnumerators::isBarrel(theDetParam.thePart)) { - - DetId id = (theDetParam.theDet->geographicalId()); - int layer=ttopo_.layer(id); - if ( layer==1 ) { - if ( !edgex ) { - if ( sizex<=xerr_barrel_l1_.size() ) xerr=xerr_barrel_l1_[sizex-1]; - else xerr=xerr_barrel_l1_def_; - } - - if ( !edgey ) { - if ( sizey<=yerr_barrel_l1_.size() ) yerr=yerr_barrel_l1_[sizey-1]; - else yerr=yerr_barrel_l1_def_; - } - } else{ // layer 2,3 - if ( !edgex ) { - if ( sizex<=xerr_barrel_ln_.size() ) xerr=xerr_barrel_ln_[sizex-1]; - else xerr=xerr_barrel_ln_def_; - } - - if ( !edgey ) { - if ( sizey<=yerr_barrel_ln_.size() ) yerr=yerr_barrel_ln_[sizey-1]; - else yerr=yerr_barrel_ln_def_; - } - } - - } else { // EndCap - - if ( !edgex ) { - if ( sizex<=xerr_endcap_.size() ) xerr=xerr_endcap_[sizex-1]; - else xerr=xerr_endcap_def_; - } - - if ( !edgey ) { - if ( sizey<=yerr_endcap_.size() ) yerr=yerr_endcap_[sizey-1]; - else yerr=yerr_endcap_def_; - } - } // end endcap + } + + if (!edgey) { // Only use for non-edge clusters + if 
(sizey == 1) { + if (!bigInY) { + yerr = theClusterParam.sy1; + } else { + yerr = theClusterParam.sy2; + } + } else { + yerr = theClusterParam.sigmay; } - - } // end - - auto xerr_sq = xerr*xerr; - auto yerr_sq = yerr*yerr; - - return LocalError( xerr_sq, 0, yerr_sq ); - + } + + } else { // simple errors + + // This are the simple errors, hardcoded in the code + //cout << "Track angles are not known " << endl; + //cout << "Default angle estimation which assumes track from PV (0,0,0) does not work." << endl; + + if (GeomDetEnumerators::isTrackerPixel(theDetParam.thePart)) { + if (GeomDetEnumerators::isBarrel(theDetParam.thePart)) { + DetId id = (theDetParam.theDet->geographicalId()); + int layer = ttopo_.layer(id); + if (layer == 1) { + if (!edgex) { + if (sizex <= xerr_barrel_l1_.size()) + xerr = xerr_barrel_l1_[sizex - 1]; + else + xerr = xerr_barrel_l1_def_; + } + + if (!edgey) { + if (sizey <= yerr_barrel_l1_.size()) + yerr = yerr_barrel_l1_[sizey - 1]; + else + yerr = yerr_barrel_l1_def_; + } + } else { // layer 2,3 + if (!edgex) { + if (sizex <= xerr_barrel_ln_.size()) + xerr = xerr_barrel_ln_[sizex - 1]; + else + xerr = xerr_barrel_ln_def_; + } + + if (!edgey) { + if (sizey <= yerr_barrel_ln_.size()) + yerr = yerr_barrel_ln_[sizey - 1]; + else + yerr = yerr_barrel_ln_def_; + } + } + + } else { // EndCap + + if (!edgex) { + if (sizex <= xerr_endcap_.size()) + xerr = xerr_endcap_[sizex - 1]; + else + xerr = xerr_endcap_def_; + } + + if (!edgey) { + if (sizey <= yerr_endcap_.size()) + yerr = yerr_endcap_[sizey - 1]; + else + yerr = yerr_endcap_def_; + } + } // end endcap + } + + } // end + + auto xerr_sq = xerr * xerr; + auto yerr_sq = yerr * yerr; + + return LocalError(xerr_sq, 0, yerr_sq); } From c5031d4492ce63419ca4f7f44d4b69a0846c6ac3 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 15 May 2019 11:44:49 +0200 Subject: [PATCH 074/149] Migrate ClusterTPAssociationHeterogeneous to the new framework (cms-patatrack#346) --- 
.../interface/gpuClusteringConstants.h | 27 +++++++++ CUDADataFormats/TrackingRecHit/BuildFile.xml | 8 +++ CUDADataFormats/TrackingRecHit/src/classes.h | 8 +++ .../TrackingRecHit/src/classes_def.xml | 4 ++ .../TrackingRecHit/test/BuildFile.xml | 3 + .../test/TrackingRecHit2DCUDA_t.cpp | 58 +++++++++++++++++++ .../test/TrackingRecHit2DCUDA_t.cu | 31 ++++++++++ 7 files changed, 139 insertions(+) create mode 100644 CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h create mode 100644 CUDADataFormats/TrackingRecHit/BuildFile.xml create mode 100644 CUDADataFormats/TrackingRecHit/src/classes.h create mode 100644 CUDADataFormats/TrackingRecHit/src/classes_def.xml create mode 100644 CUDADataFormats/TrackingRecHit/test/BuildFile.xml create mode 100644 CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp create mode 100644 CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu diff --git a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h new file mode 100644 index 0000000000000..b6141dc880312 --- /dev/null +++ b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h @@ -0,0 +1,27 @@ +#ifndef CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h +#define CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h + +#include <cstdint> + +namespace pixelGPUConstants { +#ifdef GPU_SMALL_EVENTS + constexpr uint32_t maxNumberOfHits = 24 * 1024; +#else + constexpr uint32_t maxNumberOfHits = + 48 * 1024; // data at pileup 50 has 18300 +/- 3500 hits; 40000 is around 6 sigma away +#endif +} // namespace pixelGPUConstants + +namespace gpuClustering { + constexpr uint32_t maxHitsInModule() { return 256; } + + constexpr uint32_t MaxNumModules = 2000; + constexpr uint32_t MaxNumPixels = 256 * 2000; // this does not mean maxPixelPerModule == 256!
+ constexpr uint32_t MaxNumClustersPerModules = 1024; + constexpr uint32_t MaxHitsInModule = maxHitsInModule(); + constexpr uint32_t MaxNumClusters = pixelGPUConstants::maxNumberOfHits; + constexpr uint16_t InvId = 9999; // must be > MaxNumModules + +} // namespace gpuClustering + +#endif // CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml new file mode 100644 index 0000000000000..b859c93772f1c --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h new file mode 100644 index 0000000000000..6b471ff63c285 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/src/classes.h @@ -0,0 +1,8 @@ +#ifndef CUDADataFormats_SiPixelCluster_src_classes_h +#define CUDADataFormats_SiPixelCluster_src_classes_h + +#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif // CUDADataFormats_SiPixelCluster_src_classes_h diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml b/CUDADataFormats/TrackingRecHit/src/classes_def.xml new file mode 100644 index 0000000000000..5b458756c21d0 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/src/classes_def.xml @@ -0,0 +1,4 @@ + + + + diff --git a/CUDADataFormats/TrackingRecHit/test/BuildFile.xml b/CUDADataFormats/TrackingRecHit/test/BuildFile.xml new file mode 100644 index 0000000000000..74f2818790d0f --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/test/BuildFile.xml @@ -0,0 +1,3 @@ + + + diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp new file mode 100644 index 0000000000000..f93e4dcf882aa --- /dev/null +++ 
b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -0,0 +1,58 @@ +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/PluginManager/interface/PluginManager.h" +#include "FWCore/PluginManager/interface/standard.h" +#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" +#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" + +namespace testTrackingRecHit2D { + + void runKernels(TrackingRecHit2DSOAView* hits); + +} + +namespace { + CUDAService makeCUDAService(edm::ParameterSet ps, edm::ActivityRegistry& ar) { + auto desc = edm::ConfigurationDescriptions("Service", "CUDAService"); + CUDAService::fillDescriptions(desc); + desc.validate(ps, "CUDAService"); + return CUDAService(ps, ar); + } +} // namespace + +int main() { + exitSansCUDADevices(); + + edmplugin::PluginManager::configure(edmplugin::standard::config()); + + const std::string config{ + R"_(import FWCore.ParameterSet.Config as cms +process = cms.Process('Test') +process.CUDAService = cms.Service('CUDAService') +)_"}; + + std::unique_ptr operate_; + edm::ServiceToken tempToken = edm::ServiceRegistry::createServicesFromConfig(config); + operate_.reset(new edm::ServiceRegistry::Operate(tempToken)); + + edm::ActivityRegistry ar; + edm::ParameterSet ps; + auto cs = makeCUDAService(ps, ar); + + auto current_device = cuda::device::current::get(); + auto stream = current_device.create_stream(cuda::stream::implicitly_synchronizes_with_default_stream); + + auto nHits = 200; + TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream); + + testTrackingRecHit2D::runKernels(tkhit.view()); + + //Fake the end-of-job 
signal. + ar.postEndJobSignal_(); + + return 0; +} diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu new file mode 100644 index 0000000000000..6b55f8a8f98c5 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu @@ -0,0 +1,31 @@ +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" + +namespace testTrackingRecHit2D { + + __global__ void fill(TrackingRecHit2DSOAView* phits) { + assert(phits); + auto& hits = *phits; + assert(hits.nHits() == 200); + + int i = threadIdx.x; + if (i > 200) + return; + } + + __global__ void verify(TrackingRecHit2DSOAView const* phits) { + assert(phits); + auto const& hits = *phits; + assert(hits.nHits() == 200); + + int i = threadIdx.x; + if (i > 200) + return; + } + + void runKernels(TrackingRecHit2DSOAView* hits) { + assert(hits); + fill<<<1, 1024>>>(hits); + verify<<<1, 1024>>>(hits); + } + +} // namespace testTrackingRecHit2D From 69ed3e2b330fa96f60b31fd191b074b549bffe13 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 15 May 2019 14:11:42 +0200 Subject: [PATCH 075/149] Synchronise with CMSSW_10_6_0 --- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h | 1 + 1 file changed, 1 insertion(+) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index f908325029afe..2bc2413bc7fb3 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -76,6 +76,7 @@ class PixelCPEBase : public PixelClusterParameterEstimator float lorentzShiftInCmX; // a FULL shift, in cm float lorentzShiftInCmY; // a FULL shift, in cm int detTemplateId; // det if for templates & generic errors + int detTemplateId2D; // det if for 2D templates }; struct ClusterParam From b0e14f89049b94776f2b98b63b9237736f040c7d Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 15 May 
2019 15:14:08 +0200 Subject: [PATCH 076/149] Update Service-based tests to provide the functionality that was removed by #26138 --- CUDADataFormats/TrackingRecHit/BuildFile.xml | 1 + .../TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml index b859c93772f1c..004f6bd6d0883 100644 --- a/CUDADataFormats/TrackingRecHit/BuildFile.xml +++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml @@ -1,6 +1,7 @@ + diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index f93e4dcf882aa..34ec61095116a 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -1,6 +1,7 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSetReader/interface/ParameterSetReader.h" #include "FWCore/PluginManager/interface/PluginManager.h" #include "FWCore/PluginManager/interface/standard.h" #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" @@ -36,7 +37,9 @@ process.CUDAService = cms.Service('CUDAService') )_"}; std::unique_ptr operate_; - edm::ServiceToken tempToken = edm::ServiceRegistry::createServicesFromConfig(config); + std::unique_ptr params; + edm::makeParameterSets(config, params); + edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params))); operate_.reset(new edm::ServiceRegistry::Operate(tempToken)); edm::ActivityRegistry ar; From acc83600c25aca6e3bc164ec170490acf60a2cd6 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 20 Jun 2019 15:58:37 +0200 Subject: [PATCH 077/149] Migrate gpuPixelRecHits::getHits() kernel to use a View 
instead of multiple pointers (cms-patatrack#354) Other changes and optimisations: - take into account the case where `nclus > blockDim.x` - use a smaller block size - document why we copy or not to local variables --- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 143 ++++++++++-------- 1 file changed, 76 insertions(+), 67 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index ff3abdce0997e..085c382b90f2b 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -15,19 +15,21 @@ namespace gpuPixelRecHits { __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams, BeamSpotCUDA::Data const* __restrict__ bs, - uint16_t const* __restrict__ id, - uint16_t const* __restrict__ x, - uint16_t const* __restrict__ y, - uint16_t const* __restrict__ adc, - uint32_t const* __restrict__ digiModuleStart, - uint32_t const* __restrict__ clusInModule, - uint32_t const* __restrict__ moduleId, - int32_t const* __restrict__ clus, + SiPixelDigisCUDA::DeviceConstView const * __restrict__ pdigis, int numElements, - uint32_t const* __restrict__ hitsModuleStart, + SiPixelClustersCUDA::DeviceConstView const * __restrict__ pclusters, TrackingRecHit2DSOAView* phits) { + + // FIXME + // the compiler seems NOT to optimize loads from views (even in a simple test case) + // The whole gimnastic here of copying or not is a pure heuristic exercise that seems to produce the fastest code with the above signature + // not using views (passing a gazzilion of array pointers) seems to produce the fastest code (but it is harder to mantain) + auto& hits = *phits; + auto const digis = *pdigis; // the copy is intentional! + auto const & clusters = *pclusters; + // to be moved in common namespace...
constexpr uint16_t InvId = 9999; // must be > MaxNumModules constexpr uint32_t MaxHitsInModule = pixelCPEforGPU::MaxHitsInModule; @@ -37,9 +39,9 @@ namespace gpuPixelRecHits { // as usual one block per module __shared__ ClusParams clusParams; - auto first = digiModuleStart[1 + blockIdx.x]; - auto me = moduleId[blockIdx.x]; - auto nclus = clusInModule[me]; + auto first = clusters.moduleStart(1 + blockIdx.x); + auto me = clusters.moduleId(blockIdx.x); + auto nclus = clusters.clusInModule(me); if (0 == nclus) return; @@ -47,9 +49,9 @@ namespace gpuPixelRecHits { #ifdef GPU_DEBUG if (threadIdx.x == 0) { auto k = first; - while (id[k] == InvId) + while (digis.moduleInd(k) == InvId) ++k; - assert(id[k] == me); + assert(digis.moduleInd(k) == me); } #endif @@ -71,9 +73,7 @@ namespace gpuPixelRecHits { } nclus = std::min(nclus, MaxHitsInModule); - auto ic = threadIdx.x; - - if (ic < nclus) { + for (int ic = threadIdx.x; ic < nclus; ic += blockDim.x) { clusParams.minRow[ic] = std::numeric_limits::max(); clusParams.maxRow[ic] = 0; clusParams.minCol[ic] = std::numeric_limits::max(); @@ -92,85 +92,94 @@ namespace gpuPixelRecHits { // one thead per "digi" for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) + auto id = digis.moduleInd(i); + if (id == InvId) continue; // not valid - if (id[i] != me) + if (id != me) break; // end of module - if (clus[i] >= nclus) + auto cl = digis.clus(i); + if (cl >= nclus) continue; - atomicMin(&clusParams.minRow[clus[i]], x[i]); - atomicMax(&clusParams.maxRow[clus[i]], x[i]); - atomicMin(&clusParams.minCol[clus[i]], y[i]); - atomicMax(&clusParams.maxCol[clus[i]], y[i]); + auto x = digis.xx(i); + auto y = digis.yy(i); + atomicMin(&clusParams.minRow[cl], x); + atomicMax(&clusParams.maxRow[cl], x); + atomicMin(&clusParams.minCol[cl], y); + atomicMax(&clusParams.maxCol[cl], y); } __syncthreads(); for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) + auto id = digis.moduleInd(i); + if (id == InvId) 
continue; // not valid - if (id[i] != me) + if (id != me) break; // end of module - if (clus[i] >= nclus) + auto cl = digis.clus(i); + if (cl >= nclus) continue; - atomicAdd(&clusParams.charge[clus[i]], adc[i]); - if (clusParams.minRow[clus[i]] == x[i]) - atomicAdd(&clusParams.Q_f_X[clus[i]], adc[i]); - if (clusParams.maxRow[clus[i]] == x[i]) - atomicAdd(&clusParams.Q_l_X[clus[i]], adc[i]); - if (clusParams.minCol[clus[i]] == y[i]) - atomicAdd(&clusParams.Q_f_Y[clus[i]], adc[i]); - if (clusParams.maxCol[clus[i]] == y[i]) - atomicAdd(&clusParams.Q_l_Y[clus[i]], adc[i]); + auto x = digis.xx(i); + auto y = digis.yy(i); + auto ch = digis.adc(i); + atomicAdd(&clusParams.charge[cl], ch); + if (clusParams.minRow[cl] == x) + atomicAdd(&clusParams.Q_f_X[cl], ch); + if (clusParams.maxRow[cl] == x) + atomicAdd(&clusParams.Q_l_X[cl], ch); + if (clusParams.minCol[cl] == y) + atomicAdd(&clusParams.Q_f_Y[cl], ch); + if (clusParams.maxCol[cl] == y) + atomicAdd(&clusParams.Q_l_Y[cl], ch); } __syncthreads(); // next one cluster per thread... - if (ic >= nclus) - return; + first = clusters.clusModuleStart(me); - first = hitsModuleStart[me]; - auto h = first + ic; // output index in global memory + for (int ic = threadIdx.x; ic < nclus; ic += blockDim.x) { + auto h = first + ic; // output index in global memory - if (h >= TrackingRecHit2DSOAView::maxHits()) - return; // overflow... + if (h >= TrackingRecHit2DSOAView::maxHits()) + break; // overflow... 
- pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - // store it + // store it - hits.charge(h) = clusParams.charge[ic]; + hits.charge(h) = clusParams.charge[ic]; - hits.detectorIndex(h) = me; + hits.detectorIndex(h) = me; - float xl, yl; - hits.xLocal(h) = xl = clusParams.xpos[ic]; - hits.yLocal(h) = yl = clusParams.ypos[ic]; + float xl, yl; + hits.xLocal(h) = xl = clusParams.xpos[ic]; + hits.yLocal(h) = yl = clusParams.ypos[ic]; - hits.clusterSizeX(h) = clusParams.xsize[ic]; - hits.clusterSizeY(h) = clusParams.ysize[ic]; + hits.clusterSizeX(h) = clusParams.xsize[ic]; + hits.clusterSizeY(h) = clusParams.ysize[ic]; - hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic]; - hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic]; + hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic]; + hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic]; - // keep it local for computations - float xg, yg, zg; - // to global and compute phi... - cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); - // here correct for the beamspot... - xg -= bs->x; - yg -= bs->y; - zg -= bs->z; + // keep it local for computations + float xg, yg, zg; + // to global and compute phi... + cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); + // here correct for the beamspot... 
+ xg -= bs->x; + yg -= bs->y; + zg -= bs->z; - hits.xGlobal(h) = xg; - hits.yGlobal(h) = yg; - hits.zGlobal(h) = zg; + hits.xGlobal(h) = xg; + hits.yGlobal(h) = yg; + hits.zGlobal(h) = zg; - hits.rGlobal(h) = std::sqrt(xg * xg + yg * yg); - hits.iphi(h) = unsafe_atan2s<7>(yg, xg); + hits.rGlobal(h) = std::sqrt(xg * xg + yg * yg); + hits.iphi(h) = unsafe_atan2s<7>(yg, xg); + } } } // namespace gpuPixelRecHits From af2ffa362b8fb032d8c1a98f151c9bdbf5938d15 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Thu, 20 Jun 2019 09:20:59 -0500 Subject: [PATCH 078/149] Reorganize CUDAScopedContext (cms-patatrack#355) * Split CUDAScopedContext to *Acquire and *Produce The motivation is that acquire() and produce() need a different functionality, and are constructed differently (e.g. acquire version always needs the edm::WaitingTaskWithArenaHolder). This split should make it more difficult to make mistakes. It should also make future evolution, e.g. towards chains of TBB tasks alternating in CPU and GPU work, easier. * Rename CUDAContextToken to CUDAContextState, and change semantics Now CUDAScopedContextAcquire takes it as a parameter to constructor, and stores the state in its destructor (yielding RAII semantics). * Document the constructors. 
--- .../plugins/SiPixelDigiErrorsSoAFromCUDA.cc | 2 +- .../SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc | 2 +- .../SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc | 8 +++----- .../SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc index d47542528ed86..b44abfe2ba758 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -43,7 +43,7 @@ void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptio void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { // Do the transfer in a CUDA stream parallel to the computation CUDA stream - CUDAScopedContext ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; const auto& gpuDigiErrors = ctx.get(iEvent, digiErrorGetToken_); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc index 068701f0bcf07..4e71864daa7fe 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc @@ -47,7 +47,7 @@ void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& d void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { // Do the transfer in a CUDA stream parallel to the computation CUDA stream - CUDAScopedContext ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; const 
auto& gpuDigis = ctx.get(iEvent, digiGetToken_); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index f2dacd5fbc415..8a0a123c43fb8 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -53,7 +53,7 @@ class SiPixelRawToClusterCUDA: public edm::stream::EDProducer edm::EDPutTokenT> digiErrorPutToken_; edm::EDPutTokenT> clusterPutToken_; - CUDAContextToken ctxTmp_; + CUDAContextState ctxState_; edm::ESWatcher recordWatcher; @@ -118,7 +118,7 @@ void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& d void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - CUDAScopedContext ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; edm::ESHandle hgpuMap; iSetup.get().get(hgpuMap); @@ -228,12 +228,10 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::Event useQuality_, includeErrors_, edm::MessageDrop::instance()->debugEnabled, ctx.stream()); - - ctxTmp_ = ctx.toToken(); } void SiPixelRawToClusterCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { - CUDAScopedContext ctx{std::move(ctxTmp_)}; + CUDAScopedContextProduce ctx{ctxState_}; auto tmp = gpuAlgo_.getResults(); ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first)); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc index 7b0ea2d163241..603ea911a3de3 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -85,7 +85,7 @@ void SiPixelRecHitCUDA::produce(edm::StreamID streamID, 
edm::Event& iEvent, cons edm::Handle> hclusters; iEvent.getByToken(token_, hclusters); - CUDAScopedContext ctx{*hclusters}; + CUDAScopedContextProduce ctx{*hclusters}; auto const& clusters = ctx.get(*hclusters); edm::Handle> hdigis; From f177e9e98e659eacbfe7139b0b43a0d4b531aef1 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Thu, 20 Jun 2019 12:09:14 +0200 Subject: [PATCH 079/149] Implement triplets in the pixel ntuplet producer (cms-patatrack#382) Enable pixel triplets with: process.pixelTracksHitQuadruplets.minHitsPerNtuplet = 3 process.pixelTracksHitQuadruplets.includeJumpingForwardDoublets = True Changes: - adjust for the average pixel geometry and the beam spot position; - allow "jumping doublets" in the forward region (FPIX1-FPIX3) for triplets. --- .../interface/phase1PixelTopology.h | 14 +++++ .../plugins/gpuClustering.h | 11 +++- .../SiPixelRecHits/interface/PixelCPEFast.h | 1 + .../SiPixelRecHits/interface/pixelCPEforGPU.h | 5 ++ .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 22 +++++++ .../SiPixelRecHits/src/PixelCPEFast.cc | 57 ++++++++++++++++++- 6 files changed, 108 insertions(+), 2 deletions(-) diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index 05e6b01e96c24..c36033a8554d8 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -35,6 +35,8 @@ namespace phase1PixelTopology { "E-1", "E-2", "E-3" // negative endcap }; + constexpr uint32_t numberOfModulesInBarrel = 1184; + constexpr uint32_t numberOfLaddersInBarrel = numberOfModulesInBarrel/8; template constexpr auto map_to_array_helper(Function f, std::index_sequence) @@ -146,6 +148,18 @@ namespace phase1PixelTopology { return py+shift; } + //FIXME move it elsewhere? 
+ struct AverageGeometry { + static constexpr auto numberOfLaddersInBarrel = phase1PixelTopology::numberOfLaddersInBarrel; + float ladderZ[numberOfLaddersInBarrel]; + float ladderX[numberOfLaddersInBarrel]; + float ladderY[numberOfLaddersInBarrel]; + float ladderR[numberOfLaddersInBarrel]; + float ladderMinZ[numberOfLaddersInBarrel]; + float ladderMaxZ[numberOfLaddersInBarrel]; + float endCapZ[2]; // just for pos and neg Layer1 + }; + } #endif // Geometry_TrackerGeometryBuilder_phase1PixelTopology_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index b610d02ba7c39..79d4396c22ed5 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -12,6 +12,10 @@ namespace gpuClustering { +#ifdef GPU_DEBUG + __device__ uint32_t gMaxHit=0; +#endif + __global__ void countModules(uint16_t const* __restrict__ id, uint32_t* __restrict__ moduleStart, int32_t* __restrict__ clusterId, @@ -271,9 +275,14 @@ namespace gpuClustering { if (threadIdx.x == 0) { nClustersInModule[thisModuleId] = foundClusters; moduleId[blockIdx.x] = thisModuleId; +#ifdef GPU_DEBUG + if (foundClusters>gMaxHit) { + gMaxHit = foundClusters; + if (foundClusters>8) printf("max hit %d in %d\n",foundClusters, thisModuleId); + } +#endif #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) - if (threadIdx.x == 0) printf("%d clusters in module %d\n", foundClusters, thisModuleId); #endif } diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index f0ed4f2574528..d5c9b334baeaf 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -81,6 +81,7 @@ class PixelCPEFast final : public PixelCPEBase { std::vector> m_detParamsGPU; pixelCPEforGPU::CommonParams m_commonParamsGPU; pixelCPEforGPU::LayerGeometry 
m_layerGeometry; + pixelCPEforGPU::AverageGeometry m_averageGeometry; struct GPUData { ~GPUData(); diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 5c1a14a63fc65..1f37dcc261643 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -44,6 +44,9 @@ namespace pixelCPEforGPU { Frame frame; }; + + using phase1PixelTopology::AverageGeometry; + struct LayerGeometry { uint32_t layerStart[phase1PixelTopology::numberOfLayers + 1]; uint8_t layer[phase1PixelTopology::layerIndexSize]; @@ -53,6 +56,7 @@ namespace pixelCPEforGPU { CommonParams* m_commonParams; DetParams* m_detParams; LayerGeometry* m_layerGeometry; + AverageGeometry * m_averageGeometry; constexpr CommonParams const& __restrict__ commonParams() const { CommonParams const* __restrict__ l = m_commonParams; @@ -63,6 +67,7 @@ namespace pixelCPEforGPU { return l[i]; } constexpr LayerGeometry const& __restrict__ layerGeometry() const { return *m_layerGeometry; } + constexpr AverageGeometry const& __restrict__ averageGeometry() const { return *m_averageGeometry; } __device__ uint8_t layer(uint16_t id) const { return __ldg(m_layerGeometry->layer + id / phase1PixelTopology::maxModuleStride); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 085c382b90f2b..05a1b86ce5ab7 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -25,11 +25,33 @@ namespace gpuPixelRecHits { // The whole gimnastic here of copying or not is a pure heuristic exercise that seems to produce the fastest code with the above signature // not using views (passing a gazzilion of array pointers) seems to produce the fastest code (but it is harder to mantain) + assert(phits); + assert(cpeParams); + auto& hits = *phits; 
auto const digis = *pdigis; // the copy is intentional! auto const & clusters = *pclusters; + // copy average geometry corrected by beamspot . FIXME (move it somewhere else???) + if (0==blockIdx.x) { + auto & agc = hits.averageGeometry(); + auto const & ag = cpeParams->averageGeometry(); + for(int il=threadIdx.x, nl=TrackingRecHit2DSOAView::AverageGeometry::numberOfLaddersInBarrel; il<nl; il+=blockDim.x) { + agc.ladderZ[il] = ag.ladderZ[il] - bs->z; + agc.ladderX[il] = ag.ladderX[il] - bs->x; + agc.ladderY[il] = ag.ladderY[il] - bs->y; + agc.ladderR[il] = sqrt(agc.ladderX[il]*agc.ladderX[il] + agc.ladderY[il]*agc.ladderY[il] ); + agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z; + agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z; + } + if(0==threadIdx.x) { + agc.endCapZ[0] = ag.endCapZ[0] - bs->z; + agc.endCapZ[1] = ag.endCapZ[1] - bs->z; +// printf("endcapZ %f %f\n",agc.endCapZ[0],agc.endCapZ[1]); + } + } + // to be moved in common namespace... constexpr uint16_t InvId = 9999; // must be > MaxNumModules constexpr uint32_t MaxHitsInModule = pixelCPEforGPU::MaxHitsInModule; diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 3c36598f06b89..3374595f74d1c 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -70,6 +70,7 @@ const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams))); + cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry))); cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); cudaCheck(cudaMalloc((void**)&data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); @@ -80,6 +81,11 @@ const pixelCPEforGPU::ParamsOnGPU*
PixelCPEFast::getGPUProductAsync(cuda::stream sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault, stream.id())); + cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_averageGeometry, + &this->m_averageGeometry, + sizeof(pixelCPEforGPU::AverageGeometry), + cudaMemcpyDefault, + stream.id())); cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_layerGeometry, &this->m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), @@ -100,6 +106,9 @@ void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; + // zero average geometry + memset(&m_averageGeometry,0,sizeof(pixelCPEforGPU::AverageGeometry)); + uint32_t oldLayer = 0; uint32_t oldLadder = 0; float rl = 0; @@ -127,8 +136,8 @@ void PixelCPEFast::fillParamsForGpu() { //if (m_commonParamsGPU.theThickness!=p.theThickness) // std::cout << i << (g.isBarrel ? "B " : "E ") << m_commonParamsGPU.theThickness<<"!="<geographicalId()); + auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId()); if (oldLayer != g.layer) { oldLayer = g.layer; // std::cout << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? 
" E+ " : " E- ")) << g.layer << " starting at " << g.rawId << std::endl; @@ -238,6 +247,51 @@ void PixelCPEFast::fillParamsForGpu() { } } + // compute ladder baricenter (only in global z) for the barrel + auto & aveGeom = m_averageGeometry; + int il=0; + for (int im=0, nm=phase1PixelTopology::numberOfModulesInBarrel; im Date: Fri, 5 Jul 2019 11:59:12 +0200 Subject: [PATCH 080/149] Port the whole pixel workflow to new heterogeneous framework (cms-patatrack#384) - port the whole pixel workflow to new heterogeneous framework - implement a legacy cluster to SoA converter for the pixel RecHits - update the vertex producer to run on CPU as well as GPU --- .../interface/SiPixelClustersCUDA.h | 10 +- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 7 +- .../interface/TrackingRecHit2DHeterogeneous.h | 212 ++++++++++++++ .../interface/TrackingRecHit2DSOAView.h | 102 +++++++ CUDADataFormats/TrackingRecHit/src/classes.h | 1 + .../TrackingRecHit/src/classes_def.xml | 7 + .../plugins/SiPixelClusterProducer.cc | 71 ++--- .../plugins/SiPixelDigisClustersFromSoA.cc | 4 +- .../SiPixelRecHits/interface/PixelCPEFast.h | 13 +- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 8 +- .../plugins/SiPixelRecHitConverter.cc | 269 +++++++----------- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 226 +++++++++++++++ .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 19 +- .../SiPixelRecHits/src/PixelCPEFast.cc | 14 +- 14 files changed, 731 insertions(+), 232 deletions(-) create mode 100644 CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h create mode 100644 CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index f25a8a25f0808..c71d8573d9a23 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ 
b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -6,6 +6,8 @@ #include +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" + class SiPixelClustersCUDA { public: SiPixelClustersCUDA() = default; @@ -40,18 +42,16 @@ class SiPixelClustersCUDA { class DeviceConstView { public: - DeviceConstView() = default; + // DeviceConstView() = default; -#ifdef __CUDACC__ __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_+i); } __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_+i); } __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_+i); } __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_+i); } -#endif friend SiPixelClustersCUDA; - private: +// private: uint32_t const *moduleStart_; uint32_t const *clusInModule_; uint32_t const *moduleId_; @@ -66,7 +66,7 @@ class SiPixelClustersCUDA { cudautils::device::unique_ptr moduleId_d; // module id of each module // originally from rechits - cudautils::device::unique_ptr clusModuleStart_d; + cudautils::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module cudautils::device::unique_ptr view_d; // "me" pointer diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 6a52545483eb8..c80e6bda4083a 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -5,6 +5,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" class SiPixelDigisCUDA { public: @@ -56,19 +57,17 @@ class SiPixelDigisCUDA { class DeviceConstView { public: - DeviceConstView() = default; + // DeviceConstView() = default; -#ifdef __CUDACC__ __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_+i); } 
__device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_+i); } __device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_+i); } __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_+i); } __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_+i); } -#endif friend class SiPixelDigisCUDA; - private: + // private: uint16_t const *xx_; uint16_t const *yy_; uint16_t const *adc_; diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h new file mode 100644 index 0000000000000..d28cd00c94b16 --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -0,0 +1,212 @@ +#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h +#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h + +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h" + +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" + + +// to be moved elsewhere +namespace cudaCompat { + + struct CUDATraits { + + template + using unique_ptr = cudautils::device::unique_ptr; + + template + static auto make_host_unique(edm::Service & cs, cuda::stream_t<> &stream) { + return cs->make_host_unique(stream); + } + + + template + static auto make_device_unique(edm::Service & cs, cuda::stream_t<> &stream) { + return cs->make_device_unique(stream); + } + + template + static auto make_device_unique(edm::Service & cs, size_t size, cuda::stream_t<> &stream) { + return cs->make_device_unique(size, stream); + } + + + }; + + + struct HostTraits { + + template + using unique_ptr = std::unique_ptr; + + template + static auto 
make_host_unique(edm::Service&, cuda::stream_t<> &) { + return std::make_unique(); + } + + + template + static auto make_device_unique(edm::Service&, cuda::stream_t<> &) { + return std::make_unique(); + } + + template + static auto make_device_unique(edm::Service&, size_t size, cuda::stream_t<> &) { + return std::make_unique(size); + } + + + }; +} + + +template +class TrackingRecHit2DHeterogeneous { +public: + + template + using unique_ptr = typename Traits:: template unique_ptr; + + using Hist = TrackingRecHit2DSOAView::Hist; + + TrackingRecHit2DHeterogeneous() = default; + + explicit TrackingRecHit2DHeterogeneous(uint32_t nHits, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t const* hitsModuleStart, + cuda::stream_t<>& stream); + + + ~TrackingRecHit2DHeterogeneous() = default; + + TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete; + TrackingRecHit2DHeterogeneous& operator=(const TrackingRecHit2DHeterogeneous&) = delete; + TrackingRecHit2DHeterogeneous(TrackingRecHit2DHeterogeneous&&) = default; + TrackingRecHit2DHeterogeneous& operator=(TrackingRecHit2DHeterogeneous&&) = default; + + TrackingRecHit2DSOAView* view() { return m_view.get(); } + TrackingRecHit2DSOAView const* view() const { return m_view.get(); } + + auto nHits() const { return m_nHits; } + + auto hitsModuleStart() const { return m_hitsModuleStart; } + auto hitsLayerStart() { return m_hitsLayerStart; } + auto phiBinner() { return m_hist; } + auto iphi() { return m_iphi; } + + // only the local coord and detector index + cudautils::host::unique_ptr localCoordToHostAsync(cuda::stream_t<>& stream) const; + cudautils::host::unique_ptr detIndexToHostAsync(cuda::stream_t<>& stream) const; + cudautils::host::unique_ptr hitsModuleStartToHostAsync(cuda::stream_t<>& stream) const; + +private: + static constexpr uint32_t n16 = 4; + static constexpr uint32_t n32 = 9; + static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious + + unique_ptr m_store16; + 
unique_ptr m_store32; + + unique_ptr m_HistStore; + unique_ptr m_AverageGeometryStore; + + unique_ptr m_view; + + uint32_t m_nHits; + + uint32_t const* m_hitsModuleStart; // needed for legacy, this is on GPU! + + // needed as kernel params... + Hist* m_hist; + uint32_t* m_hitsLayerStart; + int16_t* m_iphi; +}; + +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +template +TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nHits, + pixelCPEforGPU::ParamsOnGPU const *cpeParams, + uint32_t const *hitsModuleStart, + cuda::stream_t<> &stream) + : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) { + edm::Service cs; + + + auto view = Traits:: template make_host_unique(cs,stream); + + view->m_nHits = nHits; + m_view = Traits:: template make_device_unique(cs,stream); + m_AverageGeometryStore = Traits:: template make_device_unique(cs,stream); + view->m_averageGeometry = m_AverageGeometryStore.get(); + view->m_cpeParams = cpeParams; + view->m_hitsModuleStart = hitsModuleStart; + + // if empy do not bother + if (0 == nHits) { + if +#ifndef __CUDACC__ + constexpr +#endif + (std::is_same::value) { + cudautils::copyAsync(m_view, view, stream); + } else { m_view.reset(view.release());} + return; + } + + // the single arrays are not 128 bit alligned... 
+ // the hits are actually accessed in order only in building + // if ordering is relevant they may have to be stored phi-ordered by layer or so + // this will break 1to1 correspondence with cluster and module locality + // so unless proven VERY inefficient we keep it ordered as generated + m_store16 = Traits:: template make_device_unique(cs, nHits * n16, stream); + m_store32 = Traits:: template make_device_unique(cs, nHits * n32 + 11, stream); + m_HistStore = Traits:: template make_device_unique(cs, stream); + + auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; + auto get32 = [&](int i) { return m_store32.get() + i * nHits; }; + + // copy all the pointers + m_hist = view->m_hist = m_HistStore.get(); + + view->m_xl = get32(0); + view->m_yl = get32(1); + view->m_xerr = get32(2); + view->m_yerr = get32(3); + + view->m_xg = get32(4); + view->m_yg = get32(5); + view->m_zg = get32(6); + view->m_rg = get32(7); + + m_iphi = view->m_iphi = reinterpret_cast(get16(0)); + + view->m_charge = reinterpret_cast(get32(8)); + view->m_xsize = reinterpret_cast(get16(2)); + view->m_ysize = reinterpret_cast(get16(3)); + view->m_detInd = get16(1); + + m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); + + // transfer view + if +#ifndef __CUDACC__ + constexpr +#endif + (std::is_same::value) { + cudautils::copyAsync(m_view, view, stream); + } else { m_view.reset(view.release());} +} + +using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; + + +#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h new file mode 100644 index 0000000000000..de647e022b20d --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -0,0 +1,102 @@ +#ifndef 
CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h +#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h + +#include +#include + +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" + + +namespace pixelCPEforGPU { + struct ParamsOnGPU; +} + +class TrackingRecHit2DSOAView { +public: + static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; } + using hindex_type = uint16_t; // if above is <=2^16 + + using Hist = HistoContainer; + + using AverageGeometry = phase1PixelTopology::AverageGeometry; + + template friend class TrackingRecHit2DHeterogeneous; + + __device__ __forceinline__ uint32_t nHits() const { return m_nHits; } + + __device__ __forceinline__ float& xLocal(int i) { return m_xl[i]; } + __device__ __forceinline__ float xLocal(int i) const { return __ldg(m_xl + i); } + __device__ __forceinline__ float& yLocal(int i) { return m_yl[i]; } + __device__ __forceinline__ float yLocal(int i) const { return __ldg(m_yl + i); } + + __device__ __forceinline__ float& xerrLocal(int i) { return m_xerr[i]; } + __device__ __forceinline__ float xerrLocal(int i) const { return __ldg(m_xerr + i); } + __device__ __forceinline__ float& yerrLocal(int i) { return m_yerr[i]; } + __device__ __forceinline__ float yerrLocal(int i) const { return __ldg(m_yerr + i); } + + __device__ __forceinline__ float& xGlobal(int i) { return m_xg[i]; } + __device__ __forceinline__ float xGlobal(int i) const { return __ldg(m_xg + i); } + __device__ __forceinline__ float& yGlobal(int i) { return m_yg[i]; } + __device__ __forceinline__ float yGlobal(int i) const { return __ldg(m_yg + i); } + __device__ __forceinline__ float& zGlobal(int i) { return m_zg[i]; } + __device__ __forceinline__ float zGlobal(int i) const 
{ return __ldg(m_zg + i); } + __device__ __forceinline__ float& rGlobal(int i) { return m_rg[i]; } + __device__ __forceinline__ float rGlobal(int i) const { return __ldg(m_rg + i); } + + __device__ __forceinline__ int16_t& iphi(int i) { return m_iphi[i]; } + __device__ __forceinline__ int16_t iphi(int i) const { return __ldg(m_iphi + i); } + + __device__ __forceinline__ int32_t& charge(int i) { return m_charge[i]; } + __device__ __forceinline__ int32_t charge(int i) const { return __ldg(m_charge + i); } + __device__ __forceinline__ int16_t& clusterSizeX(int i) { return m_xsize[i]; } + __device__ __forceinline__ int16_t clusterSizeX(int i) const { return __ldg(m_xsize + i); } + __device__ __forceinline__ int16_t& clusterSizeY(int i) { return m_ysize[i]; } + __device__ __forceinline__ int16_t clusterSizeY(int i) const { return __ldg(m_ysize + i); } + __device__ __forceinline__ uint16_t& detectorIndex(int i) { return m_detInd[i]; } + __device__ __forceinline__ uint16_t detectorIndex(int i) const { return __ldg(m_detInd + i); } + + __device__ __forceinline__ pixelCPEforGPU::ParamsOnGPU const& cpeParams() const { return *m_cpeParams; } + + __device__ __forceinline__ uint32_t hitsModuleStart(int i) const { return __ldg(m_hitsModuleStart + i); } + + __device__ __forceinline__ uint32_t* hitsLayerStart() { return m_hitsLayerStart; } + __device__ __forceinline__ uint32_t const* hitsLayerStart() const { return m_hitsLayerStart; } + + __device__ __forceinline__ Hist& phiBinner() { return *m_hist; } + __device__ __forceinline__ Hist const& phiBinner() const { return *m_hist; } + + __device__ __forceinline__ AverageGeometry & averageGeometry() { return *m_averageGeometry; } + __device__ __forceinline__ AverageGeometry const& averageGeometry() const { return *m_averageGeometry; } + + +private: + // local coord + float *m_xl, *m_yl; + float *m_xerr, *m_yerr; + + // global coord + float *m_xg, *m_yg, *m_zg, *m_rg; + int16_t* m_iphi; + + // cluster properties + int32_t* m_charge; + 
int16_t* m_xsize; + int16_t* m_ysize; + uint16_t* m_detInd; + + // supporting objects + AverageGeometry * m_averageGeometry; // owned (corrected for beam spot: not sure where to host it otherwise) + pixelCPEforGPU::ParamsOnGPU const* m_cpeParams; // forwarded from setup, NOT owned + uint32_t const* m_hitsModuleStart; // forwarded from clusters + + uint32_t* m_hitsLayerStart; + + Hist* m_hist; + + uint32_t m_nHits; +}; + +#endif diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h index 6b471ff63c285..90cfd0945d76e 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes.h +++ b/CUDADataFormats/TrackingRecHit/src/classes.h @@ -2,6 +2,7 @@ #define CUDADataFormats_SiPixelCluster_src_classes_h #include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml b/CUDADataFormats/TrackingRecHit/src/classes_def.xml index 5b458756c21d0..cc64ff661360c 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes_def.xml +++ b/CUDADataFormats/TrackingRecHit/src/classes_def.xml @@ -1,4 +1,11 @@ + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc index 45ca9be5fd6c3..83fcfbf8f9027 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc @@ -32,6 +32,7 @@ // Framework #include "DataFormats/Common/interface/Handle.h" #include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" // STL #include @@ -48,26 +49,22 @@ //--------------------------------------------------------------------------- 
SiPixelClusterProducer::SiPixelClusterProducer(edm::ParameterSet const& conf) : - theSiPixelGainCalibration_(nullptr), - clusterMode_( conf.getUntrackedParameter("ClusterMode","PixelThresholdClusterizer") ), - clusterizer_(nullptr), // the default, in case we fail to make one - readyToCluster_(false), // since we obviously aren't - maxTotalClusters_( conf.getParameter( "maxNumberOfClusters" ) ), - payloadType_( conf.getParameter( "payloadType" ) ) + tPutPixelClusters(produces()), + clusterMode_( conf.getParameter("ClusterMode") ), + maxTotalClusters_( conf.getParameter( "maxNumberOfClusters" ) ) { if ( clusterMode_ == "PixelThresholdReclusterizer" ) tPixelClusters = consumes( conf.getParameter("src") ); else tPixelDigi = consumes>( conf.getParameter("src") ); - //--- Declare to the EDM what kind of collections we will be making. - produces(); - if (strcmp(payloadType_.c_str(), "HLT") == 0) - theSiPixelGainCalibration_ = new SiPixelGainCalibrationForHLTService(conf); - else if (strcmp(payloadType_.c_str(), "Offline") == 0) - theSiPixelGainCalibration_ = new SiPixelGainCalibrationOfflineService(conf); - else if (strcmp(payloadType_.c_str(), "Full") == 0) - theSiPixelGainCalibration_ = new SiPixelGainCalibrationService(conf); + const auto& payloadType = conf.getParameter( "payloadType" ); + if (payloadType == "HLT") + theSiPixelGainCalibration_ = std::make_unique(conf); + else if (payloadType == "Offline") + theSiPixelGainCalibration_ = std::make_unique(conf); + else if (payloadType == "Full") + theSiPixelGainCalibration_ = std::make_unique(conf); //--- Make the algorithm(s) according to what the user specified //--- in the ParameterSet. 
@@ -76,10 +73,21 @@ } // Destructor - SiPixelClusterProducer::~SiPixelClusterProducer() { - delete clusterizer_; - delete theSiPixelGainCalibration_; - } +SiPixelClusterProducer::~SiPixelClusterProducer() = default; + +void SiPixelClusterProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("src", edm::InputTag("siPixelDigis")); + desc.add("ClusterMode", "PixelThresholdClusterizer"); + desc.add("maxNumberOfClusters", -1)->setComment("-1 means no limit"); + desc.add("payloadType", "Offline")->setComment("Options: HLT - column granularity, Offline - gain:col/ped:pix"); + + PixelThresholdClusterizer::fillPSetDescription(desc); + SiPixelGainCalibrationServiceBase::fillPSetDescription(desc); // no-op, but in principle the structures are there... + + descriptions.add("SiPixelClusterizerDefault", desc); +} //--------------------------------------------------------------------------- @@ -120,7 +128,15 @@ // Step D: write output to file output->shrink_to_fit(); - e.put(std::move(output)); + + // set sequential identifier (this is a const interface, but we need to set it after the sorting) + for (auto DSViter = output->begin(); DSViter != output->end(); DSViter++) { + uint16_t id=0; + for (auto & clust : *DSViter) { + const_cast(clust).setOriginalId(id++); + } + } + e.put(tPutPixelClusters, std::move(output)); } @@ -132,16 +148,14 @@ void SiPixelClusterProducer::setupClusterizer(const edm::ParameterSet& conf) { if ( clusterMode_ == "PixelThresholdReclusterizer" || clusterMode_ == "PixelThresholdClusterizer" ) { - clusterizer_ = new PixelThresholdClusterizer(conf); - clusterizer_->setSiPixelGainCalibrationService(theSiPixelGainCalibration_); - readyToCluster_ = true; + clusterizer_ = std::make_unique(conf); + clusterizer_->setSiPixelGainCalibrationService(theSiPixelGainCalibration_.get()); } else { - edm::LogError("SiPixelClusterProducer") << "[SiPixelClusterProducer]:" + throw 
cms::Exception("Configuration") << "[SiPixelClusterProducer]:" <<" choice " << clusterMode_ << " is invalid.\n" << "Possible choices:\n" << " PixelThresholdClusterizer"; - readyToCluster_ = false; } } @@ -153,13 +167,6 @@ void SiPixelClusterProducer::run(const T & input, const edm::ESHandle & geom, edmNew::DetSetVector & output) { - if ( ! readyToCluster_ ) { - edm::LogError("SiPixelClusterProducer") - <<" at least one clusterizer is not ready -- can't run!" ; - // TO DO: throw an exception here? The user may want to know... - return; // clusterizer is invalid, bail out - } - int numberOfDetUnits = 0; int numberOfClusters = 0; @@ -206,7 +213,7 @@ //LogDebug ("SiPixelClusterProducer") << " Executing " // << clusterMode_ << " resulted in " << numberOfClusters - // << " SiPixelClusters in " << numberOfDetUnits << " DetUnits."; + // << " SiPixelClusters in " << numberOfDetUnits << " DetUnits."; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index c0c78b29ec4a5..651c3bb609d18 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -109,8 +109,10 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con auto clusterThreshold = (layer == 1) ? 2000 : 4000; for (int32_t ic = 0; ic < nclus + 1; ++ic) { auto const& acluster = aclusters[ic]; + // in any case we cannot go out of sync with gpu... 
if (acluster.charge < clusterThreshold) - continue; + edm::LogWarning("SiPixelDigisClustersFromSoA") << "cluster below charge Threshold " + << "Layer/DetId/clusId " << layer<<'/'< &cudaStream) const; + pixelCPEforGPU::ParamsOnGPU getCPUProduct() const { + return pixelCPEforGPU::ParamsOnGPU { + &m_commonParamsGPU, + m_detParamsGPU.data(), + &m_layerGeometry, + &m_averageGeometry, + }; + } + private: ClusterParam *createClusterParam(const SiPixelCluster &cl) const override; @@ -78,7 +87,9 @@ class PixelCPEFast final : public PixelCPEBase { //--- DB Error Parametrization object, new light templates std::vector thePixelGenError_; - std::vector> m_detParamsGPU; + // allocate it with posix malloc to be ocmpatible with cpu wf + std::vector m_detParamsGPU; + // std::vector> m_detParamsGPU; pixelCPEforGPU::CommonParams m_commonParamsGPU; pixelCPEforGPU::LayerGeometry m_layerGeometry; pixelCPEforGPU::AverageGeometry m_averageGeometry; diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 1f37dcc261643..e24b9d6ea493d 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -53,10 +53,10 @@ namespace pixelCPEforGPU { }; struct ParamsOnGPU { - CommonParams* m_commonParams; - DetParams* m_detParams; - LayerGeometry* m_layerGeometry; - AverageGeometry * m_averageGeometry; + CommonParams const * m_commonParams; + DetParams const * m_detParams; + LayerGeometry const * m_layerGeometry; + AverageGeometry const * m_averageGeometry; constexpr CommonParams const& __restrict__ commonParams() const { CommonParams const* __restrict__ l = m_commonParams; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index 42efbd12c2e2d..6cb33724b807a 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ 
b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -9,68 +9,18 @@ * ------------------------------------------------------ */ -//--------------------------------------------------------------------------- -//! \class SiPixelRecHitConverter -//! -//! \brief EDProducer to covert SiPixelClusters into SiPixelRecHits -//! -//! SiPixelRecHitConverter is an EDProducer subclass (i.e., a module) -//! which orchestrates the conversion of SiPixelClusters into SiPixelRecHits. -//! Consequently, the input is a edm::DetSetVector and the output is -//! SiPixelRecHitCollection. -//! -//! SiPixelRecHitConverter invokes one of descendents from -//! ClusterParameterEstimator (templated on SiPixelCluster), e.g. -//! CPEFromDetPosition (which is the only available option -//! right now). SiPixelRecHitConverter loads the SiPixelClusterCollection, -//! and then iterates over DetIds, invoking the chosen CPE's methods -//! localPosition() and localError() to perform the correction (some of which -//! may be rather involved). A RecHit is made on the spot, and appended -//! to the output collection. -//! -//! The calibrations are not loaded at the moment, -//! although that is being planned for the near future. -//! -//! \author Porting from ORCA by Petar Maksimovic (JHU). Implementation of the -//! DetSetVector by V.Chiochia (Zurich University). -//! -//! \version v2, May 30, 2006 -//! change to use Lorentz angle from DB Lotte Wilke, Jan. 31st, 2008 -//! 
-//--------------------------------------------------------------------------- - -//--- Base class for CPEs: - -#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" - -//--- Geometry + DataFormats -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" -#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" -#include "DataFormats/Common/interface/DetSetVector.h" - -//--- Framework -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" - -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "FWCore/Utilities/interface/EDPutToken.h" -#include "FWCore/Utilities/interface/ESGetToken.h" - +// Our own stuff +#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelRecHitConverter.h" // Geometry #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" // Data Formats #include "DataFormats/DetId/interface/DetId.h" #include "DataFormats/Common/interface/Ref.h" #include "DataFormats/Common/interface/DetSet2RangeMap.h" + // STL #include #include @@ -82,87 +32,66 @@ #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" -using namespace std; +// Make heterogeneous framewokr happy.... 
+#include "CUDADataFormats/Common/interface/ArrayShadow.h" +using HitModuleStart = std::array; +using HMSstorage = ArrayShadow; -namespace cms { - - class SiPixelRecHitConverter : public edm::stream::EDProducer<> { - public: - //--- Constructor, virtual destructor (just in case) - explicit SiPixelRecHitConverter(const edm::ParameterSet& conf); - ~SiPixelRecHitConverter() override; - - //--- Factory method to make CPE's depending on the ParameterSet - //--- Not sure if we need to make more than one CPE to run concurrently - //--- on different parts of the detector (e.g., one for the barrel and the - //--- one for the forward). The way the CPE's are written now, it's - //--- likely we can use one (and they will switch internally), or - //--- make two of the same but configure them differently. We need a more - //--- realistic use case... - - //--- The top-level event method. - void produce(edm::Event& e, const edm::EventSetup& c) override; - - //--- Execute the position estimator algorithm(s). - //--- New interface with DetSetVector - void run(const edmNew::DetSetVector& input, - SiPixelRecHitCollectionNew& output, - TrackerGeometry const& geom); - - void run(edm::Handle> inputhandle, - SiPixelRecHitCollectionNew& output, - TrackerGeometry const& geom); - - private: - // TO DO: maybe allow a map of pointers? - /// const PixelClusterParameterEstimator * cpe_; // what we got (for now, one ptr to base class) - PixelCPEBase const* cpe_ = nullptr; // What we got (for now, one ptr to base class) - edm::InputTag const src_; - edm::EDGetTokenT> const tPixelCluster_; - edm::EDPutTokenT const tPut_; - edm::ESGetToken const tTrackerGeom_; - edm::ESGetToken const tCPE_; - bool m_newCont; // save also in emdNew::DetSetVector - }; +using namespace std; + +namespace cms +{ //--------------------------------------------------------------------------- //! Constructor: set the ParameterSet and defer all thinking to setupCPE(). 
//--------------------------------------------------------------------------- - SiPixelRecHitConverter::SiPixelRecHitConverter(edm::ParameterSet const& conf) - : src_(conf.getParameter("src")), - tPixelCluster_(consumes>(src_)), - tPut_(produces()), - tTrackerGeom_(esConsumes()), - tCPE_(esConsumes( - edm::ESInputTag("", conf.getParameter("CPE")))) {} + SiPixelRecHitConverter::SiPixelRecHitConverter(edm::ParameterSet const& conf) + : + conf_(conf), + src_( conf.getParameter( "src" ) ), + tPixelCluster(consumes< edmNew::DetSetVector >( src_)) { + //--- Declare to the EDM what kind of collections we will be making. + produces(); + produces(); + } + // Destructor - SiPixelRecHitConverter::~SiPixelRecHitConverter() {} - + SiPixelRecHitConverter::~SiPixelRecHitConverter() + { + } + //--------------------------------------------------------------------------- //! The "Event" entrypoint: gets called by framework for every event //--------------------------------------------------------------------------- - void SiPixelRecHitConverter::produce(edm::Event& e, const edm::EventSetup& es) { - // Step A.1: get input data - edm::Handle> input; - e.getByToken(tPixelCluster_, input); + void SiPixelRecHitConverter::produce(edm::Event& e, const edm::EventSetup& es) + { + // Step A.1: get input data + edm::Handle< edmNew::DetSetVector > input; + e.getByToken( tPixelCluster, input); + // Step A.2: get event setup - auto const& geom = es.getData(tTrackerGeom_); + edm::ESHandle geom; + es.get().get( geom ); // Step B: create empty output collection - SiPixelRecHitCollectionNew output; - + auto output = std::make_unique(); + // Step B*: create CPE - cpe_ = dynamic_cast(&es.getData(tCPE_)); - + edm::ESHandle hCPE; + std::string cpeName_ = conf_.getParameter("CPE"); + es.get().get(cpeName_,hCPE); + cpe_ = dynamic_cast< const PixelCPEBase* >(&(*hCPE)); + // Step C: Iterate over DetIds and invoke the strip CPE algorithm // on each DetUnit - run(input, output, geom); + run( input, *output, 
geom ); + + output->shrink_to_fit(); + e.put(std::move(output)); - output.shrink_to_fit(); - e.emplace(tPut_, std::move(output)); } //--------------------------------------------------------------------------- @@ -170,71 +99,69 @@ namespace cms { //! and make a RecHit to store the result. //! New interface reading DetSetVector by V.Chiochia (May 30th, 2006) //--------------------------------------------------------------------------- - void SiPixelRecHitConverter::run(edm::Handle> inputhandle, - SiPixelRecHitCollectionNew& output, - TrackerGeometry const& geom) { - if (!cpe_) { - edm::LogError("SiPixelRecHitConverter") << " at least one CPE is not ready -- can't run!"; - // TO DO: throw an exception here? The user may want to know... - assert(0); - return; // clusterizer is invalid, bail out - } - + void SiPixelRecHitConverter::run(edm::Handle > inputhandle, + SiPixelRecHitCollectionNew &output, + edm::ESHandle & geom) { + if ( ! cpe_ ) + { + edm::LogError("SiPixelRecHitConverter") << " at least one CPE is not ready -- can't run!"; + // TO DO: throw an exception here? The user may want to know... 
+ assert(0); + return; // clusterizer is invalid, bail out + } + int numberOfDetUnits = 0; int numberOfClusters = 0; - + const edmNew::DetSetVector& input = *inputhandle; - - edmNew::DetSetVector::const_iterator DSViter = input.begin(); - - for (; DSViter != input.end(); DSViter++) { + + edmNew::DetSetVector::const_iterator DSViter=input.begin(); + + for ( ; DSViter != input.end() ; DSViter++) { numberOfDetUnits++; unsigned int detid = DSViter->detId(); - DetId detIdObject(detid); - const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); - const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); - assert(pixDet); - SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output, detid); - + DetId detIdObject( detid ); + const GeomDetUnit * genericDet = geom->idToDetUnit( detIdObject ); + const PixelGeomDetUnit * pixDet = dynamic_cast(genericDet); + assert(pixDet); + SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output,detid); + edmNew::DetSet::const_iterator clustIt = DSViter->begin(), clustEnd = DSViter->end(); - - for (; clustIt != clustEnd; clustIt++) { - numberOfClusters++; - std::tuple tuple = - cpe_->getParameters(*clustIt, *genericDet); - LocalPoint lp(std::get<0>(tuple)); - LocalError le(std::get<1>(tuple)); - SiPixelRecHitQuality::QualWordType rqw(std::get<2>(tuple)); - // Create a persistent edm::Ref to the cluster - edm::Ref, SiPixelCluster> cluster = - edmNew::makeRefTo(inputhandle, clustIt); - // Make a RecHit and add it to the DetSet - // old : recHitsOnDetUnit.push_back( new SiPixelRecHit( lp, le, detIdObject, &*clustIt) ); - SiPixelRecHit hit(lp, le, rqw, *genericDet, cluster); - // - // Now save it ================= - recHitsOnDetUnit.push_back(hit); - // ============================= - - // std::cout << "SiPixelRecHitConverterVI " << numberOfClusters << ' '<< lp << " " << le << std::endl; - } // <-- End loop on Clusters + + for ( ; clustIt != clustEnd; clustIt++) { + numberOfClusters++; + std::tuple tuple = cpe_->getParameters( 
*clustIt, *genericDet ); + LocalPoint lp( std::get<0>(tuple) ); + LocalError le( std::get<1>(tuple) ); + SiPixelRecHitQuality::QualWordType rqw( std::get<2>(tuple) ); + // Create a persistent edm::Ref to the cluster + edm::Ref< edmNew::DetSetVector, SiPixelCluster > cluster = edmNew::makeRefTo( inputhandle, clustIt); + // Make a RecHit and add it to the DetSet + // old : recHitsOnDetUnit.push_back( new SiPixelRecHit( lp, le, detIdObject, &*clustIt) ); + SiPixelRecHit hit( lp, le, rqw, *genericDet, cluster); + // + // Now save it ================= + recHitsOnDetUnit.push_back(hit); + // ============================= + + // std::cout << "SiPixelRecHitConverterVI " << numberOfClusters << ' '<< lp << " " << le << std::endl; + } // <-- End loop on Clusters + // LogDebug("SiPixelRecHitConverter") //std::cout << "SiPixelRecHitConverterVI " - // << " Found " << recHitsOnDetUnit.size() << " RecHits on " << detid //; - // << std::endl; - - } // <-- End loop on DetUnits - - // LogDebug ("SiPixelRecHitConverter") + // << " Found " << recHitsOnDetUnit.size() << " RecHits on " << detid //; + // << std::endl; + + + } // <-- End loop on DetUnits + + // LogDebug ("SiPixelRecHitConverter") // std::cout << "SiPixelRecHitConverterVI " - // << cpeName_ << " converted " << numberOfClusters - // << " SiPixelClusters into SiPixelRecHits, in " - // << numberOfDetUnits << " DetUnits." //; + // << cpeName_ << " converted " << numberOfClusters + // << " SiPixelClusters into SiPixelRecHits, in " + // << numberOfDetUnits << " DetUnits." 
//; // << std::endl; + } } // end of namespace cms - -using cms::SiPixelRecHitConverter; - -DEFINE_FWK_MODULE(SiPixelRecHitConverter); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc new file mode 100644 index 0000000000000..458294626a1cf --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -0,0 +1,226 @@ +#include + +// hack waiting for if constexpr +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include 
"RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" + +#include "CUDADataFormats/Common/interface/ArrayShadow.h" + + +#include "RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h" + +class SiPixelRecHitSoAFromLegacy : public edm::global::EDProducer<> { +public: + explicit SiPixelRecHitSoAFromLegacy(const edm::ParameterSet& iConfig); + ~SiPixelRecHitSoAFromLegacy() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + using HitModuleStart = std::array; + using HMSstorage = HostProduct; + + +private: + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + // The mess with inputs will be cleaned up when migrating to the new framework + edm::EDGetTokenT bsGetToken_; + edm::EDGetTokenT clusterToken_; // Legacy Clusters + edm::EDPutTokenT tokenHit_; + edm::EDPutTokenT tokenModuleStart_; + + std::string cpeName_; + +}; + +SiPixelRecHitSoAFromLegacy::SiPixelRecHitSoAFromLegacy(const edm::ParameterSet& iConfig) + : bsGetToken_{consumes(iConfig.getParameter("beamSpot"))}, + clusterToken_{consumes(iConfig.getParameter("src"))}, + tokenHit_{produces()}, + tokenModuleStart_{produces()}, + cpeName_(iConfig.getParameter("CPE")) {} + +void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); + desc.add("CPE", "PixelCPEFast"); + descriptions.add("siPixelRecHitHostSoA", desc); +} + +void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { + + + const TrackerGeometry *geom_ = nullptr; + const PixelClusterParameterEstimator* cpe_ = nullptr; + + + edm::ESHandle geom; + es.get().get( geom ); + geom_ = geom.product(); + + + edm::ESHandle hCPE; 
+ es.get().get(cpeName_, hCPE); + cpe_ = dynamic_cast(hCPE.product()); + + PixelCPEFast const* fcpe = dynamic_cast(cpe_); + if (!fcpe) { + throw cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; + } + auto cpeView = fcpe->getCPUProduct(); + + const reco::BeamSpot& bs = iEvent.get(bsGetToken_); + + + BeamSpotCUDA::Data bsHost; + bsHost.x = bs.x0(); + bsHost.y = bs.y0(); + bsHost.z = bs.z0(); + + auto const& input = iEvent.get(clusterToken_); + + // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... + auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); + auto hitsModuleStart = hmsp.get(); + auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone + iEvent.put(tokenModuleStart_,std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... + + + // storage + std::vector xx_; + std::vector yy_; + std::vector adc_; + std::vector moduleInd_; + std::vector clus_; + + HitModuleStart moduleStart_; // index of the first pixel of each module + HitModuleStart clusInModule_; + memset(&clusInModule_,0,sizeof(HitModuleStart)); // needed?? + assert(2001==clusInModule_.size()); + assert(0==clusInModule_[2000]); + uint32_t moduleId_; + moduleStart_[1]=0; // we run sequentially.... 
+ + SiPixelClustersCUDA::DeviceConstView clusterView{moduleStart_.data(),clusInModule_.data(), &moduleId_, hitsModuleStart}; + + // fill cluster arrays + int numberOfClusters = 0; + for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { + unsigned int detid = DSViter->detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); + auto gind = genericDet->index(); + assert(gind<2000); + auto const nclus = DSViter->size(); + clusInModule_[gind]=nclus; + numberOfClusters+=nclus; + } + hitsModuleStart[0]=0; + for (int i=1, n=clusInModule_.size(); i(numberOfClusters, + &cpeView, + hitsModuleStart, + dummyStream + ); + + + int numberOfDetUnits = 0; + int numberOfHits = 0; + for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { + numberOfDetUnits++; + unsigned int detid = DSViter->detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); + auto const gind = genericDet->index(); + assert(gind<2000); + const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); + assert(pixDet); + auto const nclus = DSViter->size(); + assert(clusInModule_[gind]==nclus); + if (0==nclus) continue; // is this really possible? 
+ + auto const fc = hitsModuleStart[gind]; + auto const lc = hitsModuleStart[gind + 1]; + assert(lc>fc); + // std::cout << "in det " << gind << ": conv " << nclus << " hits from " << DSViter->size() << " legacy clusters" + // <<' '<< fc <<','<0); + for (int i=0, nd=clust.size(); iview()); + for (auto h=fc; hview()->detectorIndex(h)); + + } + assert(numberOfHits==numberOfClusters); + + // fill data structure to support CA + for (auto i=0; i < 11; ++i) { + output->hitsLayerStart()[i] = hitsModuleStart[cpeView.layerGeometry().layerStart[i]]; + } + cudautils::fillManyFromVector( + output->phiBinner(), nullptr, 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, 0); + + // std::cout << "created HitSoa for " << numberOfClusters << " clusters in " << numberOfDetUnits << " Dets" << std::endl; + iEvent.put(std::move(output)); + +} + +DEFINE_FWK_MODULE(SiPixelRecHitSoAFromLegacy); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 05a1b86ce5ab7..44b253077992f 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -18,7 +18,8 @@ namespace gpuPixelRecHits { SiPixelDigisCUDA::DeviceConstView const * __restrict__ pdigis, int numElements, SiPixelClustersCUDA::DeviceConstView const * __restrict__ pclusters, - TrackingRecHit2DSOAView* phits) { + TrackingRecHit2DSOAView* phits + ){ // FIXME // the compiler seems NOT to optimize loads from views (even in a simple test case) @@ -83,7 +84,8 @@ namespace gpuPixelRecHits { printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, hitsModuleStart[me]); #endif - assert(blockDim.x >= MaxHitsInModule); +// true on gpu only... +// assert(blockDim.x >= MaxHitsInModule); if (threadIdx.x == 0 && nclus > MaxHitsInModule) { printf("WARNING: too many clusters %d in Module %d. 
Only first %d processed\n", nclus, me, MaxHitsInModule); @@ -95,7 +97,7 @@ namespace gpuPixelRecHits { } nclus = std::min(nclus, MaxHitsInModule); - for (int ic = threadIdx.x; ic < nclus; ic += blockDim.x) { + for (int ic = threadIdx.x, nc=nclus; ic < nc; ic += blockDim.x) { clusParams.minRow[ic] = std::numeric_limits::max(); clusParams.maxRow[ic] = 0; clusParams.minCol[ic] = std::numeric_limits::max(); @@ -120,7 +122,7 @@ namespace gpuPixelRecHits { if (id != me) break; // end of module auto cl = digis.clus(i); - if (cl >= nclus) + if (cl >= int(nclus)) continue; auto x = digis.xx(i); auto y = digis.yy(i); @@ -139,7 +141,7 @@ namespace gpuPixelRecHits { if (id != me) break; // end of module auto cl = digis.clus(i); - if (cl >= nclus) + if (cl >= int(nclus)) continue; auto x = digis.xx(i); auto y = digis.yy(i); @@ -161,11 +163,14 @@ namespace gpuPixelRecHits { first = clusters.clusModuleStart(me); - for (int ic = threadIdx.x; ic < nclus; ic += blockDim.x) { + for (int ic = threadIdx.x, nc=nclus; ic < nc; ic += blockDim.x) { auto h = first + ic; // output index in global memory - + + // this cannot happen anymore if (h >= TrackingRecHit2DSOAView::maxHits()) break; // overflow... 
+ assert(hcommonParams(), cpeParams->detParams(me), clusParams, ic); pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 3374595f74d1c..6c0782471757e 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -76,22 +76,22 @@ const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream cudaCheck(cudaMemcpyAsync( data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_commonParams, + cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_commonParams, &this->m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_averageGeometry, + cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_averageGeometry, &this->m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_layerGeometry, + cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_layerGeometry, &this->m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync(data.h_paramsOnGPU.m_detParams, + cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.data(), this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault, @@ -301,9 +301,9 @@ PixelCPEFast::~PixelCPEFast() {} PixelCPEFast::GPUData::~GPUData() { if (d_paramsOnGPU != nullptr) { - cudaFree(h_paramsOnGPU.m_commonParams); - cudaFree(h_paramsOnGPU.m_detParams); - cudaFree(h_paramsOnGPU.m_averageGeometry); + cudaFree((void*)h_paramsOnGPU.m_commonParams); + cudaFree((void*)h_paramsOnGPU.m_detParams); + 
cudaFree((void*)h_paramsOnGPU.m_averageGeometry); cudaFree(d_paramsOnGPU); } } From 5126c50fa77dd9c2b1133619091f6d0ce6fa54fa Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sun, 25 Aug 2019 14:04:02 +0200 Subject: [PATCH 081/149] Implement full Pixel SoA workflow on CPU (cms-patatrack#385) --- .../interface/gpuClusteringConstants.h | 13 +- .../interface/TrackingRecHit2DHeterogeneous.h | 80 +------ .../TrackingRecHit/src/classes_def.xml | 11 +- .../plugins/SiPixelDigisClustersFromSoA.cc | 7 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 22 ++ .../plugins/gpuClusterChargeCut.h | 30 ++- .../plugins/gpuClustering.h | 14 +- .../SiPixelClusterizer/test/gpuClustering_t.h | 2 +- .../SiPixelRecHits/interface/PixelCPEFast.h | 11 +- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 4 +- .../plugins/SiPixelRecHitConverter.cc | 47 +++- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 69 ++++-- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 224 +++++++++--------- .../SiPixelRecHits/src/PixelCPEFast.cc | 8 + 14 files changed, 307 insertions(+), 235 deletions(-) diff --git a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h index b6141dc880312..5199992bbb3b8 100644 --- a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h +++ b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h @@ -13,12 +13,17 @@ namespace pixelGPUConstants { } // namespace pixelGPUConstants namespace gpuClustering { - constexpr uint32_t maxHitsInModule() { return 256; } +#ifdef GPU_SMALL_EVENTS + constexpr uint32_t maxHitsInIter() { return 64; } +#else + // optimized for real data PU 50 + constexpr uint32_t maxHitsInIter() { return 160; } +#endif + constexpr uint32_t maxHitsInModule() { return 1024; } constexpr uint32_t MaxNumModules = 2000; - constexpr uint32_t MaxNumPixels = 256 * 2000; // this does not mean maxPixelPerModule == 256! 
- constexpr uint32_t MaxNumClustersPerModules = 1024; - constexpr uint32_t MaxHitsInModule = maxHitsInModule(); + constexpr int32_t MaxNumClustersPerModules = maxHitsInModule(); + constexpr uint32_t MaxHitsInModule = maxHitsInModule(); // as above constexpr uint32_t MaxNumClusters = pixelGPUConstants::maxNumberOfHits; constexpr uint16_t InvId = 9999; // must be > MaxNumModules diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index d28cd00c94b16..d3447aa8fd310 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -1,67 +1,8 @@ #ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h #define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h" - -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" - - -// to be moved elsewhere -namespace cudaCompat { - - struct CUDATraits { - - template - using unique_ptr = cudautils::device::unique_ptr; - - template - static auto make_host_unique(edm::Service & cs, cuda::stream_t<> &stream) { - return cs->make_host_unique(stream); - } - - - template - static auto make_device_unique(edm::Service & cs, cuda::stream_t<> &stream) { - return cs->make_device_unique(stream); - } - - template - static auto make_device_unique(edm::Service & cs, size_t size, cuda::stream_t<> &stream) { - return cs->make_device_unique(size, stream); - } - - - }; - - - struct HostTraits { - - template - using unique_ptr = std::unique_ptr; - - template - static auto make_host_unique(edm::Service&, 
cuda::stream_t<> &) { - return std::make_unique(); - } - - - template - static auto make_device_unique(edm::Service&, cuda::stream_t<> &) { - return std::make_unique(); - } - - template - static auto make_device_unique(edm::Service&, size_t size, cuda::stream_t<> &) { - return std::make_unique(size); - } - - - }; -} +#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" template @@ -108,13 +49,13 @@ class TrackingRecHit2DHeterogeneous { static constexpr uint32_t n32 = 9; static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious - unique_ptr m_store16; - unique_ptr m_store32; + unique_ptr m_store16; //! + unique_ptr m_store32; //! - unique_ptr m_HistStore; - unique_ptr m_AverageGeometryStore; + unique_ptr m_HistStore; //! + unique_ptr m_AverageGeometryStore; //! - unique_ptr m_view; + unique_ptr m_view; //! uint32_t m_nHits; @@ -155,7 +96,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH #ifndef __CUDACC__ constexpr #endif - (std::is_same::value) { + (std::is_same::value) { cudautils::copyAsync(m_view, view, stream); } else { m_view.reset(view.release());} return; @@ -200,13 +141,14 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH #ifndef __CUDACC__ constexpr #endif - (std::is_same::value) { + (std::is_same::value) { cudautils::copyAsync(m_view, view, stream); } else { m_view.reset(view.release());} } -using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous; using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; - #endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml b/CUDADataFormats/TrackingRecHit/src/classes_def.xml index cc64ff661360c..4e8325ddce87e 100644 --- 
a/CUDADataFormats/TrackingRecHit/src/classes_def.xml +++ b/CUDADataFormats/TrackingRecHit/src/classes_def.xml @@ -1,11 +1,10 @@ + - + + - - - - - + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index 651c3bb609d18..cb6c55939335c 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -78,6 +78,7 @@ void SiPixelDigisClustersFromSoA::fillDescriptions(edm::ConfigurationDescription void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { const auto& digis = iEvent.get(digiGetToken_); + const uint32_t nDigis = digis.size(); edm::ESHandle trackerTopologyHandle; iSetup.get().get(trackerTopologyHandle); @@ -85,15 +86,15 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con auto collection = std::make_unique>(); auto outputClusters = std::make_unique(); + outputClusters->reserve(2000,nDigis/4); - const uint32_t nDigis = digis.size(); edm::DetSet* detDigis = nullptr; for (uint32_t i = 0; i < nDigis; i++) { if (digis.pdigi(i) == 0) continue; detDigis = &collection->find_or_insert(digis.rawIdArr(i)); if ((*detDigis).empty()) - (*detDigis).data.reserve(32); // avoid the first relocations + (*detDigis).data.reserve(64); // avoid the first relocations break; } @@ -145,7 +146,7 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con assert(nclus == -1); detDigis = &collection->find_or_insert(digis.rawIdArr(i)); if ((*detDigis).empty()) - (*detDigis).data.reserve(32); // avoid the first relocations + (*detDigis).data.reserve(64); // avoid the first relocations else { std::cout << "Problem det present twice in input! 
" << (*detDigis).detId() << std::endl; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 8a5119d68487c..ab7852ee2faf7 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -539,6 +539,10 @@ namespace pixelgpudetails { cuda::stream_t<> &stream) { nDigis = wordCounter; +#ifdef GPU_DEBUG + std::cout << "decoding " << wordCounter << " digis. Max is " << pixelgpudetails::MAX_FED_WORDS << std::endl; +#endif + digis_d = SiPixelDigisCUDA(pixelgpudetails::MAX_FED_WORDS, stream); if (includeErrors) { digiErrors_d = SiPixelDigiErrorsCUDA(pixelgpudetails::MAX_FED_WORDS, std::move(errors), stream); @@ -581,6 +585,10 @@ namespace pixelgpudetails { includeErrors, debug); cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif if (includeErrors) { digiErrors_d.copyErrorToHostAsync(stream); @@ -605,6 +613,10 @@ namespace pixelgpudetails { clusters_d.clusInModule(), clusters_d.clusModuleStart()); cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif #ifdef GPU_DEBUG std::cout << "CUDA countModules kernel launch with " << blocks << " blocks of " << threadsPerBlock @@ -633,6 +645,10 @@ namespace pixelgpudetails { digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif // apply charge cut clusterChargeCut<<>>(digis_d.moduleInd(), @@ -659,6 +675,12 @@ namespace pixelgpudetails { cudaMemcpyDefault, stream.id())); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + } // end clusterizer scope } } // namespace pixelgpudetails diff --git 
a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index b81752cf2823a..80e903d25f501 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -31,7 +31,26 @@ namespace gpuClustering { if (nclus == 0) return; - assert(nclus <= MaxNumClustersPerModules); + if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules) + printf("Warning too many clusters in module %d in block %d: %d > %d\n", thisModuleId, blockIdx.x, nclus, MaxNumClustersPerModules); + + auto first = firstPixel + threadIdx.x; + + if (nclus > MaxNumClustersPerModules) { + // remove excess FIXME find a way to cut charge first.... + for (auto i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) + continue; // not valid + if (id[i] != thisModuleId) + break; // end of module + if (clusterId[i]>=MaxNumClustersPerModules) { + id[i]=InvId; + clusterId[i]=InvId; + } + } + nclus = MaxNumClustersPerModules; + } + #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) @@ -39,9 +58,12 @@ namespace gpuClustering { printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x); #endif - auto first = firstPixel + threadIdx.x; - __shared__ int32_t charge[MaxNumClustersPerModules]; + __shared__ uint8_t ok[MaxNumClustersPerModules]; + __shared__ uint16_t newclusId[MaxNumClustersPerModules]; + + + assert(nclus <= MaxNumClustersPerModules); for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { charge[i] = 0; } @@ -57,8 +79,6 @@ namespace gpuClustering { __syncthreads(); auto chargeCut = thisModuleId < 96 ? 2000 : 4000; // move in constants (calib?) - __shared__ uint8_t ok[MaxNumClustersPerModules]; - __shared__ uint16_t newclusId[MaxNumClustersPerModules]; for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { newclusId[i] = ok[i] = charge[i] > chargeCut ? 
1 : 0; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 79d4396c22ed5..fcc7f27e529a6 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -126,15 +126,13 @@ namespace gpuClustering { #ifdef __CUDA_ARCH__ // assume that we can cover the whole module with up to 10 blockDim.x-wide iterations - constexpr int maxiter = 10; + constexpr int maxiter = 16; #else auto maxiter = hist.size(); #endif - constexpr int maxNeighbours = - 10; // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event - if (threadIdx.x == 0) { - assert((hist.size() / blockDim.x) <= maxiter); - } + // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event + constexpr int maxNeighbours = 10; + assert((hist.size() / blockDim.x) <= maxiter); // nearest neighbour uint16_t nn[maxiter][maxNeighbours]; uint8_t nnn[maxiter]; // number of nn @@ -166,6 +164,7 @@ namespace gpuClustering { // fill NN for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { + assert(k= 0); + assert(int(y[m]) - int(y[i]) <= 1); if (std::abs(int(x[m]) - int(x[i])) > 1) continue; auto l = nnn[k]++; diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index c92ecf0d805da..e3b6b313d091b 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -31,7 +31,7 @@ int main(void) { using namespace gpuClustering; - int numElements = MaxNumPixels; + int numElements = 256 * 2000; // these in reality are already on GPU auto h_id = std::make_unique(numElements); auto h_x = std::make_unique(numElements); diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h 
b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index ec838d0b2906d..2218e31a85754 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -49,13 +49,8 @@ class PixelCPEFast final : public PixelCPEBase { // the same cudaStream, or after cudaStreamSynchronize. const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const; - pixelCPEforGPU::ParamsOnGPU getCPUProduct() const { - return pixelCPEforGPU::ParamsOnGPU { - &m_commonParamsGPU, - m_detParamsGPU.data(), - &m_layerGeometry, - &m_averageGeometry, - }; + pixelCPEforGPU::ParamsOnGPU const & getCPUProduct() const { + return cpuData_; } private: @@ -94,6 +89,8 @@ class PixelCPEFast final : public PixelCPEBase { pixelCPEforGPU::LayerGeometry m_layerGeometry; pixelCPEforGPU::AverageGeometry m_averageGeometry; + pixelCPEforGPU::ParamsOnGPU cpuData_; + struct GPUData { ~GPUData(); // not needed if not used on CPU... diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index e24b9d6ea493d..4845c3b97ec2f 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -99,8 +99,8 @@ namespace pixelCPEforGPU { int16_t ysize[N]; }; - constexpr uint32_t MaxHitsInModule = gpuClustering::MaxHitsInModule; - using ClusParams = ClusParamsT; + constexpr int32_t MaxHitsInIter = gpuClustering::maxHitsInIter(); + using ClusParams = ClusParamsT; constexpr inline void computeAnglesFromDet( DetParams const& __restrict__ detParams, float const x, float const y, float& cotalpha, float& cotbeta) { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index 6cb33724b807a..921a4b6f61c40 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ 
b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -1,4 +1,4 @@ -/** SiPixelRecHitConverter.cc + /** SiPixelRecHitConverter.cc * ------------------------------------------------------ * Description: see SiPixelRecHitConverter.h * Authors: P. Maksimovic (JHU), V.Chiochia (Uni Zurich) @@ -33,9 +33,9 @@ #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" // Make heterogeneous framewokr happy.... -#include "CUDADataFormats/Common/interface/ArrayShadow.h" -using HitModuleStart = std::array; -using HMSstorage = ArrayShadow; +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" +using HMSstorage = HostProduct; using namespace std; @@ -87,7 +87,7 @@ namespace cms // Step C: Iterate over DetIds and invoke the strip CPE algorithm // on each DetUnit - run( input, *output, geom ); + run(e, input, *output, geom ); output->shrink_to_fit(); e.put(std::move(output)); @@ -99,7 +99,8 @@ namespace cms //! and make a RecHit to store the result. //! New interface reading DetSetVector by V.Chiochia (May 30th, 2006) //--------------------------------------------------------------------------- - void SiPixelRecHitConverter::run(edm::Handle > inputhandle, + void SiPixelRecHitConverter::run(edm::Event& iEvent, + edm::Handle > inputhandle, SiPixelRecHitCollectionNew &output, edm::ESHandle & geom) { if ( ! cpe_ ) @@ -111,10 +112,40 @@ namespace cms } int numberOfDetUnits = 0; - int numberOfClusters = 0; const edmNew::DetSetVector& input = *inputhandle; - + + // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... + auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); + auto hitsModuleStart = hmsp.get(); + auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone + iEvent.put(std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... 
+ + +// fill cluster arrays + std::array clusInModule; + for (auto & cl : clusInModule) cl=0; + int numberOfClusters = 0; + for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { + unsigned int detid = DSViter->detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); + auto gind = genericDet->index(); + assert(gind<2000); + auto const nclus = DSViter->size(); + assert(nclus>0); + clusInModule[gind]=nclus; + numberOfClusters+=nclus; + } + hitsModuleStart[0]=0; + assert(clusInModule.size()>gpuClustering::MaxNumModules); + for (int i=1, n=clusInModule.size(); i::const_iterator DSViter=input.begin(); for ( ; DSViter != input.end() ; DSViter++) { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 458294626a1cf..3b8707d1d3799 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -27,9 +27,6 @@ #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" -#include "CUDADataFormats/Common/interface/ArrayShadow.h" - - #include "RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h" class SiPixelRecHitSoAFromLegacy : public edm::global::EDProducer<> { @@ -49,19 +46,22 @@ class SiPixelRecHitSoAFromLegacy : public edm::global::EDProducer<> { // The mess with inputs will be cleaned up when migrating to the new framework edm::EDGetTokenT bsGetToken_; edm::EDGetTokenT clusterToken_; // Legacy Clusters - edm::EDPutTokenT tokenHit_; + edm::EDPutTokenT tokenHit_; edm::EDPutTokenT tokenModuleStart_; - std::string cpeName_; - + std::string const cpeName_; + bool const convert2Legacy_; }; SiPixelRecHitSoAFromLegacy::SiPixelRecHitSoAFromLegacy(const edm::ParameterSet& iConfig) : 
bsGetToken_{consumes(iConfig.getParameter("beamSpot"))}, clusterToken_{consumes(iConfig.getParameter("src"))}, - tokenHit_{produces()}, + tokenHit_{produces()}, tokenModuleStart_{produces()}, - cpeName_(iConfig.getParameter("CPE")) {} + cpeName_(iConfig.getParameter("CPE")), + convert2Legacy_(iConfig.getParameter("convertToLegacy")) { + if (convert2Legacy_) produces(); + } void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -69,6 +69,7 @@ void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); desc.add("CPE", "PixelCPEFast"); + desc.add("convertToLegacy",false); descriptions.add("siPixelRecHitHostSoA", desc); } @@ -92,7 +93,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv if (!fcpe) { throw cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; } - auto cpeView = fcpe->getCPUProduct(); + auto const & cpeView = fcpe->getCPUProduct(); const reco::BeamSpot& bs = iEvent.get(bsGetToken_); @@ -102,7 +103,9 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv bsHost.y = bs.y0(); bsHost.z = bs.z0(); - auto const& input = iEvent.get(clusterToken_); + edm::Handle hclusters; + iEvent.getByToken(clusterToken_, hclusters); + auto const& input = *hclusters; // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); @@ -110,6 +113,9 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone iEvent.put(tokenModuleStart_,std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... 
+ // legacy output + auto legacyOutput = std::make_unique(); + // storage std::vector xx_; @@ -118,6 +124,10 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv std::vector moduleInd_; std::vector clus_; + std::vector, SiPixelCluster>> clusterRef; + + constexpr uint32_t MaxHitsInModule = gpuClustering::MaxHitsInModule; + HitModuleStart moduleStart_; // index of the first pixel of each module HitModuleStart clusInModule_; memset(&clusInModule_,0,sizeof(HitModuleStart)); // needed?? @@ -144,14 +154,24 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv for (int i=1, n=clusInModule_.size(); i(numberOfClusters, + auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, dummyStream ); + + if (0==numberOfClusters) { + iEvent.put(std::move(output)); + if (convert2Legacy_) + iEvent.put(std::move(legacyOutput)); + return; + } + + + if (convert2Legacy_) + legacyOutput->reserve(2000,numberOfClusters); int numberOfDetUnits = 0; @@ -175,9 +195,10 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv // std::cout << "in det " << gind << ": conv " << nclus << " hits from " << DSViter->size() << " legacy clusters" // <<' '<< fc <<','<MaxHitsInModule) printf("WARNING: too many clusters %d in Module %d. 
Only first %d Hits converted\n", nclus, gind, MaxHitsInModule); // fill digis - xx_.clear();yy_.clear();adc_.clear();moduleInd_.clear(); clus_.clear(); + xx_.clear();yy_.clear();adc_.clear();moduleInd_.clear(); clus_.clear();clusterRef.clear(); moduleId_ = gind; uint32_t ic = 0; uint32_t ndigi = 0; @@ -193,6 +214,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv ++ndigi; } assert(clust.originalId()==ic); // make sure hits and clus are in sync + if (convert2Legacy_) clusterRef.emplace_back(edmNew::makeRefTo(hclusters, &clust)); ic++; } assert(nclus==ic); @@ -206,8 +228,23 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv // we run on blockId.x==0 gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view()); for (auto h=fc; hview()->detectorIndex(h)); - + if (h-fcview()->detectorIndex(h)); + else + assert(9999 == output->view()->detectorIndex(h)); + if (convert2Legacy_) { + SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(*legacyOutput, detid); + for (auto h=fc; h=MaxHitsInModule) break; + assert(ihview()->xLocal(h), output->view()->yLocal(h)); + LocalError le(output->view()->xerrLocal(h), 0, output->view()->yerrLocal(h)); + SiPixelRecHitQuality::QualWordType rqw = 0; + SiPixelRecHit hit(lp, le, rqw, *genericDet, clusterRef[ih]); + recHitsOnDetUnit.push_back(hit); + } + } } assert(numberOfHits==numberOfClusters); @@ -220,6 +257,8 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv // std::cout << "created HitSoa for " << numberOfClusters << " clusters in " << numberOfDetUnits << " Dets" << std::endl; iEvent.put(std::move(output)); + if (convert2Legacy_) + iEvent.put(std::move(legacyOutput)); } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 44b253077992f..903324d4b7d94 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ 
b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -55,16 +55,15 @@ namespace gpuPixelRecHits { // to be moved in common namespace... constexpr uint16_t InvId = 9999; // must be > MaxNumModules - constexpr uint32_t MaxHitsInModule = pixelCPEforGPU::MaxHitsInModule; + constexpr int32_t MaxHitsInIter = pixelCPEforGPU::MaxHitsInIter; using ClusParams = pixelCPEforGPU::ClusParams; // as usual one block per module __shared__ ClusParams clusParams; - auto first = clusters.moduleStart(1 + blockIdx.x); auto me = clusters.moduleId(blockIdx.x); - auto nclus = clusters.clusInModule(me); + int nclus = clusters.clusInModule(me); if (0 == nclus) return; @@ -81,132 +80,139 @@ namespace gpuPixelRecHits { #ifdef GPU_DEBUG if (me % 100 == 1) if (threadIdx.x == 0) - printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, hitsModuleStart[me]); + printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, clusters.clusModuleStart(me)); #endif -// true on gpu only... -// assert(blockDim.x >= MaxHitsInModule); - - if (threadIdx.x == 0 && nclus > MaxHitsInModule) { - printf("WARNING: too many clusters %d in Module %d. 
Only first %d processed\n", nclus, me, MaxHitsInModule); - // zero charge: do not bother to do it in parallel - for (auto d = MaxHitsInModule; d < nclus; ++d) { - hits.charge(d) = 0; - hits.detectorIndex(d) = InvId; + for(int startClus=0, endClus=nclus; startClus0); + assert(lastClus<=nclus); + + assert(nclus>MaxHitsInIter || (0==startClus && nClusInIter==nclus && lastClus==nclus)); + + // init + for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { + clusParams.minRow[ic] = std::numeric_limits::max(); + clusParams.maxRow[ic] = 0; + clusParams.minCol[ic] = std::numeric_limits::max(); + clusParams.maxCol[ic] = 0; + clusParams.charge[ic] = 0; + clusParams.Q_f_X[ic] = 0; + clusParams.Q_l_X[ic] = 0; + clusParams.Q_f_Y[ic] = 0; + clusParams.Q_l_Y[ic] = 0; } - } - nclus = std::min(nclus, MaxHitsInModule); - - for (int ic = threadIdx.x, nc=nclus; ic < nc; ic += blockDim.x) { - clusParams.minRow[ic] = std::numeric_limits::max(); - clusParams.maxRow[ic] = 0; - clusParams.minCol[ic] = std::numeric_limits::max(); - clusParams.maxCol[ic] = 0; - clusParams.charge[ic] = 0; - clusParams.Q_f_X[ic] = 0; - clusParams.Q_l_X[ic] = 0; - clusParams.Q_f_Y[ic] = 0; - clusParams.Q_l_Y[ic] = 0; - } - first += threadIdx.x; - - __syncthreads(); - - // one thead per "digi" - - for (int i = first; i < numElements; i += blockDim.x) { - auto id = digis.moduleInd(i); - if (id == InvId) - continue; // not valid - if (id != me) - break; // end of module - auto cl = digis.clus(i); - if (cl >= int(nclus)) - continue; - auto x = digis.xx(i); - auto y = digis.yy(i); - atomicMin(&clusParams.minRow[cl], x); - atomicMax(&clusParams.maxRow[cl], x); - atomicMin(&clusParams.minCol[cl], y); - atomicMax(&clusParams.maxCol[cl], y); - } + first += threadIdx.x; + + __syncthreads(); + + // one thead per "digi" + + for (int i = first; i < numElements; i += blockDim.x) { + auto id = digis.moduleInd(i); + if (id == InvId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = 
digis.clus(i); + if (cl=lastClus) + continue; + auto x = digis.xx(i); + auto y = digis.yy(i); + cl -=startClus; + assert(cl>=0); + assert(cl= int(nclus)) - continue; - auto x = digis.xx(i); - auto y = digis.yy(i); - auto ch = digis.adc(i); - atomicAdd(&clusParams.charge[cl], ch); - if (clusParams.minRow[cl] == x) - atomicAdd(&clusParams.Q_f_X[cl], ch); - if (clusParams.maxRow[cl] == x) - atomicAdd(&clusParams.Q_l_X[cl], ch); - if (clusParams.minCol[cl] == y) - atomicAdd(&clusParams.Q_f_Y[cl], ch); - if (clusParams.maxCol[cl] == y) - atomicAdd(&clusParams.Q_l_Y[cl], ch); - } + __syncthreads(); + + for (int i = first; i < numElements; i += blockDim.x) { + auto id = digis.moduleInd(i); + if (id == InvId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis.clus(i); + if (cl=lastClus) + continue; + cl -=startClus; + assert(cl>=0); + assert(cl= TrackingRecHit2DSOAView::maxHits()) - break; // overflow... - assert(h= TrackingRecHit2DSOAView::maxHits()) + break; // overflow... 
+ assert(hcommonParams(), cpeParams->detParams(me), clusParams, ic); - pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - // store it + // store it - hits.charge(h) = clusParams.charge[ic]; + hits.charge(h) = clusParams.charge[ic]; - hits.detectorIndex(h) = me; + hits.detectorIndex(h) = me; - float xl, yl; - hits.xLocal(h) = xl = clusParams.xpos[ic]; - hits.yLocal(h) = yl = clusParams.ypos[ic]; + float xl, yl; + hits.xLocal(h) = xl = clusParams.xpos[ic]; + hits.yLocal(h) = yl = clusParams.ypos[ic]; - hits.clusterSizeX(h) = clusParams.xsize[ic]; - hits.clusterSizeY(h) = clusParams.ysize[ic]; + hits.clusterSizeX(h) = clusParams.xsize[ic]; + hits.clusterSizeY(h) = clusParams.ysize[ic]; - hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic]; - hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic]; + hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic]; + hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic]; - // keep it local for computations - float xg, yg, zg; - // to global and compute phi... - cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); - // here correct for the beamspot... - xg -= bs->x; - yg -= bs->y; - zg -= bs->z; + // keep it local for computations + float xg, yg, zg; + // to global and compute phi... + cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); + // here correct for the beamspot... 
+ xg -= bs->x; + yg -= bs->y; + zg -= bs->z; - hits.xGlobal(h) = xg; - hits.yGlobal(h) = yg; - hits.zGlobal(h) = zg; + hits.xGlobal(h) = xg; + hits.yGlobal(h) = yg; + hits.zGlobal(h) = zg; - hits.rGlobal(h) = std::sqrt(xg * xg + yg * yg); - hits.iphi(h) = unsafe_atan2s<7>(yg, xg); - } + hits.rGlobal(h) = std::sqrt(xg * xg + yg * yg); + hits.iphi(h) = unsafe_atan2s<7>(yg, xg); + } + __syncthreads(); + } // end loop on batches } } // namespace gpuPixelRecHits diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 6c0782471757e..bdd57477ee500 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -62,6 +62,14 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, yerr_endcap_def_ = 0.00075; fillParamsForGpu(); + + cpuData_ = { + &m_commonParamsGPU, + m_detParamsGPU.data(), + &m_layerGeometry, + &m_averageGeometry, + }; + } const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { From a157b8732ae6fd0904d2e3efaac1a19c4f246564 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Tue, 10 Sep 2019 16:03:58 -0500 Subject: [PATCH 082/149] Move event and stream caches, and caching allocators out from CUDAService (cms-patatrack#364) To reduce dependencies on edm::Service, and to make CUDAService less of a collection of everything, split off from it: - the CUDAEventCache - the CUDAStreamCache - the caching allocators Other changes: - clean up unnecessary use of CUDAService - fix maxCachedFraction, add debug printouts - add make_*_unique_uninitialized that avoid the static_assert --- CUDADataFormats/Common/BuildFile.xml | 3 +- CUDADataFormats/SiPixelCluster/BuildFile.xml | 5 ++- .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 18 ++++----- CUDADataFormats/SiPixelDigi/BuildFile.xml | 5 ++- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 17 ++++----- 
.../SiPixelDigi/src/SiPixelDigisCUDA.cc | 36 ++++++++---------- CUDADataFormats/TrackingRecHit/BuildFile.xml | 5 +-- .../interface/TrackingRecHit2DHeterogeneous.h | 17 +++------ .../test/TrackingRecHit2DCUDA_t.cpp | 38 ------------------- .../plugins/SiPixelRawToClusterGPUKernel.cu | 11 +++--- .../plugins/SiPixelRawToClusterGPUKernel.h | 2 +- RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 1 - .../SiPixelRecHits/src/PixelCPEFast.cc | 1 - 13 files changed, 52 insertions(+), 107 deletions(-) diff --git a/CUDADataFormats/Common/BuildFile.xml b/CUDADataFormats/Common/BuildFile.xml index 1046b76eef0f7..12da06aa20da0 100644 --- a/CUDADataFormats/Common/BuildFile.xml +++ b/CUDADataFormats/Common/BuildFile.xml @@ -1,6 +1,5 @@ - - + diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml index d34658faa2573..6db6a1f62cda1 100644 --- a/CUDADataFormats/SiPixelCluster/BuildFile.xml +++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml @@ -1,5 +1,6 @@ - - + + + diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index d88a1b0a6370b..280f6d45054c4 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -1,23 +1,21 @@ #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream) { - edm::Service cs; + moduleStart_d = cudautils::make_device_unique(maxClusters+1, stream); + clusInModule_d = cudautils::make_device_unique(maxClusters, stream); + moduleId_d 
= cudautils::make_device_unique(maxClusters, stream); + clusModuleStart_d = cudautils::make_device_unique(maxClusters+1, stream); - moduleStart_d = cs->make_device_unique(maxClusters+1, stream); - clusInModule_d = cs->make_device_unique(maxClusters, stream); - moduleId_d = cs->make_device_unique(maxClusters, stream); - clusModuleStart_d = cs->make_device_unique(maxClusters+1, stream); - - auto view = cs->make_host_unique(stream); + auto view = cudautils::make_host_unique(stream); view->moduleStart_ = moduleStart_d.get(); view->clusInModule_ = clusInModule_d.get(); view->moduleId_ = moduleId_d.get(); view->clusModuleStart_ = clusModuleStart_d.get(); - view_d = cs->make_device_unique(stream); + view_d = cudautils::make_device_unique(stream); cudautils::copyAsync(view_d, view, stream); } diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml index 29ec13098819c..c29c9c9b9f44d 100644 --- a/CUDADataFormats/SiPixelDigi/BuildFile.xml +++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml @@ -1,6 +1,7 @@ + + - - + diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index 4d574ff0cd6c1..a0bfeb5c5d9de 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -1,21 +1,21 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/memsetAsync.h" +#include + SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream): 
formatterErrors_h(std::move(errors)) { - edm::Service cs; - - error_d = cs->make_device_unique>(stream); - data_d = cs->make_device_unique(maxFedWords, stream); + error_d = cudautils::make_device_unique>(stream); + data_d = cudautils::make_device_unique(maxFedWords, stream); cudautils::memsetAsync(data_d, 0x00, maxFedWords, stream); - error_h = cs->make_host_unique>(stream); + error_h = cudautils::make_host_unique>(stream); GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); assert(error_h->empty()); assert(error_h->capacity() == static_cast(maxFedWords)); @@ -28,11 +28,10 @@ void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cuda::stream_t<>& stream) { } SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cuda::stream_t<>& stream) const { - edm::Service cs; // On one hand size() could be sufficient. On the other hand, if // someone copies the SimpleVector<>, (s)he might expect the data // buffer to actually have space for capacity() elements. - auto data = cs->make_host_unique(error_h->capacity(), stream); + auto data = cudautils::make_host_unique(error_h->capacity(), stream); // but transfer only the required amount if (not error_h->empty()) { diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index ef13ed9612dbf..05bad77c435e6 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -1,56 +1,50 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) { - edm::Service 
cs; + xx_d = cudautils::make_device_unique(maxFedWords, stream); + yy_d = cudautils::make_device_unique(maxFedWords, stream); + adc_d = cudautils::make_device_unique(maxFedWords, stream); + moduleInd_d = cudautils::make_device_unique(maxFedWords, stream); + clus_d = cudautils::make_device_unique< int32_t[]>(maxFedWords, stream); - xx_d = cs->make_device_unique(maxFedWords, stream); - yy_d = cs->make_device_unique(maxFedWords, stream); - adc_d = cs->make_device_unique(maxFedWords, stream); - moduleInd_d = cs->make_device_unique(maxFedWords, stream); - clus_d = cs->make_device_unique< int32_t[]>(maxFedWords, stream); + pdigi_d = cudautils::make_device_unique(maxFedWords, stream); + rawIdArr_d = cudautils::make_device_unique(maxFedWords, stream); - pdigi_d = cs->make_device_unique(maxFedWords, stream); - rawIdArr_d = cs->make_device_unique(maxFedWords, stream); - - auto view = cs->make_host_unique(stream); + auto view = cudautils::make_host_unique(stream); view->xx_ = xx_d.get(); view->yy_ = yy_d.get(); view->adc_ = adc_d.get(); view->moduleInd_ = moduleInd_d.get(); view->clus_ = clus_d.get(); - view_d = cs->make_device_unique(stream); + view_d = cudautils::make_device_unique(stream); cudautils::copyAsync(view_d, view, stream); } cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cuda::stream_t<>& stream) const { - edm::Service cs; - auto ret = cs->make_host_unique(nDigis(), stream); + auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, adc_d, nDigis(), stream); return ret; } cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cuda::stream_t<>& stream) const { - edm::Service cs; - auto ret = cs->make_host_unique(nDigis(), stream); + auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, clus_d, nDigis(), stream); return ret; } cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cuda::stream_t<>& stream) const { - edm::Service cs; - auto ret = cs->make_host_unique(nDigis(), 
stream); + auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, pdigi_d, nDigis(), stream); return ret; } cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cuda::stream_t<>& stream) const { - edm::Service cs; - auto ret = cs->make_host_unique(nDigis(), stream); + auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream); return ret; } diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml index 004f6bd6d0883..8af605862d1ac 100644 --- a/CUDADataFormats/TrackingRecHit/BuildFile.xml +++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml @@ -1,8 +1,7 @@ - - - + + diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index d3447aa8fd310..ea11ac3339d2f 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -67,8 +67,6 @@ class TrackingRecHit2DHeterogeneous { int16_t* m_iphi; }; -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" @@ -78,14 +76,11 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH uint32_t const *hitsModuleStart, cuda::stream_t<> &stream) : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) { - edm::Service cs; - - - auto view = Traits:: template make_host_unique(cs,stream); + auto view = Traits:: template make_host_unique(stream); view->m_nHits = nHits; - m_view = Traits:: template make_device_unique(cs,stream); - m_AverageGeometryStore = Traits:: template make_device_unique(cs,stream); + m_view = Traits:: template make_device_unique(stream); + m_AverageGeometryStore = 
Traits:: template make_device_unique(stream); view->m_averageGeometry = m_AverageGeometryStore.get(); view->m_cpeParams = cpeParams; view->m_hitsModuleStart = hitsModuleStart; @@ -107,9 +102,9 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH // if ordering is relevant they may have to be stored phi-ordered by layer or so // this will break 1to1 correspondence with cluster and module locality // so unless proven VERY inefficient we keep it ordered as generated - m_store16 = Traits:: template make_device_unique(cs, nHits * n16, stream); - m_store32 = Traits:: template make_device_unique(cs, nHits * n32 + 11, stream); - m_HistStore = Traits:: template make_device_unique(cs, stream); + m_store16 = Traits:: template make_device_unique(nHits * n16, stream); + m_store32 = Traits:: template make_device_unique(nHits * n32 + 11, stream); + m_HistStore = Traits:: template make_device_unique(stream); auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; auto get32 = [&](int i) { return m_store32.get() + i * nHits; }; diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index 34ec61095116a..cf2221dc71b95 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -1,12 +1,4 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSetReader/interface/ParameterSetReader.h" -#include "FWCore/PluginManager/interface/PluginManager.h" -#include "FWCore/PluginManager/interface/standard.h" -#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" -#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #include 
"HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" @@ -16,36 +8,9 @@ namespace testTrackingRecHit2D { } -namespace { - CUDAService makeCUDAService(edm::ParameterSet ps, edm::ActivityRegistry& ar) { - auto desc = edm::ConfigurationDescriptions("Service", "CUDAService"); - CUDAService::fillDescriptions(desc); - desc.validate(ps, "CUDAService"); - return CUDAService(ps, ar); - } -} // namespace - int main() { exitSansCUDADevices(); - edmplugin::PluginManager::configure(edmplugin::standard::config()); - - const std::string config{ - R"_(import FWCore.ParameterSet.Config as cms -process = cms.Process('Test') -process.CUDAService = cms.Service('CUDAService') -)_"}; - - std::unique_ptr operate_; - std::unique_ptr params; - edm::makeParameterSets(config, params); - edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params))); - operate_.reset(new edm::ServiceRegistry::Operate(tempToken)); - - edm::ActivityRegistry ar; - edm::ParameterSet ps; - auto cs = makeCUDAService(ps, ar); - auto current_device = cuda::device::current::get(); auto stream = current_device.create_stream(cuda::stream::implicitly_synchronizes_with_default_stream); @@ -54,8 +19,5 @@ process.CUDAService = cms.Service('CUDAService') testTrackingRecHit2D::runKernels(tkhit.view()); - //Fake the end-of-job signal. 
- ar.postEndJobSignal_(); - return 0; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index ab7852ee2faf7..c436ff319d736 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -30,9 +30,9 @@ // CMSSW includes #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" @@ -549,8 +549,7 @@ namespace pixelgpudetails { } clusters_d = SiPixelClustersCUDA(gpuClustering::MaxNumModules, stream); - edm::Service cs; - nModules_Clusters_h = cs->make_host_unique(2, stream); + nModules_Clusters_h = cudautils::make_host_unique(2, stream); if (wordCounter) // protect in case of empty event.... 
{ @@ -559,8 +558,8 @@ namespace pixelgpudetails { assert(0 == wordCounter % 2); // wordCounter is the total no of words in each event to be trasfered on device - auto word_d = cs->make_device_unique(wordCounter, stream); - auto fedId_d = cs->make_device_unique(wordCounter, stream); + auto word_d = cudautils::make_device_unique(wordCounter, stream); + auto fedId_d = cudautils::make_device_unique(wordCounter, stream); cudaCheck(cudaMemcpyAsync( word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream.id())); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 0d2b6a8c7fc65..edc01d7b88c53 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -197,7 +197,7 @@ namespace pixelgpudetails { // stream is still alive // // technically the statement above is not true anymore now that - // the CUDA streams are cached within the CUDAService, but it is + // the CUDA streams are cached within the CUDAStreamCache, but it is // still better to release as early as possible nModules_Clusters_h.reset(); return std::make_pair(std::move(digis_d), std::move(clusters_d)); diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index 7918c7a4f4d9a..a3d2c6a35e642 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -15,7 +15,6 @@ - diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index bdd57477ee500..f1d716f06eba1 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -9,7 +9,6 @@ #include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" #include 
"Geometry/TrackerGeometryBuilder/interface/RectangularPixelTopology.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" -#include "HeterogeneousCore/CUDAServices/interface/numberOfCUDADevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "MagneticField/Engine/interface/MagneticField.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" From 82a7bd06d28b13a14d62a7042e00b7c1d79e3b81 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 12 Sep 2019 00:22:05 +0200 Subject: [PATCH 083/149] Apply clang-format style formatting --- .../interface/SiPixelClustersCUDA.h | 28 +- .../interface/gpuClusteringConstants.h | 2 +- .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 8 +- .../interface/SiPixelDigiErrorsCUDA.h | 9 +- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 64 +-- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 5 +- .../SiPixelDigi/src/SiPixelDigisCUDA.cc | 16 +- .../interface/TrackingRecHit2DHeterogeneous.h | 75 +-- .../interface/TrackingRecHit2DSOAView.h | 9 +- .../SiPixelGainCalibrationForHLTGPURcd.h | 5 +- ...PixelGainCalibrationForHLTGPUESProducer.cc | 6 +- .../interface/SiPixelGainForHLTonGPU.h | 50 +- .../SiPixelCluster/interface/SiPixelCluster.h | 234 +++++----- .../SiPixelDigi/interface/SiPixelDigisSoA.h | 7 +- .../SiPixelDigi/src/SiPixelDigisSoA.cc | 12 +- DataFormats/SiPixelDigi/src/classes.h | 3 +- .../plugins/SiPixelDigiErrorsFromSoA.cc | 72 +-- .../plugins/SiPixelDigiErrorsSoAFromCUDA.cc | 19 +- .../plugins/SiPixelDigisSoAFromCUDA.cc | 20 +- .../interface/phase1PixelTopology.h | 171 +++---- .../test/phase1PixelTopology_t.cpp | 171 +++---- .../plugins/SiPixelClusterProducer.cc | 287 ++++++------ .../plugins/SiPixelDigisClustersFromSoA.cc | 5 +- .../plugins/SiPixelRawToClusterCUDA.cc | 124 ++--- .../plugins/SiPixelRawToClusterGPUKernel.cu | 17 +- .../plugins/SiPixelRawToClusterGPUKernel.h | 197 ++++---- .../plugins/gpuClusterChargeCut.h | 16 +- .../plugins/gpuClustering.h | 15 +- 
.../SiPixelRecHits/interface/PixelCPEBase.h | 441 +++++++++--------- .../SiPixelRecHits/interface/PixelCPEFast.h | 4 +- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 9 +- .../plugins/PixelCPEFastESProducer.cc | 63 ++- .../plugins/SiPixelRecHitConverter.cc | 212 ++++----- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 173 ++++--- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 77 ++- .../SiPixelRecHits/src/PixelCPEFast.cc | 59 ++- 36 files changed, 1319 insertions(+), 1366 deletions(-) diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index c71d8573d9a23..1db6e29459ea7 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -11,17 +11,15 @@ class SiPixelClustersCUDA { public: SiPixelClustersCUDA() = default; - explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream); + explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<> &stream); ~SiPixelClustersCUDA() = default; - SiPixelClustersCUDA(const SiPixelClustersCUDA&) = delete; - SiPixelClustersCUDA& operator=(const SiPixelClustersCUDA&) = delete; - SiPixelClustersCUDA(SiPixelClustersCUDA&&) = default; - SiPixelClustersCUDA& operator=(SiPixelClustersCUDA&&) = default; + SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete; + SiPixelClustersCUDA &operator=(const SiPixelClustersCUDA &) = delete; + SiPixelClustersCUDA(SiPixelClustersCUDA &&) = default; + SiPixelClustersCUDA &operator=(SiPixelClustersCUDA &&) = default; - void setNClusters(uint32_t nClusters) { - nClusters_h = nClusters; - } + void setNClusters(uint32_t nClusters) { nClusters_h = nClusters; } uint32_t nClusters() const { return nClusters_h; } @@ -44,14 +42,14 @@ class SiPixelClustersCUDA { public: // DeviceConstView() = default; - __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_+i); } - __device__ 
__forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_+i); } - __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_+i); } - __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_+i); } + __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_ + i); } + __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_ + i); } + __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_ + i); } + __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_ + i); } friend SiPixelClustersCUDA; -// private: + // private: uint32_t const *moduleStart_; uint32_t const *clusInModule_; uint32_t const *moduleId_; @@ -66,9 +64,9 @@ class SiPixelClustersCUDA { cudautils::device::unique_ptr moduleId_d; // module id of each module // originally from rechits - cudautils::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module + cudautils::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module - cudautils::device::unique_ptr view_d; // "me" pointer + cudautils::device::unique_ptr view_d; // "me" pointer uint32_t nClusters_h; }; diff --git a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h index 5199992bbb3b8..1430606ab6678 100644 --- a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h +++ b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h @@ -23,7 +23,7 @@ namespace gpuClustering { constexpr uint32_t MaxNumModules = 2000; constexpr int32_t MaxNumClustersPerModules = maxHitsInModule(); - constexpr uint32_t MaxHitsInModule = maxHitsInModule(); // as above + constexpr uint32_t MaxHitsInModule = maxHitsInModule(); // as above constexpr uint32_t MaxNumClusters = pixelGPUConstants::maxNumberOfHits; 
constexpr uint16_t InvId = 9999; // must be > MaxNumModules diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index 280f6d45054c4..4a145bb1231c2 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -5,10 +5,10 @@ #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream) { - moduleStart_d = cudautils::make_device_unique(maxClusters+1, stream); - clusInModule_d = cudautils::make_device_unique(maxClusters, stream); - moduleId_d = cudautils::make_device_unique(maxClusters, stream); - clusModuleStart_d = cudautils::make_device_unique(maxClusters+1, stream); + moduleStart_d = cudautils::make_device_unique(maxClusters + 1, stream); + clusInModule_d = cudautils::make_device_unique(maxClusters, stream); + moduleId_d = cudautils::make_device_unique(maxClusters, stream); + clusModuleStart_d = cudautils::make_device_unique(maxClusters + 1, stream); auto view = cudautils::make_host_unique(stream); view->moduleStart_ = moduleStart_d.get(); diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index e9c8c0f644722..7e016efb708de 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -21,11 +21,12 @@ class SiPixelDigiErrorsCUDA { const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; } - GPU::SimpleVector *error() { return error_d.get(); } - GPU::SimpleVector const *error() const { return error_d.get(); } - GPU::SimpleVector const *c_error() const { return error_d.get(); } + GPU::SimpleVector* error() { return error_d.get(); } + GPU::SimpleVector const* error() const { return error_d.get(); } + GPU::SimpleVector const* c_error() 
const { return error_d.get(); } - using HostDataError = std::pair, cudautils::host::unique_ptr>; + using HostDataError = + std::pair, cudautils::host::unique_ptr>; HostDataError dataErrorToHostAsync(cuda::stream_t<>& stream) const; void copyErrorToHostAsync(cuda::stream_t<>& stream); diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index c80e6bda4083a..0d0e025ef52da 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -10,13 +10,13 @@ class SiPixelDigisCUDA { public: SiPixelDigisCUDA() = default; - explicit SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream); + explicit SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<> &stream); ~SiPixelDigisCUDA() = default; - SiPixelDigisCUDA(const SiPixelDigisCUDA&) = delete; - SiPixelDigisCUDA& operator=(const SiPixelDigisCUDA&) = delete; - SiPixelDigisCUDA(SiPixelDigisCUDA&&) = default; - SiPixelDigisCUDA& operator=(SiPixelDigisCUDA&&) = default; + SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete; + SiPixelDigisCUDA &operator=(const SiPixelDigisCUDA &) = delete; + SiPixelDigisCUDA(SiPixelDigisCUDA &&) = default; + SiPixelDigisCUDA &operator=(SiPixelDigisCUDA &&) = default; void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { nModules_h = nModules; @@ -26,19 +26,19 @@ class SiPixelDigisCUDA { uint32_t nModules() const { return nModules_h; } uint32_t nDigis() const { return nDigis_h; } - uint16_t * xx() { return xx_d.get(); } - uint16_t * yy() { return yy_d.get(); } - uint16_t * adc() { return adc_d.get(); } - uint16_t * moduleInd() { return moduleInd_d.get(); } - int32_t * clus() { return clus_d.get(); } - uint32_t * pdigi() { return pdigi_d.get(); } - uint32_t * rawIdArr() { return rawIdArr_d.get(); } + uint16_t *xx() { return xx_d.get(); } + uint16_t *yy() { return yy_d.get(); } + uint16_t *adc() { return adc_d.get(); } + uint16_t 
*moduleInd() { return moduleInd_d.get(); } + int32_t *clus() { return clus_d.get(); } + uint32_t *pdigi() { return pdigi_d.get(); } + uint32_t *rawIdArr() { return rawIdArr_d.get(); } uint16_t const *xx() const { return xx_d.get(); } uint16_t const *yy() const { return yy_d.get(); } uint16_t const *adc() const { return adc_d.get(); } uint16_t const *moduleInd() const { return moduleInd_d.get(); } - int32_t const *clus() const { return clus_d.get(); } + int32_t const *clus() const { return clus_d.get(); } uint32_t const *pdigi() const { return pdigi_d.get(); } uint32_t const *rawIdArr() const { return rawIdArr_d.get(); } @@ -46,45 +46,45 @@ class SiPixelDigisCUDA { uint16_t const *c_yy() const { return yy_d.get(); } uint16_t const *c_adc() const { return adc_d.get(); } uint16_t const *c_moduleInd() const { return moduleInd_d.get(); } - int32_t const *c_clus() const { return clus_d.get(); } + int32_t const *c_clus() const { return clus_d.get(); } uint32_t const *c_pdigi() const { return pdigi_d.get(); } uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); } - - cudautils::host::unique_ptr adcToHostAsync(cuda::stream_t<>& stream) const; - cudautils::host::unique_ptr< int32_t[]> clusToHostAsync(cuda::stream_t<>& stream) const; - cudautils::host::unique_ptr pdigiToHostAsync(cuda::stream_t<>& stream) const; - cudautils::host::unique_ptr rawIdArrToHostAsync(cuda::stream_t<>& stream) const; + + cudautils::host::unique_ptr adcToHostAsync(cuda::stream_t<> &stream) const; + cudautils::host::unique_ptr clusToHostAsync(cuda::stream_t<> &stream) const; + cudautils::host::unique_ptr pdigiToHostAsync(cuda::stream_t<> &stream) const; + cudautils::host::unique_ptr rawIdArrToHostAsync(cuda::stream_t<> &stream) const; class DeviceConstView { public: // DeviceConstView() = default; - __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_+i); } - __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_+i); } - __device__ __forceinline__ uint16_t 
adc(int i) const { return __ldg(adc_+i); } - __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_+i); } - __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_+i); } + __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_ + i); } + __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_ + i); } + __device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_ + i); } + __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_ + i); } + __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_ + i); } friend class SiPixelDigisCUDA; - // private: + // private: uint16_t const *xx_; uint16_t const *yy_; uint16_t const *adc_; uint16_t const *moduleInd_; - int32_t const *clus_; + int32_t const *clus_; }; const DeviceConstView *view() const { return view_d.get(); } private: // These are consumed by downstream device code - cudautils::device::unique_ptr xx_d; // local coordinates of each pixel - cudautils::device::unique_ptr yy_d; // - cudautils::device::unique_ptr adc_d; // ADC of each pixel - cudautils::device::unique_ptr moduleInd_d; // module id of each pixel - cudautils::device::unique_ptr clus_d; // cluster id of each pixel - cudautils::device::unique_ptr view_d; // "me" pointer + cudautils::device::unique_ptr xx_d; // local coordinates of each pixel + cudautils::device::unique_ptr yy_d; // + cudautils::device::unique_ptr adc_d; // ADC of each pixel + cudautils::device::unique_ptr moduleInd_d; // module id of each pixel + cudautils::device::unique_ptr clus_d; // cluster id of each pixel + cudautils::device::unique_ptr view_d; // "me" pointer // These are for CPU output; should we (eventually) place them to a // separate product? 
diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index a0bfeb5c5d9de..2aa2b24ddf316 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -7,9 +7,8 @@ #include -SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream): - formatterErrors_h(std::move(errors)) -{ +SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream) + : formatterErrors_h(std::move(errors)) { error_d = cudautils::make_device_unique>(stream); data_d = cudautils::make_device_unique(maxFedWords, stream); diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index 05bad77c435e6..fe4aedd686546 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -5,14 +5,14 @@ #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) { - xx_d = cudautils::make_device_unique(maxFedWords, stream); - yy_d = cudautils::make_device_unique(maxFedWords, stream); - adc_d = cudautils::make_device_unique(maxFedWords, stream); - moduleInd_d = cudautils::make_device_unique(maxFedWords, stream); - clus_d = cudautils::make_device_unique< int32_t[]>(maxFedWords, stream); - - pdigi_d = cudautils::make_device_unique(maxFedWords, stream); - rawIdArr_d = cudautils::make_device_unique(maxFedWords, stream); + xx_d = cudautils::make_device_unique(maxFedWords, stream); + yy_d = cudautils::make_device_unique(maxFedWords, stream); + adc_d = cudautils::make_device_unique(maxFedWords, stream); + moduleInd_d = cudautils::make_device_unique(maxFedWords, stream); + clus_d = cudautils::make_device_unique(maxFedWords, stream); + + pdigi_d = 
cudautils::make_device_unique(maxFedWords, stream); + rawIdArr_d = cudautils::make_device_unique(maxFedWords, stream); auto view = cudautils::make_host_unique(stream); view->xx_ = xx_d.get(); diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index ea11ac3339d2f..a3d6354732ecf 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -4,23 +4,20 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h" #include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" - -template +template class TrackingRecHit2DHeterogeneous { public: - - template - using unique_ptr = typename Traits:: template unique_ptr; + template + using unique_ptr = typename Traits::template unique_ptr; using Hist = TrackingRecHit2DSOAView::Hist; TrackingRecHit2DHeterogeneous() = default; explicit TrackingRecHit2DHeterogeneous(uint32_t nHits, - pixelCPEforGPU::ParamsOnGPU const* cpeParams, - uint32_t const* hitsModuleStart, - cuda::stream_t<>& stream); - + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t const* hitsModuleStart, + cuda::stream_t<>& stream); ~TrackingRecHit2DHeterogeneous() = default; @@ -50,12 +47,12 @@ class TrackingRecHit2DHeterogeneous { static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious unique_ptr m_store16; //! - unique_ptr m_store32; //! + unique_ptr m_store32; //! - unique_ptr m_HistStore; //! + unique_ptr m_HistStore; //! unique_ptr m_AverageGeometryStore; //! - unique_ptr m_view; //! + unique_ptr m_view; //! 
uint32_t m_nHits; @@ -70,30 +67,32 @@ class TrackingRecHit2DHeterogeneous { #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -template +template TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nHits, - pixelCPEforGPU::ParamsOnGPU const *cpeParams, - uint32_t const *hitsModuleStart, - cuda::stream_t<> &stream) + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t const* hitsModuleStart, + cuda::stream_t<>& stream) : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) { - auto view = Traits:: template make_host_unique(stream); + auto view = Traits::template make_host_unique(stream); view->m_nHits = nHits; - m_view = Traits:: template make_device_unique(stream); - m_AverageGeometryStore = Traits:: template make_device_unique(stream); + m_view = Traits::template make_device_unique(stream); + m_AverageGeometryStore = Traits::template make_device_unique(stream); view->m_averageGeometry = m_AverageGeometryStore.get(); view->m_cpeParams = cpeParams; view->m_hitsModuleStart = hitsModuleStart; // if empy do not bother if (0 == nHits) { - if + if #ifndef __CUDACC__ - constexpr -#endif - (std::is_same::value) { + constexpr +#endif + (std::is_same::value) { cudautils::copyAsync(m_view, view, stream); - } else { m_view.reset(view.release());} + } else { + m_view.reset(view.release()); + } return; } @@ -102,9 +101,9 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH // if ordering is relevant they may have to be stored phi-ordered by layer or so // this will break 1to1 correspondence with cluster and module locality // so unless proven VERY inefficient we keep it ordered as generated - m_store16 = Traits:: template make_device_unique(nHits * n16, stream); - m_store32 = Traits:: template make_device_unique(nHits * n32 + 11, stream); - m_HistStore = Traits:: template make_device_unique(stream); + m_store16 = Traits::template make_device_unique(nHits * 
n16, stream); + m_store32 = Traits::template make_device_unique(nHits * n32 + 11, stream); + m_HistStore = Traits::template make_device_unique(stream); auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; auto get32 = [&](int i) { return m_store32.get() + i * nHits; }; @@ -122,23 +121,25 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH view->m_zg = get32(6); view->m_rg = get32(7); - m_iphi = view->m_iphi = reinterpret_cast(get16(0)); + m_iphi = view->m_iphi = reinterpret_cast(get16(0)); - view->m_charge = reinterpret_cast(get32(8)); - view->m_xsize = reinterpret_cast(get16(2)); - view->m_ysize = reinterpret_cast(get16(3)); + view->m_charge = reinterpret_cast(get32(8)); + view->m_xsize = reinterpret_cast(get16(2)); + view->m_ysize = reinterpret_cast(get16(3)); view->m_detInd = get16(1); - m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); + m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); // transfer view - if + if #ifndef __CUDACC__ - constexpr + constexpr #endif - (std::is_same::value) { - cudautils::copyAsync(m_view, view, stream); - } else { m_view.reset(view.release());} + (std::is_same::value) { + cudautils::copyAsync(m_view, view, stream); + } else { + m_view.reset(view.release()); + } } using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous; diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index de647e022b20d..f648fa0904749 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -9,7 +9,6 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" - namespace pixelCPEforGPU { struct ParamsOnGPU; } @@ -23,7 +22,8 @@ class TrackingRecHit2DSOAView { using AverageGeometry = 
phase1PixelTopology::AverageGeometry; - template friend class TrackingRecHit2DHeterogeneous; + template + friend class TrackingRecHit2DHeterogeneous; __device__ __forceinline__ uint32_t nHits() const { return m_nHits; } @@ -68,10 +68,9 @@ class TrackingRecHit2DSOAView { __device__ __forceinline__ Hist& phiBinner() { return *m_hist; } __device__ __forceinline__ Hist const& phiBinner() const { return *m_hist; } - __device__ __forceinline__ AverageGeometry & averageGeometry() { return *m_averageGeometry; } + __device__ __forceinline__ AverageGeometry& averageGeometry() { return *m_averageGeometry; } __device__ __forceinline__ AverageGeometry const& averageGeometry() const { return *m_averageGeometry; } - private: // local coord float *m_xl, *m_yl; @@ -88,7 +87,7 @@ class TrackingRecHit2DSOAView { uint16_t* m_detInd; // supporting objects - AverageGeometry * m_averageGeometry; // owned (corrected for beam spot: not sure where to host it otherwise) + AverageGeometry* m_averageGeometry; // owned (corrected for beam spot: not sure where to host it otherwise) pixelCPEforGPU::ParamsOnGPU const* m_cpeParams; // forwarded from setup, NOT owned uint32_t const* m_hitsModuleStart; // forwarded from clusters diff --git a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h index afb682e5d451f..56301421f325c 100644 --- a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h +++ b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h @@ -9,6 +9,9 @@ #include "boost/mpl/vector.hpp" -class SiPixelGainCalibrationForHLTGPURcd : public edm::eventsetup::DependentRecordImplementation > {}; +class SiPixelGainCalibrationForHLTGPURcd + : public edm::eventsetup::DependentRecordImplementation< + SiPixelGainCalibrationForHLTGPURcd, + boost::mpl::vector > {}; #endif diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc 
b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc index 186bb2d72c3f3..92c2d996d9622 100644 --- a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc +++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc @@ -12,12 +12,13 @@ #include -class SiPixelGainCalibrationForHLTGPUESProducer: public edm::ESProducer { +class SiPixelGainCalibrationForHLTGPUESProducer : public edm::ESProducer { public: explicit SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig); std::unique_ptr produce(const SiPixelGainCalibrationForHLTGPURcd& iRecord); static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + private: }; @@ -30,7 +31,8 @@ void SiPixelGainCalibrationForHLTGPUESProducer::fillDescriptions(edm::Configurat descriptions.add("siPixelGainCalibrationForHLTGPU", desc); } -std::unique_ptr SiPixelGainCalibrationForHLTGPUESProducer::produce(const SiPixelGainCalibrationForHLTGPURcd& iRecord) { +std::unique_ptr SiPixelGainCalibrationForHLTGPUESProducer::produce( + const SiPixelGainCalibrationForHLTGPURcd& iRecord) { edm::ESHandle gains; iRecord.getRecord().get(gains); diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h index 931ee7e65f295..8ce3924e54609 100644 --- a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -7,67 +7,57 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -struct SiPixelGainForHLTonGPU_DecodingStructure{ +struct SiPixelGainForHLTonGPU_DecodingStructure { uint8_t gain; uint8_t ped; }; - // copy of SiPixelGainCalibrationForHLT class SiPixelGainForHLTonGPU { - - public: - +public: using DecodingStructure = SiPixelGainForHLTonGPU_DecodingStructure; - - using Range = std::pair; - - - inline __host__ __device__ - std::pair 
getPedAndGain(uint32_t moduleInd, int col, int row, bool& isDeadColumn, bool& isNoisyColumn ) const { + using Range = std::pair; + inline __host__ __device__ std::pair getPedAndGain( + uint32_t moduleInd, int col, int row, bool& isDeadColumn, bool& isNoisyColumn) const { auto range = rangeAndCols[moduleInd].first; auto nCols = rangeAndCols[moduleInd].second; // determine what averaged data block we are in (there should be 1 or 2 of these depending on if plaquette is 1 by X or 2 by X - unsigned int lengthOfColumnData = (range.second-range.first)/nCols; - unsigned int lengthOfAveragedDataInEachColumn = 2; // we always only have two values per column averaged block + unsigned int lengthOfColumnData = (range.second - range.first) / nCols; + unsigned int lengthOfAveragedDataInEachColumn = 2; // we always only have two values per column averaged block unsigned int numberOfDataBlocksToSkip = row / numberOfRowsAveragedOver_; + auto offset = range.first + col * lengthOfColumnData + lengthOfAveragedDataInEachColumn * numberOfDataBlocksToSkip; - auto offset = range.first + col*lengthOfColumnData + lengthOfAveragedDataInEachColumn*numberOfDataBlocksToSkip; + assert(offset < range.second); + assert(offset < 3088384); + assert(0 == offset % 2); - assert(offset rangeAndCols[2000]; - float minPed_, maxPed_, minGain_, maxGain_; + float minPed_, maxPed_, minGain_, maxGain_; float pedPrecision, gainPrecision; - unsigned int numberOfRowsAveragedOver_; // this is 80!!!! + unsigned int numberOfRowsAveragedOver_; // this is 80!!!! 
unsigned int nBinsToUseForEncoding_; unsigned int deadFlag_; unsigned int noisyFlag_; }; -#endif // CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h +#endif // CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h diff --git a/DataFormats/SiPixelCluster/interface/SiPixelCluster.h b/DataFormats/SiPixelCluster/interface/SiPixelCluster.h index ba75447e945bb..5dfb8671c0a38 100644 --- a/DataFormats/SiPixelCluster/interface/SiPixelCluster.h +++ b/DataFormats/SiPixelCluster/interface/SiPixelCluster.h @@ -8,7 +8,7 @@ //! Class to contain and store all the topological information of pixel clusters: //! charge, global size, size and the barycenter in x and y //! local directions. It builds a vector of SiPixel (which is -//! an inner class) and a container of channels. +//! an inner class) and a container of channels. //! //! March 2007: Edge methods moved to RectangularPixelTopology class (V.Chiochia) //! Feb 2008: Modify the Pixel class from float to shorts @@ -27,160 +27,153 @@ class PixelDigi; class SiPixelCluster { public: - class Pixel { public: - constexpr Pixel() : x(0), y(0), adc(0){} // for root - constexpr Pixel(int pix_x, int pix_y, int pix_adc) : - x(pix_x), y(pix_y), adc(pix_adc) {} - uint16_t x; + constexpr Pixel() : x(0), y(0), adc(0) {} // for root + constexpr Pixel(int pix_x, int pix_y, int pix_adc) : x(pix_x), y(pix_y), adc(pix_adc) {} + uint16_t x; uint16_t y; uint16_t adc; }; - + //--- Integer shift in x and y directions. 
class Shift { public: - constexpr Shift( int dx, int dy) : dx_(dx), dy_(dy) {} + constexpr Shift(int dx, int dy) : dx_(dx), dy_(dy) {} constexpr Shift() : dx_(0), dy_(0) {} - constexpr int dx() const { return dx_;} - constexpr int dy() const { return dy_;} + constexpr int dx() const { return dx_; } + constexpr int dy() const { return dy_; } + private: int dx_; int dy_; }; - + //--- Position of a SiPixel class PixelPos { public: constexpr PixelPos() : row_(0), col_(0) {} - constexpr PixelPos(int row, int col) : row_(row) , col_(col) {} - constexpr int row() const { return row_;} - constexpr int col() const { return col_;} - constexpr PixelPos operator+( const Shift& shift) const { - return PixelPos( row() + shift.dx(), col() + shift.dy()); - } + constexpr PixelPos(int row, int col) : row_(row), col_(col) {} + constexpr int row() const { return row_; } + constexpr int col() const { return col_; } + constexpr PixelPos operator+(const Shift& shift) const { return PixelPos(row() + shift.dx(), col() + shift.dy()); } + private: int row_; int col_; }; - - typedef std::vector::const_iterator PixelDigiIter; - typedef std::pair PixelDigiRange; - - - static constexpr unsigned int MAXSPAN=255; - static constexpr unsigned int MAXPOS=2047; - + + typedef std::vector::const_iterator PixelDigiIter; + typedef std::pair PixelDigiRange; + + static constexpr unsigned int MAXSPAN = 255; + static constexpr unsigned int MAXPOS = 2047; + /** Construct from a range of digis that form a cluster and from * a DetID. The range is assumed to be non-empty. 
*/ - + SiPixelCluster() {} - - SiPixelCluster(unsigned int isize, uint16_t const * adcs, - uint16_t const * xpos, uint16_t const * ypos, - uint16_t const xmin, uint16_t const ymin) : - thePixelOffset(2*isize), thePixelADC(adcs,adcs+isize) { + + SiPixelCluster(unsigned int isize, + uint16_t const* adcs, + uint16_t const* xpos, + uint16_t const* ypos, + uint16_t const xmin, + uint16_t const ymin) + : thePixelOffset(2 * isize), thePixelADC(adcs, adcs + isize) { uint16_t maxCol = 0; uint16_t maxRow = 0; - for (unsigned int i=0; i!=isize; ++i) { - uint16_t xoffset = xpos[i]-xmin; - uint16_t yoffset = ypos[i]-ymin; - thePixelOffset[i*2] = std::min(uint16_t(MAXSPAN),xoffset); - thePixelOffset[i*2+1] = std::min(uint16_t(MAXSPAN),yoffset); - if (xoffset > maxRow) maxRow = xoffset; - if (yoffset > maxCol) maxCol = yoffset; + for (unsigned int i = 0; i != isize; ++i) { + uint16_t xoffset = xpos[i] - xmin; + uint16_t yoffset = ypos[i] - ymin; + thePixelOffset[i * 2] = std::min(uint16_t(MAXSPAN), xoffset); + thePixelOffset[i * 2 + 1] = std::min(uint16_t(MAXSPAN), yoffset); + if (xoffset > maxRow) + maxRow = xoffset; + if (yoffset > maxCol) + maxCol = yoffset; } - packRow(xmin,maxRow); - packCol(ymin,maxCol); + packRow(xmin, maxRow); + packCol(ymin, maxCol); } - - + // obsolete (only for regression tests) - SiPixelCluster( const PixelPos& pix, int adc); - void add( const PixelPos& pix, int adc); - - // Analog linear average position (barycenter) + SiPixelCluster(const PixelPos& pix, int adc); + void add(const PixelPos& pix, int adc); + + // Analog linear average position (barycenter) float x() const { float qm = 0.0; int isize = thePixelADC.size(); - for (int i=0; i & pixelOffset() const { return thePixelOffset;} - const std::vector & pixelADC() const { return thePixelADC;} - + } // Return total cluster charge. + + inline int minPixelRow() const { return theMinPixelRow; } // The min x index. 
+ inline int maxPixelRow() const { return minPixelRow() + rowSpan(); } // The max x index. + inline int minPixelCol() const { return theMinPixelCol; } // The min y index. + inline int maxPixelCol() const { return minPixelCol() + colSpan(); } // The max y index. + + const std::vector& pixelOffset() const { return thePixelOffset; } + const std::vector& pixelADC() const { return thePixelADC; } + // obsolete, use single pixel access below const std::vector pixels() const { std::vector oldPixVector; int isize = thePixelADC.size(); - oldPixVector.reserve(isize); - for(int i=0; i thePixelOffset; + std::vector thePixelOffset; std::vector thePixelADC; - - - uint16_t theMinPixelRow=MAXPOS; // Minimum pixel index in the x direction (low edge). - uint16_t theMinPixelCol=MAXPOS; // Minimum pixel index in the y direction (left edge). - uint8_t thePixelRowSpan=0; // Span pixel index in the x direction (low edge). - uint8_t thePixelColSpan=0; // Span pixel index in the y direction (left edge). - - uint16_t theOriginalClusterId=std::numeric_limits::max(); - - float err_x=-99999.9f; - float err_y=-99999.9f; - -}; + uint16_t theMinPixelRow = MAXPOS; // Minimum pixel index in the x direction (low edge). + uint16_t theMinPixelCol = MAXPOS; // Minimum pixel index in the y direction (left edge). + uint8_t thePixelRowSpan = 0; // Span pixel index in the x direction (low edge). + uint8_t thePixelColSpan = 0; // Span pixel index in the y direction (left edge). 
+ + uint16_t theOriginalClusterId = std::numeric_limits::max(); + + float err_x = -99999.9f; + float err_y = -99999.9f; +}; // Comparison operators (needed by DetSetVector) -inline bool operator<( const SiPixelCluster& one, const SiPixelCluster& other) { - if ( one.minPixelRow() < other.minPixelRow() ) { +inline bool operator<(const SiPixelCluster& one, const SiPixelCluster& other) { + if (one.minPixelRow() < other.minPixelRow()) { return true; - } else if ( one.minPixelRow() > other.minPixelRow() ) { + } else if (one.minPixelRow() > other.minPixelRow()) { return false; - } else if ( one.minPixelCol() < other.minPixelCol() ) { + } else if (one.minPixelCol() < other.minPixelCol()) { return true; } else { return false; } } - #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" #include "DataFormats/Common/interface/Ref.h" @@ -246,4 +234,4 @@ typedef edm::RefProd SiPixelClusterRefProd; typedef edmNew::DetSetVector SiPixelClusterCollectionNew; typedef edm::Ref SiPixelClusterRefNew; -#endif +#endif diff --git a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h index df249a3790cd2..50e863f03ff02 100644 --- a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h +++ b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h @@ -7,7 +7,8 @@ class SiPixelDigisSoA { public: SiPixelDigisSoA() = default; - explicit SiPixelDigisSoA(size_t nDigis, const uint32_t *pdigi, const uint32_t *rawIdArr, const uint16_t *adc, const int32_t *clus); + explicit SiPixelDigisSoA( + size_t nDigis, const uint32_t* pdigi, const uint32_t* rawIdArr, const uint16_t* adc, const int32_t* clus); ~SiPixelDigisSoA() = default; auto size() const { return pdigi_.size(); } @@ -16,12 +17,12 @@ class SiPixelDigisSoA { uint32_t rawIdArr(size_t i) const { return rawIdArr_[i]; } uint16_t adc(size_t i) const { return adc_[i]; } int32_t clus(size_t i) const { return clus_[i]; } - + const std::vector& 
pdigiVector() const { return pdigi_; } const std::vector& rawIdArrVector() const { return rawIdArr_; } const std::vector& adcVector() const { return adc_; } const std::vector& clusVector() const { return clus_; } - + private: std::vector pdigi_; std::vector rawIdArr_; diff --git a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc index ebc8ba2055f78..992c98f450616 100644 --- a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc +++ b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc @@ -2,11 +2,11 @@ #include -SiPixelDigisSoA::SiPixelDigisSoA(size_t nDigis, const uint32_t *pdigi, const uint32_t *rawIdArr, const uint16_t *adc, const int32_t *clus): - pdigi_(pdigi, pdigi+nDigis), - rawIdArr_(rawIdArr, rawIdArr+nDigis), - adc_(adc, adc+nDigis), - clus_(clus, clus+nDigis) -{ +SiPixelDigisSoA::SiPixelDigisSoA( + size_t nDigis, const uint32_t *pdigi, const uint32_t *rawIdArr, const uint16_t *adc, const int32_t *clus) + : pdigi_(pdigi, pdigi + nDigis), + rawIdArr_(rawIdArr, rawIdArr + nDigis), + adc_(adc, adc + nDigis), + clus_(clus, clus + nDigis) { assert(pdigi_.size() == nDigis); } diff --git a/DataFormats/SiPixelDigi/src/classes.h b/DataFormats/SiPixelDigi/src/classes.h index 256ca41ad1867..4c15d6a06b20f 100644 --- a/DataFormats/SiPixelDigi/src/classes.h +++ b/DataFormats/SiPixelDigi/src/classes.h @@ -13,5 +13,4 @@ #include "boost/cstdint.hpp" #include - -#endif // SIPIXELDIGI_CLASSES_H +#endif // SIPIXELDIGI_CLASSES_H diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc index 9e998b92fc403..270598b0528b8 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc @@ -20,7 +20,7 @@ #include -class SiPixelDigiErrorsFromSoA: public edm::stream::EDProducer<> { +class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { public: explicit 
SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig); ~SiPixelDigiErrorsFromSoA() override = default; @@ -47,25 +47,26 @@ class SiPixelDigiErrorsFromSoA: public edm::stream::EDProducer<> { const bool usePhase1_; }; -SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig): - digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, - errorPutToken_{produces>()}, - tkErrorPutToken_{produces()}, - userErrorPutToken_{produces("UserErrorModules")}, - disabledChannelPutToken_{produces>()}, - cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), - tkerrorlist_(iConfig.getParameter>("ErrorList")), - usererrorlist_(iConfig.getParameter>("UserErrorList")), - usePhase1_(iConfig.getParameter ("UsePhase1")) -{} +SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig) + : digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, + errorPutToken_{produces>()}, + tkErrorPutToken_{produces()}, + userErrorPutToken_{produces("UserErrorModules")}, + disabledChannelPutToken_{produces>()}, + cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), + tkerrorlist_(iConfig.getParameter>("ErrorList")), + usererrorlist_(iConfig.getParameter>("UserErrorList")), + usePhase1_(iConfig.getParameter("UsePhase1")) {} void SiPixelDigiErrorsFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; desc.add("digiErrorSoASrc", edm::InputTag("siPixelDigiErrorsSoA")); - desc.add("CablingMapLabel","")->setComment("CablingMap label"); - desc.add("UsePhase1",false)->setComment("## Use phase1"); - desc.add >("ErrorList", std::vector{29})->setComment("## ErrorList: list of error codes used by tracking to invalidate modules"); - desc.add >("UserErrorList", std::vector{40})->setComment("## UserErrorList: list of error codes used by Pixel experts for investigation"); + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UsePhase1", 
false)->setComment("## Use phase1"); + desc.add>("ErrorList", std::vector{29}) + ->setComment("## ErrorList: list of error codes used by tracking to invalidate modules"); + desc.add>("UserErrorList", std::vector{40}) + ->setComment("## UserErrorList: list of error codes used by Pixel experts for investigation"); descriptions.addWithDefaultLabel(desc); } @@ -78,21 +79,20 @@ void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup edm::ESTransientHandle cablingMap; iSetup.get().get(cablingMapLabel_, cablingMap); cabling_ = cablingMap->cablingTree(); - LogDebug("map version:")<< cabling_->version(); + LogDebug("map version:") << cabling_->version(); } const auto& digiErrors = iEvent.get(digiErrorSoAGetToken_); - edm::DetSetVector errorcollection{}; DetIdCollection tkerror_detidcollection{}; DetIdCollection usererror_detidcollection{}; edmNew::DetSetVector disabled_channelcollection{}; - PixelDataFormatter formatter(cabling_.get(), usePhase1_); // for phase 1 & 0 - const PixelDataFormatter::Errors *formatterErrors = digiErrors.formatterErrors(); + PixelDataFormatter formatter(cabling_.get(), usePhase1_); // for phase 1 & 0 + const PixelDataFormatter::Errors* formatterErrors = digiErrors.formatterErrors(); assert(formatterErrors != nullptr); - auto errors = *formatterErrors; // make a copy + auto errors = *formatterErrors; // make a copy PixelDataFormatter::DetErrors nodeterrors; auto size = digiErrors.size(); @@ -107,12 +107,10 @@ void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup constexpr uint32_t dummydetid = 0xffffffff; typedef PixelDataFormatter::Errors::iterator IE; for (IE is = errors.begin(); is != errors.end(); is++) { - uint32_t errordetid = is->first; - if (errordetid == dummydetid) {// errors given dummy detId must be sorted by Fed - nodeterrors.insert( nodeterrors.end(), errors[errordetid].begin(), errors[errordetid].end() ); - } - else { + if (errordetid == dummydetid) { // errors given dummy detId 
must be sorted by Fed + nodeterrors.insert(nodeterrors.end(), errors[errordetid].begin(), errors[errordetid].end()); + } else { edm::DetSet& errorDetSet = errorcollection.find_or_insert(errordetid); errorDetSet.data.insert(errorDetSet.data.end(), is->second.begin(), is->second.end()); // Fill detid of the detectors where there is error AND the error number is listed @@ -136,15 +134,17 @@ void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup // in contrast, the ROC-in-channel numbering is determined by hardware --> better to use the "offline" scheme PixelFEDChannel ch = {fed->id(), linkId, 25, 0}; for (unsigned int iRoc = 1; iRoc <= link->numberOfROCs(); iRoc++) { - const sipixelobjects::PixelROC * roc = link->roc(iRoc); - if (roc->idInDetUnit() < ch.roc_first) ch.roc_first = roc->idInDetUnit(); - if (roc->idInDetUnit() > ch.roc_last) ch.roc_last = roc->idInDetUnit(); + const sipixelobjects::PixelROC* roc = link->roc(iRoc); + if (roc->idInDetUnit() < ch.roc_first) + ch.roc_first = roc->idInDetUnit(); + if (roc->idInDetUnit() > ch.roc_last) + ch.roc_last = roc->idInDetUnit(); } - if (ch.roc_first& errorDetSet = errorcollection.find_or_insert(dummydetid); errorDetSet.data = nodeterrors; diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc index b44abfe2ba758..ad6c46082be8b 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -11,7 +11,7 @@ #include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -class SiPixelDigiErrorsSoAFromCUDA: public edm::stream::EDProducer { +class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer { public: explicit SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig); ~SiPixelDigiErrorsSoAFromCUDA() override = 
default; @@ -19,7 +19,9 @@ class SiPixelDigiErrorsSoAFromCUDA: public edm::stream::EDProducer> digiErrorGetToken_; @@ -27,13 +29,12 @@ class SiPixelDigiErrorsSoAFromCUDA: public edm::stream::EDProducer data_; GPU::SimpleVector error_; - const PixelFormatterErrors *formatterErrors_ = nullptr; + const PixelFormatterErrors* formatterErrors_ = nullptr; }; -SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig): - digiErrorGetToken_(consumes>(iConfig.getParameter("src"))), - digiErrorPutToken_(produces()) -{} +SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig) + : digiErrorGetToken_(consumes>(iConfig.getParameter("src"))), + digiErrorPutToken_(produces()) {} void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -41,7 +42,9 @@ void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptio descriptions.addWithDefaultLabel(desc); } -void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { +void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { // Do the transfer in a CUDA stream parallel to the computation CUDA stream CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc index 4e71864daa7fe..7794032154e98 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc @@ -11,8 +11,7 @@ #include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -class 
SiPixelDigisSoAFromCUDA: public edm::stream::EDProducer { +class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer { public: explicit SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig); ~SiPixelDigisSoAFromCUDA() override = default; @@ -20,7 +19,9 @@ class SiPixelDigisSoAFromCUDA: public edm::stream::EDProducer static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: - void acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; edm::EDGetTokenT> digiGetToken_; @@ -29,15 +30,14 @@ class SiPixelDigisSoAFromCUDA: public edm::stream::EDProducer cudautils::host::unique_ptr pdigi_; cudautils::host::unique_ptr rawIdArr_; cudautils::host::unique_ptr adc_; - cudautils::host::unique_ptr< int32_t[]> clus_; + cudautils::host::unique_ptr clus_; int nDigis_; }; -SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig): - digiGetToken_(consumes>(iConfig.getParameter("src"))), - digiPutToken_(produces()) -{} +SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig) + : digiGetToken_(consumes>(iConfig.getParameter("src"))), + digiPutToken_(produces()) {} void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -45,7 +45,9 @@ void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& d descriptions.addWithDefaultLabel(desc); } -void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { +void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder 
waitingTaskHolder) { // Do the transfer in a CUDA stream parallel to the computation CUDA stream CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index c36033a8554d8..409ebec3cb43f 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -6,76 +6,92 @@ namespace phase1PixelTopology { - constexpr uint16_t numRowsInRoc = 80; - constexpr uint16_t numColsInRoc = 52; - constexpr uint16_t lastRowInRoc = numRowsInRoc - 1; - constexpr uint16_t lastColInRoc = numColsInRoc - 1; + constexpr uint16_t numRowsInRoc = 80; + constexpr uint16_t numColsInRoc = 52; + constexpr uint16_t lastRowInRoc = numRowsInRoc - 1; + constexpr uint16_t lastColInRoc = numColsInRoc - 1; - constexpr uint16_t numRowsInModule = 2 * numRowsInRoc; - constexpr uint16_t numColsInModule = 8 * numColsInRoc; - constexpr uint16_t lastRowInModule = numRowsInModule - 1; - constexpr uint16_t lastColInModule = numColsInModule - 1; + constexpr uint16_t numRowsInModule = 2 * numRowsInRoc; + constexpr uint16_t numColsInModule = 8 * numColsInRoc; + constexpr uint16_t lastRowInModule = numRowsInModule - 1; + constexpr uint16_t lastColInModule = numColsInModule - 1; constexpr int16_t xOffset = -81; - constexpr int16_t yOffset = -54*4; + constexpr int16_t yOffset = -54 * 4; - constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule)* uint32_t(numColsInModule); + constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule) * uint32_t(numColsInModule); constexpr uint32_t numberOfModules = 1856; constexpr uint32_t numberOfLayers = 10; - constexpr uint32_t layerStart[numberOfLayers + 1] = { - 0, 96, 320, 672, // barrel - 1184, 1296, 1408, // positive endcap - 1520, 1632, 1744, // negative endcap - numberOfModules - }; - constexpr char const * 
layerName[numberOfLayers] = { - "BL1", "BL2", "BL3", "BL4", // barrel - "E+1", "E+2", "E+3", // positive endcap - "E-1", "E-2", "E-3" // negative endcap + constexpr uint32_t layerStart[numberOfLayers + 1] = {0, + 96, + 320, + 672, // barrel + 1184, + 1296, + 1408, // positive endcap + 1520, + 1632, + 1744, // negative endcap + numberOfModules}; + constexpr char const* layerName[numberOfLayers] = { + "BL1", + "BL2", + "BL3", + "BL4", // barrel + "E+1", + "E+2", + "E+3", // positive endcap + "E-1", + "E-2", + "E-3" // negative endcap }; constexpr uint32_t numberOfModulesInBarrel = 1184; - constexpr uint32_t numberOfLaddersInBarrel = numberOfModulesInBarrel/8; + constexpr uint32_t numberOfLaddersInBarrel = numberOfModulesInBarrel / 8; - template + template constexpr auto map_to_array_helper(Function f, std::index_sequence) - -> std::array::type, sizeof...(Indices)> - { - return {{ f(Indices)... }}; + -> std::array::type, sizeof...(Indices)> { + return {{f(Indices)...}}; } - template - constexpr auto map_to_array(Function f) - -> std::array::type, N> - { + template + constexpr auto map_to_array(Function f) -> std::array::type, N> { return map_to_array_helper(f, std::make_index_sequence{}); } - constexpr uint32_t findMaxModuleStride() { bool go = true; - int n=2; + int n = 2; while (go) { - for (uint8_t i=1; i<11; ++i) { - if (layerStart[i]%n !=0) {go=false; break;} + for (uint8_t i = 1; i < 11; ++i) { + if (layerStart[i] % n != 0) { + go = false; + break; + } } - if(!go) break; - n*=2; + if (!go) + break; + n *= 2; } - return n/2; + return n / 2; } constexpr uint32_t maxModuleStride = findMaxModuleStride(); constexpr uint8_t findLayer(uint32_t detId) { - for (uint8_t i=0; i<11; ++i) if (detId layer = map_to_array(findLayerFromCompact); constexpr bool validateLayerIndex() { - bool res=true; - for (auto i=0U; i=layerStart[layer[j]]); - res &=(i= layerStart[layer[j]]); + res &= (i < layerStart[layer[j] + 1]); } return res; } @@ -96,56 +112,49 @@ namespace 
phase1PixelTopology { static_assert(validateLayerIndex(), "layer from detIndex algo is buggy"); // this is for the ROC n<512 (upgrade 1024) - constexpr inline - uint16_t divu52(uint16_t n) { - n = n>>2; - uint16_t q = (n>>1) + (n>>4); - q = q + (q>>4) + (q>>5); q = q >> 3; - uint16_t r = n - q*13; + constexpr inline uint16_t divu52(uint16_t n) { + n = n >> 2; + uint16_t q = (n >> 1) + (n >> 4); + q = q + (q >> 4) + (q >> 5); + q = q >> 3; + uint16_t r = n - q * 13; return q + ((r + 3) >> 4); } - constexpr inline - bool isEdgeX(uint16_t px) { return (px==0) | (px==lastRowInModule); } + constexpr inline bool isEdgeX(uint16_t px) { return (px == 0) | (px == lastRowInModule); } - constexpr inline - bool isEdgeY(uint16_t py) { return (py==0) | (py==lastColInModule); } + constexpr inline bool isEdgeY(uint16_t py) { return (py == 0) | (py == lastColInModule); } - constexpr inline - uint16_t toRocX(uint16_t px) { return (pxlastRowInRoc) shift+=1; - if (px>numRowsInRoc) shift+=1; - return px+shift; + if (px > lastRowInRoc) + shift += 1; + if (px > numRowsInRoc) + shift += 1; + return px + shift; } - constexpr inline - uint16_t localY(uint16_t py) { + constexpr inline uint16_t localY(uint16_t py) { auto roc = divu52(py); - auto shift = 2*roc; - auto yInRoc = py - 52*roc; - if (yInRoc>0) shift+=1; - return py+shift; + auto shift = 2 * roc; + auto yInRoc = py - 52 * roc; + if (yInRoc > 0) + shift += 1; + return py + shift; } //FIXME move it elsewhere? 
@@ -160,6 +169,6 @@ namespace phase1PixelTopology { float endCapZ[2]; // just for pos and neg Layer1 }; -} +} // namespace phase1PixelTopology -#endif // Geometry_TrackerGeometryBuilder_phase1PixelTopology_h +#endif // Geometry_TrackerGeometryBuilder_phase1PixelTopology_h diff --git a/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp b/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp index 5c37dad30d73e..8dfae57b685b4 100644 --- a/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp +++ b/Geometry/TrackerGeometryBuilder/test/phase1PixelTopology_t.cpp @@ -8,141 +8,144 @@ namespace { // original code from CMSSW_4_4 - std::tuple localXori(int mpx) { - const float m_pitchx=1.f; - int binoffx = int(mpx); // truncate to int - float local_pitchx = m_pitchx; // defaultpitch - - if (binoffx>80) { // ROC 1 - handles x on edge cluster - binoffx=binoffx+2; - } else if (binoffx==80) { // ROC 1 - binoffx=binoffx+1; + std::tuple localXori(int mpx) { + const float m_pitchx = 1.f; + int binoffx = int(mpx); // truncate to int + float local_pitchx = m_pitchx; // defaultpitch + + if (binoffx > 80) { // ROC 1 - handles x on edge cluster + binoffx = binoffx + 2; + } else if (binoffx == 80) { // ROC 1 + binoffx = binoffx + 1; local_pitchx = 2 * m_pitchx; - } else if (binoffx==79) { // ROC 0 - binoffx=binoffx+0; + } else if (binoffx == 79) { // ROC 0 + binoffx = binoffx + 0; local_pitchx = 2 * m_pitchx; - } else if (binoffx>=0) { // ROC 0 - binoffx=binoffx+0; + } else if (binoffx >= 0) { // ROC 0 + binoffx = binoffx + 0; - } else { // too small - assert("binoffx too small"==0); + } else { // too small + assert("binoffx too small" == 0); } - return std::make_tuple(binoffx,local_pitchx>m_pitchx); + return std::make_tuple(binoffx, local_pitchx > m_pitchx); } - std::tuple localYori(int mpy) { - const float m_pitchy=1.f; - int binoffy = int(mpy); // truncate to int - float local_pitchy = m_pitchy; // defaultpitch + std::tuple localYori(int mpy) { + const 
float m_pitchy = 1.f; + int binoffy = int(mpy); // truncate to int + float local_pitchy = m_pitchy; // defaultpitch - if (binoffy>416) { // ROC 8, not real ROC - binoffy=binoffy+17; - } else if (binoffy==416) { // ROC 8 - binoffy=binoffy+16; + if (binoffy > 416) { // ROC 8, not real ROC + binoffy = binoffy + 17; + } else if (binoffy == 416) { // ROC 8 + binoffy = binoffy + 16; local_pitchy = 2 * m_pitchy; - } else if (binoffy==415) { // ROC 7, last big pixel - binoffy=binoffy+15; + } else if (binoffy == 415) { // ROC 7, last big pixel + binoffy = binoffy + 15; local_pitchy = 2 * m_pitchy; - } else if (binoffy>364) { // ROC 7 - binoffy=binoffy+15; - } else if (binoffy==364) { // ROC 7 - binoffy=binoffy+14; + } else if (binoffy > 364) { // ROC 7 + binoffy = binoffy + 15; + } else if (binoffy == 364) { // ROC 7 + binoffy = binoffy + 14; local_pitchy = 2 * m_pitchy; - } else if (binoffy==363) { // ROC 6 - binoffy=binoffy+13; + } else if (binoffy == 363) { // ROC 6 + binoffy = binoffy + 13; local_pitchy = 2 * m_pitchy; - } else if (binoffy>312) { // ROC 6 - binoffy=binoffy+13; - } else if (binoffy==312) { // ROC 6 - binoffy=binoffy+12; + } else if (binoffy > 312) { // ROC 6 + binoffy = binoffy + 13; + } else if (binoffy == 312) { // ROC 6 + binoffy = binoffy + 12; local_pitchy = 2 * m_pitchy; - } else if (binoffy==311) { // ROC 5 - binoffy=binoffy+11; + } else if (binoffy == 311) { // ROC 5 + binoffy = binoffy + 11; local_pitchy = 2 * m_pitchy; - } else if (binoffy>260) { // ROC 5 - binoffy=binoffy+11; - } else if (binoffy==260) { // ROC 5 - binoffy=binoffy+10; + } else if (binoffy > 260) { // ROC 5 + binoffy = binoffy + 11; + } else if (binoffy == 260) { // ROC 5 + binoffy = binoffy + 10; local_pitchy = 2 * m_pitchy; - } else if (binoffy==259) { // ROC 4 - binoffy=binoffy+9; + } else if (binoffy == 259) { // ROC 4 + binoffy = binoffy + 9; local_pitchy = 2 * m_pitchy; - } else if (binoffy>208) { // ROC 4 - binoffy=binoffy+9; - } else if (binoffy==208) { // ROC 4 - 
binoffy=binoffy+8; + } else if (binoffy > 208) { // ROC 4 + binoffy = binoffy + 9; + } else if (binoffy == 208) { // ROC 4 + binoffy = binoffy + 8; local_pitchy = 2 * m_pitchy; - } else if (binoffy==207) { // ROC 3 - binoffy=binoffy+7; + } else if (binoffy == 207) { // ROC 3 + binoffy = binoffy + 7; local_pitchy = 2 * m_pitchy; - } else if (binoffy>156) { // ROC 3 - binoffy=binoffy+7; - } else if (binoffy==156) { // ROC 3 - binoffy=binoffy+6; + } else if (binoffy > 156) { // ROC 3 + binoffy = binoffy + 7; + } else if (binoffy == 156) { // ROC 3 + binoffy = binoffy + 6; local_pitchy = 2 * m_pitchy; - } else if (binoffy==155) { // ROC 2 - binoffy=binoffy+5; + } else if (binoffy == 155) { // ROC 2 + binoffy = binoffy + 5; local_pitchy = 2 * m_pitchy; - } else if (binoffy>104) { // ROC 2 - binoffy=binoffy+5; - } else if (binoffy==104) { // ROC 2 - binoffy=binoffy+4; + } else if (binoffy > 104) { // ROC 2 + binoffy = binoffy + 5; + } else if (binoffy == 104) { // ROC 2 + binoffy = binoffy + 4; local_pitchy = 2 * m_pitchy; - } else if (binoffy==103) { // ROC 1 - binoffy=binoffy+3; + } else if (binoffy == 103) { // ROC 1 + binoffy = binoffy + 3; local_pitchy = 2 * m_pitchy; - } else if (binoffy>52) { // ROC 1 - binoffy=binoffy+3; - } else if (binoffy==52) { // ROC 1 - binoffy=binoffy+2; + } else if (binoffy > 52) { // ROC 1 + binoffy = binoffy + 3; + } else if (binoffy == 52) { // ROC 1 + binoffy = binoffy + 2; local_pitchy = 2 * m_pitchy; - } else if (binoffy==51) { // ROC 0 - binoffy=binoffy+1; + } else if (binoffy == 51) { // ROC 0 + binoffy = binoffy + 1; local_pitchy = 2 * m_pitchy; - } else if (binoffy>0) { // ROC 0 - binoffy=binoffy+1; - } else if (binoffy==0) { // ROC 0 - binoffy=binoffy+0; + } else if (binoffy > 0) { // ROC 0 + binoffy = binoffy + 1; + } else if (binoffy == 0) { // ROC 0 + binoffy = binoffy + 0; local_pitchy = 2 * m_pitchy; } else { - assert("binoffy too small"==0); + assert("binoffy too small" == 0); } - return 
std::make_tuple(binoffy,local_pitchy>m_pitchy); + return std::make_tuple(binoffy, local_pitchy > m_pitchy); } -} +} // namespace int main() { - - for (uint16_t ix=0; ix<80*2; ++ix) { + for (uint16_t ix = 0; ix < 80 * 2; ++ix) { auto ori = localXori(ix); auto xl = phase1PixelTopology::localX(ix); auto bp = phase1PixelTopology::isBigPixX(ix); - if (std::get<0>(ori)!=xl) std::cout << "Error " << std::get<0>(ori) << "!=" << xl << std::endl; - assert(std::get<1>(ori)==bp); + if (std::get<0>(ori) != xl) + std::cout << "Error " << std::get<0>(ori) << "!=" << xl << std::endl; + assert(std::get<1>(ori) == bp); } - for (uint16_t iy=0; iy<52*8; ++iy) { + for (uint16_t iy = 0; iy < 52 * 8; ++iy) { auto ori = localYori(iy); auto yl = phase1PixelTopology::localY(iy); auto bp = phase1PixelTopology::isBigPixY(iy); - if (std::get<0>(ori)!=yl) std::cout << "Error " << std::get<0>(ori) << "!=" << yl << std::endl; - assert(std::get<1>(ori)==bp); + if (std::get<0>(ori) != yl) + std::cout << "Error " << std::get<0>(ori) << "!=" << yl << std::endl; + assert(std::get<1>(ori) == bp); } for (auto i = 0U; i < phase1PixelTopology::numberOfLayers; ++i) { - std::cout << "layer " << i << ", \"" << phase1PixelTopology::layerName[i] << "\", [" << phase1PixelTopology::layerStart[i] << ", " << phase1PixelTopology::layerStart[i+1] << ")" << std::endl; + std::cout << "layer " << i << ", \"" << phase1PixelTopology::layerName[i] << "\", [" + << phase1PixelTopology::layerStart[i] << ", " << phase1PixelTopology::layerStart[i + 1] << ")" + << std::endl; } for (auto i = 0U; i < phase1PixelTopology::numberOfModules; ++i) { @@ -150,7 +153,7 @@ int main() { //std::cout << "module " << i << ": " << "layer " << layer << ", \"" << phase1PixelTopology::layerName[layer] << "\", [" << phase1PixelTopology::layerStart[layer] << ", " << phase1PixelTopology::layerStart[layer+1] << ")" << std::endl; assert(layer < 10); assert(i >= phase1PixelTopology::layerStart[layer]); - assert(i < 
phase1PixelTopology::layerStart[layer+1]); + assert(i < phase1PixelTopology::layerStart[layer + 1]); } return 0; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc index 83fcfbf8f9027..8783d13354241 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc @@ -43,36 +43,32 @@ // MessageLogger #include "FWCore/MessageLogger/interface/MessageLogger.h" +//--------------------------------------------------------------------------- +//! Constructor: set the ParameterSet and defer all thinking to setupClusterizer(). +//--------------------------------------------------------------------------- +SiPixelClusterProducer::SiPixelClusterProducer(edm::ParameterSet const& conf) + : tPutPixelClusters(produces()), + clusterMode_(conf.getParameter("ClusterMode")), + maxTotalClusters_(conf.getParameter("maxNumberOfClusters")) { + if (clusterMode_ == "PixelThresholdReclusterizer") + tPixelClusters = consumes(conf.getParameter("src")); + else + tPixelDigi = consumes>(conf.getParameter("src")); + + const auto& payloadType = conf.getParameter("payloadType"); + if (payloadType == "HLT") + theSiPixelGainCalibration_ = std::make_unique(conf); + else if (payloadType == "Offline") + theSiPixelGainCalibration_ = std::make_unique(conf); + else if (payloadType == "Full") + theSiPixelGainCalibration_ = std::make_unique(conf); + + //--- Make the algorithm(s) according to what the user specified + //--- in the ParameterSet. + setupClusterizer(conf); +} - //--------------------------------------------------------------------------- - //! Constructor: set the ParameterSet and defer all thinking to setupClusterizer(). 
- //--------------------------------------------------------------------------- - SiPixelClusterProducer::SiPixelClusterProducer(edm::ParameterSet const& conf) - : - tPutPixelClusters(produces()), - clusterMode_( conf.getParameter("ClusterMode") ), - maxTotalClusters_( conf.getParameter( "maxNumberOfClusters" ) ) - { - if ( clusterMode_ == "PixelThresholdReclusterizer" ) - tPixelClusters = consumes( conf.getParameter("src") ); - else - tPixelDigi = consumes>( conf.getParameter("src") ); - - const auto& payloadType = conf.getParameter( "payloadType" ); - if (payloadType == "HLT") - theSiPixelGainCalibration_ = std::make_unique(conf); - else if (payloadType == "Offline") - theSiPixelGainCalibration_ = std::make_unique(conf); - else if (payloadType == "Full") - theSiPixelGainCalibration_ = std::make_unique(conf); - - //--- Make the algorithm(s) according to what the user specified - //--- in the ParameterSet. - setupClusterizer(conf); - - } - - // Destructor +// Destructor SiPixelClusterProducer::~SiPixelClusterProducer() = default; void SiPixelClusterProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -81,146 +77,137 @@ void SiPixelClusterProducer::fillDescriptions(edm::ConfigurationDescriptions& de desc.add("src", edm::InputTag("siPixelDigis")); desc.add("ClusterMode", "PixelThresholdClusterizer"); desc.add("maxNumberOfClusters", -1)->setComment("-1 means no limit"); - desc.add("payloadType", "Offline")->setComment("Options: HLT - column granularity, Offline - gain:col/ped:pix"); + desc.add("payloadType", "Offline") + ->setComment("Options: HLT - column granularity, Offline - gain:col/ped:pix"); PixelThresholdClusterizer::fillPSetDescription(desc); - SiPixelGainCalibrationServiceBase::fillPSetDescription(desc); // no-op, but in principle the structures are there... + SiPixelGainCalibrationServiceBase::fillPSetDescription(desc); // no-op, but in principle the structures are there... 
descriptions.add("SiPixelClusterizerDefault", desc); } - - //--------------------------------------------------------------------------- - //! The "Event" entrypoint: gets called by framework for every event - //--------------------------------------------------------------------------- - void SiPixelClusterProducer::produce(edm::Event& e, const edm::EventSetup& es) - { - - //Setup gain calibration service - theSiPixelGainCalibration_->setESObjects( es ); - - // Step A.1: get input data - edm::Handle< SiPixelClusterCollectionNew > inputClusters; - edm::Handle< edm::DetSetVector > inputDigi; - if ( clusterMode_ == "PixelThresholdReclusterizer" ) - e.getByToken(tPixelClusters, inputClusters); - else - e.getByToken(tPixelDigi, inputDigi); - - // Step A.2: get event setup - edm::ESHandle geom; - es.get().get( geom ); - - edm::ESHandle trackerTopologyHandle; - es.get().get(trackerTopologyHandle); - tTopo_ = trackerTopologyHandle.product(); - - // Step B: create the final output collection - auto output = std::make_unique< SiPixelClusterCollectionNew>(); - //FIXME: put a reserve() here - - // Step C: Iterate over DetIds and invoke the pixel clusterizer algorithm - // on each DetUnit - if ( clusterMode_ == "PixelThresholdReclusterizer" ) - run(*inputClusters, geom, *output ); - else - run(*inputDigi, geom, *output ); - - // Step D: write output to file - output->shrink_to_fit(); - - // set sequential identifier (this is a const interface, but we need to set it after the sorting) - for (auto DSViter = output->begin(); DSViter != output->end(); DSViter++) { - uint16_t id=0; - for (auto & clust : *DSViter) { - const_cast(clust).setOriginalId(id++); - } - } - e.put(tPutPixelClusters, std::move(output)); - - } - - //--------------------------------------------------------------------------- - //! Set up the specific algorithm we are going to use. - //! TO DO: in the future, we should allow for a different algorithm for - //! each detector subset (e.g. 
barrel vs forward, per layer, etc). - //--------------------------------------------------------------------------- - void SiPixelClusterProducer::setupClusterizer(const edm::ParameterSet& conf) { - - if ( clusterMode_ == "PixelThresholdReclusterizer" || clusterMode_ == "PixelThresholdClusterizer" ) { - clusterizer_ = std::make_unique(conf); - clusterizer_->setSiPixelGainCalibrationService(theSiPixelGainCalibration_.get()); - } - else { - throw cms::Exception("Configuration") << "[SiPixelClusterProducer]:" - <<" choice " << clusterMode_ << " is invalid.\n" - << "Possible choices:\n" - << " PixelThresholdClusterizer"; +//--------------------------------------------------------------------------- +//! The "Event" entrypoint: gets called by framework for every event +//--------------------------------------------------------------------------- +void SiPixelClusterProducer::produce(edm::Event& e, const edm::EventSetup& es) { + //Setup gain calibration service + theSiPixelGainCalibration_->setESObjects(es); + + // Step A.1: get input data + edm::Handle inputClusters; + edm::Handle> inputDigi; + if (clusterMode_ == "PixelThresholdReclusterizer") + e.getByToken(tPixelClusters, inputClusters); + else + e.getByToken(tPixelDigi, inputDigi); + + // Step A.2: get event setup + edm::ESHandle geom; + es.get().get(geom); + + edm::ESHandle trackerTopologyHandle; + es.get().get(trackerTopologyHandle); + tTopo_ = trackerTopologyHandle.product(); + + // Step B: create the final output collection + auto output = std::make_unique(); + //FIXME: put a reserve() here + + // Step C: Iterate over DetIds and invoke the pixel clusterizer algorithm + // on each DetUnit + if (clusterMode_ == "PixelThresholdReclusterizer") + run(*inputClusters, geom, *output); + else + run(*inputDigi, geom, *output); + + // Step D: write output to file + output->shrink_to_fit(); + + // set sequential identifier (this is a const interface, but we need to set it after the sorting) + for (auto DSViter = 
output->begin(); DSViter != output->end(); DSViter++) { + uint16_t id = 0; + for (auto& clust : *DSViter) { + const_cast(clust).setOriginalId(id++); } } + e.put(tPutPixelClusters, std::move(output)); +} +//--------------------------------------------------------------------------- +//! Set up the specific algorithm we are going to use. +//! TO DO: in the future, we should allow for a different algorithm for +//! each detector subset (e.g. barrel vs forward, per layer, etc). +//--------------------------------------------------------------------------- +void SiPixelClusterProducer::setupClusterizer(const edm::ParameterSet& conf) { + if (clusterMode_ == "PixelThresholdReclusterizer" || clusterMode_ == "PixelThresholdClusterizer") { + clusterizer_ = std::make_unique(conf); + clusterizer_->setSiPixelGainCalibrationService(theSiPixelGainCalibration_.get()); + } else { + throw cms::Exception("Configuration") << "[SiPixelClusterProducer]:" + << " choice " << clusterMode_ << " is invalid.\n" + << "Possible choices:\n" + << " PixelThresholdClusterizer"; + } +} - //--------------------------------------------------------------------------- - //! Iterate over DetUnits, and invoke the PixelClusterizer on each. - //--------------------------------------------------------------------------- - template - void SiPixelClusterProducer::run(const T & input, - const edm::ESHandle & geom, - edmNew::DetSetVector & output) { - int numberOfDetUnits = 0; - int numberOfClusters = 0; - - // Iterate on detector units - typename T::const_iterator DSViter = input.begin(); - for( ; DSViter != input.end(); DSViter++) { - ++numberOfDetUnits; - - // LogDebug takes very long time, get rid off. 
- //LogDebug("SiStripClusterizer") << "[SiPixelClusterProducer::run] DetID" << DSViter->id; - - std::vector badChannels; - DetId detIdObject(DSViter->detId()); - - // Comment: At the moment the clusterizer depends on geometry - // to access information as the pixel topology (number of columns - // and rows in a detector module). - // In the future the geometry service will be replaced with - // a ES service. - const GeomDetUnit * geoUnit = geom->idToDetUnit( detIdObject ); - const PixelGeomDetUnit * pixDet = dynamic_cast(geoUnit); - if (! pixDet) { - // Fatal error! TO DO: throw an exception! - assert(0); - } - { - // Produce clusters for this DetUnit and store them in +//--------------------------------------------------------------------------- +//! Iterate over DetUnits, and invoke the PixelClusterizer on each. +//--------------------------------------------------------------------------- +template +void SiPixelClusterProducer::run(const T& input, + const edm::ESHandle& geom, + edmNew::DetSetVector& output) { + int numberOfDetUnits = 0; + int numberOfClusters = 0; + + // Iterate on detector units + typename T::const_iterator DSViter = input.begin(); + for (; DSViter != input.end(); DSViter++) { + ++numberOfDetUnits; + + // LogDebug takes very long time, get rid off. + //LogDebug("SiStripClusterizer") << "[SiPixelClusterProducer::run] DetID" << DSViter->id; + + std::vector badChannels; + DetId detIdObject(DSViter->detId()); + + // Comment: At the moment the clusterizer depends on geometry + // to access information as the pixel topology (number of columns + // and rows in a detector module). + // In the future the geometry service will be replaced with + // a ES service. + const GeomDetUnit* geoUnit = geom->idToDetUnit(detIdObject); + const PixelGeomDetUnit* pixDet = dynamic_cast(geoUnit); + if (!pixDet) { + // Fatal error! TO DO: throw an exception! 
+ assert(0); + } + { + // Produce clusters for this DetUnit and store them in // a DetSet edmNew::DetSetVector::FastFiller spc(output, DSViter->detId()); clusterizer_->clusterizeDetUnit(*DSViter, pixDet, tTopo_, badChannels, spc); - if ( spc.empty() ) { + if (spc.empty()) { spc.abort(); } else { - numberOfClusters += spc.size(); + numberOfClusters += spc.size(); } - } // spc is not deleted and detsetvector updated - if ((maxTotalClusters_ >= 0) && (numberOfClusters > maxTotalClusters_)) { - edm::LogError("TooManyClusters") << "Limit on the number of clusters exceeded. An empty cluster collection will be produced instead.\n"; - edmNew::DetSetVector empty; - empty.swap(output); - break; - } - } // end of DetUnit loop - - //LogDebug ("SiPixelClusterProducer") << " Executing " - // << clusterMode_ << " resulted in " << numberOfClusters - // << " SiPixelClusters in " << numberOfDetUnits << " DetUnits."; - } - - + } // spc is not deleted and detsetvector updated + if ((maxTotalClusters_ >= 0) && (numberOfClusters > maxTotalClusters_)) { + edm::LogError("TooManyClusters") + << "Limit on the number of clusters exceeded. 
An empty cluster collection will be produced instead.\n"; + edmNew::DetSetVector empty; + empty.swap(output); + break; + } + } // end of DetUnit loop + //LogDebug ("SiPixelClusterProducer") << " Executing " + // << clusterMode_ << " resulted in " << numberOfClusters + // << " SiPixelClusters in " << numberOfDetUnits << " DetUnits."; +} #include "FWCore/PluginManager/interface/ModuleDef.h" #include "FWCore/Framework/interface/MakerMacros.h" DEFINE_FWK_MODULE(SiPixelClusterProducer); - diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index cb6c55939335c..d818cb87e23ac 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -86,7 +86,7 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con auto collection = std::make_unique>(); auto outputClusters = std::make_unique(); - outputClusters->reserve(2000,nDigis/4); + outputClusters->reserve(2000, nDigis / 4); edm::DetSet* detDigis = nullptr; for (uint32_t i = 0; i < nDigis; i++) { @@ -113,7 +113,8 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con // in any case we cannot go out of sync with gpu... 
if (acluster.charge < clusterThreshold) edm::LogWarning("SiPixelDigisClustersFromSoA") << "cluster below charge Threshold " - << "Layer/DetId/clusId " << layer<<'/'< #include -class SiPixelRawToClusterCUDA: public edm::stream::EDProducer { +class SiPixelRawToClusterCUDA : public edm::stream::EDProducer { public: explicit SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig); ~SiPixelRawToClusterCUDA() override = default; @@ -44,7 +44,9 @@ class SiPixelRawToClusterCUDA: public edm::stream::EDProducer static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: - void acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; edm::EDGetTokenT rawGetToken_; @@ -60,7 +62,7 @@ class SiPixelRawToClusterCUDA: public edm::stream::EDProducer std::string cablingMapLabel_; std::unique_ptr cabling_; std::vector fedIds_; - const SiPixelFedCablingMap *cablingMap_ = nullptr; + const SiPixelFedCablingMap* cablingMap_ = nullptr; std::unique_ptr regions_; pixelgpudetails::SiPixelRawToClusterGPUKernel gpuAlgo_; @@ -72,78 +74,83 @@ class SiPixelRawToClusterCUDA: public edm::stream::EDProducer const bool usePilotBlade_; }; -SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig): - rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), - digiPutToken_(produces>()), - clusterPutToken_(produces>()), - cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), - includeErrors_(iConfig.getParameter("IncludeErrors")), - useQuality_(iConfig.getParameter("UseQualityInfo")), - usePilotBlade_(iConfig.getParameter ("UsePilotBlade")) // Control the usage of pilot-blade data, FED=40 +SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig) 
+ : rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), + digiPutToken_(produces>()), + clusterPutToken_(produces>()), + cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), + includeErrors_(iConfig.getParameter("IncludeErrors")), + useQuality_(iConfig.getParameter("UseQualityInfo")), + usePilotBlade_(iConfig.getParameter("UsePilotBlade")) // Control the usage of pilot-blade data, FED=40 { - if(includeErrors_) { + if (includeErrors_) { digiErrorPutToken_ = produces>(); } // regions - if(!iConfig.getParameter("Regions").getParameterNames().empty()) { + if (!iConfig.getParameter("Regions").getParameterNames().empty()) { regions_ = std::make_unique(iConfig, consumesCollector()); } - if(usePilotBlade_) edm::LogInfo("SiPixelRawToCluster") << " Use pilot blade data (FED 40)"; + if (usePilotBlade_) + edm::LogInfo("SiPixelRawToCluster") << " Use pilot blade data (FED 40)"; edm::Service cs; - if(cs->enabled()) { + if (cs->enabled()) { wordFedAppender_ = std::make_unique(); } } void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; - desc.add("IncludeErrors",true); - desc.add("UseQualityInfo",false); - desc.add("UsePilotBlade",false)->setComment("## Use pilot blades"); - desc.add("InputLabel",edm::InputTag("rawDataCollector")); + desc.add("IncludeErrors", true); + desc.add("UseQualityInfo", false); + desc.add("UsePilotBlade", false)->setComment("## Use pilot blades"); + desc.add("InputLabel", edm::InputTag("rawDataCollector")); { edm::ParameterSetDescription psd0; psd0.addOptional>("inputs"); psd0.addOptional>("deltaPhi"); psd0.addOptional>("maxZ"); psd0.addOptional("beamSpot"); - desc.add("Regions",psd0)->setComment("## Empty Regions PSet means complete unpacking"); + desc.add("Regions", psd0) + ->setComment("## Empty Regions PSet means complete unpacking"); } - desc.add("CablingMapLabel","")->setComment("CablingMap label"); //Tav + desc.add("CablingMapLabel", 
"")->setComment("CablingMap label"); //Tav descriptions.addWithDefaultLabel(desc); } - -void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { +void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, + const edm::EventSetup& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; edm::ESHandle hgpuMap; iSetup.get().get(hgpuMap); - if(hgpuMap->hasQuality() != useQuality_) { - throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality_ << ") differs the one from SiPixelFedCablingMapGPUWrapper. Please fix your configuration."; + if (hgpuMap->hasQuality() != useQuality_) { + throw cms::Exception("LogicError") + << "UseQuality of the module (" << useQuality_ + << ") differs the one from SiPixelFedCablingMapGPUWrapper. Please fix your configuration."; } // get the GPU product already here so that the async transfer can begin - const auto *gpuMap = hgpuMap->getGPUProductAsync(ctx.stream()); + const auto* gpuMap = hgpuMap->getGPUProductAsync(ctx.stream()); edm::ESHandle hgains; iSetup.get().get(hgains); // get the GPU product already here so that the async transfer can begin - const auto *gpuGains = hgains->getGPUProductAsync(ctx.stream()); + const auto* gpuGains = hgains->getGPUProductAsync(ctx.stream()); cudautils::device::unique_ptr modulesToUnpackRegional; - const unsigned char *gpuModulesToUnpack; + const unsigned char* gpuModulesToUnpack; - if(regions_) { + if (regions_) { regions_->run(iEvent, iSetup); - LogDebug("SiPixelRawToCluster") << "region2unpack #feds: "<nFEDs(); - LogDebug("SiPixelRawToCluster") << "region2unpack #modules (BPIX,EPIX,total): "<nBarrelModules()<<" "<nForwardModules()<<" "<nModules(); + LogDebug("SiPixelRawToCluster") << "region2unpack #feds: " << regions_->nFEDs(); + LogDebug("SiPixelRawToCluster") << "region2unpack #modules 
(BPIX,EPIX,total): " << regions_->nBarrelModules() << " " + << regions_->nForwardModules() << " " << regions_->nModules(); modulesToUnpackRegional = hgpuMap->getModToUnpRegionalAsync(*(regions_->modulesToUnpack()), ctx.stream()); gpuModulesToUnpack = modulesToUnpackRegional.get(); - } - else { + } else { gpuModulesToUnpack = hgpuMap->getModToUnpAllAsync(ctx.stream()); } @@ -151,51 +158,54 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::Event if (recordWatcher.check(iSetup)) { // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) edm::ESTransientHandle cablingMap; - iSetup.get().get(cablingMapLabel_, cablingMap); //Tav + iSetup.get().get(cablingMapLabel_, cablingMap); //Tav cablingMap_ = cablingMap.product(); - fedIds_ = cablingMap->fedIds(); + fedIds_ = cablingMap->fedIds(); cabling_ = cablingMap->cablingTree(); - LogDebug("map version:")<< cabling_->version(); + LogDebug("map version:") << cabling_->version(); } const auto& buffers = iEvent.get(rawGetToken_); errors_.clear(); - // GPU specific: Data extraction for RawToDigi GPU + // GPU specific: Data extraction for RawToDigi GPU unsigned int wordCounterGPU = 0; unsigned int fedCounter = 0; bool errorsInEvent = false; // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() ErrorChecker errorcheck; - for(int fedId: fedIds_) { - if (!usePilotBlade_ && (fedId==40) ) continue; // skip pilot blade data - if (regions_ && !regions_->mayUnpackFED(fedId)) continue; + for (int fedId : fedIds_) { + if (!usePilotBlade_ && (fedId == 40)) + continue; // skip pilot blade data + if (regions_ && !regions_->mayUnpackFED(fedId)) + continue; // for GPU // first 150 index stores the fedId and next 150 will store the // start index of word in that fed - assert(fedId>=1200); + assert(fedId >= 1200); fedCounter++; // get event data for this fed - const FEDRawData& rawData = buffers.FEDData( fedId ); + const FEDRawData& rawData = 
buffers.FEDData(fedId); // GPU specific - int nWords = rawData.size()/sizeof(cms_uint64_t); + int nWords = rawData.size() / sizeof(cms_uint64_t); if (nWords == 0) { continue; } // check CRC bit - const cms_uint64_t* trailer = reinterpret_cast(rawData.data())+(nWords-1); + const cms_uint64_t* trailer = reinterpret_cast(rawData.data()) + (nWords - 1); if (not errorcheck.checkCRC(errorsInEvent, fedId, trailer, errors_)) { continue; } // check headers - const cms_uint64_t* header = reinterpret_cast(rawData.data()); header--; + const cms_uint64_t* header = reinterpret_cast(rawData.data()); + header--; bool moreHeaders = true; while (moreHeaders) { header++; @@ -212,20 +222,24 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::Event moreTrailers = trailerStatus; } - const cms_uint32_t * bw = (const cms_uint32_t *)(header+1); - const cms_uint32_t * ew = (const cms_uint32_t *)(trailer); + const cms_uint32_t* bw = (const cms_uint32_t*)(header + 1); + const cms_uint32_t* ew = (const cms_uint32_t*)(trailer); - assert(0 == (ew-bw)%2); - wordFedAppender_->initializeWordFed(fedId, wordCounterGPU, bw, (ew-bw)); - wordCounterGPU+=(ew-bw); + assert(0 == (ew - bw) % 2); + wordFedAppender_->initializeWordFed(fedId, wordCounterGPU, bw, (ew - bw)); + wordCounterGPU += (ew - bw); - } // end of for loop + } // end of for loop - gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack, gpuGains, + gpuAlgo_.makeClustersAsync(gpuMap, + gpuModulesToUnpack, + gpuGains, *wordFedAppender_, std::move(errors_), - wordCounterGPU, fedCounter, - useQuality_, includeErrors_, + wordCounterGPU, + fedCounter, + useQuality_, + includeErrors_, edm::MessageDrop::instance()->debugEnabled, ctx.stream()); } @@ -236,7 +250,7 @@ void SiPixelRawToClusterCUDA::produce(edm::Event& iEvent, const edm::EventSetup& auto tmp = gpuAlgo_.getResults(); ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first)); ctx.emplace(iEvent, clusterPutToken_, std::move(tmp.second)); - if(includeErrors_) { + if 
(includeErrors_) { ctx.emplace(iEvent, digiErrorPutToken_, gpuAlgo_.getErrors()); } } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index c436ff319d736..556c0dcec4fb1 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -585,8 +585,8 @@ namespace pixelgpudetails { debug); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); #endif if (includeErrors) { @@ -613,8 +613,8 @@ namespace pixelgpudetails { clusters_d.clusModuleStart()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); #endif #ifdef GPU_DEBUG @@ -645,8 +645,8 @@ namespace pixelgpudetails { wordCounter); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); #endif // apply charge cut @@ -675,11 +675,10 @@ namespace pixelgpudetails { stream.id())); #ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); #endif - } // end clusterizer scope } } // namespace pixelgpudetails diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index edc01d7b88c53..6d53eaf6a71c0 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -20,44 +20,44 @@ class SiPixelGainForHLTonGPU; namespace pixelgpudetails { // Phase 1 geometry constants - const 
uint32_t layerStartBit = 20; - const uint32_t ladderStartBit = 12; - const uint32_t moduleStartBit = 2; - - const uint32_t panelStartBit = 10; - const uint32_t diskStartBit = 18; - const uint32_t bladeStartBit = 12; - - const uint32_t layerMask = 0xF; - const uint32_t ladderMask = 0xFF; - const uint32_t moduleMask = 0x3FF; - const uint32_t panelMask = 0x3; - const uint32_t diskMask = 0xF; - const uint32_t bladeMask = 0x3F; - - const uint32_t LINK_bits = 6; - const uint32_t ROC_bits = 5; - const uint32_t DCOL_bits = 5; - const uint32_t PXID_bits = 8; - const uint32_t ADC_bits = 8; + const uint32_t layerStartBit = 20; + const uint32_t ladderStartBit = 12; + const uint32_t moduleStartBit = 2; + + const uint32_t panelStartBit = 10; + const uint32_t diskStartBit = 18; + const uint32_t bladeStartBit = 12; + + const uint32_t layerMask = 0xF; + const uint32_t ladderMask = 0xFF; + const uint32_t moduleMask = 0x3FF; + const uint32_t panelMask = 0x3; + const uint32_t diskMask = 0xF; + const uint32_t bladeMask = 0x3F; + + const uint32_t LINK_bits = 6; + const uint32_t ROC_bits = 5; + const uint32_t DCOL_bits = 5; + const uint32_t PXID_bits = 8; + const uint32_t ADC_bits = 8; // special for layer 1 - const uint32_t LINK_bits_l1 = 6; - const uint32_t ROC_bits_l1 = 5; - const uint32_t COL_bits_l1 = 6; - const uint32_t ROW_bits_l1 = 7; - const uint32_t OMIT_ERR_bits = 1; + const uint32_t LINK_bits_l1 = 6; + const uint32_t ROC_bits_l1 = 5; + const uint32_t COL_bits_l1 = 6; + const uint32_t ROW_bits_l1 = 7; + const uint32_t OMIT_ERR_bits = 1; - const uint32_t maxROCIndex = 8; - const uint32_t numRowsInRoc = 80; - const uint32_t numColsInRoc = 52; + const uint32_t maxROCIndex = 8; + const uint32_t numRowsInRoc = 80; + const uint32_t numColsInRoc = 52; const uint32_t MAX_WORD = 2000; - const uint32_t ADC_shift = 0; + const uint32_t ADC_shift = 0; const uint32_t PXID_shift = ADC_shift + ADC_bits; const uint32_t DCOL_shift = PXID_shift + PXID_bits; - const uint32_t ROC_shift = 
DCOL_shift + DCOL_bits; + const uint32_t ROC_shift = DCOL_shift + DCOL_bits; const uint32_t LINK_shift = ROC_shift + ROC_bits_l1; // special for layer 1 ROC const uint32_t ROW_shift = ADC_shift + ADC_bits; @@ -65,12 +65,12 @@ namespace pixelgpudetails { const uint32_t OMIT_ERR_shift = 20; const uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits_l1); - const uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1); - const uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1); - const uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1); + const uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1); + const uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1); const uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits); const uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits); - const uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits); + const uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits); const uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits_l1); const uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits); @@ -81,8 +81,8 @@ namespace pixelgpudetails { }; struct Pixel { - uint32_t row; - uint32_t col; + uint32_t row; + uint32_t col; }; class Packing { @@ -90,35 +90,34 @@ namespace pixelgpudetails { using PackedDigiType = uint32_t; // Constructor: pre-computes masks and shifts from field widths - __host__ __device__ - inline - constexpr Packing(unsigned int row_w, unsigned int column_w, - unsigned int time_w, unsigned int adc_w) : - row_width(row_w), - column_width(column_w), - adc_width(adc_w), - row_shift(0), - column_shift(row_shift + row_w), - time_shift(column_shift + column_w), - adc_shift(time_shift + time_w), - row_mask(~(~0U << row_w)), - column_mask( ~(~0U << column_w)), - time_mask(~(~0U << time_w)), - adc_mask(~(~0U << adc_w)), - rowcol_mask(~(~0U << (column_w+row_w))), - max_row(row_mask), - max_column(column_mask), - max_adc(adc_mask) - { } - - uint32_t row_width; - uint32_t column_width; - uint32_t adc_width; - 
- uint32_t row_shift; - uint32_t column_shift; - uint32_t time_shift; - uint32_t adc_shift; + __host__ __device__ inline constexpr Packing(unsigned int row_w, + unsigned int column_w, + unsigned int time_w, + unsigned int adc_w) + : row_width(row_w), + column_width(column_w), + adc_width(adc_w), + row_shift(0), + column_shift(row_shift + row_w), + time_shift(column_shift + column_w), + adc_shift(time_shift + time_w), + row_mask(~(~0U << row_w)), + column_mask(~(~0U << column_w)), + time_mask(~(~0U << time_w)), + adc_mask(~(~0U << adc_w)), + rowcol_mask(~(~0U << (column_w + row_w))), + max_row(row_mask), + max_column(column_mask), + max_adc(adc_mask) {} + + uint32_t row_width; + uint32_t column_width; + uint32_t adc_width; + + uint32_t row_shift; + uint32_t column_shift; + uint32_t time_shift; + uint32_t adc_shift; PackedDigiType row_mask; PackedDigiType column_mask; @@ -126,36 +125,25 @@ namespace pixelgpudetails { PackedDigiType adc_mask; PackedDigiType rowcol_mask; - uint32_t max_row; - uint32_t max_column; - uint32_t max_adc; + uint32_t max_row; + uint32_t max_column; + uint32_t max_adc; }; - __host__ __device__ - inline - constexpr Packing packing() { - return Packing(11, 11, 0, 10); - } - + __host__ __device__ inline constexpr Packing packing() { return Packing(11, 11, 0, 10); } - __host__ __device__ - inline - uint32_t pack(uint32_t row, uint32_t col, uint32_t adc) { + __host__ __device__ inline uint32_t pack(uint32_t row, uint32_t col, uint32_t adc) { constexpr Packing thePacking = packing(); adc = std::min(adc, thePacking.max_adc); - return (row << thePacking.row_shift) | - (col << thePacking.column_shift) | - (adc << thePacking.adc_shift); + return (row << thePacking.row_shift) | (col << thePacking.column_shift) | (adc << thePacking.adc_shift); } - constexpr - uint32_t pixelToChannel( int row, int col) { + constexpr uint32_t pixelToChannel(int row, int col) { constexpr Packing thePacking = packing(); return (row << thePacking.column_width) | col; } - class 
SiPixelRawToClusterGPUKernel { public: class WordFedAppender { @@ -163,10 +151,10 @@ namespace pixelgpudetails { WordFedAppender(); ~WordFedAppender() = default; - void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length); + void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t* src, unsigned int length); - const unsigned int *word() const { return word_.get(); } - const unsigned char *fedId() const { return fedId_.get(); } + const unsigned int* word() const { return word_.get(); } + const unsigned char* fedId() const { return fedId_.get(); } private: cudautils::host::noncached::unique_ptr word_; @@ -176,18 +164,21 @@ namespace pixelgpudetails { SiPixelRawToClusterGPUKernel() = default; ~SiPixelRawToClusterGPUKernel() = default; - SiPixelRawToClusterGPUKernel(const SiPixelRawToClusterGPUKernel&) = delete; SiPixelRawToClusterGPUKernel(SiPixelRawToClusterGPUKernel&&) = delete; SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete; SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete; - void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, - const SiPixelGainForHLTonGPU *gains, + void makeClustersAsync(const SiPixelFedCablingMapGPU* cablingMap, + const unsigned char* modToUnp, + const SiPixelGainForHLTonGPU* gains, const WordFedAppender& wordFed, PixelFormatterErrors&& errors, - const uint32_t wordCounter, const uint32_t fedCounter, - bool useQualityInfo, bool includeErrors, bool debug, + const uint32_t wordCounter, + const uint32_t fedCounter, + bool useQualityInfo, + bool includeErrors, + bool debug, cuda::stream_t<>& stream); std::pair getResults() { @@ -203,9 +194,7 @@ namespace pixelgpudetails { return std::make_pair(std::move(digis_d), std::move(clusters_d)); } - SiPixelDigiErrorsCUDA&& getErrors() { - return std::move(digiErrors_d); - } + SiPixelDigiErrorsCUDA&& getErrors() { return 
std::move(digiErrors_d); } private: uint32_t nDigis = 0; @@ -220,16 +209,16 @@ namespace pixelgpudetails { // see RecoLocalTracker/SiPixelClusterizer // all are runtime const, should be specified in python _cfg.py struct ADCThreshold { - const int thePixelThreshold = 1000; // default Pixel threshold in electrons - const int theSeedThreshold = 1000; // seed thershold in electrons not used in our algo - const float theClusterThreshold = 4000; // cluster threshold in electron - const int ConversionFactor = 65; // adc to electron conversion factor - - const int theStackADC_ = 255; // the maximum adc count for stack layer - const int theFirstStack_ = 5; // the index of the fits stack layer - const double theElectronPerADCGain_ = 600; // ADC to electron conversion + const int thePixelThreshold = 1000; // default Pixel threshold in electrons + const int theSeedThreshold = 1000; // seed thershold in electrons not used in our algo + const float theClusterThreshold = 4000; // cluster threshold in electron + const int ConversionFactor = 65; // adc to electron conversion factor + + const int theStackADC_ = 255; // the maximum adc count for stack layer + const int theFirstStack_ = 5; // the index of the fits stack layer + const double theElectronPerADCGain_ = 600; // ADC to electron conversion }; -} +} // namespace pixelgpudetails -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index 80e903d25f501..dc50cd20b4db4 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -32,10 +32,14 @@ namespace gpuClustering { return; if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules) - printf("Warning too many clusters in 
module %d in block %d: %d > %d\n", thisModuleId, blockIdx.x, nclus, MaxNumClustersPerModules); + printf("Warning too many clusters in module %d in block %d: %d > %d\n", + thisModuleId, + blockIdx.x, + nclus, + MaxNumClustersPerModules); auto first = firstPixel + threadIdx.x; - + if (nclus > MaxNumClustersPerModules) { // remove excess FIXME find a way to cut charge first.... for (auto i = first; i < numElements; i += blockDim.x) { @@ -43,15 +47,14 @@ namespace gpuClustering { continue; // not valid if (id[i] != thisModuleId) break; // end of module - if (clusterId[i]>=MaxNumClustersPerModules) { - id[i]=InvId; - clusterId[i]=InvId; + if (clusterId[i] >= MaxNumClustersPerModules) { + id[i] = InvId; + clusterId[i] = InvId; } } nclus = MaxNumClustersPerModules; } - #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) if (threadIdx.x == 0) @@ -62,7 +65,6 @@ namespace gpuClustering { __shared__ uint8_t ok[MaxNumClustersPerModules]; __shared__ uint16_t newclusId[MaxNumClustersPerModules]; - assert(nclus <= MaxNumClustersPerModules); for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { charge[i] = 0; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index fcc7f27e529a6..7997571d1cbd2 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -13,7 +13,7 @@ namespace gpuClustering { #ifdef GPU_DEBUG - __device__ uint32_t gMaxHit=0; + __device__ uint32_t gMaxHit = 0; #endif __global__ void countModules(uint16_t const* __restrict__ id, @@ -164,7 +164,7 @@ namespace gpuClustering { // fill NN for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { - assert(kgMaxHit) { - gMaxHit = foundClusters; - if (foundClusters>8) printf("max hit %d in %d\n",foundClusters, thisModuleId); + if (foundClusters > gMaxHit) { + gMaxHit = foundClusters; + if (foundClusters > 8) + printf("max hit %d in %d\n", 
foundClusters, thisModuleId); } #endif #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) - printf("%d clusters in module %d\n", foundClusters, thisModuleId); + printf("%d clusters in module %d\n", foundClusters, thisModuleId); #endif } } diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index 2bc2413bc7fb3..72f054a9b5636 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -28,7 +28,6 @@ //--- For the configuration: #include "FWCore/ParameterSet/interface/ParameterSet.h" - #include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementPoint.h" #include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementError.h" #include "DataFormats/GeometrySurface/interface/GloballyPositioned.h" @@ -51,241 +50,233 @@ class RectangularPixelTopology; class MagneticField; -class PixelCPEBase : public PixelClusterParameterEstimator -{ +class PixelCPEBase : public PixelClusterParameterEstimator { public: - struct DetParam - { - DetParam() {} - const PixelGeomDetUnit * theDet; - // gavril : replace RectangularPixelTopology with PixelTopology - const PixelTopology * theTopol; - const RectangularPixelTopology * theRecTopol; - - GeomDetType::SubDetector thePart; - Local3DPoint theOrigin; - float theThickness; - float thePitchX; - float thePitchY; - - float bz; // local Bz - float bx; // local Bx - LocalVector driftDirection; - float widthLAFractionX; // Width-LA to Offset-LA in X - float widthLAFractionY; // same in Y - float lorentzShiftInCmX; // a FULL shift, in cm - float lorentzShiftInCmY; // a FULL shift, in cm - int detTemplateId; // det if for templates & generic errors - int detTemplateId2D; // det if for 2D templates - }; - - struct ClusterParam - { - ClusterParam(){} - ClusterParam(const SiPixelCluster & cl) : theCluster(&cl) {} - - virtual ~ClusterParam() = default; - - const SiPixelCluster * theCluster = nullptr;; - 
- //--- Cluster-level quantities (filled in computeAnglesFrom....) - float cotalpha; - float cotbeta; - - // G.Giurgiu (05/14/08) track local coordinates - // filled in computeAnglesFrom.... - float trk_lp_x; - float trk_lp_y; - - // ggiurgiu@jhu.edu (12/01/2010) : Needed for calling topology methods - // with track angles to handle surface deformations (bows/kinks) - // filled in computeAnglesFrom.... (btw redundant with the 4 above) - Topology::LocalTrackPred loc_trk_pred; - - //--- Probability (protected by hasFilledProb_) - float probabilityX_ ; - float probabilityY_ ; - float probabilityQ_ ; - int qBin_ ; // always filled by qbin - - bool isOnEdge_ ; // filled in setTheClu - bool hasBadPixels_ = false; // (never used in current code) - bool spansTwoROCs_ ; // filled in setTheClu - bool hasFilledProb_ =false; - // ggiurgiu@jhu.edu (10/18/2008) - bool with_track_angle; // filled in computeAnglesFrom.... - bool filled_from_2d = false; // - - // More detailed edge information (for CPE ClusterRepair, and elsewhere...) 
- int edgeTypeX_ = 0; // 0: not on edge, 1: low end on edge, 2: high end - int edgeTypeY_ = 0; // 0: not on edge, 1: low end on edge, 2: high end - }; - + struct DetParam { + DetParam() {} + const PixelGeomDetUnit* theDet; + // gavril : replace RectangularPixelTopology with PixelTopology + const PixelTopology* theTopol; + const RectangularPixelTopology* theRecTopol; + + GeomDetType::SubDetector thePart; + Local3DPoint theOrigin; + float theThickness; + float thePitchX; + float thePitchY; + + float bz; // local Bz + float bx; // local Bx + LocalVector driftDirection; + float widthLAFractionX; // Width-LA to Offset-LA in X + float widthLAFractionY; // same in Y + float lorentzShiftInCmX; // a FULL shift, in cm + float lorentzShiftInCmY; // a FULL shift, in cm + int detTemplateId; // det if for templates & generic errors + int detTemplateId2D; // det if for 2D templates + }; + + struct ClusterParam { + ClusterParam() {} + ClusterParam(const SiPixelCluster& cl) : theCluster(&cl) {} + + virtual ~ClusterParam() = default; + + const SiPixelCluster* theCluster = nullptr; + ; + + //--- Cluster-level quantities (filled in computeAnglesFrom....) + float cotalpha; + float cotbeta; + + // G.Giurgiu (05/14/08) track local coordinates + // filled in computeAnglesFrom.... + float trk_lp_x; + float trk_lp_y; + + // ggiurgiu@jhu.edu (12/01/2010) : Needed for calling topology methods + // with track angles to handle surface deformations (bows/kinks) + // filled in computeAnglesFrom.... 
(btw redundant with the 4 above) + Topology::LocalTrackPred loc_trk_pred; + + //--- Probability (protected by hasFilledProb_) + float probabilityX_; + float probabilityY_; + float probabilityQ_; + int qBin_; // always filled by qbin + + bool isOnEdge_; // filled in setTheClu + bool hasBadPixels_ = false; // (never used in current code) + bool spansTwoROCs_; // filled in setTheClu + bool hasFilledProb_ = false; + // ggiurgiu@jhu.edu (10/18/2008) + bool with_track_angle; // filled in computeAnglesFrom.... + bool filled_from_2d = false; // + + // More detailed edge information (for CPE ClusterRepair, and elsewhere...) + int edgeTypeX_ = 0; // 0: not on edge, 1: low end on edge, 2: high end + int edgeTypeY_ = 0; // 0: not on edge, 1: low end on edge, 2: high end + }; + public: - PixelCPEBase(edm::ParameterSet const& conf, const MagneticField * mag, const TrackerGeometry& geom, const TrackerTopology& ttopo, - const SiPixelLorentzAngle * lorentzAngle, - const SiPixelGenErrorDBObject * genErrorDBObject, - const SiPixelTemplateDBObject * templateDBobject, - const SiPixelLorentzAngle * lorentzAngleWidth, - int flag=0 // flag=0 for generic, =1 for templates - ); // NEW - - //-------------------------------------------------------------------------- - // Allow the magnetic field to be set/updated later. - //-------------------------------------------------------------------------- - //inline void setMagField(const MagneticField *mag) const { magfield_ = mag; } // Not used, AH - - - //-------------------------------------------------------------------------- - // Obtain the angles from the position of the DetUnit. 
- //-------------------------------------------------------------------------- - - inline ReturnType getParameters(const SiPixelCluster & cl, - const GeomDetUnit & det ) const override - { + PixelCPEBase(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelTemplateDBObject* templateDBobject, + const SiPixelLorentzAngle* lorentzAngleWidth, + int flag = 0 // flag=0 for generic, =1 for templates + ); // NEW + + //-------------------------------------------------------------------------- + // Allow the magnetic field to be set/updated later. + //-------------------------------------------------------------------------- + //inline void setMagField(const MagneticField *mag) const { magfield_ = mag; } // Not used, AH + + //-------------------------------------------------------------------------- + // Obtain the angles from the position of the DetUnit. + //-------------------------------------------------------------------------- + + inline ReturnType getParameters(const SiPixelCluster& cl, const GeomDetUnit& det) const override { #ifdef EDM_ML_DEBUG - nRecHitsTotal_++ ; - //std::cout<<" in PixelCPEBase:localParameters(all) - "< Frame; - - //--------------------------------------------------------------------------- - // Data members - //--------------------------------------------------------------------------- - - //--- Counters + //--- All methods and data members are protected to facilitate (for now) + //--- access from derived classes. 
+ + typedef GloballyPositioned Frame; + + //--------------------------------------------------------------------------- + // Data members + //--------------------------------------------------------------------------- + + //--- Counters #ifdef EDM_ML_DEBUG - mutable std::atomic nRecHitsTotal_ ; //for debugging only - mutable std::atomic nRecHitsUsedEdge_ ; //for debugging only + mutable std::atomic nRecHitsTotal_; //for debugging only + mutable std::atomic nRecHitsUsedEdge_; //for debugging only #endif - - // Added new members - float lAOffset_; // la used to calculate the offset from configuration (for testing) - float lAWidthBPix_; // la used to calculate the cluster width from conf. - float lAWidthFPix_; // la used to calculate the cluster width from conf. - //bool useLAAlignmentOffsets_; // lorentz angle offsets detrmined by alignment - bool useLAOffsetFromConfig_; // lorentz angle used to calculate the offset - bool useLAWidthFromConfig_; // lorentz angle used to calculate the cluster width - bool useLAWidthFromDB_; // lorentz angle used to calculate the cluster width - - //--- Global quantities - int theVerboseLevel; // algorithm's verbosity - int theFlag_; // flag to recognice if we are in generic or templates - - const MagneticField * magfield_; // magnetic field - const TrackerGeometry & geom_; // geometry - const TrackerTopology & ttopo_; // Tracker Topology - - const SiPixelLorentzAngle * lorentzAngle_; - const SiPixelLorentzAngle * lorentzAngleWidth_; // for the charge width (generic) - - const SiPixelGenErrorDBObject * genErrorDBObject_; // NEW - //const SiPixelCPEGenericErrorParm * genErrorParm_; // OLD - - const SiPixelTemplateDBObject * templateDBobject_; - bool alpha2Order; // switch on/off E.B effect. - - bool DoLorentz_; - bool LoadTemplatesFromDB_; - - //--------------------------------------------------------------------------- - // Geometrical services to subclasses. 
- //--------------------------------------------------------------------------- + + // Added new members + float lAOffset_; // la used to calculate the offset from configuration (for testing) + float lAWidthBPix_; // la used to calculate the cluster width from conf. + float lAWidthFPix_; // la used to calculate the cluster width from conf. + //bool useLAAlignmentOffsets_; // lorentz angle offsets detrmined by alignment + bool useLAOffsetFromConfig_; // lorentz angle used to calculate the offset + bool useLAWidthFromConfig_; // lorentz angle used to calculate the cluster width + bool useLAWidthFromDB_; // lorentz angle used to calculate the cluster width + + //--- Global quantities + int theVerboseLevel; // algorithm's verbosity + int theFlag_; // flag to recognice if we are in generic or templates + + const MagneticField* magfield_; // magnetic field + const TrackerGeometry& geom_; // geometry + const TrackerTopology& ttopo_; // Tracker Topology + + const SiPixelLorentzAngle* lorentzAngle_; + const SiPixelLorentzAngle* lorentzAngleWidth_; // for the charge width (generic) + + const SiPixelGenErrorDBObject* genErrorDBObject_; // NEW + //const SiPixelCPEGenericErrorParm * genErrorParm_; // OLD + + const SiPixelTemplateDBObject* templateDBobject_; + bool alpha2Order; // switch on/off E.B effect. + + bool DoLorentz_; + bool LoadTemplatesFromDB_; + + //--------------------------------------------------------------------------- + // Geometrical services to subclasses. 
+ //--------------------------------------------------------------------------- protected: - void computeAnglesFromDetPosition( DetParam const & theDetParam, ClusterParam & theClusterParam ) const; - - void computeAnglesFromTrajectory ( DetParam const & theDetParam, ClusterParam & theClusterParam, - const LocalTrajectoryParameters & ltp) const; - - void setTheClu( DetParam const &, ClusterParam & theClusterParam ) const ; - - LocalVector driftDirection (DetParam & theDetParam, GlobalVector bfield ) const ; - LocalVector driftDirection (DetParam & theDetParam, LocalVector bfield ) const ; - void computeLorentzShifts(DetParam &) const ; - - - //--------------------------------------------------------------------------- - // Cluster-level services. - //--------------------------------------------------------------------------- - - DetParam const & detParam(const GeomDetUnit & det) const; - - using DetParams=std::vector; - - DetParams m_DetParams=DetParams(1440); - -}; + void computeAnglesFromDetPosition(DetParam const& theDetParam, ClusterParam& theClusterParam) const; -#endif + void computeAnglesFromTrajectory(DetParam const& theDetParam, + ClusterParam& theClusterParam, + const LocalTrajectoryParameters& ltp) const; + + void setTheClu(DetParam const&, ClusterParam& theClusterParam) const; + LocalVector driftDirection(DetParam& theDetParam, GlobalVector bfield) const; + LocalVector driftDirection(DetParam& theDetParam, LocalVector bfield) const; + void computeLorentzShifts(DetParam&) const; + //--------------------------------------------------------------------------- + // Cluster-level services. 
+ //--------------------------------------------------------------------------- + + DetParam const& detParam(const GeomDetUnit& det) const; + + using DetParams = std::vector; + + DetParams m_DetParams = DetParams(1440); +}; + +#endif diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 2218e31a85754..5666fc1f8453d 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -49,9 +49,7 @@ class PixelCPEFast final : public PixelCPEBase { // the same cudaStream, or after cudaStreamSynchronize. const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const; - pixelCPEforGPU::ParamsOnGPU const & getCPUProduct() const { - return cpuData_; - } + pixelCPEforGPU::ParamsOnGPU const &getCPUProduct() const { return cpuData_; } private: ClusterParam *createClusterParam(const SiPixelCluster &cl) const override; diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 4845c3b97ec2f..40c335547ba78 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -44,7 +44,6 @@ namespace pixelCPEforGPU { Frame frame; }; - using phase1PixelTopology::AverageGeometry; struct LayerGeometry { @@ -53,10 +52,10 @@ namespace pixelCPEforGPU { }; struct ParamsOnGPU { - CommonParams const * m_commonParams; - DetParams const * m_detParams; - LayerGeometry const * m_layerGeometry; - AverageGeometry const * m_averageGeometry; + CommonParams const* m_commonParams; + DetParams const* m_detParams; + LayerGeometry const* m_layerGeometry; + AverageGeometry const* m_averageGeometry; constexpr CommonParams const& __restrict__ commonParams() const { CommonParams const* __restrict__ l = m_commonParams; diff --git 
a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc index 344625cba01b6..c31b8bb1f2dae 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc @@ -10,7 +10,7 @@ #include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/ModuleFactory.h" -// new record +// new record #include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" #include "FWCore/Framework/interface/ESProducer.h" @@ -19,85 +19,76 @@ #include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" #include -class PixelCPEFastESProducer: public edm::ESProducer{ - public: - PixelCPEFastESProducer(const edm::ParameterSet & p); +class PixelCPEFastESProducer : public edm::ESProducer { +public: + PixelCPEFastESProducer(const edm::ParameterSet &p); std::shared_ptr produce(const TkPixelCPERecord &); - private: + +private: std::shared_ptr cpe_; edm::ParameterSet pset_; edm::ESInputTag magname_; bool UseErrorsFromTemplates_; }; - #include #include using namespace edm; - - - -PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet & p) -{ +PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet &p) { std::string myname = p.getParameter("ComponentName"); - magname_ = p.existsAs("MagneticFieldRecord")? - p.getParameter("MagneticFieldRecord"):edm::ESInputTag(""); - UseErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); - + magname_ = p.existsAs("MagneticFieldRecord") ? 
p.getParameter("MagneticFieldRecord") + : edm::ESInputTag(""); + UseErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); pset_ = p; - setWhatProduced(this,myname); - - + setWhatProduced(this, myname); } - -std::shared_ptr -PixelCPEFastESProducer::produce(const TkPixelCPERecord & iRecord){ - +std::shared_ptr PixelCPEFastESProducer::produce(const TkPixelCPERecord &iRecord) { ESHandle magfield; - iRecord.getRecord().get( magname_, magfield ); + iRecord.getRecord().get(magname_, magfield); edm::ESHandle pDD; - iRecord.getRecord().get( pDD ); + iRecord.getRecord().get(pDD); edm::ESHandle hTT; iRecord.getRecord().getRecord().get(hTT); // Lorant angle for offsets ESHandle lorentzAngle; - iRecord.getRecord().get(lorentzAngle ); + iRecord.getRecord().get(lorentzAngle); // add the new la width object ESHandle lorentzAngleWidth; - const SiPixelLorentzAngle * lorentzAngleWidthProduct = nullptr; - iRecord.getRecord().get("forWidth",lorentzAngleWidth ); + const SiPixelLorentzAngle *lorentzAngleWidthProduct = nullptr; + iRecord.getRecord().get("forWidth", lorentzAngleWidth); lorentzAngleWidthProduct = lorentzAngleWidth.product(); - const SiPixelGenErrorDBObject * genErrorDBObjectProduct = nullptr; + const SiPixelGenErrorDBObject *genErrorDBObjectProduct = nullptr; // Errors take only from new GenError ESHandle genErrorDBObject; - if(UseErrorsFromTemplates_) { // do only when generrors are needed - iRecord.getRecord().get(genErrorDBObject); + if (UseErrorsFromTemplates_) { // do only when generrors are needed + iRecord.getRecord().get(genErrorDBObject); genErrorDBObjectProduct = genErrorDBObject.product(); //} else { //std::cout<<" pass an empty GenError pointer"<( - pset_,magfield.product(),*pDD.product(), - *hTT.product(),lorentzAngle.product(), - genErrorDBObjectProduct,lorentzAngleWidthProduct); + cpe_ = std::make_shared(pset_, + magfield.product(), + *pDD.product(), + *hTT.product(), + lorentzAngle.product(), + genErrorDBObjectProduct, + lorentzAngleWidthProduct); 
return cpe_; } - #include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/Utilities/interface/typelookup.h" #include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" DEFINE_FWK_EVENTSETUP_MODULE(PixelCPEFastESProducer); - diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index 921a4b6f61c40..b4df63b1b5cfd 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -1,4 +1,4 @@ - /** SiPixelRecHitConverter.cc +/** SiPixelRecHitConverter.cc * ------------------------------------------------------ * Description: see SiPixelRecHitConverter.h * Authors: P. Maksimovic (JHU), V.Chiochia (Uni Zurich) @@ -20,7 +20,6 @@ #include "DataFormats/Common/interface/Ref.h" #include "DataFormats/Common/interface/DetSet2RangeMap.h" - // STL #include #include @@ -37,61 +36,52 @@ #include "CUDADataFormats/Common/interface/HostProduct.h" using HMSstorage = HostProduct; - using namespace std; -namespace cms -{ +namespace cms { //--------------------------------------------------------------------------- //! Constructor: set the ParameterSet and defer all thinking to setupCPE(). //--------------------------------------------------------------------------- - SiPixelRecHitConverter::SiPixelRecHitConverter(edm::ParameterSet const& conf) - : - conf_(conf), - src_( conf.getParameter( "src" ) ), - tPixelCluster(consumes< edmNew::DetSetVector >( src_)) { + SiPixelRecHitConverter::SiPixelRecHitConverter(edm::ParameterSet const& conf) + : conf_(conf), + src_(conf.getParameter("src")), + tPixelCluster(consumes >(src_)) { //--- Declare to the EDM what kind of collections we will be making. 
produces(); produces(); - } - + // Destructor - SiPixelRecHitConverter::~SiPixelRecHitConverter() - { - } - + SiPixelRecHitConverter::~SiPixelRecHitConverter() {} + //--------------------------------------------------------------------------- //! The "Event" entrypoint: gets called by framework for every event //--------------------------------------------------------------------------- - void SiPixelRecHitConverter::produce(edm::Event& e, const edm::EventSetup& es) - { - + void SiPixelRecHitConverter::produce(edm::Event& e, const edm::EventSetup& es) { // Step A.1: get input data - edm::Handle< edmNew::DetSetVector > input; - e.getByToken( tPixelCluster, input); - + edm::Handle > input; + e.getByToken(tPixelCluster, input); + // Step A.2: get event setup edm::ESHandle geom; - es.get().get( geom ); + es.get().get(geom); // Step B: create empty output collection auto output = std::make_unique(); - + // Step B*: create CPE edm::ESHandle hCPE; std::string cpeName_ = conf_.getParameter("CPE"); - es.get().get(cpeName_,hCPE); - cpe_ = dynamic_cast< const PixelCPEBase* >(&(*hCPE)); - + es.get().get(cpeName_, hCPE); + cpe_ = dynamic_cast(&(*hCPE)); + // Step C: Iterate over DetIds and invoke the strip CPE algorithm // on each DetUnit - run(e, input, *output, geom ); + run(e, input, *output, geom); output->shrink_to_fit(); e.put(std::move(output)); - } //--------------------------------------------------------------------------- @@ -99,100 +89,96 @@ namespace cms //! and make a RecHit to store the result. //! New interface reading DetSetVector by V.Chiochia (May 30th, 2006) //--------------------------------------------------------------------------- - void SiPixelRecHitConverter::run(edm::Event& iEvent, - edm::Handle > inputhandle, - SiPixelRecHitCollectionNew &output, - edm::ESHandle & geom) { - if ( ! cpe_ ) - { - edm::LogError("SiPixelRecHitConverter") << " at least one CPE is not ready -- can't run!"; - // TO DO: throw an exception here? The user may want to know... 
- assert(0); - return; // clusterizer is invalid, bail out - } - - int numberOfDetUnits = 0; - - const edmNew::DetSetVector& input = *inputhandle; - - // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... - auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); - auto hitsModuleStart = hmsp.get(); - auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone - iEvent.put(std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... - - -// fill cluster arrays - std::array clusInModule; - for (auto & cl : clusInModule) cl=0; - int numberOfClusters = 0; - for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { - unsigned int detid = DSViter->detId(); - DetId detIdObject(detid); - const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); - auto gind = genericDet->index(); - assert(gind<2000); - auto const nclus = DSViter->size(); - assert(nclus>0); - clusInModule[gind]=nclus; - numberOfClusters+=nclus; - } - hitsModuleStart[0]=0; - assert(clusInModule.size()>gpuClustering::MaxNumModules); - for (int i=1, n=clusInModule.size(); i > inputhandle, + SiPixelRecHitCollectionNew& output, + edm::ESHandle& geom) { + if (!cpe_) { + edm::LogError("SiPixelRecHitConverter") << " at least one CPE is not ready -- can't run!"; + // TO DO: throw an exception here? The user may want to know... + assert(0); + return; // clusterizer is invalid, bail out + } + int numberOfDetUnits = 0; + const edmNew::DetSetVector& input = *inputhandle; - numberOfClusters = 0; - edmNew::DetSetVector::const_iterator DSViter=input.begin(); - - for ( ; DSViter != input.end() ; DSViter++) { + // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... + auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); + auto hitsModuleStart = hmsp.get(); + auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone + iEvent.put(std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... 
+ + // fill cluster arrays + std::array clusInModule; + for (auto& cl : clusInModule) + cl = 0; + int numberOfClusters = 0; + for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { + unsigned int detid = DSViter->detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); + auto gind = genericDet->index(); + assert(gind < 2000); + auto const nclus = DSViter->size(); + assert(nclus > 0); + clusInModule[gind] = nclus; + numberOfClusters += nclus; + } + hitsModuleStart[0] = 0; + assert(clusInModule.size() > gpuClustering::MaxNumModules); + for (int i = 1, n = clusInModule.size(); i < n; ++i) + hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule[i - 1]; + assert(numberOfClusters == int(hitsModuleStart[gpuClustering::MaxNumModules])); + + numberOfClusters = 0; + edmNew::DetSetVector::const_iterator DSViter = input.begin(); + + for (; DSViter != input.end(); DSViter++) { numberOfDetUnits++; unsigned int detid = DSViter->detId(); - DetId detIdObject( detid ); - const GeomDetUnit * genericDet = geom->idToDetUnit( detIdObject ); - const PixelGeomDetUnit * pixDet = dynamic_cast(genericDet); - assert(pixDet); - SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output,detid); - + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); + const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); + assert(pixDet); + SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output, detid); + edmNew::DetSet::const_iterator clustIt = DSViter->begin(), clustEnd = DSViter->end(); - - for ( ; clustIt != clustEnd; clustIt++) { - numberOfClusters++; - std::tuple tuple = cpe_->getParameters( *clustIt, *genericDet ); - LocalPoint lp( std::get<0>(tuple) ); - LocalError le( std::get<1>(tuple) ); - SiPixelRecHitQuality::QualWordType rqw( std::get<2>(tuple) ); - // Create a persistent edm::Ref to the cluster - edm::Ref< edmNew::DetSetVector, SiPixelCluster > cluster = edmNew::makeRefTo( 
inputhandle, clustIt); - // Make a RecHit and add it to the DetSet - // old : recHitsOnDetUnit.push_back( new SiPixelRecHit( lp, le, detIdObject, &*clustIt) ); - SiPixelRecHit hit( lp, le, rqw, *genericDet, cluster); - // - // Now save it ================= - recHitsOnDetUnit.push_back(hit); - // ============================= - - // std::cout << "SiPixelRecHitConverterVI " << numberOfClusters << ' '<< lp << " " << le << std::endl; - } // <-- End loop on Clusters - + + for (; clustIt != clustEnd; clustIt++) { + numberOfClusters++; + std::tuple tuple = + cpe_->getParameters(*clustIt, *genericDet); + LocalPoint lp(std::get<0>(tuple)); + LocalError le(std::get<1>(tuple)); + SiPixelRecHitQuality::QualWordType rqw(std::get<2>(tuple)); + // Create a persistent edm::Ref to the cluster + edm::Ref, SiPixelCluster> cluster = + edmNew::makeRefTo(inputhandle, clustIt); + // Make a RecHit and add it to the DetSet + // old : recHitsOnDetUnit.push_back( new SiPixelRecHit( lp, le, detIdObject, &*clustIt) ); + SiPixelRecHit hit(lp, le, rqw, *genericDet, cluster); + // + // Now save it ================= + recHitsOnDetUnit.push_back(hit); + // ============================= + + // std::cout << "SiPixelRecHitConverterVI " << numberOfClusters << ' '<< lp << " " << le << std::endl; + } // <-- End loop on Clusters // LogDebug("SiPixelRecHitConverter") //std::cout << "SiPixelRecHitConverterVI " - // << " Found " << recHitsOnDetUnit.size() << " RecHits on " << detid //; - // << std::endl; - - - } // <-- End loop on DetUnits - - // LogDebug ("SiPixelRecHitConverter") + // << " Found " << recHitsOnDetUnit.size() << " RecHits on " << detid //; + // << std::endl; + + } // <-- End loop on DetUnits + + // LogDebug ("SiPixelRecHitConverter") // std::cout << "SiPixelRecHitConverterVI " - // << cpeName_ << " converted " << numberOfClusters - // << " SiPixelClusters into SiPixelRecHits, in " - // << numberOfDetUnits << " DetUnits." 
//; + // << cpeName_ << " converted " << numberOfClusters + // << " SiPixelClusters into SiPixelRecHits, in " + // << numberOfDetUnits << " DetUnits." //; // << std::endl; - } } // end of namespace cms diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 3b8707d1d3799..27e2528ceb97c 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -36,16 +36,15 @@ class SiPixelRecHitSoAFromLegacy : public edm::global::EDProducer<> { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - using HitModuleStart = std::array; - using HMSstorage = HostProduct; - + using HitModuleStart = std::array; + using HMSstorage = HostProduct; private: void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; // The mess with inputs will be cleaned up when migrating to the new framework edm::EDGetTokenT bsGetToken_; - edm::EDGetTokenT clusterToken_; // Legacy Clusters + edm::EDGetTokenT clusterToken_; // Legacy Clusters edm::EDPutTokenT tokenHit_; edm::EDPutTokenT tokenModuleStart_; @@ -60,8 +59,9 @@ SiPixelRecHitSoAFromLegacy::SiPixelRecHitSoAFromLegacy(const edm::ParameterSet& tokenModuleStart_{produces()}, cpeName_(iConfig.getParameter("CPE")), convert2Legacy_(iConfig.getParameter("convertToLegacy")) { - if (convert2Legacy_) produces(); - } + if (convert2Legacy_) + produces(); +} void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -69,21 +69,17 @@ void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); desc.add("CPE", "PixelCPEFast"); - desc.add("convertToLegacy",false); + 
desc.add("convertToLegacy", false); descriptions.add("siPixelRecHitHostSoA", desc); } void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { - - - const TrackerGeometry *geom_ = nullptr; + const TrackerGeometry* geom_ = nullptr; const PixelClusterParameterEstimator* cpe_ = nullptr; - edm::ESHandle geom; - es.get().get( geom ); + es.get().get(geom); geom_ = geom.product(); - edm::ESHandle hCPE; es.get().get(cpeName_, hCPE); @@ -93,11 +89,10 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv if (!fcpe) { throw cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; } - auto const & cpeView = fcpe->getCPUProduct(); + auto const& cpeView = fcpe->getCPUProduct(); const reco::BeamSpot& bs = iEvent.get(bsGetToken_); - BeamSpotCUDA::Data bsHost; bsHost.x = bs.x0(); bsHost.y = bs.y0(); @@ -110,69 +105,64 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); auto hitsModuleStart = hmsp.get(); - auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone - iEvent.put(tokenModuleStart_,std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... + auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone + iEvent.put(tokenModuleStart_, std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... 
- // legacy output + // legacy output auto legacyOutput = std::make_unique(); + // storage + std::vector xx_; + std::vector yy_; + std::vector adc_; + std::vector moduleInd_; + std::vector clus_; - // storage - std::vector xx_; - std::vector yy_; - std::vector adc_; - std::vector moduleInd_; - std::vector clus_; - - std::vector, SiPixelCluster>> clusterRef; + std::vector, SiPixelCluster>> clusterRef; - constexpr uint32_t MaxHitsInModule = gpuClustering::MaxHitsInModule; + constexpr uint32_t MaxHitsInModule = gpuClustering::MaxHitsInModule; - HitModuleStart moduleStart_; // index of the first pixel of each module - HitModuleStart clusInModule_; - memset(&clusInModule_,0,sizeof(HitModuleStart)); // needed?? - assert(2001==clusInModule_.size()); - assert(0==clusInModule_[2000]); - uint32_t moduleId_; - moduleStart_[1]=0; // we run sequentially.... + HitModuleStart moduleStart_; // index of the first pixel of each module + HitModuleStart clusInModule_; + memset(&clusInModule_, 0, sizeof(HitModuleStart)); // needed?? + assert(2001 == clusInModule_.size()); + assert(0 == clusInModule_[2000]); + uint32_t moduleId_; + moduleStart_[1] = 0; // we run sequentially.... 
- SiPixelClustersCUDA::DeviceConstView clusterView{moduleStart_.data(),clusInModule_.data(), &moduleId_, hitsModuleStart}; + SiPixelClustersCUDA::DeviceConstView clusterView{ + moduleStart_.data(), clusInModule_.data(), &moduleId_, hitsModuleStart}; // fill cluster arrays int numberOfClusters = 0; for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { - unsigned int detid = DSViter->detId(); + unsigned int detid = DSViter->detId(); DetId detIdObject(detid); const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); auto gind = genericDet->index(); - assert(gind<2000); - auto const nclus = DSViter->size(); - clusInModule_[gind]=nclus; - numberOfClusters+=nclus; + assert(gind < 2000); + auto const nclus = DSViter->size(); + clusInModule_[gind] = nclus; + numberOfClusters += nclus; } - hitsModuleStart[0]=0; - for (int i=1, n=clusInModule_.size(); i(numberOfClusters, - &cpeView, - hitsModuleStart, - dummyStream - ); - - if (0==numberOfClusters) { + auto dummyStream = cuda::stream::wrap(0, 0, false); + auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, dummyStream); + + if (0 == numberOfClusters) { iEvent.put(std::move(output)); if (convert2Legacy_) iEvent.put(std::move(legacyOutput)); - return; - } - + return; + } if (convert2Legacy_) - legacyOutput->reserve(2000,numberOfClusters); - + legacyOutput->reserve(2000, numberOfClusters); int numberOfDetUnits = 0; int numberOfHits = 0; @@ -182,29 +172,37 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv DetId detIdObject(detid); const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); auto const gind = genericDet->index(); - assert(gind<2000); + assert(gind < 2000); const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); assert(pixDet); - auto const nclus = DSViter->size(); - assert(clusInModule_[gind]==nclus); - if (0==nclus) continue; // is this really possible? 
- + auto const nclus = DSViter->size(); + assert(clusInModule_[gind] == nclus); + if (0 == nclus) + continue; // is this really possible? + auto const fc = hitsModuleStart[gind]; auto const lc = hitsModuleStart[gind + 1]; - assert(lc>fc); + assert(lc > fc); // std::cout << "in det " << gind << ": conv " << nclus << " hits from " << DSViter->size() << " legacy clusters" // <<' '<< fc <<','<MaxHitsInModule) printf("WARNING: too many clusters %d in Module %d. Only first %d Hits converted\n", nclus, gind, MaxHitsInModule); + assert((lc - fc) == nclus); + if (nclus > MaxHitsInModule) + printf( + "WARNING: too many clusters %d in Module %d. Only first %d Hits converted\n", nclus, gind, MaxHitsInModule); // fill digis - xx_.clear();yy_.clear();adc_.clear();moduleInd_.clear(); clus_.clear();clusterRef.clear(); + xx_.clear(); + yy_.clear(); + adc_.clear(); + moduleInd_.clear(); + clus_.clear(); + clusterRef.clear(); moduleId_ = gind; uint32_t ic = 0; uint32_t ndigi = 0; for (auto const& clust : *DSViter) { - assert(clust.size()>0); - for (int i=0, nd=clust.size(); i 0); + for (int i = 0, nd = clust.size(); i < nd; ++i) { auto px = clust.pixel(i); xx_.push_back(px.x); yy_.push_back(px.y); @@ -213,53 +211,54 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv clus_.push_back(ic); ++ndigi; } - assert(clust.originalId()==ic); // make sure hits and clus are in sync - if (convert2Legacy_) clusterRef.emplace_back(edmNew::makeRefTo(hclusters, &clust)); + assert(clust.originalId() == ic); // make sure hits and clus are in sync + if (convert2Legacy_) + clusterRef.emplace_back(edmNew::makeRefTo(hclusters, &clust)); ic++; } - assert(nclus==ic); - assert(clus_.size()==ndigi); - numberOfHits+=nclus; + assert(nclus == ic); + assert(clus_.size() == ndigi); + numberOfHits += nclus; // filled creates view - SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(),yy_.data(),adc_.data(),moduleInd_.data(), clus_.data()}; - assert(digiView.adc(0)!=0); + 
SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(), yy_.data(), adc_.data(), moduleInd_.data(), clus_.data()}; + assert(digiView.adc(0) != 0); // not needed... cudaCompat::resetGrid(); // we run on blockId.x==0 gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view()); - for (auto h=fc; hview()->detectorIndex(h)); else assert(9999 == output->view()->detectorIndex(h)); if (convert2Legacy_) { SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(*legacyOutput, detid); - for (auto h=fc; h=MaxHitsInModule) break; - assert(ih= MaxHitsInModule) + break; + assert(ih < clusterRef.size()); LocalPoint lp(output->view()->xLocal(h), output->view()->yLocal(h)); LocalError le(output->view()->xerrLocal(h), 0, output->view()->yerrLocal(h)); SiPixelRecHitQuality::QualWordType rqw = 0; - SiPixelRecHit hit(lp, le, rqw, *genericDet, clusterRef[ih]); + SiPixelRecHit hit(lp, le, rqw, *genericDet, clusterRef[ih]); recHitsOnDetUnit.push_back(hit); } } } - assert(numberOfHits==numberOfClusters); + assert(numberOfHits == numberOfClusters); // fill data structure to support CA - for (auto i=0; i < 11; ++i) { - output->hitsLayerStart()[i] = hitsModuleStart[cpeView.layerGeometry().layerStart[i]]; + for (auto i = 0; i < 11; ++i) { + output->hitsLayerStart()[i] = hitsModuleStart[cpeView.layerGeometry().layerStart[i]]; } - cudautils::fillManyFromVector( - output->phiBinner(), nullptr, 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, 0); + cudautils::fillManyFromVector( + output->phiBinner(), nullptr, 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, 0); // std::cout << "created HitSoa for " << numberOfClusters << " clusters in " << numberOfDetUnits << " Dets" << std::endl; iEvent.put(std::move(output)); if (convert2Legacy_) iEvent.put(std::move(legacyOutput)); - } DEFINE_FWK_MODULE(SiPixelRecHitSoAFromLegacy); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h 
b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 903324d4b7d94..feeff98849af2 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -15,41 +15,40 @@ namespace gpuPixelRecHits { __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams, BeamSpotCUDA::Data const* __restrict__ bs, - SiPixelDigisCUDA::DeviceConstView const * __restrict__ pdigis, + SiPixelDigisCUDA::DeviceConstView const* __restrict__ pdigis, int numElements, - SiPixelClustersCUDA::DeviceConstView const * __restrict__ pclusters, - TrackingRecHit2DSOAView* phits - ){ - + SiPixelClustersCUDA::DeviceConstView const* __restrict__ pclusters, + TrackingRecHit2DSOAView* phits) { // FIXME // the compiler seems NOT to optimize loads from views (even in a simple test case) // The whole gimnastic here of copying or not is a pure heuristic exercise that seems to produce the fastest code with the above signature - // not using views (passing a gazzilion of array pointers) seems to produce the fastest code (but it is harder to mantain) + // not using views (passing a gazzilion of array pointers) seems to produce the fastest code (but it is harder to mantain) assert(phits); assert(cpeParams); auto& hits = *phits; - auto const digis = *pdigis; // the copy is intentional! - auto const & clusters = *pclusters; + auto const digis = *pdigis; // the copy is intentional! + auto const& clusters = *pclusters; // copy average geometry corrected by beamspot . FIXME (move it somewhere else???) 
- if (0==blockIdx.x) { - auto & agc = hits.averageGeometry(); - auto const & ag = cpeParams->averageGeometry(); - for(int il=threadIdx.x, nl=TrackingRecHit2DSOAView::AverageGeometry::numberOfLaddersInBarrel; ilz; + if (0 == blockIdx.x) { + auto& agc = hits.averageGeometry(); + auto const& ag = cpeParams->averageGeometry(); + for (int il = threadIdx.x, nl = TrackingRecHit2DSOAView::AverageGeometry::numberOfLaddersInBarrel; il < nl; + il += blockDim.x) { + agc.ladderZ[il] = ag.ladderZ[il] - bs->z; agc.ladderX[il] = ag.ladderX[il] - bs->x; agc.ladderY[il] = ag.ladderY[il] - bs->y; - agc.ladderR[il] = sqrt(agc.ladderX[il]*agc.ladderX[il] + agc.ladderY[il]*agc.ladderY[il] ); + agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]); agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z; agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z; } - if(0==threadIdx.x) { - agc.endCapZ[0] = ag.endCapZ[0] - bs->z; - agc.endCapZ[1] = ag.endCapZ[1] - bs->z; -// printf("endcapZ %f %f\n",agc.endCapZ[0],agc.endCapZ[1]); + if (0 == threadIdx.x) { + agc.endCapZ[0] = ag.endCapZ[0] - bs->z; + agc.endCapZ[1] = ag.endCapZ[1] - bs->z; + // printf("endcapZ %f %f\n",agc.endCapZ[0],agc.endCapZ[1]); } } @@ -83,18 +82,18 @@ namespace gpuPixelRecHits { printf("hitbuilder: %d clusters in module %d. 
will write at %d\n", nclus, me, clusters.clusModuleStart(me)); #endif - for(int startClus=0, endClus=nclus; startClus0); - assert(lastClus<=nclus); + assert(nClusInIter <= nclus); + assert(nClusInIter > 0); + assert(lastClus <= nclus); - assert(nclus>MaxHitsInIter || (0==startClus && nClusInIter==nclus && lastClus==nclus)); + assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus)); - // init + // init for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { clusParams.minRow[ic] = std::numeric_limits::max(); clusParams.maxRow[ic] = 0; @@ -120,13 +119,13 @@ namespace gpuPixelRecHits { if (id != me) break; // end of module auto cl = digis.clus(i); - if (cl=lastClus) + if (cl < startClus || cl >= lastClus) continue; auto x = digis.xx(i); auto y = digis.yy(i); - cl -=startClus; - assert(cl>=0); - assert(cl= 0); + assert(cl < MaxHitsInIter); atomicMin(&clusParams.minRow[cl], x); atomicMax(&clusParams.maxRow[cl], x); atomicMin(&clusParams.minCol[cl], y); @@ -136,19 +135,19 @@ namespace gpuPixelRecHits { __syncthreads(); for (int i = first; i < numElements; i += blockDim.x) { - auto id = digis.moduleInd(i); + auto id = digis.moduleInd(i); if (id == InvId) continue; // not valid if (id != me) break; // end of module auto cl = digis.clus(i); - if (cl=lastClus) + if (cl < startClus || cl >= lastClus) continue; - cl -=startClus; - assert(cl>=0); - assert(cl= 0); + assert(cl < MaxHitsInIter); auto x = digis.xx(i); - auto y = digis.yy(i); + auto y = digis.yy(i); auto ch = digis.adc(i); atomicAdd(&clusParams.charge[cl], ch); if (clusParams.minRow[cl] == x) @@ -169,12 +168,12 @@ namespace gpuPixelRecHits { for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { auto h = first + ic; // output index in global memory - + // this cannot happen anymore if (h >= TrackingRecHit2DSOAView::maxHits()) break; // overflow... 
- assert(hcommonParams(), cpeParams->detParams(me), clusParams, ic); pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); @@ -212,7 +211,7 @@ namespace gpuPixelRecHits { hits.iphi(h) = unsafe_atan2s<7>(yg, xg); } __syncthreads(); - } // end loop on batches + } // end loop on batches } } // namespace gpuPixelRecHits diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index f1d716f06eba1..4aa3e60ae8320 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -63,12 +63,11 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, fillParamsForGpu(); cpuData_ = { - &m_commonParamsGPU, - m_detParamsGPU.data(), - &m_layerGeometry, - &m_averageGeometry, - }; - + &m_commonParamsGPU, + m_detParamsGPU.data(), + &m_layerGeometry, + &m_averageGeometry, + }; } const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { @@ -114,7 +113,7 @@ void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; // zero average geometry - memset(&m_averageGeometry,0,sizeof(pixelCPEforGPU::AverageGeometry)); + memset(&m_averageGeometry, 0, sizeof(pixelCPEforGPU::AverageGeometry)); uint32_t oldLayer = 0; uint32_t oldLadder = 0; @@ -255,37 +254,37 @@ void PixelCPEFast::fillParamsForGpu() { } // compute ladder baricenter (only in global z) for the barrel - auto & aveGeom = m_averageGeometry; - int il=0; - for (int im=0, nm=phase1PixelTopology::numberOfModulesInBarrel; im Date: Thu, 12 Sep 2019 05:45:55 +0200 Subject: [PATCH 084/149] Synchronise with CMSSW_11_0_0_pre7 --- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h | 3 +++ .../SiPixelRecHits/plugins/SiPixelRecHitConverter.cc | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h 
b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index 72f054a9b5636..4d2b0f033015f 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -27,6 +27,7 @@ //--- For the configuration: #include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementPoint.h" #include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementError.h" @@ -130,6 +131,8 @@ class PixelCPEBase : public PixelClusterParameterEstimator { int flag = 0 // flag=0 for generic, =1 for templates ); // NEW + static void fillPSetDescription(edm::ParameterSetDescription& desc); + //-------------------------------------------------------------------------- // Allow the magnetic field to be set/updated later. //-------------------------------------------------------------------------- diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index b4df63b1b5cfd..7026754ffea1f 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -31,7 +31,7 @@ #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" -// Make heterogeneous framewokr happy.... 
+// Make heterogeneous framework happy #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CUDADataFormats/Common/interface/HostProduct.h" using HMSstorage = HostProduct; From 950358b695339cd82c9cc11928b6e95608c9c720 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Wed, 23 Oct 2019 02:13:49 -0500 Subject: [PATCH 085/149] Fix clang warnings (cms-patatrack#387) --- .../plugins/SiPixelDigisClustersFromSoA.cc | 2 -- .../SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc | 7 +++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index d818cb87e23ac..1622b2402925b 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -45,8 +45,6 @@ namespace { return true; } }; - - constexpr uint32_t dummydetid = 0xffffffff; } // namespace class SiPixelDigisClustersFromSoA : public edm::global::EDProducer<> { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc index 603ea911a3de3..2ec203819731b 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -96,10 +96,9 @@ void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, cons iEvent.getByToken(tBeamSpot, hbs); auto const& bs = ctx.get(*hbs); - ctx.emplace( - iEvent, - tokenHit_, - std::move(gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe->getGPUProductAsync(ctx.stream()), ctx.stream()))); + ctx.emplace(iEvent, + tokenHit_, + gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe->getGPUProductAsync(ctx.stream()), ctx.stream())); } DEFINE_FWK_MODULE(SiPixelRecHitCUDA); From de8067490e65216fd534b7dad35903ff9aaf7b2f Mon Sep 17 00:00:00 2001 
From: Andrea Bocci Date: Thu, 24 Oct 2019 16:41:47 +0200 Subject: [PATCH 086/149] Implement library-only wrappers for launching CUDA kernels (cms-patatrack#390) Implement a wrapper to launch a CUDA kernel without using the non-standard CUDA <<<...>>> syntax, based on the cudaLaunchKernel library function. Implement a similar wrapper for cudaLaunchCooperativeKernel. Migrate code base from cuda::launch to cudautils::launch. --- RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index e3b6b313d091b..4f7ced9b7e309 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -13,6 +13,7 @@ #include #include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/launch.h" #endif #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" @@ -263,7 +264,7 @@ int main(void) { std::cout << "CUDA countModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads\n"; - cuda::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); + cudautils::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); blocksPerGrid = MaxNumModules; //nModules; @@ -272,7 +273,7 @@ int main(void) { cuda::memory::device::zero(d_clusInModule.get(), MaxNumModules * sizeof(uint32_t)); - cuda::launch(findClus, + cudautils::launch(findClus, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_x.get(), @@ -300,7 +301,7 @@ int main(void) { if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; - cuda::launch(clusterChargeCut, + cudautils::launch(clusterChargeCut, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_adc.get(), From b56966f668e15f6471e12414d861c4a4b15fabb6 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Sat, 26 Oct 2019 13:57:43 -0500 Subject: [PATCH 087/149] Replace use of API wrapper stream and event with plain CUDA, part 1 (cms-patatrack#389) Replace cuda::stream_t<> with cudaStream_t in client code Replace cuda::event_t with cudaEvent_t in the client code Clean up BuildFiles --- CUDADataFormats/SiPixelCluster/BuildFile.xml | 2 +- .../interface/SiPixelClustersCUDA.h | 7 +- .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 2 +- CUDADataFormats/SiPixelDigi/BuildFile.xml | 2 +- .../interface/SiPixelDigiErrorsCUDA.h | 8 +-- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 14 ++-- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 6 +- .../SiPixelDigi/src/SiPixelDigisCUDA.cc | 10 +-- CUDADataFormats/TrackingRecHit/BuildFile.xml | 2 +- .../interface/TrackingRecHit2DHeterogeneous.h | 10 +-- .../test/TrackingRecHit2DCUDA_t.cpp | 16 +++-- CalibTracker/SiPixelESProducers/BuildFile.xml | 2 +- .../SiPixelGainCalibrationForHLTGPU.h | 2 +- .../SiPixelESProducers/plugins/BuildFile.xml | 2 +- .../src/SiPixelGainCalibrationForHLTGPU.cc | 10 +-- .../SiPixelRawToDigi/plugins/BuildFile.xml | 2 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 66 +++++++++---------- .../plugins/SiPixelRawToClusterGPUKernel.h | 2 +- RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 1 - .../SiPixelRecHits/interface/PixelCPEFast.h | 2 +- .../SiPixelRecHits/plugins/BuildFile.xml | 1 - .../plugins/SiPixelRecHitSoAFromLegacy.cc | 3 +- .../SiPixelRecHits/src/PixelCPEFast.cc | 14 ++-- 23 files changed, 94 insertions(+), 92 deletions(-) diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml index 6db6a1f62cda1..5e401d215c4eb 100644 --- a/CUDADataFormats/SiPixelCluster/BuildFile.xml +++ 
b/CUDADataFormats/SiPixelCluster/BuildFile.xml @@ -1,7 +1,7 @@ - + diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index 1db6e29459ea7..d3650e164d44e 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -3,15 +3,14 @@ #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -#include - #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include + class SiPixelClustersCUDA { public: SiPixelClustersCUDA() = default; - explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<> &stream); + explicit SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream); ~SiPixelClustersCUDA() = default; SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete; diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index 4a145bb1231c2..c814cd4a2e131 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -4,7 +4,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream) { +SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) { moduleStart_d = cudautils::make_device_unique(maxClusters + 1, stream); clusInModule_d = cudautils::make_device_unique(maxClusters, stream); moduleId_d = cudautils::make_device_unique(maxClusters, stream); diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml index c29c9c9b9f44d..ee357e2d4e157 100644 --- a/CUDADataFormats/SiPixelDigi/BuildFile.xml +++ 
b/CUDADataFormats/SiPixelDigi/BuildFile.xml @@ -2,7 +2,7 @@ - + diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index 7e016efb708de..7c18d58a3fc12 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -6,12 +6,12 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" -#include +#include class SiPixelDigiErrorsCUDA { public: SiPixelDigiErrorsCUDA() = default; - explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream); + explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream); ~SiPixelDigiErrorsCUDA() = default; SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete; @@ -27,9 +27,9 @@ class SiPixelDigiErrorsCUDA { using HostDataError = std::pair, cudautils::host::unique_ptr>; - HostDataError dataErrorToHostAsync(cuda::stream_t<>& stream) const; + HostDataError dataErrorToHostAsync(cudaStream_t stream) const; - void copyErrorToHostAsync(cuda::stream_t<>& stream); + void copyErrorToHostAsync(cudaStream_t stream); private: cudautils::device::unique_ptr data_d; diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 0d0e025ef52da..47efe634ad93d 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -3,14 +3,14 @@ #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -#include #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include + class SiPixelDigisCUDA { public: SiPixelDigisCUDA() = default; - explicit SiPixelDigisCUDA(size_t 
maxFedWords, cuda::stream_t<> &stream); + explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream); ~SiPixelDigisCUDA() = default; SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete; @@ -50,10 +50,10 @@ class SiPixelDigisCUDA { uint32_t const *c_pdigi() const { return pdigi_d.get(); } uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); } - cudautils::host::unique_ptr adcToHostAsync(cuda::stream_t<> &stream) const; - cudautils::host::unique_ptr clusToHostAsync(cuda::stream_t<> &stream) const; - cudautils::host::unique_ptr pdigiToHostAsync(cuda::stream_t<> &stream) const; - cudautils::host::unique_ptr rawIdArrToHostAsync(cuda::stream_t<> &stream) const; + cudautils::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const; class DeviceConstView { public: diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index 2aa2b24ddf316..7640348c15f08 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -7,7 +7,7 @@ #include -SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream) +SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream) : formatterErrors_h(std::move(errors)) { error_d = cudautils::make_device_unique>(stream); data_d = cudautils::make_device_unique(maxFedWords, stream); @@ -22,11 +22,11 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterE cudautils::copyAsync(error_d, error_h, stream); } -void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cuda::stream_t<>& stream) { +void 
SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) { cudautils::copyAsync(error_h, error_d, stream); } -SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cuda::stream_t<>& stream) const { +SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { // On one hand size() could be sufficient. On the other hand, if // someone copies the SimpleVector<>, (s)he might expect the data // buffer to actually have space for capacity() elements. diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index fe4aedd686546..a8aab7ab5a4b8 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -4,7 +4,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) { +SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) { xx_d = cudautils::make_device_unique(maxFedWords, stream); yy_d = cudautils::make_device_unique(maxFedWords, stream); adc_d = cudautils::make_device_unique(maxFedWords, stream); @@ -25,25 +25,25 @@ SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) cudautils::copyAsync(view_d, view, stream); } -cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cuda::stream_t<>& stream) const { +cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, adc_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cuda::stream_t<>& stream) const { +cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const { auto ret = cudautils::make_host_unique(nDigis(), 
stream); cudautils::copyAsync(ret, clus_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cuda::stream_t<>& stream) const { +cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const { auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, pdigi_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cuda::stream_t<>& stream) const { +cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const { auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream); return ret; diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml index 8af605862d1ac..8dc569d40b6c4 100644 --- a/CUDADataFormats/TrackingRecHit/BuildFile.xml +++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml @@ -1,4 +1,4 @@ - + diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index a3d6354732ecf..1b27558efe3ee 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -17,7 +17,7 @@ class TrackingRecHit2DHeterogeneous { explicit TrackingRecHit2DHeterogeneous(uint32_t nHits, pixelCPEforGPU::ParamsOnGPU const* cpeParams, uint32_t const* hitsModuleStart, - cuda::stream_t<>& stream); + cudaStream_t stream); ~TrackingRecHit2DHeterogeneous() = default; @@ -37,9 +37,9 @@ class TrackingRecHit2DHeterogeneous { auto iphi() { return m_iphi; } // only the local coord and detector index - cudautils::host::unique_ptr localCoordToHostAsync(cuda::stream_t<>& stream) const; - cudautils::host::unique_ptr detIndexToHostAsync(cuda::stream_t<>& stream) const; - cudautils::host::unique_ptr 
hitsModuleStartToHostAsync(cuda::stream_t<>& stream) const; + cudautils::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; private: static constexpr uint32_t n16 = 4; @@ -71,7 +71,7 @@ template TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nHits, pixelCPEforGPU::ParamsOnGPU const* cpeParams, uint32_t const* hitsModuleStart, - cuda::stream_t<>& stream) + cudaStream_t stream) : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) { auto view = Traits::template make_host_unique(stream); diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index cf2221dc71b95..135545ba65832 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -1,6 +1,7 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" namespace testTrackingRecHit2D { @@ -11,13 +12,18 @@ namespace testTrackingRecHit2D { int main() { exitSansCUDADevices(); - auto current_device = cuda::device::current::get(); - auto stream = current_device.create_stream(cuda::stream::implicitly_synchronizes_with_default_stream); + cudaStream_t stream; + cudaCheck(cudaStreamCreate(&stream)); - auto nHits = 200; - TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream); + // inner scope to deallocate memory before destroying the stream + { + auto nHits = 200; + TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream); - testTrackingRecHit2D::runKernels(tkhit.view()); + testTrackingRecHit2D::runKernels(tkhit.view()); + } + + 
cudaCheck(cudaStreamDestroy(stream)); return 0; } diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml index 69d258da21ed1..02a36e17ed732 100644 --- a/CalibTracker/SiPixelESProducers/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/BuildFile.xml @@ -9,7 +9,7 @@ - + diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index d6d2e1a262dc8..00386b06c2e6c 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -16,7 +16,7 @@ class SiPixelGainCalibrationForHLTGPU { explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT &gains, const TrackerGeometry &geom); ~SiPixelGainCalibrationForHLTGPU(); - const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const; + const SiPixelGainForHLTonGPU *getGPUProductAsync(cudaStream_t cudaStream) const; const SiPixelGainForHLTonGPU *getCPUProduct() const { return gainForHLTonHost_; } const SiPixelGainCalibrationForHLT *getOriginalProduct() { return gains_; } diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index b33657e273036..57bf68a1b7518 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -7,7 +7,7 @@ - + diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc index d94e9f1959190..9758731f0bb8e 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -84,23 +84,23 @@ SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() { 
cudaCheck(cudaFree(gainDataOnGPU)); } -const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { - const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) { +const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU))); cudaCheck( cudaMalloc((void**)&data.gainDataOnGPU, this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<> // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory cudaCheck(cudaMemcpyAsync( - data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id())); + data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync( - data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id())); + data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), &(data.gainDataOnGPU), sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), cudaMemcpyDefault, - stream.id())); + stream)); }); return data.gainForHLTonGPU; } diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 4d2b5ebf45542..212738e941533 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,7 +1,7 @@ - + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu 
b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 556c0dcec4fb1..8e0d5123e6ecc 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -536,7 +536,7 @@ namespace pixelgpudetails { bool useQualityInfo, bool includeErrors, bool debug, - cuda::stream_t<> &stream) { + cudaStream_t stream) { nDigis = wordCounter; #ifdef GPU_DEBUG @@ -561,13 +561,13 @@ namespace pixelgpudetails { auto word_d = cudautils::make_device_unique(wordCounter, stream); auto fedId_d = cudautils::make_device_unique(wordCounter, stream); + cudaCheck( + cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync( - word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync( - fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id())); + fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream)); // Launch rawToDigi kernel - RawToDigi_kernel<<>>( + RawToDigi_kernel<<>>( cablingMap, modToUnp, wordCounter, @@ -602,15 +602,15 @@ namespace pixelgpudetails { int blocks = (std::max(int(wordCounter), int(gpuClustering::MaxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; - gpuCalibPixel::calibDigis<<>>(digis_d.moduleInd(), - digis_d.c_xx(), - digis_d.c_yy(), - digis_d.adc(), - gains, - wordCounter, - clusters_d.moduleStart(), - clusters_d.clusInModule(), - clusters_d.clusModuleStart()); + gpuCalibPixel::calibDigis<<>>(digis_d.moduleInd(), + digis_d.c_xx(), + digis_d.c_yy(), + digis_d.adc(), + gains, + wordCounter, + clusters_d.moduleStart(), + clusters_d.clusInModule(), + clusters_d.clusModuleStart()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaDeviceSynchronize(); @@ -622,27 +622,27 @@ namespace pixelgpudetails { << " threads\n"; 
#endif - countModules<<>>( + countModules<<>>( digis_d.c_moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); // read the number of modules into a data member, used by getProduct()) cudaCheck(cudaMemcpyAsync( - &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream)); threadsPerBlock = 256; blocks = MaxNumModules; #ifdef GPU_DEBUG std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; #endif - findClus<<>>(digis_d.c_moduleInd(), - digis_d.c_xx(), - digis_d.c_yy(), - clusters_d.c_moduleStart(), - clusters_d.clusInModule(), - clusters_d.moduleId(), - digis_d.clus(), - wordCounter); + findClus<<>>(digis_d.c_moduleInd(), + digis_d.c_xx(), + digis_d.c_yy(), + clusters_d.c_moduleStart(), + clusters_d.clusInModule(), + clusters_d.moduleId(), + digis_d.clus(), + wordCounter); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaDeviceSynchronize(); @@ -650,13 +650,13 @@ namespace pixelgpudetails { #endif // apply charge cut - clusterChargeCut<<>>(digis_d.moduleInd(), - digis_d.c_adc(), - clusters_d.c_moduleStart(), - clusters_d.clusInModule(), - clusters_d.c_moduleId(), - digis_d.clus(), - wordCounter); + clusterChargeCut<<>>(digis_d.moduleInd(), + digis_d.c_adc(), + clusters_d.c_moduleStart(), + clusters_d.clusInModule(), + clusters_d.c_moduleId(), + digis_d.clus(), + wordCounter); cudaCheck(cudaGetLastError()); // count the module start indices already here (instead of @@ -665,14 +665,14 @@ namespace pixelgpudetails { // synchronization/ExternalWork // MUST be ONE block - fillHitsModuleStart<<<1, 1024, 0, stream.id()>>>(clusters_d.c_clusInModule(), clusters_d.clusModuleStart()); + fillHitsModuleStart<<<1, 1024, 0, stream>>>(clusters_d.c_clusInModule(), clusters_d.clusModuleStart()); // last element holds the number of all 
clusters cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), clusters_d.clusModuleStart() + gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, - stream.id())); + stream)); #ifdef GPU_DEBUG cudaDeviceSynchronize(); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 6d53eaf6a71c0..f1ffb191e9959 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -179,7 +179,7 @@ namespace pixelgpudetails { bool useQualityInfo, bool includeErrors, bool debug, - cuda::stream_t<>& stream); + cudaStream_t stream); std::pair getResults() { digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis); diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index a3d2c6a35e642..a626c35727249 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -13,7 +13,6 @@ - diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 5666fc1f8453d..575c72d33a69a 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -47,7 +47,7 @@ class PixelCPEFast final : public PixelCPEBase { // The return value can only be used safely in kernels launched on // the same cudaStream, or after cudaStreamSynchronize. 
- const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const; + const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cudaStream_t cudaStream) const; pixelCPEforGPU::ParamsOnGPU const &getCPUProduct() const { return cpuData_; } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 9385896a5e287..0868b38c07652 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -9,6 +9,5 @@ - diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 27e2528ceb97c..f58d81faaeea1 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -151,8 +151,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv assert(numberOfClusters == int(hitsModuleStart[2000])); // output SoA - auto dummyStream = cuda::stream::wrap(0, 0, false); - auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, dummyStream); + auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, nullptr); if (0 == numberOfClusters) { iEvent.put(std::move(output)); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 4aa3e60ae8320..356713ad4b45c 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -70,8 +70,8 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, }; } -const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { - const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) { +const pixelCPEforGPU::ParamsOnGPU* 
PixelCPEFast::getGPUProductAsync(cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { // and now copy to device... cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_detParams, @@ -81,27 +81,27 @@ const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream cudaCheck(cudaMalloc((void**)&data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); cudaCheck(cudaMemcpyAsync( - data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream.id())); + data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_commonParams, &this->m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault, - stream.id())); + stream)); cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_averageGeometry, &this->m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry), cudaMemcpyDefault, - stream.id())); + stream)); cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_layerGeometry, &this->m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), cudaMemcpyDefault, - stream.id())); + stream)); cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.data(), this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault, - stream.id())); + stream)); }); return data.d_paramsOnGPU; } From 5aa5b5550596cf4a6c9f2a11d8a2b50b6f8b9e23 Mon Sep 17 00:00:00 2001 From: waredjeb <39335169+waredjeb@users.noreply.github.com> Date: Tue, 29 Oct 2019 07:09:04 +0100 Subject: [PATCH 088/149] Replace CUDA API wrapper memory operations with native CUDA calls (cms-patatrack#395) --- .../SiPixelClusterizer/test/gpuClustering_t.h | 62 +++++++++---------- 1 file changed, 30 insertions(+), 32 
deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index 4f7ced9b7e309..bb86c1392cdf9 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -12,6 +12,7 @@ #ifdef __CUDACC__ #include +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" #endif @@ -252,12 +253,12 @@ int main(void) { size_t size16 = n * sizeof(unsigned short); // size_t size8 = n * sizeof(uint8_t); - cuda::memory::copy(d_moduleStart.get(), &nModules, sizeof(uint32_t)); + cudaCheck(cudaMemcpy(d_moduleStart.get(), &nModules, sizeof(uint32_t), cudaMemcpyHostToDevice)); - cuda::memory::copy(d_id.get(), h_id.get(), size16); - cuda::memory::copy(d_x.get(), h_x.get(), size16); - cuda::memory::copy(d_y.get(), h_y.get(), size16); - cuda::memory::copy(d_adc.get(), h_adc.get(), size16); + cudaCheck(cudaMemcpy(d_id.get(), h_id.get(), size16, cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(d_x.get(), h_x.get(), size16, cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(d_y.get(), h_y.get(), size16, cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(d_adc.get(), h_adc.get(), size16, cudaMemcpyHostToDevice)); // Launch CUDA Kernels int threadsPerBlock = (kkk == 5) ? 512 : ((kkk == 3) ? 
128 : 256); int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; @@ -270,26 +271,23 @@ int main(void) { std::cout << "CUDA findModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads\n"; - - cuda::memory::device::zero(d_clusInModule.get(), MaxNumModules * sizeof(uint32_t)); + cudaCheck(cudaMemset(d_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t))); cudautils::launch(findClus, - {blocksPerGrid, threadsPerBlock}, - d_id.get(), - d_x.get(), - d_y.get(), - d_moduleStart.get(), - d_clusInModule.get(), - d_moduleId.get(), - d_clus.get(), - n); + {blocksPerGrid, threadsPerBlock}, + d_id.get(), + d_x.get(), + d_y.get(), + d_moduleStart.get(), + d_clusInModule.get(), + d_moduleId.get(), + d_clus.get(), + n); cudaDeviceSynchronize(); - - cuda::memory::copy(&nModules, d_moduleStart.get(), sizeof(uint32_t)); + cudaCheck(cudaMemcpy(&nModules, d_moduleStart.get(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); uint32_t nclus[MaxNumModules], moduleId[nModules]; - - cuda::memory::copy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t)); + cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); std::cout << "before charge cut found " << std::accumulate(nclus, nclus + MaxNumModules, 0) << " clusters" << std::endl; @@ -302,14 +300,14 @@ int main(void) { std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; cudautils::launch(clusterChargeCut, - {blocksPerGrid, threadsPerBlock}, - d_id.get(), - d_adc.get(), - d_moduleStart.get(), - d_clusInModule.get(), - d_moduleId.get(), - d_clus.get(), - n); + {blocksPerGrid, threadsPerBlock}, + d_id.get(), + d_adc.get(), + d_moduleStart.get(), + d_clusInModule.get(), + d_moduleId.get(), + d_clus.get(), + n); cudaDeviceSynchronize(); #else @@ -354,10 +352,10 @@ int main(void) { std::cout << "found " << nModules << " Modules active" << std::endl; #ifdef __CUDACC__ - cuda::memory::copy(h_id.get(), d_id.get(), size16); - cuda::memory::copy(h_clus.get(), d_clus.get(), size32); - cuda::memory::copy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t)); - cuda::memory::copy(&moduleId, d_moduleId.get(), nModules * sizeof(uint32_t)); + cudaCheck(cudaMemcpy(h_id.get(), d_id.get(), size16, cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(h_clus.get(), d_clus.get(), size32, cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(&moduleId, d_moduleId.get(), nModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); #endif std::set clids; From 86a3932ba5c3204957ce742702ee8da37811cfda Mon Sep 17 00:00:00 2001 From: waredjeb <39335169+waredjeb@users.noreply.github.com> Date: Thu, 31 Oct 2019 11:54:07 +0100 Subject: [PATCH 089/149] Replace use of CUDA API wrapper unique_ptrs with CUDAUtilities unique_ptrs (cms-patatrack#396) Replace cuda::memory::device::make_unique() calls with cudautils::make_device_unique() Replace cuda::memory::host::make_unique() with cudautils::make_host_unique() --- .../SiPixelClusterizer/test/gpuClustering_t.h | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index bb86c1392cdf9..03a45baa3ba24 100644 --- 
a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -12,6 +12,7 @@ #ifdef __CUDACC__ #include +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" @@ -44,17 +45,14 @@ int main(void) { #ifdef __CUDACC__ auto current_device = cuda::device::current::get(); - auto d_id = cuda::memory::device::make_unique(current_device, numElements); - auto d_x = cuda::memory::device::make_unique(current_device, numElements); - auto d_y = cuda::memory::device::make_unique(current_device, numElements); - auto d_adc = cuda::memory::device::make_unique(current_device, numElements); - - auto d_clus = cuda::memory::device::make_unique(current_device, numElements); - - auto d_moduleStart = cuda::memory::device::make_unique(current_device, MaxNumModules + 1); - - auto d_clusInModule = cuda::memory::device::make_unique(current_device, MaxNumModules); - auto d_moduleId = cuda::memory::device::make_unique(current_device, MaxNumModules); + auto d_id = cudautils::make_device_unique(numElements, nullptr); + auto d_x = cudautils::make_device_unique(numElements, nullptr); + auto d_y = cudautils::make_device_unique(numElements, nullptr); + auto d_adc = cudautils::make_device_unique(numElements, nullptr); + auto d_clus = cudautils::make_device_unique(numElements, nullptr); + auto d_moduleStart = cudautils::make_device_unique(MaxNumModules + 1, nullptr); + auto d_clusInModule = cudautils::make_device_unique(MaxNumModules, nullptr); + auto d_moduleId = cudautils::make_device_unique(MaxNumModules, nullptr); #else auto h_moduleStart = std::make_unique(MaxNumModules + 1); From b13812b89e947991459275d66f23810b5211309b Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 4 Nov 2019 11:48:34 +0100 Subject: [PATCH 
090/149] Synchronise with CMSSW_11_0_0_pre11 --- DataFormats/SiPixelDigi/src/classes.h | 1 - .../SiPixelClusterizer/plugins/SiPixelClusterProducer.cc | 2 +- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h | 2 +- .../SiPixelRecHits/plugins/SiPixelRecHitConverter.cc | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/DataFormats/SiPixelDigi/src/classes.h b/DataFormats/SiPixelDigi/src/classes.h index 4c15d6a06b20f..ba68d3289e8cd 100644 --- a/DataFormats/SiPixelDigi/src/classes.h +++ b/DataFormats/SiPixelDigi/src/classes.h @@ -10,7 +10,6 @@ #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" -#include "boost/cstdint.hpp" #include #endif // SIPIXELDIGI_CLASSES_H diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc index 8783d13354241..02678c999a036 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc @@ -17,7 +17,7 @@ // Geometry #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" // Data Formats #include "DataFormats/Common/interface/DetSetVector.h" diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index 4d2b0f033015f..96956564e1a5a 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -21,7 +21,7 @@ #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" #include "Geometry/CommonDetUnit/interface/GeomDetType.h" -#include 
"Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" #include "Geometry/CommonTopologies/interface/PixelTopology.h" #include "Geometry/CommonTopologies/interface/Topology.h" diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index 7026754ffea1f..945bbb28a3262 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -13,7 +13,7 @@ #include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelRecHitConverter.h" // Geometry #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" // Data Formats #include "DataFormats/DetId/interface/DetId.h" From 9b09f0a0b55bce4ff9aad8b94d0cf0a22fee7089 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 4 Nov 2019 21:53:04 +0100 Subject: [PATCH 091/149] Minimal updates following #28127 Update RecoLocalTracker/SiPixelRecHits files from Patatrack development following "DataFormats dependency on Geometry Cleanup (#28127)". 
--- RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 356713ad4b45c..0b794e1219909 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -6,7 +6,7 @@ #include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" #include "DataFormats/DetId/interface/DetId.h" #include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "Geometry/TrackerGeometryBuilder/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" #include "Geometry/TrackerGeometryBuilder/interface/RectangularPixelTopology.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" From 8393897ef35b621a45462535a3d1a48680cd4732 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 8 Nov 2019 18:43:10 +1030 Subject: [PATCH 092/149] Use non-blocking CUDA streams (cms-patatrack#405) Specifies that work running in the created stream may run concurrently with work in stream 0 (the NULL stream), and that the created stream should perform no implicit synchronization with stream 0. 
--- CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index 135545ba65832..42be4bc6991e1 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -13,7 +13,7 @@ int main() { exitSansCUDADevices(); cudaStream_t stream; - cudaCheck(cudaStreamCreate(&stream)); + cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); // inner scope to deallocate memory before destroying the stream { From a32cd8f1f01c7e986b59768ba7a804729d689f6c Mon Sep 17 00:00:00 2001 From: waredjeb <39335169+waredjeb@users.noreply.github.com> Date: Tue, 26 Nov 2019 18:41:27 +0100 Subject: [PATCH 093/149] Replace cuda::device operations with native CUDA calls (cms-patatrack#408) Replaces the usage of cuda::device::count(), cuda::device::get(), cuda::device::set() and cuda::device::current::get() with native CUDA calls. 
--- CUDADataFormats/Common/BuildFile.xml | 1 - .../TrackingRecHit/interface/TrackingRecHit2DSOAView.h | 1 - RecoLocalTracker/SiPixelClusterizer/BuildFile.xml | 1 - RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml | 1 - .../plugins/SiPixelRawToClusterGPUKernel.h | 1 - RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml | 2 -- RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h | 5 ++--- 7 files changed, 2 insertions(+), 10 deletions(-) diff --git a/CUDADataFormats/Common/BuildFile.xml b/CUDADataFormats/Common/BuildFile.xml index 12da06aa20da0..98033aab4d99d 100644 --- a/CUDADataFormats/Common/BuildFile.xml +++ b/CUDADataFormats/Common/BuildFile.xml @@ -1,4 +1,3 @@ - diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index f648fa0904749..8e6d99e81238a 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -2,7 +2,6 @@ #define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h #include -#include #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml index 74e76ab6ff3e2..58a7f0b22e30b 100644 --- a/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml @@ -2,7 +2,6 @@ - diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 40a489f763397..546aa92692b9e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -13,7 +13,6 @@ - diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h 
b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index f1ffb191e9959..888fc07953d9d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -3,7 +3,6 @@ #include #include -#include "cuda/api_wrappers.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml index 46f83f663faab..2d7d2139ab079 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml @@ -35,7 +35,6 @@ - @@ -43,7 +42,6 @@ - diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index 03a45baa3ba24..c808bc780353c 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -10,12 +10,12 @@ #include #ifdef __CUDACC__ -#include #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaDeviceCount.h" #endif #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" @@ -25,7 +25,7 @@ int main(void) { #ifdef __CUDACC__ exitSansCUDADevices(); - if (cuda::device::count() == 0) { + if (cudautils::cudaDeviceCount() == 0) { std::cerr << "No CUDA devices on this system" << "\n"; exit(EXIT_FAILURE); @@ -44,7 +44,6 @@ int main(void) { auto h_clus = std::make_unique(numElements); #ifdef __CUDACC__ - auto current_device = cuda::device::current::get(); auto d_id = 
cudautils::make_device_unique(numElements, nullptr); auto d_x = cudautils::make_device_unique(numElements, nullptr); auto d_y = cudautils::make_device_unique(numElements, nullptr); From ead6c491238531190e2fdc0cfd4fa95c93b8697e Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 27 Nov 2019 15:17:05 +0100 Subject: [PATCH 094/149] Drop obsolete heterogenous framework (cms-patatrack#416) --- .../SiPixelClusterizer/plugins/BuildFile.xml | 34 +++++++++---------- .../SiPixelRecHits/plugins/BuildFile.xml | 4 +-- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 546aa92692b9e..1acd271a17e00 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,19 +1,17 @@ - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 0868b38c07652..49be86daa18cd 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,13 +1,11 @@ + - - - From 2276716afd0073b7f318ba9d0dc420b5269a1b2f Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 27 Nov 2019 15:50:28 +0100 Subject: [PATCH 095/149] Remove last references to CUDA API Wrappers (cms-patatrack#417) --- .../interface/SiPixelGainCalibrationForHLTGPU.h | 2 -- .../SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc | 4 +--- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h | 2 -- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index 00386b06c2e6c..8bfefee5c3387 100644 --- 
a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -1,8 +1,6 @@ #ifndef CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h #define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h -#include - #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" #include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc index 9758731f0bb8e..e4f278c28ec69 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -87,9 +87,7 @@ SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() { const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const { const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU))); - cudaCheck( - cudaMalloc((void**)&data.gainDataOnGPU, - this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<> + cudaCheck(cudaMalloc((void**)&data.gainDataOnGPU, this->gains_->data().size())); // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory cudaCheck(cudaMemcpyAsync( data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream)); diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 575c72d33a69a..aedca75b90c17 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ 
b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -3,8 +3,6 @@ #include -#include - #include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" #include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" #include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" From 71d4137e3d957179ccf7c44c60a423acc0e14ca2 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 29 Nov 2019 14:53:14 +0100 Subject: [PATCH 096/149] Migrate ESProducers to use ESGetToken's This is now mandatory after #28223. --- ...PixelGainCalibrationForHLTGPUESProducer.cc | 12 +-- .../plugins/SiPixelRawToClusterCUDA.cc | 22 ++-- .../SiPixelRecHits/interface/PixelCPEFast.h | 4 +- .../plugins/PixelCPEFastESProducer.cc | 102 ++++++++++-------- .../SiPixelRecHits/src/PixelCPEFast.cc | 5 +- 5 files changed, 78 insertions(+), 67 deletions(-) diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc index 92c2d996d9622..bf8a0b2c5a75f 100644 --- a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc +++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc @@ -20,10 +20,12 @@ class SiPixelGainCalibrationForHLTGPUESProducer : public edm::ESProducer { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: + edm::ESGetToken gainsToken_; + edm::ESGetToken geometryToken_; }; SiPixelGainCalibrationForHLTGPUESProducer::SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig) { - setWhatProduced(this); + setWhatProduced(this).setConsumes(gainsToken_).setConsumes(geometryToken_); } void SiPixelGainCalibrationForHLTGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -33,12 +35,8 @@ void SiPixelGainCalibrationForHLTGPUESProducer::fillDescriptions(edm::Configurat std::unique_ptr 
SiPixelGainCalibrationForHLTGPUESProducer::produce( const SiPixelGainCalibrationForHLTGPURcd& iRecord) { - edm::ESHandle gains; - iRecord.getRecord().get(gains); - - edm::ESHandle geom; - iRecord.getRecord().get(geom); - + auto gains = iRecord.getHandle(gainsToken_); + auto geom = iRecord.getHandle(geometryToken_); return std::make_unique(*gains, *geom); } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index bba30f1492cf5..62004d385577d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -57,9 +57,11 @@ class SiPixelRawToClusterCUDA : public edm::stream::EDProducer recordWatcher; + edm::ESWatcher recordWatcher_; + edm::ESGetToken gpuMapToken_; + edm::ESGetToken gainsToken_; + edm::ESGetToken cablingMapToken_; - std::string cablingMapLabel_; std::unique_ptr cabling_; std::vector fedIds_; const SiPixelFedCablingMap* cablingMap_ = nullptr; @@ -78,7 +80,10 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi : rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), digiPutToken_(produces>()), clusterPutToken_(produces>()), - cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), + gpuMapToken_(esConsumes()), + gainsToken_(esConsumes()), + cablingMapToken_(esConsumes( + edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), includeErrors_(iConfig.getParameter("IncludeErrors")), useQuality_(iConfig.getParameter("UseQualityInfo")), usePilotBlade_(iConfig.getParameter("UsePilotBlade")) // Control the usage of pilot-blade data, FED=40 @@ -125,8 +130,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; - edm::ESHandle hgpuMap; - 
iSetup.get().get(hgpuMap); + auto hgpuMap = iSetup.getHandle(gpuMapToken_); if (hgpuMap->hasQuality() != useQuality_) { throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality_ @@ -135,8 +139,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, // get the GPU product already here so that the async transfer can begin const auto* gpuMap = hgpuMap->getGPUProductAsync(ctx.stream()); - edm::ESHandle hgains; - iSetup.get().get(hgains); + auto hgains = iSetup.getHandle(gainsToken_); // get the GPU product already here so that the async transfer can begin const auto* gpuGains = hgains->getGPUProductAsync(ctx.stream()); @@ -155,10 +158,9 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, } // initialize cabling map or update if necessary - if (recordWatcher.check(iSetup)) { + if (recordWatcher_.check(iSetup)) { // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) - edm::ESTransientHandle cablingMap; - iSetup.get().get(cablingMapLabel_, cablingMap); //Tav + auto cablingMap = iSetup.getTransientHandle(cablingMapToken_); cablingMap_ = cablingMap.product(); fedIds_ = cablingMap->fedIds(); cabling_ = cablingMap->cablingTree(); diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index aedca75b90c17..e43c45f90523f 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -41,7 +41,9 @@ class PixelCPEFast final : public PixelCPEBase { const SiPixelGenErrorDBObject *, const SiPixelLorentzAngle *); - ~PixelCPEFast() override; + ~PixelCPEFast() override = default; + + static void fillPSetDescription(edm::ParameterSetDescription &desc); // The return value can only be used safely in kernels launched on // the same cudaStream, or after cudaStreamSynchronize. 
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc index c31b8bb1f2dae..96e4a568e2bf3 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc @@ -5,90 +5,98 @@ #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/Records/interface/TrackerTopologyRcd.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" #include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/Framework/interface/ESProducer.h" // new record #include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" -#include "FWCore/Framework/interface/ESProducer.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" -#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" +#include #include class PixelCPEFastESProducer : public edm::ESProducer { public: - PixelCPEFastESProducer(const edm::ParameterSet &p); - std::shared_ptr produce(const TkPixelCPERecord &); + PixelCPEFastESProducer(const edm::ParameterSet& p); + std::unique_ptr produce(const TkPixelCPERecord&); + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: - std::shared_ptr cpe_; + edm::ESGetToken magfieldToken_; + edm::ESGetToken pDDToken_; + edm::ESGetToken hTTToken_; + edm::ESGetToken lorentzAngleToken_; + edm::ESGetToken lorentzAngleWidthToken_; + edm::ESGetToken genErrorDBObjectToken_; + edm::ParameterSet pset_; - edm::ESInputTag magname_; bool UseErrorsFromTemplates_; }; -#include 
-#include - using namespace edm; -PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet &p) { +PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet& p) { std::string myname = p.getParameter("ComponentName"); - magname_ = p.existsAs("MagneticFieldRecord") ? p.getParameter("MagneticFieldRecord") - : edm::ESInputTag(""); + auto magname = p.getParameter("MagneticFieldRecord"); UseErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); pset_ = p; - setWhatProduced(this, myname); + auto c = setWhatProduced(this, myname); + c.setConsumes(magfieldToken_, magname) + .setConsumes(pDDToken_) + .setConsumes(hTTToken_) + .setConsumes(lorentzAngleToken_, edm::ESInputTag("")); + c.setConsumes(lorentzAngleWidthToken_, edm::ESInputTag("", "forWidth")); + if (UseErrorsFromTemplates_) { + c.setConsumes(genErrorDBObjectToken_); + } } -std::shared_ptr PixelCPEFastESProducer::produce(const TkPixelCPERecord &iRecord) { - ESHandle magfield; - iRecord.getRecord().get(magname_, magfield); - - edm::ESHandle pDD; - iRecord.getRecord().get(pDD); - - edm::ESHandle hTT; - iRecord.getRecord().getRecord().get(hTT); - - // Lorant angle for offsets - ESHandle lorentzAngle; - iRecord.getRecord().get(lorentzAngle); - +std::unique_ptr PixelCPEFastESProducer::produce(const TkPixelCPERecord& iRecord) { // add the new la width object - ESHandle lorentzAngleWidth; - const SiPixelLorentzAngle *lorentzAngleWidthProduct = nullptr; - iRecord.getRecord().get("forWidth", lorentzAngleWidth); - lorentzAngleWidthProduct = lorentzAngleWidth.product(); + const SiPixelLorentzAngle* lorentzAngleWidthProduct = nullptr; + lorentzAngleWidthProduct = &iRecord.get(lorentzAngleWidthToken_); - const SiPixelGenErrorDBObject *genErrorDBObjectProduct = nullptr; + const SiPixelGenErrorDBObject* genErrorDBObjectProduct = nullptr; // Errors take only from new GenError - ESHandle genErrorDBObject; if (UseErrorsFromTemplates_) { // do only when generrors are needed - 
iRecord.getRecord().get(genErrorDBObject); - genErrorDBObjectProduct = genErrorDBObject.product(); + genErrorDBObjectProduct = &iRecord.get(genErrorDBObjectToken_); //} else { //std::cout<<" pass an empty GenError pointer"<(pset_, - magfield.product(), - *pDD.product(), - *hTT.product(), - lorentzAngle.product(), + return std::make_unique(pset_, + &iRecord.get(magfieldToken_), + iRecord.get(pDDToken_), + iRecord.get(hTTToken_), + &iRecord.get(lorentzAngleToken_), genErrorDBObjectProduct, lorentzAngleWidthProduct); - - return cpe_; } -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +void PixelCPEFastESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + // PixelCPEFastESProducer + edm::ParameterSetDescription desc; + desc.add("DoLorentz", false); + desc.add("lAWidthFPix", 0); + desc.add("useLAAlignmentOffsets", false); + desc.add("LoadTemplatesFromDB", true); + desc.add("UseErrorsFromTemplates", true); + desc.add("EdgeClusterErrorX", 50.0); + desc.add("MagneticFieldRecord", edm::ESInputTag("","")); + desc.add("useLAWidthFromDB", true); + desc.add("TruncatePixelCharge", true); + desc.add("ClusterProbComputationFlag", 0); + desc.add("lAOffset", 0); + desc.add("EdgeClusterErrorY", 85.0); + desc.add("ComponentName", "PixelCPEFast"); + desc.add("lAWidthBPix", 0); + desc.add("Alpha2Order", true); + descriptions.add("PixelCPEFastESProducer", desc); +} DEFINE_FWK_EVENTSETUP_MODULE(PixelCPEFastESProducer); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 0b794e1219909..133afca6d3ddd 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -303,8 +303,6 @@ void PixelCPEFast::fillParamsForGpu() { memcpy(m_layerGeometry.layer, phase1PixelTopology::layer.data(), 
phase1PixelTopology::layer.size()); } -PixelCPEFast::~PixelCPEFast() {} - PixelCPEFast::GPUData::~GPUData() { if (d_paramsOnGPU != nullptr) { cudaFree((void*)h_paramsOnGPU.m_commonParams); @@ -583,3 +581,6 @@ LocalError PixelCPEFast::localError(DetParam const& theDetParam, ClusterParam& t return LocalError(xerr_sq, 0, yerr_sq); } + +void PixelCPEFast::fillPSetDescription(edm::ParameterSetDescription& desc) { +} From acde18eaf2df83cbb6e8b0c32eeb00ec5ce766a4 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 29 Nov 2019 12:10:12 +0100 Subject: [PATCH 097/149] Apply code checks and code formatting --- .../TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h | 4 ++-- .../SiPixelRecHits/plugins/PixelCPEFastESProducer.cc | 2 +- .../SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc | 2 +- RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc | 3 +-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index 1b27558efe3ee..aa551f21b4aad 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -91,7 +91,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH (std::is_same::value) { cudautils::copyAsync(m_view, view, stream); } else { - m_view.reset(view.release()); + m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version } return; } @@ -138,7 +138,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH (std::is_same::value) { cudautils::copyAsync(m_view, view, stream); } else { - m_view.reset(view.release()); + m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version } } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc index 
96e4a568e2bf3..8b6cba9a9232b 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc @@ -87,7 +87,7 @@ void PixelCPEFastESProducer::fillDescriptions(edm::ConfigurationDescriptions& de desc.add("LoadTemplatesFromDB", true); desc.add("UseErrorsFromTemplates", true); desc.add("EdgeClusterErrorX", 50.0); - desc.add("MagneticFieldRecord", edm::ESInputTag("","")); + desc.add("MagneticFieldRecord", edm::ESInputTag()); desc.add("useLAWidthFromDB", true); desc.add("TruncatePixelCharge", true); desc.add("ClusterProbComputationFlag", 0); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index f58d81faaeea1..7900cf8b2289a 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -252,7 +252,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv output->hitsLayerStart()[i] = hitsModuleStart[cpeView.layerGeometry().layerStart[i]]; } cudautils::fillManyFromVector( - output->phiBinner(), nullptr, 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, 0); + output->phiBinner(), nullptr, 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, nullptr); // std::cout << "created HitSoa for " << numberOfClusters << " clusters in " << numberOfDetUnits << " Dets" << std::endl; iEvent.put(std::move(output)); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 133afca6d3ddd..b652458811d45 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -582,5 +582,4 @@ LocalError PixelCPEFast::localError(DetParam const& theDetParam, ClusterParam& t return LocalError(xerr_sq, 0, yerr_sq); } -void 
PixelCPEFast::fillPSetDescription(edm::ParameterSetDescription& desc) { -} +void PixelCPEFast::fillPSetDescription(edm::ParameterSetDescription& desc) {} From 2595d76b577a7df9be57a936bf654231cba1a1d2 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Mon, 2 Dec 2019 14:37:30 +0100 Subject: [PATCH 098/149] Move LogWarning into the EDProducer (cms-patatrack#422) In CMSSW 11.0.0 the MessageLogger is no longer usable in CUDA code due to the use of c++17 features. --- .../SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc index 2ec203819731b..1641719d0537d 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -96,6 +96,12 @@ void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, cons iEvent.getByToken(tBeamSpot, hbs); auto const& bs = ctx.get(*hbs); + auto nHits = clusters.nClusters(); + if (nHits >= TrackingRecHit2DSOAView::maxHits()) { + edm::LogWarning("PixelRecHitGPUKernel") + << "Clusters/Hits Overflow " << nHits << " >= " << TrackingRecHit2DSOAView::maxHits(); + } + ctx.emplace(iEvent, tokenHit_, gpuAlgo_.makeHitsAsync(digis, clusters, bs, fcpe->getGPUProductAsync(ctx.stream()), ctx.stream())); From b3879ebfc07299836dbe3d43919d59ea0d6aa382 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 2 Dec 2019 14:43:35 +0100 Subject: [PATCH 099/149] Rename exitSansCUDADevices to requireCUDADevices (cms-patatrack#423) --- .../TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp | 4 ++-- RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index 42be4bc6991e1..592f0267c2f7d 100644 --- 
a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -1,6 +1,6 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" namespace testTrackingRecHit2D { @@ -10,7 +10,7 @@ namespace testTrackingRecHit2D { } int main() { - exitSansCUDADevices(); + requireCUDADevices(); cudaStream_t stream; cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index c808bc780353c..b22e7a35a6ac7 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -13,7 +13,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaDeviceCount.h" #endif @@ -23,7 +23,7 @@ int main(void) { #ifdef __CUDACC__ - exitSansCUDADevices(); + requireCUDADevices(); if (cudautils::cudaDeviceCount() == 0) { std::cerr << "No CUDA devices on this system" From e77a278d65bab0f6a678f5f6f18e8a0fee7a6455 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 17 Jan 2020 09:10:53 -0600 Subject: [PATCH 100/149] Implement changes from the CUDA framework review (cms-patatrack#429) Rename the cudautils namespace to cms::cuda or cms::cudatest, and drop the CUDA prefix 
from the symbols defined there. Always record and query the CUDA event, to minimize need for error checking in CUDAScopedContextProduce destructor. Add comments to highlight the pieces in CachingDeviceAllocator that have been changed wrt. cub. Various other updates and clean up: - enable CUDA for compute capability 3.5. - clean up CUDAService, CUDA tests and plugins. - add CUDA existence protections to BuildFiles. - mark thread-safe static variables with CMS_THREAD_SAFE. --- CUDADataFormats/Common/BuildFile.xml | 2 + .../interface/SiPixelClustersCUDA.h | 10 ++--- .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 14 +++--- CUDADataFormats/SiPixelCluster/src/classes.h | 2 +- .../SiPixelCluster/src/classes_def.xml | 4 +- .../interface/SiPixelDigiErrorsCUDA.h | 8 ++-- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 24 +++++----- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 16 +++---- .../SiPixelDigi/src/SiPixelDigisCUDA.cc | 44 +++++++++---------- CUDADataFormats/SiPixelDigi/src/classes.h | 2 +- .../SiPixelDigi/src/classes_def.xml | 8 ++-- .../interface/TrackingRecHit2DHeterogeneous.h | 10 ++--- CUDADataFormats/TrackingRecHit/src/classes.h | 2 +- .../TrackingRecHit/src/classes_def.xml | 4 +- .../test/TrackingRecHit2DCUDA_t.cpp | 4 +- .../SiPixelGainCalibrationForHLTGPU.h | 4 +- .../plugins/SiPixelDigiErrorsSoAFromCUDA.cc | 13 +++--- .../plugins/SiPixelDigisSoAFromCUDA.cc | 18 ++++---- .../plugins/SiPixelRawToClusterCUDA.cc | 24 +++++----- .../plugins/SiPixelRawToClusterGPUKernel.cu | 10 ++--- .../plugins/SiPixelRawToClusterGPUKernel.h | 8 ++-- .../SiPixelClusterizer/test/gpuClustering_t.h | 33 ++++++-------- .../SiPixelRecHits/interface/PixelCPEFast.h | 4 +- .../plugins/SiPixelRecHitCUDA.cc | 29 ++++++------ .../plugins/SiPixelRecHitSoAFromLegacy.cc | 3 +- 25 files changed, 147 insertions(+), 153 deletions(-) diff --git a/CUDADataFormats/Common/BuildFile.xml b/CUDADataFormats/Common/BuildFile.xml index 98033aab4d99d..e7a5ba74d80be 100644 --- 
a/CUDADataFormats/Common/BuildFile.xml +++ b/CUDADataFormats/Common/BuildFile.xml @@ -1,5 +1,7 @@ + + diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index d3650e164d44e..dbfb5ff5e1761 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -58,14 +58,14 @@ class SiPixelClustersCUDA { DeviceConstView *view() const { return view_d.get(); } private: - cudautils::device::unique_ptr moduleStart_d; // index of the first pixel of each module - cudautils::device::unique_ptr clusInModule_d; // number of clusters found in each module - cudautils::device::unique_ptr moduleId_d; // module id of each module + cms::cuda::device::unique_ptr moduleStart_d; // index of the first pixel of each module + cms::cuda::device::unique_ptr clusInModule_d; // number of clusters found in each module + cms::cuda::device::unique_ptr moduleId_d; // module id of each module // originally from rechits - cudautils::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module + cms::cuda::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module - cudautils::device::unique_ptr view_d; // "me" pointer + cms::cuda::device::unique_ptr view_d; // "me" pointer uint32_t nClusters_h; }; diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index c814cd4a2e131..7bef9d0d8a52f 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -5,17 +5,17 @@ #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) { - moduleStart_d = cudautils::make_device_unique(maxClusters + 1, stream); - clusInModule_d = 
cudautils::make_device_unique(maxClusters, stream); - moduleId_d = cudautils::make_device_unique(maxClusters, stream); - clusModuleStart_d = cudautils::make_device_unique(maxClusters + 1, stream); + moduleStart_d = cms::cuda::make_device_unique(maxClusters + 1, stream); + clusInModule_d = cms::cuda::make_device_unique(maxClusters, stream); + moduleId_d = cms::cuda::make_device_unique(maxClusters, stream); + clusModuleStart_d = cms::cuda::make_device_unique(maxClusters + 1, stream); - auto view = cudautils::make_host_unique(stream); + auto view = cms::cuda::make_host_unique(stream); view->moduleStart_ = moduleStart_d.get(); view->clusInModule_ = clusInModule_d.get(); view->moduleId_ = moduleId_d.get(); view->clusModuleStart_ = clusModuleStart_d.get(); - view_d = cudautils::make_device_unique(stream); - cudautils::copyAsync(view_d, view, stream); + view_d = cms::cuda::make_device_unique(stream); + cms::cuda::copyAsync(view_d, view, stream); } diff --git a/CUDADataFormats/SiPixelCluster/src/classes.h b/CUDADataFormats/SiPixelCluster/src/classes.h index 08d46244adc7d..0698cb103dab9 100644 --- a/CUDADataFormats/SiPixelCluster/src/classes.h +++ b/CUDADataFormats/SiPixelCluster/src/classes.h @@ -1,7 +1,7 @@ #ifndef CUDADataFormats_SiPixelCluster_classes_h #define CUDADataFormats_SiPixelCluster_classes_h -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/SiPixelCluster/src/classes_def.xml b/CUDADataFormats/SiPixelCluster/src/classes_def.xml index ba0706ac4b8aa..70decb9f27df7 100644 --- a/CUDADataFormats/SiPixelCluster/src/classes_def.xml +++ b/CUDADataFormats/SiPixelCluster/src/classes_def.xml @@ -1,4 +1,4 @@ - - + + diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index 
7c18d58a3fc12..1557fd64750e7 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -26,15 +26,15 @@ class SiPixelDigiErrorsCUDA { GPU::SimpleVector const* c_error() const { return error_d.get(); } using HostDataError = - std::pair, cudautils::host::unique_ptr>; + std::pair, cms::cuda::host::unique_ptr>; HostDataError dataErrorToHostAsync(cudaStream_t stream) const; void copyErrorToHostAsync(cudaStream_t stream); private: - cudautils::device::unique_ptr data_d; - cudautils::device::unique_ptr> error_d; - cudautils::host::unique_ptr> error_h; + cms::cuda::device::unique_ptr data_d; + cms::cuda::device::unique_ptr> error_d; + cms::cuda::host::unique_ptr> error_h; PixelFormatterErrors formatterErrors_h; }; diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 47efe634ad93d..04207f3e0b385 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -50,10 +50,10 @@ class SiPixelDigisCUDA { uint32_t const *c_pdigi() const { return pdigi_d.get(); } uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); } - cudautils::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const; class DeviceConstView { public: @@ -79,17 +79,17 @@ class SiPixelDigisCUDA { private: // These are consumed by downstream device 
code - cudautils::device::unique_ptr xx_d; // local coordinates of each pixel - cudautils::device::unique_ptr yy_d; // - cudautils::device::unique_ptr adc_d; // ADC of each pixel - cudautils::device::unique_ptr moduleInd_d; // module id of each pixel - cudautils::device::unique_ptr clus_d; // cluster id of each pixel - cudautils::device::unique_ptr view_d; // "me" pointer + cms::cuda::device::unique_ptr xx_d; // local coordinates of each pixel + cms::cuda::device::unique_ptr yy_d; // + cms::cuda::device::unique_ptr adc_d; // ADC of each pixel + cms::cuda::device::unique_ptr moduleInd_d; // module id of each pixel + cms::cuda::device::unique_ptr clus_d; // cluster id of each pixel + cms::cuda::device::unique_ptr view_d; // "me" pointer // These are for CPU output; should we (eventually) place them to a // separate product? - cudautils::device::unique_ptr pdigi_d; - cudautils::device::unique_ptr rawIdArr_d; + cms::cuda::device::unique_ptr pdigi_d; + cms::cuda::device::unique_ptr rawIdArr_d; uint32_t nModules_h = 0; uint32_t nDigis_h = 0; diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index 7640348c15f08..ffef71092f6c9 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -9,32 +9,32 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream) : formatterErrors_h(std::move(errors)) { - error_d = cudautils::make_device_unique>(stream); - data_d = cudautils::make_device_unique(maxFedWords, stream); + error_d = cms::cuda::make_device_unique>(stream); + data_d = cms::cuda::make_device_unique(maxFedWords, stream); - cudautils::memsetAsync(data_d, 0x00, maxFedWords, stream); + cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream); - error_h = cudautils::make_host_unique>(stream); + error_h = cms::cuda::make_host_unique>(stream); 
GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); assert(error_h->empty()); assert(error_h->capacity() == static_cast(maxFedWords)); - cudautils::copyAsync(error_d, error_h, stream); + cms::cuda::copyAsync(error_d, error_h, stream); } void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) { - cudautils::copyAsync(error_h, error_d, stream); + cms::cuda::copyAsync(error_h, error_d, stream); } SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { // On one hand size() could be sufficient. On the other hand, if // someone copies the SimpleVector<>, (s)he might expect the data // buffer to actually have space for capacity() elements. - auto data = cudautils::make_host_unique(error_h->capacity(), stream); + auto data = cms::cuda::make_host_unique(error_h->capacity(), stream); // but transfer only the required amount if (not error_h->empty()) { - cudautils::copyAsync(data, data_d, error_h->size(), stream); + cms::cuda::copyAsync(data, data_d, error_h->size(), stream); } auto err = *error_h; err.set_data(data.get()); diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index a8aab7ab5a4b8..664364b6ff25a 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -5,46 +5,46 @@ #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) { - xx_d = cudautils::make_device_unique(maxFedWords, stream); - yy_d = cudautils::make_device_unique(maxFedWords, stream); - adc_d = cudautils::make_device_unique(maxFedWords, stream); - moduleInd_d = cudautils::make_device_unique(maxFedWords, stream); - clus_d = cudautils::make_device_unique(maxFedWords, stream); + xx_d = cms::cuda::make_device_unique(maxFedWords, stream); + yy_d = cms::cuda::make_device_unique(maxFedWords, stream); + adc_d = 
cms::cuda::make_device_unique(maxFedWords, stream); + moduleInd_d = cms::cuda::make_device_unique(maxFedWords, stream); + clus_d = cms::cuda::make_device_unique(maxFedWords, stream); - pdigi_d = cudautils::make_device_unique(maxFedWords, stream); - rawIdArr_d = cudautils::make_device_unique(maxFedWords, stream); + pdigi_d = cms::cuda::make_device_unique(maxFedWords, stream); + rawIdArr_d = cms::cuda::make_device_unique(maxFedWords, stream); - auto view = cudautils::make_host_unique(stream); + auto view = cms::cuda::make_host_unique(stream); view->xx_ = xx_d.get(); view->yy_ = yy_d.get(); view->adc_ = adc_d.get(); view->moduleInd_ = moduleInd_d.get(); view->clus_ = clus_d.get(); - view_d = cudautils::make_device_unique(stream); - cudautils::copyAsync(view_d, view, stream); + view_d = cms::cuda::make_device_unique(stream); + cms::cuda::copyAsync(view_d, view, stream); } -cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(nDigis(), stream); - cudautils::copyAsync(ret, adc_d, nDigis(), stream); +cms::cuda::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, adc_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(nDigis(), stream); - cudautils::copyAsync(ret, clus_d, nDigis(), stream); +cms::cuda::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, clus_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(nDigis(), stream); - cudautils::copyAsync(ret, pdigi_d, nDigis(), stream); +cms::cuda::host::unique_ptr 
SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, pdigi_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(nDigis(), stream); - cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream); +cms::cuda::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, rawIdArr_d, nDigis(), stream); return ret; } diff --git a/CUDADataFormats/SiPixelDigi/src/classes.h b/CUDADataFormats/SiPixelDigi/src/classes.h index 41b135640b883..fca0811e4650f 100644 --- a/CUDADataFormats/SiPixelDigi/src/classes.h +++ b/CUDADataFormats/SiPixelDigi/src/classes.h @@ -1,7 +1,7 @@ #ifndef CUDADataFormats_SiPixelDigi_classes_h #define CUDADataFormats_SiPixelDigi_classes_h -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/SiPixelDigi/src/classes_def.xml b/CUDADataFormats/SiPixelDigi/src/classes_def.xml index 9d6816ed3b14c..ff775afdc2046 100644 --- a/CUDADataFormats/SiPixelDigi/src/classes_def.xml +++ b/CUDADataFormats/SiPixelDigi/src/classes_def.xml @@ -1,7 +1,7 @@ - - + + - - + + diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index aa551f21b4aad..955f97ca6bd54 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -37,9 +37,9 @@ class TrackingRecHit2DHeterogeneous { 
auto iphi() { return m_iphi; } // only the local coord and detector index - cudautils::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; private: static constexpr uint32_t n16 = 4; @@ -89,7 +89,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH constexpr #endif (std::is_same::value) { - cudautils::copyAsync(m_view, view, stream); + cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version } @@ -136,7 +136,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH constexpr #endif (std::is_same::value) { - cudautils::copyAsync(m_view, view, stream); + cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version } diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h index 90cfd0945d76e..d80226ec7a14b 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes.h +++ b/CUDADataFormats/TrackingRecHit/src/classes.h @@ -1,7 +1,7 @@ #ifndef CUDADataFormats_SiPixelCluster_src_classes_h #define CUDADataFormats_SiPixelCluster_src_classes_h -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/Common/interface/HostProduct.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml 
b/CUDADataFormats/TrackingRecHit/src/classes_def.xml index 4e8325ddce87e..02b0eb37d157b 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes_def.xml +++ b/CUDADataFormats/TrackingRecHit/src/classes_def.xml @@ -1,10 +1,10 @@ - + - + diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index 592f0267c2f7d..32af6c181ae68 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -1,6 +1,6 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" namespace testTrackingRecHit2D { @@ -10,7 +10,7 @@ namespace testTrackingRecHit2D { } int main() { - requireCUDADevices(); + cms::cudatest::requireDevices(); cudaStream_t stream; cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index 8bfefee5c3387..6fb487a244e71 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -2,7 +2,7 @@ #define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" class SiPixelGainCalibrationForHLT; class SiPixelGainForHLTonGPU; @@ -26,7 +26,7 @@ class SiPixelGainCalibrationForHLTGPU { SiPixelGainForHLTonGPU 
*gainForHLTonGPU = nullptr; SiPixelGainForHLTonGPU_DecodingStructure *gainDataOnGPU = nullptr; }; - CUDAESProduct gpuData_; + cms::cuda::ESProduct gpuData_; }; #endif // CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc index ad6c46082be8b..be4cc5d9a3336 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -1,4 +1,4 @@ -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h" #include "FWCore/Framework/interface/EventSetup.h" @@ -8,7 +8,7 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer { @@ -24,16 +24,17 @@ class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer> digiErrorGetToken_; + edm::EDGetTokenT> digiErrorGetToken_; edm::EDPutTokenT digiErrorPutToken_; - cudautils::host::unique_ptr data_; + cms::cuda::host::unique_ptr data_; GPU::SimpleVector error_; const PixelFormatterErrors* formatterErrors_ = nullptr; }; SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig) - : digiErrorGetToken_(consumes>(iConfig.getParameter("src"))), + : digiErrorGetToken_( + consumes>(iConfig.getParameter("src"))), digiErrorPutToken_(produces()) {} 
void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -46,7 +47,7 @@ void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { // Do the transfer in a CUDA stream parallel to the computation CUDA stream - CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; const auto& gpuDigiErrors = ctx.get(iEvent, digiErrorGetToken_); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc index 7794032154e98..dbec74585998f 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc @@ -1,4 +1,4 @@ -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" #include "FWCore/Framework/interface/EventSetup.h" @@ -8,7 +8,7 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer { @@ -24,19 +24,19 @@ class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer> digiGetToken_; + edm::EDGetTokenT> digiGetToken_; edm::EDPutTokenT digiPutToken_; - cudautils::host::unique_ptr pdigi_; - cudautils::host::unique_ptr rawIdArr_; - cudautils::host::unique_ptr adc_; - 
cudautils::host::unique_ptr clus_; + cms::cuda::host::unique_ptr pdigi_; + cms::cuda::host::unique_ptr rawIdArr_; + cms::cuda::host::unique_ptr adc_; + cms::cuda::host::unique_ptr clus_; int nDigis_; }; SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig) - : digiGetToken_(consumes>(iConfig.getParameter("src"))), + : digiGetToken_(consumes>(iConfig.getParameter("src"))), digiPutToken_(produces()) {} void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -49,7 +49,7 @@ void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { // Do the transfer in a CUDA stream parallel to the computation CUDA stream - CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; const auto& gpuDigis = ctx.get(iEvent, digiGetToken_); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index 62004d385577d..95aac36dbd197 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -1,4 +1,4 @@ -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" @@ -25,7 +25,7 @@ #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include 
"HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h" #include "RecoTracker/Record/interface/CkfComponentsRecord.h" @@ -51,11 +51,11 @@ class SiPixelRawToClusterCUDA : public edm::stream::EDProducer rawGetToken_; - edm::EDPutTokenT> digiPutToken_; - edm::EDPutTokenT> digiErrorPutToken_; - edm::EDPutTokenT> clusterPutToken_; + edm::EDPutTokenT> digiPutToken_; + edm::EDPutTokenT> digiErrorPutToken_; + edm::EDPutTokenT> clusterPutToken_; - CUDAContextState ctxState_; + cms::cuda::ContextState ctxState_; edm::ESWatcher recordWatcher_; edm::ESGetToken gpuMapToken_; @@ -78,8 +78,8 @@ class SiPixelRawToClusterCUDA : public edm::stream::EDProducer(iConfig.getParameter("InputLabel"))), - digiPutToken_(produces>()), - clusterPutToken_(produces>()), + digiPutToken_(produces>()), + clusterPutToken_(produces>()), gpuMapToken_(esConsumes()), gainsToken_(esConsumes()), cablingMapToken_(esConsumes( @@ -89,7 +89,7 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi usePilotBlade_(iConfig.getParameter("UsePilotBlade")) // Control the usage of pilot-blade data, FED=40 { if (includeErrors_) { - digiErrorPutToken_ = produces>(); + digiErrorPutToken_ = produces>(); } // regions @@ -128,7 +128,7 @@ void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& d void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; auto hgpuMap = iSetup.getHandle(gpuMapToken_); if (hgpuMap->hasQuality() != useQuality_) { @@ -143,7 +143,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, // get the GPU 
product already here so that the async transfer can begin const auto* gpuGains = hgains->getGPUProductAsync(ctx.stream()); - cudautils::device::unique_ptr modulesToUnpackRegional; + cms::cuda::device::unique_ptr modulesToUnpackRegional; const unsigned char* gpuModulesToUnpack; if (regions_) { @@ -247,7 +247,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, } void SiPixelRawToClusterCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { - CUDAScopedContextProduce ctx{ctxState_}; + cms::cuda::ScopedContextProduce ctx{ctxState_}; auto tmp = gpuAlgo_.getResults(); ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 8e0d5123e6ecc..53af26ac7527d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -47,8 +47,8 @@ namespace pixelgpudetails { constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender() { - word_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); - fedId_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); + word_ = cms::cuda::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); + fedId_ = cms::cuda::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); } void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId, @@ -549,7 +549,7 @@ namespace pixelgpudetails { } clusters_d = SiPixelClustersCUDA(gpuClustering::MaxNumModules, stream); - nModules_Clusters_h = cudautils::make_host_unique(2, stream); + nModules_Clusters_h = cms::cuda::make_host_unique(2, stream); if (wordCounter) // protect in case of empty event.... 
{ @@ -558,8 +558,8 @@ namespace pixelgpudetails { assert(0 == wordCounter % 2); // wordCounter is the total no of words in each event to be trasfered on device - auto word_d = cudautils::make_device_unique(wordCounter, stream); - auto fedId_d = cudautils::make_device_unique(wordCounter, stream); + auto word_d = cms::cuda::make_device_unique(wordCounter, stream); + auto fedId_d = cms::cuda::make_device_unique(wordCounter, stream); cudaCheck( cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 888fc07953d9d..767c5a1e92ad0 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -156,8 +156,8 @@ namespace pixelgpudetails { const unsigned char* fedId() const { return fedId_.get(); } private: - cudautils::host::noncached::unique_ptr word_; - cudautils::host::noncached::unique_ptr fedId_; + cms::cuda::host::noncached::unique_ptr word_; + cms::cuda::host::noncached::unique_ptr fedId_; }; SiPixelRawToClusterGPUKernel() = default; @@ -187,7 +187,7 @@ namespace pixelgpudetails { // stream is still alive // // technically the statement above is not true anymore now that - // the CUDA streams are cached within the CUDAStreamCache, but it is + // the CUDA streams are cached within the cms::cuda::StreamCache, but it is // still better to release as early as possible nModules_Clusters_h.reset(); return std::make_pair(std::move(digis_d), std::move(clusters_d)); @@ -199,7 +199,7 @@ namespace pixelgpudetails { uint32_t nDigis = 0; // Data to be put in the event - cudautils::host::unique_ptr nModules_Clusters_h; + cms::cuda::host::unique_ptr nModules_Clusters_h; SiPixelDigisCUDA digis_d; SiPixelClustersCUDA clusters_d; SiPixelDigiErrorsCUDA 
digiErrors_d; diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index b22e7a35a6ac7..8ec665f8960b6 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -13,9 +13,8 @@ #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaDeviceCount.h" #endif #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" @@ -23,13 +22,7 @@ int main(void) { #ifdef __CUDACC__ - requireCUDADevices(); - - if (cudautils::cudaDeviceCount() == 0) { - std::cerr << "No CUDA devices on this system" - << "\n"; - exit(EXIT_FAILURE); - } + cms::cudatest::requireDevices(); #endif using namespace gpuClustering; @@ -44,14 +37,14 @@ int main(void) { auto h_clus = std::make_unique(numElements); #ifdef __CUDACC__ - auto d_id = cudautils::make_device_unique(numElements, nullptr); - auto d_x = cudautils::make_device_unique(numElements, nullptr); - auto d_y = cudautils::make_device_unique(numElements, nullptr); - auto d_adc = cudautils::make_device_unique(numElements, nullptr); - auto d_clus = cudautils::make_device_unique(numElements, nullptr); - auto d_moduleStart = cudautils::make_device_unique(MaxNumModules + 1, nullptr); - auto d_clusInModule = cudautils::make_device_unique(MaxNumModules, nullptr); - auto d_moduleId = cudautils::make_device_unique(MaxNumModules, nullptr); + auto d_id = cms::cuda::make_device_unique(numElements, nullptr); + auto d_x = cms::cuda::make_device_unique(numElements, nullptr); + auto d_y = cms::cuda::make_device_unique(numElements, nullptr); + auto d_adc = 
cms::cuda::make_device_unique(numElements, nullptr); + auto d_clus = cms::cuda::make_device_unique(numElements, nullptr); + auto d_moduleStart = cms::cuda::make_device_unique(MaxNumModules + 1, nullptr); + auto d_clusInModule = cms::cuda::make_device_unique(MaxNumModules, nullptr); + auto d_moduleId = cms::cuda::make_device_unique(MaxNumModules, nullptr); #else auto h_moduleStart = std::make_unique(MaxNumModules + 1); @@ -262,7 +255,7 @@ int main(void) { std::cout << "CUDA countModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads\n"; - cudautils::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); + cms::cuda::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); blocksPerGrid = MaxNumModules; //nModules; @@ -270,7 +263,7 @@ int main(void) { << " threads\n"; cudaCheck(cudaMemset(d_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t))); - cudautils::launch(findClus, + cms::cuda::launch(findClus, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_x.get(), @@ -296,7 +289,7 @@ int main(void) { if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; - cudautils::launch(clusterChargeCut, + cms::cuda::launch(clusterChargeCut, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_adc.get(), diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index e43c45f90523f..70e30563c66c3 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -6,7 +6,7 @@ #include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" #include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" #include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" #include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" @@ -95,7 +95,7 @@ class PixelCPEFast final : public PixelCPEBase { pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; pixelCPEforGPU::ParamsOnGPU *d_paramsOnGPU = nullptr; // copy of the above on the Device }; - CUDAESProduct gpuData_; + cms::cuda::ESProduct gpuData_; void fillParamsForGpu(); }; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc index 1641719d0537d..4d85c41339020 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -1,8 +1,7 @@ #include #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "CUDADataFormats/Common/interface/CUDAProduct.h" -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" 
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" @@ -18,7 +17,7 @@ #include "FWCore/Utilities/interface/InputTag.h" #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" @@ -36,11 +35,11 @@ class SiPixelRecHitCUDA : public edm::global::EDProducer<> { void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; // The mess with inputs will be cleaned up when migrating to the new framework - edm::EDGetTokenT> tBeamSpot; - edm::EDGetTokenT> token_; - edm::EDGetTokenT> tokenDigi_; + edm::EDGetTokenT> tBeamSpot; + edm::EDGetTokenT> token_; + edm::EDGetTokenT> tokenDigi_; - edm::EDPutTokenT> tokenHit_; + edm::EDPutTokenT> tokenHit_; std::string cpeName_; @@ -48,10 +47,10 @@ class SiPixelRecHitCUDA : public edm::global::EDProducer<> { }; SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig) - : tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), - token_(consumes>(iConfig.getParameter("src"))), - tokenDigi_(consumes>(iConfig.getParameter("src"))), - tokenHit_(produces>()), + : tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), + token_(consumes>(iConfig.getParameter("src"))), + tokenDigi_(consumes>(iConfig.getParameter("src"))), + tokenHit_(produces>()), cpeName_(iConfig.getParameter("CPE")) {} void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -82,17 +81,17 @@ void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, cons throw 
cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; } - edm::Handle> hclusters; + edm::Handle> hclusters; iEvent.getByToken(token_, hclusters); - CUDAScopedContextProduce ctx{*hclusters}; + cms::cuda::ScopedContextProduce ctx{*hclusters}; auto const& clusters = ctx.get(*hclusters); - edm::Handle> hdigis; + edm::Handle> hdigis; iEvent.getByToken(tokenDigi_, hdigis); auto const& digis = ctx.get(*hdigis); - edm::Handle> hbs; + edm::Handle> hbs; iEvent.getByToken(tBeamSpot, hbs); auto const& bs = ctx.get(*hbs); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 7900cf8b2289a..fbe0fd13b84a4 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -22,7 +22,6 @@ #include "FWCore/Utilities/interface/InputTag.h" #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" @@ -251,7 +250,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv for (auto i = 0; i < 11; ++i) { output->hitsLayerStart()[i] = hitsModuleStart[cpeView.layerGeometry().layerStart[i]]; } - cudautils::fillManyFromVector( + cms::cuda::fillManyFromVector( output->phiBinner(), nullptr, 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, nullptr); // std::cout << "created HitSoa for " << numberOfClusters << " clusters in " << numberOfDetUnits << " Dets" << std::endl; From db9c642ecc9b16bf1895781925bc1a7211c1741c Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 27 Jan 
2020 12:17:14 +0100 Subject: [PATCH 101/149] Synchronise with CMSSW_11_1_0_pre2 Major changes: - restructure the RecoPixelVertexing/PixelVertexFinding package; - update the interface of PixelCPEFast. --- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h | 8 +++----- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h | 2 +- RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index 96956564e1a5a..80cc68d3db85f 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -149,7 +149,7 @@ class PixelCPEBase : public PixelClusterParameterEstimator { #endif DetParam const& theDetParam = detParam(det); - ClusterParam* theClusterParam = createClusterParam(cl); + std::unique_ptr theClusterParam = createClusterParam(cl); setTheClu(theDetParam, *theClusterParam); computeAnglesFromDetPosition(theDetParam, *theClusterParam); @@ -158,7 +158,6 @@ class PixelCPEBase : public PixelClusterParameterEstimator { LocalError le = localError(theDetParam, *theClusterParam); SiPixelRecHitQuality::QualWordType rqw = rawQualityWord(*theClusterParam); auto tuple = std::make_tuple(lp, le, rqw); - delete theClusterParam; //std::cout<<" in PixelCPEBase:localParameters(all) - "< theClusterParam = createClusterParam(cl); setTheClu(theDetParam, *theClusterParam); computeAnglesFromTrajectory(theDetParam, *theClusterParam, ltp); @@ -185,14 +184,13 @@ class PixelCPEBase : public PixelClusterParameterEstimator { LocalError le = localError(theDetParam, *theClusterParam); SiPixelRecHitQuality::QualWordType rqw = rawQualityWord(*theClusterParam); auto tuple = std::make_tuple(lp, le, rqw); - delete theClusterParam; //std::cout<<" in PixelCPEBase:localParameters(on track) - "< createClusterParam(const SiPixelCluster& cl) const = 0; 
//-------------------------------------------------------------------------- // This is where the action happens. diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 70e30563c66c3..2208c449884c5 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -52,7 +52,7 @@ class PixelCPEFast final : public PixelCPEBase { pixelCPEforGPU::ParamsOnGPU const &getCPUProduct() const { return cpuData_; } private: - ClusterParam *createClusterParam(const SiPixelCluster &cl) const override; + std::unique_ptr createClusterParam(const SiPixelCluster &cl) const override; LocalPoint localPosition(DetParam const &theDetParam, ClusterParam &theClusterParam) const override; LocalError localError(DetParam const &theDetParam, ClusterParam &theClusterParam) const override; diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index b652458811d45..1480d6a81ec0e 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -312,8 +312,8 @@ PixelCPEFast::GPUData::~GPUData() { } } -PixelCPEBase::ClusterParam* PixelCPEFast::createClusterParam(const SiPixelCluster& cl) const { - return new ClusterParamGeneric(cl); +std::unique_ptr PixelCPEFast::createClusterParam(const SiPixelCluster& cl) const { + return std::make_unique(cl); } void PixelCPEFast::errorFromTemplates(DetParam const& theDetParam, From 850455d0a7af19bdf868c1e419bfc10558766d68 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Fri, 31 Jan 2020 13:53:06 +0100 Subject: [PATCH 102/149] Protect against too many pixels (cms-patatrack#436) Add a protection to avoid overflow in case there are more pixels in a module than the allowed maximum. 
This has been seen if the simulation has an unrealistically low charge threshold, which should not happen in CMSSW 11.0.x and later. --- .../SiPixelClusterizer/plugins/gpuClustering.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 7997571d1cbd2..f47f06e6ec563 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -89,7 +89,17 @@ namespace gpuClustering { __syncthreads(); assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId))); - assert(msize - firstPixel < maxPixInModule); + + // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) + if (0 == threadIdx.x) { + if (msize - firstPixel > maxPixInModule) { + printf("too many pixels in module %d: %d > %d\n", thisModuleId, msize - firstPixel, maxPixInModule); + msize = maxPixInModule + firstPixel; + } + } + + __syncthreads(); + assert(msize - firstPixel <= maxPixInModule); #ifdef GPU_DEBUG __shared__ uint32_t totGood; @@ -125,7 +135,7 @@ namespace gpuClustering { } #ifdef __CUDA_ARCH__ - // assume that we can cover the whole module with up to 10 blockDim.x-wide iterations + // assume that we can cover the whole module with up to 16 blockDim.x-wide iterations constexpr int maxiter = 16; #else auto maxiter = hist.size(); From 333afca6640b976179d73bb27102ef3b83d07892 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Wed, 5 Feb 2020 20:50:04 +0100 Subject: [PATCH 103/149] Update lumi ALCARECO configurations for heterogeneous pixel digi and cluster configuration (cms-patatrack#437) For now just use the CPU producer.
--- .../python/ALCARECOAlCaPCCRandom_cff.py | 14 ++++++++------ .../python/ALCARECOAlCaPCCZeroBias_cff.py | 14 ++++++++------ .../python/ALCARECOLumiPixels_cff.py | 10 ++++++---- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py index 8185c8cfbb089..7ed4f0655e8a6 100644 --- a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py +++ b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py @@ -10,17 +10,19 @@ ) from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis -siPixelDigisForLumiR = siPixelDigis.clone() -siPixelDigisForLumiR.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") +siPixelDigisForLumiR = siPixelDigis.cpu.clone( + InputLabel = "hltFEDSelectorLumiPixels" +) from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting -siPixelClustersForLumiR = siPixelClustersPreSplitting.clone() -siPixelClustersForLumiR.src = cms.InputTag("siPixelDigisForLumiR") +siPixelClustersForLumiR = siPixelClustersPreSplitting.cpu.clone( + src = "siPixelDigisForLumiR" +) from Calibration.LumiAlCaRecoProducers.alcaPCCProducer_cfi import alcaPCCProducer alcaPCCProducerRandom = alcaPCCProducer.clone() -alcaPCCProducerRandom.pixelClusterLabel = cms.InputTag("siPixelClustersForLumiR") -alcaPCCProducerRandom.trigstring = cms.untracked.string("alcaPCCRandom") +alcaPCCProducerRandom.AlcaPCCProducerParameters.pixelClusterLabel = cms.InputTag("siPixelClustersForLumiR") +alcaPCCProducerRandom.AlcaPCCProducerParameters.trigstring = cms.untracked.string("alcaPCCRandom") # Sequence # seqALCARECOAlCaPCCRandom = cms.Sequence(ALCARECORandomHLT + siPixelDigisForLumiR + siPixelClustersForLumiR + alcaPCCProducerRandom) diff --git a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py 
b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py index 0ef9e074cc817..32a65512c1505 100644 --- a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py +++ b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py @@ -10,17 +10,19 @@ ) from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis -siPixelDigisForLumiZB = siPixelDigis.clone() -siPixelDigisForLumiZB.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") +siPixelDigisForLumiZB = siPixelDigis.cpu.clone( + InputLabel = "hltFEDSelectorLumiPixels" +) from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting -siPixelClustersForLumiZB = siPixelClustersPreSplitting.clone() -siPixelClustersForLumiZB.src = cms.InputTag("siPixelDigisForLumiZB") +siPixelClustersForLumiZB = siPixelClustersPreSplitting.cpu.clone( + src = "siPixelDigisForLumiZB" +) from Calibration.LumiAlCaRecoProducers.alcaPCCProducer_cfi import alcaPCCProducer alcaPCCProducerZeroBias = alcaPCCProducer.clone() -alcaPCCProducerZeroBias.pixelClusterLabel = cms.InputTag("siPixelClustersForLumiZB") -alcaPCCProducerZeroBias.trigstring = cms.untracked.string("alcaPCCZeroBias") +alcaPCCProducerZeroBias.AlcaPCCProducerParameters.pixelClusterLabel = cms.InputTag("siPixelClustersForLumiZB") +alcaPCCProducerZeroBias.AlcaPCCProducerParameters.trigstring = cms.untracked.string("alcaPCCZeroBias") # Sequence # seqALCARECOAlCaPCCZeroBias = cms.Sequence(ALCARECOZeroBiasHLT + siPixelDigisForLumiZB + siPixelClustersForLumiZB + alcaPCCProducerZeroBias) diff --git a/Calibration/LumiAlCaRecoProducers/python/ALCARECOLumiPixels_cff.py b/Calibration/LumiAlCaRecoProducers/python/ALCARECOLumiPixels_cff.py index d88e1d4c27506..25c7e5e60cb26 100644 --- a/Calibration/LumiAlCaRecoProducers/python/ALCARECOLumiPixels_cff.py +++ b/Calibration/LumiAlCaRecoProducers/python/ALCARECOLumiPixels_cff.py @@ -9,12 +9,14 @@ ) from 
EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import siPixelDigis -siPixelDigisForLumi = siPixelDigis.clone() -siPixelDigisForLumi.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") +siPixelDigisForLumi = siPixelDigis.cpu.clone( + InputLabel = "hltFEDSelectorLumiPixels" +) from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting -siPixelClustersForLumi = siPixelClustersPreSplitting.clone() -siPixelClustersForLumi.src = cms.InputTag("siPixelDigisForLumi") +siPixelClustersForLumi = siPixelClustersPreSplitting.cpu.clone( + src = "siPixelDigisForLumi" +) # Sequence # seqALCARECOLumiPixels = cms.Sequence(ALCARECOLumiPixelsHLT + siPixelDigisForLumi + siPixelClustersForLumi) From a452bab181f83980f5807f5e530f985f53b19561 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Tue, 11 Feb 2020 10:13:50 +0100 Subject: [PATCH 104/149] Work around: avoid assert in Phase 2 workflows (cms-patatrack#438) SiPixelRecHitConverter needs to produce the very same products as SiPixelRecHitSoAFromLegacy, even if they are not used. These changes limit the size of the product to gpuClustering::MaxNumModules, avoid an assert or an overflow for Phase 2 workflows. 
--- .../SiPixelRecHits/plugins/SiPixelRecHitConverter.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index 945bbb28a3262..6930eb0bcd345 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -120,7 +120,9 @@ namespace cms { DetId detIdObject(detid); const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); auto gind = genericDet->index(); - assert(gind < 2000); + // FIXME to be changed to support Phase2 + if (gind >= int(gpuClustering::MaxNumModules)) + continue; auto const nclus = DSViter->size(); assert(nclus > 0); clusInModule[gind] = nclus; From f63360430980b5717404645df70769f21d551102 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 25 Mar 2020 00:28:04 +0100 Subject: [PATCH 105/149] Integrate the comments from the upstream PRs (cms-patatrack#442) Clean up the Patatrack code base following the comments received during the integration into the upstream release. Currently tracks the changes introduced due to - cms-sw#29109: Patatrack integration - trivial changes (1/N) - cms-sw#29110: Patatrack integration - common tools (2/N) List of changes: * Remove unused files * Fix compilation warnings * Fix AtomicPairCounter unit test * Rename the cudaCompat namespace to cms::cudacompat * Remove extra semicolon * Move SimpleVector and VecArray to the cms::cuda namespace * Add missing dependency * Move HistoContainer, AtomicPairCounter, prefixScan and radixSort to the cms::cuda namespace * Remove rule exception for HeterogeneousCore * Fix code rule violations: - replace using namespace cms::cuda in test/OneToManyAssoc_t.h . 
- add an exception for cudaCompat.h: cudaCompat relies on defining equivalent symbols to the CUDA intrinsics in the cms::cudacompat namespace, and pulling them in the global namespace when compiling device code without CUDA. * Protect the headers to compile only with a CUDA compiler --- .../SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h | 14 +++++++------- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 6 +++--- .../interface/TrackingRecHit2DHeterogeneous.h | 12 ++++++------ .../interface/TrackingRecHit2DSOAView.h | 2 +- .../plugins/SiPixelDigiErrorsSoAFromCUDA.cc | 4 ++-- .../plugins/SiPixelRawToClusterGPUKernel.cu | 6 +++--- .../plugins/SiPixelRawToClusterGPUKernel.h | 2 +- .../plugins/gpuClusterChargeCut.h | 2 +- .../SiPixelClusterizer/plugins/gpuClustering.h | 2 +- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index 1557fd64750e7..aa06e8dbbd57d 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -4,7 +4,7 @@ #include "DataFormats/SiPixelDigi/interface/PixelErrors.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" +#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" #include @@ -21,20 +21,20 @@ class SiPixelDigiErrorsCUDA { const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; } - GPU::SimpleVector* error() { return error_d.get(); } - GPU::SimpleVector const* error() const { return error_d.get(); } - GPU::SimpleVector const* c_error() const { return error_d.get(); } + cms::cuda::SimpleVector* error() { return error_d.get(); } + cms::cuda::SimpleVector const* error() const { 
return error_d.get(); } + cms::cuda::SimpleVector const* c_error() const { return error_d.get(); } using HostDataError = - std::pair, cms::cuda::host::unique_ptr>; + std::pair, cms::cuda::host::unique_ptr>; HostDataError dataErrorToHostAsync(cudaStream_t stream) const; void copyErrorToHostAsync(cudaStream_t stream); private: cms::cuda::device::unique_ptr data_d; - cms::cuda::device::unique_ptr> error_d; - cms::cuda::host::unique_ptr> error_h; + cms::cuda::device::unique_ptr> error_d; + cms::cuda::host::unique_ptr> error_h; PixelFormatterErrors formatterErrors_h; }; diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index ffef71092f6c9..ef229be4b9910 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -9,13 +9,13 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream) : formatterErrors_h(std::move(errors)) { - error_d = cms::cuda::make_device_unique>(stream); + error_d = cms::cuda::make_device_unique>(stream); data_d = cms::cuda::make_device_unique(maxFedWords, stream); cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream); - error_h = cms::cuda::make_host_unique>(stream); - GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); + error_h = cms::cuda::make_host_unique>(stream); + cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); assert(error_h->empty()); assert(error_h->capacity() == static_cast(maxFedWords)); diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index 955f97ca6bd54..b0aa79cfe20b6 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -88,7 +88,7 @@ 
TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH #ifndef __CUDACC__ constexpr #endif - (std::is_same::value) { + (std::is_same::value) { cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version @@ -135,16 +135,16 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH #ifndef __CUDACC__ constexpr #endif - (std::is_same::value) { + (std::is_same::value) { cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version } } -using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous; -using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; -using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous; -using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous; +using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; #endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index 8e6d99e81238a..3ed332bbe9356 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -17,7 +17,7 @@ class TrackingRecHit2DSOAView { static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; } using hindex_type = uint16_t; // if above is <=2^16 - using Hist = HistoContainer; + using Hist = cms::cuda::HistoContainer; using AverageGeometry = phase1PixelTopology::AverageGeometry; diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc index 
be4cc5d9a3336..8817606043a60 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -28,7 +28,7 @@ class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer digiErrorPutToken_; cms::cuda::host::unique_ptr data_; - GPU::SimpleVector error_; + cms::cuda::SimpleVector error_; const PixelFormatterErrors* formatterErrors_ = nullptr; }; @@ -70,7 +70,7 @@ void SiPixelDigiErrorsSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventS // use cudaMallocHost without a GPU... iEvent.emplace(digiErrorPutToken_, error_.size(), error_.data(), formatterErrors_); - error_ = GPU::make_SimpleVector(0, nullptr); + error_ = cms::cuda::make_SimpleVector(0, nullptr); data_.reset(); formatterErrors_ = nullptr; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 53af26ac7527d..acf6034d6c33c 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -365,7 +365,7 @@ namespace pixelgpudetails { uint32_t *pdigi, uint32_t *rawIdArr, uint16_t *moduleId, - GPU::SimpleVector *err, + cms::cuda::SimpleVector *err, bool useQualityInfo, bool includeErrors, bool debug) { @@ -491,8 +491,8 @@ namespace pixelgpudetails { } __shared__ uint32_t ws[32]; - blockPrefixScan(moduleStart + 1, moduleStart + 1, 1024, ws); - blockPrefixScan(moduleStart + 1025, moduleStart + 1025, gpuClustering::MaxNumModules - 1024, ws); + cms::cuda::blockPrefixScan(moduleStart + 1, moduleStart + 1, 1024, ws); + cms::cuda::blockPrefixScan(moduleStart + 1025, moduleStart + 1025, gpuClustering::MaxNumModules - 1024, ws); for (int i = first + 1025, iend = gpuClustering::MaxNumModules + 1; i < iend; i += blockDim.x) { moduleStart[i] += moduleStart[1024]; diff --git 
a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 767c5a1e92ad0..ee9729f75aed2 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -8,7 +8,7 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "FWCore/Utilities/interface/typedefs.h" -#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" +#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" #include "DataFormats/SiPixelDigi/interface/PixelErrors.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index dc50cd20b4db4..b781b10792fff 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -89,7 +89,7 @@ namespace gpuClustering { // renumber __shared__ uint16_t ws[32]; - blockPrefixScan(newclusId, nclus, ws); + cms::cuda::blockPrefixScan(newclusId, nclus, ws); assert(nclus >= newclusId[nclus - 1]); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index f47f06e6ec563..16c181a431ce8 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -80,7 +80,7 @@ namespace gpuClustering { //init hist (ymax=416 < 512 : 9bits) constexpr uint32_t maxPixInModule = 4000; constexpr auto nbins = phase1PixelTopology::numColsInModule + 2; //2+2; - using Hist = 
HistoContainer; + using Hist = cms::cuda::HistoContainer; __shared__ Hist hist; __shared__ typename Hist::Counter ws[32]; for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index fbe0fd13b84a4..b34aff1bced11 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -221,7 +221,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(), yy_.data(), adc_.data(), moduleInd_.data(), clus_.data()}; assert(digiView.adc(0) != 0); // not needed... - cudaCompat::resetGrid(); + cms::cudacompat::resetGrid(); // we run on blockId.x==0 gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view()); for (auto h = fc; h < lc; ++h) From 77efd08efc0f432492178e0142e31852474e2f67 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 26 Mar 2020 19:09:30 +0100 Subject: [PATCH 106/149] Synchronise with CMSSW_11_1_0_pre5 --- .../python/ALCARECOAlCaPCCRandom_cff.py | 4 ++-- .../python/ALCARECOAlCaPCCZeroBias_cff.py | 4 ++-- RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py index 7ed4f0655e8a6..149eb1d7f3854 100644 --- a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py +++ b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCRandom_cff.py @@ -21,8 +21,8 @@ from Calibration.LumiAlCaRecoProducers.alcaPCCProducer_cfi import alcaPCCProducer alcaPCCProducerRandom = alcaPCCProducer.clone() -alcaPCCProducerRandom.AlcaPCCProducerParameters.pixelClusterLabel = 
cms.InputTag("siPixelClustersForLumiR") -alcaPCCProducerRandom.AlcaPCCProducerParameters.trigstring = cms.untracked.string("alcaPCCRandom") +alcaPCCProducerRandom.pixelClusterLabel = cms.InputTag("siPixelClustersForLumiR") +alcaPCCProducerRandom.trigstring = cms.untracked.string("alcaPCCRandom") # Sequence # seqALCARECOAlCaPCCRandom = cms.Sequence(ALCARECORandomHLT + siPixelDigisForLumiR + siPixelClustersForLumiR + alcaPCCProducerRandom) diff --git a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py index 32a65512c1505..cce52734afeb0 100644 --- a/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py +++ b/Calibration/LumiAlCaRecoProducers/python/ALCARECOAlCaPCCZeroBias_cff.py @@ -21,8 +21,8 @@ from Calibration.LumiAlCaRecoProducers.alcaPCCProducer_cfi import alcaPCCProducer alcaPCCProducerZeroBias = alcaPCCProducer.clone() -alcaPCCProducerZeroBias.AlcaPCCProducerParameters.pixelClusterLabel = cms.InputTag("siPixelClustersForLumiZB") -alcaPCCProducerZeroBias.AlcaPCCProducerParameters.trigstring = cms.untracked.string("alcaPCCZeroBias") +alcaPCCProducerZeroBias.pixelClusterLabel = cms.InputTag("siPixelClustersForLumiZB") +alcaPCCProducerZeroBias.trigstring = cms.untracked.string("alcaPCCZeroBias") # Sequence # seqALCARECOAlCaPCCZeroBias = cms.Sequence(ALCARECOZeroBiasHLT + siPixelDigisForLumiZB + siPixelClustersForLumiZB + alcaPCCProducerZeroBias) diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index a626c35727249..d37b4076c2621 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,14 +1,13 @@ - - - + + From 9d3e4581e663975d479b12d1731435e401859b51 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 6 Apr 2020 15:57:55 +0200 Subject: [PATCH 107/149] Backport: remove unneeded dependencies in Reco subsystems (#29295) --- 
RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml | 3 +-- RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 1acd271a17e00..a4715dfbaa8f7 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,3 +1,4 @@ + @@ -5,13 +6,11 @@ - - diff --git a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml index 2d7d2139ab079..f4d22287a9eb7 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml @@ -2,7 +2,6 @@ - From f7c8e8a8ef57528a4cb1078b4bab5efe21683add Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 6 Apr 2020 16:04:52 +0200 Subject: [PATCH 108/149] Apply code formatting fixes from upstream integration (cms-patatrack#445) --- .../TrackingRecHit/interface/TrackingRecHit2DSOAView.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index 3ed332bbe9356..808feb2a4218f 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -17,7 +17,8 @@ class TrackingRecHit2DSOAView { static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; } using hindex_type = uint16_t; // if above is <=2^16 - using Hist = cms::cuda::HistoContainer; + using Hist = + cms::cuda::HistoContainer; using AverageGeometry = phase1PixelTopology::AverageGeometry; From 738ef55c14150f5d40a77ea1fc299a1a44a2ead6 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 29 Dec 2020 15:48:34 +0100 Subject: [PATCH 109/149] Synchronise with 
CMSSW_11_1_0_pre7 --- CalibTracker/SiPixelESProducers/BuildFile.xml | 26 ++++++++--------- .../SiPixelESProducers/plugins/BuildFile.xml | 24 +++++++-------- .../SiPixelClusterizer/plugins/BuildFile.xml | 10 +++---- .../SiPixelClusterizer/test/BuildFile.xml | 5 ++-- RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 29 +++++++++---------- .../SiPixelRecHits/plugins/BuildFile.xml | 2 +- 6 files changed, 47 insertions(+), 49 deletions(-) diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml index 02a36e17ed732..fc506b154c5f2 100644 --- a/CalibTracker/SiPixelESProducers/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/BuildFile.xml @@ -1,15 +1,15 @@ - - - - - - - - - - - - + + + + + + + + + + + + - + diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index 57bf68a1b7518..1f063df32a766 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -1,13 +1,13 @@ - - - - - - - - - - - - + + + + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index a4715dfbaa8f7..1231eb7f0f377 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,13 +1,13 @@ - - - - + + + + + - diff --git a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml index f4d22287a9eb7..a1cb95039c078 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml @@ -9,7 +9,6 @@ - @@ -17,16 +16,18 @@ - + + + diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index d37b4076c2621..bfa92dc57a48b 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ 
b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,19 +1,16 @@ - - - - - - - - - - - - - - + + + + + + + + - + + + + - + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 49be86daa18cd..40acdaf2385cb 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,11 +1,11 @@ - + From 192c1a0ba891989daf0e1e4e749c5fe20d46bdaf Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Fri, 15 May 2020 17:33:59 +0200 Subject: [PATCH 110/149] Replace cub prefix scan with home-brewed one (cms-patatrack#447) Replace the use of the prefix scan from CUB with a home-brewed implementation, using dynamic instead of static shared memory. No changes to physics or timing performance. --- .../SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index b34aff1bced11..d33e488344142 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -251,7 +251,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv output->hitsLayerStart()[i] = hitsModuleStart[cpeView.layerGeometry().layerStart[i]]; } cms::cuda::fillManyFromVector( - output->phiBinner(), nullptr, 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, nullptr); + output->phiBinner(), 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, nullptr); // std::cout << "created HitSoa for " << numberOfClusters << " clusters in " << numberOfDetUnits << " Dets" << std::endl; iEvent.put(std::move(output)); From 53c8a22dfaf51e7ddc174122d6826b3aeedbf8ec Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Mon, 18 May 2020 15:50:30 
+0200 Subject: [PATCH 111/149] Remove dependency on cub (cms-patatrack#449) Annotate all CMS-specific changes to CachingDeviceAllocator. Co-authored-by: Andrea Bocci --- RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml | 3 +-- .../SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu | 3 --- RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml | 2 -- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 1231eb7f0f377..d574c1e6f2b92 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,9 +1,8 @@ - + - diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index acf6034d6c33c..cf2b10b198692 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -25,9 +25,6 @@ #include #include -// cub includes -#include - // CMSSW includes #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml index a1cb95039c078..1891970a9d98b 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml @@ -34,14 +34,12 @@ - - From 3025d44729f00f001b56b7f7a6ca912ad77f841f Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 20 May 2020 09:32:58 +0200 Subject: [PATCH 112/149] Rename CUDAHostAllocator to cms::cuda::HostAllocator (cms-patatrack#464) --- RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 2208c449884c5..10e10f7654883 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -7,7 +7,7 @@ #include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" #include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" #include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" @@ -82,7 +82,7 @@ class PixelCPEFast final : public PixelCPEBase { // allocate it with posix malloc to be ocmpatible with cpu wf std::vector m_detParamsGPU; - // std::vector> m_detParamsGPU; + // std::vector> m_detParamsGPU; pixelCPEforGPU::CommonParams m_commonParamsGPU; pixelCPEforGPU::LayerGeometry m_layerGeometry; pixelCPEforGPU::AverageGeometry m_averageGeometry; From 3b0dedea33803e108a694ece91ba0d4dc4ae581f Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 23 May 2020 11:59:55 +0200 Subject: [PATCH 113/149] Synchronise with CMSSW_11_1_0_pre8 --- .../plugins/SiPixelRecHitConverter.cc | 157 +++++++++++++----- 1 file changed, 120 insertions(+), 37 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index 6930eb0bcd345..c7eb7481fc4f8 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -9,8 +9,59 @@ * ------------------------------------------------------ */ -// Our own stuff -#include "RecoLocalTracker/SiPixelRecHits/interface/SiPixelRecHitConverter.h" 
+//--------------------------------------------------------------------------- +//! \class SiPixelRecHitConverter +//! +//! \brief EDProducer to covert SiPixelClusters into SiPixelRecHits +//! +//! SiPixelRecHitConverter is an EDProducer subclass (i.e., a module) +//! which orchestrates the conversion of SiPixelClusters into SiPixelRecHits. +//! Consequently, the input is a edm::DetSetVector and the output is +//! SiPixelRecHitCollection. +//! +//! SiPixelRecHitConverter invokes one of descendents from +//! ClusterParameterEstimator (templated on SiPixelCluster), e.g. +//! CPEFromDetPosition (which is the only available option +//! right now). SiPixelRecHitConverter loads the SiPixelClusterCollection, +//! and then iterates over DetIds, invoking the chosen CPE's methods +//! localPosition() and localError() to perform the correction (some of which +//! may be rather involved). A RecHit is made on the spot, and appended +//! to the output collection. +//! +//! The calibrations are not loaded at the moment, +//! although that is being planned for the near future. +//! +//! \author Porting from ORCA by Petar Maksimovic (JHU). Implementation of the +//! DetSetVector by V.Chiochia (Zurich University). +//! +//! \version v2, May 30, 2006 +//! change to use Lorentz angle from DB Lotte Wilke, Jan. 31st, 2008 +//! 
+//--------------------------------------------------------------------------- + +//--- Base class for CPEs: + +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" + +//--- Geometry + DataFormats +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/Common/interface/DetSetVector.h" + +//--- Framework +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" + +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/EDPutToken.h" +#include "FWCore/Utilities/interface/ESGetToken.h" + // Geometry #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" @@ -39,17 +90,54 @@ using HMSstorage = HostProduct; using namespace std; namespace cms { + + class SiPixelRecHitConverter : public edm::stream::EDProducer<> { + public: + //--- Constructor, virtual destructor (just in case) + explicit SiPixelRecHitConverter(const edm::ParameterSet& conf); + ~SiPixelRecHitConverter() override; + + //--- Factory method to make CPE's depending on the ParameterSet + //--- Not sure if we need to make more than one CPE to run concurrently + //--- on different parts of the detector (e.g., one for the barrel and the + //--- one for the forward). The way the CPE's are written now, it's + //--- likely we can use one (and they will switch internally), or + //--- make two of the same but configure them differently. We need a more + //--- realistic use case... + + //--- The top-level event method. 
+ void produce(edm::Event& e, const edm::EventSetup& c) override; + + //--- Execute the position estimator algorithm(s). + void run(edm::Event& e, + edm::Handle> inputhandle, + SiPixelRecHitCollectionNew& output, + TrackerGeometry const& geom); + + private: + // TO DO: maybe allow a map of pointers? + /// const PixelClusterParameterEstimator * cpe_; // what we got (for now, one ptr to base class) + PixelCPEBase const* cpe_ = nullptr; // What we got (for now, one ptr to base class) + edm::InputTag const src_; + edm::EDGetTokenT> const tPixelCluster_; + edm::EDPutTokenT const tPut_; + edm::EDPutTokenT const tHost_; + edm::ESGetToken const tTrackerGeom_; + edm::ESGetToken const tCPE_; + bool m_newCont; // save also in emdNew::DetSetVector + }; + //--------------------------------------------------------------------------- //! Constructor: set the ParameterSet and defer all thinking to setupCPE(). //--------------------------------------------------------------------------- SiPixelRecHitConverter::SiPixelRecHitConverter(edm::ParameterSet const& conf) - : conf_(conf), - src_(conf.getParameter("src")), - tPixelCluster(consumes >(src_)) { - //--- Declare to the EDM what kind of collections we will be making. 
- produces(); - produces(); - } + : src_(conf.getParameter("src")), + tPixelCluster_(consumes>(src_)), + tPut_(produces()), + tHost_(produces()), + tTrackerGeom_(esConsumes()), + tCPE_(esConsumes( + edm::ESInputTag("", conf.getParameter("CPE")))) {} // Destructor SiPixelRecHitConverter::~SiPixelRecHitConverter() {} @@ -59,29 +147,25 @@ namespace cms { //--------------------------------------------------------------------------- void SiPixelRecHitConverter::produce(edm::Event& e, const edm::EventSetup& es) { // Step A.1: get input data - edm::Handle > input; - e.getByToken(tPixelCluster, input); + edm::Handle> input; + e.getByToken(tPixelCluster_, input); // Step A.2: get event setup - edm::ESHandle geom; - es.get().get(geom); + auto const& geom = es.getData(tTrackerGeom_); // Step B: create empty output collection - auto output = std::make_unique(); + SiPixelRecHitCollectionNew output; // Step B*: create CPE - edm::ESHandle hCPE; - std::string cpeName_ = conf_.getParameter("CPE"); - es.get().get(cpeName_, hCPE); - cpe_ = dynamic_cast(&(*hCPE)); + cpe_ = dynamic_cast(&es.getData(tCPE_)); // Step C: Iterate over DetIds and invoke the strip CPE algorithm // on each DetUnit - run(e, input, *output, geom); + run(e, input, output, geom); - output->shrink_to_fit(); - e.put(std::move(output)); + output.shrink_to_fit(); + e.emplace(tPut_, std::move(output)); } //--------------------------------------------------------------------------- @@ -90,9 +174,9 @@ namespace cms { //! New interface reading DetSetVector by V.Chiochia (May 30th, 2006) //--------------------------------------------------------------------------- void SiPixelRecHitConverter::run(edm::Event& iEvent, - edm::Handle > inputhandle, + edm::Handle> inputhandle, SiPixelRecHitCollectionNew& output, - edm::ESHandle& geom) { + TrackerGeometry const& geom) { if (!cpe_) { edm::LogError("SiPixelRecHitConverter") << " at least one CPE is not ready -- can't run!"; // TO DO: throw an exception here? 
The user may want to know... @@ -101,24 +185,18 @@ namespace cms { } int numberOfDetUnits = 0; + int numberOfClusters = 0; const edmNew::DetSetVector& input = *inputhandle; - // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... + // fill cluster arrays auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); auto hitsModuleStart = hmsp.get(); - auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone - iEvent.put(std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... - - // fill cluster arrays - std::array clusInModule; - for (auto& cl : clusInModule) - cl = 0; - int numberOfClusters = 0; + std::array clusInModule{}; for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { unsigned int detid = DSViter->detId(); DetId detIdObject(detid); - const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); + const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); auto gind = genericDet->index(); // FIXME to be changed to support Phase2 if (gind >= int(gpuClustering::MaxNumModules)) @@ -134,14 +212,15 @@ namespace cms { hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule[i - 1]; assert(numberOfClusters == int(hitsModuleStart[gpuClustering::MaxNumModules])); - numberOfClusters = 0; - edmNew::DetSetVector::const_iterator DSViter = input.begin(); + // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... + iEvent.emplace(tHost_, std::move(hmsp)); // hmsp is gone, hitsModuleStart still alive and kicking... 
- for (; DSViter != input.end(); DSViter++) { + numberOfClusters = 0; + for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { numberOfDetUnits++; unsigned int detid = DSViter->detId(); DetId detIdObject(detid); - const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); + const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); assert(pixDet); SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output, detid); @@ -184,3 +263,7 @@ namespace cms { // << std::endl; } } // end of namespace cms + +using cms::SiPixelRecHitConverter; + +DEFINE_FWK_MODULE(SiPixelRecHitConverter); From e9ed9facf4d3b8505932315f61ae3767e0a069a2 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 25 May 2020 09:51:44 +0200 Subject: [PATCH 114/149] Use siPixelDigis.cpu instead of siPixelDigis (cms-patatrack#467) --- .../clients/beam_dqm_sourceclient-live_cfg.py | 81 +------------------ .../beampixel_dqm_sourceclient-live_cfg.py | 29 ++++--- .../clients/csc_dqm_sourceclient-live_cfg.py | 14 +--- .../clients/fed_dqm_sourceclient-live_cfg.py | 16 ++-- .../clients/l1t_dqm_sourceclient-live_cfg.py | 12 +-- .../l1temulator_dqm_sourceclient-live_cfg.py | 11 +-- .../l1tstage1_dqm_sourceclient-live_cfg.py | 12 +-- ...tage1emulator_dqm_sourceclient-live_cfg.py | 11 +-- .../l1tstage2_dqm_sourceclient-live_cfg.py | 24 +++--- ...tage2emulator_dqm_sourceclient-live_cfg.py | 24 ++++-- .../clients/lumi_dqm_sourceclient-live_cfg.py | 9 +-- .../physics_dqm_sourceclient-live_cfg.py | 12 +-- .../pixel_dqm_sourceclient-live_cfg.py | 25 +++--- .../pixellumi_dqm_sourceclient-live_cfg.py | 15 +--- .../clients/scal_dqm_sourceclient-live_cfg.py | 10 +-- .../sistrip_dqm_sourceclient-live_cfg.py | 26 ++---- 16 files changed, 101 insertions(+), 230 deletions(-) diff --git a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py index 
e0d5d05bc193a..d0e5ff6c7d5ce 100644 --- a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py @@ -1,23 +1,12 @@ from __future__ import print_function import FWCore.ParameterSet.Config as cms -# Define here the BeamSpotOnline record name, -# it will be used both in BeamMonitor setup and in payload creation/upload -BSOnlineRecordName = 'BeamSpotOnlineLegacyObjectsRcd' -BSOnlineTag = 'BeamSpotOnlineTestLegacy' -BSOnlineJobName = 'BeamSpotOnlineTestLegacy' - #from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 #process = cms.Process("BeamMonitor", Run2_2018) FIXME import sys from Configuration.Eras.Era_Run2_2018_pp_on_AA_cff import Run2_2018_pp_on_AA process = cms.Process("BeamMonitor", Run2_2018_pp_on_AA) -# Configure tag and jobName if running Playback system -if "dqm_cmssw/playback" in str(sys.argv[1]): - BSOnlineTag = BSOnlineTag + 'Playback' - BSOnlineJobName = BSOnlineJobName + 'Playback' - # process.MessageLogger = cms.Service("MessageLogger", debugModules = cms.untracked.vstring('*'), @@ -39,13 +28,10 @@ # Input sources if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options elif live: process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options else: process.load("DQM.Integration.config.fileinputsource_cfi") - from DQM.Integration.config.fileinputsource_cfi import options #-------------------------- # HLT Filter @@ -58,9 +44,6 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'BeamMonitor' process.dqmSaver.tag = 'BeamMonitor' -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'BeamMonitor' -process.dqmSaverPB.runNumber = options.runNumber process.dqmEnvPixelLess = process.dqmEnv.clone() process.dqmEnvPixelLess.subSystemFolder = 'BeamMonitor_PixelLess' @@ -69,14 
+52,12 @@ # Conditions if (live): process.load("DQM.Integration.config.FrontierCondition_GT_cfi") - process.GlobalTag.DBParameters.authenticationPath = cms.untracked.string('.') else: process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff') from Configuration.AlCa.GlobalTag import GlobalTag as gtCustomise process.GlobalTag = gtCustomise(process.GlobalTag, 'auto:run2_data', '') - process.GlobalTag.DBParameters.authenticationPath = cms.untracked.string('.') # you may need to set manually the GT in the line below - #process.GlobalTag.globaltag = '100X_upgrade2018_realistic_v10' + process.GlobalTag.globaltag = '100X_upgrade2018_realistic_v10' #---------------------------- # BeamMonitor @@ -234,7 +215,7 @@ # process.dqmcommon = cms.Sequence(process.dqmEnv - * process.dqmSaver*process.dqmSaverPB) + * process.dqmSaver) # process.monitor = cms.Sequence(process.dqmBeamMonitor @@ -294,13 +275,12 @@ process.muonDTDigis.inputLabel = rawDataInputTag process.muonRPCDigis.InputLabel = rawDataInputTag process.scalersRawToDigi.scalersInputTag = rawDataInputTag -process.siPixelDigis.InputLabel = rawDataInputTag +process.siPixelDigis.cpu.InputLabel = rawDataInputTag process.siStripDigis.ProductLabel = rawDataInputTag process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") process.dqmBeamMonitor.OnlineMode = True -process.dqmBeamMonitor.recordName = BSOnlineRecordName process.dqmBeamMonitor.resetEveryNLumi = 5 # was 10 for HI process.dqmBeamMonitor.resetPVEveryNLumi = 5 # was 10 for HI @@ -353,61 +333,6 @@ process.dqmBeamMonitor.hltResults = cms.InputTag("TriggerResults","","HLT") -#--------- -# Upload BeamSpotOnlineObject (LegacyRcd) to CondDB -if unitTest == False: - process.OnlineDBOutputService = cms.Service("OnlineDBOutputService", - - DBParameters = cms.PSet( - messageLevel = cms.untracked.int32(0), - authenticationPath = cms.untracked.string('.') - ), - - # Upload to CondDB - connect = cms.string('oracle://cms_orcon_prod/CMS_CONDITIONS'), - 
preLoadConnectionString = cms.untracked.string('frontier://FrontierProd/CMS_CONDITIONS'), - - runNumber = cms.untracked.uint64(options.runNumber), - #lastLumiFile = cms.untracked.string('last_lumi.txt'), - #lastLumiUrl = cms.untracked.string('http://ru-c2e14-11-01.cms:11100/urn:xdaq-application:lid=52/getLatestLumiSection'), - omsServiceUrl = cms.untracked.string('http://cmsoms-services.cms:9949/urn:xdaq-application:lid=100/getRunAndLumiSection'), - writeTransactionDelay = cms.untracked.uint32(options.transDelay), - latency = cms.untracked.uint32(2), - autoCommit = cms.untracked.bool(True), - saveLogsOnDB = cms.untracked.bool(True), - jobName = cms.untracked.string(BSOnlineJobName), # name of the DB log record - toPut = cms.VPSet(cms.PSet( - record = cms.string(BSOnlineRecordName), - tag = cms.string(BSOnlineTag), - timetype = cms.untracked.string('Lumi'), - onlyAppendUpdatePolicy = cms.untracked.bool(True) - )) - ) -else: - process.OnlineDBOutputService = cms.Service("OnlineDBOutputService", - - DBParameters = cms.PSet( - messageLevel = cms.untracked.int32(0), - authenticationPath = cms.untracked.string('.') - ), - - # Upload to CondDB - connect = cms.string('sqlite_file:BeamSpotOnlineLegacy.db'), - preLoadConnectionString = cms.untracked.string('sqlite_file:BeamSpotOnlineLegacy.db'), - runNumber = cms.untracked.uint64(options.runNumber), - lastLumiFile = cms.untracked.string('last_lumi.txt'), - #lastLumiUrl = cms.untracked.string('http://ru-c2e14-11-01.cms:11100/urn:xdaq-application:lid=52/getLatestLumiSection'), - writeTransactionDelay = cms.untracked.uint32(options.transDelay), - latency = cms.untracked.uint32(2), - autoCommit = cms.untracked.bool(True), - toPut = cms.VPSet(cms.PSet( - record = cms.string(BSOnlineRecordName), - tag = cms.string(BSOnlineTag), - timetype = cms.untracked.string('Lumi'), - onlyAppendUpdatePolicy = cms.untracked.bool(True) - )) - ) - #--------- # Final path if (not process.runType.getRunType() == process.runType.hi_run): diff --git 
a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py index e7722a9046f36..3fdc7f2d6d2ac 100644 --- a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py @@ -1,16 +1,25 @@ +from __future__ import print_function import FWCore.ParameterSet.Config as cms -from Configuration.StandardSequences.Eras import eras -process = cms.Process("BeamPixel", eras.Run2_2018) +import sys +from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 +process = cms.Process("BeamPixel", Run2_2018) +unitTest=False +if 'unitTest=True' in sys.argv: + unitTest=True #---------------------------- # Common for PP and HI running #---------------------------- + +if unitTest: + process.load("DQM.Integration.config.unittestinputsource_cfi") +else: + process.load("DQM.Integration.config.inputsource_cfi") + # Use this to run locally (for testing purposes) #process.load("DQM.Integration.config.fileinputsource_cfi") -# Otherwise use this -process.load("DQM.Integration.config.inputsource_cfi") #---------------------------- @@ -91,7 +100,7 @@ if (process.runType.getRunType() == process.runType.pp_run or process.runType.getRunType() == process.runType.pp_run_stage1 or process.runType.getRunType() == process.runType.cosmic_run or process.runType.getRunType() == process.runType.cosmic_run_stage1 or process.runType.getRunType() == process.runType.hpu_run): - print "[beampixel_dqm_sourceclient-live_cfg]::running pp" + print("[beampixel_dqm_sourceclient-live_cfg]::running pp") #---------------------------- @@ -109,10 +118,10 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") + 
process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") - + #---------------------------- # pixelVertexDQM Config #---------------------------- @@ -144,7 +153,7 @@ # Heavy Ion Specific Section #---------------------------- if (process.runType.getRunType() == process.runType.hi_run): - print "[beampixel_dqm_sourceclient-live_cfg]::running HI" + print("[beampixel_dqm_sourceclient-live_cfg]::running HI") #---------------------------- @@ -162,7 +171,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") @@ -200,7 +209,7 @@ process.pixelVertexDQM.fileName = cms.string("/nfshome0/dqmpro/BeamMonitorDQM/BeamPixelResults.txt") else: process.pixelVertexDQM.fileName = cms.string("/nfshome0/dqmdev/BeamMonitorDQM/BeamPixelResults.txt") -print "[beampixel_dqm_sourceclient-live_cfg]::saving DIP file into " + str(process.pixelVertexDQM.fileName) +print("[beampixel_dqm_sourceclient-live_cfg]::saving DIP file into " + str(process.pixelVertexDQM.fileName)) #---------------------------- diff --git a/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py index 77b1e6b88b699..4cabf7a09bb19 100644 --- a/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py @@ -40,15 +40,12 @@ if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options else: # for live online DQM in P5 
process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -61,9 +58,6 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = "CSC" process.dqmSaver.tag = "CSC" -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = "CSC" -process.dqmSaverPB.runNumber = options.runNumber #process.DQM.collectorHost = 'pccmsdqm02.cern.ch' @@ -172,8 +166,8 @@ # Sequences #-------------------------- -#process.p = cms.Path(process.dqmCSCClient+process.dqmEnv+process.dqmSaver+process.dqmSaverPB) -process.p = cms.Path(process.dqmCSCClient * process.muonCSCDigis * process.csc2DRecHits * process.cscSegments * process.cscMonitor + process.dqmEnv + process.dqmSaver + process.dqmSaverPB) +#process.p = cms.Path(process.dqmCSCClient+process.dqmEnv+process.dqmSaver) +process.p = cms.Path(process.dqmCSCClient * process.muonCSCDigis * process.csc2DRecHits * process.cscSegments * process.cscMonitor + process.dqmEnv + process.dqmSaver) process.castorDigis.InputLabel = cms.InputTag("rawDataCollector") @@ -189,7 +183,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") process.cscMonitor.FEDRawDataCollectionTag = cms.InputTag("rawDataCollector") process.dqmCSCClient.InputObjects = cms.untracked.InputTag("rawDataCollector") @@ -214,7 +208,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") 
process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.cscMonitor.FEDRawDataCollectionTag = cms.InputTag("rawDataRepacker") process.dqmCSCClient.InputObjects = cms.untracked.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py index 6feff0263b749..0d6d5c54d1d3a 100644 --- a/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py @@ -25,16 +25,11 @@ # Input: if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options else: process.load('DQM.Integration.config.inputsource_cfi') - from DQM.Integration.config.inputsource_cfi import options # Output: process.dqmEnv.subSystemFolder = 'FED' process.dqmSaver.tag = 'FED' -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'FED' -process.dqmSaverPB.runNumber = options.runNumber # Subsystem sequences @@ -48,8 +43,8 @@ # Pixel sequence: process.load('Configuration.StandardSequences.MagneticField_cff') process.load('EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi') -process.siPixelDigis.Timing = False -process.siPixelDigis.IncludeErrors = True +process.siPixelDigis.cpu.Timing = False +process.siPixelDigis.cpu.IncludeErrors = True process.load('DQM.SiPixelMonitorRawData.SiPixelMonitorHLT_cfi') process.SiPixelHLTSource.saveFile = False process.SiPixelHLTSource.slowDown = False @@ -92,7 +87,7 @@ # Setting raw data collection label for all subsytem modules, depending on run type: if (process.runType.getRunType() == 
process.runType.hi_run): process.l1tStage2Fed.rawTag = cms.InputTag('rawDataRepacker') - process.siPixelDigis.InputLabel = cms.InputTag('rawDataRepacker') + process.siPixelDigis.cpu.InputLabel = cms.InputTag('rawDataRepacker') process.SiPixelHLTSource.RawInput = cms.InputTag('rawDataRepacker') process.siStripFEDCheck.RawDataTag = cms.InputTag('rawDataRepacker') process.esRawToDigi.sourceTag = cms.InputTag('rawDataRepacker') @@ -105,7 +100,7 @@ process.cscDQMEvF.InputObjects = cms.untracked.InputTag('rawDataRepacker') else: process.l1tStage2Fed.rawTag = cms.InputTag('rawDataCollector') - process.siPixelDigis.InputLabel = cms.InputTag('rawDataCollector') + process.siPixelDigis.cpu.InputLabel = cms.InputTag('rawDataCollector') process.SiPixelHLTSource.RawInput = cms.InputTag('rawDataCollector') process.siStripFEDCheck.RawDataTag = cms.InputTag('rawDataCollector') process.esRawToDigi.sourceTag = cms.InputTag('rawDataCollector') @@ -126,7 +121,7 @@ # Modules for the FED process.FEDModulesPath = cms.Path( process.l1tStage2Fed - + process.siPixelDigis + + process.siPixelDigis.cpu + process.SiPixelHLTSource + process.siStripFEDCheck + process.esRawToDigi @@ -147,7 +142,6 @@ process.DQMmodulesPath = cms.Path( process.dqmEnv + process.dqmSaver - + process.dqmSaverPB ) process.schedule = cms.Schedule( diff --git a/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py index 301a79f2b2865..85ce46d7f7ace 100644 --- a/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py @@ -16,11 +16,9 @@ # # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") -from DQM.Integration.config.inputsource_cfi import options # # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment 
@@ -28,9 +26,6 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'L1T' process.dqmSaver.tag = 'L1T' -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'L1T' -process.dqmSaverPB.runNumber = options.runNumber # # references needed @@ -101,8 +96,7 @@ # process.dqmEndPath = cms.EndPath( process.dqmEnv * - process.dqmSaver * - process.dqmSaverPB + process.dqmSaver ) # @@ -182,7 +176,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") process.bxTiming.FedSource = cms.untracked.InputTag("rawDataCollector") process.l1s.fedRawData = cms.InputTag("rawDataCollector") @@ -201,7 +195,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.bxTiming.FedSource = cms.untracked.InputTag("rawDataRepacker") process.l1s.fedRawData = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py index 9701c71d14a3c..9ca55cca06428 100644 --- a/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py @@ -16,11 +16,9 @@ # # for live online DQM in P5 
process.load("DQM.Integration.config.inputsource_cfi") -from DQM.Integration.config.inputsource_cfi import options # # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -32,9 +30,6 @@ # for local test process.dqmEnv.subSystemFolder = 'L1TEMU' process.dqmSaver.tag = 'L1TEMU' -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'L1TEMU' -process.dqmSaverPB.runNumber = options.runNumber # # no references needed @@ -93,7 +88,7 @@ process.l1EmulatorMonitorClientPath = cms.Path(process.l1EmulatorMonitorClient) # -process.l1EmulatorMonitorEndPath = cms.EndPath(process.dqmEnv*process.dqmSaver*process.dqmSaverPB) +process.l1EmulatorMonitorEndPath = cms.EndPath(process.dqmEnv*process.dqmSaver) # process.valCscTriggerPrimitiveDigis.gangedME1a = cms.untracked.bool(False) @@ -197,7 +192,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") #-------------------------------------------------- @@ -219,7 +214,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py 
b/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py index b8ae0bcf233b4..ca0db40e25687 100644 --- a/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py @@ -16,11 +16,9 @@ # # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") -from DQM.Integration.config.inputsource_cfi import options # # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -29,9 +27,6 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'L1TStage1' process.dqmSaver.tag = 'L1TStage1' -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'L1TStage1' -process.dqmSaverPB.runNumber = options.runNumber # # references needed @@ -106,8 +101,7 @@ # process.dqmEndPath = cms.EndPath( process.dqmEnv * - process.dqmSaver * - process.dqmSaverPB + process.dqmSaver ) # @@ -192,7 +186,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") process.bxTiming.FedSource = cms.untracked.InputTag("rawDataCollector") process.l1s.fedRawData = cms.InputTag("rawDataCollector") @@ -211,7 +205,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = 
cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.bxTiming.FedSource = cms.untracked.InputTag("rawDataRepacker") process.l1s.fedRawData = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py index 6200618c0fe44..9809652f90f3e 100644 --- a/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py @@ -16,11 +16,9 @@ # # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") -from DQM.Integration.config.inputsource_cfi import options # # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -30,9 +28,6 @@ # for local test process.dqmEnv.subSystemFolder = 'L1TEMUStage1' process.dqmSaver.tag = 'L1TEMUStage1' -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'L1TEMUStage1' -process.dqmSaverPB.runNumber = options.runNumber # # no references needed @@ -93,7 +88,7 @@ process.l1EmulatorMonitorClientPath = cms.Path(process.l1EmulatorMonitorClient) # -process.l1EmulatorMonitorEndPath = cms.EndPath(process.dqmEnv*process.dqmSaver*process.dqmSaverPB) +process.l1EmulatorMonitorEndPath = cms.EndPath(process.dqmEnv*process.dqmSaver) # @@ -206,7 +201,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") 
#-------------------------------------------------- @@ -228,7 +223,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py index bb57bd794d842..4e8abe5391b2b 100644 --- a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py @@ -1,13 +1,21 @@ import FWCore.ParameterSet.Config as cms -from Configuration.StandardSequences.Eras import eras -process = cms.Process("L1TStage2DQM", eras.Run2_2018) +import sys +from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 +process = cms.Process("L1TStage2DQM", Run2_2018) + +unitTest = False +if 'unitTest=True' in sys.argv: + unitTest=True #-------------------------------------------------- # Event Source and Condition -# Live Online DQM in P5 -process.load("DQM.Integration.config.inputsource_cfi") +if unitTest: + process.load("DQM.Integration.config.unittestinputsource_cfi") +else: + # Live Online DQM in P5 + process.load("DQM.Integration.config.inputsource_cfi") # # Testing in lxplus # process.load("DQM.Integration.config.fileinputsource_cfi") @@ -31,7 +39,6 @@ process.dqmEnv.subSystemFolder = "L1T" process.dqmSaver.tag = "L1T" -process.DQMStore.referenceFileName = "/dqmdata/dqm/reference/l1t_reference.root" process.dqmEndPath = cms.EndPath(process.dqmEnv * process.dqmSaver) @@ -102,7 +109,6 @@ # Cosmic run if (process.runType.getRunType() == process.runType.cosmic_run): - process.DQMStore.referenceFileName = 
"/dqmdata/dqm/reference/l1t_reference_cosmic.root" # Remove Quality Tests for L1T Muon Subsystems since they are not optimized yet for cosmics process.l1tStage2MonitorClient.remove(process.l1TStage2uGMTQualityTests) process.l1tStage2MonitorClient.remove(process.l1TStage2EMTFQualityTests) @@ -113,7 +119,6 @@ # Heavy-Ion run if (process.runType.getRunType() == process.runType.hi_run): - process.DQMStore.referenceFileName = "/dqmdata/dqm/reference/l1t_reference_hi.root" process.onlineMetaDataDigis.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") process.onlineMetaDataRawToDigi.onlineMetaDataInputLabel = cms.InputTag("rawDataRepacker") process.castorDigis.InputLabel = cms.InputTag("rawDataRepacker") @@ -127,7 +132,7 @@ process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.muonGEMDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.tcdsDigis.InputLabel = cms.InputTag("rawDataRepacker") process.tcdsRawToDigi.InputLabel = cms.InputTag("rawDataRepacker") @@ -139,7 +144,6 @@ process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker") process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker") process.twinMuxStage2Digis.DTTM7_FED_Source = cms.InputTag("rawDataRepacker") - process.RPCTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker") process.bmtfDigis.InputLabel = cms.InputTag("rawDataRepacker") process.omtfStage2Digis.inputLabel = cms.InputTag("rawDataRepacker") process.emtfStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") @@ -154,6 +158,8 @@ process.l1tStage2BmtfZeroSupp.rawData = cms.InputTag("rawDataRepacker") process.l1tStage2BmtfZeroSuppFatEvts.rawData = cms.InputTag("rawDataRepacker") process.selfFatEventFilter.rawInput = 
cms.InputTag("rawDataRepacker") + process.rpcTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker") + process.rpcCPPFRawToDigi.inputTag = cms.InputTag("rawDataRepacker") #-------------------------------------------------- # L1T Online DQM Schedule diff --git a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py index 735e5d534a9e5..8ae678f923620 100644 --- a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py @@ -1,13 +1,21 @@ import FWCore.ParameterSet.Config as cms -from Configuration.StandardSequences.Eras import eras -process = cms.Process("L1TStage2EmulatorDQM", eras.Run2_2018) +import sys +from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 +process = cms.Process("L1TStage2EmulatorDQM", Run2_2018) + +unitTest = False +if 'unitTest=True' in sys.argv: + unitTest=True #-------------------------------------------------- # Event Source and Condition -# Live Online DQM in P5 -process.load("DQM.Integration.config.inputsource_cfi") +if unitTest: + process.load("DQM.Integration.config.unittestinputsource_cfi") +else: + # Live Online DQM in P5 + process.load("DQM.Integration.config.inputsource_cfi") # Testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") @@ -27,7 +35,6 @@ process.dqmEnv.subSystemFolder = "L1TEMU" process.dqmSaver.tag = "L1TEMU" -process.DQMStore.referenceFileName = "/dqmdata/dqm/reference/l1temu_reference.root" process.dqmEndPath = cms.EndPath( process.dqmEnv * @@ -123,7 +130,7 @@ process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.muonGEMDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = 
cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.tcdsDigis.InputLabel = cms.InputTag("rawDataRepacker") process.tcdsRawToDigi.InputLabel = cms.InputTag("rawDataRepacker") @@ -135,8 +142,8 @@ process.gctDigis.inputLabel = cms.InputTag("rawDataRepacker") process.gtDigis.DaqGtInputTag = cms.InputTag("rawDataRepacker") process.twinMuxStage2Digis.DTTM7_FED_Source = cms.InputTag("rawDataRepacker") - process.RPCTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker") process.bmtfDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.valBmtfAlgoSel.feds = cms.InputTag("rawDataRepacker") process.omtfStage2Digis.inputLabel = cms.InputTag("rawDataRepacker") process.emtfStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.gmtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") @@ -147,6 +154,9 @@ process.l1tdeStage2CaloLayer1.fedRawDataLabel = cms.InputTag("rawDataRepacker") process.gtStage2Digis.InputLabel = cms.InputTag("rawDataRepacker") process.selfFatEventFilter.rawInput = cms.InputTag("rawDataRepacker") + process.rpcTwinMuxRawToDigi.inputTag = cms.InputTag("rawDataRepacker") + process.rpcCPPFRawToDigi.inputTag = cms.InputTag("rawDataRepacker") + process.hltFatEventFilter.HLTPaths.append('HLT_HIPhysics_v*') #-------------------------------------------------- # L1T Emulator Online DQM Schedule diff --git a/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py index 47acfcdc471f2..e9fb481e5581b 100644 --- a/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py @@ -8,7 +8,6 @@ # Event Source #---------------------------- process.load("DQM.Integration.config.inputsource_cfi") -from DQM.Integration.config.inputsource_cfi import options #process.DQMEventStreamHttpReader.consumerName = 'DQM Luminosity Consumer' 
#process.DQMEventStreamHttpReader.SelectHLTOutput = cms.untracked.string('hltOutputALCALUMIPIXELS') @@ -18,9 +17,6 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = "Info/Lumi" process.dqmSaver.tag = "Lumi" -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = "Lumi" -process.dqmSaverPB.runNumber = options.runNumber #--------------------------------------------- # Global Tag @@ -49,7 +45,7 @@ process.load("Configuration.StandardSequences.EndOfProcess_cff") process.load("Configuration.EventContent.EventContent_cff") process.load("Configuration.StandardSequences.Reconstruction_cff") -process.siPixelDigis.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("hltFEDSelectorLumiPixels") process.reconstruction_step = cms.Sequence( process.siPixelDigis + @@ -67,8 +63,7 @@ process.dqmmodules = cms.Sequence(process.dqmEnv + process.expressLumiProducer + process.dqmLumiMonitor - + process.dqmSaver - + process.dqmSaverPB) + + process.dqmSaver) #---------------------------- # Proton-Proton Running Stuff #---------------------------- diff --git a/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py index 76b6ae553949c..5638bc8ef940a 100644 --- a/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py @@ -11,11 +11,9 @@ # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") -from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -24,9 +22,6 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'Physics' process.dqmSaver.tag = 'Physics' 
-process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'Physics' -process.dqmSaverPB.runNumber = options.runNumber # 0=random, 1=physics, 2=calibration, 3=technical process.hltTriggerTypeFilter = cms.EDFilter("HLTTriggerTypeFilter", @@ -52,11 +47,10 @@ # process.dump * process.qcdLowPtDQM * process.dqmEnv * - process.dqmSaver * - process.dqmSaverPB + process.dqmSaver ) -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") ### process customizations included here from DQM.Integration.config.online_customizations_cfi import * @@ -69,4 +63,4 @@ print("Running with run type = ", process.runType.getRunType()) if (process.runType.getRunType() == process.runType.hi_run): - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py index 5c45b5f46ca9c..ae5461b739751 100644 --- a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py @@ -2,8 +2,8 @@ import FWCore.ParameterSet.Config as cms import sys -from Configuration.Eras.Era_Run3_cff import Run3 -process = cms.Process("PIXELDQMLIVE", Run3) +from Configuration.Eras.Era_Run2_2018_pp_on_AA_cff import Run2_2018_pp_on_AA +process = cms.Process("PIXELDQMLIVE", Run2_2018_pp_on_AA) live=True unitTest = False @@ -34,16 +34,13 @@ if (unitTest): process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options elif (live): process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus elif(offlineTesting): process.load("DQM.Integration.config.fileinputsource_cfi") - from 
DQM.Integration.config.fileinputsource_cfi import options #----------------------------- # DQM Environment @@ -59,16 +56,13 @@ process.dqmEnv.subSystemFolder = TAG process.dqmSaver.tag = TAG -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = TAG -process.dqmSaverPB.runNumber = options.runNumber #----------------------------- # Magnetic Field #----------------------------- -process.load('Configuration.StandardSequences.MagneticField_cff') +process.load('Configuration.StandardSequences.MagneticField_AutoFromDBCurrent_cff') #------------------------------------------------- # GEOMETRY @@ -104,18 +98,19 @@ # PixelPhase1 Real data raw to digi process.load("EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi") -process.siPixelDigis.IncludeErrors = True +process.siPixelDigis.cpu.IncludeErrors = True if (process.runType.getRunType() == process.runType.hi_run): #-------------------------------- # Heavy Ion Configuration Changes #-------------------------------- - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") else : - process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") - process.siStripDigis.InputLabel = cms.InputTag("rawDataCollector") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") + process.siStripDigis.InputLabel = cms.InputTag("rawDataCollector") + ## Collision Reconstruction process.load("Configuration.StandardSequences.RawToDigi_Data_cff") @@ -165,7 +160,7 @@ # Scheduling #-------------------------- -process.DQMmodules = cms.Sequence(process.dqmEnv* process.dqmSaver*process.dqmSaverPB) +process.DQMmodules = cms.Sequence(process.dqmEnv* process.dqmSaver) process.RecoForDQM_LocalReco = 
cms.Sequence(process.siPixelDigis*process.siStripDigis*process.gtDigis*process.trackerlocalreco) @@ -185,7 +180,6 @@ ##### TRIGGER SELECTION ##### process.hltHighLevel* process.scalersRawToDigi* - process.tcdsDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* @@ -231,7 +225,6 @@ process.p = cms.Path( process.hltHighLevel #trigger selection *process.scalersRawToDigi - *process.tcdsDigis *process.APVPhases *process.consecutiveHEs *process.Reco diff --git a/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py index bd15690ede85a..2770834fe7cb7 100644 --- a/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py @@ -21,15 +21,12 @@ if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options else: # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options ## #---------------------------- @@ -43,9 +40,6 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = "PixelLumi" process.dqmSaver.tag = "PixelLumi" -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = "PixelLumi" -process.dqmSaverPB.runNumber = options.runNumber if not unitTest: process.source.SelectEvents = cms.untracked.vstring("HLT_ZeroBias*","HLT_L1AlwaysTrue*", "HLT_PAZeroBias*", "HLT_PAL1AlwaysTrue*") @@ -79,7 +73,7 @@ #----------------------- # Real data raw to digi process.load("EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi") -process.siPixelDigis.IncludeErrors = True +process.siPixelDigis.cpu.IncludeErrors = True # Local 
Reconstruction process.load("RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizer_cfi") @@ -92,13 +86,13 @@ # SelectEvents = cms.vstring('HLT_600Tower*','HLT_L1*','HLT_Jet*','HLT_*Cosmic*','HLT_HT*','HLT_MinBias_*','HLT_Physics*', 'HLT_ZeroBias*','HLT_HcalNZS*')) -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") #-------------------------------- # Heavy Ion Configuration Changes #-------------------------------- if (process.runType.getRunType() == process.runType.hi_run): process.load('Configuration.StandardSequences.RawToDigi_Repacked_cff') - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") if not unitTest: process.source.SelectEvents = cms.untracked.vstring('HLT_HIL1MinimumBiasHF2AND*') @@ -134,8 +128,7 @@ process.Reco = cms.Sequence(process.siPixelDigis*process.siPixelClusters) process.DQMmodules = cms.Sequence(process.dqmEnv* process.pixel_lumi_dqm* - process.dqmSaver* - process.dqmSaverPB) + process.dqmSaver) process.p = cms.Path(process.Reco*process.DQMmodules) diff --git a/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py index 893443f74f1a2..103dd87c720fb 100644 --- a/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py @@ -13,15 +13,12 @@ if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options else: # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") -#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- #### DQM 
Environment @@ -29,9 +26,6 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'Scal' process.dqmSaver.tag = 'Scal' -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = 'Scal' -process.dqmSaverPB.runNumber = options.runNumber #----------------------------- process.load("DQMServices.Components.DQMScalInfo_cfi") @@ -74,7 +68,7 @@ process.dump = cms.EDAnalyzer('EventContentAnalyzer') # DQM Modules -process.dqmmodules = cms.Sequence(process.dqmEnv + process.dqmSaver + process.dqmSaverPB) +process.dqmmodules = cms.Sequence(process.dqmEnv + process.dqmSaver) process.evfDQMmodulesPath = cms.Path( process.l1GtUnpack* process.gtDigis* @@ -100,7 +94,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") diff --git a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py index 37e219bd456c2..13f944bce5820 100644 --- a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py @@ -2,8 +2,8 @@ import FWCore.ParameterSet.Config as cms import sys -from Configuration.Eras.Era_Run3_cff import Run3 -process = cms.Process("SiStrpDQMLive", Run3) +from Configuration.Eras.Era_Run2_2018_pp_on_AA_cff import Run2_2018_pp_on_AA +process = cms.Process("SiStrpDQMLive", Run2_2018_pp_on_AA) process.MessageLogger = cms.Service("MessageLogger", debugModules = cms.untracked.vstring('siStripDigis', @@ -32,14 +32,11 @@ # for live online DQM in P5 if (unitTest): 
process.load("DQM.Integration.config.unittestinputsource_cfi") - from DQM.Integration.config.unittestinputsource_cfi import options elif (live): process.load("DQM.Integration.config.inputsource_cfi") - from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus elif(offlineTesting): process.load("DQM.Integration.config.fileinputsource_cfi") - from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Live Environment @@ -55,9 +52,6 @@ process.dqmEnv.subSystemFolder = "SiStrip" process.dqmSaver.tag = "SiStrip" process.dqmSaver.backupLumiCount = 30 -process.dqmSaver.runNumber = options.runNumber -process.dqmSaverPB.tag = "SiStrip" -process.dqmSaverPB.runNumber = options.runNumber from DQMServices.Core.DQMEDAnalyzer import DQMEDAnalyzer process.dqmEnvTr = DQMEDAnalyzer('DQMEventInfo', @@ -70,7 +64,7 @@ #----------------------------- # Magnetic Field #----------------------------- -process.load('Configuration.StandardSequences.MagneticField_cff') +process.load('Configuration.StandardSequences.MagneticField_AutoFromDBCurrent_cff') #------------------------------------------------- # GEOMETRY @@ -214,7 +208,7 @@ # Scheduling #-------------------------- process.SiStripSources_LocalReco = cms.Sequence(process.siStripFEDMonitor*process.SiStripMonitorDigi*process.SiStripMonitorClusterReal) -process.DQMCommon = cms.Sequence(process.stripQTester*process.trackingQTester*process.dqmEnv*process.dqmEnvTr*process.dqmSaver*process.dqmSaverPB) +process.DQMCommon = cms.Sequence(process.stripQTester*process.trackingQTester*process.dqmEnv*process.dqmEnvTr*process.dqmSaver) if (process.runType.getRunType() == process.runType.hi_run): process.RecoForDQM_LocalReco = cms.Sequence(process.siPixelDigis*process.siStripDigis*process.trackerlocalreco) else : @@ -272,8 +266,6 @@ process.trackingQTester.qtestOnEndRun = cms.untracked.bool(True) process.p = cms.Path(process.scalersRawToDigi* - process.tcdsDigis* - 
process.onlineMetaDataDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* @@ -377,8 +369,6 @@ process.p = cms.Path( process.scalersRawToDigi* - process.tcdsDigis* - process.onlineMetaDataDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* @@ -473,8 +463,6 @@ process.RecoForDQM_TrkReco = cms.Sequence(process.offlineBeamSpot*process.MeasurementTrackerEvent*process.siPixelClusterShapeCache*process.recopixelvertexing*process.iterTracking_FirstStep) process.p = cms.Path(process.scalersRawToDigi* - process.tcdsDigis* - process.onlineMetaDataDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* @@ -502,7 +490,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataCollector") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataCollector") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataCollector") -process.siPixelDigis.InputLabel = cms.InputTag("rawDataCollector") +process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.ProductLabel = cms.InputTag("rawDataCollector") process.siStripFEDMonitor.RawDataTag = cms.untracked.InputTag("rawDataCollector") #-------------------------------------------------- @@ -523,7 +511,7 @@ process.muonDTDigis.inputLabel = cms.InputTag("rawDataRepacker") process.muonRPCDigis.InputLabel = cms.InputTag("rawDataRepacker") process.scalersRawToDigi.scalersInputTag = cms.InputTag("rawDataRepacker") - process.siPixelDigis.InputLabel = cms.InputTag("rawDataRepacker") + process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataRepacker") process.siStripDigis.ProductLabel = cms.InputTag("rawDataRepacker") process.siStripFEDMonitor.RawDataTag = cms.untracked.InputTag("rawDataRepacker") @@ -629,8 +617,6 @@ process.p = cms.Path( process.scalersRawToDigi* - process.tcdsDigis* - process.onlineMetaDataDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* From 
6ad3fecb75be7bcd973194560085ba788e8f0601 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sat, 4 Jul 2020 08:23:13 +0200 Subject: [PATCH 115/149] Update Pixel gain calibration scheme (for Run3) (cms-patatrack#492) Update the Patatrack code following #29333: Modify the scheme of the pixel gain calibration: instead of applying the VCal calibration in the clusterizer include it already in the gain calibration payload. --- .../SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc | 6 +++++- .../plugins/SiPixelRawToClusterGPUKernel.cu | 6 ++++-- .../plugins/SiPixelRawToClusterGPUKernel.h | 3 ++- .../SiPixelClusterizer/plugins/gpuCalibPixel.h | 8 +++++--- .../python/siPixelClustersPreSplitting_cff.py | 5 +++++ 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index 95aac36dbd197..993840c62c7f1 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -71,6 +71,7 @@ class SiPixelRawToClusterCUDA : public edm::stream::EDProducer wordFedAppender_; PixelDataFormatter::Errors errors_; + const bool isRun2_; const bool includeErrors_; const bool useQuality_; const bool usePilotBlade_; @@ -84,6 +85,7 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi gainsToken_(esConsumes()), cablingMapToken_(esConsumes( edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + isRun2_(iConfig.getParameter("isRun2")), includeErrors_(iConfig.getParameter("IncludeErrors")), useQuality_(iConfig.getParameter("UseQualityInfo")), usePilotBlade_(iConfig.getParameter("UsePilotBlade")) // Control the usage of pilot-blade data, FED=40 @@ -108,6 +110,7 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi void 
SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; + desc.add("isRun2", true); desc.add("IncludeErrors", true); desc.add("UseQualityInfo", false); desc.add("UsePilotBlade", false)->setComment("## Use pilot blades"); @@ -233,7 +236,8 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, } // end of for loop - gpuAlgo_.makeClustersAsync(gpuMap, + gpuAlgo_.makeClustersAsync(isRun2_, + gpuMap, gpuModulesToUnpack, gpuGains, *wordFedAppender_, diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index cf2b10b198692..f14808dda1e2b 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -523,7 +523,8 @@ namespace pixelgpudetails { } // Interface to outside - void SiPixelRawToClusterGPUKernel::makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, + void SiPixelRawToClusterGPUKernel::makeClustersAsync(bool isRun2, + const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const WordFedAppender &wordFed, @@ -599,7 +600,8 @@ namespace pixelgpudetails { int blocks = (std::max(int(wordCounter), int(gpuClustering::MaxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; - gpuCalibPixel::calibDigis<<>>(digis_d.moduleInd(), + gpuCalibPixel::calibDigis<<>>(isRun2, + digis_d.moduleInd(), digis_d.c_xx(), digis_d.c_yy(), digis_d.adc(), diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index ee9729f75aed2..d214e7784af48 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -168,7 
+168,8 @@ namespace pixelgpudetails { SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete; SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete; - void makeClustersAsync(const SiPixelFedCablingMapGPU* cablingMap, + void makeClustersAsync(bool isRun2, + const SiPixelFedCablingMapGPU* cablingMap, const unsigned char* modToUnp, const SiPixelGainForHLTonGPU* gains, const WordFedAppender& wordFed, diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 41e028b3c4595..50c62f44f1df8 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -13,12 +13,14 @@ namespace gpuCalibPixel { constexpr uint16_t InvId = 9999; // must be > MaxNumModules + // valid for run2 constexpr float VCaltoElectronGain = 47; // L2-4: 47 +- 4.7 constexpr float VCaltoElectronGain_L1 = 50; // L1: 49.6 +- 2.6 constexpr float VCaltoElectronOffset = -60; // L2-4: -60 +- 130 constexpr float VCaltoElectronOffset_L1 = -670; // L1: -670 +- 220 - __global__ void calibDigis(uint16_t* id, + __global__ void calibDigis(bool isRun2, + uint16_t* id, uint16_t const* __restrict__ x, uint16_t const* __restrict__ y, uint16_t* adc, @@ -41,8 +43,8 @@ namespace gpuCalibPixel { if (InvId == id[i]) continue; - float conversionFactor = id[i] < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; - float offset = id[i] < 96 ? VCaltoElectronOffset_L1 : VCaltoElectronOffset; + float conversionFactor = (isRun2) ? (id[i] < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain) : 1.f; + float offset = (isRun2) ? (id[i] < 96 ? 
VCaltoElectronOffset_L1 : VCaltoElectronOffset) : 0; bool isDeadColumn = false, isNoisyColumn = false; diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py index c80f3b16b3a43..3f8cf314ec2e2 100644 --- a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py @@ -9,6 +9,11 @@ siPixelClustersPreSplittingTask = cms.Task(siPixelClustersPreSplitting) siPixelClustersCUDAPreSplitting = _siPixelRawToClusterCUDA.clone() +from Configuration.Eras.Modifier_run3_common_cff import run3_common +run3_common.toModify(siPixelClustersCUDAPreSplitting, + isRun2=False +) + siPixelDigisClustersPreSplitting = _siPixelDigisClustersFromSoA.clone() siPixelClustersPreSplittingTaskCUDA = cms.Task( siPixelClustersCUDAPreSplitting, From e4fc4a8ab0a4df263a698bef37088f7351a17eb9 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Wed, 8 Jul 2020 08:59:31 +0200 Subject: [PATCH 116/149] Add truncation to pixel charge on GPU (cms-patatrack#501) Makes the GPU implementation of CPE and CPEFast consistent with the CPEGeneric one. 
--- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 1 + RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h | 3 ++- RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 40c335547ba78..c9d2df58dfeb0 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -36,6 +36,7 @@ namespace pixelCPEforGPU { float shiftY; float chargeWidthX; float chargeWidthY; + uint16_t pixmx; // max pix charge float x0, y0, z0; // the vertex in the local coord of the detector diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index feeff98849af2..fd1cd8c0cdd91 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -134,6 +134,7 @@ namespace gpuPixelRecHits { __syncthreads(); + auto pixmx = cpeParams->detParams(me).pixmx; for (int i = first; i < numElements; i += blockDim.x) { auto id = digis.moduleInd(i); if (id == InvId) @@ -148,7 +149,7 @@ namespace gpuPixelRecHits { assert(cl < MaxHitsInIter); auto x = digis.xx(i); auto y = digis.yy(i); - auto ch = digis.adc(i); + auto ch = std::min(digis.adc(i), pixmx); atomicAdd(&clusParams.charge[cl], ch); if (clusParams.minRow[cl] == x) atomicAdd(&clusParams.Q_f_X[cl], ch); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 1480d6a81ec0e..5a2b8f41bb988 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -112,6 +112,8 @@ void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; m_commonParamsGPU.thePitchY = 
m_DetParams[0].thePitchY; + // std::cout << "pitch & thickness " << m_commonParamsGPU.thePitchX << ' ' << m_commonParamsGPU.thePitchY << " " << m_commonParamsGPU.theThicknessB << ' ' << m_commonParamsGPU.theThicknessE << std::endl; + // zero average geometry memset(&m_averageGeometry, 0, sizeof(pixelCPEforGPU::AverageGeometry)); @@ -210,6 +212,7 @@ void PixelCPEFast::fillParamsForGpu() { #endif errorFromTemplates(p, cp, 20000.f); + g.pixmx = std::max(0, cp.pixmx); g.sx[0] = cp.sigmax; g.sx[1] = cp.sx1; g.sx[2] = cp.sx2; @@ -407,6 +410,9 @@ LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, ClusterParam auto xPos = cp.xpos[0]; auto yPos = cp.ypos[0]; + // std::cout<<" in PixelCPEFast:localPosition - pos = "< Date: Sun, 12 Jul 2020 23:24:43 +0200 Subject: [PATCH 117/149] Synchronise with CMSSW_11_2_0_pre2 --- CalibTracker/SiPixelESProducers/BuildFile.xml | 1 - .../clients/beam_dqm_sourceclient-live_cfg.py | 44 +++++++++++++++++++ .../beampixel_dqm_sourceclient-live_cfg.py | 9 ++-- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml index fc506b154c5f2..4a43231fa11cb 100644 --- a/CalibTracker/SiPixelESProducers/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/BuildFile.xml @@ -8,7 +8,6 @@ - diff --git a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py index d0e5ff6c7d5ce..c19cd7d0ba69c 100644 --- a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py @@ -1,6 +1,10 @@ from __future__ import print_function import FWCore.ParameterSet.Config as cms +# Define here the BeamSpotOnline record name, +# it will be used both in BeamMonitor setup and in payload creation/upload +BSOnlineRecordName = 'BeamSpotOnlineLegacyObjectsRcd' + #from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 
#process = cms.Process("BeamMonitor", Run2_2018) FIXME import sys @@ -24,14 +28,23 @@ live=False unitTest=True +# Switch to veto the upload of the BeamSpot conditions to the DB +# when False it performs the upload +noDB = True +if 'noDB=False' in sys.argv: + noDB=False + #--------------- # Input sources if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options elif live: process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi import options else: process.load("DQM.Integration.config.fileinputsource_cfi") + from DQM.Integration.config.fileinputsource_cfi import options #-------------------------- # HLT Filter @@ -281,6 +294,7 @@ process.load("RecoVertex.BeamSpotProducer.BeamSpot_cfi") process.dqmBeamMonitor.OnlineMode = True +process.dqmBeamMonitor.recordName = BSOnlineRecordName process.dqmBeamMonitor.resetEveryNLumi = 5 # was 10 for HI process.dqmBeamMonitor.resetPVEveryNLumi = 5 # was 10 for HI @@ -333,6 +347,36 @@ process.dqmBeamMonitor.hltResults = cms.InputTag("TriggerResults","","HLT") +#--------- +# Upload BeamSpotOnlineObject (LegacyRcd) to CondDB +process.OnlineDBOutputService = cms.Service("OnlineDBOutputService", + + DBParameters = cms.PSet( + messageLevel = cms.untracked.int32(0), + authenticationPath = cms.untracked.string('') + ), + + # Upload to CondDB + connect = cms.string('oracle://cms_orcoff_prep/CMS_CONDITIONS'), + preLoadConnectionString = cms.untracked.string('frontier://FrontierPrep/CMS_CONDITIONS'), + + runNumber = cms.untracked.uint64(options.runNumber), + lastLumiFile = cms.untracked.string(''), + writeTransactionDelay = cms.untracked.uint32(options.transDelay), + autoCommit = cms.untracked.bool(True), + toPut = cms.VPSet(cms.PSet( + record = cms.string(BSOnlineRecordName), + tag = cms.string('BSOnlineLegacy_tag'), + timetype = cms.untracked.string('Lumi'), + onlyAppendUpdatePolicy = cms.untracked.bool(True) + 
)) +) + +# If not live or noDB: produce a (local) SQLITE file +if not live or noDB: + process.OnlineDBOutputService.connect = cms.string('sqlite_file:BeamSpotOnlineLegacy.db') + process.OnlineDBOutputService.preLoadConnectionString = cms.untracked.string('sqlite_file:BeamSpotOnlineLegacy.db') + #--------- # Final path if (not process.runType.getRunType() == process.runType.hi_run): diff --git a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py index 3fdc7f2d6d2ac..3b6584f309d2a 100644 --- a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py @@ -5,19 +5,18 @@ from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 process = cms.Process("BeamPixel", Run2_2018) -unitTest=False +unitTest = False if 'unitTest=True' in sys.argv: - unitTest=True + unitTest = True + #---------------------------- # Common for PP and HI running #---------------------------- - -if unitTest: +if unitTest == True: process.load("DQM.Integration.config.unittestinputsource_cfi") else: process.load("DQM.Integration.config.inputsource_cfi") - # Use this to run locally (for testing purposes) #process.load("DQM.Integration.config.fileinputsource_cfi") From 52ea6f7f4db4c5e6e7468c1627b55db93f6531e1 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sun, 19 Jul 2020 10:49:10 +0200 Subject: [PATCH 118/149] Remove "cuda_cxx17.h" (cms-patatrack#519) Remove the C++ 17 compatibility functions now that CUDA 11.0 and later support C++ 17 natively. 
--- RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h | 1 - 1 file changed, 1 deletion(-) diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index c9d2df58dfeb0..681211b82e1af 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -10,7 +10,6 @@ #include "DataFormats/GeometrySurface/interface/SOARotation.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_cxx17.h" namespace pixelCPEforGPU { From a2151b76fb13ba6740096c0034cd76544258c578 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 8 Aug 2020 17:38:10 +0200 Subject: [PATCH 119/149] Synchronise with CMSSW_11_2_0_pre3 --- .../clients/pixel_dqm_sourceclient-live_cfg.py | 4 ++-- .../clients/sistrip_dqm_sourceclient-live_cfg.py | 12 ++++++++++-- RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 2 -- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py index ae5461b739751..5a24247036339 100644 --- a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py @@ -2,8 +2,8 @@ import FWCore.ParameterSet.Config as cms import sys -from Configuration.Eras.Era_Run2_2018_pp_on_AA_cff import Run2_2018_pp_on_AA -process = cms.Process("PIXELDQMLIVE", Run2_2018_pp_on_AA) +from Configuration.Eras.Era_Run3_cff import Run3 +process = cms.Process("PIXELDQMLIVE", Run3) live=True unitTest = False diff --git a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py index 13f944bce5820..f8da312461d40 100644 --- 
a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py @@ -2,8 +2,8 @@ import FWCore.ParameterSet.Config as cms import sys -from Configuration.Eras.Era_Run2_2018_pp_on_AA_cff import Run2_2018_pp_on_AA -process = cms.Process("SiStrpDQMLive", Run2_2018_pp_on_AA) +from Configuration.Eras.Era_Run3_cff import Run3 +process = cms.Process("SiStrpDQMLive", Run3) process.MessageLogger = cms.Service("MessageLogger", debugModules = cms.untracked.vstring('siStripDigis', @@ -266,6 +266,8 @@ process.trackingQTester.qtestOnEndRun = cms.untracked.bool(True) process.p = cms.Path(process.scalersRawToDigi* + process.tcdsDigis* + process.onlineMetaDataDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* @@ -369,6 +371,8 @@ process.p = cms.Path( process.scalersRawToDigi* + process.tcdsDigis* + process.onlineMetaDataDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* @@ -463,6 +467,8 @@ process.RecoForDQM_TrkReco = cms.Sequence(process.offlineBeamSpot*process.MeasurementTrackerEvent*process.siPixelClusterShapeCache*process.recopixelvertexing*process.iterTracking_FirstStep) process.p = cms.Path(process.scalersRawToDigi* + process.tcdsDigis* + process.onlineMetaDataDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* @@ -617,6 +623,8 @@ process.p = cms.Path( process.scalersRawToDigi* + process.tcdsDigis* + process.onlineMetaDataDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index bfa92dc57a48b..e22b18b17117a 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,9 +1,7 @@ - - From 236150b92b69c301f0911e51248eeeb4434b7afb Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 8 Aug 2020 19:08:24 +0200 Subject: [PATCH 
120/149] Remove use of boost::mpl::vector for dependent records (cms-patatrack#527) Update dependent records declarations to use edm::mpl::Vector instead of boost::mpl::vector, following #30874. --- .../interface/SiPixelGainCalibrationForHLTGPURcd.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h index 56301421f325c..ad8f0a4032588 100644 --- a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h +++ b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h @@ -1,17 +1,14 @@ #ifndef CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h #define CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h -#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" -#include "FWCore/Framework/interface/DependentRecordImplementation.h" - #include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "boost/mpl/vector.hpp" - class SiPixelGainCalibrationForHLTGPURcd : public edm::eventsetup::DependentRecordImplementation< SiPixelGainCalibrationForHLTGPURcd, - boost::mpl::vector > {}; + edm::mpl::Vector> {}; -#endif +#endif // CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h From 07f8623c9d68371d79aeb46775470ad6b485bb12 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 1 Sep 2020 12:34:03 +0200 Subject: [PATCH 121/149] Remove misleading calls to std::move (cms-patatrack#546) --- CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 2 +- .../SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc 
b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index ef229be4b9910..075d408a6f6fc 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -38,5 +38,5 @@ SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync } auto err = *error_h; err.set_data(data.get()); - return HostDataError(std::move(err), std::move(data)); + return HostDataError(err, std::move(data)); } diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc index 8817606043a60..c5b568750ad7d 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -52,7 +52,7 @@ void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, const auto& gpuDigiErrors = ctx.get(iEvent, digiErrorGetToken_); auto tmp = gpuDigiErrors.dataErrorToHostAsync(ctx.stream()); - error_ = std::move(tmp.first); + error_ = tmp.first; data_ = std::move(tmp.second); formatterErrors_ = &(gpuDigiErrors.formatterErrors()); } From 741e56c900e7d5ef8115de4d5ffa53c51ed8f347 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 2 Oct 2020 14:38:40 +0200 Subject: [PATCH 122/149] Synchronise with CMSSW_11_2_0_pre7 --- .../clients/beam_dqm_sourceclient-live_cfg.py | 5 ++++- .../beampixel_dqm_sourceclient-live_cfg.py | 9 +++++++-- .../clients/csc_dqm_sourceclient-live_cfg.py | 10 ++++++++-- .../clients/fed_dqm_sourceclient-live_cfg.py | 6 ++++++ .../clients/l1t_dqm_sourceclient-live_cfg.py | 8 +++++++- .../l1temulator_dqm_sourceclient-live_cfg.py | 7 ++++++- .../l1tstage1_dqm_sourceclient-live_cfg.py | 8 +++++++- ...l1tstage1emulator_dqm_sourceclient-live_cfg.py | 7 ++++++- .../l1tstage2_dqm_sourceclient-live_cfg.py | 8 +++++++- ...l1tstage2emulator_dqm_sourceclient-live_cfg.py | 9 ++++++++- 
.../clients/lumi_dqm_sourceclient-live_cfg.py | 7 ++++++- .../clients/physics_dqm_sourceclient-live_cfg.py | 8 +++++++- .../clients/pixel_dqm_sourceclient-live_cfg.py | 8 +++++++- .../pixellumi_dqm_sourceclient-live_cfg.py | 9 ++++++++- .../clients/scal_dqm_sourceclient-live_cfg.py | 8 +++++++- .../clients/sistrip_dqm_sourceclient-live_cfg.py | 8 +++++++- .../SiPixelRecHits/interface/PixelCPEBase.h | 13 +++++++++++++ .../plugins/SiPixelRecHitSoAFromLegacy.cc | 2 +- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 2 +- .../python/PixelCPEESProducers_cff.py | 15 +++------------ 20 files changed, 126 insertions(+), 31 deletions(-) diff --git a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py index c19cd7d0ba69c..55a38db34d1b0 100644 --- a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py @@ -57,6 +57,9 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'BeamMonitor' process.dqmSaver.tag = 'BeamMonitor' +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 'BeamMonitor' +process.dqmSaverPB.runNumber = options.runNumber process.dqmEnvPixelLess = process.dqmEnv.clone() process.dqmEnvPixelLess.subSystemFolder = 'BeamMonitor_PixelLess' @@ -228,7 +231,7 @@ # process.dqmcommon = cms.Sequence(process.dqmEnv - * process.dqmSaver) + * process.dqmSaver*process.dqmSaverPB) # process.monitor = cms.Sequence(process.dqmBeamMonitor diff --git a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py index 3b6584f309d2a..f909104a39834 100644 --- a/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beampixel_dqm_sourceclient-live_cfg.py @@ -15,10 +15,13 @@ #---------------------------- if unitTest == True: 
process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options else: process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi import options # Use this to run locally (for testing purposes) #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- @@ -34,7 +37,9 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = "BeamPixel" process.dqmSaver.tag = "BeamPixel" - +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 'BeamPixel' +process.dqmSaverPB.runNumber = options.runNumber #---------------------------- # Conditions @@ -58,7 +63,7 @@ #---------------------------- # Define Sequences #---------------------------- -process.dqmModules = cms.Sequence(process.dqmEnv + process.dqmSaver) +process.dqmModules = cms.Sequence(process.dqmEnv + process.dqmSaver + process.dqmSaverPB) process.physTrigger = cms.Sequence(process.hltTriggerTypeFilter) diff --git a/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py index 4cabf7a09bb19..747ec4d01b07a 100644 --- a/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/csc_dqm_sourceclient-live_cfg.py @@ -40,12 +40,15 @@ if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options else: # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -58,6 +61,9 @@ 
process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = "CSC" process.dqmSaver.tag = "CSC" +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = "CSC" +process.dqmSaverPB.runNumber = options.runNumber #process.DQM.collectorHost = 'pccmsdqm02.cern.ch' @@ -166,8 +172,8 @@ # Sequences #-------------------------- -#process.p = cms.Path(process.dqmCSCClient+process.dqmEnv+process.dqmSaver) -process.p = cms.Path(process.dqmCSCClient * process.muonCSCDigis * process.csc2DRecHits * process.cscSegments * process.cscMonitor + process.dqmEnv + process.dqmSaver) +#process.p = cms.Path(process.dqmCSCClient+process.dqmEnv+process.dqmSaver+process.dqmSaverPB) +process.p = cms.Path(process.dqmCSCClient * process.muonCSCDigis * process.csc2DRecHits * process.cscSegments * process.cscMonitor + process.dqmEnv + process.dqmSaver + process.dqmSaverPB) process.castorDigis.InputLabel = cms.InputTag("rawDataCollector") diff --git a/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py index 0d6d5c54d1d3a..b1da8dc477c4a 100644 --- a/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py @@ -25,11 +25,16 @@ # Input: if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options else: process.load('DQM.Integration.config.inputsource_cfi') + from DQM.Integration.config.inputsource_cfi import options # Output: process.dqmEnv.subSystemFolder = 'FED' process.dqmSaver.tag = 'FED' +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 'FED' +process.dqmSaverPB.runNumber = options.runNumber # Subsystem sequences @@ -142,6 +147,7 @@ process.DQMmodulesPath = cms.Path( process.dqmEnv + process.dqmSaver + + process.dqmSaverPB ) process.schedule = cms.Schedule( diff --git 
a/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py index 85ce46d7f7ace..b2358e79a1bb6 100644 --- a/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1t_dqm_sourceclient-live_cfg.py @@ -16,9 +16,11 @@ # # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") +from DQM.Integration.config.inputsource_cfi import options # # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -26,6 +28,9 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'L1T' process.dqmSaver.tag = 'L1T' +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 'L1T' +process.dqmSaverPB.runNumber = options.runNumber # # references needed @@ -96,7 +101,8 @@ # process.dqmEndPath = cms.EndPath( process.dqmEnv * - process.dqmSaver + process.dqmSaver * + process.dqmSaverPB ) # diff --git a/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py index 9ca55cca06428..e2c9f057f04b1 100644 --- a/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1temulator_dqm_sourceclient-live_cfg.py @@ -16,9 +16,11 @@ # # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") +from DQM.Integration.config.inputsource_cfi import options # # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -30,6 +32,9 @@ # for local test process.dqmEnv.subSystemFolder = 'L1TEMU' process.dqmSaver.tag = 'L1TEMU' +process.dqmSaver.runNumber = options.runNumber 
+process.dqmSaverPB.tag = 'L1TEMU' +process.dqmSaverPB.runNumber = options.runNumber # # no references needed @@ -88,7 +93,7 @@ process.l1EmulatorMonitorClientPath = cms.Path(process.l1EmulatorMonitorClient) # -process.l1EmulatorMonitorEndPath = cms.EndPath(process.dqmEnv*process.dqmSaver) +process.l1EmulatorMonitorEndPath = cms.EndPath(process.dqmEnv*process.dqmSaver*process.dqmSaverPB) # process.valCscTriggerPrimitiveDigis.gangedME1a = cms.untracked.bool(False) diff --git a/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py index ca0db40e25687..93df769c2dfe4 100644 --- a/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage1_dqm_sourceclient-live_cfg.py @@ -16,9 +16,11 @@ # # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") +from DQM.Integration.config.inputsource_cfi import options # # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -27,6 +29,9 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'L1TStage1' process.dqmSaver.tag = 'L1TStage1' +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 'L1TStage1' +process.dqmSaverPB.runNumber = options.runNumber # # references needed @@ -101,7 +106,8 @@ # process.dqmEndPath = cms.EndPath( process.dqmEnv * - process.dqmSaver + process.dqmSaver * + process.dqmSaverPB ) # diff --git a/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py index 9809652f90f3e..4ed9c5e298890 100644 --- a/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py +++ 
b/DQM/Integration/python/clients/l1tstage1emulator_dqm_sourceclient-live_cfg.py @@ -16,9 +16,11 @@ # # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") +from DQM.Integration.config.inputsource_cfi import options # # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -28,6 +30,9 @@ # for local test process.dqmEnv.subSystemFolder = 'L1TEMUStage1' process.dqmSaver.tag = 'L1TEMUStage1' +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 'L1TEMUStage1' +process.dqmSaverPB.runNumber = options.runNumber # # no references needed @@ -88,7 +93,7 @@ process.l1EmulatorMonitorClientPath = cms.Path(process.l1EmulatorMonitorClient) # -process.l1EmulatorMonitorEndPath = cms.EndPath(process.dqmEnv*process.dqmSaver) +process.l1EmulatorMonitorEndPath = cms.EndPath(process.dqmEnv*process.dqmSaver*process.dqmSaverPB) # diff --git a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py index 4e8abe5391b2b..580d7a2136841 100644 --- a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py @@ -13,12 +13,15 @@ if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options else: # Live Online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi import options # # Testing in lxplus # process.load("DQM.Integration.config.fileinputsource_cfi") +# from DQM.Integration.config.fileinputsource_cfi import options # process.load("FWCore.MessageLogger.MessageLogger_cfi") # process.MessageLogger.cerr.FwkReport.reportEvery = 1 @@ -39,8 +42,11 @@ process.dqmEnv.subSystemFolder = "L1T" 
process.dqmSaver.tag = "L1T" +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = "L1T" +process.dqmSaverPB.runNumber = options.runNumber -process.dqmEndPath = cms.EndPath(process.dqmEnv * process.dqmSaver) +process.dqmEndPath = cms.EndPath(process.dqmEnv * process.dqmSaver * process.dqmSaverPB) #-------------------------------------------------- # Standard Unpacking Path diff --git a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py index 8ae678f923620..1d868837300b0 100644 --- a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py @@ -13,12 +13,15 @@ if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options else: # Live Online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi import options # Testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options # Required to load Global Tag process.load("DQM.Integration.config.FrontierCondition_GT_cfi") @@ -35,10 +38,14 @@ process.dqmEnv.subSystemFolder = "L1TEMU" process.dqmSaver.tag = "L1TEMU" +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = "L1TEMU" +process.dqmSaverPB.runNumber = options.runNumber process.dqmEndPath = cms.EndPath( process.dqmEnv * - process.dqmSaver + process.dqmSaver * + process.dqmSaverPB ) #-------------------------------------------------- diff --git a/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py index e9fb481e5581b..e439454513880 100644 --- a/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py +++ 
b/DQM/Integration/python/clients/lumi_dqm_sourceclient-live_cfg.py @@ -8,6 +8,7 @@ # Event Source #---------------------------- process.load("DQM.Integration.config.inputsource_cfi") +from DQM.Integration.config.inputsource_cfi import options #process.DQMEventStreamHttpReader.consumerName = 'DQM Luminosity Consumer' #process.DQMEventStreamHttpReader.SelectHLTOutput = cms.untracked.string('hltOutputALCALUMIPIXELS') @@ -17,6 +18,9 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = "Info/Lumi" process.dqmSaver.tag = "Lumi" +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = "Lumi" +process.dqmSaverPB.runNumber = options.runNumber #--------------------------------------------- # Global Tag @@ -63,7 +67,8 @@ process.dqmmodules = cms.Sequence(process.dqmEnv + process.expressLumiProducer + process.dqmLumiMonitor - + process.dqmSaver) + + process.dqmSaver + + process.dqmSaverPB) #---------------------------- # Proton-Proton Running Stuff #---------------------------- diff --git a/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py index 5638bc8ef940a..fef661979c427 100644 --- a/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/physics_dqm_sourceclient-live_cfg.py @@ -11,9 +11,11 @@ # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") +from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Environment @@ -22,6 +24,9 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'Physics' process.dqmSaver.tag = 'Physics' +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 'Physics' +process.dqmSaverPB.runNumber 
= options.runNumber # 0=random, 1=physics, 2=calibration, 3=technical process.hltTriggerTypeFilter = cms.EDFilter("HLTTriggerTypeFilter", @@ -47,7 +52,8 @@ # process.dump * process.qcdLowPtDQM * process.dqmEnv * - process.dqmSaver + process.dqmSaver * + process.dqmSaverPB ) process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") diff --git a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py index 5a24247036339..10477ddfcec84 100644 --- a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py @@ -34,13 +34,16 @@ if (unitTest): process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options elif (live): process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus elif(offlineTesting): process.load("DQM.Integration.config.fileinputsource_cfi") + from DQM.Integration.config.fileinputsource_cfi import options #----------------------------- # DQM Environment @@ -56,6 +59,9 @@ process.dqmEnv.subSystemFolder = TAG process.dqmSaver.tag = TAG +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = TAG +process.dqmSaverPB.runNumber = options.runNumber #----------------------------- @@ -160,7 +166,7 @@ # Scheduling #-------------------------- -process.DQMmodules = cms.Sequence(process.dqmEnv* process.dqmSaver) +process.DQMmodules = cms.Sequence(process.dqmEnv* process.dqmSaver*process.dqmSaverPB) process.RecoForDQM_LocalReco = cms.Sequence(process.siPixelDigis*process.siStripDigis*process.gtDigis*process.trackerlocalreco) diff --git a/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py index 2770834fe7cb7..50213e303b8de 100644 --- 
a/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/pixellumi_dqm_sourceclient-live_cfg.py @@ -21,12 +21,15 @@ if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options else: # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options ## #---------------------------- @@ -40,6 +43,9 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = "PixelLumi" process.dqmSaver.tag = "PixelLumi" +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = "PixelLumi" +process.dqmSaverPB.runNumber = options.runNumber if not unitTest: process.source.SelectEvents = cms.untracked.vstring("HLT_ZeroBias*","HLT_L1AlwaysTrue*", "HLT_PAZeroBias*", "HLT_PAL1AlwaysTrue*") @@ -128,7 +134,8 @@ process.Reco = cms.Sequence(process.siPixelDigis*process.siPixelClusters) process.DQMmodules = cms.Sequence(process.dqmEnv* process.pixel_lumi_dqm* - process.dqmSaver) + process.dqmSaver* + process.dqmSaverPB) process.p = cms.Path(process.Reco*process.DQMmodules) diff --git a/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py index 103dd87c720fb..2decc1b774251 100644 --- a/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/scal_dqm_sourceclient-live_cfg.py @@ -13,12 +13,15 @@ if unitTest: process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options else: # for live online DQM in P5 process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi 
import options # for testing in lxplus #process.load("DQM.Integration.config.fileinputsource_cfi") +#from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- #### DQM Environment @@ -26,6 +29,9 @@ process.load("DQM.Integration.config.environment_cfi") process.dqmEnv.subSystemFolder = 'Scal' process.dqmSaver.tag = 'Scal' +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 'Scal' +process.dqmSaverPB.runNumber = options.runNumber #----------------------------- process.load("DQMServices.Components.DQMScalInfo_cfi") @@ -68,7 +74,7 @@ process.dump = cms.EDAnalyzer('EventContentAnalyzer') # DQM Modules -process.dqmmodules = cms.Sequence(process.dqmEnv + process.dqmSaver) +process.dqmmodules = cms.Sequence(process.dqmEnv + process.dqmSaver + process.dqmSaverPB) process.evfDQMmodulesPath = cms.Path( process.l1GtUnpack* process.gtDigis* diff --git a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py index f8da312461d40..77d21710d1ed3 100644 --- a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py @@ -32,11 +32,14 @@ # for live online DQM in P5 if (unitTest): process.load("DQM.Integration.config.unittestinputsource_cfi") + from DQM.Integration.config.unittestinputsource_cfi import options elif (live): process.load("DQM.Integration.config.inputsource_cfi") + from DQM.Integration.config.inputsource_cfi import options # for testing in lxplus elif(offlineTesting): process.load("DQM.Integration.config.fileinputsource_cfi") + from DQM.Integration.config.fileinputsource_cfi import options #---------------------------- # DQM Live Environment @@ -52,6 +55,9 @@ process.dqmEnv.subSystemFolder = "SiStrip" process.dqmSaver.tag = "SiStrip" process.dqmSaver.backupLumiCount = 30 +process.dqmSaver.runNumber = options.runNumber +process.dqmSaverPB.tag = 
"SiStrip" +process.dqmSaverPB.runNumber = options.runNumber from DQMServices.Core.DQMEDAnalyzer import DQMEDAnalyzer process.dqmEnvTr = DQMEDAnalyzer('DQMEventInfo', @@ -208,7 +214,7 @@ # Scheduling #-------------------------- process.SiStripSources_LocalReco = cms.Sequence(process.siStripFEDMonitor*process.SiStripMonitorDigi*process.SiStripMonitorClusterReal) -process.DQMCommon = cms.Sequence(process.stripQTester*process.trackingQTester*process.dqmEnv*process.dqmEnvTr*process.dqmSaver) +process.DQMCommon = cms.Sequence(process.stripQTester*process.trackingQTester*process.dqmEnv*process.dqmEnvTr*process.dqmSaver*process.dqmSaverPB) if (process.runType.getRunType() == process.runType.hi_run): process.RecoForDQM_LocalReco = cms.Sequence(process.siPixelDigis*process.siStripDigis*process.trackerlocalreco) else : diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index 80cc68d3db85f..4b569438aa130 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -253,6 +253,19 @@ class PixelCPEBase : public PixelClusterParameterEstimator { bool DoLorentz_; bool LoadTemplatesFromDB_; + //errors for template reco for edge hits, based on observed residuals from + //studies likely done in 2011... + static constexpr float xEdgeXError_ = 23.0f; + static constexpr float xEdgeYError_ = 39.0f; + + static constexpr float yEdgeXError_ = 24.0f; + static constexpr float yEdgeYError_ = 96.0f; + + static constexpr float bothEdgeXError_ = 31.0f; + static constexpr float bothEdgeYError_ = 90.0f; + + static constexpr float clusterSplitMaxError_ = 7777.7f; + //--------------------------------------------------------------------------- // Geometrical services to subclasses. 
//--------------------------------------------------------------------------- diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index d33e488344142..2afe12753097b 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -92,7 +92,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv const reco::BeamSpot& bs = iEvent.get(bsGetToken_); - BeamSpotCUDA::Data bsHost; + BeamSpotPOD bsHost; bsHost.x = bs.x0(); bsHost.y = bs.y0(); bsHost.z = bs.z0(); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index fd1cd8c0cdd91..17cd5aad4db52 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -14,7 +14,7 @@ namespace gpuPixelRecHits { __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams, - BeamSpotCUDA::Data const* __restrict__ bs, + BeamSpotPOD const* __restrict__ bs, SiPixelDigisCUDA::DeviceConstView const* __restrict__ pdigis, int numElements, SiPixelClustersCUDA::DeviceConstView const* __restrict__ pclusters, diff --git a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py index 8e28bbb175181..ea9fab563d164 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py +++ b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py @@ -3,25 +3,16 @@ # # Load all Pixel Cluster Position Estimator ESProducers # -# -# 1. RecHits using angles from module position -# -from RecoLocalTracker.SiPixelRecHits.PixelCPEInitial_cfi import * -# -# 2. 
TrackingRechits using angles from tracks -# -from RecoLocalTracker.SiPixelRecHits.PixelCPEParmError_cfi import * -# -# 3. Template algorithm +# 1. Template algorithm # from RecoLocalTracker.SiPixelRecHits.PixelCPETemplateReco_cfi import * # -# 4. Pixel Generic CPE +# 2. Pixel Generic CPE # from RecoLocalTracker.SiPixelRecHits.PixelCPEGeneric_cfi import * from RecoLocalTracker.SiPixelRecHits.PixelCPEFast_cfi import * # -# 5. ESProducer for the Magnetic-field dependent template records +# 3. ESProducer for the Magnetic-field dependent template records # from CalibTracker.SiPixelESProducers.SiPixelTemplateDBObjectESProducer_cfi import * from CalibTracker.SiPixelESProducers.SiPixel2DTemplateDBObjectESProducer_cfi import * From 39b00889c136fd415dda0b3c21420791a980ebbb Mon Sep 17 00:00:00 2001 From: AdrianoDee Date: Thu, 1 Oct 2020 14:22:22 +0200 Subject: [PATCH 123/149] Fix BS naming in siPixelRecHitsCUDAPreSplitting (cms-patatrack#551) Fixing BeamSpotCUDA naming in siPixelRecHitsCUDAPreSplitting to be compliant with changes made in cms-sw#31130 . 
--- .../SiPixelRecHits/python/SiPixelRecHits_cfi.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 8995471470f37..2a0c005e51622 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -28,7 +28,10 @@ siPixelRecHitsPreSplittingTask = cms.Task(siPixelRecHitsPreSplitting) -siPixelRecHitsCUDAPreSplitting = _siPixelRecHitCUDA.clone() +siPixelRecHitsCUDAPreSplitting = _siPixelRecHitCUDA.clone( + beamSpot = "offlineBeamSpotToCUDA" +) + siPixelRecHitsLegacyPreSplitting = _siPixelRecHitFromSOA.clone() siPixelRecHitsPreSplittingTaskCUDA = cms.Task( siPixelRecHitsCUDAPreSplitting, @@ -39,4 +42,3 @@ _siPixelRecHitsPreSplittingTask_gpu = siPixelRecHitsPreSplittingTask.copy() _siPixelRecHitsPreSplittingTask_gpu.add(siPixelRecHitsPreSplittingTaskCUDA) gpu.toReplaceWith(siPixelRecHitsPreSplittingTask, _siPixelRecHitsPreSplittingTask_gpu) - From d243b8f6ac581784b2b8b57b12848d461b234185 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 2 Oct 2020 11:24:26 +0200 Subject: [PATCH 124/149] Update ESProducers following cms-sw#31556 (cms-patatrack#555) Remove setConsumes() from ESConsumesCollector: the functionality was replaced with the type-deducing consumes(). 
--- ...PixelGainCalibrationForHLTGPUESProducer.cc | 4 +++- .../plugins/PixelCPEFastESProducer.cc | 21 +++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc index bf8a0b2c5a75f..37055ea3e00ca 100644 --- a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc +++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc @@ -25,7 +25,9 @@ class SiPixelGainCalibrationForHLTGPUESProducer : public edm::ESProducer { }; SiPixelGainCalibrationForHLTGPUESProducer::SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig) { - setWhatProduced(this).setConsumes(gainsToken_).setConsumes(geometryToken_); + auto cc = setWhatProduced(this); + gainsToken_ = cc.consumes(); + geometryToken_ = cc.consumes(); } void SiPixelGainCalibrationForHLTGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc index 8b6cba9a9232b..3f7c9aca2a974 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc @@ -39,20 +39,19 @@ class PixelCPEFastESProducer : public edm::ESProducer { using namespace edm; -PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet& p) { - std::string myname = p.getParameter("ComponentName"); - auto magname = p.getParameter("MagneticFieldRecord"); +PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet& p) : pset_(p) { + auto const& myname = p.getParameter("ComponentName"); + auto const& magname = p.getParameter("MagneticFieldRecord"); UseErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); - pset_ = 
p; - auto c = setWhatProduced(this, myname); - c.setConsumes(magfieldToken_, magname) - .setConsumes(pDDToken_) - .setConsumes(hTTToken_) - .setConsumes(lorentzAngleToken_, edm::ESInputTag("")); - c.setConsumes(lorentzAngleWidthToken_, edm::ESInputTag("", "forWidth")); + auto cc = setWhatProduced(this, myname); + magfieldToken_ = cc.consumes(magname); + pDDToken_ = cc.consumes(); + hTTToken_ = cc.consumes(); + lorentzAngleToken_ = cc.consumes(edm::ESInputTag("")); + lorentzAngleWidthToken_ = cc.consumes(edm::ESInputTag("", "forWidth")); if (UseErrorsFromTemplates_) { - c.setConsumes(genErrorDBObjectToken_); + genErrorDBObjectToken_ = cc.consumes(); } } From 6dc465ec0d78489ada1519d77c508961e5b667be Mon Sep 17 00:00:00 2001 From: Suvankar Roy Chowdhury Date: Mon, 12 Oct 2020 16:53:54 +0200 Subject: [PATCH 125/149] Update the validation sequence for pixel-only tracking workflows (cms-patatrack#548) --- .../python/globalValidation_cff.py | 5 +++ .../python/postValidation_cff.py | 12 +++---- .../SiPixelPhase1OfflineDQM_sourceV_cff.py | 35 +++++++++++++++++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/Validation/Configuration/python/globalValidation_cff.py b/Validation/Configuration/python/globalValidation_cff.py index 0a98acf89ad69..b43f8ee22e3a4 100644 --- a/Validation/Configuration/python/globalValidation_cff.py +++ b/Validation/Configuration/python/globalValidation_cff.py @@ -219,8 +219,13 @@ _phase_1_globalValidation = globalValidation.copy() _phase_1_globalValidation += siPixelPhase1OfflineDQM_sourceV + +_phase_1_globalValidationPixelTrackingOnly = globalValidationPixelTrackingOnly.copy() +_phase_1_globalValidationPixelTrackingOnly += siPixelPhase1ValidationPixelTrackingOnly_sourceV + from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel (phase1Pixel & ~fastSim).toReplaceWith( globalValidation, _phase_1_globalValidation ) #module siPixelPhase1OfflineDQM_sourceV can't run in FastSim since siPixelClusters of type edmNew::DetSetVector 
are not produced +(phase1Pixel & ~fastSim).toReplaceWith( globalValidationPixelTrackingOnly, _phase_1_globalValidationPixelTrackingOnly ) #module siPixelPhase1OfflineDQM_sourceV can't run in FastSim since siPixelClusters of type edmNew::DetSetVector are not produced _run3_globalValidation = globalValidation.copy() _run3_globalValidation += gemSimValid diff --git a/Validation/Configuration/python/postValidation_cff.py b/Validation/Configuration/python/postValidation_cff.py index 8f310eacfce2e..ae365e85187a6 100644 --- a/Validation/Configuration/python/postValidation_cff.py +++ b/Validation/Configuration/python/postValidation_cff.py @@ -115,8 +115,13 @@ _phase1_postValidation = postValidation.copy() _phase1_postValidation += siPixelPhase1OfflineDQM_harvestingV + +_phase1_postValidation_trackingOnly = postValidation_trackingOnly.copy() +_phase1_postValidation_trackingOnly += siPixelPhase1OfflineDQM_harvestingV + from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toReplaceWith( postValidation, _phase1_postValidation ) +phase1Pixel.toReplaceWith( postValidation_trackingOnly, _phase1_postValidation_trackingOnly) _run3_postValidation = postValidation.copy() _run3_postValidation += MuonGEMHitsPostProcessors @@ -129,16 +134,9 @@ _phase2_postValidation += MuonME0SegPostProcessors _phase2_postValidation += trackerphase2ValidationHarvesting -_phase2_ge0_postValidation = _run3_postValidation.copy() -_phase2_ge0_postValidation += hgcalPostProcessor -_phase2_ge0_postValidation += trackerphase2ValidationHarvesting - from Configuration.Eras.Modifier_run2_GEM_2017_cff import run2_GEM_2017 run2_GEM_2017.toReplaceWith( postValidation, _run3_postValidation ) from Configuration.Eras.Modifier_run3_GEM_cff import run3_GEM run3_GEM.toReplaceWith( postValidation, _run3_postValidation ) from Configuration.Eras.Modifier_phase2_hgcal_cff import phase2_hgcal phase2_hgcal.toReplaceWith( postValidation, _phase2_postValidation ) -from 
Configuration.Eras.Modifier_phase2_GE0_cff import phase2_GE0 -(phase2_GE0 & phase2_hgcal).toReplaceWith( postValidation, _phase2_ge0_postValidation ) -phase2_GE0.toReplaceWith( postValidation_muons, postValidation_muons.copyAndExclude([MuonME0DigisPostProcessors, MuonME0SegPostProcessors]) ) diff --git a/Validation/SiPixelPhase1ConfigV/python/SiPixelPhase1OfflineDQM_sourceV_cff.py b/Validation/SiPixelPhase1ConfigV/python/SiPixelPhase1OfflineDQM_sourceV_cff.py index 1a5692bf6677b..d0d98251ea9d4 100644 --- a/Validation/SiPixelPhase1ConfigV/python/SiPixelPhase1OfflineDQM_sourceV_cff.py +++ b/Validation/SiPixelPhase1ConfigV/python/SiPixelPhase1OfflineDQM_sourceV_cff.py @@ -20,3 +20,38 @@ + SiPixelPhase1TrackingParticleAnalyzerV ) +### Pixel Tracking-only configurations for the GPU workflow + +# Pixel digis +pixelOnlyDigisAnalyzerV = SiPixelPhase1DigisAnalyzerV.clone() + +# Pixel clusters +pixelOnlyTrackClustersAnalyzerV = SiPixelPhase1TrackClustersAnalyzerV.clone( + clusters = 'siPixelClustersPreSplitting', + tracks = 'pixelTracks' +) + +# Pixel rechit analyzer +pixelOnlyRecHitsAnalyzerV = SiPixelPhase1RecHitsAnalyzerV.clone( + src = 'siPixelRecHitsPreSplitting', + pixelSimLinkSrc = 'simSiPixelDigis', + ROUList = ('TrackerHitsPixelBarrelLowTof', + 'TrackerHitsPixelBarrelHighTof', + 'TrackerHitsPixelEndcapLowTof', + 'TrackerHitsPixelEndcapHighTof') +) + +# Pixel hits +pixelOnlyHitsAnalyzerV = SiPixelPhase1HitsAnalyzerV.clone( + tracksTag = 'pixelTracks' +) + +# Tracking particles +pixelOnlyTrackingParticleAnalyzerV = SiPixelPhase1TrackingParticleAnalyzerV.clone() + +siPixelPhase1ValidationPixelTrackingOnly_sourceV = cms.Sequence(pixelOnlyDigisAnalyzerV + + pixelOnlyTrackClustersAnalyzerV + + pixelOnlyHitsAnalyzerV + + pixelOnlyRecHitsAnalyzerV + + pixelOnlyTrackingParticleAnalyzerV +) From 2a32b42f331bd44dbf34777b11856c37c461228b Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 20 Oct 2020 21:00:42 +0200 Subject: [PATCH 126/149] Define CUDA-specific attributes for 
compatibility with GCC (cms-patatrack#559) Including <cuda_runtime.h> would pull in the dependency on all of CUDA; instead, just define away the CUDA specific attributes to keep GCC happy. --- .../SiPixelObjects/interface/SiPixelGainForHLTonGPU.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h index 8ce3924e54609..6326b594e2771 100644 --- a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -5,6 +5,17 @@ #include #include +// including <cuda_runtime.h> would pull in the dependency on all of CUDA; +// instead, just define away the CUDA specific attributes to keep GCC happy. +#ifndef __CUDACC__ +#ifndef __host__ +#define __host__ +#endif // __host__ +#ifndef __device__ +#define __device__ +#endif // __device__ +#endif // __CUDACC__ + #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" struct SiPixelGainForHLTonGPU_DecodingStructure { From 86d7fb5f9e1e92246b540e734bd13d057021b1a5 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 23 Oct 2020 13:43:58 +0200 Subject: [PATCH 127/149] Synchronise with CMSSW_11_2_0_pre8 --- .../python/clients/beam_dqm_sourceclient-live_cfg.py | 2 +- .../Configuration/python/RecoLocalTracker_cff.py | 1 + Validation/Configuration/python/postValidation_cff.py | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py index 55a38db34d1b0..50fda0503d1ed 100644 --- a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py @@ -73,7 +73,7 @@ from Configuration.AlCa.GlobalTag import GlobalTag as gtCustomise process.GlobalTag = gtCustomise(process.GlobalTag, 'auto:run2_data', '') # you may need to set manually the GT in the
line below - process.GlobalTag.globaltag = '100X_upgrade2018_realistic_v10' + #process.GlobalTag.globaltag = '100X_upgrade2018_realistic_v10' #---------------------------- # BeamMonitor diff --git a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py index 08f871e45f8d7..35a72f0edb08f 100644 --- a/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py +++ b/RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py @@ -23,6 +23,7 @@ from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import * from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import * +from RecoLocalTracker.SiPhase2VectorHitBuilder.siPhase2RecHitMatcher_cfi import * _pixeltrackerlocalrecoTask_phase2 = pixeltrackerlocalrecoTask.copy() _pixeltrackerlocalrecoTask_phase2.add(siPhase2Clusters) diff --git a/Validation/Configuration/python/postValidation_cff.py b/Validation/Configuration/python/postValidation_cff.py index ae365e85187a6..8468943e81b04 100644 --- a/Validation/Configuration/python/postValidation_cff.py +++ b/Validation/Configuration/python/postValidation_cff.py @@ -134,9 +134,16 @@ _phase2_postValidation += MuonME0SegPostProcessors _phase2_postValidation += trackerphase2ValidationHarvesting +_phase2_ge0_postValidation = _run3_postValidation.copy() +_phase2_ge0_postValidation += hgcalPostProcessor +_phase2_ge0_postValidation += trackerphase2ValidationHarvesting + from Configuration.Eras.Modifier_run2_GEM_2017_cff import run2_GEM_2017 run2_GEM_2017.toReplaceWith( postValidation, _run3_postValidation ) from Configuration.Eras.Modifier_run3_GEM_cff import run3_GEM run3_GEM.toReplaceWith( postValidation, _run3_postValidation ) from Configuration.Eras.Modifier_phase2_hgcal_cff import phase2_hgcal phase2_hgcal.toReplaceWith( postValidation, _phase2_postValidation ) +from Configuration.Eras.Modifier_phase2_GE0_cff import phase2_GE0 +(phase2_GE0 & 
phase2_hgcal).toReplaceWith( postValidation, _phase2_ge0_postValidation ) +phase2_GE0.toReplaceWith( postValidation_muons, postValidation_muons.copyAndExclude([MuonME0DigisPostProcessors, MuonME0SegPostProcessors]) ) From fb7932469ac4aea05a13043364d7b9544988a34b Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 12 Nov 2020 16:42:33 +0100 Subject: [PATCH 128/149] Bugfix: add missing GPU memory free to PixelCPEFast (cms-patatrack#570) --- RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 5a2b8f41bb988..f3b3f308fa9d3 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -311,6 +311,7 @@ PixelCPEFast::GPUData::~GPUData() { cudaFree((void*)h_paramsOnGPU.m_commonParams); cudaFree((void*)h_paramsOnGPU.m_detParams); cudaFree((void*)h_paramsOnGPU.m_averageGeometry); + cudaFree((void*)h_paramsOnGPU.m_layerGeometry); cudaFree(d_paramsOnGPU); } } From 5315cb9dd82cce5e57f82ce72dd092e46bd93100 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 16 Nov 2020 11:56:30 +0100 Subject: [PATCH 129/149] Synchronise with CMSSW_11_2_0_pre9 --- .../python/clients/pixel_dqm_sourceclient-live_cfg.py | 5 +++-- .../python/clients/sistrip_dqm_sourceclient-live_cfg.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py index 10477ddfcec84..b849be77b6e63 100644 --- a/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/pixel_dqm_sourceclient-live_cfg.py @@ -68,7 +68,7 @@ # Magnetic Field #----------------------------- -process.load('Configuration.StandardSequences.MagneticField_AutoFromDBCurrent_cff') 
+process.load('Configuration.StandardSequences.MagneticField_cff') #------------------------------------------------- # GEOMETRY @@ -117,7 +117,6 @@ process.siPixelDigis.cpu.InputLabel = cms.InputTag("rawDataCollector") process.siStripDigis.InputLabel = cms.InputTag("rawDataCollector") - ## Collision Reconstruction process.load("Configuration.StandardSequences.RawToDigi_Data_cff") @@ -186,6 +185,7 @@ ##### TRIGGER SELECTION ##### process.hltHighLevel* process.scalersRawToDigi* + process.tcdsDigis* process.APVPhases* process.consecutiveHEs* process.hltTriggerTypeFilter* @@ -231,6 +231,7 @@ process.p = cms.Path( process.hltHighLevel #trigger selection *process.scalersRawToDigi + *process.tcdsDigis *process.APVPhases *process.consecutiveHEs *process.Reco diff --git a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py index 77d21710d1ed3..889fc8a978d22 100644 --- a/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/sistrip_dqm_sourceclient-live_cfg.py @@ -70,7 +70,7 @@ #----------------------------- # Magnetic Field #----------------------------- -process.load('Configuration.StandardSequences.MagneticField_AutoFromDBCurrent_cff') +process.load('Configuration.StandardSequences.MagneticField_cff') #------------------------------------------------- # GEOMETRY From fad0b22b76aa5dac5d4c1b0c6f51d39473fffaf2 Mon Sep 17 00:00:00 2001 From: Tamas Vami Date: Tue, 24 Nov 2020 15:31:47 -0500 Subject: [PATCH 130/149] Remove partial handling of the Pilot Blade from GPU code (cms-patatrack#581) --- .../plugins/SiPixelRawToClusterCUDA.cc | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index 993840c62c7f1..402ab2b675d31 100644 --- 
a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -74,7 +74,6 @@ class SiPixelRawToClusterCUDA : public edm::stream::EDProducer("CablingMapLabel")))), isRun2_(iConfig.getParameter("isRun2")), includeErrors_(iConfig.getParameter("IncludeErrors")), - useQuality_(iConfig.getParameter("UseQualityInfo")), - usePilotBlade_(iConfig.getParameter("UsePilotBlade")) // Control the usage of pilot-blade data, FED=40 + useQuality_(iConfig.getParameter("UseQualityInfo")) { if (includeErrors_) { digiErrorPutToken_ = produces>(); @@ -99,9 +97,6 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi regions_ = std::make_unique(iConfig, consumesCollector()); } - if (usePilotBlade_) - edm::LogInfo("SiPixelRawToCluster") << " Use pilot blade data (FED 40)"; - edm::Service cs; if (cs->enabled()) { wordFedAppender_ = std::make_unique(); @@ -113,7 +108,6 @@ void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& d desc.add("isRun2", true); desc.add("IncludeErrors", true); desc.add("UseQualityInfo", false); - desc.add("UsePilotBlade", false)->setComment("## Use pilot blades"); desc.add("InputLabel", edm::InputTag("rawDataCollector")); { edm::ParameterSetDescription psd0; @@ -182,8 +176,6 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() ErrorChecker errorcheck; for (int fedId : fedIds_) { - if (!usePilotBlade_ && (fedId == 40)) - continue; // skip pilot blade data if (regions_ && !regions_->mayUnpackFED(fedId)) continue; From b612094433077b1039b5ca5aa8eb3da4b4769aa2 Mon Sep 17 00:00:00 2001 From: Marco Musich Date: Tue, 24 Nov 2020 22:11:43 +0100 Subject: [PATCH 131/149] Migrate GPU pixel modules to ESConsumes (cms-patatrack#577) --- .../plugins/SiPixelDigiErrorsFromSoA.cc | 10 ++++---- .../plugins/SiPixelDigisClustersFromSoA.cc | 10 ++++---- 
.../plugins/SiPixelRecHitCUDA.cc | 24 +++++-------------- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 23 +++++++----------- 4 files changed, 24 insertions(+), 43 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc index 270598b0528b8..a3df23457f0cc 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc @@ -30,6 +30,8 @@ class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { private: void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + const edm::ESGetToken cablingToken_; + edm::EDGetTokenT digiErrorSoAGetToken_; edm::EDPutTokenT> errorPutToken_; @@ -39,7 +41,6 @@ class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { edm::ESWatcher cablingWatcher_; std::unique_ptr cabling_; - const std::string cablingMapLabel_; const std::vector tkerrorlist_; const std::vector usererrorlist_; @@ -48,12 +49,12 @@ class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { }; SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig) - : digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, + : cablingToken_(esConsumes(edm::ESInputTag("",iConfig.getParameter("CablingMapLabel")))), + digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, errorPutToken_{produces>()}, tkErrorPutToken_{produces()}, userErrorPutToken_{produces("UserErrorModules")}, disabledChannelPutToken_{produces>()}, - cablingMapLabel_(iConfig.getParameter("CablingMapLabel")), tkerrorlist_(iConfig.getParameter>("ErrorList")), usererrorlist_(iConfig.getParameter>("UserErrorList")), usePhase1_(iConfig.getParameter("UsePhase1")) {} @@ -76,8 +77,7 @@ void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup // initialize cabling map or update if necessary if 
(cablingWatcher_.check(iSetup)) { // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) - edm::ESTransientHandle cablingMap; - iSetup.get().get(cablingMapLabel_, cablingMap); + const SiPixelFedCablingMap* cablingMap = &iSetup.getData(cablingToken_); cabling_ = cablingMap->cablingTree(); LogDebug("map version:") << cabling_->version(); } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index 1622b2402925b..dbbc5c4b03284 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -57,6 +57,8 @@ class SiPixelDigisClustersFromSoA : public edm::global::EDProducer<> { private: void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + const edm::ESGetToken topoToken_; + edm::EDGetTokenT digiGetToken_; edm::EDPutTokenT> digiPutToken_; @@ -64,7 +66,8 @@ class SiPixelDigisClustersFromSoA : public edm::global::EDProducer<> { }; SiPixelDigisClustersFromSoA::SiPixelDigisClustersFromSoA(const edm::ParameterSet& iConfig) - : digiGetToken_(consumes(iConfig.getParameter("src"))), + : topoToken_(esConsumes()), + digiGetToken_(consumes(iConfig.getParameter("src"))), digiPutToken_(produces>()), clusterPutToken_(produces()) {} @@ -77,10 +80,7 @@ void SiPixelDigisClustersFromSoA::fillDescriptions(edm::ConfigurationDescription void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { const auto& digis = iEvent.get(digiGetToken_); const uint32_t nDigis = digis.size(); - - edm::ESHandle trackerTopologyHandle; - iSetup.get().get(trackerTopologyHandle); - const auto& ttopo = *trackerTopologyHandle; + const auto& ttopo = iSetup.getData(topoToken_); auto collection = std::make_unique>(); auto outputClusters = 
std::make_unique(); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc index 4d85c41339020..2f566ba4db1ec 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -6,7 +6,6 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/MakerMacros.h" @@ -34,6 +33,8 @@ class SiPixelRecHitCUDA : public edm::global::EDProducer<> { private: void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + const edm::ESGetToken cpeToken_; + // The mess with inputs will be cleaned up when migrating to the new framework edm::EDGetTokenT> tBeamSpot; edm::EDGetTokenT> token_; @@ -41,17 +42,15 @@ class SiPixelRecHitCUDA : public edm::global::EDProducer<> { edm::EDPutTokenT> tokenHit_; - std::string cpeName_; - pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_; }; SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig) - : tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), + : cpeToken_(esConsumes(edm::ESInputTag("",iConfig.getParameter("CPE")))), + tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), token_(consumes>(iConfig.getParameter("src"))), tokenDigi_(consumes>(iConfig.getParameter("src"))), - tokenHit_(produces>()), - cpeName_(iConfig.getParameter("CPE")) {} + tokenHit_(produces>()) {} void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -63,18 +62,7 @@ void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descrip } void 
SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { - // const TrackerGeometry *geom_ = nullptr; - const PixelClusterParameterEstimator* cpe_ = nullptr; - - /* - edm::ESHandle geom; - es.get().get( geom ); - geom_ = geom.product(); - */ - - edm::ESHandle hCPE; - es.get().get(cpeName_, hCPE); - cpe_ = dynamic_cast(hCPE.product()); + const PixelClusterParameterEstimator* cpe_ = dynamic_cast(&es.getData(cpeToken_)); PixelCPEFast const* fcpe = dynamic_cast(cpe_); if (!fcpe) { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 2afe12753097b..8b2ce100510ad 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -11,7 +11,6 @@ #include "DataFormats/Common/interface/Handle.h" #include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" #include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" -#include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/MakerMacros.h" @@ -41,22 +40,24 @@ class SiPixelRecHitSoAFromLegacy : public edm::global::EDProducer<> { private: void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + const edm::ESGetToken geomToken_; + const edm::ESGetToken cpeToken_; + // The mess with inputs will be cleaned up when migrating to the new framework edm::EDGetTokenT bsGetToken_; edm::EDGetTokenT clusterToken_; // Legacy Clusters edm::EDPutTokenT tokenHit_; edm::EDPutTokenT tokenModuleStart_; - - std::string const cpeName_; bool const convert2Legacy_; }; SiPixelRecHitSoAFromLegacy::SiPixelRecHitSoAFromLegacy(const edm::ParameterSet& iConfig) - : bsGetToken_{consumes(iConfig.getParameter("beamSpot"))}, 
+ : geomToken_(esConsumes()), + cpeToken_(esConsumes(edm::ESInputTag("",iConfig.getParameter("CPE")))), + bsGetToken_{consumes(iConfig.getParameter("beamSpot"))}, clusterToken_{consumes(iConfig.getParameter("src"))}, tokenHit_{produces()}, tokenModuleStart_{produces()}, - cpeName_(iConfig.getParameter("CPE")), convert2Legacy_(iConfig.getParameter("convertToLegacy")) { if (convert2Legacy_) produces(); @@ -73,16 +74,8 @@ void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions } void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { - const TrackerGeometry* geom_ = nullptr; - const PixelClusterParameterEstimator* cpe_ = nullptr; - - edm::ESHandle geom; - es.get().get(geom); - geom_ = geom.product(); - - edm::ESHandle hCPE; - es.get().get(cpeName_, hCPE); - cpe_ = dynamic_cast(hCPE.product()); + const TrackerGeometry* geom_ = &es.getData(geomToken_); + const PixelClusterParameterEstimator* cpe_ = dynamic_cast(&es.getData(cpeToken_)); PixelCPEFast const* fcpe = dynamic_cast(cpe_); if (!fcpe) { From cc51270cf31f1f0fa901dbd2d5e177ab40ff975a Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 24 Nov 2020 22:12:14 +0100 Subject: [PATCH 132/149] Clean up GPU pixel modules Apply clang-format fixes. Make configuration data members const. Save one dynamic_cast. 
--- .../plugins/SiPixelDigiErrorsFromSoA.cc | 14 +++++------ .../plugins/SiPixelRecHitCUDA.cc | 23 ++++++++----------- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 22 ++++++++---------- 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc index a3df23457f0cc..ea381948ec352 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc @@ -31,13 +31,11 @@ class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; const edm::ESGetToken cablingToken_; - - edm::EDGetTokenT digiErrorSoAGetToken_; - - edm::EDPutTokenT> errorPutToken_; - edm::EDPutTokenT tkErrorPutToken_; - edm::EDPutTokenT userErrorPutToken_; - edm::EDPutTokenT> disabledChannelPutToken_; + const edm::EDGetTokenT digiErrorSoAGetToken_; + const edm::EDPutTokenT> errorPutToken_; + const edm::EDPutTokenT tkErrorPutToken_; + const edm::EDPutTokenT userErrorPutToken_; + const edm::EDPutTokenT> disabledChannelPutToken_; edm::ESWatcher cablingWatcher_; std::unique_ptr cabling_; @@ -49,7 +47,7 @@ class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { }; SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig) - : cablingToken_(esConsumes(edm::ESInputTag("",iConfig.getParameter("CablingMapLabel")))), + : cablingToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, errorPutToken_{produces>()}, tkErrorPutToken_{produces()}, diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc index 2f566ba4db1ec..10e4b678b515c 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ 
b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -34,19 +34,16 @@ class SiPixelRecHitCUDA : public edm::global::EDProducer<> { void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; const edm::ESGetToken cpeToken_; + const edm::EDGetTokenT> tBeamSpot; + const edm::EDGetTokenT> token_; + const edm::EDGetTokenT> tokenDigi_; + const edm::EDPutTokenT> tokenHit_; - // The mess with inputs will be cleaned up when migrating to the new framework - edm::EDGetTokenT> tBeamSpot; - edm::EDGetTokenT> token_; - edm::EDGetTokenT> tokenDigi_; - - edm::EDPutTokenT> tokenHit_; - - pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_; + const pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_; }; SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig) - : cpeToken_(esConsumes(edm::ESInputTag("",iConfig.getParameter("CPE")))), + : cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), token_(consumes>(iConfig.getParameter("src"))), tokenDigi_(consumes>(iConfig.getParameter("src"))), @@ -62,11 +59,9 @@ void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descrip } void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { - const PixelClusterParameterEstimator* cpe_ = dynamic_cast(&es.getData(cpeToken_)); - - PixelCPEFast const* fcpe = dynamic_cast(cpe_); - if (!fcpe) { - throw cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; + PixelCPEFast const* fcpe = dynamic_cast(&es.getData(cpeToken_)); + if (not fcpe) { + throw cms::Exception("Configuration") << "SiPixelRecHitSoAFromLegacy can only use a CPE of type PixelCPEFast"; } edm::Handle> hclusters; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 
8b2ce100510ad..c7084f325d05b 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -42,18 +42,16 @@ class SiPixelRecHitSoAFromLegacy : public edm::global::EDProducer<> { const edm::ESGetToken geomToken_; const edm::ESGetToken cpeToken_; - - // The mess with inputs will be cleaned up when migrating to the new framework - edm::EDGetTokenT bsGetToken_; - edm::EDGetTokenT clusterToken_; // Legacy Clusters - edm::EDPutTokenT tokenHit_; - edm::EDPutTokenT tokenModuleStart_; - bool const convert2Legacy_; + const edm::EDGetTokenT bsGetToken_; + const edm::EDGetTokenT clusterToken_; // Legacy Clusters + const edm::EDPutTokenT tokenHit_; + const edm::EDPutTokenT tokenModuleStart_; + const bool convert2Legacy_; }; SiPixelRecHitSoAFromLegacy::SiPixelRecHitSoAFromLegacy(const edm::ParameterSet& iConfig) : geomToken_(esConsumes()), - cpeToken_(esConsumes(edm::ESInputTag("",iConfig.getParameter("CPE")))), + cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), bsGetToken_{consumes(iConfig.getParameter("beamSpot"))}, clusterToken_{consumes(iConfig.getParameter("src"))}, tokenHit_{produces()}, @@ -75,11 +73,9 @@ void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { const TrackerGeometry* geom_ = &es.getData(geomToken_); - const PixelClusterParameterEstimator* cpe_ = dynamic_cast(&es.getData(cpeToken_)); - - PixelCPEFast const* fcpe = dynamic_cast(cpe_); - if (!fcpe) { - throw cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; + PixelCPEFast const* fcpe = dynamic_cast(&es.getData(cpeToken_)); + if (not fcpe) { + throw cms::Exception("Configuration") << "SiPixelRecHitSoAFromLegacy can only use a CPE of type PixelCPEFast"; } auto const& cpeView = fcpe->getCPUProduct(); From 
b5d9f710d353f131ad05988b073138411209331b Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 26 Nov 2020 00:09:12 +0100 Subject: [PATCH 133/149] Apply code formatting --- .../SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index 402ab2b675d31..df16276520523 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -86,8 +86,7 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), isRun2_(iConfig.getParameter("isRun2")), includeErrors_(iConfig.getParameter("IncludeErrors")), - useQuality_(iConfig.getParameter("UseQualityInfo")) -{ + useQuality_(iConfig.getParameter("UseQualityInfo")) { if (includeErrors_) { digiErrorPutToken_ = produces>(); } From dad913385cec60353a2392e2025ba3038feb1a31 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 26 Nov 2020 09:39:06 +0100 Subject: [PATCH 134/149] Update DQM clients for PixelVertexProducer pixel vertices (cms-patatrack#584) Update DQM clients for pixel vertices produced by PixelVertexProducer instead of PrimaryVertexProducer. 
Fix the DQM/Integration unit test failures in - runtest.sh beam_dqm_sourceclient-live_cfg.py - runtest.sh beampixel_dqm_sourceclient-live_cfg.py - runtest.sh fed_dqm_sourceclient-live_cfg.py --- DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py index b1da8dc477c4a..b28f92d0f8d4e 100644 --- a/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/fed_dqm_sourceclient-live_cfg.py @@ -126,7 +126,7 @@ # Modules for the FED process.FEDModulesPath = cms.Path( process.l1tStage2Fed - + process.siPixelDigis.cpu + + process.siPixelDigis + process.SiPixelHLTSource + process.siStripFEDCheck + process.esRawToDigi From 843d35fd1f9682e400ee95c6c044e027dad6db90 Mon Sep 17 00:00:00 2001 From: Tatjana Date: Mon, 23 Nov 2020 14:25:07 +0100 Subject: [PATCH 135/149] Move SiPixelFedCablingMapGPU to CondFormats and CalibTracker (cms-patatrack#578) Move SiPixelFedCablingMapGPU code from RecoLocalTracker/SiPixelClusterizer/ to - CondFormats/SiPixelObjects/ - CalibTracker/SiPixelESProducers/ --- CalibTracker/SiPixelESProducers/BuildFile.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml index 4a43231fa11cb..ae04d111f1024 100644 --- a/CalibTracker/SiPixelESProducers/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/BuildFile.xml @@ -9,6 +9,7 @@ + From e61ac426d4778fff295137a2833d90684193c00e Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 25 Nov 2020 00:46:03 +0100 Subject: [PATCH 136/149] Rename SiPixelFedCablingMapGPU to SiPixelROCsStatusAndMapping (cms-patatrack#582) --- CalibTracker/SiPixelESProducers/BuildFile.xml | 12 +- .../SiPixelROCsStatusAndMappingWrapper.h | 56 ++++++ ...elROCsStatusAndMappingWrapperESProducer.cc | 67 +++++++ 
.../ES_SiPixelROCsStatusAndMappingWrapper.cc | 4 + .../src/SiPixelROCsStatusAndMappingWrapper.cc | 169 ++++++++++++++++++ .../interface/SiPixelROCsStatusAndMapping.h | 26 +++ .../plugins/SiPixelRawToClusterCUDA.cc | 25 +-- .../plugins/SiPixelRawToClusterGPUKernel.cu | 21 +-- .../plugins/SiPixelRawToClusterGPUKernel.h | 4 +- .../python/siPixelClustersPreSplitting_cff.py | 2 +- 10 files changed, 352 insertions(+), 34 deletions(-) create mode 100644 CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h create mode 100644 CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc create mode 100644 CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc create mode 100644 CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc create mode 100644 CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml index ae04d111f1024..6e64a5b4b94ee 100644 --- a/CalibTracker/SiPixelESProducers/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/BuildFile.xml @@ -1,14 +1,14 @@ - - - + + - - + + + - + diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h new file mode 100644 index 0000000000000..d86aa93700297 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h @@ -0,0 +1,56 @@ +#ifndef CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h +#define CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h + +#include + +#include + +#include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include 
"HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" + +class SiPixelFedCablingMap; +class TrackerGeometry; +class SiPixelQuality; + +// TODO: since this has more information than just cabling map, maybe we should invent a better name? +class SiPixelROCsStatusAndMappingWrapper { +public: + SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const &cablingMap, + TrackerGeometry const &trackerGeom, + SiPixelQuality const *badPixelInfo); + ~SiPixelROCsStatusAndMappingWrapper(); + + bool hasQuality() const { return hasQuality_; } + + // returns pointer to GPU memory + const SiPixelROCsStatusAndMapping *getGPUProductAsync(cudaStream_t cudaStream) const; + + // returns pointer to GPU memory + const unsigned char *getModToUnpAllAsync(cudaStream_t cudaStream) const; + cms::cuda::device::unique_ptr getModToUnpRegionalAsync(std::set const &modules, + cudaStream_t cudaStream) const; + +private: + const SiPixelFedCablingMap *cablingMap_; + std::vector> modToUnpDefault; + unsigned int size; + bool hasQuality_; + + SiPixelROCsStatusAndMapping *cablingMapHost = nullptr; // pointer to struct in CPU + + struct GPUData { + ~GPUData(); + SiPixelROCsStatusAndMapping *cablingMapDevice = nullptr; // pointer to struct in GPU + }; + cms::cuda::ESProduct gpuData_; + + struct ModulesToUnpack { + ~ModulesToUnpack(); + unsigned char *modToUnpDefault = nullptr; // pointer to GPU + }; + cms::cuda::ESProduct modToUnp_; +}; + +#endif // CalibTracker_SiPixelESProducers_interface_SiPixelROCsStatusAndMappingWrapper_h diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc new file mode 100644 index 0000000000000..2c77560a5058e --- /dev/null +++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc @@ -0,0 +1,67 @@ +#include + +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" 
+#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h" +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoTracker/Record/interface/CkfComponentsRecord.h" // TODO: eventually use something more limited + +class SiPixelROCsStatusAndMappingWrapperESProducer : public edm::ESProducer { +public: + explicit SiPixelROCsStatusAndMappingWrapperESProducer(const edm::ParameterSet& iConfig); + std::unique_ptr produce(const CkfComponentsRecord& iRecord); + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + edm::ESGetToken cablingMapToken_; + edm::ESGetToken qualityToken_; + edm::ESGetToken geometryToken_; + bool useQuality_; +}; + +SiPixelROCsStatusAndMappingWrapperESProducer::SiPixelROCsStatusAndMappingWrapperESProducer(const edm::ParameterSet& iConfig) + : useQuality_(iConfig.getParameter("UseQualityInfo")) { + auto const& component = iConfig.getParameter("ComponentName"); + auto cc = setWhatProduced(this, component); + cablingMapToken_ = cc.consumes(edm::ESInputTag{"", iConfig.getParameter("CablingMapLabel")}); + if (useQuality_) { + qualityToken_ = cc.consumes(); + } + geometryToken_ = cc.consumes(); +} + +void SiPixelROCsStatusAndMappingWrapperESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("ComponentName", ""); + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UseQualityInfo", false); + descriptions.addWithDefaultLabel(desc); +} + +std::unique_ptr 
SiPixelROCsStatusAndMappingWrapperESProducer::produce( + const CkfComponentsRecord& iRecord) { + auto cablingMap = iRecord.getTransientHandle(cablingMapToken_); + + const SiPixelQuality* quality = nullptr; + if (useQuality_) { + auto qualityInfo = iRecord.getTransientHandle(qualityToken_); + quality = qualityInfo.product(); + } + + auto geom = iRecord.getTransientHandle(geometryToken_); + + return std::make_unique(*cablingMap, *geom, quality); +} + +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" + +DEFINE_FWK_EVENTSETUP_MODULE(SiPixelROCsStatusAndMappingWrapperESProducer); diff --git a/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc new file mode 100644 index 0000000000000..45767102b5958 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/src/ES_SiPixelROCsStatusAndMappingWrapper.cc @@ -0,0 +1,4 @@ +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(SiPixelROCsStatusAndMappingWrapper); diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc new file mode 100644 index 0000000000000..1657be1725842 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc @@ -0,0 +1,169 @@ +// C++ includes +#include +#include +#include +#include + +// CUDA includes +#include + +// CMSSW includes +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelQuality.h" 
+#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const& cablingMap, + TrackerGeometry const& trackerGeom, + SiPixelQuality const* badPixelInfo) + : cablingMap_(&cablingMap), modToUnpDefault(pixelgpudetails::MAX_SIZE), hasQuality_(badPixelInfo != nullptr) { + cudaCheck(cudaMallocHost(&cablingMapHost, sizeof(SiPixelROCsStatusAndMapping))); + + std::vector const& fedIds = cablingMap.fedIds(); + std::unique_ptr const& cabling = cablingMap.cablingTree(); + + unsigned int startFed = *(fedIds.begin()); + unsigned int endFed = *(fedIds.end() - 1); + + sipixelobjects::CablingPathToDetUnit path; + int index = 1; + + for (unsigned int fed = startFed; fed <= endFed; fed++) { + for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { + for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { + path = {fed, link, roc}; + const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); + cablingMapHost->fed[index] = fed; + cablingMapHost->link[index] = link; + cablingMapHost->roc[index] = roc; + if (pixelRoc != nullptr) { + cablingMapHost->RawId[index] = pixelRoc->rawId(); + cablingMapHost->rocInDet[index] = pixelRoc->idInDetUnit(); + modToUnpDefault[index] = false; + if (badPixelInfo != nullptr) + cablingMapHost->badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit()); + else + cablingMapHost->badRocs[index] = false; + } else { // store some dummy number + cablingMapHost->RawId[index] = 9999; + cablingMapHost->rocInDet[index] = 9999; + cablingMapHost->badRocs[index] = true; + 
modToUnpDefault[index] = true; + } + index++; + } + } + } // end of FED loop + + // Given FedId, Link and idinLnk; use the following formula + // to get the RawId and idinDU + // index = (FedID-1200) * MAX_LINK* MAX_ROC + (Link-1)* MAX_ROC + idinLnk; + // where, MAX_LINK = 48, MAX_ROC = 8 for Phase1 as mentioned Danek's email + // FedID varies between 1200 to 1338 (In total 108 FED's) + // Link varies between 1 to 48 + // idinLnk varies between 1 to 8 + + for (int i = 1; i < index; i++) { + if (cablingMapHost->RawId[i] == 9999) { + cablingMapHost->moduleId[i] = 9999; + } else { + /* + std::cout << cablingMapHost->RawId[i] << std::endl; + */ + auto gdet = trackerGeom.idToDetUnit(cablingMapHost->RawId[i]); + if (!gdet) { + LogDebug("SiPixelROCsStatusAndMapping") << " Not found: " << cablingMapHost->RawId[i] << std::endl; + continue; + } + cablingMapHost->moduleId[i] = gdet->index(); + } + LogDebug("SiPixelROCsStatusAndMapping") + << "----------------------------------------------------------------------------" << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") << i << std::setw(20) << cablingMapHost->fed[i] << std::setw(20) + << cablingMapHost->link[i] << std::setw(20) << cablingMapHost->roc[i] + << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") << i << std::setw(20) << cablingMapHost->RawId[i] << std::setw(20) + << cablingMapHost->rocInDet[i] << std::setw(20) << cablingMapHost->moduleId[i] + << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") << i << std::setw(20) << (bool)cablingMapHost->badRocs[i] << std::setw(20) + << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") + << "----------------------------------------------------------------------------" << std::endl; + } + + cablingMapHost->size = index - 1; +} + +SiPixelROCsStatusAndMappingWrapper::~SiPixelROCsStatusAndMappingWrapper() { cudaCheck(cudaFreeHost(cablingMapHost)); } + +const SiPixelROCsStatusAndMapping* SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync(cudaStream_t cudaStream) 
const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { + // allocate + cudaCheck(cudaMalloc(&data.cablingMapDevice, sizeof(SiPixelROCsStatusAndMapping))); + + // transfer + cudaCheck(cudaMemcpyAsync( + data.cablingMapDevice, this->cablingMapHost, sizeof(SiPixelROCsStatusAndMapping), cudaMemcpyDefault, stream)); + }); + return data.cablingMapDevice; +} + +const unsigned char* SiPixelROCsStatusAndMappingWrapper::getModToUnpAllAsync(cudaStream_t cudaStream) const { + const auto& data = + modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cudaStream_t stream) { + cudaCheck(cudaMalloc((void**)&data.modToUnpDefault, pixelgpudetails::MAX_SIZE_BYTE_BOOL)); + cudaCheck(cudaMemcpyAsync(data.modToUnpDefault, + this->modToUnpDefault.data(), + this->modToUnpDefault.size() * sizeof(unsigned char), + cudaMemcpyDefault, + stream)); + }); + return data.modToUnpDefault; +} + +cms::cuda::device::unique_ptr SiPixelROCsStatusAndMappingWrapper::getModToUnpRegionalAsync( + std::set const& modules, cudaStream_t cudaStream) const { + auto modToUnpDevice = cms::cuda::make_device_unique(pixelgpudetails::MAX_SIZE, cudaStream); + auto modToUnpHost = cms::cuda::make_host_unique(pixelgpudetails::MAX_SIZE, cudaStream); + + std::vector const& fedIds = cablingMap_->fedIds(); + std::unique_ptr const& cabling = cablingMap_->cablingTree(); + + unsigned int startFed = *(fedIds.begin()); + unsigned int endFed = *(fedIds.end() - 1); + + sipixelobjects::CablingPathToDetUnit path; + int index = 1; + + for (unsigned int fed = startFed; fed <= endFed; fed++) { + for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { + for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { + path = {fed, link, roc}; + const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); + if (pixelRoc != nullptr) { + modToUnpHost[index] = (not modules.empty()) and (modules.find(pixelRoc->rawId()) == 
modules.end()); + } else { // store some dummy number + modToUnpHost[index] = true; + } + index++; + } + } + } + + cudaCheck(cudaMemcpyAsync(modToUnpDevice.get(), + modToUnpHost.get(), + pixelgpudetails::MAX_SIZE * sizeof(unsigned char), + cudaMemcpyHostToDevice, + cudaStream)); + return modToUnpDevice; +} + +SiPixelROCsStatusAndMappingWrapper::GPUData::~GPUData() { cudaCheck(cudaFree(cablingMapDevice)); } + +SiPixelROCsStatusAndMappingWrapper::ModulesToUnpack::~ModulesToUnpack() { cudaCheck(cudaFree(modToUnpDefault)); } diff --git a/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h b/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h new file mode 100644 index 0000000000000..df5b8b24b70dc --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h @@ -0,0 +1,26 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h +#define CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h + +namespace pixelgpudetails { + // Maximum fed for phase1 is 150 but not all of them are filled + // Update the number FED based on maximum fed found in the cabling map + constexpr unsigned int MAX_FED = 150; + constexpr unsigned int MAX_LINK = 48; // maximum links/channels for Phase 1 + constexpr unsigned int MAX_ROC = 8; + constexpr unsigned int MAX_SIZE = MAX_FED * MAX_LINK * MAX_ROC; + constexpr unsigned int MAX_SIZE_BYTE_BOOL = MAX_SIZE * sizeof(unsigned char); +} // namespace pixelgpudetails + +// TODO: since this has more information than just cabling map, maybe we should invent a better name? 
+struct SiPixelROCsStatusAndMapping { + alignas(128) unsigned int fed[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int link[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int roc[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int RawId[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int rocInDet[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int moduleId[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned char badRocs[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int size = 0; +}; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index df16276520523..5e97610d92286 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -1,8 +1,15 @@ +// C++ includes +#include +#include +#include + +// CMSSW includes #include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" #include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" #include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" @@ -16,26 +23,22 @@ #include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/ESTransientHandle.h" #include "FWCore/Framework/interface/ESWatcher.h" -#include 
"FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/Framework/interface/stream/EDProducer.h" #include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h" #include "RecoTracker/Record/interface/CkfComponentsRecord.h" +// local includes #include "SiPixelRawToClusterGPUKernel.h" -#include -#include -#include - class SiPixelRawToClusterCUDA : public edm::stream::EDProducer { public: explicit SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfig); @@ -58,7 +61,7 @@ class SiPixelRawToClusterCUDA : public edm::stream::EDProducer recordWatcher_; - edm::ESGetToken gpuMapToken_; + edm::ESGetToken gpuMapToken_; edm::ESGetToken gainsToken_; edm::ESGetToken cablingMapToken_; @@ -80,7 +83,7 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi : rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), digiPutToken_(produces>()), clusterPutToken_(produces>()), - gpuMapToken_(esConsumes()), + gpuMapToken_(esConsumes()), gainsToken_(esConsumes()), cablingMapToken_(esConsumes( edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), @@ -130,7 +133,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, if (hgpuMap->hasQuality() != useQuality_) { throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality_ - << ") differs the one 
from SiPixelFedCablingMapGPUWrapper. Please fix your configuration."; + << ") differs the one from SiPixelROCsStatusAndMappingWrapper. Please fix your configuration."; } // get the GPU product already here so that the async transfer can begin const auto* gpuMap = hgpuMap->getGPUProductAsync(ctx.stream()); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index f14808dda1e2b..04072943bf0f8 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -8,29 +8,22 @@ // C++ includes #include -#include #include #include +#include #include #include #include -#include // CUDA includes -#include #include -#include -#include -#include -#include -#include // CMSSW includes #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" @@ -68,7 +61,7 @@ namespace pixelgpudetails { __device__ bool isBarrel(uint32_t rawId) { return (1 == ((rawId >> 25) & 0x7)); } - __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU *cablingMap, + __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelROCsStatusAndMapping *cablingMap, uint8_t fed, uint32_t link, uint32_t roc) { @@ -198,7 +191,7 @@ namespace pixelgpudetails { __device__ bool 
dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } __device__ uint8_t checkROC( - uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false) { + uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelROCsStatusAndMapping *cablingMap, bool debug = false) { uint8_t errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; if (errorType < 25) return 0; @@ -276,7 +269,7 @@ namespace pixelgpudetails { __device__ uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, - const SiPixelFedCablingMapGPU *cablingMap, + const SiPixelROCsStatusAndMapping *cablingMap, bool debug = false) { uint32_t rID = 0xffffffff; @@ -351,7 +344,7 @@ namespace pixelgpudetails { } // Kernel to perform Raw to Digi conversion - __global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *cablingMap, + __global__ void RawToDigi_kernel(const SiPixelROCsStatusAndMapping *cablingMap, const unsigned char *modToUnp, const uint32_t wordCounter, const uint32_t *word, @@ -524,7 +517,7 @@ namespace pixelgpudetails { // Interface to outside void SiPixelRawToClusterGPUKernel::makeClustersAsync(bool isRun2, - const SiPixelFedCablingMapGPU *cablingMap, + const SiPixelROCsStatusAndMapping *cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const WordFedAppender &wordFed, diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index d214e7784af48..2f52316aa2e78 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -13,7 +13,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" #include "DataFormats/SiPixelDigi/interface/PixelErrors.h" -struct SiPixelFedCablingMapGPU; +struct 
SiPixelROCsStatusAndMapping; class SiPixelGainForHLTonGPU; namespace pixelgpudetails { @@ -169,7 +169,7 @@ namespace pixelgpudetails { SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete; void makeClustersAsync(bool isRun2, - const SiPixelFedCablingMapGPU* cablingMap, + const SiPixelROCsStatusAndMapping* cablingMap, const unsigned char* modToUnp, const SiPixelGainForHLTonGPU* gains, const WordFedAppender& wordFed, diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py index 3f8cf314ec2e2..6839e4582bb2b 100644 --- a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py @@ -3,7 +3,7 @@ from RecoLocalTracker.SiPixelClusterizer.SiPixelClusterizerPreSplitting_cfi import siPixelClustersPreSplitting from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDA_cfi import siPixelRawToClusterCUDA as _siPixelRawToClusterCUDA from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoA_cfi import siPixelDigisClustersFromSoA as _siPixelDigisClustersFromSoA -from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import * +from CalibTracker.SiPixelESProducers.siPixelROCsStatusAndMappingWrapperESProducer_cfi import * from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import * siPixelClustersPreSplittingTask = cms.Task(siPixelClustersPreSplitting) From d3b0b58f07e4a1b44b81acabfc5c09ec2c927905 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 27 Nov 2020 08:51:39 -0600 Subject: [PATCH 137/149] Address pixel local reco PR review comments (cms-patatrack#575) Remove SiPixelDigiHeterogeneousConverter as obsolete, should have been removed as part of cms-patatrack#100. 
Address review comments for SiPixelClustersCUDA: - remove commented out default constructor and private: from DeviceConstView; this is perhaps the best compromise between non-default constructors not being preferred for device allocations, and the use case in SiPixelRecHitSoAFromLegacy (for the expected life time of this class) - remove const getters with c_ prefix - improve constructor parameter name - use more initializer list - initialize nClusters_h Address review comments for SiPixelDigiErrorsCUDA: - use type alias - remove const getters with c_ prefix and other unnecessary methods - use more initializer list Address review comments for SiPixelDigisCUDA: - remove const getters with c_ prefix and other unnecessary methods - remove commented out default constructor and private: from DeviceConstView - add comments for remaining SiPixelDigisCUDA member arrays Move PixelErrorsCompact and SiPixelDigiErrorsSoa to DataFormats/SiPixelRawData, rename classes Address review comments for SiPixelErrorsSoA - remove redundant assert - move constructor inline Address review comments for SiPixelDigisSoA - remove redundant assert - add comments Enable if constexpr also for CUDA in TrackingRecHit2DHeterogeneous Move dictionary of HostProduct to CUDADataFormats/Common --- CUDADataFormats/Common/BuildFile.xml | 11 ++++--- CUDADataFormats/Common/src/classes.h | 7 ++++ CUDADataFormats/Common/src/classes_def.xml | 4 +++ .../interface/SiPixelClustersCUDA.h | 14 ++------ .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 11 +++---- .../interface/SiPixelDigiErrorsCUDA.h | 25 ++++++++------- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 17 ++-------- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 13 ++++---- .../interface/TrackingRecHit2DHeterogeneous.h | 6 +--- CUDADataFormats/TrackingRecHit/src/classes.h | 1 - .../TrackingRecHit/src/classes_def.xml | 2 -- .../SiPixelDigi/interface/SiPixelDigisSoA.h | 14 +++++--- .../SiPixelDigi/src/SiPixelDigisSoA.cc | 4 +-- 
DataFormats/SiPixelDigi/src/classes.h | 1 - DataFormats/SiPixelDigi/src/classes_def.xml | 3 -- .../interface/SiPixelErrorCompact.h | 13 ++++++++ .../interface/SiPixelErrorsSoA.h | 30 +++++++++++++++++ .../interface/SiPixelFormatterErrors.h | 12 +++++++ DataFormats/SiPixelRawData/src/classes.h | 1 + .../SiPixelRawData/src/classes_def.xml | 3 ++ .../plugins/SiPixelDigiErrorsFromSoA.cc | 8 ++--- .../plugins/SiPixelDigiErrorsSoAFromCUDA.cc | 14 ++++---- .../plugins/SiPixelRawToClusterGPUKernel.cu | 32 +++++++++---------- .../plugins/SiPixelRawToClusterGPUKernel.h | 5 +-- 24 files changed, 146 insertions(+), 105 deletions(-) create mode 100644 CUDADataFormats/Common/src/classes.h create mode 100644 CUDADataFormats/Common/src/classes_def.xml create mode 100644 DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h create mode 100644 DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h create mode 100644 DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h diff --git a/CUDADataFormats/Common/BuildFile.xml b/CUDADataFormats/Common/BuildFile.xml index e7a5ba74d80be..c524cada33060 100644 --- a/CUDADataFormats/Common/BuildFile.xml +++ b/CUDADataFormats/Common/BuildFile.xml @@ -1,7 +1,8 @@ - - - - + + + + + - + diff --git a/CUDADataFormats/Common/src/classes.h b/CUDADataFormats/Common/src/classes.h new file mode 100644 index 0000000000000..239e071d513a2 --- /dev/null +++ b/CUDADataFormats/Common/src/classes.h @@ -0,0 +1,7 @@ +#ifndef CUDADataFormats_Common_src_classes_h +#define CUDADataFormats_Common_src_classes_h + +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif // CUDADataFormats_Common_src_classes_h diff --git a/CUDADataFormats/Common/src/classes_def.xml b/CUDADataFormats/Common/src/classes_def.xml new file mode 100644 index 0000000000000..024d927595914 --- /dev/null +++ b/CUDADataFormats/Common/src/classes_def.xml @@ -0,0 +1,4 @@ + + + + diff --git 
a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index dbfb5ff5e1761..d5d009aaffeb5 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -10,7 +10,7 @@ class SiPixelClustersCUDA { public: SiPixelClustersCUDA() = default; - explicit SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream); + explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream); ~SiPixelClustersCUDA() = default; SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete; @@ -32,23 +32,13 @@ class SiPixelClustersCUDA { uint32_t const *moduleId() const { return moduleId_d.get(); } uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); } - uint32_t const *c_moduleStart() const { return moduleStart_d.get(); } - uint32_t const *c_clusInModule() const { return clusInModule_d.get(); } - uint32_t const *c_moduleId() const { return moduleId_d.get(); } - uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); } - class DeviceConstView { public: - // DeviceConstView() = default; - __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_ + i); } __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_ + i); } __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_ + i); } __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_ + i); } - friend SiPixelClustersCUDA; - - // private: uint32_t const *moduleStart_; uint32_t const *clusInModule_; uint32_t const *moduleId_; @@ -67,7 +57,7 @@ class SiPixelClustersCUDA { cms::cuda::device::unique_ptr view_d; // "me" pointer - uint32_t nClusters_h; + uint32_t nClusters_h = 0; }; #endif diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc 
b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index 7bef9d0d8a52f..5e53f49570bb4 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -4,12 +4,11 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) { - moduleStart_d = cms::cuda::make_device_unique(maxClusters + 1, stream); - clusInModule_d = cms::cuda::make_device_unique(maxClusters, stream); - moduleId_d = cms::cuda::make_device_unique(maxClusters, stream); - clusModuleStart_d = cms::cuda::make_device_unique(maxClusters + 1, stream); - +SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream) + : moduleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)), + clusInModule_d(cms::cuda::make_device_unique(maxModules, stream)), + moduleId_d(cms::cuda::make_device_unique(maxModules, stream)), + clusModuleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)) { auto view = cms::cuda::make_host_unique(stream); view->moduleStart_ = moduleStart_d.get(); view->clusInModule_ = clusInModule_d.get(); diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index aa06e8dbbd57d..85e8883fa1bd4 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -1,7 +1,8 @@ #ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h #define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h -#include "DataFormats/SiPixelDigi/interface/PixelErrors.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" #include 
"HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" @@ -10,8 +11,10 @@ class SiPixelDigiErrorsCUDA { public: + using SiPixelErrorCompactVector = cms::cuda::SimpleVector; + SiPixelDigiErrorsCUDA() = default; - explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream); + explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream); ~SiPixelDigiErrorsCUDA() = default; SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete; @@ -19,23 +22,21 @@ class SiPixelDigiErrorsCUDA { SiPixelDigiErrorsCUDA(SiPixelDigiErrorsCUDA&&) = default; SiPixelDigiErrorsCUDA& operator=(SiPixelDigiErrorsCUDA&&) = default; - const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; } + const SiPixelFormatterErrors& formatterErrors() const { return formatterErrors_h; } - cms::cuda::SimpleVector* error() { return error_d.get(); } - cms::cuda::SimpleVector const* error() const { return error_d.get(); } - cms::cuda::SimpleVector const* c_error() const { return error_d.get(); } + SiPixelErrorCompactVector* error() { return error_d.get(); } + SiPixelErrorCompactVector const* error() const { return error_d.get(); } - using HostDataError = - std::pair, cms::cuda::host::unique_ptr>; + using HostDataError = std::pair>; HostDataError dataErrorToHostAsync(cudaStream_t stream) const; void copyErrorToHostAsync(cudaStream_t stream); private: - cms::cuda::device::unique_ptr data_d; - cms::cuda::device::unique_ptr> error_d; - cms::cuda::host::unique_ptr> error_h; - PixelFormatterErrors formatterErrors_h; + cms::cuda::device::unique_ptr data_d; + cms::cuda::device::unique_ptr error_d; + cms::cuda::host::unique_ptr error_h; + SiPixelFormatterErrors formatterErrors_h; }; #endif diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h 
b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 04207f3e0b385..2dc1f628bf426 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -42,14 +42,6 @@ class SiPixelDigisCUDA { uint32_t const *pdigi() const { return pdigi_d.get(); } uint32_t const *rawIdArr() const { return rawIdArr_d.get(); } - uint16_t const *c_xx() const { return xx_d.get(); } - uint16_t const *c_yy() const { return yy_d.get(); } - uint16_t const *c_adc() const { return adc_d.get(); } - uint16_t const *c_moduleInd() const { return moduleInd_d.get(); } - int32_t const *c_clus() const { return clus_d.get(); } - uint32_t const *c_pdigi() const { return pdigi_d.get(); } - uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); } - cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; cms::cuda::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; cms::cuda::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; @@ -57,17 +49,12 @@ class SiPixelDigisCUDA { class DeviceConstView { public: - // DeviceConstView() = default; - __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_ + i); } __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_ + i); } __device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_ + i); } __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_ + i); } __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_ + i); } - friend class SiPixelDigisCUDA; - - // private: uint16_t const *xx_; uint16_t const *yy_; uint16_t const *adc_; @@ -88,8 +75,8 @@ class SiPixelDigisCUDA { // These are for CPU output; should we (eventually) place them to a // separate product? 
- cms::cuda::device::unique_ptr pdigi_d; - cms::cuda::device::unique_ptr rawIdArr_d; + cms::cuda::device::unique_ptr pdigi_d; // packed digi (row, col, adc) of each pixel + cms::cuda::device::unique_ptr rawIdArr_d; // DetId of each pixel uint32_t nModules_h = 0; uint32_t nDigis_h = 0; diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index 075d408a6f6fc..70bf2e8aa19f5 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -7,14 +7,13 @@ #include -SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream) - : formatterErrors_h(std::move(errors)) { - error_d = cms::cuda::make_device_unique>(stream); - data_d = cms::cuda::make_device_unique(maxFedWords, stream); - +SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) + : data_d(cms::cuda::make_device_unique(maxFedWords, stream)), + error_d(cms::cuda::make_device_unique(stream)), + error_h(cms::cuda::make_host_unique(stream)), + formatterErrors_h(std::move(errors)) { cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream); - error_h = cms::cuda::make_host_unique>(stream); cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); assert(error_h->empty()); assert(error_h->capacity() == static_cast(maxFedWords)); @@ -30,7 +29,7 @@ SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync // On one hand size() could be sufficient. On the other hand, if // someone copies the SimpleVector<>, (s)he might expect the data // buffer to actually have space for capacity() elements. 
- auto data = cms::cuda::make_host_unique(error_h->capacity(), stream); + auto data = cms::cuda::make_host_unique(error_h->capacity(), stream); // but transfer only the required amount if (not error_h->empty()) { diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index b0aa79cfe20b6..73a6daaa4e387 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -84,11 +84,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH // if empy do not bother if (0 == nHits) { - if -#ifndef __CUDACC__ - constexpr -#endif - (std::is_same::value) { + if constexpr (std::is_same::value) { cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h index d80226ec7a14b..3d40821493c5b 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes.h +++ b/CUDADataFormats/TrackingRecHit/src/classes.h @@ -2,7 +2,6 @@ #define CUDADataFormats_SiPixelCluster_src_classes_h #include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/Common/interface/HostProduct.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml b/CUDADataFormats/TrackingRecHit/src/classes_def.xml index 02b0eb37d157b..7e1919de510b3 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes_def.xml +++ b/CUDADataFormats/TrackingRecHit/src/classes_def.xml @@ -5,6 +5,4 @@ - - diff --git a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h index 50e863f03ff02..6c016155b1cb0 100644 --- 
a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h +++ b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h @@ -4,6 +4,12 @@ #include #include +// The main purpose of this class is to deliver digi and cluster data +// from an EDProducer that transfers the data from GPU to host to an +// EDProducer that converts the SoA to legacy data products. The class +// is independent of any GPU technology, and in prunciple could be +// produced by host code, and be used for other purposes than +// conversion-to-legacy as well. class SiPixelDigisSoA { public: SiPixelDigisSoA() = default; @@ -24,10 +30,10 @@ class SiPixelDigisSoA { const std::vector& clusVector() const { return clus_; } private: - std::vector pdigi_; - std::vector rawIdArr_; - std::vector adc_; - std::vector clus_; + std::vector pdigi_; // packed digi (row, col, adc) of each pixel + std::vector rawIdArr_; // DetId of each pixel + std::vector adc_; // ADC of each pixel + std::vector clus_; // cluster id of each pixel }; #endif diff --git a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc index 992c98f450616..b95c004a50a25 100644 --- a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc +++ b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc @@ -7,6 +7,4 @@ SiPixelDigisSoA::SiPixelDigisSoA( : pdigi_(pdigi, pdigi + nDigis), rawIdArr_(rawIdArr, rawIdArr + nDigis), adc_(adc, adc + nDigis), - clus_(clus, clus + nDigis) { - assert(pdigi_.size() == nDigis); -} + clus_(clus, clus + nDigis) {} diff --git a/DataFormats/SiPixelDigi/src/classes.h b/DataFormats/SiPixelDigi/src/classes.h index ba68d3289e8cd..1360ee6e469d9 100644 --- a/DataFormats/SiPixelDigi/src/classes.h +++ b/DataFormats/SiPixelDigi/src/classes.h @@ -6,7 +6,6 @@ #include "DataFormats/SiPixelDigi/interface/SiPixelCalibDigi.h" #include "DataFormats/SiPixelDigi/interface/SiPixelCalibDigiError.h" #include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" -#include 
"DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h" #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" diff --git a/DataFormats/SiPixelDigi/src/classes_def.xml b/DataFormats/SiPixelDigi/src/classes_def.xml index 8cabbd3f3f06e..e6bc08de161fa 100755 --- a/DataFormats/SiPixelDigi/src/classes_def.xml +++ b/DataFormats/SiPixelDigi/src/classes_def.xml @@ -52,7 +52,4 @@ - - - diff --git a/DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h b/DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h new file mode 100644 index 0000000000000..0b1a80868594f --- /dev/null +++ b/DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h @@ -0,0 +1,13 @@ +#ifndef DataFormats_SiPixelRawData_interface_SiPixelErrorCompact_h +#define DataFormats_SiPixelRawData_interface_SiPixelErrorCompact_h + +#include + +struct SiPixelErrorCompact { + uint32_t rawId; + uint32_t word; + uint8_t errorType; + uint8_t fedId; +}; + +#endif // DataFormats_SiPixelRawData_interface_SiPixelErrorCompact_h diff --git a/DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h b/DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h new file mode 100644 index 0000000000000..c72c19ce5fda4 --- /dev/null +++ b/DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h @@ -0,0 +1,30 @@ +#ifndef DataFormats_SiPixelDigi_interface_SiPixelErrorsSoA_h +#define DataFormats_SiPixelDigi_interface_SiPixelErrorsSoA_h + +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" + +#include +#include + +class SiPixelErrorsSoA { +public: + SiPixelErrorsSoA() = default; + explicit SiPixelErrorsSoA(size_t nErrors, const SiPixelErrorCompact *error, const SiPixelFormatterErrors *err) + : error_(error, error + nErrors), formatterErrors_(err) {} + ~SiPixelErrorsSoA() = default; + + auto size() const { return error_.size(); } + 
+ const SiPixelFormatterErrors *formatterErrors() const { return formatterErrors_; } + + const SiPixelErrorCompact &error(size_t i) const { return error_[i]; } + + const std::vector &errorVector() const { return error_; } + +private: + std::vector error_; + const SiPixelFormatterErrors *formatterErrors_ = nullptr; +}; + +#endif diff --git a/DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h b/DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h new file mode 100644 index 0000000000000..9d372737300d4 --- /dev/null +++ b/DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_SiPixelRawData_interface_SiPixelFormatterErrors_h +#define DataFormats_SiPixelRawData_interface_SiPixelFormatterErrors_h + +#include +#include + +#include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" +#include "FWCore/Utilities/interface/typedefs.h" + +using SiPixelFormatterErrors = std::map>; + +#endif // DataFormats_SiPixelRawData_interface_SiPixelFormatterErrors_h diff --git a/DataFormats/SiPixelRawData/src/classes.h b/DataFormats/SiPixelRawData/src/classes.h index 73768cc373013..7a07e9f35f388 100644 --- a/DataFormats/SiPixelRawData/src/classes.h +++ b/DataFormats/SiPixelRawData/src/classes.h @@ -2,6 +2,7 @@ #define SIPIXELRAWDATA_CLASSES_H #include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/DetSetVector.h" #include diff --git a/DataFormats/SiPixelRawData/src/classes_def.xml b/DataFormats/SiPixelRawData/src/classes_def.xml index 827d4b1191cf6..fd2b5dcf27965 100644 --- a/DataFormats/SiPixelRawData/src/classes_def.xml +++ b/DataFormats/SiPixelRawData/src/classes_def.xml @@ -14,4 +14,7 @@ + + + diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc index 
ea381948ec352..7a49646d7a9a1 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc @@ -6,7 +6,7 @@ #include "DataFormats/DetId/interface/DetIdCollection.h" #include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" -#include "DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" #include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" #include "FWCore/Framework/interface/ESTransientHandle.h" #include "FWCore/Framework/interface/ESWatcher.h" @@ -31,7 +31,7 @@ class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; const edm::ESGetToken cablingToken_; - const edm::EDGetTokenT digiErrorSoAGetToken_; + const edm::EDGetTokenT digiErrorSoAGetToken_; const edm::EDPutTokenT> errorPutToken_; const edm::EDPutTokenT tkErrorPutToken_; const edm::EDPutTokenT userErrorPutToken_; @@ -48,7 +48,7 @@ class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iConfig) : cablingToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), - digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, + digiErrorSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, errorPutToken_{produces>()}, tkErrorPutToken_{produces()}, userErrorPutToken_{produces("UserErrorModules")}, @@ -95,7 +95,7 @@ void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup auto size = digiErrors.size(); for (auto i = 0U; i < size; i++) { - PixelErrorCompact err = digiErrors.error(i); + SiPixelErrorCompact err = digiErrors.error(i); if (err.errorType != 0) { SiPixelRawDataError error(err.word, err.errorType, err.fedId + 1200); 
errors[err.rawId].push_back(error); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc index c5b568750ad7d..f2c7d0de5fe24 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -1,6 +1,6 @@ #include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" #include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/MakerMacros.h" @@ -25,17 +25,17 @@ class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer> digiErrorGetToken_; - edm::EDPutTokenT digiErrorPutToken_; + edm::EDPutTokenT digiErrorPutToken_; - cms::cuda::host::unique_ptr data_; - cms::cuda::SimpleVector error_; - const PixelFormatterErrors* formatterErrors_ = nullptr; + cms::cuda::host::unique_ptr data_; + cms::cuda::SimpleVector error_; + const SiPixelFormatterErrors* formatterErrors_ = nullptr; }; SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig) : digiErrorGetToken_( consumes>(iConfig.getParameter("src"))), - digiErrorPutToken_(produces()) {} + digiErrorPutToken_(produces()) {} void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -70,7 +70,7 @@ void SiPixelDigiErrorsSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventS // use cudaMallocHost without a GPU... 
iEvent.emplace(digiErrorPutToken_, error_.size(), error_.data(), formatterErrors_); - error_ = cms::cuda::make_SimpleVector(0, nullptr); + error_ = cms::cuda::make_SimpleVector(0, nullptr); data_.reset(); formatterErrors_ = nullptr; } diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 04072943bf0f8..9a37ec2100661 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -355,7 +355,7 @@ namespace pixelgpudetails { uint32_t *pdigi, uint32_t *rawIdArr, uint16_t *moduleId, - cms::cuda::SimpleVector *err, + cms::cuda::SimpleVector *err, bool useQualityInfo, bool includeErrors, bool debug) { @@ -390,7 +390,7 @@ namespace pixelgpudetails { skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); if (includeErrors and skipROC) { uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); - err->push_back(PixelErrorCompact{rID, ww, errorType, fedId}); + err->push_back(SiPixelErrorCompact{rID, ww, errorType, fedId}); continue; } @@ -434,7 +434,7 @@ namespace pixelgpudetails { if (includeErrors) { if (not rocRowColIsValid(row, col)) { uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays - err->push_back(PixelErrorCompact{rawId, ww, error, fedId}); + err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); if (debug) printf("BPIX1 Error status: %i\n", error); continue; @@ -450,7 +450,7 @@ namespace pixelgpudetails { localPix.col = col; if (includeErrors and not dcolIsValid(dcol, pxid)) { uint8_t error = conversionError(fedId, 3, debug); - err->push_back(PixelErrorCompact{rawId, ww, error, fedId}); + err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); if (debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); continue; @@ -521,7 +521,7 
@@ namespace pixelgpudetails { const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const WordFedAppender &wordFed, - PixelFormatterErrors &&errors, + SiPixelFormatterErrors &&errors, const uint32_t wordCounter, const uint32_t fedCounter, bool useQualityInfo, @@ -595,8 +595,8 @@ namespace pixelgpudetails { gpuCalibPixel::calibDigis<<>>(isRun2, digis_d.moduleInd(), - digis_d.c_xx(), - digis_d.c_yy(), + digis_d.xx(), + digis_d.yy(), digis_d.adc(), gains, wordCounter, @@ -615,7 +615,7 @@ namespace pixelgpudetails { #endif countModules<<>>( - digis_d.c_moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); + digis_d.moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); // read the number of modules into a data member, used by getProduct()) @@ -627,10 +627,10 @@ namespace pixelgpudetails { #ifdef GPU_DEBUG std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; #endif - findClus<<>>(digis_d.c_moduleInd(), - digis_d.c_xx(), - digis_d.c_yy(), - clusters_d.c_moduleStart(), + findClus<<>>(digis_d.moduleInd(), + digis_d.xx(), + digis_d.yy(), + clusters_d.moduleStart(), clusters_d.clusInModule(), clusters_d.moduleId(), digis_d.clus(), @@ -643,10 +643,10 @@ namespace pixelgpudetails { // apply charge cut clusterChargeCut<<>>(digis_d.moduleInd(), - digis_d.c_adc(), - clusters_d.c_moduleStart(), + digis_d.adc(), + clusters_d.moduleStart(), clusters_d.clusInModule(), - clusters_d.c_moduleId(), + clusters_d.moduleId(), digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); @@ -657,7 +657,7 @@ namespace pixelgpudetails { // synchronization/ExternalWork // MUST be ONE block - fillHitsModuleStart<<<1, 1024, 0, stream>>>(clusters_d.c_clusInModule(), clusters_d.clusModuleStart()); + fillHitsModuleStart<<<1, 1024, 0, stream>>>(clusters_d.clusInModule(), clusters_d.clusModuleStart()); // last element holds the number of all clusters 
cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 2f52316aa2e78..e06ba8ce735aa 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -11,7 +11,8 @@ #include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" -#include "DataFormats/SiPixelDigi/interface/PixelErrors.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" struct SiPixelROCsStatusAndMapping; class SiPixelGainForHLTonGPU; @@ -173,7 +174,7 @@ namespace pixelgpudetails { const unsigned char* modToUnp, const SiPixelGainForHLTonGPU* gains, const WordFedAppender& wordFed, - PixelFormatterErrors&& errors, + SiPixelFormatterErrors&& errors, const uint32_t wordCounter, const uint32_t fedCounter, bool useQualityInfo, From 674af61e62b1f58c7fbe248ff3618df56817804e Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 27 Nov 2020 20:43:23 +0100 Subject: [PATCH 138/149] Apply code formatting --- .../SiPixelROCsStatusAndMappingWrapper.h | 4 ++-- ...elROCsStatusAndMappingWrapperESProducer.cc | 3 ++- .../src/SiPixelROCsStatusAndMappingWrapper.cc | 23 ++++++++++--------- .../plugins/SiPixelRawToClusterGPUKernel.cu | 7 ++++-- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h index d86aa93700297..5f875d7dff5a9 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h +++ 
b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h @@ -18,8 +18,8 @@ class SiPixelQuality; class SiPixelROCsStatusAndMappingWrapper { public: SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const &cablingMap, - TrackerGeometry const &trackerGeom, - SiPixelQuality const *badPixelInfo); + TrackerGeometry const &trackerGeom, + SiPixelQuality const *badPixelInfo); ~SiPixelROCsStatusAndMappingWrapper(); bool hasQuality() const { return hasQuality_; } diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc index 2c77560a5058e..9c37860ca9ffe 100644 --- a/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc +++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelROCsStatusAndMappingWrapperESProducer.cc @@ -26,7 +26,8 @@ class SiPixelROCsStatusAndMappingWrapperESProducer : public edm::ESProducer { bool useQuality_; }; -SiPixelROCsStatusAndMappingWrapperESProducer::SiPixelROCsStatusAndMappingWrapperESProducer(const edm::ParameterSet& iConfig) +SiPixelROCsStatusAndMappingWrapperESProducer::SiPixelROCsStatusAndMappingWrapperESProducer( + const edm::ParameterSet& iConfig) : useQuality_(iConfig.getParameter("UseQualityInfo")) { auto const& component = iConfig.getParameter("ComponentName"); auto cc = setWhatProduced(this, component); diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc index 1657be1725842..1470ad6825b86 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc @@ -20,8 +20,8 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const& 
cablingMap, - TrackerGeometry const& trackerGeom, - SiPixelQuality const* badPixelInfo) + TrackerGeometry const& trackerGeom, + SiPixelQuality const* badPixelInfo) : cablingMap_(&cablingMap), modToUnpDefault(pixelgpudetails::MAX_SIZE), hasQuality_(badPixelInfo != nullptr) { cudaCheck(cudaMallocHost(&cablingMapHost, sizeof(SiPixelROCsStatusAndMapping))); @@ -85,14 +85,14 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe } LogDebug("SiPixelROCsStatusAndMapping") << "----------------------------------------------------------------------------" << std::endl; - LogDebug("SiPixelROCsStatusAndMapping") << i << std::setw(20) << cablingMapHost->fed[i] << std::setw(20) - << cablingMapHost->link[i] << std::setw(20) << cablingMapHost->roc[i] - << std::endl; - LogDebug("SiPixelROCsStatusAndMapping") << i << std::setw(20) << cablingMapHost->RawId[i] << std::setw(20) - << cablingMapHost->rocInDet[i] << std::setw(20) << cablingMapHost->moduleId[i] - << std::endl; - LogDebug("SiPixelROCsStatusAndMapping") << i << std::setw(20) << (bool)cablingMapHost->badRocs[i] << std::setw(20) - << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") + << i << std::setw(20) << cablingMapHost->fed[i] << std::setw(20) << cablingMapHost->link[i] << std::setw(20) + << cablingMapHost->roc[i] << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") + << i << std::setw(20) << cablingMapHost->RawId[i] << std::setw(20) << cablingMapHost->rocInDet[i] + << std::setw(20) << cablingMapHost->moduleId[i] << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") + << i << std::setw(20) << (bool)cablingMapHost->badRocs[i] << std::setw(20) << std::endl; LogDebug("SiPixelROCsStatusAndMapping") << "----------------------------------------------------------------------------" << std::endl; } @@ -102,7 +102,8 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe SiPixelROCsStatusAndMappingWrapper::~SiPixelROCsStatusAndMappingWrapper() { 
cudaCheck(cudaFreeHost(cablingMapHost)); } -const SiPixelROCsStatusAndMapping* SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync(cudaStream_t cudaStream) const { +const SiPixelROCsStatusAndMapping* SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync( + cudaStream_t cudaStream) const { const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { // allocate cudaCheck(cudaMalloc(&data.cablingMapDevice, sizeof(SiPixelROCsStatusAndMapping))); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 9a37ec2100661..7c133b10f4dab 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -190,8 +190,11 @@ namespace pixelgpudetails { __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } - __device__ uint8_t checkROC( - uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelROCsStatusAndMapping *cablingMap, bool debug = false) { + __device__ uint8_t checkROC(uint32_t errorWord, + uint8_t fedId, + uint32_t link, + const SiPixelROCsStatusAndMapping *cablingMap, + bool debug = false) { uint8_t errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; if (errorType < 25) return 0; From b643d1bad33f55405ba1401f76d81b6653558f64 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Sat, 28 Nov 2020 00:18:26 +0100 Subject: [PATCH 139/149] Move hit indexes to 32 bits (cms-patatrack#583) Add a counter for forlorn doublets. 
--- .../interface/TrackingRecHit2DSOAView.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index 808feb2a4218f..6a83a66b60fbd 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -15,10 +15,12 @@ namespace pixelCPEforGPU { class TrackingRecHit2DSOAView { public: static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; } - using hindex_type = uint16_t; // if above is <=2^16 + using hindex_type = uint32_t; // if above is <=2^32 - using Hist = - cms::cuda::HistoContainer; + using PhiBinner = + cms::cuda::HistoContainer; + + using Hist = PhiBinner; // FIXME using AverageGeometry = phase1PixelTopology::AverageGeometry; @@ -93,9 +95,9 @@ class TrackingRecHit2DSOAView { uint32_t* m_hitsLayerStart; - Hist* m_hist; + PhiBinner* m_hist; // FIXME use a more descriptive name consistently uint32_t m_nHits; }; -#endif +#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h From 2b4d4eba9429c0626540d1e826cac873074d5386 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 28 Nov 2020 00:22:40 +0100 Subject: [PATCH 140/149] Synchronise with CMSSW_11_2_0_pre10 --- .../python/clients/beam_dqm_sourceclient-live_cfg.py | 6 +++++- .../python/clients/l1tstage2_dqm_sourceclient-live_cfg.py | 4 ++-- .../clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py | 4 ++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py index 50fda0503d1ed..471c3266bb569 100644 --- a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py @@ -356,7 +356,7 @@ DBParameters = cms.PSet( 
messageLevel = cms.untracked.int32(0), - authenticationPath = cms.untracked.string('') + authenticationPath = cms.untracked.string('.') ), # Upload to CondDB @@ -366,7 +366,10 @@ runNumber = cms.untracked.uint64(options.runNumber), lastLumiFile = cms.untracked.string(''), writeTransactionDelay = cms.untracked.uint32(options.transDelay), + latency = cms.untracked.uint32(2), autoCommit = cms.untracked.bool(True), + saveLogsOnDB = cms.untracked.bool(True), + jobName = cms.untracked.string("BeamSpotOnlineLegacyTest"), # name of the DB log record toPut = cms.VPSet(cms.PSet( record = cms.string(BSOnlineRecordName), tag = cms.string('BSOnlineLegacy_tag'), @@ -379,6 +382,7 @@ if not live or noDB: process.OnlineDBOutputService.connect = cms.string('sqlite_file:BeamSpotOnlineLegacy.db') process.OnlineDBOutputService.preLoadConnectionString = cms.untracked.string('sqlite_file:BeamSpotOnlineLegacy.db') + process.OnlineDBOutputService.saveLogsOnDB = cms.untracked.bool(False) #--------- # Final path diff --git a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py index 580d7a2136841..0c7f4707b6d8b 100644 --- a/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/l1tstage2_dqm_sourceclient-live_cfg.py @@ -1,8 +1,8 @@ import FWCore.ParameterSet.Config as cms import sys -from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 -process = cms.Process("L1TStage2DQM", Run2_2018) +from Configuration.Eras.Era_Run3_cff import Run3 +process = cms.Process("L1TStage2DQM", Run3) unitTest = False if 'unitTest=True' in sys.argv: diff --git a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py index 1d868837300b0..7b6372229555c 100644 --- a/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py +++ 
b/DQM/Integration/python/clients/l1tstage2emulator_dqm_sourceclient-live_cfg.py @@ -1,8 +1,8 @@ import FWCore.ParameterSet.Config as cms import sys -from Configuration.Eras.Era_Run2_2018_cff import Run2_2018 -process = cms.Process("L1TStage2EmulatorDQM", Run2_2018) +from Configuration.Eras.Era_Run3_cff import Run3 +process = cms.Process("L1TStage2EmulatorDQM", Run3) unitTest = False if 'unitTest=True' in sys.argv: From 0489c582c396d6dc34c5a772c2fb08030ebf50b7 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 1 Dec 2020 02:22:18 +0100 Subject: [PATCH 141/149] Simplify cudacompat layer to use a 1-dimensional grid (cms-patatrack#586) Remove the possibility of changing the grid size used by the cms::cudacompat layer, and make it a constant equal to {1, 1, 1}. This avoids a thread-related problem caused by TBB using worker threads where the grid size had not been initialised. The kernels for pixel clustering need to be rewritten to support a one-dimensional grid to run on the CPU. Currently they are only used on the GPU in the Patatrack workflows, but they are exercised on the CPU by the gpuClustering_t tests; those tests have been commented out until the kernels can be updated. 
--- .../SiPixelClusterizer/test/gpuClustering_t.h | 17 +++++++++-------- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 2 -- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index 8ec665f8960b6..721f08adfcf46 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -10,11 +10,10 @@ #include #ifdef __CUDACC__ - -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #endif #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" @@ -33,7 +32,6 @@ int main(void) { auto h_x = std::make_unique(numElements); auto h_y = std::make_unique(numElements); auto h_adc = std::make_unique(numElements); - auto h_clus = std::make_unique(numElements); #ifdef __CUDACC__ @@ -46,11 +44,9 @@ int main(void) { auto d_clusInModule = cms::cuda::make_device_unique(MaxNumModules, nullptr); auto d_moduleId = cms::cuda::make_device_unique(MaxNumModules, nullptr); #else - auto h_moduleStart = std::make_unique(MaxNumModules + 1); auto h_clusInModule = std::make_unique(MaxNumModules); auto h_moduleId = std::make_unique(MaxNumModules); - #endif // later random number @@ -301,9 +297,12 @@ int main(void) { cudaDeviceSynchronize(); #else + h_moduleStart[0] = nModules; countModules(h_id.get(), h_moduleStart.get(), h_clus.get(), n); memset(h_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t)); +#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE + // FIXME the findClus kernel should be rewritten to avoid relying on a 
predefined grid size gridDim.x = MaxNumModules; //not needed in the kernel for this specific case; assert(blockIdx.x == 0); for (; blockIdx.x < gridDim.x; ++blockIdx.x) @@ -315,7 +314,7 @@ int main(void) { h_moduleId.get(), h_clus.get(), n); - resetGrid(); +#endif // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE nModules = h_moduleStart[0]; auto nclus = h_clusInModule.get(); @@ -330,12 +329,14 @@ int main(void) { if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl; +#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE + // FIXME the clusterChargeCut kernel should be rewritten to avoid relying on a predefined grid size gridDim.x = MaxNumModules; // no needed in the kernel for in this specific case assert(blockIdx.x == 0); for (; blockIdx.x < gridDim.x; ++blockIdx.x) clusterChargeCut( h_id.get(), h_adc.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); - resetGrid(); +#endif // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE #endif diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index c7084f325d05b..9505aec154222 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -209,8 +209,6 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv // filled creates view SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(), yy_.data(), adc_.data(), moduleInd_.data(), clus_.data()}; assert(digiView.adc(0) != 0); - // not needed... 
- cms::cudacompat::resetGrid(); // we run on blockId.x==0 gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view()); for (auto h = fc; h < lc; ++h) From ef089516ea4d7e2f24a0f5c9709634ab75cc5268 Mon Sep 17 00:00:00 2001 From: Vincenzo Innocente Date: Tue, 1 Dec 2020 15:18:50 +0100 Subject: [PATCH 142/149] Make clusterizer kernels independent of the grid size (cms-patatrack#588) --- .../plugins/gpuClusterChargeCut.h | 173 ++++---- .../plugins/gpuClustering.h | 409 +++++++++--------- .../SiPixelClusterizer/test/gpuClustering_t.h | 44 +- 3 files changed, 304 insertions(+), 322 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index b781b10792fff..8f45d35b267b1 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -19,105 +19,106 @@ namespace gpuClustering { uint32_t const* __restrict__ moduleId, // module id of each module int32_t* __restrict__ clusterId, // modified: cluster id of each pixel uint32_t numElements) { - if (blockIdx.x >= moduleStart[0]) - return; - - auto firstPixel = moduleStart[1 + blockIdx.x]; - auto thisModuleId = id[firstPixel]; - assert(thisModuleId < MaxNumModules); - assert(thisModuleId == moduleId[blockIdx.x]); + __shared__ int32_t charge[MaxNumClustersPerModules]; + __shared__ uint8_t ok[MaxNumClustersPerModules]; + __shared__ uint16_t newclusId[MaxNumClustersPerModules]; - auto nclus = nClustersInModule[thisModuleId]; - if (nclus == 0) - return; + auto firstModule = blockIdx.x; + auto endModule = moduleStart[0]; + for (auto module = firstModule; module < endModule; module += gridDim.x) { + auto firstPixel = moduleStart[1 + module]; + auto thisModuleId = id[firstPixel]; + assert(thisModuleId < MaxNumModules); + assert(thisModuleId == moduleId[module]); + + auto nclus = nClustersInModule[thisModuleId]; + 
if (nclus == 0) + continue; + + if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules) + printf("Warning too many clusters in module %d in block %d: %d > %d\n", + thisModuleId, + blockIdx.x, + nclus, + MaxNumClustersPerModules); + + auto first = firstPixel + threadIdx.x; + + if (nclus > MaxNumClustersPerModules) { + // remove excess FIXME find a way to cut charge first.... + for (auto i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) + continue; // not valid + if (id[i] != thisModuleId) + break; // end of module + if (clusterId[i] >= MaxNumClustersPerModules) { + id[i] = InvId; + clusterId[i] = InvId; + } + } + nclus = MaxNumClustersPerModules; + } - if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules) - printf("Warning too many clusters in module %d in block %d: %d > %d\n", - thisModuleId, - blockIdx.x, - nclus, - MaxNumClustersPerModules); +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x); +#endif - auto first = firstPixel + threadIdx.x; + assert(nclus <= MaxNumClustersPerModules); + for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { + charge[i] = 0; + } + __syncthreads(); - if (nclus > MaxNumClustersPerModules) { - // remove excess FIXME find a way to cut charge first.... for (auto i = first; i < numElements; i += blockDim.x) { if (id[i] == InvId) continue; // not valid if (id[i] != thisModuleId) break; // end of module - if (clusterId[i] >= MaxNumClustersPerModules) { - id[i] = InvId; - clusterId[i] = InvId; - } + atomicAdd(&charge[clusterId[i]], adc[i]); } - nclus = MaxNumClustersPerModules; - } + __syncthreads(); -#ifdef GPU_DEBUG - if (thisModuleId % 100 == 1) - if (threadIdx.x == 0) - printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x); -#endif + auto chargeCut = thisModuleId < 96 ? 2000 : 4000; // move in constants (calib?) 
+ for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { + newclusId[i] = ok[i] = charge[i] > chargeCut ? 1 : 0; + } - __shared__ int32_t charge[MaxNumClustersPerModules]; - __shared__ uint8_t ok[MaxNumClustersPerModules]; - __shared__ uint16_t newclusId[MaxNumClustersPerModules]; + __syncthreads(); + + // renumber + __shared__ uint16_t ws[32]; + cms::cuda::blockPrefixScan(newclusId, nclus, ws); + + assert(nclus >= newclusId[nclus - 1]); + + if (nclus == newclusId[nclus - 1]) + continue; + + nClustersInModule[thisModuleId] = newclusId[nclus - 1]; + __syncthreads(); + + // mark bad cluster again + for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { + if (0 == ok[i]) + newclusId[i] = InvId + 1; + } + __syncthreads(); + + // reassign id + for (auto i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) + continue; // not valid + if (id[i] != thisModuleId) + break; // end of module + clusterId[i] = newclusId[clusterId[i]] - 1; + if (clusterId[i] == InvId) + id[i] = InvId; + } - assert(nclus <= MaxNumClustersPerModules); - for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { - charge[i] = 0; - } - __syncthreads(); - - for (auto i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) - continue; // not valid - if (id[i] != thisModuleId) - break; // end of module - atomicAdd(&charge[clusterId[i]], adc[i]); - } - __syncthreads(); - - auto chargeCut = thisModuleId < 96 ? 2000 : 4000; // move in constants (calib?) - for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { - newclusId[i] = ok[i] = charge[i] > chargeCut ? 
1 : 0; - } - - __syncthreads(); - - // renumber - __shared__ uint16_t ws[32]; - cms::cuda::blockPrefixScan(newclusId, nclus, ws); - - assert(nclus >= newclusId[nclus - 1]); - - if (nclus == newclusId[nclus - 1]) - return; - - nClustersInModule[thisModuleId] = newclusId[nclus - 1]; - __syncthreads(); - - // mark bad cluster again - for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { - if (0 == ok[i]) - newclusId[i] = InvId + 1; - } - __syncthreads(); - - // reassign id - for (auto i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) - continue; // not valid - if (id[i] != thisModuleId) - break; // end of module - clusterId[i] = newclusId[clusterId[i]] - 1; - if (clusterId[i] == InvId) - id[i] = InvId; - } - - //done + //done + } // loop on modules } } // namespace gpuClustering diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 16c181a431ce8..3bf42c8265b1e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -47,260 +47,261 @@ namespace gpuClustering { uint32_t* __restrict__ moduleId, // output: module id of each module int32_t* __restrict__ clusterId, // output: cluster id of each pixel int numElements) { - if (blockIdx.x >= moduleStart[0]) - return; + __shared__ int msize; - auto firstPixel = moduleStart[1 + blockIdx.x]; - auto thisModuleId = id[firstPixel]; - assert(thisModuleId < MaxNumModules); + auto firstModule = blockIdx.x; + auto endModule = moduleStart[0]; + for (auto module = firstModule; module < endModule; module += gridDim.x) { + auto firstPixel = moduleStart[1 + module]; + auto thisModuleId = id[firstPixel]; + assert(thisModuleId < MaxNumModules); #ifdef GPU_DEBUG - if (thisModuleId % 100 == 1) - if (threadIdx.x == 0) - printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x); + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) 
+ printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x); #endif - auto first = firstPixel + threadIdx.x; + auto first = firstPixel + threadIdx.x; - // find the index of the first pixel not belonging to this module (or invalid) - __shared__ int msize; - msize = numElements; - __syncthreads(); + // find the index of the first pixel not belonging to this module (or invalid) + msize = numElements; + __syncthreads(); - // skip threads not associated to an existing pixel - for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels - continue; - if (id[i] != thisModuleId) { // find the first pixel in a different module - atomicMin(&msize, i); - break; + // skip threads not associated to an existing pixel + for (int i = first; i < numElements; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + if (id[i] != thisModuleId) { // find the first pixel in a different module + atomicMin(&msize, i); + break; + } } - } - //init hist (ymax=416 < 512 : 9bits) - constexpr uint32_t maxPixInModule = 4000; - constexpr auto nbins = phase1PixelTopology::numColsInModule + 2; //2+2; - using Hist = cms::cuda::HistoContainer; - __shared__ Hist hist; - __shared__ typename Hist::Counter ws[32]; - for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { - hist.off[j] = 0; - } - __syncthreads(); + //init hist (ymax=416 < 512 : 9bits) + constexpr uint32_t maxPixInModule = 4000; + constexpr auto nbins = phase1PixelTopology::numColsInModule + 2; //2+2; + using Hist = cms::cuda::HistoContainer; + __shared__ Hist hist; + __shared__ typename Hist::Counter ws[32]; + for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { + hist.off[j] = 0; + } + __syncthreads(); - assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId))); + assert((msize == numElements) or ((msize < numElements) and (id[msize] != thisModuleId))); - // limit to maxPixInModule (FIXME if 
recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) - if (0 == threadIdx.x) { - if (msize - firstPixel > maxPixInModule) { - printf("too many pixels in module %d: %d > %d\n", thisModuleId, msize - firstPixel, maxPixInModule); - msize = maxPixInModule + firstPixel; + // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) + if (0 == threadIdx.x) { + if (msize - firstPixel > maxPixInModule) { + printf("too many pixels in module %d: %d > %d\n", thisModuleId, msize - firstPixel, maxPixInModule); + msize = maxPixInModule + firstPixel; + } } - } - __syncthreads(); - assert(msize - firstPixel <= maxPixInModule); + __syncthreads(); + assert(msize - firstPixel <= maxPixInModule); #ifdef GPU_DEBUG - __shared__ uint32_t totGood; - totGood = 0; - __syncthreads(); + __shared__ uint32_t totGood; + totGood = 0; + __syncthreads(); #endif - // fill histo - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels - continue; - hist.count(y[i]); + // fill histo + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + hist.count(y[i]); #ifdef GPU_DEBUG - atomicAdd(&totGood, 1); + atomicAdd(&totGood, 1); #endif - } - __syncthreads(); - if (threadIdx.x < 32) - ws[threadIdx.x] = 0; // used by prefix scan... - __syncthreads(); - hist.finalize(ws); - __syncthreads(); + } + __syncthreads(); + if (threadIdx.x < 32) + ws[threadIdx.x] = 0; // used by prefix scan... 
+ __syncthreads(); + hist.finalize(ws); + __syncthreads(); #ifdef GPU_DEBUG - assert(hist.size() == totGood); - if (thisModuleId % 100 == 1) - if (threadIdx.x == 0) - printf("histo size %d\n", hist.size()); + assert(hist.size() == totGood); + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("histo size %d\n", hist.size()); #endif - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels - continue; - hist.fill(y[i], i - firstPixel); - } + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + hist.fill(y[i], i - firstPixel); + } #ifdef __CUDA_ARCH__ - // assume that we can cover the whole module with up to 16 blockDim.x-wide iterations - constexpr int maxiter = 16; + // assume that we can cover the whole module with up to 16 blockDim.x-wide iterations + constexpr int maxiter = 16; #else - auto maxiter = hist.size(); + auto maxiter = hist.size(); #endif - // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event - constexpr int maxNeighbours = 10; - assert((hist.size() / blockDim.x) <= maxiter); - // nearest neighbour - uint16_t nn[maxiter][maxNeighbours]; - uint8_t nnn[maxiter]; // number of nn - for (uint32_t k = 0; k < maxiter; ++k) - nnn[k] = 0; + // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event + constexpr int maxNeighbours = 10; + assert((hist.size() / blockDim.x) <= maxiter); + // nearest neighbour + uint16_t nn[maxiter][maxNeighbours]; + uint8_t nnn[maxiter]; // number of nn + for (uint32_t k = 0; k < maxiter; ++k) + nnn[k] = 0; - __syncthreads(); // for hit filling! + __syncthreads(); // for hit filling! 
#ifdef GPU_DEBUG - // look for anomalous high occupancy - __shared__ uint32_t n40, n60; - n40 = n60 = 0; - __syncthreads(); - for (auto j = threadIdx.x; j < Hist::nbins(); j += blockDim.x) { - if (hist.size(j) > 60) - atomicAdd(&n60, 1); - if (hist.size(j) > 40) - atomicAdd(&n40, 1); - } - __syncthreads(); - if (0 == threadIdx.x) { - if (n60 > 0) - printf("columns with more than 60 px %d in %d\n", n60, thisModuleId); - else if (n40 > 0) - printf("columns with more than 40 px %d in %d\n", n40, thisModuleId); - } - __syncthreads(); + // look for anomalous high occupancy + __shared__ uint32_t n40, n60; + n40 = n60 = 0; + __syncthreads(); + for (auto j = threadIdx.x; j < Hist::nbins(); j += blockDim.x) { + if (hist.size(j) > 60) + atomicAdd(&n60, 1); + if (hist.size(j) > 40) + atomicAdd(&n40, 1); + } + __syncthreads(); + if (0 == threadIdx.x) { + if (n60 > 0) + printf("columns with more than 60 px %d in %d\n", n60, thisModuleId); + else if (n40 > 0) + printf("columns with more than 40 px %d in %d\n", n40, thisModuleId); + } + __syncthreads(); #endif - // fill NN - for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { - assert(k < maxiter); - auto p = hist.begin() + j; - auto i = *p + firstPixel; - assert(id[i] != InvId); - assert(id[i] == thisModuleId); // same module - int be = Hist::bin(y[i] + 1); - auto e = hist.end(be); - ++p; - assert(0 == nnn[k]); - for (; p < e; ++p) { - auto m = (*p) + firstPixel; - assert(m != i); - assert(int(y[m]) - int(y[i]) >= 0); - assert(int(y[m]) - int(y[i]) <= 1); - if (std::abs(int(x[m]) - int(x[i])) > 1) - continue; - auto l = nnn[k]++; - assert(l < maxNeighbours); - nn[k][l] = *p; + // fill NN + for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { + assert(k < maxiter); + auto p = hist.begin() + j; + auto i = *p + firstPixel; + assert(id[i] != InvId); + assert(id[i] == thisModuleId); // same module + int be = Hist::bin(y[i] + 1); + auto e = hist.end(be); + ++p; + assert(0 == nnn[k]); + 
for (; p < e; ++p) { + auto m = (*p) + firstPixel; + assert(m != i); + assert(int(y[m]) - int(y[i]) >= 0); + assert(int(y[m]) - int(y[i]) <= 1); + if (std::abs(int(x[m]) - int(x[i])) > 1) + continue; + auto l = nnn[k]++; + assert(l < maxNeighbours); + nn[k][l] = *p; + } } - } - // for each pixel, look at all the pixels until the end of the module; - // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; - // after the loop, all the pixel in each cluster should have the id equeal to the lowest - // pixel in the cluster ( clus[i] == i ). - bool more = true; - int nloops = 0; - while (__syncthreads_or(more)) { - if (1 == nloops % 2) { - for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { - auto p = hist.begin() + j; - auto i = *p + firstPixel; - auto m = clusterId[i]; - while (m != clusterId[m]) - m = clusterId[m]; - clusterId[i] = m; + // for each pixel, look at all the pixels until the end of the module; + // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; + // after the loop, all the pixel in each cluster should have the id equeal to the lowest + // pixel in the cluster ( clus[i] == i ). 
+ bool more = true; + int nloops = 0; + while (__syncthreads_or(more)) { + if (1 == nloops % 2) { + for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { + auto p = hist.begin() + j; + auto i = *p + firstPixel; + auto m = clusterId[i]; + while (m != clusterId[m]) + m = clusterId[m]; + clusterId[i] = m; + } + } else { + more = false; + for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { + auto p = hist.begin() + j; + auto i = *p + firstPixel; + for (int kk = 0; kk < nnn[k]; ++kk) { + auto l = nn[k][kk]; + auto m = l + firstPixel; + assert(m != i); + auto old = atomicMin(&clusterId[m], clusterId[i]); + if (old != clusterId[i]) { + // end the loop only if no changes were applied + more = true; + } + atomicMin(&clusterId[i], old); + } // nnloop + } // pixel loop } - } else { - more = false; - for (auto j = threadIdx.x, k = 0U; j < hist.size(); j += blockDim.x, ++k) { - auto p = hist.begin() + j; - auto i = *p + firstPixel; - for (int kk = 0; kk < nnn[k]; ++kk) { - auto l = nn[k][kk]; - auto m = l + firstPixel; - assert(m != i); - auto old = atomicMin(&clusterId[m], clusterId[i]); - if (old != clusterId[i]) { - // end the loop only if no changes were applied - more = true; - } - atomicMin(&clusterId[i], old); - } // nnloop - } // pixel loop - } - ++nloops; - } // end while + ++nloops; + } // end while #ifdef GPU_DEBUG - { - __shared__ int n0; - if (threadIdx.x == 0) - n0 = nloops; - __syncthreads(); - auto ok = n0 == nloops; - assert(__syncthreads_and(ok)); - if (thisModuleId % 100 == 1) + { + __shared__ int n0; if (threadIdx.x == 0) - printf("# loops %d\n", nloops); - } + n0 = nloops; + __syncthreads(); + auto ok = n0 == nloops; + assert(__syncthreads_and(ok)); + if (thisModuleId % 100 == 1) + if (threadIdx.x == 0) + printf("# loops %d\n", nloops); + } #endif - __shared__ unsigned int foundClusters; - foundClusters = 0; - __syncthreads(); + __shared__ unsigned int foundClusters; + foundClusters = 0; + __syncthreads(); - // 
find the number of different clusters, identified by a pixels with clus[i] == i; - // mark these pixels with a negative id. - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels - continue; - if (clusterId[i] == i) { - auto old = atomicInc(&foundClusters, 0xffffffff); - clusterId[i] = -(old + 1); + // find the number of different clusters, identified by a pixels with clus[i] == i; + // mark these pixels with a negative id. + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + if (clusterId[i] == i) { + auto old = atomicInc(&foundClusters, 0xffffffff); + clusterId[i] = -(old + 1); + } } - } - __syncthreads(); + __syncthreads(); - // propagate the negative id to all the pixels in the cluster. - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels - continue; - if (clusterId[i] >= 0) { - // mark each pixel in a cluster with the same id as the first one - clusterId[i] = clusterId[clusterId[i]]; + // propagate the negative id to all the pixels in the cluster. 
+ for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == InvId) // skip invalid pixels + continue; + if (clusterId[i] >= 0) { + // mark each pixel in a cluster with the same id as the first one + clusterId[i] = clusterId[clusterId[i]]; + } } - } - __syncthreads(); + __syncthreads(); - // adjust the cluster id to be a positive value starting from 0 - for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) { // skip invalid pixels - clusterId[i] = -9999; - continue; + // adjust the cluster id to be a positive value starting from 0 + for (int i = first; i < msize; i += blockDim.x) { + if (id[i] == InvId) { // skip invalid pixels + clusterId[i] = -9999; + continue; + } + clusterId[i] = -clusterId[i] - 1; } - clusterId[i] = -clusterId[i] - 1; - } - __syncthreads(); + __syncthreads(); - if (threadIdx.x == 0) { - nClustersInModule[thisModuleId] = foundClusters; - moduleId[blockIdx.x] = thisModuleId; + if (threadIdx.x == 0) { + nClustersInModule[thisModuleId] = foundClusters; + moduleId[module] = thisModuleId; #ifdef GPU_DEBUG - if (foundClusters > gMaxHit) { - gMaxHit = foundClusters; - if (foundClusters > 8) - printf("max hit %d in %d\n", foundClusters, thisModuleId); - } + if (foundClusters > gMaxHit) { + gMaxHit = foundClusters; + if (foundClusters > 8) + printf("max hit %d in %d\n", foundClusters, thisModuleId); + } #endif #ifdef GPU_DEBUG - if (thisModuleId % 100 == 1) - printf("%d clusters in module %d\n", foundClusters, thisModuleId); + if (thisModuleId % 100 == 1) + printf("%d clusters in module %d\n", foundClusters, thisModuleId); #endif - } + } + } // module loop } - } // namespace gpuClustering #endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClustering_h diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index 721f08adfcf46..64289d5208b48 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ 
b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -14,7 +14,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#endif +#endif // __CUDACC__ #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" @@ -22,7 +22,7 @@ int main(void) { #ifdef __CUDACC__ cms::cudatest::requireDevices(); -#endif +#endif // __CUDACC__ using namespace gpuClustering; @@ -43,11 +43,11 @@ int main(void) { auto d_moduleStart = cms::cuda::make_device_unique(MaxNumModules + 1, nullptr); auto d_clusInModule = cms::cuda::make_device_unique(MaxNumModules, nullptr); auto d_moduleId = cms::cuda::make_device_unique(MaxNumModules, nullptr); -#else +#else // __CUDACC__ auto h_moduleStart = std::make_unique(MaxNumModules + 1); auto h_clusInModule = std::make_unique(MaxNumModules); auto h_moduleId = std::make_unique(MaxNumModules); -#endif +#endif // __CUDACC__ // later random number int n = 0; @@ -240,11 +240,11 @@ int main(void) { // size_t size8 = n * sizeof(uint8_t); cudaCheck(cudaMemcpy(d_moduleStart.get(), &nModules, sizeof(uint32_t), cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy(d_id.get(), h_id.get(), size16, cudaMemcpyHostToDevice)); cudaCheck(cudaMemcpy(d_x.get(), h_x.get(), size16, cudaMemcpyHostToDevice)); cudaCheck(cudaMemcpy(d_y.get(), h_y.get(), size16, cudaMemcpyHostToDevice)); cudaCheck(cudaMemcpy(d_adc.get(), h_adc.get(), size16, cudaMemcpyHostToDevice)); + // Launch CUDA Kernels int threadsPerBlock = (kkk == 5) ? 512 : ((kkk == 3) ? 
128 : 256); int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; @@ -296,25 +296,12 @@ int main(void) { n); cudaDeviceSynchronize(); -#else - +#else // __CUDACC__ h_moduleStart[0] = nModules; countModules(h_id.get(), h_moduleStart.get(), h_clus.get(), n); memset(h_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t)); -#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE - // FIXME the findClus kernel should be rewritten to avoid relying on a predefined grid size - gridDim.x = MaxNumModules; //not needed in the kernel for this specific case; - assert(blockIdx.x == 0); - for (; blockIdx.x < gridDim.x; ++blockIdx.x) - findClus(h_id.get(), - h_x.get(), - h_y.get(), - h_moduleStart.get(), - h_clusInModule.get(), - h_moduleId.get(), - h_clus.get(), - n); -#endif // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE + findClus( + h_id.get(), h_x.get(), h_y.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); nModules = h_moduleStart[0]; auto nclus = h_clusInModule.get(); @@ -329,16 +316,9 @@ int main(void) { if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; -#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE - // FIXME the clusterChargeCut kernel should be rewritten to avoid relying on a predefined grid size - gridDim.x = MaxNumModules; // no needed in the kernel for in this specific case - assert(blockIdx.x == 0); - for (; blockIdx.x < gridDim.x; ++blockIdx.x) - clusterChargeCut( - h_id.get(), h_adc.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); -#endif // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE - -#endif + clusterChargeCut( + h_id.get(), h_adc.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); +#endif // __CUDACC__ std::cout << "found " << nModules << " Modules active" << std::endl; @@ -347,7 +327,7 @@ int main(void) { cudaCheck(cudaMemcpy(h_clus.get(), d_clus.get(), size32, cudaMemcpyDeviceToHost)); cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); cudaCheck(cudaMemcpy(&moduleId, d_moduleId.get(), nModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); -#endif +#endif // __CUDACC__ std::set clids; for (int i = 0; i < n; ++i) { From fedbf4d8a9f702469f5ddd72a7a7d394e9fca9c7 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 18 Dec 2020 18:26:58 +0100 Subject: [PATCH 143/149] Clean up the pixel local reconstruction code (cms-patatrack#593) Address the pixel local reconstruction review comments. 
General clean up of the pixel local reconstruction code: - remove commented out and obsolete code and data members - use named constants more consistently - update variable names to follow the coding rules and for better consistency - use member initializer lists in the constructors - allow `if constexpr` in CUDA code - use `std::size` instead of hardcoding the array size - convert iterator-based loops to range-based loops - replace `cout` and `printf` with `LogDebug` or `LogWarning` - use put tokens - reorganise the auto-generated cfi files and use them more consistently - adjust code after rearranging an `#ifdef GPU_DEBUG` block - apply code formatting - other minor changes Improve comments: - improve comments and remove obsolete ones - clarify comments and types regarding `HostProduct` - update comments about `GPU_SMALL_EVENTS` being kept for testing purposes - add notes about the original cpu code Reuse some more common code: - move common pixel cluster code to `PixelClusterizerBase` - extend the `SiPixelCluster` constructor Rename classes and modules for better consistency: - remove the `TrackingRecHit2DCUDA.h` and `gpuClusteringConstants.h` forwarding headers - rename `PixelRecHits` to `PixelRecHitGPUKernel` - rename SiPixelRecHitFromSOA to SiPixelRecHitFromCUDA - rename `siPixelClustersCUDAPreSplitting` to `siPixelClustersPreSplittingCUDA` - rename `siPixelRecHitsCUDAPreSplitting` to `siPixelRecHitsPreSplittingCUDA` - rename `siPixelRecHitsLegacyPreSplitting` to `siPixelRecHitsPreSplittingLegacy` - rename `siPixelRecHitHostSoA` to `siPixelRecHitSoAFromLegacy` Re-apply changes from #29805 that were lost in the Patatrack branch. 
--- CUDADataFormats/Common/BuildFile.xml | 2 +- CUDADataFormats/SiPixelCluster/BuildFile.xml | 7 +- .../interface/SiPixelClustersCUDA.h | 2 +- .../interface/gpuClusteringConstants.h | 19 +- .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 3 +- CUDADataFormats/SiPixelCluster/src/classes.h | 6 +- CUDADataFormats/SiPixelDigi/BuildFile.xml | 4 +- .../interface/SiPixelDigiErrorsCUDA.h | 8 +- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 6 +- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 7 +- .../SiPixelDigi/src/SiPixelDigisCUDA.cc | 24 +-- CUDADataFormats/SiPixelDigi/src/classes.h | 8 +- .../interface/TrackingRecHit2DHeterogeneous.h | 6 +- .../interface/TrackingRecHit2DSOAView.h | 7 +- .../src/TrackingRecHit2DHeterogeneous.cc | 20 ++ CUDADataFormats/TrackingRecHit/src/classes.h | 2 +- .../test/TrackingRecHit2DCUDA_t.cpp | 2 +- .../test/TrackingRecHit2DCUDA_t.cu | 2 +- .../SiPixelESProducers/plugins/BuildFile.xml | 8 +- .../src/SiPixelGainCalibrationForHLTGPU.cc | 43 ++-- .../src/SiPixelROCsStatusAndMappingWrapper.cc | 16 +- .../interface/SiPixelGainForHLTonGPU.h | 24 +-- .../interface/SiPixelROCsStatusAndMapping.h | 2 +- .../SiPixelCluster/interface/SiPixelCluster.h | 13 +- .../plugins/SiPixelDigiErrorsFromSoA.cc | 18 +- .../python/siPixelDigis_cff.py | 4 +- .../interface/phase1PixelTopology.h | 10 +- .../SiPixelClusterizer/BuildFile.xml | 11 +- .../SiPixelClusterizer/plugins/BuildFile.xml | 8 +- .../plugins/PixelClusterizerBase.h | 84 ++++---- .../plugins/SiPixelClusterProducer.cc | 19 +- .../plugins/SiPixelDigisClustersFromSoA.cc | 62 ++---- .../plugins/SiPixelRawToClusterCUDA.cc | 2 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 98 +++++---- .../plugins/SiPixelRawToClusterGPUKernel.h | 15 +- .../plugins/gpuCalibPixel.h | 11 +- .../plugins/gpuClusterChargeCut.h | 39 ++-- .../plugins/gpuClustering.h | 26 ++- .../python/siPixelClustersPreSplitting_cff.py | 6 +- .../SiPixelClusterizer/test/BuildFile.xml | 2 +- .../SiPixelClusterizer/test/gpuClustering_t.h | 57 +++--- 
RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 12 +- .../SiPixelRecHits/interface/PixelCPEBase.h | 60 +++--- .../SiPixelRecHits/interface/PixelCPEFast.h | 17 +- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 13 +- .../SiPixelRecHits/plugins/BuildFile.xml | 1 + .../plugins/PixelCPEFastESProducer.cc | 65 +++--- .../plugins/PixelRecHitGPUKernel.cu | 78 ++++++++ .../plugins/PixelRecHitGPUKernel.h | 33 ++++ .../plugins/SiPixelRecHitCUDA.cc | 6 +- .../plugins/SiPixelRecHitConverter.cc | 33 ++-- .../plugins/SiPixelRecHitFromCUDA.cc | 187 ++++++++++++++++++ .../plugins/SiPixelRecHitSoAFromLegacy.cc | 99 +++++----- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 32 ++- .../python/PixelCPEESProducers_cff.py | 2 +- .../python/SiPixelRecHits_cfi.py | 12 +- .../SiPixelRecHits/src/PixelCPEFast.cc | 157 ++++++--------- 57 files changed, 872 insertions(+), 648 deletions(-) create mode 100644 CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc diff --git a/CUDADataFormats/Common/BuildFile.xml b/CUDADataFormats/Common/BuildFile.xml index c524cada33060..f6b68fe69b400 100644 --- a/CUDADataFormats/Common/BuildFile.xml +++ b/CUDADataFormats/Common/BuildFile.xml @@ -1,7 +1,7 @@ + - diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml index 5e401d215c4eb..5406d1355533f 100644 --- a/CUDADataFormats/SiPixelCluster/BuildFile.xml +++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml @@ -1,10 +1,9 @@ - - - + + + - diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index d5d009aaffeb5..acdf1b34a6d79 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ 
b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -60,4 +60,4 @@ class SiPixelClustersCUDA { uint32_t nClusters_h = 0; }; -#endif +#endif // CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h diff --git a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h index 1430606ab6678..e9dfed7bca7a6 100644 --- a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h +++ b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h @@ -2,30 +2,35 @@ #define CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h #include +#include namespace pixelGPUConstants { #ifdef GPU_SMALL_EVENTS + // kept for testing and debugging constexpr uint32_t maxNumberOfHits = 24 * 1024; #else - constexpr uint32_t maxNumberOfHits = - 48 * 1024; // data at pileup 50 has 18300 +/- 3500 hits; 40000 is around 6 sigma away + // data at pileup 50 has 18300 +/- 3500 hits; 40000 is around 6 sigma away + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxNumberOfHits = 48 * 1024; #endif } // namespace pixelGPUConstants namespace gpuClustering { #ifdef GPU_SMALL_EVENTS + // kept for testing and debugging constexpr uint32_t maxHitsInIter() { return 64; } #else // optimized for real data PU 50 + // tested on MC events with 55-75 pileup events constexpr uint32_t maxHitsInIter() { return 160; } #endif constexpr uint32_t maxHitsInModule() { return 1024; } - constexpr uint32_t MaxNumModules = 2000; - constexpr int32_t MaxNumClustersPerModules = maxHitsInModule(); - constexpr uint32_t MaxHitsInModule = maxHitsInModule(); // as above - constexpr uint32_t MaxNumClusters = pixelGPUConstants::maxNumberOfHits; - constexpr uint16_t InvId = 9999; // must be > MaxNumModules + constexpr uint16_t maxNumModules = 2000; + constexpr int32_t maxNumClustersPerModules = maxHitsInModule(); + constexpr uint32_t maxNumClusters = pixelGPUConstants::maxNumberOfHits; + 
constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; + static_assert(invalidModuleId > maxNumModules); // invalidModuleId must be > maxNumModules } // namespace gpuClustering diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index 5e53f49570bb4..ae4a24dbbf83b 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -1,8 +1,7 @@ #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" - +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream) : moduleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)), diff --git a/CUDADataFormats/SiPixelCluster/src/classes.h b/CUDADataFormats/SiPixelCluster/src/classes.h index 0698cb103dab9..3eee5a1fce009 100644 --- a/CUDADataFormats/SiPixelCluster/src/classes.h +++ b/CUDADataFormats/SiPixelCluster/src/classes.h @@ -1,8 +1,8 @@ -#ifndef CUDADataFormats_SiPixelCluster_classes_h -#define CUDADataFormats_SiPixelCluster_classes_h +#ifndef CUDADataFormats_SiPixelCluster_src_classes_h +#define CUDADataFormats_SiPixelCluster_src_classes_h #include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" -#endif +#endif // CUDADataFormats_SiPixelCluster_src_classes_h diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml index ee357e2d4e157..0806768a9b657 100644 --- a/CUDADataFormats/SiPixelDigi/BuildFile.xml +++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml @@ -1,9 +1,9 @@ + + - - diff --git 
a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index 85e8883fa1bd4..bfb15c4ac9f5c 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -1,13 +1,13 @@ #ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h #define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h +#include + #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" #include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" - -#include class SiPixelDigiErrorsCUDA { public: @@ -39,4 +39,4 @@ class SiPixelDigiErrorsCUDA { SiPixelFormatterErrors formatterErrors_h; }; -#endif +#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 2dc1f628bf426..950f9651cf83b 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -1,12 +1,12 @@ #ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h #define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h +#include + #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" -#include - class SiPixelDigisCUDA { public: SiPixelDigisCUDA() = default; @@ -82,4 +82,4 @@ class SiPixelDigisCUDA { uint32_t nDigis_h = 0; }; -#endif +#endif // 
CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index 70bf2e8aa19f5..eecea35ddd622 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -1,12 +1,11 @@ -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/memsetAsync.h" -#include - SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) : data_d(cms::cuda::make_device_unique(maxFedWords, stream)), error_d(cms::cuda::make_device_unique(stream)), diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index 664364b6ff25a..4e6a3fc2593fd 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -1,27 +1,23 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" - +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) { - xx_d = cms::cuda::make_device_unique(maxFedWords, stream); - yy_d = cms::cuda::make_device_unique(maxFedWords, stream); - adc_d = 
cms::cuda::make_device_unique(maxFedWords, stream); - moduleInd_d = cms::cuda::make_device_unique(maxFedWords, stream); - clus_d = cms::cuda::make_device_unique(maxFedWords, stream); - - pdigi_d = cms::cuda::make_device_unique(maxFedWords, stream); - rawIdArr_d = cms::cuda::make_device_unique(maxFedWords, stream); +SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) + : xx_d(cms::cuda::make_device_unique(maxFedWords, stream)), + yy_d(cms::cuda::make_device_unique(maxFedWords, stream)), + adc_d(cms::cuda::make_device_unique(maxFedWords, stream)), + moduleInd_d(cms::cuda::make_device_unique(maxFedWords, stream)), + clus_d(cms::cuda::make_device_unique(maxFedWords, stream)), + view_d(cms::cuda::make_device_unique(stream)), + pdigi_d(cms::cuda::make_device_unique(maxFedWords, stream)), + rawIdArr_d(cms::cuda::make_device_unique(maxFedWords, stream)) { auto view = cms::cuda::make_host_unique(stream); view->xx_ = xx_d.get(); view->yy_ = yy_d.get(); view->adc_ = adc_d.get(); view->moduleInd_ = moduleInd_d.get(); view->clus_ = clus_d.get(); - - view_d = cms::cuda::make_device_unique(stream); cms::cuda::copyAsync(view_d, view, stream); } diff --git a/CUDADataFormats/SiPixelDigi/src/classes.h b/CUDADataFormats/SiPixelDigi/src/classes.h index fca0811e4650f..fc5d318fad688 100644 --- a/CUDADataFormats/SiPixelDigi/src/classes.h +++ b/CUDADataFormats/SiPixelDigi/src/classes.h @@ -1,9 +1,9 @@ -#ifndef CUDADataFormats_SiPixelDigi_classes_h -#define CUDADataFormats_SiPixelDigi_classes_h +#ifndef CUDADataFormats_SiPixelDigi_src_classes_h +#define CUDADataFormats_SiPixelDigi_src_classes_h #include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" -#endif +#endif // CUDADataFormats_SiPixelDigi_src_classes_h 
diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index 73a6daaa4e387..f10495abd2ab8 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -127,11 +127,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); // transfer view - if -#ifndef __CUDACC__ - constexpr -#endif - (std::is_same::value) { + if constexpr (std::is_same::value) { cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index 6a83a66b60fbd..53297a78a428f 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -14,11 +14,11 @@ namespace pixelCPEforGPU { class TrackingRecHit2DSOAView { public: - static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; } + static constexpr uint32_t maxHits() { return gpuClustering::maxNumClusters; } using hindex_type = uint32_t; // if above is <=2^32 using PhiBinner = - cms::cuda::HistoContainer; + cms::cuda::HistoContainer; using Hist = PhiBinner; // FIXME @@ -89,7 +89,8 @@ class TrackingRecHit2DSOAView { uint16_t* m_detInd; // supporting objects - AverageGeometry* m_averageGeometry; // owned (corrected for beam spot: not sure where to host it otherwise) + // m_averageGeometry is corrected for beam spot, not sure where to host it otherwise + AverageGeometry* m_averageGeometry; // owned by TrackingRecHit2DHeterogeneous pixelCPEforGPU::ParamsOnGPU const* m_cpeParams; // forwarded from setup, NOT owned uint32_t const* 
m_hitsModuleStart; // forwarded from clusters diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc new file mode 100644 index 0000000000000..dd3cf548e11dd --- /dev/null +++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc @@ -0,0 +1,20 @@ +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +template <> +cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::localCoordToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(4 * nHits(), stream); + cms::cuda::copyAsync(ret, m_store32, 4 * nHits(), stream); + return ret; +} + +template <> +cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(gpuClustering::maxNumModules + 1, stream); + cudaCheck(cudaMemcpyAsync( + ret.get(), m_hitsModuleStart, sizeof(uint32_t) * (gpuClustering::maxNumModules + 1), cudaMemcpyDefault, stream)); + return ret; +} diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h index 3d40821493c5b..86fef25746efd 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes.h +++ b/CUDADataFormats/TrackingRecHit/src/classes.h @@ -2,7 +2,7 @@ #define CUDADataFormats_SiPixelCluster_src_classes_h #include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" #include "DataFormats/Common/interface/Wrapper.h" #endif // CUDADataFormats_SiPixelCluster_src_classes_h diff --git 
a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index 32af6c181ae68..3d8413b36ec96 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -1,4 +1,4 @@ -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu index 6b55f8a8f98c5..06bd599d074f9 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu @@ -1,4 +1,4 @@ -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" namespace testTrackingRecHit2D { diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index 1f063df32a766..4bef676217b72 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -1,13 +1,15 @@ - - + + + + - + diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc index e4f278c28ec69..66b8d9594353b 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -3,6 +3,7 @@ #include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" #include 
"CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" #include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "Geometry/CommonDetUnit/interface/GeomDetType.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" @@ -12,22 +13,21 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa : gains_(&gains) { // bizzarre logic (looking for fist strip-det) don't ask auto const& dus = geom.detUnits(); - unsigned m_detectors = dus.size(); + unsigned int n_detectors = dus.size(); for (unsigned int i = 1; i < 7; ++i) { - if (geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) != dus.size() && - dus[geom.offsetDU(GeomDetEnumerators::tkDetEnum[i])]->type().isTrackerStrip()) { - if (geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) < m_detectors) - m_detectors = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]); + const auto offset = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]); + if (offset != dus.size() && dus[offset]->type().isTrackerStrip()) { + if (n_detectors > offset) + n_detectors = offset; } } - /* - std::cout << "caching calibs for " << m_detectors << " pixel detectors of size " << gains.data().size() << std::endl; - std::cout << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure) << std::endl; - */ + LogDebug("SiPixelGainCalibrationForHLTGPU") + << "caching calibs for " << n_detectors << " pixel detectors of size " << gains.data().size() << '\n' + << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure); cudaCheck(cudaMallocHost((void**)&gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU))); - gainForHLTonHost_->v_pedestals = + gainForHLTonHost_->v_pedestals_ = (SiPixelGainForHLTonGPU_DecodingStructure*)this->gains_->data().data(); // so it can be used 
on CPU as well... // do not read back from the (possibly write-combined) memory buffer @@ -48,20 +48,17 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa gainForHLTonHost_->deadFlag_ = 255; gainForHLTonHost_->noisyFlag_ = 254; - gainForHLTonHost_->pedPrecision = static_cast(maxPed - minPed) / nBinsToUseForEncoding; - gainForHLTonHost_->gainPrecision = static_cast(maxGain - minGain) / nBinsToUseForEncoding; + gainForHLTonHost_->pedPrecision_ = static_cast(maxPed - minPed) / nBinsToUseForEncoding; + gainForHLTonHost_->gainPrecision_ = static_cast(maxGain - minGain) / nBinsToUseForEncoding; - /* - std::cout << "precisions g " << gainForHLTonHost_->pedPrecision << ' ' << gainForHLTonHost_->gainPrecision << std::endl; - */ + LogDebug("SiPixelGainCalibrationForHLTGPU") + << "precisions g " << gainForHLTonHost_->pedPrecision_ << ' ' << gainForHLTonHost_->gainPrecision_; // fill the index map auto const& ind = gains.getIndexes(); - /* - std::cout << ind.size() << " " << m_detectors << std::endl; - */ + LogDebug("SiPixelGainCalibrationForHLTGPU") << ind.size() << " " << n_detectors; - for (auto i = 0U; i < m_detectors; ++i) { + for (auto i = 0U; i < n_detectors; ++i) { auto p = std::lower_bound( ind.begin(), ind.end(), dus[i]->geographicalId().rawId(), SiPixelGainCalibrationForHLT::StrictWeakOrdering()); assert(p != ind.end() && p->detid == dus[i]->geographicalId()); @@ -71,9 +68,9 @@ SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGa assert(0 == p->iend % 2); assert(p->ibegin != p->iend); assert(p->ncols > 0); - gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin, p->iend), p->ncols); - // if (ind[i].detid!=dus[i]->geographicalId()) std::cout << ind[i].detid<<"!="<geographicalId() << std::endl; - // gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols); + gainForHLTonHost_->rangeAndCols_[i] = 
std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin, p->iend), p->ncols); + if (ind[i].detid != dus[i]->geographicalId()) + LogDebug("SiPixelGainCalibrationForHLTGPU") << ind[i].detid << "!=" << dus[i]->geographicalId(); } } @@ -94,7 +91,7 @@ const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsyn cudaCheck(cudaMemcpyAsync( data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), + cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals_), &(data.gainDataOnGPU), sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), cudaMemcpyDefault, diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc index 1470ad6825b86..2437696656d25 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc @@ -43,7 +43,7 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe cablingMapHost->link[index] = link; cablingMapHost->roc[index] = roc; if (pixelRoc != nullptr) { - cablingMapHost->RawId[index] = pixelRoc->rawId(); + cablingMapHost->rawId[index] = pixelRoc->rawId(); cablingMapHost->rocInDet[index] = pixelRoc->idInDetUnit(); modToUnpDefault[index] = false; if (badPixelInfo != nullptr) @@ -51,7 +51,7 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe else cablingMapHost->badRocs[index] = false; } else { // store some dummy number - cablingMapHost->RawId[index] = 9999; + cablingMapHost->rawId[index] = 9999; cablingMapHost->rocInDet[index] = 9999; cablingMapHost->badRocs[index] = true; modToUnpDefault[index] = true; @@ -62,7 +62,7 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe } // end of FED loop // Given FedId, Link and idinLnk; use 
the following formula - // to get the RawId and idinDU + // to get the rawId and idinDU // index = (FedID-1200) * MAX_LINK* MAX_ROC + (Link-1)* MAX_ROC + idinLnk; // where, MAX_LINK = 48, MAX_ROC = 8 for Phase1 as mentioned Danek's email // FedID varies between 1200 to 1338 (In total 108 FED's) @@ -70,15 +70,15 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe // idinLnk varies between 1 to 8 for (int i = 1; i < index; i++) { - if (cablingMapHost->RawId[i] == 9999) { + if (cablingMapHost->rawId[i] == 9999) { cablingMapHost->moduleId[i] = 9999; } else { /* - std::cout << cablingMapHost->RawId[i] << std::endl; + std::cout << cablingMapHost->rawId[i] << std::endl; */ - auto gdet = trackerGeom.idToDetUnit(cablingMapHost->RawId[i]); + auto gdet = trackerGeom.idToDetUnit(cablingMapHost->rawId[i]); if (!gdet) { - LogDebug("SiPixelROCsStatusAndMapping") << " Not found: " << cablingMapHost->RawId[i] << std::endl; + LogDebug("SiPixelROCsStatusAndMapping") << " Not found: " << cablingMapHost->rawId[i] << std::endl; continue; } cablingMapHost->moduleId[i] = gdet->index(); @@ -89,7 +89,7 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe << i << std::setw(20) << cablingMapHost->fed[i] << std::setw(20) << cablingMapHost->link[i] << std::setw(20) << cablingMapHost->roc[i] << std::endl; LogDebug("SiPixelROCsStatusAndMapping") - << i << std::setw(20) << cablingMapHost->RawId[i] << std::setw(20) << cablingMapHost->rocInDet[i] + << i << std::setw(20) << cablingMapHost->rawId[i] << std::setw(20) << cablingMapHost->rocInDet[i] << std::setw(20) << cablingMapHost->moduleId[i] << std::endl; LogDebug("SiPixelROCsStatusAndMapping") << i << std::setw(20) << (bool)cablingMapHost->badRocs[i] << std::setw(20) << std::endl; diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h index 6326b594e2771..aa5a127927b90 100644 --- 
a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h @@ -1,5 +1,5 @@ -#ifndef CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h -#define CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h +#ifndef CondFormats_SiPixelObjects_interface_SiPixelGainForHLTonGPU_h +#define CondFormats_SiPixelObjects_interface_SiPixelGainForHLTonGPU_h #include #include @@ -16,6 +16,7 @@ #endif // __device__ #endif // __CUDACC__ +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" struct SiPixelGainForHLTonGPU_DecodingStructure { @@ -32,8 +33,8 @@ class SiPixelGainForHLTonGPU { inline __host__ __device__ std::pair getPedAndGain( uint32_t moduleInd, int col, int row, bool& isDeadColumn, bool& isNoisyColumn) const { - auto range = rangeAndCols[moduleInd].first; - auto nCols = rangeAndCols[moduleInd].second; + auto range = rangeAndCols_[moduleInd].first; + auto nCols = rangeAndCols_[moduleInd].second; // determine what averaged data block we are in (there should be 1 or 2 of these depending on if plaquette is 1 by X or 2 by X unsigned int lengthOfColumnData = (range.second - range.first) / nCols; @@ -46,7 +47,7 @@ class SiPixelGainForHLTonGPU { assert(offset < 3088384); assert(0 == offset % 2); - DecodingStructure const* __restrict__ lp = v_pedestals; + DecodingStructure const* __restrict__ lp = v_pedestals_; auto s = lp[offset / 2]; isDeadColumn = (s.ped & 0xFF) == deadFlag_; @@ -55,15 +56,14 @@ class SiPixelGainForHLTonGPU { return std::make_pair(decodePed(s.ped & 0xFF), decodeGain(s.gain & 0xFF)); } - constexpr float decodeGain(unsigned int gain) const { return gain * gainPrecision + minGain_; } - constexpr float decodePed(unsigned int ped) const { return ped * pedPrecision + minPed_; } + constexpr float decodeGain(unsigned int gain) const { return gain * gainPrecision_ + minGain_; } + constexpr float 
decodePed(unsigned int ped) const { return ped * pedPrecision_ + minPed_; } - DecodingStructure* v_pedestals; - std::pair rangeAndCols[2000]; + DecodingStructure* v_pedestals_; + std::pair rangeAndCols_[gpuClustering::maxNumModules]; float minPed_, maxPed_, minGain_, maxGain_; - - float pedPrecision, gainPrecision; + float pedPrecision_, gainPrecision_; unsigned int numberOfRowsAveragedOver_; // this is 80!!!! unsigned int nBinsToUseForEncoding_; @@ -71,4 +71,4 @@ class SiPixelGainForHLTonGPU { unsigned int noisyFlag_; }; -#endif // CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h +#endif // CondFormats_SiPixelObjects_interface_SiPixelGainForHLTonGPU_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h b/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h index df5b8b24b70dc..a0771aaefb366 100644 --- a/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h @@ -16,7 +16,7 @@ struct SiPixelROCsStatusAndMapping { alignas(128) unsigned int fed[pixelgpudetails::MAX_SIZE]; alignas(128) unsigned int link[pixelgpudetails::MAX_SIZE]; alignas(128) unsigned int roc[pixelgpudetails::MAX_SIZE]; - alignas(128) unsigned int RawId[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int rawId[pixelgpudetails::MAX_SIZE]; alignas(128) unsigned int rocInDet[pixelgpudetails::MAX_SIZE]; alignas(128) unsigned int moduleId[pixelgpudetails::MAX_SIZE]; alignas(128) unsigned char badRocs[pixelgpudetails::MAX_SIZE]; diff --git a/DataFormats/SiPixelCluster/interface/SiPixelCluster.h b/DataFormats/SiPixelCluster/interface/SiPixelCluster.h index 5dfb8671c0a38..453d41555a65d 100644 --- a/DataFormats/SiPixelCluster/interface/SiPixelCluster.h +++ b/DataFormats/SiPixelCluster/interface/SiPixelCluster.h @@ -69,19 +69,22 @@ class SiPixelCluster { static constexpr unsigned int MAXSPAN = 255; static constexpr unsigned int MAXPOS = 2047; + static constexpr uint16_t 
invalidClusterId = std::numeric_limits::max(); + /** Construct from a range of digis that form a cluster and from * a DetID. The range is assumed to be non-empty. */ - SiPixelCluster() {} + SiPixelCluster() = default; SiPixelCluster(unsigned int isize, uint16_t const* adcs, uint16_t const* xpos, uint16_t const* ypos, - uint16_t const xmin, - uint16_t const ymin) - : thePixelOffset(2 * isize), thePixelADC(adcs, adcs + isize) { + uint16_t xmin, + uint16_t ymin, + uint16_t id = invalidClusterId) + : thePixelOffset(2 * isize), thePixelADC(adcs, adcs + isize), theOriginalClusterId(id) { uint16_t maxCol = 0; uint16_t maxRow = 0; for (unsigned int i = 0; i != isize; ++i) { @@ -203,7 +206,7 @@ class SiPixelCluster { uint8_t thePixelRowSpan = 0; // Span pixel index in the x direction (low edge). uint8_t thePixelColSpan = 0; // Span pixel index in the y direction (left edge). - uint16_t theOriginalClusterId = std::numeric_limits::max(); + uint16_t theOriginalClusterId = invalidClusterId; float err_x = -99999.9f; float err_y = -99999.9f; diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc index 7a49646d7a9a1..b487942a1419b 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc @@ -1,24 +1,25 @@ +#include + #include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/Handle.h" #include "DataFormats/DetId/interface/DetIdCollection.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" #include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" #include 
"DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" #include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" #include "FWCore/Framework/interface/ESTransientHandle.h" #include "FWCore/Framework/interface/ESWatcher.h" -#include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/Framework/interface/stream/EDProducer.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" - -#include +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" class SiPixelDigiErrorsFromSoA : public edm::stream::EDProducer<> { public: @@ -60,6 +61,7 @@ SiPixelDigiErrorsFromSoA::SiPixelDigiErrorsFromSoA(const edm::ParameterSet& iCon void SiPixelDigiErrorsFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; desc.add("digiErrorSoASrc", edm::InputTag("siPixelDigiErrorsSoA")); + // the configuration parameters here are named following those in SiPixelRawToDigi desc.add("CablingMapLabel", "")->setComment("CablingMap label"); desc.add("UsePhase1", false)->setComment("## Use phase1"); desc.add>("ErrorList", std::vector{29}) @@ -97,20 +99,20 @@ void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup for (auto i = 0U; i < size; i++) { SiPixelErrorCompact err = digiErrors.error(i); if (err.errorType != 0) { - SiPixelRawDataError error(err.word, err.errorType, err.fedId + 1200); + SiPixelRawDataError error(err.word, err.errorType, err.fedId + FEDNumbering::MINSiPixeluTCAFEDID); errors[err.rawId].push_back(error); } } constexpr uint32_t dummydetid = 0xffffffff; typedef PixelDataFormatter::Errors::iterator IE; - for (IE is = errors.begin(); is != errors.end(); is++) { - uint32_t errordetid = 
is->first; + for (auto& error : errors) { + uint32_t errordetid = error.first; if (errordetid == dummydetid) { // errors given dummy detId must be sorted by Fed nodeterrors.insert(nodeterrors.end(), errors[errordetid].begin(), errors[errordetid].end()); } else { edm::DetSet& errorDetSet = errorcollection.find_or_insert(errordetid); - errorDetSet.data.insert(errorDetSet.data.end(), is->second.begin(), is->second.end()); + errorDetSet.data.insert(errorDetSet.data.end(), error.second.begin(), error.second.end()); // Fill detid of the detectors where there is error AND the error number is listed // in the configurable error list in the job option cfi. // Code needs to be here, because there can be a set of errors for each diff --git a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py index 31ba8596bddc6..5c1ff74be9c69 100644 --- a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py +++ b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py @@ -8,10 +8,10 @@ siPixelDigisTask = cms.Task(siPixelDigis) siPixelDigisSoA = _siPixelDigisSoAFromCUDA.clone( - src = "siPixelClustersCUDAPreSplitting" + src = "siPixelClustersPreSplittingCUDA" ) siPixelDigiErrorsSoA = _siPixelDigiErrorsSoAFromCUDA.clone( - src = "siPixelClustersCUDAPreSplitting" + src = "siPixelClustersPreSplittingCUDA" ) siPixelDigiErrors = _siPixelDigiErrorsFromSoA.clone() diff --git a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h index 409ebec3cb43f..c2b5bc9d95f83 100644 --- a/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h +++ b/Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h @@ -65,7 +65,7 @@ namespace phase1PixelTopology { bool go = true; int n = 2; while (go) { - for (uint8_t i = 1; i < 11; ++i) { + for (uint8_t i = 1; i < std::size(layerStart); ++i) { if (layerStart[i] % n != 0) { go = false; break; @@ -81,18 +81,18 @@ 
namespace phase1PixelTopology { constexpr uint32_t maxModuleStride = findMaxModuleStride(); constexpr uint8_t findLayer(uint32_t detId) { - for (uint8_t i = 0; i < 11; ++i) + for (uint8_t i = 0; i < std::size(layerStart); ++i) if (detId < layerStart[i + 1]) return i; - return 11; + return std::size(layerStart); } constexpr uint8_t findLayerFromCompact(uint32_t detId) { detId *= maxModuleStride; - for (uint8_t i = 0; i < 11; ++i) + for (uint8_t i = 0; i < std::size(layerStart); ++i) if (detId < layerStart[i + 1]) return i; - return 11; + return std::size(layerStart); } constexpr uint32_t layerIndexSize = numberOfModules / maxModuleStride; diff --git a/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml index 58a7f0b22e30b..7e71c635c95b8 100644 --- a/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/BuildFile.xml @@ -1,8 +1,7 @@ - - - - - + + + + - + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index d574c1e6f2b92..a4851e4b322be 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,12 +1,12 @@ - - - - + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h b/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h index 0c15107816754..eb622cccb051e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h @@ -1,13 +1,14 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_PixelClusterizerBase_H #define RecoLocalTracker_SiPixelClusterizer_PixelClusterizerBase_H +#include + +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationServiceBase.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" 
#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationServiceBase.h" -#include class PixelGeomDetUnit; @@ -16,32 +17,42 @@ class PixelGeomDetUnit; */ class PixelClusterizerBase { public: - typedef edm::DetSet::const_iterator DigiIterator; - typedef edmNew::DetSet::const_iterator ClusterIterator; + typedef edm::DetSet::const_iterator DigiIterator; + typedef edmNew::DetSet::const_iterator ClusterIterator; struct AccretionCluster { - typedef unsigned short UShort; - static constexpr UShort MAXSIZE = 256; - UShort adc[MAXSIZE]; - UShort x[MAXSIZE]; - UShort y[MAXSIZE]; - UShort xmin=16000; - UShort ymin=16000; - unsigned int isize=0; - unsigned int curr=0; + static constexpr uint16_t MAXSIZE = 256; + uint16_t adc[MAXSIZE]; + uint16_t x[MAXSIZE]; + uint16_t y[MAXSIZE]; + uint16_t xmin = 16000; + uint16_t ymin = 16000; + unsigned int isize = 0; + int charge = 0; // stack interface (unsafe ok for use below) - UShort top() const { return curr;} - void pop() { ++curr;} - bool empty() { return curr==isize;} + unsigned int curr = 0; + uint16_t top() const { return curr; } + void pop() { ++curr; } + bool empty() { return curr == isize; } + + void clear() { + xmin = 16000; + ymin = 16000; + isize = 0; + charge = 0; + curr = 0; + } - bool add(SiPixelCluster::PixelPos const & p, UShort const iadc) { - if (isize==MAXSIZE) return false; - xmin=std::min(xmin,(unsigned short)(p.row())); - ymin=std::min(ymin,(unsigned short)(p.col())); - adc[isize]=iadc; - x[isize]=p.row(); - y[isize++]=p.col(); + bool add(SiPixelCluster::PixelPos const& p, uint16_t const iadc) { + if (isize == MAXSIZE) + return false; + xmin = std::min(xmin, p.row()); + ymin = std::min(ymin, p.col()); + adc[isize] = iadc; + x[isize] = p.row(); + y[isize++] = p.col(); + charge += iadc; return true; } }; @@ -51,26 
+62,25 @@ class PixelClusterizerBase { // Build clusters in a DetUnit. Both digi and cluster stored in a DetSet - virtual void clusterizeDetUnit( const edm::DetSet & input, - const PixelGeomDetUnit * pixDet, - const TrackerTopology* tTopo, - const std::vector& badChannels, - edmNew::DetSetVector::FastFiller& output) = 0; + virtual void clusterizeDetUnit(const edm::DetSet& input, + const PixelGeomDetUnit* pixDet, + const TrackerTopology* tTopo, + const std::vector& badChannels, + edmNew::DetSetVector::FastFiller& output) = 0; - virtual void clusterizeDetUnit( const edmNew::DetSet & input, - const PixelGeomDetUnit * pixDet, - const TrackerTopology* tTopo, - const std::vector& badChannels, - edmNew::DetSetVector::FastFiller& output) = 0; + virtual void clusterizeDetUnit(const edmNew::DetSet& input, + const PixelGeomDetUnit* pixDet, + const TrackerTopology* tTopo, + const std::vector& badChannels, + edmNew::DetSetVector::FastFiller& output) = 0; // Configure gain calibration service - void setSiPixelGainCalibrationService( SiPixelGainCalibrationServiceBase* in){ - theSiPixelGainCalibrationService_=in; + void setSiPixelGainCalibrationService(SiPixelGainCalibrationServiceBase* in) { + theSiPixelGainCalibrationService_ = in; } - protected: +protected: SiPixelGainCalibrationServiceBase* theSiPixelGainCalibrationService_; - }; #endif diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc index 02678c999a036..2bd902af01b1e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc @@ -123,11 +123,11 @@ void SiPixelClusterProducer::produce(edm::Event& e, const edm::EventSetup& es) { // Step D: write output to file output->shrink_to_fit(); - // set sequential identifier (this is a const interface, but we need to set it after the sorting) - for (auto DSViter = output->begin(); 
DSViter != output->end(); DSViter++) { + // set sequential identifier + for (auto& clusters : *output) { uint16_t id = 0; - for (auto& clust : *DSViter) { - const_cast(clust).setOriginalId(id++); + for (auto& cluster : clusters) { + cluster.setOriginalId(id++); } } e.put(tPutPixelClusters, std::move(output)); @@ -161,15 +161,14 @@ void SiPixelClusterProducer::run(const T& input, int numberOfClusters = 0; // Iterate on detector units - typename T::const_iterator DSViter = input.begin(); - for (; DSViter != input.end(); DSViter++) { + for (auto const& dsv : input) { ++numberOfDetUnits; // LogDebug takes very long time, get rid off. - //LogDebug("SiStripClusterizer") << "[SiPixelClusterProducer::run] DetID" << DSViter->id; + //LogDebug("SiStripClusterizer") << "[SiPixelClusterProducer::run] DetID" << dsv.id; std::vector badChannels; - DetId detIdObject(DSViter->detId()); + DetId detIdObject(dsv.detId()); // Comment: At the moment the clusterizer depends on geometry // to access information as the pixel topology (number of columns @@ -185,8 +184,8 @@ void SiPixelClusterProducer::run(const T& input, { // Produce clusters for this DetUnit and store them in // a DetSet - edmNew::DetSetVector::FastFiller spc(output, DSViter->detId()); - clusterizer_->clusterizeDetUnit(*DSViter, pixDet, tTopo_, badChannels, spc); + edmNew::DetSetVector::FastFiller spc(output, dsv.detId()); + clusterizer_->clusterizeDetUnit(dsv, pixDet, tTopo_, badChannels, spc); if (spc.empty()) { spc.abort(); } else { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index dbbc5c4b03284..0078bae38306a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -1,3 +1,4 @@ +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include 
"DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/Handle.h" #include "DataFormats/DetId/interface/DetId.h" @@ -9,43 +10,12 @@ #include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "Geometry/Records/interface/TrackerTopologyRcd.h" - -namespace { - struct AccretionCluster { - typedef unsigned short UShort; - static constexpr UShort MAXSIZE = 256; - UShort adc[MAXSIZE]; - UShort x[MAXSIZE]; - UShort y[MAXSIZE]; - UShort xmin = 16000; - UShort ymin = 16000; - unsigned int isize = 0; - int charge = 0; - - void clear() { - isize = 0; - charge = 0; - xmin = 16000; - ymin = 16000; - } - - bool add(SiPixelCluster::PixelPos const& p, UShort const iadc) { - if (isize == MAXSIZE) - return false; - xmin = std::min(xmin, (unsigned short)(p.row())); - ymin = std::min(ymin, (unsigned short)(p.col())); - adc[isize] = iadc; - x[isize] = p.row(); - y[isize++] = p.col(); - charge += iadc; - return true; - } - }; -} // namespace +#include "RecoLocalTracker/SiPixelClusterizer/plugins/PixelClusterizerBase.h" class SiPixelDigisClustersFromSoA : public edm::global::EDProducer<> { public: @@ -84,7 +54,7 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con auto collection = std::make_unique>(); auto outputClusters = std::make_unique(); - outputClusters->reserve(2000, nDigis / 4); + outputClusters->reserve(gpuClustering::maxNumModules, nDigis / 4); edm::DetSet* detDigis = nullptr; for (uint32_t i = 0; i < nDigis; i++) { @@ -97,8 +67,10 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con } int32_t nclus = -1; - std::vector 
aclusters(1024); - auto totCluseFilled = 0; + std::vector aclusters(gpuClustering::maxNumClustersPerModules); +#ifdef EDM_ML_DEBUG + auto totClustersFilled = 0; +#endif auto fillClusters = [&](uint32_t detId) { if (nclus < 0) @@ -113,10 +85,12 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con edm::LogWarning("SiPixelDigisClustersFromSoA") << "cluster below charge Threshold " << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic << " size/charge " << acluster.isize << '/' << acluster.charge; - SiPixelCluster cluster(acluster.isize, acluster.adc, acluster.x, acluster.y, acluster.xmin, acluster.ymin); - cluster.setOriginalId(ic); - ++totCluseFilled; - // std::cout << "putting in this cluster " << ic << " " << cluster.charge() << " " << cluster.pixelADC().size() << endl; + SiPixelCluster cluster(acluster.isize, acluster.adc, acluster.x, acluster.y, acluster.xmin, acluster.ymin, ic); +#ifdef EDM_ML_DEBUG + ++totClustersFilled; +#endif + LogDebug("SiPixelDigisClustersFromSoA") + << "putting in this cluster " << ic << " " << cluster.charge() << " " << cluster.pixelADC().size(); // sort by row (x) spc.push_back(std::move(cluster)); std::push_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { @@ -147,14 +121,14 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con if ((*detDigis).empty()) (*detDigis).data.reserve(64); // avoid the first relocations else { - std::cout << "Problem det present twice in input! " << (*detDigis).detId() << std::endl; + edm::LogWarning("SiPixelDigisClustersFromSoA") << "Problem det present twice in input! 
" << (*detDigis).detId(); } } (*detDigis).data.emplace_back(digis.pdigi(i)); auto const& dig = (*detDigis).data.back(); // fill clusters assert(digis.clus(i) >= 0); - assert(digis.clus(i) < 1024); + assert(digis.clus(i) < gpuClustering::maxNumClustersPerModules); nclus = std::max(digis.clus(i), nclus); auto row = dig.row(); auto col = dig.column(); @@ -165,7 +139,9 @@ void SiPixelDigisClustersFromSoA::produce(edm::StreamID, edm::Event& iEvent, con // fill final clusters if (detDigis) fillClusters((*detDigis).detId()); - //std::cout << "filled " << totCluseFilled << " clusters" << std::endl; +#ifdef EDM_ML_DEBUG + LogDebug("SiPixelDigisClustersFromSoA") << "filled " << totClustersFilled << " clusters"; +#endif iEvent.put(digiPutToken_, std::move(collection)); iEvent.put(clusterPutToken_, std::move(outputClusters)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index 5e97610d92286..93b92e145ec5c 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -184,7 +184,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, // for GPU // first 150 index stores the fedId and next 150 will store the // start index of word in that fed - assert(fedId >= 1200); + assert(fedId >= FEDNumbering::MINSiPixeluTCAFEDID); fedCounter++; // get event data for this fed diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 7c133b10f4dab..25e5c925990f8 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -3,7 +3,6 @@ * File Name: RawToClusterGPU.cu * Description: It converts Raw data into Digi Format on GPU * Finaly the Output 
of RawToDigi data is given to pixelClusterizer - * **/ // C++ includes @@ -21,6 +20,8 @@ // CMSSW includes #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" @@ -46,7 +47,7 @@ namespace pixelgpudetails { const cms_uint32_t *src, unsigned int length) { std::memcpy(word_.get() + wordCounterGPU, src, sizeof(cms_uint32_t) * length); - std::memset(fedId_.get() + wordCounterGPU / 2, fedId - 1200, length / 2); + std::memset(fedId_.get() + wordCounterGPU / 2, fedId - FEDNumbering::MINSiPixeluTCAFEDID, length / 2); } //////////////////// @@ -59,7 +60,9 @@ namespace pixelgpudetails { __device__ uint32_t getADC(uint32_t ww) { return ((ww >> pixelgpudetails::ADC_shift) & pixelgpudetails::ADC_mask); } - __device__ bool isBarrel(uint32_t rawId) { return (1 == ((rawId >> 25) & 0x7)); } + __device__ bool isBarrel(uint32_t rawId) { + return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask)); + } __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelROCsStatusAndMapping *cablingMap, uint8_t fed, @@ -67,7 +70,7 @@ namespace pixelgpudetails { uint32_t roc) { uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; pixelgpudetails::DetIdGPU detId = { - cablingMap->RawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index]}; + cablingMap->rawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index]}; return detId; } @@ -137,16 +140,15 @@ namespace pixelgpudetails { uint32_t gRow = rowOffset + slopeRow * local.row; uint32_t gCol = colOffset + slopeCol * local.col; - 
//printf("Inside frameConversion row: %u, column: %u\n", gRow, gCol); + // inside frameConversion row: gRow, column: gCol pixelgpudetails::Pixel global = {gRow, gCol}; return global; } + // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc __device__ uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) { uint8_t errorType = 0; - // debug = true; - switch (status) { case (1): { if (debug) @@ -181,15 +183,13 @@ namespace pixelgpudetails { } __device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) { - uint32_t numRowsInRoc = 80; - uint32_t numColsInRoc = 52; - - /// row and collumn in ROC representation - return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc)); + /// row and column in ROC representation + return ((rocRow < pixelgpudetails::numRowsInRoc) & (rocCol < pixelgpudetails::numColsInRoc)); } __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } + // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc __device__ uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, @@ -243,15 +243,15 @@ namespace pixelgpudetails { case (30): { if (debug) printf("TBM error trailer (errorType = 30)\n"); - int StateMatch_bits = 4; - int StateMatch_shift = 8; - uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); - int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; - if (StateMatch != 1 && StateMatch != 8) { + int stateMatch_bits = 4; + int stateMatch_shift = 8; + uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits); + int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask; + if (stateMatch != 1 && stateMatch != 8) { if (debug) printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); } - if (StateMatch == 1) + if (stateMatch == 1) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 errorFound = true; break; @@ -269,6 +269,7 @@ 
namespace pixelgpudetails { return errorFound ? errorType : 0; } + // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc __device__ uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, @@ -282,13 +283,10 @@ namespace pixelgpudetails { case 31: case 36: case 40: { - //set dummy values for cabling just to get detId from link - //cabling.dcol = 0; - //cabling.pxid = 2; uint32_t roc = 1; uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; - uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; - if (rID_temp != 9999) + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + if (rID_temp != gpuClustering::invalidModuleId) rID = rID_temp; break; } @@ -318,24 +316,19 @@ namespace pixelgpudetails { if ((chanNmbr < 1) || (chanNmbr > 36)) break; // signifies unexpected result - // set dummy values for cabling just to get detId from link if in Barrel - //cabling.dcol = 0; - //cabling.pxid = 2; uint32_t roc = 1; uint32_t link = chanNmbr; - uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; - if (rID_temp != 9999) + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + if (rID_temp != gpuClustering::invalidModuleId) rID = rID_temp; break; } case 37: case 38: { - //cabling.dcol = 0; - //cabling.pxid = 2; uint32_t roc = (errWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask; uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; - uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; - if (rID_temp != 9999) + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + if (rID_temp != gpuClustering::invalidModuleId) rID = rID_temp; break; } @@ -377,7 +370,7 @@ namespace pixelgpudetails { // initialize (too many coninue below) pdigi[gIndex] = 0; rawIdArr[gIndex] = 0; - moduleId[gIndex] = 9999; + moduleId[gIndex] = gpuClustering::invalidModuleId; uint32_t ww = word[gIndex]; 
// Array containing 32 bit raw data if (ww == 0) { @@ -397,7 +390,7 @@ namespace pixelgpudetails { continue; } - uint32_t rawId = detId.RawId; + uint32_t rawId = detId.rawId; uint32_t rocIdInDetUnit = detId.rocInDet; bool barrel = isBarrel(rawId); @@ -411,8 +404,8 @@ namespace pixelgpudetails { if (skipROC) continue; - uint32_t layer = 0; //, ladder =0; - int side = 0, panel = 0, module = 0; //disk = 0, blade = 0 + uint32_t layer = 0; + int side = 0, panel = 0, module = 0; if (barrel) { layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask; @@ -422,9 +415,7 @@ namespace pixelgpudetails { // endcap ids layer = 0; panel = (rawId >> pixelgpudetails::panelStartBit) & pixelgpudetails::panelMask; - //disk = (rawId >> diskStartBit_) & diskMask_; side = (panel == 1) ? -1 : 1; - //blade = (rawId >> bladeStartBit_) & bladeMask_; } // ***special case of layer to 1 be handled here @@ -472,22 +463,22 @@ namespace pixelgpudetails { } // end of Raw to Digi kernel __global__ void fillHitsModuleStart(uint32_t const *__restrict__ cluStart, uint32_t *__restrict__ moduleStart) { - assert(gpuClustering::MaxNumModules < 2048); // easy to extend at least till 32*1024 + assert(gpuClustering::maxNumModules < 2048); // easy to extend at least till 32*1024 assert(1 == gridDim.x); assert(0 == blockIdx.x); int first = threadIdx.x; - // limit to MaxHitsInModule; - for (int i = first, iend = gpuClustering::MaxNumModules; i < iend; i += blockDim.x) { + // limit to maxHitsInModule() + for (int i = first, iend = gpuClustering::maxNumModules; i < iend; i += blockDim.x) { moduleStart[i + 1] = std::min(gpuClustering::maxHitsInModule(), cluStart[i]); } __shared__ uint32_t ws[32]; cms::cuda::blockPrefixScan(moduleStart + 1, moduleStart + 1, 1024, ws); - cms::cuda::blockPrefixScan(moduleStart + 1025, moduleStart + 1025, gpuClustering::MaxNumModules - 1024, ws); + cms::cuda::blockPrefixScan(moduleStart + 1025, moduleStart + 1025, gpuClustering::maxNumModules - 1024, ws); - for 
(int i = first + 1025, iend = gpuClustering::MaxNumModules + 1; i < iend; i += blockDim.x) { + for (int i = first + 1025, iend = gpuClustering::maxNumModules + 1; i < iend; i += blockDim.x) { moduleStart[i] += moduleStart[1024]; } __syncthreads(); @@ -498,23 +489,22 @@ namespace pixelgpudetails { assert(c0 == moduleStart[1]); assert(moduleStart[1024] >= moduleStart[1023]); assert(moduleStart[1025] >= moduleStart[1024]); - assert(moduleStart[gpuClustering::MaxNumModules] >= moduleStart[1025]); + assert(moduleStart[gpuClustering::maxNumModules] >= moduleStart[1025]); - for (int i = first, iend = gpuClustering::MaxNumModules + 1; i < iend; i += blockDim.x) { + for (int i = first, iend = gpuClustering::maxNumModules + 1; i < iend; i += blockDim.x) { if (0 != i) assert(moduleStart[i] >= moduleStart[i - i]); // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] - if (i == 96 || i == 1184 || i == 1744 || i == gpuClustering::MaxNumModules) + if (i == 96 || i == 1184 || i == 1744 || i == gpuClustering::maxNumModules) printf("moduleStart %d %d\n", i, moduleStart[i]); } #endif // avoid overflow - constexpr auto MAX_HITS = gpuClustering::MaxNumClusters; - for (int i = first, iend = gpuClustering::MaxNumModules + 1; i < iend; i += blockDim.x) { - if (moduleStart[i] > MAX_HITS) - moduleStart[i] = MAX_HITS; + auto constexpr maxNumClusters = gpuClustering::maxNumClusters; + for (int i = first, iend = gpuClustering::maxNumModules + 1; i < iend; i += blockDim.x) { + moduleStart[i] = std::clamp(moduleStart[i], 0U, maxNumClusters); } } @@ -541,7 +531,7 @@ namespace pixelgpudetails { if (includeErrors) { digiErrors_d = SiPixelDigiErrorsCUDA(pixelgpudetails::MAX_FED_WORDS, std::move(errors), stream); } - clusters_d = SiPixelClustersCUDA(gpuClustering::MaxNumModules, stream); + clusters_d = SiPixelClustersCUDA(gpuClustering::maxNumModules, stream); nModules_Clusters_h = cms::cuda::make_host_unique(2, stream); 
@@ -594,7 +584,7 @@ namespace pixelgpudetails { using namespace gpuClustering; int threadsPerBlock = 256; int blocks = - (std::max(int(wordCounter), int(gpuClustering::MaxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; + (std::max(int(wordCounter), int(gpuClustering::maxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; gpuCalibPixel::calibDigis<<>>(isRun2, digis_d.moduleInd(), @@ -626,7 +616,7 @@ namespace pixelgpudetails { &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream)); threadsPerBlock = 256; - blocks = MaxNumModules; + blocks = maxNumModules; #ifdef GPU_DEBUG std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; #endif @@ -664,7 +654,7 @@ namespace pixelgpudetails { // last element holds the number of all clusters cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), - clusters_d.clusModuleStart() + gpuClustering::MaxNumModules, + clusters_d.clusModuleStart() + gpuClustering::maxNumModules, sizeof(uint32_t), cudaMemcpyDefault, stream)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index e06ba8ce735aa..75eeab2606dd5 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -75,7 +75,7 @@ namespace pixelgpudetails { const uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits); struct DetIdGPU { - uint32_t RawId; + uint32_t rawId; uint32_t rocInDet; uint32_t moduleId; }; @@ -207,19 +207,6 @@ namespace pixelgpudetails { SiPixelDigiErrorsCUDA digiErrors_d; }; - // see RecoLocalTracker/SiPixelClusterizer - // all are runtime const, should be specified in python _cfg.py - struct ADCThreshold { - const int thePixelThreshold = 1000; // default Pixel threshold in electrons - const int theSeedThreshold = 1000; // seed 
thershold in electrons not used in our algo - const float theClusterThreshold = 4000; // cluster threshold in electron - const int ConversionFactor = 65; // adc to electron conversion factor - - const int theStackADC_ = 255; // the maximum adc count for stack layer - const int theFirstStack_ = 5; // the index of the fits stack layer - const double theElectronPerADCGain_ = 600; // ADC to electron conversion - }; - } // namespace pixelgpudetails #endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 50c62f44f1df8..c21c792f39c30 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -4,14 +4,13 @@ #include #include +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "gpuClusteringConstants.h" - namespace gpuCalibPixel { - constexpr uint16_t InvId = 9999; // must be > MaxNumModules + using gpuClustering::invalidModuleId; // valid for run2 constexpr float VCaltoElectronGain = 47; // L2-4: 47 +- 4.7 @@ -35,12 +34,12 @@ namespace gpuCalibPixel { // zero for next kernels... if (0 == first) clusModuleStart[0] = moduleStart[0] = 0; - for (int i = first; i < gpuClustering::MaxNumModules; i += gridDim.x * blockDim.x) { + for (int i = first; i < gpuClustering::maxNumModules; i += gridDim.x * blockDim.x) { nClustersInModule[i] = 0; } for (int i = first; i < numElements; i += gridDim.x * blockDim.x) { - if (InvId == id[i]) + if (invalidModuleId == id[i]) continue; float conversionFactor = (isRun2) ? (id[i] < 96 ? 
VCaltoElectronGain_L1 : VCaltoElectronGain) : 1.f; @@ -55,7 +54,7 @@ namespace gpuCalibPixel { float gain = ret.second; // float pedestal = 0; float gain = 1.; if (isDeadColumn | isNoisyColumn) { - id[i] = InvId; + id[i] = invalidModuleId; adc[i] = 0; printf("bad pixel at %d in %d\n", i, id[i]); } else { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index 8f45d35b267b1..d9520da80b695 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -4,11 +4,10 @@ #include #include +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h" -#include "gpuClusteringConstants.h" - namespace gpuClustering { __global__ void clusterChargeCut( @@ -19,44 +18,44 @@ namespace gpuClustering { uint32_t const* __restrict__ moduleId, // module id of each module int32_t* __restrict__ clusterId, // modified: cluster id of each pixel uint32_t numElements) { - __shared__ int32_t charge[MaxNumClustersPerModules]; - __shared__ uint8_t ok[MaxNumClustersPerModules]; - __shared__ uint16_t newclusId[MaxNumClustersPerModules]; + __shared__ int32_t charge[maxNumClustersPerModules]; + __shared__ uint8_t ok[maxNumClustersPerModules]; + __shared__ uint16_t newclusId[maxNumClustersPerModules]; auto firstModule = blockIdx.x; auto endModule = moduleStart[0]; for (auto module = firstModule; module < endModule; module += gridDim.x) { auto firstPixel = moduleStart[1 + module]; auto thisModuleId = id[firstPixel]; - assert(thisModuleId < MaxNumModules); + assert(thisModuleId < maxNumModules); assert(thisModuleId == moduleId[module]); auto nclus = nClustersInModule[thisModuleId]; if (nclus == 0) continue; - if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules) + if 
(threadIdx.x == 0 && nclus > maxNumClustersPerModules) printf("Warning too many clusters in module %d in block %d: %d > %d\n", thisModuleId, blockIdx.x, nclus, - MaxNumClustersPerModules); + maxNumClustersPerModules); auto first = firstPixel + threadIdx.x; - if (nclus > MaxNumClustersPerModules) { + if (nclus > maxNumClustersPerModules) { // remove excess FIXME find a way to cut charge first.... for (auto i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) + if (id[i] == invalidModuleId) continue; // not valid if (id[i] != thisModuleId) break; // end of module - if (clusterId[i] >= MaxNumClustersPerModules) { - id[i] = InvId; - clusterId[i] = InvId; + if (clusterId[i] >= maxNumClustersPerModules) { + id[i] = invalidModuleId; + clusterId[i] = invalidModuleId; } } - nclus = MaxNumClustersPerModules; + nclus = maxNumClustersPerModules; } #ifdef GPU_DEBUG @@ -65,14 +64,14 @@ namespace gpuClustering { printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x); #endif - assert(nclus <= MaxNumClustersPerModules); + assert(nclus <= maxNumClustersPerModules); for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { charge[i] = 0; } __syncthreads(); for (auto i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) + if (id[i] == invalidModuleId) continue; // not valid if (id[i] != thisModuleId) break; // end of module @@ -102,19 +101,19 @@ namespace gpuClustering { // mark bad cluster again for (auto i = threadIdx.x; i < nclus; i += blockDim.x) { if (0 == ok[i]) - newclusId[i] = InvId + 1; + newclusId[i] = invalidModuleId + 1; } __syncthreads(); // reassign id for (auto i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) + if (id[i] == invalidModuleId) continue; // not valid if (id[i] != thisModuleId) break; // end of module clusterId[i] = newclusId[clusterId[i]] - 1; - if (clusterId[i] == InvId) - id[i] = InvId; + if (clusterId[i] == invalidModuleId) + id[i] = invalidModuleId; } //done diff --git 
a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 3bf42c8265b1e..9f295981ca732 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -4,12 +4,11 @@ #include #include +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "gpuClusteringConstants.h" - namespace gpuClustering { #ifdef GPU_DEBUG @@ -23,21 +22,20 @@ namespace gpuClustering { int first = blockDim.x * blockIdx.x + threadIdx.x; for (int i = first; i < numElements; i += gridDim.x * blockDim.x) { clusterId[i] = i; - if (InvId == id[i]) + if (invalidModuleId == id[i]) continue; auto j = i - 1; - while (j >= 0 and id[j] == InvId) + while (j >= 0 and id[j] == invalidModuleId) --j; if (j < 0 or id[j] != id[i]) { // boundary... 
- auto loc = atomicInc(moduleStart, MaxNumModules); + auto loc = atomicInc(moduleStart, maxNumModules); moduleStart[loc + 1] = i; } } } __global__ - // __launch_bounds__(256,4) void findClus(uint16_t const* __restrict__ id, // module id of each pixel uint16_t const* __restrict__ x, // local coordinates of each pixel @@ -54,7 +52,7 @@ namespace gpuClustering { for (auto module = firstModule; module < endModule; module += gridDim.x) { auto firstPixel = moduleStart[1 + module]; auto thisModuleId = id[firstPixel]; - assert(thisModuleId < MaxNumModules); + assert(thisModuleId < maxNumModules); #ifdef GPU_DEBUG if (thisModuleId % 100 == 1) @@ -70,7 +68,7 @@ namespace gpuClustering { // skip threads not associated to an existing pixel for (int i = first; i < numElements; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels + if (id[i] == invalidModuleId) // skip invalid pixels continue; if (id[i] != thisModuleId) { // find the first pixel in a different module atomicMin(&msize, i); @@ -110,7 +108,7 @@ namespace gpuClustering { // fill histo for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels + if (id[i] == invalidModuleId) // skip invalid pixels continue; hist.count(y[i]); #ifdef GPU_DEBUG @@ -130,7 +128,7 @@ namespace gpuClustering { printf("histo size %d\n", hist.size()); #endif for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels + if (id[i] == invalidModuleId) // skip invalid pixels continue; hist.fill(y[i], i - firstPixel); } @@ -178,7 +176,7 @@ namespace gpuClustering { assert(k < maxiter); auto p = hist.begin() + j; auto i = *p + firstPixel; - assert(id[i] != InvId); + assert(id[i] != invalidModuleId); assert(id[i] == thisModuleId); // same module int be = Hist::bin(y[i] + 1); auto e = hist.end(be); @@ -255,7 +253,7 @@ namespace gpuClustering { // find the number of different clusters, identified by a pixels with clus[i] == i; // mark these pixels with a negative id. 
for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels + if (id[i] == invalidModuleId) // skip invalid pixels continue; if (clusterId[i] == i) { auto old = atomicInc(&foundClusters, 0xffffffff); @@ -266,7 +264,7 @@ namespace gpuClustering { // propagate the negative id to all the pixels in the cluster. for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) // skip invalid pixels + if (id[i] == invalidModuleId) // skip invalid pixels continue; if (clusterId[i] >= 0) { // mark each pixel in a cluster with the same id as the first one @@ -277,7 +275,7 @@ namespace gpuClustering { // adjust the cluster id to be a positive value starting from 0 for (int i = first; i < msize; i += blockDim.x) { - if (id[i] == InvId) { // skip invalid pixels + if (id[i] == invalidModuleId) { // skip invalid pixels clusterId[i] = -9999; continue; } diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py index 6839e4582bb2b..8bbf47e9ebf90 100644 --- a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py @@ -8,15 +8,15 @@ siPixelClustersPreSplittingTask = cms.Task(siPixelClustersPreSplitting) -siPixelClustersCUDAPreSplitting = _siPixelRawToClusterCUDA.clone() +siPixelClustersPreSplittingCUDA = _siPixelRawToClusterCUDA.clone() from Configuration.Eras.Modifier_run3_common_cff import run3_common -run3_common.toModify(siPixelClustersCUDAPreSplitting, +run3_common.toModify(siPixelClustersPreSplittingCUDA, isRun2=False ) siPixelDigisClustersPreSplitting = _siPixelDigisClustersFromSoA.clone() siPixelClustersPreSplittingTaskCUDA = cms.Task( - siPixelClustersCUDAPreSplitting, + siPixelClustersPreSplittingCUDA, siPixelDigisClustersPreSplitting, ) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml 
b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml index 1891970a9d98b..4420adb507027 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/test/BuildFile.xml @@ -16,6 +16,7 @@ + @@ -54,4 +55,3 @@ - diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index 64289d5208b48..02611ab1cac1d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -26,7 +26,8 @@ int main(void) { using namespace gpuClustering; - int numElements = 256 * 2000; + constexpr int numElements = 256 * maxNumModules; + // these in reality are already on GPU auto h_id = std::make_unique(numElements); auto h_x = std::make_unique(numElements); @@ -40,13 +41,13 @@ int main(void) { auto d_y = cms::cuda::make_device_unique(numElements, nullptr); auto d_adc = cms::cuda::make_device_unique(numElements, nullptr); auto d_clus = cms::cuda::make_device_unique(numElements, nullptr); - auto d_moduleStart = cms::cuda::make_device_unique(MaxNumModules + 1, nullptr); - auto d_clusInModule = cms::cuda::make_device_unique(MaxNumModules, nullptr); - auto d_moduleId = cms::cuda::make_device_unique(MaxNumModules, nullptr); -#else // __CUDACC__ - auto h_moduleStart = std::make_unique(MaxNumModules + 1); - auto h_clusInModule = std::make_unique(MaxNumModules); - auto h_moduleId = std::make_unique(MaxNumModules); + auto d_moduleStart = cms::cuda::make_device_unique(maxNumModules + 1, nullptr); + auto d_clusInModule = cms::cuda::make_device_unique(maxNumModules, nullptr); + auto d_moduleId = cms::cuda::make_device_unique(maxNumModules, nullptr); +#else // __CUDACC__ + auto h_moduleStart = std::make_unique(maxNumModules + 1); + auto h_clusInModule = std::make_unique(maxNumModules); + auto h_moduleId = std::make_unique(maxNumModules); #endif // __CUDACC__ // later random number @@ -145,7 +146,7 @@ int 
main(void) { ++n; } ++ncl; - h_id[n++] = InvId; // error + h_id[n++] = invalidModuleId; // error // messy int xx[5] = {21, 25, 23, 24, 22}; for (int k = 0; k < 5; ++k) { @@ -186,7 +187,7 @@ int main(void) { // all odd id for (int id = 11; id <= 1800; id += 2) { if ((id / 20) % 2) - h_id[n++] = InvId; // error + h_id[n++] = invalidModuleId; // error for (int x = 0; x < 40; x += 4) { ++ncl; if ((id / 10) % 2) { @@ -212,8 +213,8 @@ int main(void) { if (y[k] == 3) continue; // hole if (id == 51) { - h_id[n++] = InvId; - h_id[n++] = InvId; + h_id[n++] = invalidModuleId; + h_id[n++] = invalidModuleId; } // error h_id[n] = id; h_x[n] = x + 1; @@ -253,11 +254,11 @@ int main(void) { cms::cuda::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); - blocksPerGrid = MaxNumModules; //nModules; + blocksPerGrid = maxNumModules; //nModules; std::cout << "CUDA findModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads\n"; - cudaCheck(cudaMemset(d_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t))); + cudaCheck(cudaMemset(d_clusInModule.get(), 0, maxNumModules * sizeof(uint32_t))); cms::cuda::launch(findClus, {blocksPerGrid, threadsPerBlock}, @@ -272,17 +273,17 @@ int main(void) { cudaDeviceSynchronize(); cudaCheck(cudaMemcpy(&nModules, d_moduleStart.get(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); - uint32_t nclus[MaxNumModules], moduleId[nModules]; - cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); + uint32_t nclus[maxNumModules], moduleId[nModules]; + cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), maxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); - std::cout << "before charge cut found " << std::accumulate(nclus, nclus + MaxNumModules, 0) << " clusters" + std::cout << "before charge cut found " << std::accumulate(nclus, nclus + maxNumModules, 0) << " clusters" << std::endl; - for (auto i = MaxNumModules; i 
> 0; i--) + for (auto i = maxNumModules; i > 0; i--) if (nclus[i - 1] > 0) { std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; break; } - if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) + if (ncl != std::accumulate(nclus, nclus + maxNumModules, 0)) std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl; cms::cuda::launch(clusterChargeCut, @@ -296,24 +297,24 @@ int main(void) { n); cudaDeviceSynchronize(); -#else // __CUDACC__ +#else // __CUDACC__ h_moduleStart[0] = nModules; countModules(h_id.get(), h_moduleStart.get(), h_clus.get(), n); - memset(h_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t)); + memset(h_clusInModule.get(), 0, maxNumModules * sizeof(uint32_t)); findClus( h_id.get(), h_x.get(), h_y.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n); nModules = h_moduleStart[0]; auto nclus = h_clusInModule.get(); - std::cout << "before charge cut found " << std::accumulate(nclus, nclus + MaxNumModules, 0) << " clusters" + std::cout << "before charge cut found " << std::accumulate(nclus, nclus + maxNumModules, 0) << " clusters" << std::endl; - for (auto i = MaxNumModules; i > 0; i--) + for (auto i = maxNumModules; i > 0; i--) if (nclus[i - 1] > 0) { std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; break; } - if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) + if (ncl != std::accumulate(nclus, nclus + maxNumModules, 0)) std::cout << "ERROR!!!!! 
wrong number of cluster found" << std::endl; clusterChargeCut( @@ -325,14 +326,14 @@ int main(void) { #ifdef __CUDACC__ cudaCheck(cudaMemcpy(h_id.get(), d_id.get(), size16, cudaMemcpyDeviceToHost)); cudaCheck(cudaMemcpy(h_clus.get(), d_clus.get(), size32, cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), MaxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(&nclus, d_clusInModule.get(), maxNumModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); cudaCheck(cudaMemcpy(&moduleId, d_moduleId.get(), nModules * sizeof(uint32_t), cudaMemcpyDeviceToHost)); #endif // __CUDACC__ std::set clids; for (int i = 0; i < n; ++i) { assert(h_id[i] != 666); // only noise - if (h_id[i] == InvId) + if (h_id[i] == invalidModuleId) continue; assert(h_clus[i] >= 0); assert(h_clus[i] < int(nclus[h_id[i]])); @@ -368,9 +369,9 @@ int main(void) { std::cout << "error " << mid << ": " << nc << ' ' << pnc << std::endl; } - std::cout << "found " << std::accumulate(nclus, nclus + MaxNumModules, 0) << ' ' << clids.size() << " clusters" + std::cout << "found " << std::accumulate(nclus, nclus + maxNumModules, 0) << ' ' << clids.size() << " clusters" << std::endl; - for (auto i = MaxNumModules; i > 0; i--) + for (auto i = maxNumModules; i > 0; i--) if (nclus[i - 1] > 0) { std::cout << "last module is " << i - 1 << ' ' << nclus[i - 1] << std::endl; break; diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index e22b18b17117a..d9376d88f7bbd 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,14 +1,14 @@ - - + + + + + - + - - - diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h index 4b569438aa130..05e59585ba6ba 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h @@ -1,5 
+1,5 @@ -#ifndef RecoLocalTracker_SiPixelRecHits_PixelCPEBase_H -#define RecoLocalTracker_SiPixelRecHits_PixelCPEBase_H 1 +#ifndef RecoLocalTracker_SiPixelRecHits_interface_PixelCPEBase_h +#define RecoLocalTracker_SiPixelRecHits_interface_PixelCPEBase_h 1 //----------------------------------------------------------------------------- // \class PixelCPEBase @@ -11,43 +11,32 @@ // Change to use Generic error & Template calibration from DB - D.Fehling 11/08 //----------------------------------------------------------------------------- +#ifdef EDM_ML_DEBUG +#include +#endif +#include #include #include -#include "TMath.h" -#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" -#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitQuality.h" +#include +#include "CondFormats/SiPixelObjects/interface/SiPixelGenErrorDBObject.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelLorentzAngle.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelTemplateDBObject.h" +#include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementError.h" +#include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementPoint.h" +#include "DataFormats/GeometrySurface/interface/GloballyPositioned.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitQuality.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "Geometry/CommonDetUnit/interface/GeomDetType.h" #include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" #include "Geometry/CommonTopologies/interface/PixelTopology.h" #include "Geometry/CommonTopologies/interface/Topology.h" - -//--- For the configuration: -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include 
"FWCore/ParameterSet/interface/ConfigurationDescriptions.h" - -#include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementPoint.h" -#include "DataFormats/GeometryCommonDetAlgo/interface/MeasurementError.h" -#include "DataFormats/GeometrySurface/interface/GloballyPositioned.h" - -#include "CondFormats/SiPixelObjects/interface/SiPixelLorentzAngle.h" - -// new errors -#include "CondFormats/SiPixelObjects/interface/SiPixelGenErrorDBObject.h" -// old errors -//#include "CondFormats/SiPixelObjects/interface/SiPixelCPEGenericErrorParm.h" - -#include "CondFormats/SiPixelObjects/interface/SiPixelTemplateDBObject.h" - -#include - -#include -#ifdef EDM_ML_DEBUG -#include -#endif +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" class RectangularPixelTopology; class MagneticField; @@ -84,7 +73,6 @@ class PixelCPEBase : public PixelClusterParameterEstimator { virtual ~ClusterParam() = default; const SiPixelCluster* theCluster = nullptr; - ; //--- Cluster-level quantities (filled in computeAnglesFrom....) 
float cotalpha; @@ -145,7 +133,7 @@ class PixelCPEBase : public PixelClusterParameterEstimator { inline ReturnType getParameters(const SiPixelCluster& cl, const GeomDetUnit& det) const override { #ifdef EDM_ML_DEBUG nRecHitsTotal_++; - //std::cout<<" in PixelCPEBase:localParameters(all) - "< xerr_barrel_l1_, yerr_barrel_l1_, xerr_barrel_ln_; std::vector yerr_barrel_ln_, xerr_endcap_, yerr_endcap_; @@ -80,20 +79,18 @@ class PixelCPEFast final : public PixelCPEBase { //--- DB Error Parametrization object, new light templates std::vector thePixelGenError_; - // allocate it with posix malloc to be ocmpatible with cpu wf + // allocate this with posix malloc to be compatible with the cpu workflow std::vector m_detParamsGPU; - // std::vector> m_detParamsGPU; pixelCPEforGPU::CommonParams m_commonParamsGPU; pixelCPEforGPU::LayerGeometry m_layerGeometry; pixelCPEforGPU::AverageGeometry m_averageGeometry; - pixelCPEforGPU::ParamsOnGPU cpuData_; struct GPUData { ~GPUData(); // not needed if not used on CPU... - pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; - pixelCPEforGPU::ParamsOnGPU *d_paramsOnGPU = nullptr; // copy of the above on the Device + pixelCPEforGPU::ParamsOnGPU paramsOnGPU_h; + pixelCPEforGPU::ParamsOnGPU *paramsOnGPU_d = nullptr; // copy of the above on the Device }; cms::cuda::ESProduct gpuData_; diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index 681211b82e1af..f655329d02013 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -128,7 +128,7 @@ namespace pixelCPEforGPU { if (0 == sizeM1) // size 1 return 0; - float W_eff = 0; + float w_eff = 0; bool simple = true; if (1 == sizeM1) { // size 2 //--- Width of the clusters minus the edge (first and last) pixels. 
@@ -140,14 +140,15 @@ namespace pixelCPEforGPU { auto W_pred = theThickness * cot_angle // geometric correction (in cm) - lorentz_shift; // (in cm) &&& check fpix! - W_eff = std::abs(W_pred) - W_inner; + w_eff = std::abs(W_pred) - W_inner; //--- If the observed charge width is inconsistent with the expectations //--- based on the track, do *not* use W_pred-W_inner. Instead, replace //--- it with an *average* effective charge width, which is the average //--- length of the edge pixels. - simple = - (W_eff < 0.0f) | (W_eff > pitch); // this produces "large" regressions for very small numeric differences... + + // this can produce "large" regressions for very small numeric differences + simple = (w_eff < 0.0f) | (w_eff > pitch); } if (simple) { @@ -157,7 +158,7 @@ namespace pixelCPEforGPU { sum_of_edge += 1.0f; if (last_is_big) sum_of_edge += 1.0f; - W_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) + w_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) } //--- Finally, compute the position in this projection @@ -168,7 +169,7 @@ namespace pixelCPEforGPU { if (Qsum == 0) Qsum = 1.0f; - return 0.5f * (Qdiff / Qsum) * W_eff; + return 0.5f * (Qdiff / Qsum) * w_eff; } constexpr inline void position(CommonParams const& __restrict__ comParams, diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 40acdaf2385cb..4457b02203e66 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,4 +1,5 @@ + diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc index 3f7c9aca2a974..332baabe8842a 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelCPEFastESProducer.cc @@ -1,23 +1,20 @@ -#include 
"RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" -#include "MagneticField/Engine/interface/MagneticField.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" -#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" +#include +#include -#include "FWCore/Framework/interface/EventSetup.h" +#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" #include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/ModuleFactory.h" #include "FWCore/Framework/interface/ESProducer.h" - -// new record -#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" - -#include -#include +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" class PixelCPEFastESProducer : public edm::ESProducer { public: @@ -34,7 +31,7 @@ class PixelCPEFastESProducer : public edm::ESProducer { edm::ESGetToken genErrorDBObjectToken_; edm::ParameterSet pset_; - bool 
UseErrorsFromTemplates_; + bool useErrorsFromTemplates_; }; using namespace edm; @@ -42,7 +39,7 @@ using namespace edm; PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet& p) : pset_(p) { auto const& myname = p.getParameter("ComponentName"); auto const& magname = p.getParameter("MagneticFieldRecord"); - UseErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); + useErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); auto cc = setWhatProduced(this, myname); magfieldToken_ = cc.consumes(magname); @@ -50,7 +47,7 @@ PixelCPEFastESProducer::PixelCPEFastESProducer(const edm::ParameterSet& p) : pse hTTToken_ = cc.consumes(); lorentzAngleToken_ = cc.consumes(edm::ESInputTag("")); lorentzAngleWidthToken_ = cc.consumes(edm::ESInputTag("", "forWidth")); - if (UseErrorsFromTemplates_) { + if (useErrorsFromTemplates_) { genErrorDBObjectToken_ = cc.consumes(); } } @@ -63,7 +60,7 @@ std::unique_ptr PixelCPEFastESProducer::produce( const SiPixelGenErrorDBObject* genErrorDBObjectProduct = nullptr; // Errors take only from new GenError - if (UseErrorsFromTemplates_) { // do only when generrors are needed + if (useErrorsFromTemplates_) { // do only when generrors are needed genErrorDBObjectProduct = &iRecord.get(genErrorDBObjectToken_); //} else { //std::cout<<" pass an empty GenError pointer"< PixelCPEFastESProducer::produce( } void PixelCPEFastESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - // PixelCPEFastESProducer edm::ParameterSetDescription desc; - desc.add("DoLorentz", false); - desc.add("lAWidthFPix", 0); - desc.add("useLAAlignmentOffsets", false); - desc.add("LoadTemplatesFromDB", true); - desc.add("UseErrorsFromTemplates", true); + + // from PixelCPEBase + PixelCPEBase::fillPSetDescription(desc); + + // used by PixelCPEFast desc.add("EdgeClusterErrorX", 50.0); - desc.add("MagneticFieldRecord", edm::ESInputTag()); - desc.add("useLAWidthFromDB", true); - desc.add("TruncatePixelCharge", true); - 
desc.add("ClusterProbComputationFlag", 0); - desc.add("lAOffset", 0); desc.add("EdgeClusterErrorY", 85.0); + desc.add("UseErrorsFromTemplates", true); + desc.add("TruncatePixelCharge", true); + + // specific to PixelCPEFastESProducer desc.add("ComponentName", "PixelCPEFast"); - desc.add("lAWidthBPix", 0); - desc.add("Alpha2Order", true); + desc.add("MagneticFieldRecord", edm::ESInputTag()); + desc.add("useLAAlignmentOffsets", false); + desc.add("DoLorentz", false); + descriptions.add("PixelCPEFastESProducer", desc); } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu new file mode 100644 index 0000000000000..f75d5e3b3bef7 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu @@ -0,0 +1,78 @@ +// C++ headers +#include +#include + +// CUDA runtime +#include + +// CMSSW headers +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h" + +#include "PixelRecHitGPUKernel.h" +#include "gpuPixelRecHits.h" + +namespace { + __global__ void setHitsLayerStart(uint32_t const* __restrict__ hitsModuleStart, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t* hitsLayerStart) { + auto i = blockIdx.x * blockDim.x + threadIdx.x; + + assert(0 == hitsModuleStart[0]); + + if (i < 11) { + hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]]; +#ifdef GPU_DEBUG + printf("LayerStart %d %d: %d\n", i, cpeParams->layerGeometry().layerStart[i], hitsLayerStart[i]); +#endif + } + } +} // namespace + +namespace pixelgpudetails { + + TrackingRecHit2DCUDA PixelRecHitGPUKernel::makeHitsAsync(SiPixelDigisCUDA const& digis_d, + SiPixelClustersCUDA const& clusters_d, + BeamSpotCUDA const& bs_d, + 
pixelCPEforGPU::ParamsOnGPU const* cpeParams, + cudaStream_t stream) const { + auto nHits = clusters_d.nClusters(); + TrackingRecHit2DCUDA hits_d(nHits, cpeParams, clusters_d.clusModuleStart(), stream); + + int threadsPerBlock = 128; + int blocks = digis_d.nModules(); // active modules (with digis) + +#ifdef GPU_DEBUG + std::cout << "launching getHits kernel for " << blocks << " blocks" << std::endl; +#endif + if (blocks) // protect from empty events + gpuPixelRecHits::getHits<<>>( + cpeParams, bs_d.data(), digis_d.view(), digis_d.nDigis(), clusters_d.view(), hits_d.view()); + cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + // assuming full warp of threads is better than a smaller number... + if (nHits) { + setHitsLayerStart<<<1, 32, 0, stream>>>(clusters_d.clusModuleStart(), cpeParams, hits_d.hitsLayerStart()); + cudaCheck(cudaGetLastError()); + } + + if (nHits) { + cms::cuda::fillManyFromVector(hits_d.phiBinner(), 10, hits_d.iphi(), hits_d.hitsLayerStart(), nHits, 256, stream); + cudaCheck(cudaGetLastError()); + } + +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + return hits_d; + } + +} // namespace pixelgpudetails diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h new file mode 100644 index 0000000000000..61bc8b58bb7d6 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h @@ -0,0 +1,33 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h +#define RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h + +#include + +#include + +#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" +#include 
"CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" + +namespace pixelgpudetails { + + class PixelRecHitGPUKernel { + public: + PixelRecHitGPUKernel() = default; + ~PixelRecHitGPUKernel() = default; + + PixelRecHitGPUKernel(const PixelRecHitGPUKernel&) = delete; + PixelRecHitGPUKernel(PixelRecHitGPUKernel&&) = delete; + PixelRecHitGPUKernel& operator=(const PixelRecHitGPUKernel&) = delete; + PixelRecHitGPUKernel& operator=(PixelRecHitGPUKernel&&) = delete; + + TrackingRecHit2DCUDA makeHitsAsync(SiPixelDigisCUDA const& digis_d, + SiPixelClustersCUDA const& clusters_d, + BeamSpotCUDA const& bs_d, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + cudaStream_t stream) const; + }; +} // namespace pixelgpudetails + +#endif // RecoLocalTracker_SiPixelRecHits_plugins_PixelRecHitGPUKernel_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc index 10e4b678b515c..09b90526bf7db 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -4,7 +4,7 @@ #include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" #include "DataFormats/Common/interface/Handle.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" @@ -21,7 +21,7 @@ #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" -#include "PixelRecHits.h" // TODO : spit product from kernel +#include "PixelRecHitGPUKernel.h" class SiPixelRecHitCUDA : public edm::global::EDProducer<> { public: @@ -53,7 +53,7 @@ void 
SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descrip edm::ParameterSetDescription desc; desc.add("beamSpot", edm::InputTag("offlineBeamSpotCUDA")); - desc.add("src", edm::InputTag("siPixelClustersCUDAPreSplitting")); + desc.add("src", edm::InputTag("siPixelClustersPreSplittingCUDA")); desc.add("CPE", "PixelCPEFast"); descriptions.add("siPixelRecHitCUDA", desc); } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc index c7eb7481fc4f8..8c16be54e5774 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitConverter.cc @@ -85,7 +85,6 @@ // Make heterogeneous framework happy #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CUDADataFormats/Common/interface/HostProduct.h" -using HMSstorage = HostProduct; using namespace std; @@ -115,6 +114,8 @@ namespace cms { TrackerGeometry const& geom); private: + using HMSstorage = HostProduct; + // TO DO: maybe allow a map of pointers? 
/// const PixelClusterParameterEstimator * cpe_; // what we got (for now, one ptr to base class) PixelCPEBase const* cpe_ = nullptr; // What we got (for now, one ptr to base class) @@ -189,43 +190,45 @@ namespace cms { const edmNew::DetSetVector& input = *inputhandle; - // fill cluster arrays - auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); + // allocate a buffer for the indices of the clusters + auto hmsp = std::make_unique(gpuClustering::maxNumModules + 1); + // hitsModuleStart is a non-owning pointer to the buffer auto hitsModuleStart = hmsp.get(); - std::array clusInModule{}; - for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { - unsigned int detid = DSViter->detId(); + // fill cluster arrays + std::array clusInModule{}; + for (auto const& dsv : input) { + unsigned int detid = dsv.detId(); DetId detIdObject(detid); const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); auto gind = genericDet->index(); // FIXME to be changed to support Phase2 - if (gind >= int(gpuClustering::MaxNumModules)) + if (gind >= int(gpuClustering::maxNumModules)) continue; - auto const nclus = DSViter->size(); + auto const nclus = dsv.size(); assert(nclus > 0); clusInModule[gind] = nclus; numberOfClusters += nclus; } hitsModuleStart[0] = 0; - assert(clusInModule.size() > gpuClustering::MaxNumModules); + assert(clusInModule.size() > gpuClustering::maxNumModules); for (int i = 1, n = clusInModule.size(); i < n; ++i) hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule[i - 1]; - assert(numberOfClusters == int(hitsModuleStart[gpuClustering::MaxNumModules])); + assert(numberOfClusters == int(hitsModuleStart[gpuClustering::maxNumModules])); - // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... - iEvent.emplace(tHost_, std::move(hmsp)); // hmsp is gone, hitsModuleStart still alive and kicking... 
+ // wrap the buffer in a HostProduct, and move it to the Event, without reallocating the buffer or affecting hitsModuleStart + iEvent.emplace(tHost_, std::move(hmsp)); numberOfClusters = 0; - for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { + for (auto const& dsv : input) { numberOfDetUnits++; - unsigned int detid = DSViter->detId(); + unsigned int detid = dsv.detId(); DetId detIdObject(detid); const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); assert(pixDet); SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output, detid); - edmNew::DetSet::const_iterator clustIt = DSViter->begin(), clustEnd = DSViter->end(); + edmNew::DetSet::const_iterator clustIt = dsv.begin(), clustEnd = dsv.end(); for (; clustIt != clustEnd; clustIt++) { numberOfClusters++; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc new file mode 100644 index 0000000000000..790b0da51ecfb --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc @@ -0,0 +1,187 @@ +#include + +#include + +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include 
"FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" + +class SiPixelRecHitFromCUDA : public edm::stream::EDProducer { +public: + explicit SiPixelRecHitFromCUDA(const edm::ParameterSet& iConfig); + ~SiPixelRecHitFromCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + using HMSstorage = HostProduct; + +private: + void acquire(edm::Event const& iEvent, + edm::EventSetup const& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; + + const edm::ESGetToken geomToken_; + const edm::EDGetTokenT> hitsToken_; // CUDA hits + const edm::EDGetTokenT clusterToken_; // legacy clusters + const edm::EDPutTokenT rechitsPutToken_; // legacy rechits + const edm::EDPutTokenT hostPutToken_; + + uint32_t nHits_; + cms::cuda::host::unique_ptr store32_; + cms::cuda::host::unique_ptr hitsModuleStart_; +}; + +SiPixelRecHitFromCUDA::SiPixelRecHitFromCUDA(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + hitsToken_( + consumes>(iConfig.getParameter("pixelRecHitSrc"))), + clusterToken_(consumes(iConfig.getParameter("src"))), + rechitsPutToken_(produces()), + hostPutToken_(produces()) {} + +void SiPixelRecHitFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingCUDA")); + desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); + 
descriptions.addWithDefaultLabel(desc); +} + +void SiPixelRecHitFromCUDA::acquire(edm::Event const& iEvent, + edm::EventSetup const& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + cms::cuda::Product const& inputDataWrapped = iEvent.get(hitsToken_); + cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; + auto const& inputData = ctx.get(inputDataWrapped); + + nHits_ = inputData.nHits(); + + LogDebug("SiPixelRecHitFromCUDA") << "converting " << nHits_ << " Hits"; + + if (0 == nHits_) + return; + store32_ = inputData.localCoordToHostAsync(ctx.stream()); + hitsModuleStart_ = inputData.hitsModuleStartToHostAsync(ctx.stream()); +} + +void SiPixelRecHitFromCUDA::produce(edm::Event& iEvent, edm::EventSetup const& es) { + // allocate a buffer for the indices of the clusters + auto hmsp = std::make_unique(gpuClustering::maxNumModules + 1); + std::copy(hitsModuleStart_.get(), hitsModuleStart_.get() + gpuClustering::maxNumModules + 1, hmsp.get()); + // wrap the buffer in a HostProduct, and move it to the Event, without reallocating the buffer or affecting hitsModuleStart + iEvent.emplace(hostPutToken_, std::move(hmsp)); + + SiPixelRecHitCollection output; + if (0 == nHits_) { + iEvent.emplace(rechitsPutToken_, std::move(output)); + return; + } + + auto xl = store32_.get(); + auto yl = xl + nHits_; + auto xe = yl + nHits_; + auto ye = xe + nHits_; + + const TrackerGeometry* geom = &es.getData(geomToken_); + + edm::Handle hclusters = iEvent.getHandle(clusterToken_); + auto const& input = *hclusters; + + constexpr uint32_t maxHitsInModule = gpuClustering::maxHitsInModule(); + + int numberOfDetUnits = 0; + int numberOfClusters = 0; + for (auto const& dsv : input) { + numberOfDetUnits++; + unsigned int detid = dsv.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom->idToDetUnit(detIdObject); + auto gind = genericDet->index(); + const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); + assert(pixDet); + 
SiPixelRecHitCollection::FastFiller recHitsOnDetUnit(output, detid); + auto fc = hitsModuleStart_[gind]; + auto lc = hitsModuleStart_[gind + 1]; + auto nhits = lc - fc; + + assert(lc > fc); + LogDebug("SiPixelRecHitFromCUDA") << "in det " << gind << ": conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << fc << ',' << lc; + if (nhits > maxHitsInModule) + edm::LogWarning("SiPixelRecHitFromCUDA") << fmt::sprintf( + "Too many clusters %d in module %d. Only the first %d hits will be converted", nhits, gind, maxHitsInModule); + nhits = std::min(nhits, maxHitsInModule); + + LogDebug("SiPixelRecHitFromCUDA") << "in det " << gind << "conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << lc << ',' << fc; + + if (0 == nhits) + continue; + auto jnd = [&](int k) { return fc + k; }; + assert(nhits <= dsv.size()); + if (nhits != dsv.size()) { + edm::LogWarning("GPUHits2CPU") << "nhits!= nclus " << nhits << ' ' << dsv.size(); + } + for (auto const& clust : dsv) { + assert(clust.originalId() >= 0); + assert(clust.originalId() < dsv.size()); + if (clust.originalId() >= nhits) + continue; + auto ij = jnd(clust.originalId()); + if (ij >= TrackingRecHit2DSOAView::maxHits()) + continue; // overflow... + LocalPoint lp(xl[ij], yl[ij]); + LocalError le(xe[ij], 0, ye[ij]); + SiPixelRecHitQuality::QualWordType rqw = 0; + + numberOfClusters++; + + /* cpu version.... 
(for reference) + std::tuple tuple = cpe_->getParameters( clust, *genericDet ); + LocalPoint lp( std::get<0>(tuple) ); + LocalError le( std::get<1>(tuple) ); + SiPixelRecHitQuality::QualWordType rqw( std::get<2>(tuple) ); + */ + + // Create a persistent edm::Ref to the cluster + edm::Ref, SiPixelCluster> cluster = edmNew::makeRefTo(hclusters, &clust); + // Make a RecHit and add it to the DetSet + SiPixelRecHit hit(lp, le, rqw, *genericDet, cluster); + // + // Now save it ================= + recHitsOnDetUnit.push_back(hit); + // ============================= + + LogDebug("SiPixelRecHitFromCUDA") << "cluster " << numberOfClusters << " at " << lp << ' ' << le; + + } // <-- End loop on Clusters + + // LogDebug("SiPixelRecHitGPU") + LogDebug("SiPixelRecHitFromCUDA") << "found " << recHitsOnDetUnit.size() << " RecHits on " << detid; + + } // <-- End loop on DetUnits + + LogDebug("SiPixelRecHitFromCUDA") << "found " << numberOfDetUnits << " dets, " << numberOfClusters << " clusters"; + + iEvent.emplace(rechitsPutToken_, std::move(output)); +} + +DEFINE_FWK_MODULE(SiPixelRecHitFromCUDA); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 9505aec154222..2397434027fa1 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -1,6 +1,5 @@ #include -// hack waiting for if constexpr #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" @@ -25,7 +24,7 @@ #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" -#include "RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h" +#include "gpuPixelRecHits.h" class SiPixelRecHitSoAFromLegacy : 
public edm::global::EDProducer<> { public: @@ -34,8 +33,8 @@ class SiPixelRecHitSoAFromLegacy : public edm::global::EDProducer<> { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - using HitModuleStart = std::array; - using HMSstorage = HostProduct; + using HitModuleStart = std::array; + using HMSstorage = HostProduct; private: void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; @@ -68,7 +67,7 @@ void SiPixelRecHitSoAFromLegacy::fillDescriptions(edm::ConfigurationDescriptions desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); desc.add("CPE", "PixelCPEFast"); desc.add("convertToLegacy", false); - descriptions.add("siPixelRecHitHostSoA", desc); + descriptions.addWithDefaultLabel(desc); } void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { @@ -90,31 +89,34 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv iEvent.getByToken(clusterToken_, hclusters); auto const& input = *hclusters; - // yes a unique ptr of a unique ptr so edm is happy and the pointer stay still... - auto hmsp = std::make_unique(gpuClustering::MaxNumModules + 1); + // allocate a buffer for the indices of the clusters + auto hmsp = std::make_unique(gpuClustering::maxNumModules + 1); + // hitsModuleStart is a non-owning pointer to the buffer auto hitsModuleStart = hmsp.get(); - auto hms = std::make_unique(std::move(hmsp)); // hmsp is gone - iEvent.put(tokenModuleStart_, std::move(hms)); // hms is gone! hitsModuleStart still alive and kicking... 
+ // wrap the buffer in a HostProduct + auto hms = std::make_unique(std::move(hmsp)); + // move the HostProduct to the Event, without reallocating the buffer or affecting hitsModuleStart + iEvent.put(tokenModuleStart_, std::move(hms)); // legacy output auto legacyOutput = std::make_unique(); // storage - std::vector xx_; - std::vector yy_; - std::vector adc_; - std::vector moduleInd_; - std::vector clus_; + std::vector xx; + std::vector yy; + std::vector adc; + std::vector moduleInd; + std::vector clus; std::vector, SiPixelCluster>> clusterRef; - constexpr uint32_t MaxHitsInModule = gpuClustering::MaxHitsInModule; + constexpr uint32_t maxHitsInModule = gpuClustering::maxHitsInModule(); HitModuleStart moduleStart_; // index of the first pixel of each module HitModuleStart clusInModule_; memset(&clusInModule_, 0, sizeof(HitModuleStart)); // needed?? - assert(2001 == clusInModule_.size()); - assert(0 == clusInModule_[2000]); + assert(gpuClustering::maxNumModules + 1 == clusInModule_.size()); + assert(0 == clusInModule_[gpuClustering::maxNumModules]); uint32_t moduleId_; moduleStart_[1] = 0; // we run sequentially.... 
@@ -123,20 +125,20 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv // fill cluster arrays int numberOfClusters = 0; - for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { - unsigned int detid = DSViter->detId(); + for (auto const& dsv : input) { + unsigned int detid = dsv.detId(); DetId detIdObject(detid); const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); auto gind = genericDet->index(); - assert(gind < 2000); - auto const nclus = DSViter->size(); + assert(gind < gpuClustering::maxNumModules); + auto const nclus = dsv.size(); clusInModule_[gind] = nclus; numberOfClusters += nclus; } hitsModuleStart[0] = 0; for (int i = 1, n = clusInModule_.size(); i < n; ++i) hitsModuleStart[i] = hitsModuleStart[i - 1] + clusInModule_[i - 1]; - assert(numberOfClusters == int(hitsModuleStart[2000])); + assert(numberOfClusters == int(hitsModuleStart[gpuClustering::maxNumModules])); // output SoA auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, nullptr); @@ -149,20 +151,20 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv } if (convert2Legacy_) - legacyOutput->reserve(2000, numberOfClusters); + legacyOutput->reserve(gpuClustering::maxNumModules, numberOfClusters); int numberOfDetUnits = 0; int numberOfHits = 0; - for (auto DSViter = input.begin(); DSViter != input.end(); DSViter++) { + for (auto const& dsv : input) { numberOfDetUnits++; - unsigned int detid = DSViter->detId(); + unsigned int detid = dsv.detId(); DetId detIdObject(detid); const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); auto const gind = genericDet->index(); - assert(gind < 2000); + assert(gind < gpuClustering::maxNumModules); const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); assert(pixDet); - auto const nclus = DSViter->size(); + auto const nclus = dsv.size(); assert(clusInModule_[gind] == nclus); if (0 == nclus) continue; // is this really possible? 
@@ -170,32 +172,32 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv auto const fc = hitsModuleStart[gind]; auto const lc = hitsModuleStart[gind + 1]; assert(lc > fc); - // std::cout << "in det " << gind << ": conv " << nclus << " hits from " << DSViter->size() << " legacy clusters" - // <<' '<< fc <<','< MaxHitsInModule) + if (nclus > maxHitsInModule) printf( - "WARNING: too many clusters %d in Module %d. Only first %d Hits converted\n", nclus, gind, MaxHitsInModule); + "WARNING: too many clusters %d in Module %d. Only first %d Hits converted\n", nclus, gind, maxHitsInModule); // fill digis - xx_.clear(); - yy_.clear(); - adc_.clear(); - moduleInd_.clear(); - clus_.clear(); + xx.clear(); + yy.clear(); + adc.clear(); + moduleInd.clear(); + clus.clear(); clusterRef.clear(); moduleId_ = gind; uint32_t ic = 0; uint32_t ndigi = 0; - for (auto const& clust : *DSViter) { + for (auto const& clust : dsv) { assert(clust.size() > 0); for (int i = 0, nd = clust.size(); i < nd; ++i) { auto px = clust.pixel(i); - xx_.push_back(px.x); - yy_.push_back(px.y); - adc_.push_back(px.adc); - moduleInd_.push_back(gind); - clus_.push_back(ic); + xx.push_back(px.x); + yy.push_back(px.y); + adc.push_back(px.adc); + moduleInd.push_back(gind); + clus.push_back(ic); ++ndigi; } assert(clust.originalId() == ic); // make sure hits and clus are in sync @@ -204,23 +206,23 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv ic++; } assert(nclus == ic); - assert(clus_.size() == ndigi); + assert(clus.size() == ndigi); numberOfHits += nclus; // filled creates view - SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(), yy_.data(), adc_.data(), moduleInd_.data(), clus_.data()}; + SiPixelDigisCUDA::DeviceConstView digiView{xx.data(), yy.data(), adc.data(), moduleInd.data(), clus.data()}; assert(digiView.adc(0) != 0); // we run on blockId.x==0 gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view()); for 
(auto h = fc; h < lc; ++h) - if (h - fc < MaxHitsInModule) + if (h - fc < maxHitsInModule) assert(gind == output->view()->detectorIndex(h)); else - assert(9999 == output->view()->detectorIndex(h)); + assert(gpuClustering::invalidModuleId == output->view()->detectorIndex(h)); if (convert2Legacy_) { SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(*legacyOutput, detid); for (auto h = fc; h < lc; ++h) { auto ih = h - fc; - if (ih >= MaxHitsInModule) + if (ih >= maxHitsInModule) break; assert(ih < clusterRef.size()); LocalPoint lp(output->view()->xLocal(h), output->view()->yLocal(h)); @@ -240,7 +242,8 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv cms::cuda::fillManyFromVector( output->phiBinner(), 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, nullptr); - // std::cout << "created HitSoa for " << numberOfClusters << " clusters in " << numberOfDetUnits << " Dets" << std::endl; + LogDebug("SiPixelRecHitSoAFromLegacy") << "created HitSoa for " << numberOfClusters << " clusters in " + << numberOfDetUnits << " Dets"; iEvent.put(std::move(output)); if (convert2Legacy_) iEvent.put(std::move(legacyOutput)); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 17cd5aad4db52..89a40c8723ae3 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -6,7 +6,8 @@ #include #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" #include "DataFormats/Math/interface/approx_atan2.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" @@ -53,7 
+54,7 @@ namespace gpuPixelRecHits { } // to be moved in common namespace... - constexpr uint16_t InvId = 9999; // must be > MaxNumModules + using gpuClustering::invalidModuleId; constexpr int32_t MaxHitsInIter = pixelCPEforGPU::MaxHitsInIter; using ClusParams = pixelCPEforGPU::ClusParams; @@ -69,8 +70,8 @@ namespace gpuPixelRecHits { #ifdef GPU_DEBUG if (threadIdx.x == 0) { - auto k = first; - while (digis.moduleInd(k) == InvId) + auto k = clusters.moduleStart(1 + blockIdx.x); + while (digis.moduleInd(k) == invalidModuleId) ++k; assert(digis.moduleInd(k) == me); } @@ -83,8 +84,6 @@ namespace gpuPixelRecHits { #endif for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) { - auto first = clusters.moduleStart(1 + blockIdx.x); - int nClusInIter = std::min(MaxHitsInIter, endClus - startClus); int lastClus = startClus + nClusInIter; assert(nClusInIter <= nclus); @@ -106,26 +105,24 @@ namespace gpuPixelRecHits { clusParams.Q_l_Y[ic] = 0; } - first += threadIdx.x; - __syncthreads(); - // one thead per "digi" - + // one thread per "digi" + auto first = clusters.moduleStart(1 + blockIdx.x) + threadIdx.x; for (int i = first; i < numElements; i += blockDim.x) { auto id = digis.moduleInd(i); - if (id == InvId) + if (id == invalidModuleId) continue; // not valid if (id != me) break; // end of module auto cl = digis.clus(i); if (cl < startClus || cl >= lastClus) continue; - auto x = digis.xx(i); - auto y = digis.yy(i); cl -= startClus; assert(cl >= 0); assert(cl < MaxHitsInIter); + auto x = digis.xx(i); + auto y = digis.yy(i); atomicMin(&clusParams.minRow[cl], x); atomicMax(&clusParams.maxRow[cl], x); atomicMin(&clusParams.minCol[cl], y); @@ -137,7 +134,7 @@ namespace gpuPixelRecHits { auto pixmx = cpeParams->detParams(me).pixmx; for (int i = first; i < numElements; i += blockDim.x) { auto id = digis.moduleInd(i); - if (id == InvId) + if (id == invalidModuleId) continue; // not valid if (id != me) break; // end of module @@ -166,13 +163,10 @@ 
namespace gpuPixelRecHits { // next one cluster per thread... first = clusters.clusModuleStart(me) + startClus; - for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { auto h = first + ic; // output index in global memory - // this cannot happen anymore - if (h >= TrackingRecHit2DSOAView::maxHits()) - break; // overflow... + assert(h < TrackingRecHit2DSOAView::maxHits()); assert(h < hits.nHits()); assert(h < clusters.clusModuleStart(me + 1)); @@ -180,9 +174,7 @@ namespace gpuPixelRecHits { pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); // store it - hits.charge(h) = clusParams.charge[ic]; - hits.detectorIndex(h) = me; float xl, yl; diff --git a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py index ea9fab563d164..e3879f4d9d34c 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py +++ b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py @@ -10,7 +10,7 @@ # 2. Pixel Generic CPE # from RecoLocalTracker.SiPixelRecHits.PixelCPEGeneric_cfi import * -from RecoLocalTracker.SiPixelRecHits.PixelCPEFast_cfi import * +from RecoLocalTracker.SiPixelRecHits.PixelCPEFastESProducer_cfi import * # # 3. 
ESProducer for the Magnetic-field dependent template records # diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 2a0c005e51622..eb9dbad4934cd 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -19,23 +19,23 @@ from Configuration.ProcessModifiers.gpu_cff import gpu from RecoLocalTracker.SiPixelRecHits.siPixelRecHitCUDA_cfi import siPixelRecHitCUDA as _siPixelRecHitCUDA -from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSOA_cfi import siPixelRecHitFromSOA as _siPixelRecHitFromSOA +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromCUDA_cfi import siPixelRecHitFromCUDA as _siPixelRecHitFromCUDA gpu.toModify(siPixelRecHitsPreSplitting, - cuda = _siPixelRecHitFromSOA.clone() + cuda = _siPixelRecHitFromCUDA.clone() ) siPixelRecHitsPreSplittingTask = cms.Task(siPixelRecHitsPreSplitting) -siPixelRecHitsCUDAPreSplitting = _siPixelRecHitCUDA.clone( +siPixelRecHitsPreSplittingCUDA = _siPixelRecHitCUDA.clone( beamSpot = "offlineBeamSpotToCUDA" ) -siPixelRecHitsLegacyPreSplitting = _siPixelRecHitFromSOA.clone() +siPixelRecHitsPreSplittingLegacy = _siPixelRecHitFromCUDA.clone() siPixelRecHitsPreSplittingTaskCUDA = cms.Task( - siPixelRecHitsCUDAPreSplitting, - siPixelRecHitsLegacyPreSplitting, + siPixelRecHitsPreSplittingCUDA, + siPixelRecHitsPreSplittingLegacy, ) from Configuration.ProcessModifiers.gpu_cff import gpu diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index f3b3f308fa9d3..548119cef501b 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -1,6 +1,3 @@ -#include - -#include #include #include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" @@ -30,15 +27,13 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, const 
SiPixelLorentzAngle* lorentzAngle, const SiPixelGenErrorDBObject* genErrorDBObject, const SiPixelLorentzAngle* lorentzAngleWidth) - : PixelCPEBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, nullptr, lorentzAngleWidth, 0) { - EdgeClusterErrorX_ = conf.getParameter("EdgeClusterErrorX"); - EdgeClusterErrorY_ = conf.getParameter("EdgeClusterErrorY"); - - UseErrorsFromTemplates_ = conf.getParameter("UseErrorsFromTemplates"); - TruncatePixelCharge_ = conf.getParameter("TruncatePixelCharge"); - + : PixelCPEBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, nullptr, lorentzAngleWidth, 0), + edgeClusterErrorX_(conf.getParameter("EdgeClusterErrorX")), + edgeClusterErrorY_(conf.getParameter("EdgeClusterErrorY")), + useErrorsFromTemplates_(conf.getParameter("UseErrorsFromTemplates")), + truncatePixelCharge_(conf.getParameter("TruncatePixelCharge")) { // Use errors from templates or from GenError - if (UseErrorsFromTemplates_) { + if (useErrorsFromTemplates_) { if (!SiPixelGenError::pushfile(*genErrorDBObject_, thePixelGenError_)) throw cms::Exception("InvalidCalibrationLoaded") << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " @@ -46,7 +41,7 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, } // Rechit errors in case other, more correct, errors are not vailable - // This are constants. Maybe there is a more efficienct way to store them. + // These are constants. Maybe there is a more efficienct way to store them. 
xerr_barrel_l1_ = {0.00115, 0.00120, 0.00088}; xerr_barrel_l1_def_ = 0.01030; yerr_barrel_l1_ = {0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; @@ -73,37 +68,37 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cudaStream_t cudaStream) const { const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { // and now copy to device... - cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); - cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_detParams, + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_detParams, this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams))); - cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry))); - cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); - cudaCheck(cudaMalloc((void**)&data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry))); + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); + cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_d, sizeof(pixelCPEforGPU::ParamsOnGPU))); cudaCheck(cudaMemcpyAsync( - data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_commonParams, + data.paramsOnGPU_d, &data.paramsOnGPU_h, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream)); + cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_commonParams, &this->m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), 
cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_averageGeometry, + cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_averageGeometry, &this->m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry), cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_layerGeometry, + cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_layerGeometry, &this->m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), cudaMemcpyDefault, stream)); - cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_detParams, + cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_detParams, this->m_detParamsGPU.data(), this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault, stream)); }); - return data.d_paramsOnGPU; + return data.paramsOnGPU_d; } void PixelCPEFast::fillParamsForGpu() { @@ -112,7 +107,8 @@ void PixelCPEFast::fillParamsForGpu() { m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; - // std::cout << "pitch & thickness " << m_commonParamsGPU.thePitchX << ' ' << m_commonParamsGPU.thePitchY << " " << m_commonParamsGPU.theThicknessB << ' ' << m_commonParamsGPU.theThicknessE << std::endl; + LogDebug("PixelCPEFast") << "pitch & thickness " << m_commonParamsGPU.thePitchX << ' ' << m_commonParamsGPU.thePitchY + << " " << m_commonParamsGPU.theThicknessB << ' ' << m_commonParamsGPU.theThicknessE; // zero average geometry memset(&m_averageGeometry, 0, sizeof(pixelCPEforGPU::AverageGeometry)); @@ -132,30 +128,28 @@ void PixelCPEFast::fillParamsForGpu() { assert(p.theDet->index() == int(i)); assert(m_commonParamsGPU.thePitchY == p.thePitchY); assert(m_commonParamsGPU.thePitchX == p.thePitchX); - //assert(m_commonParamsGPU.theThickness==p.theThickness); g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); g.isPosZ = p.theDet->surface().position().z() > 0; g.layer = ttopo_.layer(p.theDet->geographicalId()); g.index = i; // better be! 
g.rawId = p.theDet->geographicalId(); - assert((g.isBarrel ? m_commonParamsGPU.theThicknessB : m_commonParamsGPU.theThicknessE) == p.theThickness); - //if (m_commonParamsGPU.theThickness!=p.theThickness) - // std::cout << i << (g.isBarrel ? "B " : "E ") << m_commonParamsGPU.theThickness<<"!="<geographicalId()); if (oldLayer != g.layer) { oldLayer = g.layer; - // std::cout << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) << g.layer << " starting at " << g.rawId << std::endl; - // std::cout << "old layer had " << nl << " ladders" << std::endl; + LogDebug("PixelCPEFast") << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) + << g.layer << " starting at " << g.rawId << '\n' + << "old layer had " << nl << " ladders"; nl = 0; } if (oldLadder != ladder) { oldLadder = ladder; - // std::cout << "new ladder at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) << ladder << " starting at " << g.rawId << std::endl; - // std::cout << "old ladder ave z,r,p mz " << zl/8.f << " " << rl/8.f << " " << pl/8.f << ' ' << miz << ' ' << mxz << std::endl; + LogDebug("PixelCPEFast") << "new ladder at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) + << ladder << " starting at " << g.rawId << '\n' + << "old ladder ave z,r,p mz " << zl / 8.f << " " << rl / 8.f << " " << pl / 8.f << ' ' + << miz << ' ' << mxz; rl = 0; zl = 0; pl = 0; @@ -200,16 +194,15 @@ void PixelCPEFast::fillParamsForGpu() { if (lape.invalid()) lape = LocalError(); // zero.... 
-#ifdef DUMP_ERRORS +#ifdef EDM_ML_DEBUG auto m = 10000.f; for (float qclus = 15000; qclus < 35000; qclus += 15000) { errorFromTemplates(p, cp, qclus); - - std::cout << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' - << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2 << std::endl; + LogDebug("PixelCPEFast") << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 + << ' ' << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2; } - std::cout << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()) << std::endl; -#endif + LogDebug("PixelCPEFast") << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()); +#endif // EDM_ML_DEBUG errorFromTemplates(p, cp, 20000.f); g.pixmx = std::max(0, cp.pixmx); @@ -221,35 +214,6 @@ void PixelCPEFast::fillParamsForGpu() { g.sy[1] = cp.sy1; g.sy[2] = cp.sy2; - /* - // from run1?? - if (i<96) { - g.sx[0] = 0.00120; - g.sx[1] = 0.00115; - g.sx[2] = 0.0050; - - g.sy[0] = 0.00210; - g.sy[1] = 0.00375; - g.sy[2] = 0.0085; - } else if (g.isBarrel) { - g.sx[0] = 0.00120; - g.sx[1] = 0.00115; - g.sx[2] = 0.0050; - - g.sy[0] = 0.00210; - g.sy[1] = 0.00375; - g.sy[2] = 0.0085; - } else { - g.sx[0] = 0.0020; - g.sx[1] = 0.0020; - g.sx[2] = 0.0050; - - g.sy[0] = 0.0021; - g.sy[1] = 0.0021; - g.sy[2] = 0.0085; - } - */ - for (int i = 0; i < 3; ++i) { g.sx[i] = std::sqrt(g.sx[i] * g.sx[i] + lape.xx()); g.sy[i] = std::sqrt(g.sy[i] * g.sy[i] + lape.yy()); @@ -269,7 +233,7 @@ void PixelCPEFast::fillParamsForGpu() { aveGeom.ladderMaxZ[il] = std::max(aveGeom.ladderMaxZ[il], z); aveGeom.ladderX[il] += 0.125f * g.frame.x(); aveGeom.ladderY[il] += 0.125f * g.frame.y(); - aveGeom.ladderR[il] += 0.125 * sqrt(g.frame.x() * g.frame.x() + g.frame.y() * g.frame.y()); + aveGeom.ladderR[il] += 0.125f * sqrt(g.frame.x() * g.frame.x() + g.frame.y() * g.frame.y()); } assert(il + 1 == 
int(phase1PixelTopology::numberOfLaddersInBarrel)); // add half_module and tollerance @@ -293,13 +257,16 @@ void PixelCPEFast::fillParamsForGpu() { aveGeom.endCapZ[0] -= 1.5f; aveGeom.endCapZ[1] += 1.5f; - /* - for (int jl=0, nl=phase1PixelTopology::numberOfLaddersInBarrel; jlmaxPixelCol(); @@ -495,7 +462,7 @@ LocalError PixelCPEFast::localError(DetParam const& theDetParam, ClusterParam& t bool bigInX = theDetParam.theRecTopol->containsBigPixelInX(minPixelRow, maxPixelRow); bool bigInY = theDetParam.theRecTopol->containsBigPixelInY(minPixelCol, maxPixelCol); - if (UseErrorsFromTemplates_) { + if (useErrorsFromTemplates_) { // // Use template errors @@ -526,8 +493,8 @@ LocalError PixelCPEFast::localError(DetParam const& theDetParam, ClusterParam& t } else { // simple errors // This are the simple errors, hardcoded in the code - //cout << "Track angles are not known " << endl; - //cout << "Default angle estimation which assumes track from PV (0,0,0) does not work." << endl; + LogDebug("PixelCPEFast") << "Track angles are not known.\n" + << "Default angle estimation which assumes track from PV (0,0,0) does not work."; if (GeomDetEnumerators::isTrackerPixel(theDetParam.thePart)) { if (GeomDetEnumerators::isBarrel(theDetParam.thePart)) { @@ -583,7 +550,7 @@ LocalError PixelCPEFast::localError(DetParam const& theDetParam, ClusterParam& t } // end - // std::cout<<" errors "< Date: Thu, 24 Dec 2020 15:43:59 +0100 Subject: [PATCH 144/149] Synchronise with CMSSW_11_3_0_pre1 --- .../plugins/SiPixelClusterProducer.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc index 2bd902af01b1e..3c0f8b9754089 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterProducer.cc @@ -16,7 +16,6 @@ #include 
"PixelThresholdClusterizer.h" // Geometry -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" // Data Formats @@ -55,6 +54,9 @@ SiPixelClusterProducer::SiPixelClusterProducer(edm::ParameterSet const& conf) else tPixelDigi = consumes>(conf.getParameter("src")); + trackerTopoToken_ = esConsumes(); + trackerGeomToken_ = esConsumes(); + const auto& payloadType = conf.getParameter("payloadType"); if (payloadType == "HLT") theSiPixelGainCalibration_ = std::make_unique(conf); @@ -102,11 +104,9 @@ void SiPixelClusterProducer::produce(edm::Event& e, const edm::EventSetup& es) { e.getByToken(tPixelDigi, inputDigi); // Step A.2: get event setup - edm::ESHandle geom; - es.get().get(geom); + edm::ESHandle geom = es.getHandle(trackerGeomToken_); - edm::ESHandle trackerTopologyHandle; - es.get().get(trackerTopologyHandle); + edm::ESHandle trackerTopologyHandle = es.getHandle(trackerTopoToken_); tTopo_ = trackerTopologyHandle.product(); // Step B: create the final output collection From 120f13121cc9cb80c005037f112a26042351ac12 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 24 Dec 2020 16:05:33 +0100 Subject: [PATCH 145/149] Synchronise with CMSSW_11_3_X_2020-12-24-2300 --- .../clients/beam_dqm_sourceclient-live_cfg.py | 98 ++++++++++++------- 1 file changed, 61 insertions(+), 37 deletions(-) diff --git a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py index 471c3266bb569..efdde1512fcf7 100644 --- a/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py +++ b/DQM/Integration/python/clients/beam_dqm_sourceclient-live_cfg.py @@ -4,6 +4,8 @@ # Define here the BeamSpotOnline record name, # it will be used both in BeamMonitor setup and in payload creation/upload BSOnlineRecordName = 'BeamSpotOnlineLegacyObjectsRcd' +BSOnlineTag = 'BeamSpotOnlineTestLegacy' +BSOnlineJobName = 'BeamSpotOnlineTestLegacy' #from 
Configuration.Eras.Era_Run2_2018_cff import Run2_2018 #process = cms.Process("BeamMonitor", Run2_2018) FIXME @@ -11,6 +13,11 @@ from Configuration.Eras.Era_Run2_2018_pp_on_AA_cff import Run2_2018_pp_on_AA process = cms.Process("BeamMonitor", Run2_2018_pp_on_AA) +# Configure tag and jobName if running Playback system +if "dqm_cmssw/playback" in str(sys.argv[1]): + BSOnlineTag = BSOnlineTag + 'Playback' + BSOnlineJobName = BSOnlineJobName + 'Playback' + # process.MessageLogger = cms.Service("MessageLogger", debugModules = cms.untracked.vstring('*'), @@ -28,12 +35,6 @@ live=False unitTest=True -# Switch to veto the upload of the BeamSpot conditions to the DB -# when False it performs the upload -noDB = True -if 'noDB=False' in sys.argv: - noDB=False - #--------------- # Input sources if unitTest: @@ -68,10 +69,12 @@ # Conditions if (live): process.load("DQM.Integration.config.FrontierCondition_GT_cfi") + process.GlobalTag.DBParameters.authenticationPath = cms.untracked.string('.') else: process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff') from Configuration.AlCa.GlobalTag import GlobalTag as gtCustomise process.GlobalTag = gtCustomise(process.GlobalTag, 'auto:run2_data', '') + process.GlobalTag.DBParameters.authenticationPath = cms.untracked.string('.') # you may need to set manually the GT in the line below #process.GlobalTag.globaltag = '100X_upgrade2018_realistic_v10' @@ -352,37 +355,58 @@ #--------- # Upload BeamSpotOnlineObject (LegacyRcd) to CondDB -process.OnlineDBOutputService = cms.Service("OnlineDBOutputService", - - DBParameters = cms.PSet( - messageLevel = cms.untracked.int32(0), - authenticationPath = cms.untracked.string('.') - ), - - # Upload to CondDB - connect = cms.string('oracle://cms_orcoff_prep/CMS_CONDITIONS'), - preLoadConnectionString = cms.untracked.string('frontier://FrontierPrep/CMS_CONDITIONS'), - - runNumber = cms.untracked.uint64(options.runNumber), - lastLumiFile = cms.untracked.string(''), - 
writeTransactionDelay = cms.untracked.uint32(options.transDelay), - latency = cms.untracked.uint32(2), - autoCommit = cms.untracked.bool(True), - saveLogsOnDB = cms.untracked.bool(True), - jobName = cms.untracked.string("BeamSpotOnlineLegacyTest"), # name of the DB log record - toPut = cms.VPSet(cms.PSet( - record = cms.string(BSOnlineRecordName), - tag = cms.string('BSOnlineLegacy_tag'), - timetype = cms.untracked.string('Lumi'), - onlyAppendUpdatePolicy = cms.untracked.bool(True) - )) -) - -# If not live or noDB: produce a (local) SQLITE file -if not live or noDB: - process.OnlineDBOutputService.connect = cms.string('sqlite_file:BeamSpotOnlineLegacy.db') - process.OnlineDBOutputService.preLoadConnectionString = cms.untracked.string('sqlite_file:BeamSpotOnlineLegacy.db') - process.OnlineDBOutputService.saveLogsOnDB = cms.untracked.bool(False) +if unitTest == False: + process.OnlineDBOutputService = cms.Service("OnlineDBOutputService", + + DBParameters = cms.PSet( + messageLevel = cms.untracked.int32(0), + authenticationPath = cms.untracked.string('.') + ), + + # Upload to CondDB + connect = cms.string('oracle://cms_orcon_prod/CMS_CONDITIONS'), + preLoadConnectionString = cms.untracked.string('frontier://FrontierProd/CMS_CONDITIONS'), + + runNumber = cms.untracked.uint64(options.runNumber), + #lastLumiFile = cms.untracked.string('last_lumi.txt'), + #lastLumiUrl = cms.untracked.string('http://ru-c2e14-11-01.cms:11100/urn:xdaq-application:lid=52/getLatestLumiSection'), + omsServiceUrl = cms.untracked.string('http://cmsoms-services.cms:9949/urn:xdaq-application:lid=100/getRunAndLumiSection'), + writeTransactionDelay = cms.untracked.uint32(options.transDelay), + latency = cms.untracked.uint32(2), + autoCommit = cms.untracked.bool(True), + saveLogsOnDB = cms.untracked.bool(True), + jobName = cms.untracked.string(BSOnlineJobName), # name of the DB log record + toPut = cms.VPSet(cms.PSet( + record = cms.string(BSOnlineRecordName), + tag = cms.string(BSOnlineTag), + 
timetype = cms.untracked.string('Lumi'), + onlyAppendUpdatePolicy = cms.untracked.bool(True) + )) + ) +else: + process.OnlineDBOutputService = cms.Service("OnlineDBOutputService", + + DBParameters = cms.PSet( + messageLevel = cms.untracked.int32(0), + authenticationPath = cms.untracked.string('.') + ), + + # Upload to CondDB + connect = cms.string('sqlite_file:BeamSpotOnlineLegacy.db'), + preLoadConnectionString = cms.untracked.string('sqlite_file:BeamSpotOnlineLegacy.db'), + runNumber = cms.untracked.uint64(options.runNumber), + lastLumiFile = cms.untracked.string('last_lumi.txt'), + #lastLumiUrl = cms.untracked.string('http://ru-c2e14-11-01.cms:11100/urn:xdaq-application:lid=52/getLatestLumiSection'), + writeTransactionDelay = cms.untracked.uint32(options.transDelay), + latency = cms.untracked.uint32(2), + autoCommit = cms.untracked.bool(True), + toPut = cms.VPSet(cms.PSet( + record = cms.string(BSOnlineRecordName), + tag = cms.string(BSOnlineTag), + timetype = cms.untracked.string('Lumi'), + onlyAppendUpdatePolicy = cms.untracked.bool(True) + )) + ) #--------- # Final path From 23549f65257315d8c86b05d918d7a36dc2cc34bc Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 25 Dec 2020 01:19:03 +0100 Subject: [PATCH 146/149] Apply code formatting --- .../SiPixelClusterizer/plugins/gpuClustering.h | 18 ++++++++---------- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 4 ++-- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h index 9f295981ca732..8467c1d74e2d9 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h @@ -35,16 +35,14 @@ namespace gpuClustering { } } - __global__ - void - findClus(uint16_t const* __restrict__ id, // module id of each pixel - uint16_t const* __restrict__ x, // local coordinates of each pixel - uint16_t const* __restrict__ y, 
// - uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module - uint32_t* __restrict__ nClustersInModule, // output: number of clusters found in each module - uint32_t* __restrict__ moduleId, // output: module id of each module - int32_t* __restrict__ clusterId, // output: cluster id of each pixel - int numElements) { + __global__ void findClus(uint16_t const* __restrict__ id, // module id of each pixel + uint16_t const* __restrict__ x, // local coordinates of each pixel + uint16_t const* __restrict__ y, // + uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module + uint32_t* __restrict__ nClustersInModule, // output: number of clusters found in each module + uint32_t* __restrict__ moduleId, // output: module id of each module + int32_t* __restrict__ clusterId, // output: cluster id of each pixel + int numElements) { __shared__ int msize; auto firstModule = blockIdx.x; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 2397434027fa1..0d95370f25e47 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -172,8 +172,8 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv auto const fc = hitsModuleStart[gind]; auto const lc = hitsModuleStart[gind + 1]; assert(lc > fc); - LogDebug("SiPixelRecHitSoAFromLegacy") << "in det " << gind << ": conv " << nclus << " hits from " - << dsv.size() << " legacy clusters" << ' ' << fc << ',' << lc; + LogDebug("SiPixelRecHitSoAFromLegacy") << "in det " << gind << ": conv " << nclus << " hits from " << dsv.size() + << " legacy clusters" << ' ' << fc << ',' << lc; assert((lc - fc) == nclus); if (nclus > maxHitsInModule) printf( From e87a6b6e085f5cb293380764db5ee3b3cd74914e Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 26 Dec 2020 
00:18:35 +0100 Subject: [PATCH 147/149] Clean up the pixel local reconstruction code (cms-patatrack#599) Address the pixel local reconstruction review comments. General clean up of the pixel local reconstruction code: - update comments; - update data members for better consistency; - remove unimplemented method. --- .../interface/TrackingRecHit2DHeterogeneous.h | 5 +- .../plugins/SiPixelDigiErrorsFromSoA.cc | 3 +- .../SiPixelRecHits/interface/PixelCPEFast.h | 8 +-- .../SiPixelRecHits/src/PixelCPEFast.cc | 60 +++++++++---------- 4 files changed, 37 insertions(+), 39 deletions(-) diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index f10495abd2ab8..72a136ab5f5b6 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -38,12 +38,11 @@ class TrackingRecHit2DHeterogeneous { // only the local coord and detector index cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; - cms::cuda::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const; cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; private: - static constexpr uint32_t n16 = 4; - static constexpr uint32_t n32 = 9; + static constexpr uint32_t n16 = 4; // number of elements in m_store16 + static constexpr uint32_t n32 = 9; // number of elements in m_store32 static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious unique_ptr m_store16; //! 
diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc index b487942a1419b..d09e703c36a00 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoA.cc @@ -140,8 +140,7 @@ void SiPixelDigiErrorsFromSoA::produce(edm::Event& iEvent, const edm::EventSetup if (roc->idInDetUnit() > ch.roc_last) ch.roc_last = roc->idInDetUnit(); } - if (ch.roc_first < ch.roc_last) - disabledChannelsDetSet.push_back(ch); + disabledChannelsDetSet.push_back(ch); } } } else { diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 7335aa5e2dfdd..f50db3af11868 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -80,10 +80,10 @@ class PixelCPEFast final : public PixelCPEBase { std::vector thePixelGenError_; // allocate this with posix malloc to be compatible with the cpu workflow - std::vector m_detParamsGPU; - pixelCPEforGPU::CommonParams m_commonParamsGPU; - pixelCPEforGPU::LayerGeometry m_layerGeometry; - pixelCPEforGPU::AverageGeometry m_averageGeometry; + std::vector detParamsGPU_; + pixelCPEforGPU::CommonParams commonParamsGPU_; + pixelCPEforGPU::LayerGeometry layerGeometry_; + pixelCPEforGPU::AverageGeometry averageGeometry_; pixelCPEforGPU::ParamsOnGPU cpuData_; struct GPUData { diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 548119cef501b..3a57ce120b545 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -58,10 +58,10 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, fillParamsForGpu(); cpuData_ = { - &m_commonParamsGPU, - m_detParamsGPU.data(), - &m_layerGeometry, - &m_averageGeometry, + 
&commonParamsGPU_, + detParamsGPU_.data(), + &layerGeometry_, + &averageGeometry_, }; } @@ -70,7 +70,7 @@ const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cudaStream_t // and now copy to device... cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_detParams, - this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams))); + this->detParamsGPU_.size() * sizeof(pixelCPEforGPU::DetParams))); cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry))); cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_h.m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry))); cudaCheck(cudaMalloc((void**)&data.paramsOnGPU_d, sizeof(pixelCPEforGPU::ParamsOnGPU))); @@ -78,23 +78,23 @@ const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cudaStream_t cudaCheck(cudaMemcpyAsync( data.paramsOnGPU_d, &data.paramsOnGPU_h, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_commonParams, - &this->m_commonParamsGPU, + &this->commonParamsGPU_, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_averageGeometry, - &this->m_averageGeometry, + &this->averageGeometry_, sizeof(pixelCPEforGPU::AverageGeometry), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_layerGeometry, - &this->m_layerGeometry, + &this->layerGeometry_, sizeof(pixelCPEforGPU::LayerGeometry), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync((void*)data.paramsOnGPU_h.m_detParams, - this->m_detParamsGPU.data(), - this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams), + this->detParamsGPU_.data(), + this->detParamsGPU_.size() * sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault, stream)); }); @@ -102,16 +102,16 @@ const pixelCPEforGPU::ParamsOnGPU* 
PixelCPEFast::getGPUProductAsync(cudaStream_t } void PixelCPEFast::fillParamsForGpu() { - m_commonParamsGPU.theThicknessB = m_DetParams.front().theThickness; - m_commonParamsGPU.theThicknessE = m_DetParams.back().theThickness; - m_commonParamsGPU.thePitchX = m_DetParams[0].thePitchX; - m_commonParamsGPU.thePitchY = m_DetParams[0].thePitchY; + commonParamsGPU_.theThicknessB = m_DetParams.front().theThickness; + commonParamsGPU_.theThicknessE = m_DetParams.back().theThickness; + commonParamsGPU_.thePitchX = m_DetParams[0].thePitchX; + commonParamsGPU_.thePitchY = m_DetParams[0].thePitchY; - LogDebug("PixelCPEFast") << "pitch & thickness " << m_commonParamsGPU.thePitchX << ' ' << m_commonParamsGPU.thePitchY - << " " << m_commonParamsGPU.theThicknessB << ' ' << m_commonParamsGPU.theThicknessE; + LogDebug("PixelCPEFast") << "pitch & thickness " << commonParamsGPU_.thePitchX << ' ' << commonParamsGPU_.thePitchY + << " " << commonParamsGPU_.theThicknessB << ' ' << commonParamsGPU_.theThicknessE; // zero average geometry - memset(&m_averageGeometry, 0, sizeof(pixelCPEforGPU::AverageGeometry)); + memset(&averageGeometry_, 0, sizeof(pixelCPEforGPU::AverageGeometry)); uint32_t oldLayer = 0; uint32_t oldLadder = 0; @@ -120,21 +120,21 @@ void PixelCPEFast::fillParamsForGpu() { float miz = 90, mxz = 0; float pl = 0; int nl = 0; - m_detParamsGPU.resize(m_DetParams.size()); + detParamsGPU_.resize(m_DetParams.size()); for (auto i = 0U; i < m_DetParams.size(); ++i) { auto& p = m_DetParams[i]; - auto& g = m_detParamsGPU[i]; + auto& g = detParamsGPU_[i]; assert(p.theDet->index() == int(i)); - assert(m_commonParamsGPU.thePitchY == p.thePitchY); - assert(m_commonParamsGPU.thePitchX == p.thePitchX); + assert(commonParamsGPU_.thePitchY == p.thePitchY); + assert(commonParamsGPU_.thePitchX == p.thePitchX); g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); g.isPosZ = p.theDet->surface().position().z() > 0; g.layer = ttopo_.layer(p.theDet->geographicalId()); g.index = i; // better be! 
g.rawId = p.theDet->geographicalId(); - assert((g.isBarrel ? m_commonParamsGPU.theThicknessB : m_commonParamsGPU.theThicknessE) == p.theThickness); + assert((g.isBarrel ? commonParamsGPU_.theThicknessB : commonParamsGPU_.theThicknessE) == p.theThickness); auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId()); if (oldLayer != g.layer) { @@ -179,7 +179,7 @@ void PixelCPEFast::fillParamsForGpu() { // errors ..... ClusterParamGeneric cp; - auto gvx = p.theOrigin.x() + 40.f * m_commonParamsGPU.thePitchX; + auto gvx = p.theOrigin.x() + 40.f * commonParamsGPU_.thePitchX; auto gvy = p.theOrigin.y(); auto gvz = 1.f / p.theOrigin.z(); //--- Note that the normalization is not required as only the ratio used @@ -221,10 +221,10 @@ void PixelCPEFast::fillParamsForGpu() { } // compute ladder baricenter (only in global z) for the barrel - auto& aveGeom = m_averageGeometry; + auto& aveGeom = averageGeometry_; int il = 0; for (int im = 0, nm = phase1PixelTopology::numberOfModulesInBarrel; im < nm; ++im) { - auto const& g = m_detParamsGPU[im]; + auto const& g = detParamsGPU_[im]; il = im / 8; assert(il < int(phase1PixelTopology::numberOfLaddersInBarrel)); auto z = g.frame.z(); @@ -246,11 +246,11 @@ void PixelCPEFast::fillParamsForGpu() { // compute "max z" for first layer in endcap (should we restrict to the outermost ring?) 
for (auto im = phase1PixelTopology::layerStart[4]; im < phase1PixelTopology::layerStart[5]; ++im) { - auto const& g = m_detParamsGPU[im]; + auto const& g = detParamsGPU_[im]; aveGeom.endCapZ[0] = std::max(aveGeom.endCapZ[0], g.frame.z()); } for (auto im = phase1PixelTopology::layerStart[7]; im < phase1PixelTopology::layerStart[8]; ++im) { - auto const& g = m_detParamsGPU[im]; + auto const& g = detParamsGPU_[im]; aveGeom.endCapZ[1] = std::min(aveGeom.endCapZ[1], g.frame.z()); } // correct for outer ring being closer @@ -269,8 +269,8 @@ void PixelCPEFast::fillParamsForGpu() { #endif // EDM_ML_DEBUG // fill Layer and ladders geometry - memcpy(m_layerGeometry.layerStart, phase1PixelTopology::layerStart, sizeof(phase1PixelTopology::layerStart)); - memcpy(m_layerGeometry.layer, phase1PixelTopology::layer.data(), phase1PixelTopology::layer.size()); + memcpy(layerGeometry_.layerStart, phase1PixelTopology::layerStart, sizeof(phase1PixelTopology::layerStart)); + memcpy(layerGeometry_.layer, phase1PixelTopology::layer.data(), phase1PixelTopology::layer.size()); } PixelCPEFast::GPUData::~GPUData() { @@ -374,7 +374,7 @@ LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, ClusterParam cp.Q_l_Y[0] = Q_l_Y; auto ind = theDetParam.theDet->index(); - pixelCPEforGPU::position(m_commonParamsGPU, m_detParamsGPU[ind], cp, 0); + pixelCPEforGPU::position(commonParamsGPU_, detParamsGPU_[ind], cp, 0); auto xPos = cp.xpos[0]; auto yPos = cp.ypos[0]; From 7717f1baffe23c885719c53dfc7d8578d3a8f6d6 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 29 Dec 2020 18:24:20 +0100 Subject: [PATCH 148/149] Clean up the pixel local reconstruction code (#601) Address the pixel local reconstruction review comments: - update method names according to the coding rules. 
--- .../interface/SiPixelGainCalibrationForHLTGPU.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index 6fb487a244e71..aea0f4d8b0c63 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -15,8 +15,8 @@ class SiPixelGainCalibrationForHLTGPU { ~SiPixelGainCalibrationForHLTGPU(); const SiPixelGainForHLTonGPU *getGPUProductAsync(cudaStream_t cudaStream) const; - const SiPixelGainForHLTonGPU *getCPUProduct() const { return gainForHLTonHost_; } - const SiPixelGainCalibrationForHLT *getOriginalProduct() { return gains_; } + const SiPixelGainForHLTonGPU *cpuProduct() const { return gainForHLTonHost_; } + const SiPixelGainCalibrationForHLT *originalProduct() { return gains_; } private: const SiPixelGainCalibrationForHLT *gains_ = nullptr; From 2e8e5fd76b0dbfb8abf86ba42de0615ef1e70918 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 30 Dec 2020 17:31:37 +0100 Subject: [PATCH 149/149] Clean up the pixel local reconstruction code (cms-patatrack#602) Address the pixel local reconstruction review comments: - remove obsolete comments; - consistently use named constants; - rename data members and methods to be more descriptive; - rename local variables according to the coding rules and for consistency with cms-sw#32591; - update transient dictionaries to match data types. 
--- CUDADataFormats/Common/src/classes_def.xml | 4 +- .../interface/TrackingRecHit2DHeterogeneous.h | 12 +++--- .../interface/TrackingRecHit2DSOAView.h | 8 ++-- .../SiPixelROCsStatusAndMappingWrapper.h | 1 - .../src/SiPixelROCsStatusAndMappingWrapper.cc | 9 ++-- .../interface/SiPixelROCsStatusAndMapping.h | 1 - .../SiPixelRecHits/interface/PixelCPEFast.h | 8 ++-- .../SiPixelRecHits/interface/pixelCPEforGPU.h | 42 +++++++++---------- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 16 +++---- .../SiPixelRecHits/src/PixelCPEFast.cc | 38 ++++++++--------- 10 files changed, 68 insertions(+), 71 deletions(-) diff --git a/CUDADataFormats/Common/src/classes_def.xml b/CUDADataFormats/Common/src/classes_def.xml index 024d927595914..d8514251c807a 100644 --- a/CUDADataFormats/Common/src/classes_def.xml +++ b/CUDADataFormats/Common/src/classes_def.xml @@ -1,4 +1,4 @@ - - + + diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index 72a136ab5f5b6..967b5c6c8282f 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -10,7 +10,7 @@ class TrackingRecHit2DHeterogeneous { template using unique_ptr = typename Traits::template unique_ptr; - using Hist = TrackingRecHit2DSOAView::Hist; + using PhiBinner = TrackingRecHit2DSOAView::PhiBinner; TrackingRecHit2DHeterogeneous() = default; @@ -33,7 +33,7 @@ class TrackingRecHit2DHeterogeneous { auto hitsModuleStart() const { return m_hitsModuleStart; } auto hitsLayerStart() { return m_hitsLayerStart; } - auto phiBinner() { return m_hist; } + auto phiBinner() { return m_phiBinner; } auto iphi() { return m_iphi; } // only the local coord and detector index @@ -48,7 +48,7 @@ class TrackingRecHit2DHeterogeneous { unique_ptr m_store16; //! unique_ptr m_store32; //! - unique_ptr m_HistStore; //! + unique_ptr m_PhiBinnerStore; //! 
unique_ptr m_AverageGeometryStore; //! unique_ptr m_view; //! @@ -58,7 +58,7 @@ class TrackingRecHit2DHeterogeneous { uint32_t const* m_hitsModuleStart; // needed for legacy, this is on GPU! // needed as kernel params... - Hist* m_hist; + PhiBinner* m_phiBinner; uint32_t* m_hitsLayerStart; int16_t* m_iphi; }; @@ -98,13 +98,13 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH // so unless proven VERY inefficient we keep it ordered as generated m_store16 = Traits::template make_device_unique(nHits * n16, stream); m_store32 = Traits::template make_device_unique(nHits * n32 + 11, stream); - m_HistStore = Traits::template make_device_unique(stream); + m_PhiBinnerStore = Traits::template make_device_unique(stream); auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; auto get32 = [&](int i) { return m_store32.get() + i * nHits; }; // copy all the pointers - m_hist = view->m_hist = m_HistStore.get(); + m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get(); view->m_xl = get32(0); view->m_yl = get32(1); diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h index 53297a78a428f..7f3c59cd70faf 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h @@ -20,8 +20,6 @@ class TrackingRecHit2DSOAView { using PhiBinner = cms::cuda::HistoContainer; - using Hist = PhiBinner; // FIXME - using AverageGeometry = phase1PixelTopology::AverageGeometry; template @@ -67,8 +65,8 @@ class TrackingRecHit2DSOAView { __device__ __forceinline__ uint32_t* hitsLayerStart() { return m_hitsLayerStart; } __device__ __forceinline__ uint32_t const* hitsLayerStart() const { return m_hitsLayerStart; } - __device__ __forceinline__ Hist& phiBinner() { return *m_hist; } - __device__ __forceinline__ Hist const& phiBinner() const { return *m_hist; } + __device__ __forceinline__ 
PhiBinner& phiBinner() { return *m_phiBinner; } + __device__ __forceinline__ PhiBinner const& phiBinner() const { return *m_phiBinner; } __device__ __forceinline__ AverageGeometry& averageGeometry() { return *m_averageGeometry; } __device__ __forceinline__ AverageGeometry const& averageGeometry() const { return *m_averageGeometry; } @@ -96,7 +94,7 @@ class TrackingRecHit2DSOAView { uint32_t* m_hitsLayerStart; - PhiBinner* m_hist; // FIXME use a more descriptive name consistently + PhiBinner* m_phiBinner; uint32_t m_nHits; }; diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h index 5f875d7dff5a9..f7555a75d9bec 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h @@ -14,7 +14,6 @@ class SiPixelFedCablingMap; class TrackerGeometry; class SiPixelQuality; -// TODO: since this has more information than just cabling map, maybe we should invent a better name? 
class SiPixelROCsStatusAndMappingWrapper { public: SiPixelROCsStatusAndMappingWrapper(SiPixelFedCablingMap const &cablingMap, diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc index 2437696656d25..665d31b97ead2 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelROCsStatusAndMappingWrapper.cc @@ -8,6 +8,7 @@ #include // CMSSW includes +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" @@ -51,8 +52,8 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe else cablingMapHost->badRocs[index] = false; } else { // store some dummy number - cablingMapHost->rawId[index] = 9999; - cablingMapHost->rocInDet[index] = 9999; + cablingMapHost->rawId[index] = gpuClustering::invalidModuleId; + cablingMapHost->rocInDet[index] = gpuClustering::invalidModuleId; cablingMapHost->badRocs[index] = true; modToUnpDefault[index] = true; } @@ -70,8 +71,8 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelFe // idinLnk varies between 1 to 8 for (int i = 1; i < index; i++) { - if (cablingMapHost->rawId[i] == 9999) { - cablingMapHost->moduleId[i] = 9999; + if (cablingMapHost->rawId[i] == gpuClustering::invalidModuleId) { + cablingMapHost->moduleId[i] = gpuClustering::invalidModuleId; } else { /* std::cout << cablingMapHost->rawId[i] << std::endl; diff --git a/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h b/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h index a0771aaefb366..f7cd8dedca941 100644 --- 
a/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h +++ b/CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h @@ -11,7 +11,6 @@ namespace pixelgpudetails { constexpr unsigned int MAX_SIZE_BYTE_BOOL = MAX_SIZE * sizeof(unsigned char); } // namespace pixelgpudetails -// TODO: since this has more information than just cabling map, maybe we should invent a better name? struct SiPixelROCsStatusAndMapping { alignas(128) unsigned int fed[pixelgpudetails::MAX_SIZE]; alignas(128) unsigned int link[pixelgpudetails::MAX_SIZE]; diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index f50db3af11868..bf85f6c74ebd9 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -60,10 +60,10 @@ class PixelCPEFast final : public PixelCPEBase { void errorFromTemplates(DetParam const &theDetParam, ClusterParamGeneric &theClusterParam, float qclus) const; static void collect_edge_charges(ClusterParam &theClusterParam, //!< input, the cluster - int &Q_f_X, //!< output, Q first in X - int &Q_l_X, //!< output, Q last in X - int &Q_f_Y, //!< output, Q first in Y - int &Q_l_Y, //!< output, Q last in Y + int &q_f_X, //!< output, Q first in X + int &q_l_X, //!< output, Q last in X + int &q_f_Y, //!< output, Q first in Y + int &q_l_Y, //!< output, Q last in Y bool truncate); const float edgeClusterErrorX_; diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index f655329d02013..03e136d8d23ef 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -81,10 +81,10 @@ namespace pixelCPEforGPU { uint32_t minCol[N]; uint32_t maxCol[N]; - int32_t Q_f_X[N]; - int32_t Q_l_X[N]; - int32_t Q_f_Y[N]; - int32_t Q_l_Y[N]; + int32_t q_f_X[N]; + int32_t q_l_X[N]; + 
int32_t q_f_Y[N]; + int32_t q_l_Y[N]; int32_t charge[N]; @@ -114,8 +114,8 @@ namespace pixelCPEforGPU { } constexpr inline float correction(int sizeM1, - int Q_f, //!< Charge in the first pixel. - int Q_l, //!< Charge in the last pixel. + int q_f, //!< Charge in the first pixel. + int q_l, //!< Charge in the last pixel. uint16_t upper_edge_first_pix, //!< As the name says. uint16_t lower_edge_last_pix, //!< As the name says. float lorentz_shift, //!< L-shift at half thickness @@ -134,16 +134,16 @@ namespace pixelCPEforGPU { //--- Width of the clusters minus the edge (first and last) pixels. //--- In the note, they are denoted x_F and x_L (and y_F and y_L) // assert(lower_edge_last_pix >= upper_edge_first_pix); - auto W_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm + auto w_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm //--- Predicted charge width from geometry - auto W_pred = theThickness * cot_angle // geometric correction (in cm) + auto w_pred = theThickness * cot_angle // geometric correction (in cm) - lorentz_shift; // (in cm) &&& check fpix! - w_eff = std::abs(W_pred) - W_inner; + w_eff = std::abs(w_pred) - w_inner; //--- If the observed charge width is inconsistent with the expectations - //--- based on the track, do *not* use W_pred-W_inner. Instead, replace + //--- based on the track, do *not* use w_pred-w_inner. Instead, replace //--- it with an *average* effective charge width, which is the average //--- length of the edge pixels. 
@@ -162,14 +162,14 @@ namespace pixelCPEforGPU { } //--- Finally, compute the position in this projection - float Qdiff = Q_l - Q_f; - float Qsum = Q_l + Q_f; + float qdiff = q_l - q_f; + float qsum = q_l + q_f; //--- Temporary fix for clusters with both first and last pixel with charge = 0 - if (Qsum == 0) - Qsum = 1.0f; + if (qsum == 0) + qsum = 1.0f; - return 0.5f * (Qdiff / Qsum) * w_eff; + return 0.5f * (qdiff / qsum) * w_eff; } constexpr inline void position(CommonParams const& __restrict__ comParams, @@ -206,8 +206,8 @@ namespace pixelCPEforGPU { if (phase1PixelTopology::isBigPixY(cp.maxCol[ic])) ++ysize; - int unbalanceX = 8. * std::abs(float(cp.Q_f_X[ic] - cp.Q_l_X[ic])) / float(cp.Q_f_X[ic] + cp.Q_l_X[ic]); - int unbalanceY = 8. * std::abs(float(cp.Q_f_Y[ic] - cp.Q_l_Y[ic])) / float(cp.Q_f_Y[ic] + cp.Q_l_Y[ic]); + int unbalanceX = 8. * std::abs(float(cp.q_f_X[ic] - cp.q_l_X[ic])) / float(cp.q_f_X[ic] + cp.q_l_X[ic]); + int unbalanceY = 8. * std::abs(float(cp.q_f_Y[ic] - cp.q_l_Y[ic])) / float(cp.q_f_Y[ic] + cp.q_l_Y[ic]); xsize = 8 * xsize - unbalanceX; ysize = 8 * ysize - unbalanceY; @@ -230,8 +230,8 @@ namespace pixelCPEforGPU { auto thickness = detParams.isBarrel ? 
comParams.theThicknessB : comParams.theThicknessE; auto xcorr = correction(cp.maxRow[ic] - cp.minRow[ic], - cp.Q_f_X[ic], - cp.Q_l_X[ic], + cp.q_f_X[ic], + cp.q_l_X[ic], llxl, urxl, detParams.chargeWidthX, // lorentz shift in cm @@ -242,8 +242,8 @@ namespace pixelCPEforGPU { phase1PixelTopology::isBigPixX(cp.maxRow[ic])); auto ycorr = correction(cp.maxCol[ic] - cp.minCol[ic], - cp.Q_f_Y[ic], - cp.Q_l_Y[ic], + cp.q_f_Y[ic], + cp.q_l_Y[ic], llyl, uryl, detParams.chargeWidthY, // lorentz shift in cm diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 89a40c8723ae3..2401fed6c5171 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -99,10 +99,10 @@ namespace gpuPixelRecHits { clusParams.minCol[ic] = std::numeric_limits::max(); clusParams.maxCol[ic] = 0; clusParams.charge[ic] = 0; - clusParams.Q_f_X[ic] = 0; - clusParams.Q_l_X[ic] = 0; - clusParams.Q_f_Y[ic] = 0; - clusParams.Q_l_Y[ic] = 0; + clusParams.q_f_X[ic] = 0; + clusParams.q_l_X[ic] = 0; + clusParams.q_f_Y[ic] = 0; + clusParams.q_l_Y[ic] = 0; } __syncthreads(); @@ -149,13 +149,13 @@ namespace gpuPixelRecHits { auto ch = std::min(digis.adc(i), pixmx); atomicAdd(&clusParams.charge[cl], ch); if (clusParams.minRow[cl] == x) - atomicAdd(&clusParams.Q_f_X[cl], ch); + atomicAdd(&clusParams.q_f_X[cl], ch); if (clusParams.maxRow[cl] == x) - atomicAdd(&clusParams.Q_l_X[cl], ch); + atomicAdd(&clusParams.q_l_X[cl], ch); if (clusParams.minCol[cl] == y) - atomicAdd(&clusParams.Q_f_Y[cl], ch); + atomicAdd(&clusParams.q_f_Y[cl], ch); if (clusParams.maxCol[cl] == y) - atomicAdd(&clusParams.Q_l_Y[cl], ch); + atomicAdd(&clusParams.q_l_Y[cl], ch); } __syncthreads(); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 3a57ce120b545..0077c0748ca28 100644 --- 
a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -354,11 +354,11 @@ LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, ClusterParam theClusterParam.qBin_ = 0; } - int Q_f_X; //!< Q of the first pixel in X - int Q_l_X; //!< Q of the last pixel in X - int Q_f_Y; //!< Q of the first pixel in Y - int Q_l_Y; //!< Q of the last pixel in Y - collect_edge_charges(theClusterParam, Q_f_X, Q_l_X, Q_f_Y, Q_l_Y, useErrorsFromTemplates_ && truncatePixelCharge_); + int q_f_X; //!< Q of the first pixel in X + int q_l_X; //!< Q of the last pixel in X + int q_f_Y; //!< Q of the first pixel in Y + int q_l_Y; //!< Q of the last pixel in Y + collect_edge_charges(theClusterParam, q_f_X, q_l_X, q_f_Y, q_l_Y, useErrorsFromTemplates_ && truncatePixelCharge_); // do GPU like ... pixelCPEforGPU::ClusParams cp; @@ -368,10 +368,10 @@ LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, ClusterParam cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); - cp.Q_f_X[0] = Q_f_X; - cp.Q_l_X[0] = Q_l_X; - cp.Q_f_Y[0] = Q_f_Y; - cp.Q_l_Y[0] = Q_l_Y; + cp.q_f_X[0] = q_f_X; + cp.q_l_X[0] = q_l_X; + cp.q_f_Y[0] = q_f_Y; + cp.q_l_Y[0] = q_l_Y; auto ind = theDetParam.theDet->index(); pixelCPEforGPU::position(commonParamsGPU_, detParamsGPU_[ind], cp, 0); @@ -392,16 +392,16 @@ LocalPoint PixelCPEFast::localPosition(DetParam const& theDetParam, ClusterParam //! and the inner cluster charge, projected in x and y. 
//----------------------------------------------------------------------------- void PixelCPEFast::collect_edge_charges(ClusterParam& theClusterParamBase, //!< input, the cluster - int& Q_f_X, //!< output, Q first in X - int& Q_l_X, //!< output, Q last in X - int& Q_f_Y, //!< output, Q first in Y - int& Q_l_Y, //!< output, Q last in Y + int& q_f_X, //!< output, Q first in X + int& q_l_X, //!< output, Q last in X + int& q_f_Y, //!< output, Q first in Y + int& q_l_Y, //!< output, Q last in Y bool truncate) { ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); // Initialize return variables. - Q_f_X = Q_l_X = 0; - Q_f_Y = Q_l_Y = 0; + q_f_X = q_l_X = 0; + q_f_Y = q_l_Y = 0; // Obtain boundaries in index units int xmin = theClusterParam.theCluster->minPixelRow(); @@ -421,15 +421,15 @@ void PixelCPEFast::collect_edge_charges(ClusterParam& theClusterParamBase, //!< // // X projection if (pixel.x == xmin) - Q_f_X += pix_adc; + q_f_X += pix_adc; if (pixel.x == xmax) - Q_l_X += pix_adc; + q_l_X += pix_adc; // // Y projection if (pixel.y == ymin) - Q_f_Y += pix_adc; + q_f_Y += pix_adc; if (pixel.y == ymax) - Q_l_Y += pix_adc; + q_l_Y += pix_adc; } }