Skip to content

Commit

Permalink
Prototype for EventSetup data on GPUs (#77)
Browse files Browse the repository at this point in the history
Adds a prototype for dealing with EventSetup data on GPUs. The prototype is applied to the ES data used by Raw2Cluster (cabling map etc, gains) and RecHits (CPE).

Now it is the `ESProduct` who owns the GPU memory. Currently each of the affected `ESProducts` have a method `getGPUProductAsync(cuda::stream_t<>&)` that will allocate the memory on the current GPU device and transfer the data there asynchronously, if the data is not there yet. The functionality of bookkeeping which devices have the data already, and necessary synchronization between multiple threads (only one thread may do the transfer per device) are abstracted to a helper template in `HeterogeneousCore/CUDACore/interface/CUDAESProduct.h`.

Technical changes:
  - `EventSetup`-based implementation for GPU cabling map, gains, etc
  - add support for multiple devices to `PixelCPEFast`
  - abstract the `EeventSetup` GPU transfer
  - move `malloc` and transfer to the lambda
  - move `cudaFree` outside of the `nullptr` check
  - move files (back) to the plusing directory
  - rename `siPixelDigisHeterogeneous` to `siPixelClustersHeterogeneous`
  • Loading branch information
makortel authored and fwyzard committed Jun 14, 2018
1 parent 655e4ed commit 955d9df
Show file tree
Hide file tree
Showing 44 changed files with 803 additions and 415 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h
#define CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h

#include "FWCore/Framework/interface/EventSetupRecordImplementation.h"
#include "FWCore/Framework/interface/DependentRecordImplementation.h"

#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h"
#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h"

#include "boost/mpl/vector.hpp"

class SiPixelGainCalibrationForHLTGPURcd : public edm::eventsetup::DependentRecordImplementation<SiPixelGainCalibrationForHLTGPURcd, boost::mpl::vector<SiPixelGainCalibrationForHLTRcd, TrackerDigiGeometryRecord> > {};

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h"
#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h"
#include "FWCore/Utilities/interface/typelookup.h"

EVENTSETUP_RECORD_REG(SiPixelGainCalibrationForHLTGPURcd);
2 changes: 2 additions & 0 deletions CalibTracker/SiPixelESProducers/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
<use name="DataFormats/SiPixelDigi"/>
<use name="CalibTracker/Records"/>
<use name="MagneticField/VolumeBasedEngine"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="boost"/>
<use name="cuda-api-wrappers"/>
<export>
<lib name="1"/>
</export>
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef CalibTracker_SiPixelESProducers_SiPixelGainCalibrationForHLTGPU_H
#define CalibTracker_SiPixelESProducers_SiPixelGainCalibrationForHLTGPU_H

#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h"
#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"

#include <cuda/api_wrappers.h>

class SiPixelGainCalibrationForHLT;
class SiPixelGainForHLTonGPU;
struct SiPixelGainForHLTonGPU_DecodingStructure;
class TrackerGeometry;

class SiPixelGainCalibrationForHLTGPU {
public:
explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom);
~SiPixelGainCalibrationForHLTGPU();

const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;

private:
const SiPixelGainCalibrationForHLT *gains_ = nullptr;
SiPixelGainForHLTonGPU *gainForHLTonHost_ = nullptr;
struct GPUData {
~GPUData();
SiPixelGainForHLTonGPU *gainForHLTonGPU = nullptr;
SiPixelGainForHLTonGPU_DecodingStructure *gainDataOnGPU = nullptr;
};
CUDAESProduct<GPUData> gpuData_;
};

#endif
2 changes: 2 additions & 0 deletions CalibTracker/SiPixelESProducers/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
<use name="Geometry/Records"/>
<use name="Geometry/TrackerGeometryBuilder"/>
<use name="CalibTracker/SiPixelESProducers"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="cuda-api-wrappers"/>
<library file="*.cc" name="CalibTrackerSiPixelESProducersPlugins">
<flags EDM_PLUGIN="1"/>
</library>
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h"
#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h"
#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h"
#include "FWCore/Framework/interface/ESProducer.h"
#include "FWCore/Framework/interface/EventSetup.h"
#include "FWCore/Framework/interface/ESHandle.h"
#include "FWCore/Framework/interface/ModuleFactory.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h"

#include <memory>

class SiPixelGainCalibrationForHLTGPUESProducer: public edm::ESProducer {
public:
explicit SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig);
std::unique_ptr<SiPixelGainCalibrationForHLTGPU> produce(const SiPixelGainCalibrationForHLTGPURcd& iRecord);

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
private:
};

SiPixelGainCalibrationForHLTGPUESProducer::SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig) {
setWhatProduced(this);
}

void SiPixelGainCalibrationForHLTGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
descriptions.add("siPixelGainCalibrationForHLTGPU", desc);
}

std::unique_ptr<SiPixelGainCalibrationForHLTGPU> SiPixelGainCalibrationForHLTGPUESProducer::produce(const SiPixelGainCalibrationForHLTGPURcd& iRecord) {
edm::ESHandle<SiPixelGainCalibrationForHLT> gains;
iRecord.getRecord<SiPixelGainCalibrationForHLTRcd>().get(gains);

edm::ESHandle<TrackerGeometry> geom;
iRecord.getRecord<TrackerDigiGeometryRecord>().get(geom);

return std::make_unique<SiPixelGainCalibrationForHLTGPU>(*gains, *geom);
}

#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/Utilities/interface/typelookup.h"
#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h"

DEFINE_FWK_EVENTSETUP_MODULE(SiPixelGainCalibrationForHLTGPUESProducer);
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h"
#include "FWCore/Utilities/interface/typelookup.h"

TYPELOOKUP_DATA_REG(SiPixelGainCalibrationForHLTGPU);
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h"
#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h"
#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
#include "Geometry/CommonDetUnit/interface/GeomDetType.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

#include <cuda.h>

SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains, const TrackerGeometry& geom):
gains_(&gains)
{
// bizzarre logic (looking for fist strip-det) don't ask
auto const & dus = geom.detUnits();
unsigned m_detectors = dus.size();
for(unsigned int i=1;i<7;++i) {
if(geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) != dus.size() &&
dus[geom.offsetDU(GeomDetEnumerators::tkDetEnum[i])]->type().isTrackerStrip()) {
if(geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) < m_detectors) m_detectors = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]);
}
}

/*
std::cout << "caching calibs for " << m_detectors << " pixel detectors of size " << gains.data().size() << std::endl;
std::cout << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure) << std::endl;
*/

cudaCheck(cudaMallocHost((void**) & gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU)));
//gainForHLTonHost_->v_pedestals = gainDataOnGPU_; // how to do this?

// do not read back from the (possibly write-combined) memory buffer
auto minPed = gains.getPedLow();
auto maxPed = gains.getPedHigh();
auto minGain = gains.getGainLow();
auto maxGain = gains.getGainHigh();
auto nBinsToUseForEncoding = 253;

// we will simplify later (not everything is needed....)
gainForHLTonHost_->minPed_ = minPed;
gainForHLTonHost_->maxPed_ = maxPed;
gainForHLTonHost_->minGain_= minGain;
gainForHLTonHost_->maxGain_= maxGain;

gainForHLTonHost_->numberOfRowsAveragedOver_ = 80;
gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding;
gainForHLTonHost_->deadFlag_ = 255;
gainForHLTonHost_->noisyFlag_ = 254;

gainForHLTonHost_->pedPrecision = static_cast<float>(maxPed - minPed) / nBinsToUseForEncoding;
gainForHLTonHost_->gainPrecision = static_cast<float>(maxGain - minGain) / nBinsToUseForEncoding;

/*
std::cout << "precisions g " << gainForHLTonHost_->pedPrecision << ' ' << gainForHLTonHost_->gainPrecision << std::endl;
*/

// fill the index map
auto const & ind = gains.getIndexes();
/*
std::cout << ind.size() << " " << m_detectors << std::endl;
*/

for (auto i=0U; i<m_detectors; ++i) {
auto p = std::lower_bound(ind.begin(),ind.end(),dus[i]->geographicalId().rawId(),SiPixelGainCalibrationForHLT::StrictWeakOrdering());
assert (p!=ind.end() && p->detid==dus[i]->geographicalId());
assert(p->iend<=gains.data().size());
assert(p->iend>=p->ibegin);
assert(0==p->ibegin%2);
assert(0==p->iend%2);
assert(p->ibegin!=p->iend);
assert(p->ncols>0);
gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin,p->iend), p->ncols);
// if (ind[i].detid!=dus[i]->geographicalId()) std::cout << ind[i].detid<<"!="<<dus[i]->geographicalId() << std::endl;
// gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols);
}

}

SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() {
cudaCheck(cudaFreeHost(gainForHLTonHost_));
}

SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() {
cudaCheck(cudaFree(gainForHLTonGPU));
cudaCheck(cudaFree(gainDataOnGPU));
}

const SiPixelGainForHLTonGPU *SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) {
cudaCheck(cudaMalloc((void**) & data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU)));
cudaCheck(cudaMalloc((void**) & data.gainDataOnGPU, this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<>
// gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory
cudaCheck(cudaMemcpyAsync(data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id()));

cudaCheck(cudaMemcpyAsync(data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), &(data.gainDataOnGPU), sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), cudaMemcpyDefault, stream.id()));
});
return data.gainForHLTonGPU;
}
9 changes: 2 additions & 7 deletions Configuration/StandardSequences/python/RawToDigi_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# scenarios. In this case it makes changes for Run 2.

from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import *
from EventFilter.SiPixelRawToDigi.siPixelDigisHeterogeneous_cfi import *

from EventFilter.SiStripRawToDigi.SiStripDigis_cfi import *

Expand Down Expand Up @@ -60,10 +59,6 @@
+tcdsDigis
+onlineMetaDataDigis
)
from Configuration.ProcessModifiers.gpu_cff import gpu
_RawToDigi_gpu = RawToDigi.copy()
_RawToDigi_gpu.replace(siPixelDigis, siPixelDigisHeterogeneous+siPixelDigis)
gpu.toReplaceWith(RawToDigi, _RawToDigi_gpu)

RawToDigi_noTk = cms.Sequence(L1TRawToDigi
+ecalDigis
Expand All @@ -78,10 +73,10 @@
+onlineMetaDataDigis
)

RawToDigi_pixelOnly = cms.Sequence(siPixelDigisHeterogeneous+siPixelDigis)
RawToDigi_pixelOnly = cms.Sequence(siPixelDigis)

scalersRawToDigi.scalersInputTag = 'rawDataCollector'
siPixelDigisHeterogeneous.InputLabel = 'rawDataCollector'
from Configuration.ProcessModifiers.gpu_cff import gpu
(~gpu).toModify(siPixelDigis, InputLabel = 'rawDataCollector')
#false by default anyways ecalDigis.DoRegional = False
ecalDigis.InputLabel = 'rawDataCollector'
Expand Down
12 changes: 1 addition & 11 deletions EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
<use name="EventFilter/SiPixelRawToDigi"/>
<library file="SiPixelDigiToRaw.cc SiPixelRawToDigi.cc" name="EventFilterSiPixelRawToDigiPlugins">
<flags EDM_PLUGIN="1"/>
</library>
<library file="SiPixelRawToClusterHeterogeneous.cc SiPixelFedCablingMapGPU.cc SiPixelRawToClusterGPUKernel.cu SiPixelDigiHeterogeneousConverter.cc SiPixelClusterHeterogeneousConverter.cc" name="EventFilterSiPixelRawToDigiGPUPlugins">
<use name="cuda"/>
<use name="cuda-api-wrappers"/>
<use name="CalibTracker/SiPixelESProducers"/>
<use name="HeterogeneousCore/Producer"/>
<use name="HeterogeneousCore/Product"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="RecoLocalTracker/SiPixelClusterizer"/>
<library file="*.cc" name="EventFilterSiPixelRawToDigiPlugins">
<flags EDM_PLUGIN="1"/>
</library>
Loading

0 comments on commit 955d9df

Please sign in to comment.