Skip to content

Commit

Permalink
Merge pull request #31721 from cms-patatrack/patatrack_integration_9_…
Browse files Browse the repository at this point in the history
…N_pixel_local_reco

Patatrack integration - Pixel local reconstruction (9/N)
  • Loading branch information
cmsbuild authored Jan 11, 2021
2 parents bc13284 + 2e8e5fd commit dd931b1
Show file tree
Hide file tree
Showing 115 changed files with 6,466 additions and 148 deletions.
2 changes: 2 additions & 0 deletions CUDADataFormats/Common/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
<iftool name="cuda">
<use name="rootcore"/>
<use name="DataFormats/Common"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<export>
<lib name="1"/>
Expand Down
7 changes: 7 additions & 0 deletions CUDADataFormats/Common/src/classes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#ifndef CUDADataFormats_Common_src_classes_h
#define CUDADataFormats_Common_src_classes_h

#include "CUDADataFormats/Common/interface/HostProduct.h"
#include "DataFormats/Common/interface/Wrapper.h"

#endif // CUDADataFormats_Common_src_classes_h
4 changes: 4 additions & 0 deletions CUDADataFormats/Common/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<lcgdict>
<class name="HostProduct<uint32_t[]>" persistent="false"/>
<class name="edm::Wrapper<HostProduct<uint32_t[]>>" persistent="false"/>
</lcgdict>
9 changes: 9 additions & 0 deletions CUDADataFormats/SiPixelCluster/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<use name="cuda"/>
<use name="rootcore"/>
<use name="CUDADataFormats/Common"/>
<use name="DataFormats/Common"/>
<use name="HeterogeneousCore/CUDAUtilities"/>

<export>
<lib name="1"/>
</export>
63 changes: 63 additions & 0 deletions CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

#include <cuda_runtime.h>

class SiPixelClustersCUDA {
public:
SiPixelClustersCUDA() = default;
explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream);
~SiPixelClustersCUDA() = default;

SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete;
SiPixelClustersCUDA &operator=(const SiPixelClustersCUDA &) = delete;
SiPixelClustersCUDA(SiPixelClustersCUDA &&) = default;
SiPixelClustersCUDA &operator=(SiPixelClustersCUDA &&) = default;

void setNClusters(uint32_t nClusters) { nClusters_h = nClusters; }

uint32_t nClusters() const { return nClusters_h; }

uint32_t *moduleStart() { return moduleStart_d.get(); }
uint32_t *clusInModule() { return clusInModule_d.get(); }
uint32_t *moduleId() { return moduleId_d.get(); }
uint32_t *clusModuleStart() { return clusModuleStart_d.get(); }

uint32_t const *moduleStart() const { return moduleStart_d.get(); }
uint32_t const *clusInModule() const { return clusInModule_d.get(); }
uint32_t const *moduleId() const { return moduleId_d.get(); }
uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); }

class DeviceConstView {
public:
__device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_ + i); }
__device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_ + i); }
__device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_ + i); }
__device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_ + i); }

uint32_t const *moduleStart_;
uint32_t const *clusInModule_;
uint32_t const *moduleId_;
uint32_t const *clusModuleStart_;
};

DeviceConstView *view() const { return view_d.get(); }

private:
cms::cuda::device::unique_ptr<uint32_t[]> moduleStart_d; // index of the first pixel of each module
cms::cuda::device::unique_ptr<uint32_t[]> clusInModule_d; // number of clusters found in each module
cms::cuda::device::unique_ptr<uint32_t[]> moduleId_d; // module id of each module

// originally from rechits
cms::cuda::device::unique_ptr<uint32_t[]> clusModuleStart_d; // index of the first cluster of each module

cms::cuda::device::unique_ptr<DeviceConstView> view_d; // "me" pointer

uint32_t nClusters_h = 0;
};

#endif // CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
37 changes: 37 additions & 0 deletions CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h
#define CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h

#include <cstdint>
#include <limits>

namespace pixelGPUConstants {
#ifdef GPU_SMALL_EVENTS
// kept for testing and debugging
constexpr uint32_t maxNumberOfHits = 24 * 1024;
#else
// data at pileup 50 has 18300 +/- 3500 hits; 40000 is around 6 sigma away
// tested on MC events with 55-75 pileup events
constexpr uint32_t maxNumberOfHits = 48 * 1024;
#endif
} // namespace pixelGPUConstants

namespace gpuClustering {
#ifdef GPU_SMALL_EVENTS
// kept for testing and debugging
constexpr uint32_t maxHitsInIter() { return 64; }
#else
// optimized for real data PU 50
// tested on MC events with 55-75 pileup events
constexpr uint32_t maxHitsInIter() { return 160; }
#endif
constexpr uint32_t maxHitsInModule() { return 1024; }

constexpr uint16_t maxNumModules = 2000;
constexpr int32_t maxNumClustersPerModules = maxHitsInModule();
constexpr uint32_t maxNumClusters = pixelGPUConstants::maxNumberOfHits;
constexpr uint16_t invalidModuleId = std::numeric_limits<uint16_t>::max() - 1;
static_assert(invalidModuleId > maxNumModules); // invalidModuleId must be > maxNumModules

} // namespace gpuClustering

#endif // CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h
19 changes: 19 additions & 0 deletions CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream)
: moduleStart_d(cms::cuda::make_device_unique<uint32_t[]>(maxModules + 1, stream)),
clusInModule_d(cms::cuda::make_device_unique<uint32_t[]>(maxModules, stream)),
moduleId_d(cms::cuda::make_device_unique<uint32_t[]>(maxModules, stream)),
clusModuleStart_d(cms::cuda::make_device_unique<uint32_t[]>(maxModules + 1, stream)) {
auto view = cms::cuda::make_host_unique<DeviceConstView>(stream);
view->moduleStart_ = moduleStart_d.get();
view->clusInModule_ = clusInModule_d.get();
view->moduleId_ = moduleId_d.get();
view->clusModuleStart_ = clusModuleStart_d.get();

view_d = cms::cuda::make_device_unique<DeviceConstView>(stream);
cms::cuda::copyAsync(view_d, view, stream);
}
8 changes: 8 additions & 0 deletions CUDADataFormats/SiPixelCluster/src/classes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef CUDADataFormats_SiPixelCluster_src_classes_h
#define CUDADataFormats_SiPixelCluster_src_classes_h

#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "DataFormats/Common/interface/Wrapper.h"

#endif // CUDADataFormats_SiPixelCluster_src_classes_h
4 changes: 4 additions & 0 deletions CUDADataFormats/SiPixelCluster/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<lcgdict>
<class name="cms::cuda::Product<SiPixelClustersCUDA>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<SiPixelClustersCUDA>>" persistent="false"/>
</lcgdict>
10 changes: 10 additions & 0 deletions CUDADataFormats/SiPixelDigi/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<use name="cuda"/>
<use name="rootcore"/>
<use name="CUDADataFormats/Common"/>
<use name="DataFormats/Common"/>
<use name="DataFormats/SiPixelRawData"/>
<use name="HeterogeneousCore/CUDAUtilities"/>

<export>
<lib name="1"/>
</export>
42 changes: 42 additions & 0 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h

#include <cuda_runtime.h>

#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

class SiPixelDigiErrorsCUDA {
public:
using SiPixelErrorCompactVector = cms::cuda::SimpleVector<SiPixelErrorCompact>;

SiPixelDigiErrorsCUDA() = default;
explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream);
~SiPixelDigiErrorsCUDA() = default;

SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete;
SiPixelDigiErrorsCUDA& operator=(const SiPixelDigiErrorsCUDA&) = delete;
SiPixelDigiErrorsCUDA(SiPixelDigiErrorsCUDA&&) = default;
SiPixelDigiErrorsCUDA& operator=(SiPixelDigiErrorsCUDA&&) = default;

const SiPixelFormatterErrors& formatterErrors() const { return formatterErrors_h; }

SiPixelErrorCompactVector* error() { return error_d.get(); }
SiPixelErrorCompactVector const* error() const { return error_d.get(); }

using HostDataError = std::pair<SiPixelErrorCompactVector, cms::cuda::host::unique_ptr<SiPixelErrorCompact[]>>;
HostDataError dataErrorToHostAsync(cudaStream_t stream) const;

void copyErrorToHostAsync(cudaStream_t stream);

private:
cms::cuda::device::unique_ptr<SiPixelErrorCompact[]> data_d;
cms::cuda::device::unique_ptr<SiPixelErrorCompactVector> error_d;
cms::cuda::host::unique_ptr<SiPixelErrorCompactVector> error_h;
SiPixelFormatterErrors formatterErrors_h;
};

#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
85 changes: 85 additions & 0 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h

#include <cuda_runtime.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

class SiPixelDigisCUDA {
public:
SiPixelDigisCUDA() = default;
explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
~SiPixelDigisCUDA() = default;

SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete;
SiPixelDigisCUDA &operator=(const SiPixelDigisCUDA &) = delete;
SiPixelDigisCUDA(SiPixelDigisCUDA &&) = default;
SiPixelDigisCUDA &operator=(SiPixelDigisCUDA &&) = default;

void setNModulesDigis(uint32_t nModules, uint32_t nDigis) {
nModules_h = nModules;
nDigis_h = nDigis;
}

uint32_t nModules() const { return nModules_h; }
uint32_t nDigis() const { return nDigis_h; }

uint16_t *xx() { return xx_d.get(); }
uint16_t *yy() { return yy_d.get(); }
uint16_t *adc() { return adc_d.get(); }
uint16_t *moduleInd() { return moduleInd_d.get(); }
int32_t *clus() { return clus_d.get(); }
uint32_t *pdigi() { return pdigi_d.get(); }
uint32_t *rawIdArr() { return rawIdArr_d.get(); }

uint16_t const *xx() const { return xx_d.get(); }
uint16_t const *yy() const { return yy_d.get(); }
uint16_t const *adc() const { return adc_d.get(); }
uint16_t const *moduleInd() const { return moduleInd_d.get(); }
int32_t const *clus() const { return clus_d.get(); }
uint32_t const *pdigi() const { return pdigi_d.get(); }
uint32_t const *rawIdArr() const { return rawIdArr_d.get(); }

cms::cuda::host::unique_ptr<uint16_t[]> adcToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<int32_t[]> clusToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<uint32_t[]> pdigiToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<uint32_t[]> rawIdArrToHostAsync(cudaStream_t stream) const;

class DeviceConstView {
public:
__device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_ + i); }
__device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_ + i); }
__device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_ + i); }
__device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_ + i); }
__device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_ + i); }

uint16_t const *xx_;
uint16_t const *yy_;
uint16_t const *adc_;
uint16_t const *moduleInd_;
int32_t const *clus_;
};

const DeviceConstView *view() const { return view_d.get(); }

private:
// These are consumed by downstream device code
cms::cuda::device::unique_ptr<uint16_t[]> xx_d; // local coordinates of each pixel
cms::cuda::device::unique_ptr<uint16_t[]> yy_d; //
cms::cuda::device::unique_ptr<uint16_t[]> adc_d; // ADC of each pixel
cms::cuda::device::unique_ptr<uint16_t[]> moduleInd_d; // module id of each pixel
cms::cuda::device::unique_ptr<int32_t[]> clus_d; // cluster id of each pixel
cms::cuda::device::unique_ptr<DeviceConstView> view_d; // "me" pointer

// These are for CPU output; should we (eventually) place them to a
// separate product?
cms::cuda::device::unique_ptr<uint32_t[]> pdigi_d; // packed digi (row, col, adc) of each pixel
cms::cuda::device::unique_ptr<uint32_t[]> rawIdArr_d; // DetId of each pixel

uint32_t nModules_h = 0;
uint32_t nDigis_h = 0;
};

#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
40 changes: 40 additions & 0 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <cassert>

#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/memsetAsync.h"

SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream)
: data_d(cms::cuda::make_device_unique<SiPixelErrorCompact[]>(maxFedWords, stream)),
error_d(cms::cuda::make_device_unique<SiPixelErrorCompactVector>(stream)),
error_h(cms::cuda::make_host_unique<SiPixelErrorCompactVector>(stream)),
formatterErrors_h(std::move(errors)) {
cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream);

cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get());
assert(error_h->empty());
assert(error_h->capacity() == static_cast<int>(maxFedWords));

cms::cuda::copyAsync(error_d, error_h, stream);
}

void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) {
cms::cuda::copyAsync(error_h, error_d, stream);
}

SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const {
// On one hand size() could be sufficient. On the other hand, if
// someone copies the SimpleVector<>, (s)he might expect the data
// buffer to actually have space for capacity() elements.
auto data = cms::cuda::make_host_unique<SiPixelErrorCompact[]>(error_h->capacity(), stream);

// but transfer only the required amount
if (not error_h->empty()) {
cms::cuda::copyAsync(data, data_d, error_h->size(), stream);
}
auto err = *error_h;
err.set_data(data.get());
return HostDataError(err, std::move(data));
}
46 changes: 46 additions & 0 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream)
: xx_d(cms::cuda::make_device_unique<uint16_t[]>(maxFedWords, stream)),
yy_d(cms::cuda::make_device_unique<uint16_t[]>(maxFedWords, stream)),
adc_d(cms::cuda::make_device_unique<uint16_t[]>(maxFedWords, stream)),
moduleInd_d(cms::cuda::make_device_unique<uint16_t[]>(maxFedWords, stream)),
clus_d(cms::cuda::make_device_unique<int32_t[]>(maxFedWords, stream)),
view_d(cms::cuda::make_device_unique<DeviceConstView>(stream)),
pdigi_d(cms::cuda::make_device_unique<uint32_t[]>(maxFedWords, stream)),
rawIdArr_d(cms::cuda::make_device_unique<uint32_t[]>(maxFedWords, stream)) {
auto view = cms::cuda::make_host_unique<DeviceConstView>(stream);
view->xx_ = xx_d.get();
view->yy_ = yy_d.get();
view->adc_ = adc_d.get();
view->moduleInd_ = moduleInd_d.get();
view->clus_ = clus_d.get();
cms::cuda::copyAsync(view_d, view, stream);
}

cms::cuda::host::unique_ptr<uint16_t[]> SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<uint16_t[]>(nDigis(), stream);
cms::cuda::copyAsync(ret, adc_d, nDigis(), stream);
return ret;
}

cms::cuda::host::unique_ptr<int32_t[]> SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<int32_t[]>(nDigis(), stream);
cms::cuda::copyAsync(ret, clus_d, nDigis(), stream);
return ret;
}

cms::cuda::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<uint32_t[]>(nDigis(), stream);
cms::cuda::copyAsync(ret, pdigi_d, nDigis(), stream);
return ret;
}

cms::cuda::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<uint32_t[]>(nDigis(), stream);
cms::cuda::copyAsync(ret, rawIdArr_d, nDigis(), stream);
return ret;
}
Loading

0 comments on commit dd931b1

Please sign in to comment.