Skip to content

Commit

Permalink
Various fixes and cleanup (#101)
Browse files Browse the repository at this point in the history
Fix errors found by cuda-memcheck:
  - properly initialise device memory
  - fix various cudaMemcpy calls

Remove unused debug variables and function declarations, and #ifdef some debug printouts.

Call cudaDeviceReset() before exiting, via the destructor of CUDAService. This explicitly destroys and cleans up all resources associated with the current device, and is useful to check for memory leaks with cuda-memcheck --tool memcheck --leak-check full.
  • Loading branch information
fwyzard committed Oct 19, 2020
1 parent ccc6f95 commit c8e25f4
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,7 @@ namespace pixelgpudetails {
cudaCheck(cudaMalloc((void**) & moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) ));
cudaCheck(cudaMalloc((void**) & clusInModule_d,(MaxNumModules)*sizeof(uint32_t) ));
cudaCheck(cudaMalloc((void**) & moduleId_d, (MaxNumModules)*sizeof(uint32_t) ));

cudaCheck(cudaMalloc((void**) & debug_d, MAX_WORD32_SIZE));
}


SiPixelRawToClusterGPUKernel::~SiPixelRawToClusterGPUKernel() {
// free device memory used for RawToDigi on GPU
Expand All @@ -114,7 +111,6 @@ namespace pixelgpudetails {
cudaCheck(cudaFree(clus_d));
cudaCheck(cudaFree(clusInModule_d));
cudaCheck(cudaFree(moduleId_d));
cudaCheck(cudaFree(debug_d));
}

void SiPixelRawToClusterGPUKernel::initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t *src, unsigned int length) {
Expand Down Expand Up @@ -662,7 +658,7 @@ namespace pixelgpudetails {
cudaCheck(cudaGetLastError());

// calibrated adc
cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(adc_h, adc_d, wordCounter*sizeof(uint16_t), cudaMemcpyDefault, stream.id()));

/*
std::cout
Expand Down Expand Up @@ -700,14 +696,12 @@ namespace pixelgpudetails {
moduleStart_d,
clusInModule_d, moduleId_d,
clus_d,
debug_d,
wordCounter
);

// clusters
cudaCheck(cudaMemcpyAsync(clus_h, clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));


cudaStreamSynchronize(stream.id());
cudaCheck(cudaGetLastError());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,7 @@ namespace pixelgpudetails {
int32_t * clus_d;
uint32_t * clusInModule_d;
uint32_t * moduleId_d;
uint32_t * debug_d;
};


// configuration and memory buffers alocated on the GPU
struct context {
Expand All @@ -234,18 +232,6 @@ namespace pixelgpudetails {
uint32_t * debug_d;
};

// wrapper function to call RawToDigi on the GPU from host side
void RawToDigi_wrapper(context &, const SiPixelFedCablingMapGPU* cablingMapDevice,
SiPixelGainForHLTonGPU * const ped,
const uint32_t wordCounter, uint32_t *word,
const uint32_t fedCounter, uint8_t *fedId_h,
bool convertADCtoElectrons, uint32_t * pdigi_h,
uint32_t *rawIdArr_h, GPU::SimpleVector<error_obj> *error_h,
GPU::SimpleVector<error_obj> *error_h_tmp, error_obj *data_h,
uint16_t * adc_h, int32_t * clus_h,
bool useQualityInfo, bool includeErrors, bool debug,
uint32_t & nModulesActive);

// void initCablingMap();
context initDeviceMemory();
void freeMemory(context &);
Expand Down
3 changes: 1 addition & 2 deletions RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace gpuClustering {
uint16_t const * adc,
uint32_t const * moduleStart,
uint32_t * clusInModule, uint32_t * moduleId,
int32_t * clus, uint32_t * debug,
int32_t * clus,
int numElements)
{
__shared__ bool go;
Expand Down Expand Up @@ -98,7 +98,6 @@ namespace gpuClustering {
if (id[i] == InvId) // not valid
continue;
assert(id[i] == me); // break; // end of module
++debug[i];
auto js = i + 1;
auto jm = jmax[k];
jmax[k] = i + 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablin
}
}

cuda::memory::async::copy(modToUnpHost.data(), modToUnpDevice.get(), modToUnpHost.size() * sizeof(unsigned char), cudaStream.id());
cuda::memory::async::copy(modToUnpDevice.get(), modToUnpHost.data(), modToUnpHost.size() * sizeof(unsigned char), cudaStream.id());
}


Expand Down
2 changes: 2 additions & 0 deletions RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,11 @@ namespace pixelgpudetails {
for (int i=0;i<10;++i) hitsLayerStart_[i]=hitsModuleStart_[phase1PixelTopology::layerStart[i]];
hitsLayerStart_[10]=nhits;

#ifdef GPU_DEBUG
std::cout << "hit layerStart ";
for (int i=0;i<10;++i) std::cout << phase1PixelTopology::layerName[i] << ':' << hitsLayerStart_[i] << ' ';
std::cout << "end:" << hitsLayerStart_[10] << std::endl;
#endif

cudaCheck(cudaMemcpyAsync(gpu_.hitsLayerStart_d, hitsLayerStart_, (11) * sizeof(uint32_t), cudaMemcpyDefault, stream.id()));

Expand Down

0 comments on commit c8e25f4

Please sign in to comment.