Skip to content

Commit

Permalink
Various fixes and cleanup (#87)
Browse files (browse the repository at this point in the history)
  - replace `exclusive_scan` with `memset` + `inclusive_scan` to avoid an invalid read
  - fix memory sizes in allocations and copies
  - add a missing stream synchronize
  - set `recordWatcherUpdatedSinceLastTransfer_` to avoid spurious copies
  • Loading branch information
makortel authored and fwyzard committed Dec 29, 2020
1 parent 7501972 commit 59c32aa
Showing 1 changed file with 2 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,7 +39,7 @@
namespace pixelgpudetails {

SiPixelRawToClusterGPUKernel::SiPixelRawToClusterGPUKernel() {
int WSIZE = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD * sizeof(unsigned int);
int WSIZE = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD;
cudaMallocHost(&word, sizeof(unsigned int)*WSIZE);
cudaMallocHost(&fedId_h, sizeof(unsigned char)*WSIZE);

Expand Down Expand Up @@ -680,8 +680,8 @@ namespace pixelgpudetails {

// std::cout << "found " << nModulesActive << " Modules active" << std::endl;

// TODO: I suspect we need a cudaStreamSynchronize before using nModules below
// In order to avoid the cudaStreamSynchronize, create a new kernel which launches countModules and findClus.
cudaStreamSynchronize(stream.id());

threadsPerBlock = 256;
blocks = nModulesActive;
Expand Down

0 comments on commit 59c32aa

Please sign in to comment.