Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify SiPixelFedCablingMapGPU SoA #301

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,19 @@ namespace pixelgpudetails {
constexpr unsigned int MAX_LINK = 48; // maximum links/channels for Phase 1
constexpr unsigned int MAX_ROC = 8;
constexpr unsigned int MAX_SIZE = MAX_FED * MAX_LINK * MAX_ROC;
constexpr unsigned int MAX_SIZE_BYTE_INT = MAX_SIZE * sizeof(unsigned int);
constexpr unsigned int MAX_SIZE_BYTE_BOOL = MAX_SIZE * sizeof(unsigned char);
}

// TODO: since this has more information than just cabling map, maybe we should invent a better name?
struct SiPixelFedCablingMapGPU {
unsigned int fed[pixelgpudetails::MAX_SIZE] alignas(128);
unsigned int link[pixelgpudetails::MAX_SIZE] alignas(128);
unsigned int roc[pixelgpudetails::MAX_SIZE] alignas(128);
unsigned int RawId[pixelgpudetails::MAX_SIZE] alignas(128);
unsigned int rocInDet[pixelgpudetails::MAX_SIZE] alignas(128);
unsigned int moduleId[pixelgpudetails::MAX_SIZE] alignas(128);
unsigned char badRocs[pixelgpudetails::MAX_SIZE] alignas(128);
unsigned int size = 0;
unsigned int * fed = nullptr;
unsigned int * link = nullptr;
unsigned int * roc = nullptr;
unsigned int * RawId = nullptr;
unsigned int * rocInDet = nullptr;
unsigned int * moduleId = nullptr;
unsigned char * badRocs = nullptr;
};

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,15 @@ class SiPixelFedCablingMapGPUWrapper {

private:
const SiPixelFedCablingMap *cablingMap_;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> fedMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> linkMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> rocMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> RawId;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> rocInDet;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> moduleId;
std::vector<unsigned char, CUDAHostAllocator<unsigned char>> badRocs;
std::vector<unsigned char, CUDAHostAllocator<unsigned char>> modToUnpDefault;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

modToUnpDefault_ ?

unsigned int size;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

size_ ?

bool hasQuality_;

SiPixelFedCablingMapGPU *cablingMapHost = nullptr; // pointer to struct in CPU

struct GPUData {
~GPUData();
SiPixelFedCablingMapGPU *cablingMapHost = nullptr; // internal pointers are to GPU, struct itself is on CPU
SiPixelFedCablingMapGPU *cablingMapDevice = nullptr; // same internal pointers as above, struct itself is on GPU
SiPixelFedCablingMapGPU *cablingMapDevice = nullptr; // pointer to struct in GPU
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

last comment not relevant to this PR, but rather to CUDAESProduct in general :-)

The pattern seems to be:

  • start with a class/struct for the actual data on th GPU
  Payload *cablingMapHost = nullptr; // pointer to struct in CPU
  • define a wrapper
  struct PayloadWrapper {
    ~PayloadWrapper();
    Payload *payload = nullptr; // pointer to struct in GPU
  };
  • add a CUDAESProduct data mamber:
  CUDAESProduct<PayloadWrapper> payload_;
  • produce it for the gpu like this
Payload const* getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
  const auto& data = payload_.dataForCurrentDeviceAsync(cudaStream, [this](PayloadWrapper& data, cuda::stream_t<>& stream) {
      // allocate
      cudaCheck(cudaMalloc(&data.payload, sizeof(Payload)));

      // transfer
      cudaCheck(cudaMemcpyAsync(data.payload, this->cablingMapHost, sizeof(Payload), cudaMemcpyDefault, stream.id()));
  });
  return data.payload;
}

Would it make sense to encapsulate more of the common part into CUDAESProduct ?

And/or to drop the PayloadWrapper in favour of a unique_ptr, possibly with a custom destructor ?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm all for encapsulating patterns, but I need to think this for a while. I made an issue #336 of it to remind.

};
CUDAESProduct<GPUData> gpuData_;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
TrackerGeometry const& trackerGeom,
SiPixelQuality const *badPixelInfo):
cablingMap_(&cablingMap),
fedMap(pixelgpudetails::MAX_SIZE), linkMap(pixelgpudetails::MAX_SIZE), rocMap(pixelgpudetails::MAX_SIZE),
RawId(pixelgpudetails::MAX_SIZE), rocInDet(pixelgpudetails::MAX_SIZE), moduleId(pixelgpudetails::MAX_SIZE),
badRocs(pixelgpudetails::MAX_SIZE), modToUnpDefault(pixelgpudetails::MAX_SIZE),
modToUnpDefault(pixelgpudetails::MAX_SIZE),
hasQuality_(badPixelInfo != nullptr)
{
cudaCheck(cudaMallocHost(&cablingMapHost, sizeof(SiPixelFedCablingMapGPU)));

std::vector<unsigned int> const& fedIds = cablingMap.fedIds();
std::unique_ptr<SiPixelFedCablingTree> const& cabling = cablingMap.cablingTree();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not relevant for this PR, but wouldn't it be simpler to use a "dumb" pointer (SiPixelFedCablingTree const *) instead of a const referent to a unique_ptr ?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd say it would be clearer to drop const& and take the unique_ptr by value as cablingMap.cablingTree() returns the unique_ptr by value

std::unique_ptr<SiPixelFedCablingTree> cablingTree() const;

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same for fedIds ?
cablingMap.fedIds() return an std::vector by value, so we could drop the const& there as well, and let the compiler move or even optimise it away


Expand All @@ -41,21 +41,21 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) {
path = {fed, link, roc};
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason why at line 39 we use

for (unsigned int fed = startFed; fed <= endFed; fed++) {

instead of

for (unsigned int fed: fedIds) {

?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if it can happen, but if fedIds vector does not contain all the values between fedIds.front() and fedIds.back(), the result of those two is different.

const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path);
fedMap[index] = fed;
linkMap[index] = link;
rocMap[index] = roc;
cablingMapHost->fed[index] = fed;
cablingMapHost->link[index] = link;
cablingMapHost->roc[index] = roc;
if (pixelRoc != nullptr) {
RawId[index] = pixelRoc->rawId();
rocInDet[index] = pixelRoc->idInDetUnit();
cablingMapHost->RawId[index] = pixelRoc->rawId();
cablingMapHost->rocInDet[index] = pixelRoc->idInDetUnit();
modToUnpDefault[index] = false;
if (badPixelInfo != nullptr)
badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit());
cablingMapHost->badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit());
else
badRocs[index] = false;
cablingMapHost->badRocs[index] = false;
} else { // store some dummy number
RawId[index] = 9999;
rocInDet[index] = 9999;
badRocs[index] = true;
cablingMapHost->RawId[index] = 9999;
cablingMapHost->rocInDet[index] = 9999;
cablingMapHost->badRocs[index] = true;
modToUnpDefault[index] = true;
}
index++;
Expand All @@ -72,58 +72,44 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
// idinLnk varies between 1 to 8

for (int i = 1; i < index; i++) {
if (RawId[i] == 9999) {
moduleId[i] = 9999;
if (cablingMapHost->RawId[i] == 9999) {
cablingMapHost->moduleId[i] = 9999;
} else {
/*
std::cout << RawId[i] << std::endl;
std::cout << cablingMapHost->RawId[i] << std::endl;
*/
auto gdet = trackerGeom.idToDetUnit(RawId[i]);
auto gdet = trackerGeom.idToDetUnit(cablingMapHost->RawId[i]);
if (!gdet) {
LogDebug("SiPixelFedCablingMapGPU") << " Not found: " << RawId[i] << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << " Not found: " << cablingMapHost->RawId[i] << std::endl;
continue;
}
moduleId[i] = gdet->index();
cablingMapHost->moduleId[i] = gdet->index();
}
LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << fedMap[i] << std::setw(20) << linkMap[i] << std::setw(20) << rocMap[i] << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << RawId[i] << std::setw(20) << rocInDet[i] << std::setw(20) << moduleId[i] << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << (bool)badRocs[i] << std::setw(20) << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << cablingMapHost->fed[i] << std::setw(20) << cablingMapHost->link[i] << std::setw(20) << cablingMapHost->roc[i] << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << cablingMapHost->RawId[i] << std::setw(20) << cablingMapHost->rocInDet[i] << std::setw(20) << cablingMapHost->moduleId[i] << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << (bool)cablingMapHost->badRocs[i] << std::setw(20) << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;

}

size = index-1;
cablingMapHost->size = index-1;
}


SiPixelFedCablingMapGPUWrapper::~SiPixelFedCablingMapGPUWrapper() {}
SiPixelFedCablingMapGPUWrapper::~SiPixelFedCablingMapGPUWrapper() {
cudaCheck(cudaFreeHost(cablingMapHost));
}


const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) {
// allocate
cudaCheck(cudaMallocHost((void**) & data.cablingMapHost, sizeof(SiPixelFedCablingMapGPU)));
cudaCheck(cudaMalloc((void**) & data.cablingMapDevice, sizeof(SiPixelFedCablingMapGPU)));
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->fed, pixelgpudetails::MAX_SIZE_BYTE_INT));
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->link, pixelgpudetails::MAX_SIZE_BYTE_INT));
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->roc, pixelgpudetails::MAX_SIZE_BYTE_INT));
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->RawId, pixelgpudetails::MAX_SIZE_BYTE_INT));
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->rocInDet, pixelgpudetails::MAX_SIZE_BYTE_INT));
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->moduleId, pixelgpudetails::MAX_SIZE_BYTE_INT));
cudaCheck(cudaMalloc((void**) & data.cablingMapHost->badRocs, pixelgpudetails::MAX_SIZE_BYTE_BOOL));
cudaCheck(cudaMalloc(&data.cablingMapDevice, sizeof(SiPixelFedCablingMapGPU)));

// transfer
data.cablingMapHost->size = this->size;
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->fed, this->fedMap.data(), this->fedMap.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->link, this->linkMap.data(), this->linkMap.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->roc, this->rocMap.data(), this->rocMap.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->RawId, this->RawId.data(), this->RawId.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->rocInDet, this->rocInDet.data(), this->rocInDet.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->moduleId, this->moduleId.data(), this->moduleId.size() * sizeof(unsigned int), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data.cablingMapHost->badRocs, this->badRocs.data(), this->badRocs.size() * sizeof(unsigned char), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(data.cablingMapDevice, data.cablingMapHost, sizeof(SiPixelFedCablingMapGPU), cudaMemcpyDefault, stream.id()));
});
cudaCheck(cudaMemcpyAsync(data.cablingMapDevice, this->cablingMapHost, sizeof(SiPixelFedCablingMapGPU), cudaMemcpyDefault, stream.id()));
});
return data.cablingMapDevice;
}

Expand Down Expand Up @@ -170,16 +156,6 @@ cudautils::device::unique_ptr<unsigned char[]> SiPixelFedCablingMapGPUWrapper::g


SiPixelFedCablingMapGPUWrapper::GPUData::~GPUData() {
if(cablingMapHost != nullptr) {
cudaCheck(cudaFree(cablingMapHost->fed));
cudaCheck(cudaFree(cablingMapHost->link));
cudaCheck(cudaFree(cablingMapHost->roc));
cudaCheck(cudaFree(cablingMapHost->RawId));
cudaCheck(cudaFree(cablingMapHost->rocInDet));
cudaCheck(cudaFree(cablingMapHost->moduleId));
cudaCheck(cudaFree(cablingMapHost->badRocs));
cudaCheck(cudaFreeHost(cablingMapHost));
}
cudaCheck(cudaFree(cablingMapDevice));
}

Expand Down