Improve CUDAService log messages (#153)
Make the messages from the CUDAService more descriptive, and add a test file to run the CUDAService.
fwyzard authored Sep 2, 2018
1 parent 46ea6cc commit 305d260
Showing 2 changed files with 104 additions and 5 deletions.
96 changes: 91 additions & 5 deletions HeterogeneousCore/CUDAServices/src/CUDAService.cc
@@ -31,6 +31,51 @@ void setCudaLimit(cudaLimit limit, const char* name, size_t request) {
}
}

constexpr
unsigned int getCudaCoresPerSM(unsigned int major, unsigned int minor) {
switch (major * 10 + minor) {
// Fermi architecture
case 20: // SM 2.0: GF100 class
return 32;
case 21: // SM 2.1: GF10x class
return 48;

// Kepler architecture
case 30: // SM 3.0: GK10x class
case 32: // SM 3.2: GK10x class
case 35: // SM 3.5: GK11x class
case 37: // SM 3.7: GK21x class
return 192;

// Maxwell architecture
case 50: // SM 5.0: GM10x class
case 52: // SM 5.2: GM20x class
case 53: // SM 5.3: GM20x class
return 128;

// Pascal architecture
case 60: // SM 6.0: GP100 class
return 64;
case 61: // SM 6.1: GP10x class
case 62: // SM 6.2: GP10x class
return 128;

// Volta architecture
case 70: // SM 7.0: GV100 class
case 72: // SM 7.2: GV11b class
return 64;

// Turing architecture
    case 75: // SM 7.5: TU10x class
return 64;

// unknown architecture
default:
return 0;
}
}
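
For illustration only (not part of this commit): a minimal standalone sketch that combines a cores-per-SM mapping like the one above with the multiprocessor count reported by the CUDA runtime, to print the total number of CUDA cores per device. The file name and the reduced mapping table are assumptions made for this example.

// cuda_cores_sketch.cc -- illustrative editor sketch, not part of the commit
#include <cstdio>
#include <cuda_runtime.h>

// reduced version of the cores-per-SM mapping shown above (Pascal, Volta and Turing only)
constexpr unsigned int coresPerSM(int major, int minor) {
  switch (major * 10 + minor) {
    case 60:          return 64;   // Pascal GP100
    case 61: case 62: return 128;  // Pascal GP10x
    case 70: case 72: return 64;   // Volta
    case 75:          return 64;   // Turing TU10x
    default:          return 0;    // unknown architecture
  }
}

int main() {
  int devices = 0;
  if (cudaGetDeviceCount(&devices) != cudaSuccess) {
    std::printf("failed to initialize the CUDA runtime\n");
    return 1;
  }
  for (int i = 0; i < devices; ++i) {
    cudaDeviceProp properties;
    cudaGetDeviceProperties(&properties, i);
    unsigned int cores = properties.multiProcessorCount * coresPerSM(properties.major, properties.minor);
    std::printf("CUDA device %d: %d SMs, %u CUDA cores\n", i, properties.multiProcessorCount, cores);
  }
  return 0;
}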


CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry& iRegistry) {
bool configEnabled = config.getUntrackedParameter<bool>("enabled");
if (not configEnabled) {
@@ -40,7 +85,7 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&

auto status = cudaGetDeviceCount(&numberOfDevices_);
if (cudaSuccess != status) {
edm::LogWarning("CUDAService") << "Failed to initialize the CUDA runtime.\n" << ".\n" << "Disabling the CUDAService.";
edm::LogWarning("CUDAService") << "Failed to initialize the CUDA runtime.\n" << "Disabling the CUDAService.";
return;
}
edm::LogInfo log("CUDAService");
@@ -66,9 +111,25 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, i));
log << "CUDA device " << i << ": " << properties.name << '\n';
log << " compute capability: " << properties.major << "." << properties.minor << '\n';

// compute capabilities
log << " compute capability: " << properties.major << "." << properties.minor << " (sm_" << properties.major << properties.minor << ")\n";
computeCapabilities_.emplace_back(properties.major, properties.minor);
log << " streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount << '\n';
log << " CUDA cores: " << std::setw(28) << properties.multiProcessorCount * getCudaCoresPerSM(properties.major, properties.minor ) << '\n';
log << " single to double performance: " << std::setw(8) << properties.singleToDoublePrecisionPerfRatio << ":1\n";

// compute mode
static constexpr const char* computeModeDescription[] = {
"default (shared)", // cudaComputeModeDefault
"exclusive (single thread)", // cudaComputeModeExclusive
"prohibited", // cudaComputeModeProhibited
"exclusive (single process)", // cudaComputeModeExclusiveProcess
"unknown"
};
log << " compute mode:" << std::right << std::setw(27) << computeModeDescription[std::min(properties.computeMode, (int) std::size(computeModeDescription) - 1)] << '\n';

// TODO if a device is in exclusive use, skip it and remove it from the list, instead of failing with abort()
cudaCheck(cudaSetDevice(i));
cudaCheck(cudaSetDeviceFlags(cudaDeviceScheduleAuto | cudaDeviceMapHost));

@@ -77,6 +138,31 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
size_t freeMemory, totalMemory;
cudaCheck(cudaMemGetInfo(&freeMemory, &totalMemory));
log << " memory: " << std::setw(6) << freeMemory / (1 << 20) << " MB free / " << std::setw(6) << totalMemory / (1 << 20) << " MB total\n";
log << " constant memory: " << std::setw(6) << properties.totalConstMem / (1 << 10) << " kB\n";
log << " L2 cache size: " << std::setw(6) << properties.l2CacheSize / (1 << 10) << " kB\n";

// L1 cache behaviour
static constexpr const char* l1CacheModeDescription[] = {
"unknown",
"local memory",
"global memory",
"local and global memory"
};
int l1CacheMode = properties.localL1CacheSupported + 2 * properties.globalL1CacheSupported;
log << " L1 cache mode:" << std::setw(26) << std::right << l1CacheModeDescription[l1CacheMode] << '\n';
log << '\n';

log << "Other capabilities\n";
log << " " << (properties.canMapHostMemory ? "can" : "cannot") << " map host memory into the CUDA address space for use with cudaHostAlloc()/cudaHostGetDevicePointer()\n";
log << " " << (properties.pageableMemoryAccess ? "supports" : "does not support") << " coherently accessing pageable memory without calling cudaHostRegister() on it\n";
log << " " << (properties.pageableMemoryAccessUsesHostPageTables ? "can" : "cannot") << " access pageable memory via the host's page tables\n";
log << " " << (properties.canUseHostPointerForRegisteredMem ? "can" : "cannot") << " access host registered memory at the same virtual address as the host\n";
log << " " << (properties.unifiedAddressing ? "shares" : "does not share") << " a unified address space with the host\n";
log << " " << (properties.managedMemory ? "supports" : "does not support") << " allocating managed memory on this system\n";
log << " " << (properties.concurrentManagedAccess ? "can" : "cannot") << " coherently access managed memory concurrently with the host\n";
log << " " << "the host " << (properties.directManagedMemAccessFromHost ? "can" : "cannot") << " directly access managed memory on the device without migration\n";
    log << "  " << (properties.cooperativeLaunch ? "supports" : "does not support") << " launching cooperative kernels via cudaLaunchCooperativeKernel()\n";
log << " " << (properties.cooperativeMultiDeviceLaunch ? "supports" : "does not support") << " launching cooperative kernels via cudaLaunchCooperativeKernelMultiDevice()\n";
log << '\n';

// set and read the CUDA device flags.
@@ -145,11 +231,11 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
size_t value;
log << "CUDA limits\n";
cudaCheck(cudaDeviceGetLimit(&value, cudaLimitPrintfFifoSize));
log << " printf buffer size: " << std::setw(10) << value << '\n';
log << " printf buffer size: " << std::setw(10) << value / (1 << 20) << " MB\n";
cudaCheck(cudaDeviceGetLimit(&value, cudaLimitStackSize));
log << " stack size: " << std::setw(10) << value << '\n';
log << " stack size: " << std::setw(10) << value / (1 << 10) << " kB\n";
cudaCheck(cudaDeviceGetLimit(&value, cudaLimitMallocHeapSize));
log << " malloc heap size: " << std::setw(10) << value << '\n';
log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
cudaCheck(cudaDeviceGetLimit(&value, cudaLimitDevRuntimeSyncDepth));
log << " runtime sync depth: " << std::setw(10) << value << '\n';
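
The CUDA limits reported above can also be adjusted through the CUDA runtime API, which is presumably what the setCudaLimit helper declared at the top of this file wraps. As an illustration only (not part of this commit), a minimal sketch that requests a 4 MB printf buffer and reads back the value actually granted; the 4 MB figure is an arbitrary choice for the example.

// cuda_limits_sketch.cc -- illustrative editor sketch, not part of the commit
#include <cstdio>
#include <cuda_runtime.h>

int main() {
  // request a 4 MB printf buffer, then read back what the runtime actually granted
  size_t requested = 4 * (1 << 20);
  if (cudaDeviceSetLimit(cudaLimitPrintfFifoSize, requested) != cudaSuccess) {
    std::printf("failed to set cudaLimitPrintfFifoSize\n");
    return 1;
  }
  size_t value = 0;
  cudaDeviceGetLimit(&value, cudaLimitPrintfFifoSize);
  std::printf("printf buffer size: %zu MB\n", value / (1 << 20));
  return 0;
}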
13 changes: 13 additions & 0 deletions HeterogeneousCore/CUDAServices/test/testCUDAService.py
@@ -0,0 +1,13 @@
import FWCore.ParameterSet.Config as cms

process = cms.Process( "TEST" )

process.load('FWCore.MessageService.MessageLogger_cfi')
process.load('HeterogeneousCore.CUDAServices.CUDAService_cfi')
process.MessageLogger.categories.append("CUDAService")

process.source = cms.Source("EmptySource")

process.maxEvents = cms.untracked.PSet(
input = cms.untracked.int32( 0 )
)
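
Assuming a CMSSW environment in which the CUDAService and a CUDA-capable GPU are available, this configuration can presumably be run with cmsRun HeterogeneousCore/CUDAServices/test/testCUDAService.py; with maxEvents set to 0 it constructs the services without processing any events, so it simply exercises the CUDAService log messages improved by this commit.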
