diff --git a/HeterogeneousCore/CUDAServices/src/CUDAService.cc b/HeterogeneousCore/CUDAServices/src/CUDAService.cc index 9db5d89de1f83..b2c412ee3793e 100644 --- a/HeterogeneousCore/CUDAServices/src/CUDAService.cc +++ b/HeterogeneousCore/CUDAServices/src/CUDAService.cc @@ -286,7 +286,7 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry& auto maxBin = allocator.getUntrackedParameter("maxBin"); size_t maxCachedBytes = allocator.getUntrackedParameter("maxCachedBytes"); auto maxCachedFraction = allocator.getUntrackedParameter("maxCachedFraction"); - auto debug = allocator.getUntrackedParameter("debug"); + auto debug = allocator.getUntrackedParameter("debug"); size_t minCachedBytes = std::numeric_limits::max(); int currentDevice; diff --git a/HeterogeneousCore/CUDAServices/src/CachingDeviceAllocator.h b/HeterogeneousCore/CUDAServices/src/CachingDeviceAllocator.h index eb0b6686ef8d5..33bc1bbb175fd 100644 --- a/HeterogeneousCore/CUDAServices/src/CachingDeviceAllocator.h +++ b/HeterogeneousCore/CUDAServices/src/CachingDeviceAllocator.h @@ -418,8 +418,8 @@ struct CachingDeviceAllocator cached_bytes[device].free -= search_key.bytes; cached_bytes[device].live += search_key.bytes; - if (debug) _CubLog("\tDevice %d reused cached block at %p (%lld bytes) for stream %lld (previously associated with stream %lld).\n", - device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) block_itr->associated_stream); + if (debug) _CubLog("\tDevice %d reused cached block at %p (%lld bytes) for stream %lld, event %lld (previously associated with stream %lld, event %lld).\n", + device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.ready_event, (long long) block_itr->associated_stream, (long long) block_itr->ready_event); cached_blocks.erase(block_itr); @@ -500,8 +500,8 @@ struct CachingDeviceAllocator cached_bytes[device].live += search_key.bytes; 
mutex.Unlock(); - if (debug) _CubLog("\tDevice %d allocated new device block at %p (%lld bytes associated with stream %lld).\n", - device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream); + if (debug) _CubLog("\tDevice %d allocated new device block at %p (%lld bytes associated with stream %lld, event %lld).\n", + device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.ready_event); // Attempt to revert back to previous device if necessary if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) @@ -579,8 +579,8 @@ struct CachingDeviceAllocator cached_blocks.insert(search_key); cached_bytes[device].free += search_key.bytes; - if (debug) _CubLog("\tDevice %d returned %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n", - device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), + if (debug) _CubLog("\tDevice %d returned %lld bytes at %p from associated stream %lld, event %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. 
(%lld bytes)\n", + device, (long long) search_key.bytes, d_ptr, (long long) search_key.associated_stream, (long long) search_key.ready_event, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); } } @@ -607,8 +607,8 @@ struct CachingDeviceAllocator if (CubDebug(error = cudaFree(d_ptr))) return error; if (CubDebug(error = cudaEventDestroy(search_key.ready_event))) return error; - if (debug) _CubLog("\tDevice %d freed %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", - device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); + if (debug) _CubLog("\tDevice %d freed %lld bytes at %p from associated stream %lld, event %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", + device, (long long) search_key.bytes, d_ptr, (long long) search_key.associated_stream, (long long) search_key.ready_event, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); } // Reset device diff --git a/HeterogeneousCore/CUDAServices/src/CachingHostAllocator.h b/HeterogeneousCore/CUDAServices/src/CachingHostAllocator.h index 215e7be96a4d6..43ae2f42429a2 100644 --- a/HeterogeneousCore/CUDAServices/src/CachingHostAllocator.h +++ b/HeterogeneousCore/CUDAServices/src/CachingHostAllocator.h @@ -407,8 +407,8 @@ struct CachingHostAllocator cached_bytes.free -= search_key.bytes; cached_bytes.live += search_key.bytes; - if (debug) _CubLog("\tHost reused cached block at %p (%lld bytes) for stream %lld on device %lld (previously associated with stream %lld).\n", - search_key.d_ptr, (long long) search_key.bytes, (long long) 
search_key.associated_stream, (long long) search_key.device, (long long) block_itr->associated_stream); + if (debug) _CubLog("\tHost reused cached block at %p (%lld bytes) for stream %lld, event %lld on device %lld (previously associated with stream %lld, event %lld).\n", + search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.ready_event, (long long) search_key.device, (long long) block_itr->associated_stream, (long long) block_itr->ready_event); cached_blocks.erase(block_itr); @@ -482,8 +482,8 @@ struct CachingHostAllocator cached_bytes.live += search_key.bytes; mutex.Unlock(); - if (debug) _CubLog("\tHost allocated new host block at %p (%lld bytes associated with stream %lld on device %lld).\n", - search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.device); + if (debug) _CubLog("\tHost allocated new host block at %p (%lld bytes associated with stream %lld, event %lld on device %lld).\n", + search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.ready_event, (long long) search_key.device); } // Copy host pointer to output parameter @@ -529,8 +529,8 @@ struct CachingHostAllocator cached_blocks.insert(search_key); cached_bytes.free += search_key.bytes; - if (debug) _CubLog("\tHost returned %lld bytes from associated stream %lld on device %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n", - (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.device, (long long) cached_blocks.size(), + if (debug) _CubLog("\tHost returned %lld bytes from associated stream %lld, event %lld on device %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. 
(%lld bytes)\n", + (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.ready_event, (long long) search_key.device, (long long) cached_blocks.size(), (long long) cached_bytes.free, (long long) live_blocks.size(), (long long) cached_bytes.live); } } @@ -554,8 +554,8 @@ struct CachingHostAllocator if (CubDebug(error = cudaFreeHost(d_ptr))) return error; if (CubDebug(error = cudaEventDestroy(search_key.ready_event))) return error; - if (debug) _CubLog("\tHost freed %lld bytes from associated stream %lld on device %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", - (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.device, (long long) cached_blocks.size(), (long long) cached_bytes.free, (long long) live_blocks.size(), (long long) cached_bytes.live); + if (debug) _CubLog("\tHost freed %lld bytes from associated stream %lld, event %lld on device %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", + (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) search_key.ready_event, (long long) search_key.device, (long long) cached_blocks.size(), (long long) cached_bytes.free, (long long) live_blocks.size(), (long long) cached_bytes.live); } // Reset device diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h b/HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h index 4930307a89567..e9fe7aba2087b 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h +++ b/HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h @@ -2,21 +2,35 @@ #define HeterogeneousCore_CUDAUtilities_cudaCheck_h #include +#include #include #include +/* Report a failed cudaCheck(): write the source location, the checked expression, and the CUDA error name and description to std::cerr. */ +/* Defined as a plain inline function rather than inside an unnamed namespace: this is a header, and the inline cudaCheck_() overloads must call the same function in every translation unit. */ +/* The message is assembled in a local string stream first, so it reaches std::cerr in a single insertion. */ +inline +void printCudaErrorMessage(const char* file, int line, const char* cmd, const char* error, const char* message) { + std::ostringstream out; + out << "\n"; + out << file << ", line " << line << ":\n"; + 
out << "cudaCheck(" << cmd << ");\n"; + out << error << ": " << message << "\n"; + /* Insert the text with str(): the buffer of a std::ostringstream is write-only, so "std::cerr << out.rdbuf()" would insert nothing and set failbit on std::cerr. */ + std::cerr << out.str() << std::endl; +} + inline bool cudaCheck_(const char* file, int line, const char* cmd, CUresult result) { - //std::cerr << file << ", line " << line << ": " << cmd << std::endl; - if (result == CUDA_SUCCESS) + if (__builtin_expect(result == CUDA_SUCCESS, true)) return true; const char* error; const char* message; cuGetErrorName(result, &error); cuGetErrorString(result, &message); - std::cerr << file << ", line " << line << ": " << error << ": " << message << std::endl; + printCudaErrorMessage(file, line, cmd, error, message); abort(); return false; } @@ -24,13 +38,12 @@ bool cudaCheck_(const char* file, int line, const char* cmd, CUresult result) inline bool cudaCheck_(const char* file, int line, const char* cmd, cudaError_t result) { - //std::cerr << file << ", line " << line << ": " << cmd << std::endl; - if (result == cudaSuccess) + if (__builtin_expect(result == cudaSuccess, true)) return true; const char* error = cudaGetErrorName(result); const char* message = cudaGetErrorString(result); - std::cerr << file << ", line " << line << ": " << error << ": " << message << std::endl; + printCudaErrorMessage(file, line, cmd, error, message); abort(); return false; }