diff --git a/include/camp/defines.hpp b/include/camp/defines.hpp index 9ddac43..1d5e1e1 100644 --- a/include/camp/defines.hpp +++ b/include/camp/defines.hpp @@ -18,7 +18,6 @@ For details about use and distribution, please read LICENSE and NOTICE from // include cuda header if configured, even if not in use #ifdef CAMP_ENABLE_CUDA -#include #include #endif @@ -182,13 +181,6 @@ CAMP_DLL_EXPORT void throw_re(const char *s); #ifdef CAMP_ENABLE_CUDA -#define campCuErrchk(ans) ::camp::cuAssert((ans), #ans, __FILE__, __LINE__) - -CAMP_DLL_EXPORT CUresult cuAssert(CUresult code, - const char *call, - const char *file, - int line); - #define campCudaErrchk(ans) ::camp::cudaAssert((ans), #ans, __FILE__, __LINE__) CAMP_DLL_EXPORT cudaError_t cudaAssert(cudaError_t code, diff --git a/include/camp/resource/cuda.hpp b/include/camp/resource/cuda.hpp index 5a9f53a..29cf278 100644 --- a/include/camp/resource/cuda.hpp +++ b/include/camp/resource/cuda.hpp @@ -18,8 +18,6 @@ For details about use and distribution, please read LICENSE and NOTICE from #ifdef CAMP_ENABLE_CUDA #include -#include -#include namespace camp { @@ -81,90 +79,34 @@ namespace resources class Cuda { - static int get_current_device() + static cudaStream_t get_a_stream(int num) { - int dev = -1; - campCudaErrchk(cudaGetDevice(&dev)); - return dev; - } - - static void setup_current_device() { - static std::vector device_setup([] { - int count = -1; - campCudaErrchk(cudaGetDeviceCount(&count)); - return count; - }()); - - int dev = get_current_device(); - - std::call_once(device_setup[dev], [&] { - size_t free = 0, total = 0; - campCudaErrchk(cudaMemGetInfo(&free, &total)); - }); - } - - static int get_device_from_stream(cudaStream_t stream) - { - if (stream == 0) { - // If the current device has been set but not used cuCtx API - // functions will return CUDA_ERROR_CONTEXT_IS_DESTROYED - setup_current_device(); - } - - CUcontext stream_ctx; - campCuErrchk(cuStreamGetCtx(stream, &stream_ctx)); - campCuErrchk(cuCtxPushCurrent(stream_ctx)); - int dev = -1; - campCuErrchk(cuCtxGetDevice(&dev)); - campCuErrchk(cuCtxPopCurrent(&stream_ctx)); - return dev; - } - - static cudaStream_t get_a_stream(int num, int dev) - { - static constexpr int num_streams = 16; - struct Streams { - cudaStream_t streams[num_streams] = {}; - int previous = 0; - - std::once_flag onceFlag; - std::mutex mtx; - }; - - static std::vector devices([] { - int count = -1; - campCudaErrchk(cudaGetDeviceCount(&count)); - return count; - }()); + static cudaStream_t streams[16] = {}; + static int previous = 0; - if (dev < 0) { - dev = get_current_device(); - } + static std::once_flag m_onceFlag; + static std::mutex m_mtx; - std::call_once(devices[dev].onceFlag, [&] { - auto d{device_guard(dev)}; - if (devices[dev].streams[0] == nullptr) { - for (auto &s : devices[dev].streams) { + std::call_once(m_onceFlag, [] { + if (streams[0] == nullptr) { + for (auto &s : streams) { campCudaErrchk(cudaStreamCreate(&s)); } } }); if (num < 0) { - std::lock_guard guard(devices[dev].mtx); - devices[dev].previous = (devices[dev].previous + 1) % num_streams; - num = devices[dev].previous; - } else { - num = num % num_streams; + m_mtx.lock(); + previous = (previous + 1) % 16; + m_mtx.unlock(); + return streams[previous]; } - return devices[dev].streams[num]; + return streams[num % 16]; } // Private from-stream constructor - Cuda(cudaStream_t s, int dev) - : stream(s), device((dev >= 0) ? dev : get_device_from_stream(s)) - { } + Cuda(cudaStream_t s, int dev = 0) : stream(s), device(dev) {} MemoryAccess get_access_type(void *p) { cudaPointerAttributes a; @@ -187,19 +129,20 @@ namespace resources // related: https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void-function-in-constexpr-if-fun return MemoryAccess::Unknown; } - public: - explicit Cuda(int group = -1, int dev = get_current_device()) - : stream(get_a_stream(group, dev)), device(dev) - { } - - /// Create a resource from a custom stream. - /// If device is specified it must match the stream. If device is - /// unspecified, we will get it from the stream. - /// This may be called before main if device is specified as no calls to - /// the runtime are made in this case. + Cuda(int group = -1, int dev = 0) + : stream(get_a_stream(group)), device(dev) + { + } + + /// Create a resource from a custom stream + /// The device specified must match the stream, if none is specified the + /// currently selected device is used. static Cuda CudaFromStream(cudaStream_t s, int dev = -1) { + if (dev < 0) { + campCudaErrchk(cudaGetDevice(&dev)); + } return Cuda(s, dev); } @@ -215,7 +158,7 @@ namespace resources campCudaErrchk(cudaStreamCreate(&s)); #endif return s; - }(), get_current_device()); + }()); return c; } @@ -275,7 +218,7 @@ namespace resources void deallocate(void *p, MemoryAccess ma = MemoryAccess::Unknown) { auto d{device_guard(device)}; - if (ma == MemoryAccess::Unknown) { + if(ma == MemoryAccess::Unknown) { ma = get_access_type(p); } switch (ma) { @@ -292,7 +235,6 @@ namespace resources break; case MemoryAccess::Unknown: ::camp::throw_re("Unknown memory access type, cannot free"); - break; } } void memcpy(void *dst, const void *src, size_t size) diff --git a/include/camp/resource/hip.hpp b/include/camp/resource/hip.hpp index 5343fc7..c55328a 100644 --- a/include/camp/resource/hip.hpp +++ b/include/camp/resource/hip.hpp @@ -11,15 +11,11 @@ For details about use and distribution, please read LICENSE and NOTICE from #ifndef __CAMP_HIP_HPP #define __CAMP_HIP_HPP -#include "camp/defines.hpp" #include "camp/resource/event.hpp" #include "camp/resource/platform.hpp" #ifdef CAMP_ENABLE_HIP - #include -#include -#include namespace camp { @@ -79,63 +75,34 @@ namespace resources class Hip { - static int get_current_device() + static hipStream_t get_a_stream(int num) { - int dev = -1; - campHipErrchk(hipGetDevice(&dev)); - return dev; - } + static hipStream_t streams[16] = {}; + static int previous = 0; - static int get_device_from_stream(hipStream_t stream) - { - return hipGetStreamDeviceId(stream); - } + static std::once_flag m_onceFlag; + static std::mutex m_mtx; - static hipStream_t get_a_stream(int num, int dev) - { - static constexpr int num_streams = 16; - struct Streams { - hipStream_t streams[num_streams] = {}; - int previous = 0; - - std::once_flag onceFlag; - std::mutex mtx; - }; - - static std::vector devices([] { - int count = -1; - campHipErrchk(hipGetDeviceCount(&count)); - return count; - }()); - - if (dev < 0) { - dev = get_current_device(); - } - - std::call_once(devices[dev].onceFlag, [=] { - auto d{device_guard(dev)}; - if (devices[dev].streams[0] == nullptr) { - for (auto &s : devices[dev].streams) { + std::call_once(m_onceFlag, [] { + if (streams[0] == nullptr) { + for (auto &s : streams) { campHipErrchk(hipStreamCreate(&s)); } } }); if (num < 0) { - std::lock_guard guard(devices[dev].mtx); - devices[dev].previous = (devices[dev].previous + 1) % num_streams; - num = devices[dev].previous; - } else { - num = num % num_streams; + m_mtx.lock(); + previous = (previous + 1) % 16; + m_mtx.unlock(); + return streams[previous]; } - return devices[dev].streams[num]; + return streams[num % 16]; } // Private from-stream constructor - Hip(hipStream_t s, int dev) - : stream(s), device((dev >= 0) ? dev : get_device_from_stream(s)) - { } + Hip(hipStream_t s, int dev = 0) : stream(s), device(dev) {} MemoryAccess get_access_type(void *p) { @@ -159,17 +126,19 @@ namespace resources } public: - explicit Hip(int group = -1, int dev = get_current_device()) - : stream(get_a_stream(group, dev)), device(dev) - { } - - /// Create a resource from a custom stream. - /// If device is specified it must match the stream. If device is - /// unspecified, we will get it from the stream. - /// This may be called before main if device is specified as no calls to - /// the runtime are made in this case. + Hip(int group = -1, int dev = 0) + : stream(get_a_stream(group)), device(dev) + { + } + + /// Create a resource from a custom stream + /// The device specified must match the stream, if none is specified the + /// currently selected device is used. static Hip HipFromStream(hipStream_t s, int dev = -1) { + if (dev < 0) { + campHipErrchk(hipGetDevice(&dev)); + } return Hip(s, dev); } @@ -185,7 +154,7 @@ namespace resources campHipErrchk(hipStreamCreate(&s)); #endif return s; - }(), get_current_device()); + }()); return h; } diff --git a/include/camp/resource/host.hpp b/include/camp/resource/host.hpp index 33fd187..eccdf27 100644 --- a/include/camp/resource/host.hpp +++ b/include/camp/resource/host.hpp @@ -35,7 +35,7 @@ namespace resources class Host { public: - explicit Host(int /* group */ = -1) {} + Host(int /* group */ = -1) {} // Methods Platform get_platform() { return Platform::host; } diff --git a/include/camp/resource/omp_target.hpp b/include/camp/resource/omp_target.hpp index 7b99708..d40ef99 100644 --- a/include/camp/resource/omp_target.hpp +++ b/include/camp/resource/omp_target.hpp @@ -83,7 +83,7 @@ namespace resources } } public: - explicit Omp(int group = -1, int device = omp_get_default_device()) + Omp(int group = -1, int device = omp_get_default_device()) : addr(get_addr(group)), dev(device) { } diff --git a/include/camp/resource/sycl.hpp b/include/camp/resource/sycl.hpp index 84b65ae..6823d49 100644 --- a/include/camp/resource/sycl.hpp +++ b/include/camp/resource/sycl.hpp @@ -19,7 +19,6 @@ For details about use and distribution, please read LICENSE and NOTICE from #include #include #include -#include using namespace cl; namespace camp @@ -54,11 +53,9 @@ namespace resources static sycl::context *contextInUse = NULL; static std::map> queueMap; - static int previous = 0; static std::mutex m_mtx; - - std::lock_guard guard(m_mtx); + m_mtx.lock(); // User passed a context, use it if (useContext) { @@ -105,37 +102,33 @@ namespace resources sycl::queue(*contextInUse, gpuSelector, propertyList)}; } } + m_mtx.unlock(); + static int previous = 0; + + static std::once_flag m_onceFlag; if (num < 0) { + m_mtx.lock(); previous = (previous + 1) % 16; - num = previous; - } else { - num = num % 16; + m_mtx.unlock(); + return &queueMap[contextInUse][previous]; } - return &queueMap[contextInUse][num]; + return &queueMap[contextInUse][num % 16]; } - explicit Sycl(sycl::queue* queue) : qu(queue) {} - public: - explicit Sycl(int group = -1) + Sycl(int group = -1) { sycl::context temp; qu = get_a_queue(temp, group, false); } - explicit Sycl(sycl::context &syclContext, int group = -1) + Sycl(sycl::context &syclContext, int group = -1) : qu(get_a_queue(syclContext, group, true)) { } - /// Create a resource from a custom queue. - static Sycl SyclFromQueue(sycl::queue* queue) - { - return Sycl(queue); - } - // Methods Platform get_platform() { return Platform::sycl; } static Sycl get_default() diff --git a/src/errors.cpp b/src/errors.cpp index b06981e..2a90483 100644 --- a/src/errors.cpp +++ b/src/errors.cpp @@ -22,30 +22,6 @@ void throw_re(const char *s) { throw std::runtime_error(s); } #ifdef CAMP_ENABLE_CUDA -CUresult cuAssert(CUresult code, - const char *call, - const char *file, - int line) -{ - if (code != CUDA_SUCCESS && code != CUDA_ERROR_NOT_READY) { - const char* error_string = nullptr; - if (cuGetErrorString(code, &error_string) != CUDA_SUCCESS) { - error_string = "Unknown Error code"; - } - std::string msg; - msg += "campCuErrchk("; - msg += call; - msg += ") "; - msg += error_string; - msg += " "; - msg += file; - msg += ":"; - msg += std::to_string(line); - throw std::runtime_error(msg); - } - return code; -} - cudaError_t cudaAssert(cudaError_t code, const char *call, const char *file, diff --git a/test/resource.cpp b/test/resource.cpp index 417fcd5..f32824a 100644 --- a/test/resource.cpp +++ b/test/resource.cpp @@ -93,69 +93,6 @@ TEST(CampResource, StreamSelect) cudaStreamDestroy(stream2); } -TEST(CampResource, MultipleDevices) -{ - int cur_dev = 0; - cudaGetDevice(&cur_dev); - - int num_devices = 0; - cudaGetDeviceCount(&num_devices); - - for (int d = 0; d < num_devices; ++d) { - cudaSetDevice(d); - - Cuda c1{Cuda::CudaFromStream(0)}; - Cuda c2{Cuda::CudaFromStream(0, d)}; - Cuda c3{}; - Cuda c4{0}; - Cuda c5{0, d}; - - EXPECT_EQ(c1.get_device(), d); - EXPECT_EQ(c2.get_device(), d); - EXPECT_EQ(c3.get_device(), d); - EXPECT_EQ(c4.get_device(), d); - EXPECT_EQ(c5.get_device(), d); - - EXPECT_EQ(c1.get_stream(), cudaStream_t{0}); - EXPECT_EQ(c2.get_stream(), cudaStream_t{0}); - EXPECT_EQ(c4.get_stream(), c5.get_stream()); - - const int N = 5; - int* d_array1 = c1.allocate(N); - c1.deallocate(d_array1); - } - - cudaSetDevice(cur_dev); -} - -TEST(CampResource, DifferentDevice) -{ - int cur_dev = 0; - cudaGetDevice(&cur_dev); - - int num_devices = 0; - cudaGetDeviceCount(&num_devices); - - if (num_devices > 1) { - int diff_dev = (cur_dev + 1) % num_devices; - - Cuda c1{Cuda::CudaFromStream(0, diff_dev)}; - Cuda c2{0, diff_dev}; - - EXPECT_EQ(c1.get_device(), diff_dev); - EXPECT_EQ(c2.get_device(), diff_dev); - - const int N = 5; - int* d_array1 = c1.allocate(N); - c1.deallocate(d_array1); - } - - int check_dev = -1; - cudaGetDevice(&check_dev); - - EXPECT_EQ(check_dev, cur_dev); -} - TEST(CampResource, Get) { Resource dev_host{Host()}; @@ -241,69 +178,6 @@ TEST(CampResource, StreamSelect) hipStreamDestroy(stream2); } -TEST(CampResource, MultipleDevices) -{ - int cur_dev = 0; - hipGetDevice(&cur_dev); - - int num_devices = 0; - hipGetDeviceCount(&num_devices); - - for (int d = 0; d < num_devices; ++d) { - hipSetDevice(d); - - Hip c1{Hip::HipFromStream(0)}; - Hip c2{Hip::HipFromStream(0, d)}; - Hip c3{}; - Hip c4{0}; - Hip c5{0, d}; - - EXPECT_EQ(c1.get_device(), d); - EXPECT_EQ(c2.get_device(), d); - EXPECT_EQ(c3.get_device(), d); - EXPECT_EQ(c4.get_device(), d); - EXPECT_EQ(c5.get_device(), d); - - EXPECT_EQ(c1.get_stream(), hipStream_t{0}); - EXPECT_EQ(c2.get_stream(), hipStream_t{0}); - EXPECT_EQ(c4.get_stream(), c5.get_stream()); - - const int N = 5; - int* d_array1 = c1.allocate(N); - c1.deallocate(d_array1); - } - - hipSetDevice(cur_dev); -} - -TEST(CampResource, DifferentDevice) -{ - int cur_dev = 0; - hipGetDevice(&cur_dev); - - int num_devices = 0; - hipGetDeviceCount(&num_devices); - - if (num_devices > 1) { - int diff_dev = (cur_dev + 1) % num_devices; - - Hip c1{Hip::HipFromStream(0, diff_dev)}; - Hip c2{0, diff_dev}; - - EXPECT_EQ(c1.get_device(), diff_dev); - EXPECT_EQ(c2.get_device(), diff_dev); - - const int N = 5; - int* d_array1 = c1.allocate(N); - c1.deallocate(d_array1); - } - - int check_dev = -1; - hipGetDevice(&check_dev); - - EXPECT_EQ(check_dev, cur_dev); -} - TEST(CampResource, Get) { Resource dev_host{Host()};