Skip to content

Commit

Permalink
Merge pull request #131 from MrBurmark/bugfix/burmark1/revert_detect_…
Browse files Browse the repository at this point in the history
…current_gpu_until_config_issues_can_be_fixed
  • Loading branch information
trws authored May 20, 2023
2 parents 00aa208 + bd403a8 commit 1744f66
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 320 deletions.
8 changes: 0 additions & 8 deletions include/camp/defines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ For details about use and distribution, please read LICENSE and NOTICE from

// include cuda header if configured, even if not in use
#ifdef CAMP_ENABLE_CUDA
#include <cuda.h>
#include <cuda_runtime.h>
#endif

Expand Down Expand Up @@ -182,13 +181,6 @@ CAMP_DLL_EXPORT void throw_re(const char *s);

#ifdef CAMP_ENABLE_CUDA

#define campCuErrchk(ans) ::camp::cuAssert((ans), #ans, __FILE__, __LINE__)

CAMP_DLL_EXPORT CUresult cuAssert(CUresult code,
const char *call,
const char *file,
int line);

#define campCudaErrchk(ans) ::camp::cudaAssert((ans), #ans, __FILE__, __LINE__)

CAMP_DLL_EXPORT cudaError_t cudaAssert(cudaError_t code,
Expand Down
112 changes: 27 additions & 85 deletions include/camp/resource/cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ For details about use and distribution, please read LICENSE and NOTICE from
#ifdef CAMP_ENABLE_CUDA

#include <cuda_runtime.h>
#include <mutex>
#include <vector>

namespace camp
{
Expand Down Expand Up @@ -81,90 +79,34 @@ namespace resources

class Cuda
{
static int get_current_device()
static cudaStream_t get_a_stream(int num)
{
int dev = -1;
campCudaErrchk(cudaGetDevice(&dev));
return dev;
}

static void setup_current_device() {
static std::vector<std::once_flag> device_setup([] {
int count = -1;
campCudaErrchk(cudaGetDeviceCount(&count));
return count;
}());

int dev = get_current_device();

std::call_once(device_setup[dev], [&] {
size_t free = 0, total = 0;
campCudaErrchk(cudaMemGetInfo(&free, &total));
});
}

static int get_device_from_stream(cudaStream_t stream)
{
if (stream == 0) {
// If the current device has been set but not used cuCtx API
// functions will return CUDA_ERROR_CONTEXT_IS_DESTROYED
setup_current_device();
}

CUcontext stream_ctx;
campCuErrchk(cuStreamGetCtx(stream, &stream_ctx));
campCuErrchk(cuCtxPushCurrent(stream_ctx));
int dev = -1;
campCuErrchk(cuCtxGetDevice(&dev));
campCuErrchk(cuCtxPopCurrent(&stream_ctx));
return dev;
}

static cudaStream_t get_a_stream(int num, int dev)
{
static constexpr int num_streams = 16;
struct Streams {
cudaStream_t streams[num_streams] = {};
int previous = 0;

std::once_flag onceFlag;
std::mutex mtx;
};

static std::vector<Streams> devices([] {
int count = -1;
campCudaErrchk(cudaGetDeviceCount(&count));
return count;
}());
static cudaStream_t streams[16] = {};
static int previous = 0;

if (dev < 0) {
dev = get_current_device();
}
static std::once_flag m_onceFlag;
static std::mutex m_mtx;

std::call_once(devices[dev].onceFlag, [&] {
auto d{device_guard(dev)};
if (devices[dev].streams[0] == nullptr) {
for (auto &s : devices[dev].streams) {
std::call_once(m_onceFlag, [] {
if (streams[0] == nullptr) {
for (auto &s : streams) {
campCudaErrchk(cudaStreamCreate(&s));
}
}
});

if (num < 0) {
std::lock_guard<std::mutex> guard(devices[dev].mtx);
devices[dev].previous = (devices[dev].previous + 1) % num_streams;
num = devices[dev].previous;
} else {
num = num % num_streams;
m_mtx.lock();
previous = (previous + 1) % 16;
m_mtx.unlock();
return streams[previous];
}

return devices[dev].streams[num];
return streams[num % 16];
}

// Private from-stream constructor
Cuda(cudaStream_t s, int dev)
: stream(s), device((dev >= 0) ? dev : get_device_from_stream(s))
{ }
Cuda(cudaStream_t s, int dev = 0) : stream(s), device(dev) {}

MemoryAccess get_access_type(void *p) {
cudaPointerAttributes a;
Expand All @@ -187,19 +129,20 @@ namespace resources
// related: https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void-function-in-constexpr-if-fun
return MemoryAccess::Unknown;
}

public:
explicit Cuda(int group = -1, int dev = get_current_device())
: stream(get_a_stream(group, dev)), device(dev)
{ }

/// Create a resource from a custom stream.
/// If device is specified it must match the stream. If device is
/// unspecified, we will get it from the stream.
/// This may be called before main if device is specified as no calls to
/// the runtime are made in this case.
Cuda(int group = -1, int dev = 0)
: stream(get_a_stream(group)), device(dev)
{
}

/// Create a resource from a custom stream
/// The device specified must match the stream, if none is specified the
/// currently selected device is used.
static Cuda CudaFromStream(cudaStream_t s, int dev = -1)
{
if (dev < 0) {
campCudaErrchk(cudaGetDevice(&dev));
}
return Cuda(s, dev);
}

Expand All @@ -215,7 +158,7 @@ namespace resources
campCudaErrchk(cudaStreamCreate(&s));
#endif
return s;
}(), get_current_device());
}());
return c;
}

Expand Down Expand Up @@ -275,7 +218,7 @@ namespace resources
void deallocate(void *p, MemoryAccess ma = MemoryAccess::Unknown)
{
auto d{device_guard(device)};
if (ma == MemoryAccess::Unknown) {
if(ma == MemoryAccess::Unknown) {
ma = get_access_type(p);
}
switch (ma) {
Expand All @@ -292,7 +235,6 @@ namespace resources
break;
case MemoryAccess::Unknown:
::camp::throw_re("Unknown memory access type, cannot free");
break;
}
}
void memcpy(void *dst, const void *src, size_t size)
Expand Down
83 changes: 26 additions & 57 deletions include/camp/resource/hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,11 @@ For details about use and distribution, please read LICENSE and NOTICE from
#ifndef __CAMP_HIP_HPP
#define __CAMP_HIP_HPP

#include "camp/defines.hpp"
#include "camp/resource/event.hpp"
#include "camp/resource/platform.hpp"

#ifdef CAMP_ENABLE_HIP

#include <hip/hip_runtime.h>
#include <mutex>
#include <vector>

namespace camp
{
Expand Down Expand Up @@ -79,63 +75,34 @@ namespace resources

class Hip
{
static int get_current_device()
static hipStream_t get_a_stream(int num)
{
int dev = -1;
campHipErrchk(hipGetDevice(&dev));
return dev;
}
static hipStream_t streams[16] = {};
static int previous = 0;

static int get_device_from_stream(hipStream_t stream)
{
return hipGetStreamDeviceId(stream);
}
static std::once_flag m_onceFlag;
static std::mutex m_mtx;

static hipStream_t get_a_stream(int num, int dev)
{
static constexpr int num_streams = 16;
struct Streams {
hipStream_t streams[num_streams] = {};
int previous = 0;

std::once_flag onceFlag;
std::mutex mtx;
};

static std::vector<Streams> devices([] {
int count = -1;
campHipErrchk(hipGetDeviceCount(&count));
return count;
}());

if (dev < 0) {
dev = get_current_device();
}

std::call_once(devices[dev].onceFlag, [=] {
auto d{device_guard(dev)};
if (devices[dev].streams[0] == nullptr) {
for (auto &s : devices[dev].streams) {
std::call_once(m_onceFlag, [] {
if (streams[0] == nullptr) {
for (auto &s : streams) {
campHipErrchk(hipStreamCreate(&s));
}
}
});

if (num < 0) {
std::lock_guard<std::mutex> guard(devices[dev].mtx);
devices[dev].previous = (devices[dev].previous + 1) % num_streams;
num = devices[dev].previous;
} else {
num = num % num_streams;
m_mtx.lock();
previous = (previous + 1) % 16;
m_mtx.unlock();
return streams[previous];
}

return devices[dev].streams[num];
return streams[num % 16];
}

// Private from-stream constructor
Hip(hipStream_t s, int dev)
: stream(s), device((dev >= 0) ? dev : get_device_from_stream(s))
{ }
Hip(hipStream_t s, int dev = 0) : stream(s), device(dev) {}

MemoryAccess get_access_type(void *p)
{
Expand All @@ -159,17 +126,19 @@ namespace resources
}

public:
explicit Hip(int group = -1, int dev = get_current_device())
: stream(get_a_stream(group, dev)), device(dev)
{ }

/// Create a resource from a custom stream.
/// If device is specified it must match the stream. If device is
/// unspecified, we will get it from the stream.
/// This may be called before main if device is specified as no calls to
/// the runtime are made in this case.
Hip(int group = -1, int dev = 0)
: stream(get_a_stream(group)), device(dev)
{
}

/// Create a resource from a custom stream
/// The device specified must match the stream, if none is specified the
/// currently selected device is used.
static Hip HipFromStream(hipStream_t s, int dev = -1)
{
if (dev < 0) {
campHipErrchk(hipGetDevice(&dev));
}
return Hip(s, dev);
}

Expand All @@ -185,7 +154,7 @@ namespace resources
campHipErrchk(hipStreamCreate(&s));
#endif
return s;
}(), get_current_device());
}());
return h;
}

Expand Down
2 changes: 1 addition & 1 deletion include/camp/resource/host.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace resources
class Host
{
public:
explicit Host(int /* group */ = -1) {}
Host(int /* group */ = -1) {}

// Methods
Platform get_platform() { return Platform::host; }
Expand Down
2 changes: 1 addition & 1 deletion include/camp/resource/omp_target.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ namespace resources
}
}
public:
explicit Omp(int group = -1, int device = omp_get_default_device())
Omp(int group = -1, int device = omp_get_default_device())
: addr(get_addr(group)), dev(device)
{
}
Expand Down
Loading

0 comments on commit 1744f66

Please sign in to comment.