Skip to content

Commit

Permalink
A bit more verbose and shared cuda and hip feature management (e.g.,
Browse files Browse the repository at this point in the history
streaming modes).
  • Loading branch information
pmccormick committed Nov 16, 2023
1 parent 4797933 commit afdb9c2
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 26 deletions.
5 changes: 2 additions & 3 deletions kitsune/experiments/inc/kitsune-tapir.mk
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@ GPU_STRIPMINE_FLAGS?=-mllvm -stripmine-count=1 -mllvm -stripmine-coarsen-factor=
TAPIR_CUDA_FLAGS?=-ftapir=cuda \
-O$(KITSUNE_OPTLEVEL) \
-mllvm -cuabi-opt-level=$(KITSUNE_ABI_OPTLEVEL) \
-ffp-contract=fast \
-fno-unroll-loops \
-mllvm -cuabi-arch=$(CUDA_ARCH) \
-ffp-contract=fast \
-mllvm -cuabi-prefetch=true \
-mllvm -cuabi-streams=true \
-mllvm -cuabi-streams=false \
$(GPU_STRIPMINE_FLAGS) \
$(TAPIR_CUDA_EXTRA_FLAGS)
#-mllvm -cuabi-run-post-opts \
Expand Down
27 changes: 12 additions & 15 deletions kitsune/runtime/cuda/cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,6 @@ extern unsigned _kitrt_MaxPrefetchStreams;
static unsigned _kitrt_CurPrefetchStream = 0;
std::vector<CUstream> _kitrt_PrefetchStreams;

// Enable auto-prefetching of managed memory pointers.
// This is a very simple approach that likely will
// have limited success. Note that it can significantly
// avoid page miss costs.
static bool _kitrt_cuEnablePrefetch = true;

// NOTE: Over a series of CUDA releases it is worthwhile to
// check in on the header files for replacement versioned
// entry points into the driver API. These are typically
Expand Down Expand Up @@ -332,11 +326,18 @@ bool __kitrt_cuInit() {
_kitrtUseHeuristicLaunchParameters = false;
}

for(unsigned si = 0; si < _kitrt_MaxPrefetchStreams; si++) {
CUstream stream;
CU_SAFE_CALL(cuStreamCreate_p(&stream, CU_STREAM_DEFAULT));
fprintf(stderr, "kitrt: create cuda prefetch stream %d\n", si);
_kitrt_PrefetchStreams.push_back(stream);
if (__kitrt_prefetchEnabled()) {
fprintf(stderr, "kitrt: prefetching enabled.\n");
}

if (__kitrt_prefetchStreamsEnabled()) {
fprintf(stderr, "kitrt: prefetch streams enabled.\n");
fprintf(stderr, "\t\tprefetch stream set size: %d", _kitrt_MaxPrefetchStreams);
for(unsigned si = 0; si < _kitrt_MaxPrefetchStreams; si++) {
CUstream stream;
CU_SAFE_CALL(cuStreamCreate_p(&stream, CU_STREAM_DEFAULT));
_kitrt_PrefetchStreams.push_back(stream);
}
}

return _kitrt_cuIsInitialized;
Expand Down Expand Up @@ -490,10 +491,6 @@ bool __kitrt_cuIsMemManaged(void *vp) {

// ---- Memory/data prefetch and data movement support.

// Switch on automatic prefetching for the CUDA runtime by setting the
// file-local _kitrt_cuEnablePrefetch flag.
void __kitrt_cuEnablePrefetch() {
  _kitrt_cuEnablePrefetch = true;
}

// Switch off automatic prefetching for the CUDA runtime by clearing the
// file-local _kitrt_cuEnablePrefetch flag.
void __kitrt_cuDisablePrefetch() {
  _kitrt_cuEnablePrefetch = false;
}

void __kitrt_cuMemPrefetchOnStream(void *vp, void *stream) {
assert(vp && "unexpected null pointer!");
if (not __kitrt_isMemPrefetched(vp)) {
Expand Down
14 changes: 8 additions & 6 deletions kitsune/runtime/hip/hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,12 +347,14 @@ bool __kitrt_hipInit() {
_kitrt_hipIsInitialized = true;
}

for(unsigned si = 0; si < _kitrt_MaxPrefetchStreams; si++) {
hipStream_t stream;
//HIP_SAFE_CALL(hipStreamCreateWithFlags_p(&stream, hipStreamNonBlocking));
HIP_SAFE_CALL(hipStreamCreate_p(&stream));
fprintf(stderr, "kitrt: create cuda prefetch stream %d\n", si);
_kitrt_PrefetchStreams.push_back(&stream);
if (__kitrt_prefetchStreamsEnabled()) {
fprintf(stderr, "kitrt: prefetch streams enabled.\n");
for(unsigned si = 0; si < _kitrt_MaxPrefetchStreams; si++) {
hipStream_t stream;
//HIP_SAFE_CALL(hipStreamCreateWithFlags_p(&stream, hipStreamNonBlocking));
HIP_SAFE_CALL(hipStreamCreate_p(&stream));
_kitrt_PrefetchStreams.push_back(&stream);
}
}

return _kitrt_hipIsInitialized;
Expand Down
23 changes: 23 additions & 0 deletions kitsune/runtime/kitrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ static bool _kitrtVerboseMode = false;
static unsigned _kitrtDefaultThreadsPerBlock = 256;
static bool _kitrtUseCustomLaunchParameters = false;

// Runtime-wide prefetch switch: starts disabled, is turned on by
// __kitrt_enablePrefetching() and read back by __kitrt_prefetchEnabled().
static bool _kitrtEnablePrefetch = false;
// Runtime-wide prefetch-stream switch: starts disabled, is turned on by
// __kitrt_enablePrefetchStreams() and read back by
// __kitrt_prefetchStreamsEnabled().
static bool _kitrtEnablePrefetchStreams = false;

static unsigned _kitrtThreadsPerBlock = 0;
unsigned _kitrt_MaxPrefetchStreams = 4;
int _kitrt_DefaultDeviceID = -1;
Expand Down Expand Up @@ -99,6 +102,11 @@ void __kitrt_CommonInit() {
__kitrt_getEnvValue("KITRT_VERBOSE", _kitrtVerboseMode);
__kitrt_getEnvValue("KITRT_MAX_NUM_PREFETCH_STREAMS", _kitrt_MaxPrefetchStreams);
__kitrt_getEnvValue("KITRT_DEVICE_ID", _kitrt_DefaultDeviceID);

if (__kitrt_prefetchEnabled())
fprintf(stderr, "kitrt: prefetch enabled.\n");
if (__kitrt_prefetchStreamsEnabled())
fprintf(stderr, "kitrt: maximum prefetch streams: %d\n", _kitrt_MaxPrefetchStreams);
}

void __kitrt_setVerboseMode(bool Enable) {
Expand Down Expand Up @@ -143,6 +151,21 @@ void __kitrt_resetLaunchParameters() {
_kitrtUseCustomLaunchParameters = false;
}

// Report whether prefetching is currently enabled in the runtime.
// The underlying flag starts out false and is set by
// __kitrt_enablePrefetching().
bool __kitrt_prefetchEnabled() { return _kitrtEnablePrefetch; }

// Turn prefetching on for the runtime; subsequent calls to
// __kitrt_prefetchEnabled() return true.
void __kitrt_enablePrefetching() { _kitrtEnablePrefetch = true; }

// Report whether the use of prefetch streams is currently enabled in the
// runtime. The underlying flag starts out false and is set by
// __kitrt_enablePrefetchStreams().
bool __kitrt_prefetchStreamsEnabled() { return _kitrtEnablePrefetchStreams; }

// Turn prefetch streams on for the runtime; subsequent calls to
// __kitrt_prefetchStreamsEnabled() return true.
void __kitrt_enablePrefetchStreams() { _kitrtEnablePrefetchStreams = true; }

#ifdef __cplusplus
} // extern "C"
Expand Down
6 changes: 5 additions & 1 deletion kitsune/runtime/kitrt.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,11 @@ extern "C" {
int &blocksPerGrid);
extern void __kitrt_resetLaunchParameters();

extern unsigned __kitrt_getNumPrefetchStreams(void);
extern unsigned __kitrt_getNumPrefetchStreams();
// Returns true once prefetching has been enabled for the runtime.
extern bool __kitrt_prefetchEnabled();
// Enables prefetching for the runtime.
extern void __kitrt_enablePrefetching();
// Returns true once prefetch streams have been enabled for the runtime.
extern bool __kitrt_prefetchStreamsEnabled();
// Enables prefetch streams for the runtime.
extern void __kitrt_enablePrefetchStreams();

#ifdef __cplusplus
} // extern "C"
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Transforms/Tapir/CudaABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1759,8 +1759,25 @@ Function *CudaABI::createCtor(GlobalVariable *Fatbinary,
IRBuilder<> CtorBuilder(CtorEntryBB);
const DataLayout &DL = M.getDataLayout();

if (CodeGenStreams && not CodeGenPrefetch)
report_fatal_error("kitsune: prefetching must be enabled to generate prefetch streams!");

// Tuck the call to initialize the Kitsune runtime into the constructor;
// this in turn will initialize CUDA...
if (CodeGenPrefetch) {
FunctionCallee KitRTEnablePrefetchFn =
M.getOrInsertFunction("__kitrt_enablePrefetching", VoidTy);
CtorBuilder.CreateCall(KitRTEnablePrefetchFn, {});
}

if (CodeGenStreams) {
FunctionCallee KitRTEnablePrefetchStreamsFn =
M.getOrInsertFunction("__kitrt_enablePrefetchStreams", VoidTy);
CtorBuilder.CreateCall(KitRTEnablePrefetchStreamsFn, {});
}



FunctionCallee KitRTInitFn = M.getOrInsertFunction("__kitrt_cuInit", VoidTy);
CtorBuilder.CreateCall(KitRTInitFn, {});

Expand Down
20 changes: 19 additions & 1 deletion llvm/lib/Transforms/Tapir/HipABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1995,7 +1995,25 @@ Function *HipABI::createCtor(GlobalVariable *Bundle, GlobalVariable *Wrapper) {

// Tuck some calls in that initialize the Kitsune runtime. This includes
// enabling xnack and explicitly initializing HIP (even though documentation
// suggests it is optional).
// suggests it is optional).

if (CodeGenStreams && not CodeGenPrefetch)
report_fatal_error("kitsune: prefetching must be enabled to generate prefetch streams!");

// Tuck the call to initialize the Kitsune runtime into the constructor;
// this in turn will initialize HIP...
if (CodeGenPrefetch) {
FunctionCallee KitRTEnablePrefetchFn =
M.getOrInsertFunction("__kitrt_enablePrefetching", VoidTy);
CtorBuilder.CreateCall(KitRTEnablePrefetchFn, {});
}

if (CodeGenStreams) {
FunctionCallee KitRTEnablePrefetchStreamsFn =
M.getOrInsertFunction("__kitrt_enablePrefetchStreams", VoidTy);
CtorBuilder.CreateCall(KitRTEnablePrefetchStreamsFn, {});
}

if (EnableXnack) {
FunctionCallee KitRTEnableXnackFn =
M.getOrInsertFunction("__kitrt_hipEnableXnack", VoidTy);
Expand Down

0 comments on commit afdb9c2

Please sign in to comment.