Skip to content

Commit

Permalink
A bit more cleanup and adding some infrastructure for the multi-target
Browse files Browse the repository at this point in the history
test code (added makefile and a kokkos version).  Not all the pieces
are in place to fully test.
  • Loading branch information
pmccormick committed Oct 31, 2023
1 parent d884674 commit ff078df
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 39 deletions.
6 changes: 3 additions & 3 deletions clang/lib/Sema/SemaStmtAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ static Attr *handleOpenCLUnrollHint(Sema &S, Stmt *St, const ParsedAttr &A,
}

static Attr *handleTapirTargetAttr(Sema &S, Stmt *St, const ParsedAttr &A,
SourceRange Range)
SourceRange Range)
{
// Check the details of the attribute syntax...
if (A.getNumArgs() != 1) {
Expand Down Expand Up @@ -583,8 +583,8 @@ static Attr *handleTapirStrategyAttr(Sema &S, Stmt *St, const ParsedAttr &A,

if (errState)
return nullptr;
else
return ::new (S.Context) TapirStrategyAttr(S.Context, A, strategyKind);

return ::new (S.Context) TapirStrategyAttr(S.Context, A, strategyKind);
}

// =====+
Expand Down
10 changes: 6 additions & 4 deletions kitsune/experiments/multi-target/kokkos-multi-target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

template<typename T>
void random_fill(T *data, size_t N) {
for(size_t i = 0; i < N; ++i)
data[i] = rand() / (T)RAND_MAX;
Kokkos::parallel_for(N, KOKKOS_LAMBDA(const int i) {
data[i] = T(i) / 1000.0;
});
}

int main (int argc, char* argv[]) {
Expand All @@ -25,10 +26,11 @@ int main (int argc, char* argv[]) {

cout << setprecision(5);
cout << "\n";
cout << "---- vector addition benchmark (forall) ----\n"
<< " Vector size: " << size << " elements.\n\n";
cout << "---- multi-target vector addition benchmark (kokkos) ----\n"
<< " Vector size: " << size << " elements.\n\n";
cout << " Allocating arrays and filling with random values..."
<< std::flush;

Kokkos::initialize(argc, argv); {
float *A = alloc<float>(size);
float *B = alloc<float>(size);
Expand Down
41 changes: 41 additions & 0 deletions kitsune/experiments/multi-target/makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Shared settings for the experiments come from the parent directory.
# Variables used below but not set in this file (host_arch, KIT_CXX,
# TAPIR_*_FLAGS, ...) are presumably defined there -- TODO confirm.
include ../experiments.mk

# The two executables are appended to the build list only when
# BUILD_CUDA_EXPERIMENTS is exactly "true"; otherwise this file adds
# nothing to $(targets).
ifeq ($(BUILD_CUDA_EXPERIMENTS),true)
targets += multi-target.cuda.${host_arch}
targets += kokkos-multi-target.cuda.${host_arch}
endif

# Build every executable collected in $(targets).  This is the first rule
# declared in this file (experiments.mk may declare earlier ones).
all: ${targets}

# forall-based tests
# Compile multi-target.cpp ($<) into the target executable ($@) with
# $(KIT_CXX), embedding $(KITSUNE_PREFIX)/lib as a runtime library search
# path via -rpath.  The leading '@' keeps make from echoing the commands;
# the target name is printed explicitly instead.
# NOTE(review): this ".cuda" target is built with TAPIR_OPENCILK_FLAGS rather
# than TAPIR_CUDA_FLAGS -- confirm this is intended for the multi-target test.
multi-target.cuda.${host_arch}: multi-target.cpp
@echo $@
@$(TIME_CMD) $(KIT_CXX) $(TAPIR_OPENCILK_FLAGS) -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib
@$(FILE_SIZE)

# Kokkos version of the test: same recipe shape, but compiled with
# $(TAPIR_CUDA_FLAGS) plus $(KITSUNE_KOKKOS_FLAGS).
# TIME_CMD and FILE_SIZE are not defined in this file; presumably they come
# from experiments.mk -- TODO confirm.
kokkos-multi-target.cuda.${host_arch}: kokkos-multi-target.cpp
@echo $@
@$(TIME_CMD) $(KIT_CXX) $(TAPIR_CUDA_FLAGS) $(KITSUNE_KOKKOS_FLAGS) -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib
@$(FILE_SIZE)

#####
# A variable whose value is a single literal newline.  It is used to split
# the text produced by $(foreach ...) below into one recipe line per program.
define newline


endef

# $(call RUN_test,prog) expands to one recipe line that runs ./prog and
# writes its standard output to prog.log.  The program is invoked directly;
# wrapping it in a shell command substitution is unnecessary because all of
# its output is already redirected to the log file.
define RUN_test
./$(1) > $(1).log $(newline)
endef

# Run every executable in $(targets) (building them first if needed), each
# on its own recipe line so a failing program stops the run.
run: $(targets)
	@echo "running generated executables..."
	$(foreach prog,$(targets),$(call RUN_test,$(prog)))
#####

# These goals name actions, not files, so make must always run them.
.PHONY: all clean run

# Remove generated executables, intermediate LLVM IR/object files, editor
# backups, core files and run logs.  The leading '-' lets make continue even
# if rm reports an error.
clean:
	-rm -f *.${host_arch} *.ll *.o
	-rm -f *~ core *.log

94 changes: 62 additions & 32 deletions kitsune/experiments/multi-target/multi-target.cpp
Original file line number Diff line number Diff line change
@@ -1,45 +1,75 @@
#include <cstdio>
#include <stdlib.h>
#include <string>
#include <iostream>
#include <iomanip>
#include <chrono>
#include <kitsune.h>
#include "kitsune/timer.h"
#include "kitsune/kitrt/llvm-gpu.h"
#include "kitsune/kitrt/kitrt-cuda.h"

using namespace std;
using namespace kitsune;

const size_t ARRAY_SIZE = 1024 * 1024 * 256;

void random_fill(float *data, size_t N) {
for(size_t i = 0; i < N; ++i)
data[i] = rand() / (float)RAND_MAX;
}

void fill(float *data, size_t N) {
for(size_t i = 0; i < N; ++i)
data[i] = float(i);
template<typename T>
void random_fill(T *data, size_t N) {
forall(size_t i = 0; i < N; ++i)
data[i] = T(i) / 1000.0;
}

int main (int argc, char* argv[]) {
size_t size = ARRAY_SIZE;
if (argc > 1)
using namespace std;
size_t size = 1024 * 1024 * 256;
unsigned int iterations = 10;
if (argc >= 2)
size = atol(argv[1]);
if (argc == 3)
iterations = atoi(argv[2]);

fprintf(stdout, "problem size: %ld\n", size);
float *A = (float *)__kitrt_cuMemAllocManaged(sizeof(float) * size);
float *B = (float *)__kitrt_cuMemAllocManaged(sizeof(float) * size);
float *C = (float *)__kitrt_cuMemAllocManaged(sizeof(float) * size);

cout << setprecision(5);
cout << "\n";
cout << "---- multi-target vector addition benchmark (forall) ----\n"
<< " Vector size: " << size << " elements.\n\n";
cout << " Allocating arrays and filling with random values..."
<< std::flush;
float *A = alloc<float>(size);
float *B = alloc<float>(size);
float *C = alloc<float>(size);
random_fill(A, size);
random_fill(B, size);
random_fill(B, size);
cout << " done.\n\n";

[[tapir::target("cuda")]]
forall(size_t i = 0; i < size; i++)
C[i] = A[i] + B[i];
double elapsed_time;
double min_time = 100000.0;
double max_time = 0.0;
for(unsigned t = 0; t < iterations; t++) {
auto start_time = chrono::steady_clock::now();
[[tapir::target("cuda")]]
forall(int i = 0; i < size; i++) {
C[i] = A[i] + B[i];
}
auto end_time = chrono::steady_clock::now();
elapsed_time = chrono::duration<double>(end_time-start_time).count();
if (elapsed_time < min_time)
min_time = elapsed_time;
if (elapsed_time > max_time)
max_time = elapsed_time;
cout << "\t" << t << ". iteration time: " << elapsed_time << ".\n";
}
cout << " Checking final result..." << std::flush;
size_t error_count = 0;
for(size_t i = 0; i < size; i++) {
float sum = A[i] + B[i];
if (C[i] != sum)
error_count++;
}
if (error_count) {
cout << " incorrect result found! ("
<< error_count << " errors found)\n\n";
return 1;
} else {
cout << " pass (answers match).\n\n"
<< " Total time: " << elapsed_time
<< " seconds. (" << size / elapsed_time << " elements/sec.)\n"
<< "*** " << min_time << ", " << max_time << "\n"
<< "----\n\n";
}

printf("%f\n", C[10]);
dealloc(A);
dealloc(B);
dealloc(C);
return 0;
}


0 comments on commit ff078df

Please sign in to comment.