forked from pmccormick/kitsune
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A bit more cleanup, plus some infrastructure for the multi-target
test code (added a makefile and a Kokkos version). Not all the pieces are in place yet to fully test it.
- Loading branch information
1 parent
d884674
commit ff078df
Showing
4 changed files
with
112 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
include ../experiments.mk | ||
|
||
# Build list: both CUDA executables are appended to `targets` only when | ||
# BUILD_CUDA_EXPERIMENTS is exactly "true"; otherwise `targets` is left as-is. | ||
# NOTE(review): BUILD_CUDA_EXPERIMENTS and host_arch are presumably provided by | ||
# the included ../experiments.mk -- confirm. | ||
ifeq ($(BUILD_CUDA_EXPERIMENTS),true) | ||
targets += multi-target.cuda.${host_arch} | ||
targets += kokkos-multi-target.cuda.${host_arch} | ||
endif | ||
|
||
# Build every executable collected in `targets`. | ||
all: ${targets} | ||
|
||
# forall-based tests | ||
# Compile multi-target.cpp into multi-target.cuda.<host_arch> with $(KIT_CXX). | ||
# The leading `@` keeps make from echoing each recipe line; the target name is | ||
# printed explicitly instead. -rpath records $(KITSUNE_PREFIX)/lib in the binary | ||
# as a runtime library search path. | ||
# NOTE(review): this ".cuda" target is built with TAPIR_OPENCILK_FLAGS while the | ||
# kokkos variant uses TAPIR_CUDA_FLAGS -- confirm this is intentional. | ||
# NOTE(review): TIME_CMD and FILE_SIZE are defined elsewhere; presumably they time | ||
# the compile and report the output size -- confirm. | ||
multi-target.cuda.${host_arch}: multi-target.cpp | ||
@echo $@ | ||
@$(TIME_CMD) $(KIT_CXX) $(TAPIR_OPENCILK_FLAGS) -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib | ||
@$(FILE_SIZE) | ||
|
||
# Compile kokkos-multi-target.cpp into kokkos-multi-target.cuda.<host_arch> with | ||
# $(KIT_CXX), passing both TAPIR_CUDA_FLAGS and KITSUNE_KOKKOS_FLAGS. | ||
# -rpath records $(KITSUNE_PREFIX)/lib as a runtime library search path. | ||
# NOTE(review): TIME_CMD and FILE_SIZE are defined elsewhere -- confirm their output. | ||
kokkos-multi-target.cuda.${host_arch}: kokkos-multi-target.cpp | ||
@echo $@ | ||
@$(TIME_CMD) $(KIT_CXX) $(TAPIR_CUDA_FLAGS) $(KITSUNE_KOKKOS_FLAGS) -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib | ||
@$(FILE_SIZE) | ||
|
||
##### | ||
# Multi-line variable whose value is a line break. RUN_test appends it so that | ||
# each command generated by $(foreach ...) ends up on its own recipe line. | ||
define newline | ||
|
||
|
||
endef | ||
|
||
# $(call RUN_test,<prog>): emit one recipe line that runs ./<prog> with its | ||
# stdout redirected to <prog>.log. `$$` passes a literal `$` to the shell, so | ||
# the shell sees a command substitution `$(./<prog> > <prog>.log)`. | ||
define RUN_test | ||
$$(./$(1) > $(1).log) $(newline) | ||
endef | ||
|
||
# Build all targets if needed, then run each one via RUN_test, which sends the | ||
# program's stdout to <target>.log. | ||
run: $(targets) | ||
@echo "running generated executables..." | ||
$(foreach prog,$(targets),$(call RUN_test,$(prog))) | ||
##### | ||
|
||
# all, clean and run name actions, not files, so make always runs them. | ||
.PHONY: all clean run | ||
|
||
# Remove built executables (*.<host_arch>), *.ll and *.o files, editor backups | ||
# (*~), core files and run logs. The leading `-` tells make to ignore a failing rm. | ||
clean: | ||
-rm -f *.${host_arch} *.ll *.o | ||
-rm -f *~ core *.log | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +1,75 @@ | ||
#include <cstdio> | ||
#include <stdlib.h> | ||
#include <string> | ||
#include <iostream> | ||
#include <iomanip> | ||
#include <chrono> | ||
#include <kitsune.h> | ||
#include "kitsune/timer.h" | ||
#include "kitsune/kitrt/llvm-gpu.h" | ||
#include "kitsune/kitrt/kitrt-cuda.h" | ||
|
||
using namespace std; | ||
using namespace kitsune; | ||
|
||
// Default element count for each vector: 1024 * 1024 * 256 = 268,435,456. | ||
// NOTE(review): main() in this listing also spells the same product inline; | ||
// this is a rendered diff, so one of the two is presumably the removed form. | ||
const size_t ARRAY_SIZE = 1024 * 1024 * 256; | ||
|
||
// NOTE(review): this listing is a rendered diff; removed and added lines are | ||
// interleaved without +/- markers, so the span below is not one compilable unit. | ||
// | ||
// Non-template random_fill: writes rand() / (float)RAND_MAX into data[0..N). | ||
void random_fill(float *data, size_t N) { | ||
for(size_t i = 0; i < N; ++i) | ||
data[i] = rand() / (float)RAND_MAX; | ||
} | ||
|
||
// fill: writes the element index, converted to float, into data[0..N). | ||
void fill(float *data, size_t N) { | ||
for(size_t i = 0; i < N; ++i) | ||
data[i] = float(i); | ||
// NOTE(review): fill() has no closing brace of its own in this listing; the | ||
// lines that follow presumably come from the other side of the diff. | ||
// Templated random_fill: despite the name, stores the deterministic value | ||
// T(i) / 1000.0 in each element, iterating with a forall loop. | ||
template<typename T> | ||
void random_fill(T *data, size_t N) { | ||
forall(size_t i = 0; i < N; ++i) | ||
data[i] = T(i) / 1000.0; | ||
} | ||
|
||
// Vector-addition benchmark driver: reads an optional element count (argv[1]) | ||
// and iteration count (argv[2]), allocates three float arrays, fills A and B, | ||
// times C[i] = A[i] + B[i] in a forall loop tagged tapir::target("cuda"), | ||
// checks C on the host, prints timing, and frees the arrays. | ||
// Returns 1 if any element of C mismatches, 0 otherwise. | ||
// NOTE(review): this listing is a rendered diff; removed and added lines are | ||
// interleaved without +/- markers, so several statements appear in two forms | ||
// (size initialisation, argument parsing, allocation, the forall loop). | ||
int main (int argc, char* argv[]) { | ||
size_t size = ARRAY_SIZE; | ||
if (argc > 1) | ||
using namespace std; | ||
size_t size = 1024 * 1024 * 256; | ||
unsigned int iterations = 10; | ||
if (argc >= 2) | ||
size = atol(argv[1]); | ||
// Iteration count is read only when exactly two arguments are supplied. | ||
if (argc == 3) | ||
iterations = atoi(argv[2]); | ||
|
||
fprintf(stdout, "problem size: %ld\n", size); | ||
float *A = (float *)__kitrt_cuMemAllocManaged(sizeof(float) * size); | ||
float *B = (float *)__kitrt_cuMemAllocManaged(sizeof(float) * size); | ||
float *C = (float *)__kitrt_cuMemAllocManaged(sizeof(float) * size); | ||
|
||
cout << setprecision(5); | ||
cout << "\n"; | ||
cout << "---- multi-target vector addition benchmark (forall) ----\n" | ||
<< " Vector size: " << size << " elements.\n\n"; | ||
cout << " Allocating arrays and filling with random values..." | ||
<< std::flush; | ||
float *A = alloc<float>(size); | ||
float *B = alloc<float>(size); | ||
float *C = alloc<float>(size); | ||
random_fill(A, size); | ||
random_fill(B, size); | ||
// NOTE(review): B is filled a second time here; possibly a diff artifact, | ||
// otherwise redundant -- confirm against the actual file. | ||
random_fill(B, size); | ||
cout << " done.\n\n"; | ||
|
||
[[tapir::target("cuda")]] | ||
forall(size_t i = 0; i < size; i++) | ||
C[i] = A[i] + B[i]; | ||
double elapsed_time; | ||
// Starting bounds for the running minimum / maximum iteration time (seconds). | ||
double min_time = 100000.0; | ||
double max_time = 0.0; | ||
for(unsigned t = 0; t < iterations; t++) { | ||
auto start_time = chrono::steady_clock::now(); | ||
[[tapir::target("cuda")]] | ||
// NOTE(review): the index is int while size is size_t; a size above INT_MAX | ||
// would not be covered by this loop -- consider size_t as in the other form. | ||
forall(int i = 0; i < size; i++) { | ||
C[i] = A[i] + B[i]; | ||
} | ||
auto end_time = chrono::steady_clock::now(); | ||
elapsed_time = chrono::duration<double>(end_time-start_time).count(); | ||
if (elapsed_time < min_time) | ||
min_time = elapsed_time; | ||
if (elapsed_time > max_time) | ||
max_time = elapsed_time; | ||
cout << "\t" << t << ". iteration time: " << elapsed_time << ".\n"; | ||
} | ||
cout << " Checking final result..." << std::flush; | ||
// Host-side check: recompute each sum and count exact mismatches. | ||
size_t error_count = 0; | ||
for(size_t i = 0; i < size; i++) { | ||
float sum = A[i] + B[i]; | ||
if (C[i] != sum) | ||
error_count++; | ||
} | ||
if (error_count) { | ||
cout << " incorrect result found! (" | ||
<< error_count << " errors found)\n\n"; | ||
return 1; | ||
} else { | ||
// NOTE(review): elapsed_time holds only the last iteration's time here, so | ||
// "Total time" and elements/sec describe that single iteration; it is also | ||
// uninitialised if iterations is 0. | ||
cout << " pass (answers match).\n\n" | ||
<< " Total time: " << elapsed_time | ||
<< " seconds. (" << size / elapsed_time << " elements/sec.)\n" | ||
<< "*** " << min_time << ", " << max_time << "\n" | ||
<< "----\n\n"; | ||
} | ||
|
||
printf("%f\n", C[10]); | ||
dealloc(A); | ||
dealloc(B); | ||
dealloc(C); | ||
return 0; | ||
} | ||
|
||
|