diff --git a/kitsune/experiments/euler3d/euler3d-forall.cpp b/kitsune/experiments/euler3d/euler3d-forall.cpp index 7bff7e520cc739..9ba721a2f2d694 100644 --- a/kitsune/experiments/euler3d/euler3d-forall.cpp +++ b/kitsune/experiments/euler3d/euler3d-forall.cpp @@ -401,7 +401,7 @@ int main(int argc, char** argv) return 0; } - int iterations = 2000; + int iterations = 4000; if (argc > 2) iterations = atoi(argv[2]); diff --git a/kitsune/experiments/euler3d/euler3d-kokkos.cpp b/kitsune/experiments/euler3d/euler3d-kokkos.cpp index 8234c4baa1b139..a4a4a5e7c69a9a 100644 --- a/kitsune/experiments/euler3d/euler3d-kokkos.cpp +++ b/kitsune/experiments/euler3d/euler3d-kokkos.cpp @@ -443,7 +443,7 @@ int main(int argc, char** argv) return 0; } - int iterations = 2000; + int iterations = 4000; if (argc > 2) iterations = atoi(argv[2]); diff --git a/kitsune/experiments/launch-attr/makefile b/kitsune/experiments/launch-attr/makefile index d89b3dd1ba5418..1d21b3f5d578a9 100644 --- a/kitsune/experiments/launch-attr/makefile +++ b/kitsune/experiments/launch-attr/makefile @@ -13,7 +13,7 @@ all: ${targets} srad-forall.cuda.${host_arch}: srad-forall.cpp @echo $@ @$(TIME_CMD) $(KIT_CXX) $(TAPIR_CUDA_FLAGS) -DTHREADS_PER_BLOCK=$(THREADS_PER_BLOCK) \ - -mllvm --cuabi-occupancy-launches=false -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib + -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib @$(FILE_SIZE) srad-forall-exprs.cuda.${host_arch}: srad-forall-exprs.cpp diff --git a/kitsune/experiments/launch-attr/srad-forall.cpp b/kitsune/experiments/launch-attr/srad-forall.cpp index 0f08d3f9e30dd2..f3d7d8e402c62e 100644 --- a/kitsune/experiments/launch-attr/srad-forall.cpp +++ b/kitsune/experiments/launch-attr/srad-forall.cpp @@ -111,11 +111,13 @@ int main(int argc, char* argv[]) cout << " Starting benchmark...\n" << std::flush; auto start_time = chrono::steady_clock::now(); + [[kitsune::launch(8)]] forall(int i = 0; i < rows; i++) { iN[i] = i-1; iS[i] = i+1; } + [[kitsune::launch(8)]] forall(int j=0; j < cols; j++) { jW[j] = j-1; jE[j] = j+1; @@ -126,6 +128,7 @@ int main(int argc, char* argv[]) jW[0] = 0; jE[cols-1] = cols-1; + [[kitsune::launch(64)]] forall(int k = 0; k < size_I; k++) J[k] = (float)exp(I[k]) ; @@ -149,7 +152,7 @@ int main(int argc, char* argv[]) q0sqr = varROI / (meanROI*meanROI); auto loop1_start_time = chrono::steady_clock::now(); - [[kitsune::launch(THREADS_PER_BLOCK)]] + [[kitsune::launch(16)]] forall(int i = 0 ; i < rows; i++) { for(int j = 0; j < cols; j++) { @@ -191,7 +194,7 @@ int main(int argc, char* argv[]) loop1_min_time = etime; auto loop2_start_time = chrono::steady_clock::now(); - [[kitsune::launch(THREADS_PER_BLOCK)]] + [[kitsune::launch(8)]] forall(int i = 0; i < rows; i++) { for(int j = 0; j < cols; j++) { // current index diff --git a/kitsune/experiments/launch-bench.sh b/kitsune/experiments/launch-bench.sh index c353a41776bfce..93d881eab20e0d 100755 --- a/kitsune/experiments/launch-bench.sh +++ b/kitsune/experiments/launch-bench.sh @@ -12,7 +12,7 @@ echo "gpu: $gpuname" >> $outfile echo "time stamp: $tstamp" >> $outfile echo "ThreadsPerBlock,Time" >> $outfile -for tpb in {16..1024..16} +for tpb in {8..512..16} do export KITCUDA_THREADS_PER_BLOCK=$tpb echo " running '$*' with $tpb threads-per-block" diff --git a/kitsune/experiments/makefile b/kitsune/experiments/makefile index 428ebb05d29871..372c794b1dc90a 100644 --- a/kitsune/experiments/makefile +++ b/kitsune/experiments/makefile @@ -21,5 +21,6 @@ $(subdirs): run: $(subdirs) $(MAKE) -C $< $(MAKECMDGOALS) - +launch_bench: $(subdirs) + $(MAKE) -C $< $(MAKECMDGOALS) diff --git a/kitsune/experiments/noforall/makefile b/kitsune/experiments/noforall/makefile index 9b6a58fb170f1c..629490d7e8dad3 100644 --- a/kitsune/experiments/noforall/makefile +++ b/kitsune/experiments/noforall/makefile @@ -31,6 +31,9 @@ run: $(targets) .PHONY: all clean run +launch_bench: + @echo "launch benchmark invalid... skipping..." + clean: -rm -f *.${host_arch} *.ll *.o -rm -f *~ core *.log diff --git a/kitsune/experiments/srad/makefile b/kitsune/experiments/srad/makefile index 3b680a87029f38..33a9224370e09c 100644 --- a/kitsune/experiments/srad/makefile +++ b/kitsune/experiments/srad/makefile @@ -29,8 +29,8 @@ srad-forall.hip.${host_arch}: srad-forall.cpp @$(FILE_SIZE) # kokkos-based tests (w/out views) -#srad-kokkos.cuda.kitsune.${host_arch}: srad-kokkos-no-view.cpp -# /usr/bin/time $(KIT_CXX) $(TAPIR_CUDA_FLAGS) $(KITSUNE_KOKKOS_FLAGS) -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib +srad-kokkos.cuda.kitsune.${host_arch}: srad-kokkos-no-view.cpp + /usr/bin/time $(KIT_CXX) $(TAPIR_CUDA_FLAGS) $(KITSUNE_KOKKOS_FLAGS) -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib #srad-kokkos.hip.kitsune.${host_arch}: srad-kokkos-no-view.cpp # /usr/bin/time $(KIT_CXX) $(TAPIR_HIP_FLAGS) -o $@ $< -Xlinker -rpath=$(KITSUNE_PREFIX)/lib