move raja_view perf test to benchmark folder
artv3 committed Sep 17, 2024
1 parent 17a2b04 commit f54bcc1
Showing 4 changed files with 174 additions and 149 deletions.
4 changes: 4 additions & 0 deletions benchmark/CMakeLists.txt
@@ -18,3 +18,7 @@
raja_add_benchmark(
NAME ltimes
SOURCES ltimes.cpp)

raja_add_benchmark(
NAME raja_view_blur
SOURCES raja_view_blur.cpp)
170 changes: 170 additions & 0 deletions benchmark/raja_view_blur.cpp
@@ -0,0 +1,170 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2016-24, Lawrence Livermore National Security, LLC
// and RAJA project contributors. See the RAJA/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include <RAJA/RAJA.hpp>
#include "RAJA/util/Timer.hpp"
#include <iostream>

/*
* RAJA view performance test
* Kernel performs a 2D Gaussian blur
*
*/
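// The same blur is timed twice: first indexing through RAJA::View objects,
// then indexing the raw device pointers directly, so the two elapsed times
// expose any overhead introduced by the View abstraction.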

#if defined(RAJA_ENABLE_HIP)
using device_pol = RAJA::hip_exec<256>;
using device_resources = RAJA::resources::Hip;

using kernel_pol = RAJA::KernelPolicy<
  RAJA::statement::HipKernelFixed<256,
    RAJA::statement::For<1, RAJA::hip_global_size_y_direct<16>,
      RAJA::statement::For<0, RAJA::hip_global_size_x_direct<16>,
        RAJA::statement::Lambda<0>
      >
    >
  >
>;
#elif defined(RAJA_ENABLE_CUDA)
using device_pol = RAJA::cuda_exec<256>;
using device_resources = RAJA::resources::Cuda;

using kernel_pol = RAJA::KernelPolicy<
  RAJA::statement::CudaKernelFixed<256,
    RAJA::statement::For<1, RAJA::cuda_global_size_y_direct<16>,
      RAJA::statement::For<0, RAJA::cuda_global_size_x_direct<16>,
        RAJA::statement::Lambda<0>
      >
    >
  >
>;
#else
// Fallback when no GPU back-end is enabled: run sequentially on the host.
using device_pol = RAJA::seq_exec;
using device_resources = RAJA::resources::Host;

using kernel_pol = RAJA::KernelPolicy<
  RAJA::statement::For<1, RAJA::seq_exec,
    RAJA::statement::For<0, RAJA::seq_exec,
      RAJA::statement::Lambda<0>
    >
  >
>;
#endif

using host_resources = RAJA::resources::Host;

int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
{

const int N = 10000;
const int K = 17;

device_resources def_device_res{device_resources::get_default()};
host_resources def_host_res{host_resources::get_default()};

auto timer = RAJA::Timer();

// warm-up launch to initialize the device stream so setup cost is not timed
RAJA::forall<device_pol>
(RAJA::RangeSegment(0,1), [=] RAJA_DEVICE (int i) {
printf(" \n");
});

int * array = def_host_res.allocate<int>(N * N);
int * array_copy = def_host_res.allocate<int>(N * N);

// the large array acts as the image
for (int i = 0; i < N * N; ++i) {
array[i] = 1;
array_copy[i] = 1;
}

// small array that acts as the blur kernel
int * kernel = def_host_res.allocate<int>(K * K);
for (int i = 0; i < K * K; ++i) {
kernel[i] = 2;
}

// allocate device copies and transfer the data to the GPU
int* d_array = def_device_res.allocate<int>(N * N);
int* d_array_copy = def_device_res.allocate<int>(N * N);
int* d_kernel = def_device_res.allocate<int>(K * K);

def_device_res.memcpy(d_array, array, N * N * sizeof(int));
def_device_res.memcpy(d_array_copy, array_copy, N * N * sizeof(int));
def_device_res.memcpy(d_kernel, kernel, K * K * sizeof(int));

constexpr int DIM = 2;
RAJA::View<int, RAJA::Layout<DIM, int, 1>> array_view(d_array, N, N);
RAJA::View<int, RAJA::Layout<DIM, int, 1>> array_view_copy(d_array_copy, N, N);
RAJA::View<int, RAJA::Layout<DIM, int, 1>> kernel_view(d_kernel, K, K);
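// RAJA::Layout<DIM, int, 1> is a row-major 2D layout whose last dimension
// (index 1) has unit stride, so array_view(x, y) maps to d_array[x * N + y],
// the same indexing done by hand in the raw-pointer kernel below.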

RAJA::RangeSegment range_i(0, N);
RAJA::RangeSegment range_j(0, N);

timer.start();

RAJA::kernel<kernel_pol>
(RAJA::make_tuple(range_i, range_j),
[=] RAJA_DEVICE (int i, int j) {
int sum = 0;

// loop over the blur kernel
for (int m = 0; m < K; ++m) {
for (int n = 0; n < K; ++n) {
int x = i + m;
int y = j + n;

// accumulate the kernel contribution wherever it overlaps the image
if (x < N && y < N) {
sum += kernel_view(m, n) * array_view(x, y);
}
}
}

array_view(i, j) += sum;
}
);

timer.stop();

std::cout << "Elapsed time with RAJA view : " << timer.elapsed() << std::endl;


timer.reset();
timer.start();

RAJA::kernel<kernel_pol>
(RAJA::make_tuple(range_i, range_j),
[=] RAJA_DEVICE (int i, int j) {
int sum = 0;

// loop over the blur kernel
for (int m = 0; m < K; ++m) {
for (int n = 0; n < K; ++n) {
int x = i + m;
int y = j + n;

// accumulate the kernel contribution wherever it overlaps the image
if (x < N && y < N) {
sum += d_kernel[m * K + n] * d_array_copy[x * N + y];
}
}
}

d_array_copy[i * N + j] += sum;
}
);
timer.stop();

std::cout << "Elapsed time with NO RAJA view : " << timer.elapsed() << std::endl;

def_device_res.memcpy(array, d_array, N * N * sizeof(int));
def_device_res.memcpy(array_copy, d_array_copy, N * N * sizeof(int));

def_device_res.deallocate(d_array);
def_device_res.deallocate(d_array_copy);
def_device_res.deallocate(d_kernel);

def_host_res.deallocate(array);
def_host_res.deallocate(array_copy);
def_host_res.deallocate(kernel);

return 0;
}
4 changes: 0 additions & 4 deletions examples/CMakeLists.txt
@@ -3,10 +3,6 @@
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

raja_add_executable(
NAME raja_view_slowdown
SOURCES raja_view_slowdown.cpp)

raja_add_executable(
NAME tut_launch_basic
SOURCES tut_launch_basic.cpp)
145 changes: 0 additions & 145 deletions examples/raja_view_slowdown.cpp

This file was deleted.
