move raja_view perf test to benchmark folder
artv3 committed Sep 17, 2024
1 parent 17a2b04 commit f54bcc1
Showing 4 changed files with 174 additions and 149 deletions.
4 changes: 4 additions & 0 deletions benchmark/CMakeLists.txt
@@ -18,3 +18,7 @@
raja_add_benchmark(
NAME ltimes
SOURCES ltimes.cpp)

raja_add_benchmark(
NAME raja_view_blur
SOURCES raja_view_blur.cpp)
170 changes: 170 additions & 0 deletions benchmark/raja_view_blur.cpp
@@ -0,0 +1,170 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2016-24, Lawrence Livermore National Security, LLC
// and RAJA project contributors. See the RAJA/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include <RAJA/RAJA.hpp>
#include "RAJA/util/Timer.hpp"
#include <iostream>

/*
* RAJA view performance test
* Kernel performs a 2D Gaussian blur
*
*/
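// The same blur is timed twice: first indexing through RAJA::View objects,
// then indexing the raw device pointers directly, so the two elapsed times
// expose any overhead introduced by the View abstraction.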

#if defined(RAJA_ENABLE_HIP)
using device_pol = RAJA::hip_exec<256>;
using device_resources = RAJA::resources::Hip;

using kernel_pol = RAJA::KernelPolicy<
  RAJA::statement::HipKernelFixed<256,
    RAJA::statement::For<1, RAJA::hip_global_size_y_direct<16>,
      RAJA::statement::For<0, RAJA::hip_global_size_x_direct<16>,
        RAJA::statement::Lambda<0>
      >
    >
  >
>;
#elif defined(RAJA_ENABLE_CUDA)
using device_pol = RAJA::cuda_exec<256>;
using device_resources = RAJA::resources::Cuda;

using kernel_pol = RAJA::KernelPolicy<
  RAJA::statement::CudaKernelFixed<256,
    RAJA::statement::For<1, RAJA::cuda_global_size_y_direct<16>,
      RAJA::statement::For<0, RAJA::cuda_global_size_x_direct<16>,
        RAJA::statement::Lambda<0>
      >
    >
  >
>;
#else
// Fallback when no GPU back-end is enabled: run sequentially on the host.
using device_pol = RAJA::seq_exec;
using device_resources = RAJA::resources::Host;

using kernel_pol = RAJA::KernelPolicy<
  RAJA::statement::For<1, RAJA::seq_exec,
    RAJA::statement::For<0, RAJA::seq_exec,
      RAJA::statement::Lambda<0>
    >
  >
>;
#endif

using host_resources = RAJA::resources::Host;

int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
{

const int N = 10000;
const int K = 17;

device_resources def_device_res{device_resources::get_default()};
host_resources def_host_res{host_resources::get_default()};

auto timer = RAJA::Timer();

// warm-up launch to initialize the device stream so setup cost is not timed
RAJA::forall<device_pol>
(RAJA::RangeSegment(0,1), [=] RAJA_DEVICE (int i) {
printf(" \n");
});

int * array = def_host_res.allocate<int>(N * N);
int * array_copy = def_host_res.allocate<int>(N * N);

// the large array acts as the image
for (int i = 0; i < N * N; ++i) {
array[i] = 1;
array_copy[i] = 1;
}

// small array that acts as the blur kernel
int * kernel = def_host_res.allocate<int>(K * K);
for (int i = 0; i < K * K; ++i) {
kernel[i] = 2;
}

// allocate device copies and transfer the data to the GPU
int* d_array = def_device_res.allocate<int>(N * N);
int* d_array_copy = def_device_res.allocate<int>(N * N);
int* d_kernel = def_device_res.allocate<int>(K * K);

def_device_res.memcpy(d_array, array, N * N * sizeof(int));
def_device_res.memcpy(d_array_copy, array_copy, N * N * sizeof(int));
def_device_res.memcpy(d_kernel, kernel, K * K * sizeof(int));

constexpr int DIM = 2;
RAJA::View<int, RAJA::Layout<DIM, int, 1>> array_view(d_array, N, N);
RAJA::View<int, RAJA::Layout<DIM, int, 1>> array_view_copy(d_array_copy, N, N);
RAJA::View<int, RAJA::Layout<DIM, int, 1>> kernel_view(d_kernel, K, K);
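// RAJA::Layout<DIM, int, 1> is a row-major 2D layout whose last dimension
// (index 1) has unit stride, so array_view(x, y) maps to d_array[x * N + y],
// the same indexing done by hand in the raw-pointer kernel below.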

RAJA::RangeSegment range_i(0, N);
RAJA::RangeSegment range_j(0, N);

timer.start();

RAJA::kernel<kernel_pol>
(RAJA::make_tuple(range_i, range_j),
[=] RAJA_DEVICE (int i, int j) {
int sum = 0;

// loop over the blur kernel
for (int m = 0; m < K; ++m) {
for (int n = 0; n < K; ++n) {
int x = i + m;
int y = j + n;

// accumulate the kernel contribution wherever it overlaps the image
if (x < N && y < N) {
sum += kernel_view(m, n) * array_view(x, y);
}
}
}

array_view(i, j) += sum;
}
);

timer.stop();

std::cout << "Elapsed time with RAJA view : " << timer.elapsed() << std::endl;


timer.reset();
timer.start();

RAJA::kernel<kernel_pol>
(RAJA::make_tuple(range_i, range_j),
[=] RAJA_DEVICE (int i, int j) {
int sum = 0;

// loop over the blur kernel
for (int m = 0; m < K; ++m) {
for (int n = 0; n < K; ++n) {
int x = i + m;
int y = j + n;

// accumulate the kernel contribution wherever it overlaps the image
if (x < N && y < N) {
sum += d_kernel[m * K + n] * d_array_copy[x * N + y];
}
}
}

d_array_copy[i * N + j] += sum;
}
);
timer.stop();

std::cout << "Elapsed time with NO RAJA view : " << timer.elapsed() << std::endl;

def_device_res.memcpy(array, d_array, N * N * sizeof(int));
def_device_res.memcpy(array_copy, d_array_copy, N * N * sizeof(int));

def_device_res.deallocate(d_array);
def_device_res.deallocate(d_array_copy);
def_device_res.deallocate(d_kernel);

def_host_res.deallocate(array);
def_host_res.deallocate(array_copy);
def_host_res.deallocate(kernel);

return 0;
}
4 changes: 0 additions & 4 deletions examples/CMakeLists.txt
@@ -3,10 +3,6 @@
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

raja_add_executable(
NAME raja_view_slowdown
SOURCES raja_view_slowdown.cpp)

raja_add_executable(
NAME tut_launch_basic
SOURCES tut_launch_basic.cpp)
145 changes: 0 additions & 145 deletions examples/raja_view_slowdown.cpp

This file was deleted.
