Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: Add make style target and refactor RAJA with clang-format #1731

Closed
wants to merge 12 commits into from
  •  
  •  
  •  
4 changes: 3 additions & 1 deletion .clang-format
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
BasedOnStyle : LLVM
# Indent formatting
IndentWidth : 2
Language: Cpp
UseTab: Never
KeepEmptyLinesAtTheStartOfBlocks : true
MaxEmptyLinesToKeep : 2
Expand Down Expand Up @@ -40,9 +41,10 @@ PointerAlignment: Left
AllowShortIfStatementsOnASingleLine : true
AllowShortFunctionsOnASingleLine : true
AllowShortLoopsOnASingleLine : false
AllowAllArgumentsOnNextLine : false
AllowAllParametersOfDeclarationOnNextLine : false
AlignTrailingComments : true
BinPackArguments : false
BinPackArguments : true
BinPackParameters : false
ConstructorInitializerAllOnOneLineOrOnePerLine : true
ColumnLimit : 80
Expand Down
3 changes: 1 addition & 2 deletions examples/dynamic-forall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ int main(int argc, char* argv[])
//----------------------------------------------------------------------------//

// policy is chosen from the list
RAJA::expt::dynamic_forall<policy_list>(pol,
RAJA::RangeSegment(0, N),
RAJA::expt::dynamic_forall<policy_list>(pol, RAJA::RangeSegment(0, N),
[=] RAJA_HOST_DEVICE(int i)
{ c[i] = a[i] + b[i]; });
// _rajaseq_vector_add_end
Expand Down
60 changes: 27 additions & 33 deletions examples/dynamic_mat_transpose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,58 +355,52 @@ int main(int argc, char* argv[])
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx)
{
RAJA::loop<outer1>(
ctx,
RAJA::RangeSegment(0, outer_Dimr),
ctx, RAJA::RangeSegment(0, outer_Dimr),
[&](int by)
{
RAJA::loop<outer0>(
ctx,
RAJA::RangeSegment(0, outer_Dimc),
ctx, RAJA::RangeSegment(0, outer_Dimc),
[&](int bx)
{
// Request memory from shared memory pool
int* tile_ptr =
ctx.getSharedMemory<int>(TILE_DIM * TILE_DIM);

// Use RAJA View for simplified indexing
RAJA::View<int, RAJA::Layout<2>> Tile(
tile_ptr, TILE_DIM, TILE_DIM);

RAJA::loop<inner1>(ctx,
RAJA::RangeSegment(0, TILE_DIM),
[&](int ty)
{
RAJA::loop<inner0>(
ctx,
RAJA::RangeSegment(0, TILE_DIM),
[&](int tx)
{
int col =
bx * TILE_DIM +
tx; // Matrix column index
int row = by * TILE_DIM +
ty; // Matrix row index

// Bounds check
if (row < N_r && col < N_c)
{
Tile(ty, tx) = Aview(row, col);
}
});
});
RAJA::View<int, RAJA::Layout<2>> Tile(tile_ptr, TILE_DIM,
TILE_DIM);

RAJA::loop<inner1>(
ctx, RAJA::RangeSegment(0, TILE_DIM),
[&](int ty)
{
RAJA::loop<inner0>(
ctx, RAJA::RangeSegment(0, TILE_DIM),
[&](int tx)
{
int col =
bx * TILE_DIM + tx; // Matrix column index
int row =
by * TILE_DIM + ty; // Matrix row index

// Bounds check
if (row < N_r && col < N_c)
{
Tile(ty, tx) = Aview(row, col);
}
});
});

// Barrier is needed to ensure all threads have written to
// Tile
ctx.teamSync();

RAJA::loop<inner1>(
ctx,
RAJA::RangeSegment(0, TILE_DIM),
ctx, RAJA::RangeSegment(0, TILE_DIM),
[&](int ty)
{
RAJA::loop<inner0>(
ctx,
RAJA::RangeSegment(0, TILE_DIM),
ctx, RAJA::RangeSegment(0, TILE_DIM),
[&](int tx)
{
int col =
Expand Down
71 changes: 18 additions & 53 deletions examples/forall-param-reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,20 +118,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT seq_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL1>(
host_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&seq_sum),
host_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&seq_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&seq_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&seq_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&seq_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&seq_maxloc),
RAJA::expt::KernelName("RAJA Reduce Seq Kernel"),
[=](int i,
int& _seq_sum,
int& _seq_min,
int& _seq_max,
VALLOC_INT& _seq_minloc,
VALLOC_INT& _seq_maxloc)
[=](int i, int& _seq_sum, int& _seq_min, int& _seq_max,
VALLOC_INT& _seq_minloc, VALLOC_INT& _seq_maxloc)
{
_seq_sum += a[i];

Expand Down Expand Up @@ -173,20 +167,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT omp_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL2>(
host_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&omp_sum),
host_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&omp_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_maxloc),
RAJA::expt::KernelName("RAJA Reduce OpenMP Kernel"),
[=](int i,
int& _omp_sum,
int& _omp_min,
int& _omp_max,
VALLOC_INT& _omp_minloc,
VALLOC_INT& _omp_maxloc)
[=](int i, int& _omp_sum, int& _omp_min, int& _omp_max,
VALLOC_INT& _omp_minloc, VALLOC_INT& _omp_maxloc)
{
_omp_sum += a[i];

Expand Down Expand Up @@ -227,20 +215,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT omp_t_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL3>(
omp_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&omp_t_sum),
omp_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&omp_t_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_t_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_t_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_t_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_t_maxloc),
RAJA::expt::KernelName("RAJA Reduce Target OpenMP Kernel"),
[=](int i,
int& _omp_t_sum,
int& _omp_t_min,
int& _omp_t_max,
VALLOC_INT& _omp_t_minloc,
VALLOC_INT& _omp_t_maxloc)
[=](int i, int& _omp_t_sum, int& _omp_t_min, int& _omp_t_max,
VALLOC_INT& _omp_t_minloc, VALLOC_INT& _omp_t_maxloc)
{
_omp_t_sum += a[i];

Expand Down Expand Up @@ -285,20 +267,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT cuda_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL3>(
cuda_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&cuda_sum),
cuda_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&cuda_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&cuda_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&cuda_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&cuda_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&cuda_maxloc),
RAJA::expt::KernelName("RAJA Reduce CUDA Kernel"),
[=] RAJA_DEVICE(int i,
int& _cuda_sum,
int& _cuda_min,
int& _cuda_max,
VALLOC_INT& _cuda_minloc,
VALLOC_INT& _cuda_maxloc)
[=] RAJA_DEVICE(int i, int& _cuda_sum, int& _cuda_min, int& _cuda_max,
VALLOC_INT& _cuda_minloc, VALLOC_INT& _cuda_maxloc)
{
_cuda_sum += d_a[i];

Expand Down Expand Up @@ -342,19 +318,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT hip_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL3>(
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&hip_sum),
arange, RAJA::expt::Reduce<RAJA::operators::plus>(&hip_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&hip_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&hip_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&hip_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&hip_maxloc),
RAJA::expt::KernelName("RAJA Reduce HIP Kernel"),
[=] RAJA_DEVICE(int i,
int& _hip_sum,
int& _hip_min,
int& _hip_max,
VALLOC_INT& _hip_minloc,
VALLOC_INT& _hip_maxloc)
[=] RAJA_DEVICE(int i, int& _hip_sum, int& _hip_min, int& _hip_max,
VALLOC_INT& _hip_minloc, VALLOC_INT& _hip_maxloc)
{
_hip_sum += d_a[i];

Expand Down Expand Up @@ -399,20 +370,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
VALLOC_INT sycl_maxloc(std::numeric_limits<int>::min(), -1);

RAJA::forall<EXEC_POL3>(
sycl_res,
arange,
RAJA::expt::Reduce<RAJA::operators::plus>(&sycl_sum),
sycl_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&sycl_sum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&sycl_min),
RAJA::expt::Reduce<RAJA::operators::maximum>(&sycl_max),
RAJA::expt::Reduce<RAJA::operators::minimum>(&sycl_minloc),
RAJA::expt::Reduce<RAJA::operators::maximum>(&sycl_maxloc),
RAJA::expt::KernelName("RAJA Reduce SYCL Kernel"),
[=] RAJA_DEVICE(int i,
int& _sycl_sum,
int& _sycl_min,
int& _sycl_max,
VALLOC_INT& _sycl_minloc,
VALLOC_INT& _sycl_maxloc)
[=] RAJA_DEVICE(int i, int& _sycl_sum, int& _sycl_min, int& _sycl_max,
VALLOC_INT& _sycl_minloc, VALLOC_INT& _sycl_maxloc)
{
_sycl_sum += d_a[i];

Expand Down
4 changes: 2 additions & 2 deletions examples/forall_multi-reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv))
res.memcpy(bins, host_bins, N * sizeof(int));
res.memcpy(a, host_a, N * sizeof(int));

example_code<exec_policy, multi_reduce_policy>(
arange, num_bins, bins, a);
example_code<exec_policy, multi_reduce_policy>(arange, num_bins, bins,
a);

res.deallocate(bins);
res.deallocate(a);
Expand Down
37 changes: 11 additions & 26 deletions examples/jacobi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
RAJA::RangeSegment jacobiRange(1, (N + 1));

using jacobiSeqNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
1,
RAJA::seq_exec,
1, RAJA::seq_exec,
RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;

printf("RAJA: Sequential Policy - Nested ForallN \n");
Expand Down Expand Up @@ -267,8 +266,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
*/

using jacobiOmpNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
1,
RAJA::omp_parallel_for_exec,
1, RAJA::omp_parallel_for_exec,
RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;

while (resI2 > tol * tol)
Expand Down Expand Up @@ -329,18 +327,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))

using jacobiCUDANestedPolicy =
RAJA::KernelPolicy<RAJA::statement::CudaKernel<RAJA::statement::Tile<
1,
RAJA::tile_fixed<32>,
RAJA::cuda_block_y_loop,
1, RAJA::tile_fixed<32>, RAJA::cuda_block_y_loop,
RAJA::statement::Tile<
0,
RAJA::tile_fixed<32>,
RAJA::cuda_block_x_loop,
0, RAJA::tile_fixed<32>, RAJA::cuda_block_x_loop,
RAJA::statement::For<
1,
RAJA::cuda_thread_y_direct,
RAJA::statement::For<0,
RAJA::cuda_thread_x_direct,
1, RAJA::cuda_thread_y_direct,
RAJA::statement::For<0, RAJA::cuda_thread_x_direct,
RAJA::statement::Lambda<0>>>>>>>;

resI2 = 1;
Expand Down Expand Up @@ -411,18 +403,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))

using jacobiHIPNestedPolicy =
RAJA::KernelPolicy<RAJA::statement::HipKernel<RAJA::statement::Tile<
1,
RAJA::tile_fixed<32>,
RAJA::hip_block_y_loop,
1, RAJA::tile_fixed<32>, RAJA::hip_block_y_loop,
RAJA::statement::Tile<
0,
RAJA::tile_fixed<32>,
RAJA::hip_block_x_loop,
0, RAJA::tile_fixed<32>, RAJA::hip_block_x_loop,
RAJA::statement::For<
1,
RAJA::hip_thread_y_direct,
RAJA::statement::For<0,
RAJA::hip_thread_x_direct,
1, RAJA::hip_thread_y_direct,
RAJA::statement::For<0, RAJA::hip_thread_x_direct,
RAJA::statement::Lambda<0>>>>>>>;

resI2 = 1;
Expand Down Expand Up @@ -512,8 +498,7 @@ void computeErr(double* I, grid_s grid)
RAJA::ReduceMax<RAJA::seq_reduce, double> tMax(-1.0);

using jacobiSeqNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
1,
RAJA::seq_exec,
1, RAJA::seq_exec,
RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;

RAJA::kernel<jacobiSeqNestedPolicy>(
Expand Down
11 changes: 3 additions & 8 deletions examples/kernel-dynamic-tile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
using namespace RAJA;

kernel_param<KernelPolicy<statement::Tile<
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an example of the new formatting making nested templates hard to read. I think the original format, with one template argument per line, is much easier to read. @artv3 do you agree?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree

1,
tile_dynamic<1>,
seq_exec,
1, tile_dynamic<1>, seq_exec,
statement::Tile<
0,
tile_dynamic<0>,
seq_exec,
statement::For<1,
seq_exec,
0, tile_dynamic<0>, seq_exec,
statement::For<1, seq_exec,
statement::For<0, seq_exec, statement::Lambda<0>>>>>>>(
johnbowen42 marked this conversation as resolved.
Show resolved Hide resolved
make_tuple(RangeSegment{0, 25}, RangeSegment{0, 25}),
make_tuple(TileSize{5}, TileSize{10}),
Expand Down
Loading
Loading