LLNL · johnbowen42 · Sep 5, 2024 · Sep 5, 2024 · Sep 5, 2024 · Sep 5, 2024
diff --git a/.clang-format b/.clang-format
@@ -1,6 +1,7 @@
 BasedOnStyle : LLVM
 # Indent formatting
 IndentWidth : 2
+Language: Cpp
 UseTab: Never
 KeepEmptyLinesAtTheStartOfBlocks : true
 MaxEmptyLinesToKeep : 2
@@ -40,9 +41,10 @@ PointerAlignment: Left
 AllowShortIfStatementsOnASingleLine : true
 AllowShortFunctionsOnASingleLine : true
 AllowShortLoopsOnASingleLine : false
+AllowAllArgumentsOnNextLine : false
 AllowAllParametersOfDeclarationOnNextLine : false
 AlignTrailingComments : true
-BinPackArguments : false
+BinPackArguments : true
 BinPackParameters : false
 ConstructorInitializerAllOnOneLineOrOnePerLine : true
 ColumnLimit : 80

diff --git a/examples/dynamic-forall.cpp b/examples/dynamic-forall.cpp
@@ -100,8 +100,7 @@ int main(int argc, char* argv[])
   //----------------------------------------------------------------------------//
 
   // policy is chosen from the list
-  RAJA::expt::dynamic_forall<policy_list>(pol,
-                                          RAJA::RangeSegment(0, N),
+  RAJA::expt::dynamic_forall<policy_list>(pol, RAJA::RangeSegment(0, N),
                                           [=] RAJA_HOST_DEVICE(int i)
                                           { c[i] = a[i] + b[i]; });
   // _rajaseq_vector_add_end

diff --git a/examples/dynamic_mat_transpose.cpp b/examples/dynamic_mat_transpose.cpp
@@ -355,58 +355,52 @@ int main(int argc, char* argv[])
       [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx)
       {
         RAJA::loop<outer1>(
-            ctx,
-            RAJA::RangeSegment(0, outer_Dimr),
+            ctx, RAJA::RangeSegment(0, outer_Dimr),
             [&](int by)
             {
               RAJA::loop<outer0>(
-                  ctx,
-                  RAJA::RangeSegment(0, outer_Dimc),
+                  ctx, RAJA::RangeSegment(0, outer_Dimc),
                   [&](int bx)
                   {
                     // Request memory from shared memory pool
                     int* tile_ptr =
                         ctx.getSharedMemory<int>(TILE_DIM * TILE_DIM);
 
                     // Use RAJA View for simplified indexing
-                    RAJA::View<int, RAJA::Layout<2>> Tile(
-                        tile_ptr, TILE_DIM, TILE_DIM);
-
-                    RAJA::loop<inner1>(ctx,
-                                       RAJA::RangeSegment(0, TILE_DIM),
-                                       [&](int ty)
-                                       {
-                                         RAJA::loop<inner0>(
-                                             ctx,
-                                             RAJA::RangeSegment(0, TILE_DIM),
-                                             [&](int tx)
-                                             {
-                                               int col =
-                                                   bx * TILE_DIM +
-                                                   tx; // Matrix column index
-                                               int row = by * TILE_DIM +
-                                                         ty; // Matrix row index
-
-                                               // Bounds check
-                                               if (row < N_r && col < N_c)
-                                               {
-                                                 Tile(ty, tx) = Aview(row, col);
-                                               }
-                                             });
-                                       });
+                    RAJA::View<int, RAJA::Layout<2>> Tile(tile_ptr, TILE_DIM,
+                                                          TILE_DIM);
+
+                    RAJA::loop<inner1>(
+                        ctx, RAJA::RangeSegment(0, TILE_DIM),
+                        [&](int ty)
+                        {
+                          RAJA::loop<inner0>(
+                              ctx, RAJA::RangeSegment(0, TILE_DIM),
+                              [&](int tx)
+                              {
+                                int col =
+                                    bx * TILE_DIM + tx; // Matrix column index
+                                int row =
+                                    by * TILE_DIM + ty; // Matrix row index
+
+                                // Bounds check
+                                if (row < N_r && col < N_c)
+                                {
+                                  Tile(ty, tx) = Aview(row, col);
+                                }
+                              });
+                        });
 
                     // Barrier is needed to ensure all threads have written to
                     // Tile
                     ctx.teamSync();
 
                     RAJA::loop<inner1>(
-                        ctx,
-                        RAJA::RangeSegment(0, TILE_DIM),
+                        ctx, RAJA::RangeSegment(0, TILE_DIM),
                         [&](int ty)
                         {
                           RAJA::loop<inner0>(
-                              ctx,
-                              RAJA::RangeSegment(0, TILE_DIM),
+                              ctx, RAJA::RangeSegment(0, TILE_DIM),
                               [&](int tx)
                               {
                                 int col =

diff --git a/examples/forall-param-reductions.cpp b/examples/forall-param-reductions.cpp
@@ -118,20 +118,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
   VALLOC_INT seq_maxloc(std::numeric_limits<int>::min(), -1);
 
   RAJA::forall<EXEC_POL1>(
-      host_res,
-      arange,
-      RAJA::expt::Reduce<RAJA::operators::plus>(&seq_sum),
+      host_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&seq_sum),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&seq_min),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&seq_max),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&seq_minloc),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&seq_maxloc),
       RAJA::expt::KernelName("RAJA Reduce Seq Kernel"),
-      [=](int         i,
-          int&        _seq_sum,
-          int&        _seq_min,
-          int&        _seq_max,
-          VALLOC_INT& _seq_minloc,
-          VALLOC_INT& _seq_maxloc)
+      [=](int i, int& _seq_sum, int& _seq_min, int& _seq_max,
+          VALLOC_INT& _seq_minloc, VALLOC_INT& _seq_maxloc)
       {
         _seq_sum += a[i];
 
@@ -173,20 +167,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
   VALLOC_INT omp_maxloc(std::numeric_limits<int>::min(), -1);
 
   RAJA::forall<EXEC_POL2>(
-      host_res,
-      arange,
-      RAJA::expt::Reduce<RAJA::operators::plus>(&omp_sum),
+      host_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&omp_sum),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_min),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_max),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_minloc),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_maxloc),
       RAJA::expt::KernelName("RAJA Reduce OpenMP Kernel"),
-      [=](int         i,
-          int&        _omp_sum,
-          int&        _omp_min,
-          int&        _omp_max,
-          VALLOC_INT& _omp_minloc,
-          VALLOC_INT& _omp_maxloc)
+      [=](int i, int& _omp_sum, int& _omp_min, int& _omp_max,
+          VALLOC_INT& _omp_minloc, VALLOC_INT& _omp_maxloc)
       {
         _omp_sum += a[i];
 
@@ -227,20 +215,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
   VALLOC_INT omp_t_maxloc(std::numeric_limits<int>::min(), -1);
 
   RAJA::forall<EXEC_POL3>(
-      omp_res,
-      arange,
-      RAJA::expt::Reduce<RAJA::operators::plus>(&omp_t_sum),
+      omp_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&omp_t_sum),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_t_min),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_t_max),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&omp_t_minloc),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&omp_t_maxloc),
       RAJA::expt::KernelName("RAJA Reduce Target OpenMP Kernel"),
-      [=](int         i,
-          int&        _omp_t_sum,
-          int&        _omp_t_min,
-          int&        _omp_t_max,
-          VALLOC_INT& _omp_t_minloc,
-          VALLOC_INT& _omp_t_maxloc)
+      [=](int i, int& _omp_t_sum, int& _omp_t_min, int& _omp_t_max,
+          VALLOC_INT& _omp_t_minloc, VALLOC_INT& _omp_t_maxloc)
       {
         _omp_t_sum += a[i];
 
@@ -285,20 +267,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
   VALLOC_INT cuda_maxloc(std::numeric_limits<int>::min(), -1);
 
   RAJA::forall<EXEC_POL3>(
-      cuda_res,
-      arange,
-      RAJA::expt::Reduce<RAJA::operators::plus>(&cuda_sum),
+      cuda_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&cuda_sum),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&cuda_min),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&cuda_max),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&cuda_minloc),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&cuda_maxloc),
       RAJA::expt::KernelName("RAJA Reduce CUDA Kernel"),
-      [=] RAJA_DEVICE(int         i,
-                      int&        _cuda_sum,
-                      int&        _cuda_min,
-                      int&        _cuda_max,
-                      VALLOC_INT& _cuda_minloc,
-                      VALLOC_INT& _cuda_maxloc)
+      [=] RAJA_DEVICE(int i, int& _cuda_sum, int& _cuda_min, int& _cuda_max,
+                      VALLOC_INT& _cuda_minloc, VALLOC_INT& _cuda_maxloc)
       {
         _cuda_sum += d_a[i];
 
@@ -342,19 +318,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
   VALLOC_INT hip_maxloc(std::numeric_limits<int>::min(), -1);
 
   RAJA::forall<EXEC_POL3>(
-      arange,
-      RAJA::expt::Reduce<RAJA::operators::plus>(&hip_sum),
+      arange, RAJA::expt::Reduce<RAJA::operators::plus>(&hip_sum),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&hip_min),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&hip_max),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&hip_minloc),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&hip_maxloc),
       RAJA::expt::KernelName("RAJA Reduce HIP Kernel"),
-      [=] RAJA_DEVICE(int         i,
-                      int&        _hip_sum,
-                      int&        _hip_min,
-                      int&        _hip_max,
-                      VALLOC_INT& _hip_minloc,
-                      VALLOC_INT& _hip_maxloc)
+      [=] RAJA_DEVICE(int i, int& _hip_sum, int& _hip_min, int& _hip_max,
+                      VALLOC_INT& _hip_minloc, VALLOC_INT& _hip_maxloc)
       {
         _hip_sum += d_a[i];
 
@@ -399,20 +370,14 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
   VALLOC_INT sycl_maxloc(std::numeric_limits<int>::min(), -1);
 
   RAJA::forall<EXEC_POL3>(
-      sycl_res,
-      arange,
-      RAJA::expt::Reduce<RAJA::operators::plus>(&sycl_sum),
+      sycl_res, arange, RAJA::expt::Reduce<RAJA::operators::plus>(&sycl_sum),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&sycl_min),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&sycl_max),
       RAJA::expt::Reduce<RAJA::operators::minimum>(&sycl_minloc),
       RAJA::expt::Reduce<RAJA::operators::maximum>(&sycl_maxloc),
       RAJA::expt::KernelName("RAJA Reduce SYCL Kernel"),
-      [=] RAJA_DEVICE(int         i,
-                      int&        _sycl_sum,
-                      int&        _sycl_min,
-                      int&        _sycl_max,
-                      VALLOC_INT& _sycl_minloc,
-                      VALLOC_INT& _sycl_maxloc)
+      [=] RAJA_DEVICE(int i, int& _sycl_sum, int& _sycl_min, int& _sycl_max,
+                      VALLOC_INT& _sycl_minloc, VALLOC_INT& _sycl_maxloc)
       {
         _sycl_sum += d_a[i];
 

diff --git a/examples/forall_multi-reductions.cpp b/examples/forall_multi-reductions.cpp
@@ -154,8 +154,8 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv))
         res.memcpy(bins, host_bins, N * sizeof(int));
         res.memcpy(a, host_a, N * sizeof(int));
 
-        example_code<exec_policy, multi_reduce_policy>(
-            arange, num_bins, bins, a);
+        example_code<exec_policy, multi_reduce_policy>(arange, num_bins, bins,
+                                                       a);
 
         res.deallocate(bins);
         res.deallocate(a);

diff --git a/examples/jacobi.cpp b/examples/jacobi.cpp
@@ -191,8 +191,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
   RAJA::RangeSegment jacobiRange(1, (N + 1));
 
   using jacobiSeqNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
-      1,
-      RAJA::seq_exec,
+      1, RAJA::seq_exec,
       RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;
 
   printf("RAJA: Sequential Policy - Nested ForallN \n");
@@ -267,8 +266,7 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
    */
 
   using jacobiOmpNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
-      1,
-      RAJA::omp_parallel_for_exec,
+      1, RAJA::omp_parallel_for_exec,
       RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;
 
   while (resI2 > tol * tol)
@@ -329,18 +327,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
 
   using jacobiCUDANestedPolicy =
       RAJA::KernelPolicy<RAJA::statement::CudaKernel<RAJA::statement::Tile<
-          1,
-          RAJA::tile_fixed<32>,
-          RAJA::cuda_block_y_loop,
+          1, RAJA::tile_fixed<32>, RAJA::cuda_block_y_loop,
           RAJA::statement::Tile<
-              0,
-              RAJA::tile_fixed<32>,
-              RAJA::cuda_block_x_loop,
+              0, RAJA::tile_fixed<32>, RAJA::cuda_block_x_loop,
               RAJA::statement::For<
-                  1,
-                  RAJA::cuda_thread_y_direct,
-                  RAJA::statement::For<0,
-                                       RAJA::cuda_thread_x_direct,
+                  1, RAJA::cuda_thread_y_direct,
+                  RAJA::statement::For<0, RAJA::cuda_thread_x_direct,
                                        RAJA::statement::Lambda<0>>>>>>>;
 
   resI2     = 1;
@@ -411,18 +403,12 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
 
   using jacobiHIPNestedPolicy =
       RAJA::KernelPolicy<RAJA::statement::HipKernel<RAJA::statement::Tile<
-          1,
-          RAJA::tile_fixed<32>,
-          RAJA::hip_block_y_loop,
+          1, RAJA::tile_fixed<32>, RAJA::hip_block_y_loop,
           RAJA::statement::Tile<
-              0,
-              RAJA::tile_fixed<32>,
-              RAJA::hip_block_x_loop,
+              0, RAJA::tile_fixed<32>, RAJA::hip_block_x_loop,
               RAJA::statement::For<
-                  1,
-                  RAJA::hip_thread_y_direct,
-                  RAJA::statement::For<0,
-                                       RAJA::hip_thread_x_direct,
+                  1, RAJA::hip_thread_y_direct,
+                  RAJA::statement::For<0, RAJA::hip_thread_x_direct,
                                        RAJA::statement::Lambda<0>>>>>>>;
 
   resI2     = 1;
@@ -512,8 +498,7 @@ void computeErr(double* I, grid_s grid)
   RAJA::ReduceMax<RAJA::seq_reduce, double> tMax(-1.0);
 
   using jacobiSeqNestedPolicy = RAJA::KernelPolicy<RAJA::statement::For<
-      1,
-      RAJA::seq_exec,
+      1, RAJA::seq_exec,
       RAJA::statement::For<0, RAJA::seq_exec, RAJA::statement::Lambda<0>>>>;
 
   RAJA::kernel<jacobiSeqNestedPolicy>(

diff --git a/examples/kernel-dynamic-tile.cpp b/examples/kernel-dynamic-tile.cpp
@@ -15,15 +15,10 @@ int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
   using namespace RAJA;
 
   kernel_param<KernelPolicy<statement::Tile<
-      1,
-      tile_dynamic<1>,
-      seq_exec,
+      1, tile_dynamic<1>, seq_exec,
       statement::Tile<
-          0,
-          tile_dynamic<0>,
-          seq_exec,
-          statement::For<1,
-                         seq_exec,
+          0, tile_dynamic<0>, seq_exec,
+          statement::For<1, seq_exec,
                          statement::For<0, seq_exec, statement::Lambda<0>>>>>>>(
       make_tuple(RangeSegment{0, 25}, RangeSegment{0, 25}),
       make_tuple(TileSize{5}, TileSize{10}),