diff --git a/.clang-format b/.clang-format index b6fa54b233..b50c1facfb 100644 --- a/.clang-format +++ b/.clang-format @@ -31,7 +31,7 @@ BraceWrapping: AfterExternBlock: false BeforeCatch: true BeforeElse: true - BeforeLambdaBody: true + BeforeLambdaBody: true IndentBraces: false SplitEmptyFunction: false SplitEmptyRecord: false @@ -40,12 +40,13 @@ BraceWrapping: # Pointer alignment DerivePointerAlignment: false PointerAlignment: Left + +# Single line config AllowShortIfStatementsOnASingleLine : true AllowShortFunctionsOnASingleLine : true AllowShortLoopsOnASingleLine : false AllowAllArgumentsOnNextLine : true AllowAllParametersOfDeclarationOnNextLine : false -AlignTrailingComments : true BinPackArguments : true BinPackParameters : false ConstructorInitializerAllOnOneLineOrOnePerLine : true diff --git a/cmake/RAJAMacros.cmake b/cmake/RAJAMacros.cmake index 8a19001cc7..11c4661cc1 100644 --- a/cmake/RAJAMacros.cmake +++ b/cmake/RAJAMacros.cmake @@ -209,7 +209,7 @@ endmacro(raja_add_benchmark) ## raja_add_code_checks() ## ## Adds code checks for all source files recursively in the RAJA repository. -## +## ## This creates the following parent build targets: ## check - Runs a non file changing style check and CppCheck ## style - In-place code formatting @@ -233,7 +233,7 @@ macro(raja_add_code_checks) if ("${PROJECT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}") # Create file globbing expressions that only include directories that contain source # TODO(bowen) Add examples, exercises and benchmark to the list below - set(_base_dirs "RAJA" "examples" "exercises" "benchmark" "include" "src" "test") + set(_base_dirs "RAJA" "benchmark" "include" "src" "test") set(_ext_expressions "*.cpp" "*.hpp" "*.inl" "*.cxx" "*.hxx" "*.cc" "*.c" "*.h" "*.hh") @@ -248,15 +248,6 @@ macro(raja_add_code_checks) set(_sources) file(GLOB_RECURSE _sources ${_glob_expressions}) - # Filter out exclusions - #set(_exclude_expressions - # "${PROJECT_SOURCE_DIR}/axom/sidre/examples/lulesh2/*" - # "${PROJECT_SOURCE_DIR}/axom/slam/examples/lulesh2.0.3/*" - # "${PROJECT_SOURCE_DIR}/axom/slam/examples/tinyHydro/*") - #foreach(_exp ${_exclude_expressions}) - # list(FILTER _sources EXCLUDE REGEX ${_exp}) - #endforeach() -# blt_add_code_checks(PREFIX RAJA SOURCES ${_sources} CLANGFORMAT_CFG_FILE ${PROJECT_SOURCE_DIR}/.clang-format @@ -265,7 +256,7 @@ macro(raja_add_code_checks) # Set FOLDER property for code check targets foreach(_suffix clangformat_check clangformat_style clang_tidy_check clang_tidy_style) set(_tgt ${arg_PREFIX}_${_suffix}) - if(TARGET ${_tgt}) + if(TARGET ${_tgt}) set_target_properties(${_tgt} PROPERTIES FOLDER "RAJA/code_checks") endif() endforeach() diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index 8bb722a797..453dc75a1c 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -29,7 +29,7 @@ #include "camp/tuple.hpp" // Odd dependecy with atomics is breaking CI builds -//#include "RAJA/util/View.hpp" +// #include "RAJA/util/View.hpp" #if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && !defined(RAJA_ENABLE_SYCL) #define RAJA_TEAM_SHARED __shared__ diff --git a/include/RAJA/pattern/tensor/TensorIndex.hpp b/include/RAJA/pattern/tensor/TensorIndex.hpp index 8f152d92ce..c384465a15 100644 --- a/include/RAJA/pattern/tensor/TensorIndex.hpp +++ b/include/RAJA/pattern/tensor/TensorIndex.hpp @@ -29,196 +29,190 @@ namespace expt { - template INDEX_VALUE, strip_index_type_t LENGTH_VALUE> - struct 
StaticTensorIndexInner; - - template - struct StaticTensorIndex; - - - template - class TensorIndex { - public: - using self_type = TensorIndex; - using value_type = strip_index_type_t; - using index_type = IDX; - using tensor_type = TENSOR_TYPE; - - RAJA_INLINE - RAJA_HOST_DEVICE - static - constexpr - self_type all(){ - return self_type(index_type(-1), value_type(-1)); - } - - RAJA_INLINE - RAJA_HOST_DEVICE - static - constexpr - StaticTensorIndex> static_all(){ - return StaticTensorIndex>(); - } - - RAJA_INLINE - RAJA_HOST_DEVICE - static - constexpr - self_type range(index_type begin, index_type end){ - return self_type(begin, value_type(stripIndexType(end-begin))); - } - - template - RAJA_INLINE - RAJA_HOST_DEVICE - static - constexpr - StaticTensorIndex> static_range(){ - return StaticTensorIndex>(); - } - - - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - TensorIndex() : m_index(index_type(0)), m_length(0) {} - - - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - TensorIndex(RAJA::TypedRangeSegment const &seg) : - m_index(*seg.begin()), m_length(seg.size()) - {} - - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - TensorIndex(index_type value, value_type length) : m_index(value), m_length(length) {} - - template - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - TensorIndex(TensorIndex const &c) : m_index(*c), m_length(c.size()) {} - - - template - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - TensorIndex(StaticTensorIndex> const RAJA_UNUSED_ARG(&c)) - : m_index(IDX_VAL) - , m_length(LEN_VAL) - {} - - - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - index_type const &operator*() const { - return m_index; - } - - // used in strip_by_value as a static cast - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - explicit operator index_type() const { - // return does not matter, but suppresses no-return warnings - return m_index; - } - - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - index_type begin() const { - return m_index; - } - - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - value_type size() const { - return m_length; - } - - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr - value_type dim() const { - return DIM; - } - - private: - index_type m_index; - value_type m_length; - }; - - - template INDEX_VALUE, strip_index_type_t LENGTH_VALUE> - struct StaticTensorIndex> { - - using base_type = TensorIndex; - using value_type = strip_index_type_t; - using index_type = IDX; - using tensor_type = TENSOR_TYPE; - - static const index_type s_index = INDEX_VALUE; - static const index_type s_length = LENGTH_VALUE; - - RAJA_INLINE - RAJA_HOST_DEVICE - constexpr operator base_type() { - return base_type(s_index,s_length); - } - - }; - - - - /*! - * Index that specifies the starting element index of a Vector - */ - template - using VectorIndex = TensorIndex; - - /*! - * Index that specifies the starting Row index of a matrix - */ - template - using RowIndex = TensorIndex; - - /*! - * Index that specifies the starting Column index of a matrix - */ - template - using ColIndex = TensorIndex; - - - /*! 
- * Converts a Row index to a Column index - */ - template +template INDEX_VALUE, + strip_index_type_t LENGTH_VALUE> +struct StaticTensorIndexInner; + +template +struct StaticTensorIndex; + + +template +class TensorIndex +{ +public: + using self_type = TensorIndex; + using value_type = strip_index_type_t; + using index_type = IDX; + using tensor_type = TENSOR_TYPE; + + RAJA_INLINE + RAJA_HOST_DEVICE + static constexpr self_type all() + { + return self_type(index_type(-1), value_type(-1)); + } + + RAJA_INLINE RAJA_HOST_DEVICE + static constexpr StaticTensorIndex> + static_all() + { + return StaticTensorIndex>(); + } + RAJA_INLINE - constexpr - ColIndex toColIndex(RowIndex const &r){ - return ColIndex(*r, r.size()); + RAJA_HOST_DEVICE + static constexpr self_type range(index_type begin, index_type end) + { + return self_type(begin, value_type(stripIndexType(end - begin))); } - /*! - * Converts a Column index to a Row index - */ - template + template + RAJA_INLINE RAJA_HOST_DEVICE static constexpr StaticTensorIndex< + StaticTensorIndexInner> + static_range() + { + return StaticTensorIndex< + StaticTensorIndexInner>(); + } + + + RAJA_INLINE + RAJA_HOST_DEVICE + constexpr TensorIndex() : m_index(index_type(0)), m_length(0) {} + + + RAJA_INLINE + RAJA_HOST_DEVICE + constexpr TensorIndex(RAJA::TypedRangeSegment const& seg) + : m_index(*seg.begin()), m_length(seg.size()) + {} + + RAJA_INLINE + RAJA_HOST_DEVICE + constexpr TensorIndex(index_type value, value_type length) + : m_index(value), m_length(length) + {} + + template + RAJA_INLINE + RAJA_HOST_DEVICE constexpr TensorIndex(TensorIndex const& c) + : m_index(*c), m_length(c.size()) + {} + + + template + RAJA_INLINE RAJA_HOST_DEVICE constexpr TensorIndex( + StaticTensorIndex< + StaticTensorIndexInner> const + RAJA_UNUSED_ARG(&c)) + : m_index(IDX_VAL), m_length(LEN_VAL) + {} + + + RAJA_INLINE RAJA_HOST_DEVICE + constexpr index_type const& operator*() const { return m_index; } + + // used in strip_by_value as a static cast RAJA_INLINE - constexpr - RowIndex toRowIndex(ColIndex const &c){ - return RowIndex(*c, c.size()); + RAJA_HOST_DEVICE + constexpr explicit operator index_type() const + { + // return does not matter, but suppresses no-return warnings + return m_index; } -} // namespace expt + RAJA_INLINE + RAJA_HOST_DEVICE + constexpr index_type begin() const { return m_index; } + + RAJA_INLINE + RAJA_HOST_DEVICE + constexpr value_type size() const { return m_length; } + + RAJA_INLINE + RAJA_HOST_DEVICE + constexpr value_type dim() const { return DIM; } + +private: + index_type m_index; + value_type m_length; +}; + + +template INDEX_VALUE, + strip_index_type_t LENGTH_VALUE> +struct StaticTensorIndex< + StaticTensorIndexInner> +{ + + using base_type = TensorIndex; + using value_type = strip_index_type_t; + using index_type = IDX; + using tensor_type = TENSOR_TYPE; + + static const index_type s_index = INDEX_VALUE; + static const index_type s_length = LENGTH_VALUE; + + RAJA_INLINE + RAJA_HOST_DEVICE + constexpr operator base_type() { return base_type(s_index, s_length); } +}; + + +/*! + * Index that specifies the starting element index of a Vector + */ +template +using VectorIndex = TensorIndex; + +/*! + * Index that specifies the starting Row index of a matrix + */ +template +using RowIndex = TensorIndex; + +/*! + * Index that specifies the starting Column index of a matrix + */ +template +using ColIndex = TensorIndex; + + +/*! 
+ * Converts a Row index to a Column index + */ +template +RAJA_HOST_DEVICE RAJA_INLINE constexpr ColIndex +toColIndex(RowIndex const& r) +{ + return ColIndex(*r, r.size()); +} + +/*! + * Converts a Column index to a Row index + */ +template +RAJA_HOST_DEVICE RAJA_INLINE constexpr RowIndex +toRowIndex(ColIndex const& c) +{ + return RowIndex(*c, c.size()); +} + +} // namespace expt } // namespace RAJA #include "RAJA/pattern/tensor/internal/TensorIndexTraits.hpp" diff --git a/include/RAJA/pattern/tensor/internal/ET/ExpressionTemplateBase.hpp b/include/RAJA/pattern/tensor/internal/ET/ExpressionTemplateBase.hpp index 0c57f20067..50ae0933c0 100644 --- a/include/RAJA/pattern/tensor/internal/ET/ExpressionTemplateBase.hpp +++ b/include/RAJA/pattern/tensor/internal/ET/ExpressionTemplateBase.hpp @@ -28,7 +28,7 @@ #include "RAJA/pattern/tensor/internal/ET/BinaryOperatorTraits.hpp" -//#define RAJA_DEBUG_PRINT_ET_AST +// #define RAJA_DEBUG_PRINT_ET_AST namespace RAJA { diff --git a/include/RAJA/pattern/tensor/internal/ET/MultiplyOperator.hpp b/include/RAJA/pattern/tensor/internal/ET/MultiplyOperator.hpp index c89f887ca5..6ea5d09aa9 100644 --- a/include/RAJA/pattern/tensor/internal/ET/MultiplyOperator.hpp +++ b/include/RAJA/pattern/tensor/internal/ET/MultiplyOperator.hpp @@ -1091,13 +1091,13 @@ struct MultiplyOperator< * Evaluate operands and perform element-wise multiply */ template - RAJA_INLINE RAJA_HOST_DEVICE static block_literal - multiply(TILE_TYPE const& tile, - LEFT_OPERAND_TYPE const&, - RIGHT_OPERAND_TYPE const&) //-> - /// decltype(TensorMultiply(left.eval(tile), - /// right.eval(tile))) + RAJA_INLINE RAJA_HOST_DEVICE static block_literal multiply( + TILE_TYPE const& tile, + LEFT_OPERAND_TYPE const&, + RIGHT_OPERAND_TYPE const&) //-> + /// decltype(TensorMultiply(left.eval(tile), + /// right.eval(tile))) { /* @@ -1126,15 +1126,16 @@ struct MultiplyOperator< } template - RAJA_INLINE RAJA_HOST_DEVICE static block_literal - multiply_add(TILE_TYPE const& tile, - LEFT_OPERAND_TYPE const& left, - RIGHT_OPERAND_TYPE const& right, - ADD_TYPE const& add) //-> - // decltype(TensorMultiplyAdd(left.eval(tile), - // right.eval(tile), add.eval(tile))) + RAJA_INLINE RAJA_HOST_DEVICE static block_literal multiply_add( + TILE_TYPE const& tile, + LEFT_OPERAND_TYPE const& left, + RIGHT_OPERAND_TYPE const& right, + ADD_TYPE const& + add) //-> + // decltype(TensorMultiplyAdd(left.eval(tile), + // right.eval(tile), add.eval(tile))) { /* * First pass: we want to return a BlockLiteral ET node with the diff --git a/include/RAJA/pattern/tensor/internal/MatrixRegisterImpl.hpp b/include/RAJA/pattern/tensor/internal/MatrixRegisterImpl.hpp index 2b87f1d34d..3134421735 100644 --- a/include/RAJA/pattern/tensor/internal/MatrixRegisterImpl.hpp +++ b/include/RAJA/pattern/tensor/internal/MatrixRegisterImpl.hpp @@ -24,7 +24,7 @@ #include "RAJA/pattern/tensor/internal/MatrixMatrixMultiply.hpp" #include "RAJA/util/BitMask.hpp" -//#define DEBUG_MATRIX_LOAD_STORE +// #define DEBUG_MATRIX_LOAD_STORE namespace RAJA diff --git a/include/RAJA/policy/atomic_builtin.hpp b/include/RAJA/policy/atomic_builtin.hpp index e43bd71386..742aaa25b8 100644 --- a/include/RAJA/policy/atomic_builtin.hpp +++ b/include/RAJA/policy/atomic_builtin.hpp @@ -22,7 +22,8 @@ #include -#if defined(RAJA_COMPILER_MSVC) || ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER)) +#if defined(RAJA_COMPILER_MSVC) || \ + ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER)) #include #endif @@ -41,14 +42,16 @@ namespace RAJA //! 
Atomic policy that uses the compilers builtin __atomic_XXX routines
-struct builtin_atomic {
-};
+struct builtin_atomic
+{};


-namespace detail {
+namespace detail
+{


-#if defined(RAJA_COMPILER_MSVC) || ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
+#if defined(RAJA_COMPILER_MSVC) || \
+    ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))


 /*!
@@ -56,12 +59,11 @@ namespace detail {
  * using an intrinsic
  */
 template <typename T>
-struct builtin_useIntrinsic {
+struct builtin_useIntrinsic
+{
   static constexpr bool value =
-      std::is_same<T, char>::value ||
-      std::is_same<T, short>::value ||
-      std::is_same<T, long>::value ||
-      std::is_same<T, long long>::value;
+      std::is_same<T, char>::value || std::is_same<T, short>::value ||
+      std::is_same<T, long>::value || std::is_same<T, long long>::value;
 };

@@ -70,18 +72,18 @@ struct builtin_useIntrinsic {
  * by reinterpreting inputs to types that intrinsics support
  */
 template <typename T>
-struct builtin_useReinterpret {
+struct builtin_useReinterpret
+{
   static constexpr bool value =
-      !builtin_useIntrinsic<T>::value &&
-      (sizeof(T) == 1 ||
-       sizeof(T) == 2 ||
-       sizeof(T) == 4 ||
-       sizeof(T) == 8);
-
-  using type =
-      std::conditional_t<sizeof(T) == 1, char, std::conditional_t<sizeof(T) == 2, short, std::conditional_t<sizeof(T) == 4, long, long long>>>;
+      !builtin_useIntrinsic<T>::value &&
+      (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
+
+  using type = std::conditional_t<
+      sizeof(T) == 1,
+      char,
+      std::conditional_t<sizeof(T) == 2,
+                         short,
+                         std::conditional_t<sizeof(T) == 4, long, long long>>>;
 };

@@ -90,10 +92,11 @@ struct builtin_useReinterpret {
  * using a compare and swap loop
  */
 template <typename T>
-struct builtin_useCAS {
+struct builtin_useCAS
+{
   static constexpr bool value =
-      !builtin_useIntrinsic<T>::value &&
-      (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
+      !builtin_useIntrinsic<T>::value &&
+      (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
 };

@@ -105,24 +108,24 @@ struct builtin_useCAS {
 /*!
  * Atomic or using intrinsics
  */
-RAJA_INLINE char builtin_atomicOr(char *acc, char value)
+RAJA_INLINE char builtin_atomicOr(char* acc, char value)
 {
   return _InterlockedOr8(acc, value);
 }

-RAJA_INLINE short builtin_atomicOr(short *acc, short value)
+RAJA_INLINE short builtin_atomicOr(short* acc, short value)
 {
   return _InterlockedOr16(acc, value);
 }

-RAJA_INLINE long builtin_atomicOr(long *acc, long value)
+RAJA_INLINE long builtin_atomicOr(long* acc, long value)
 {
   return _InterlockedOr(acc, value);
 }

 #if defined(_WIN64)

-RAJA_INLINE long long builtin_atomicOr(long long *acc, long long value)
+RAJA_INLINE long long builtin_atomicOr(long long* acc, long long value)
 {
   return _InterlockedOr64(acc, value);
 }

@@ -134,7 +137,7 @@ RAJA_INLINE long long builtin_atomicOr(long long *acc, long long value)
  */
 template <typename T,
           std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
-RAJA_INLINE T builtin_atomicLoad(T *acc)
+RAJA_INLINE T builtin_atomicLoad(T* acc)
 {
   return builtin_atomicOr(acc, static_cast<T>(0));
 }

@@ -143,24 +146,24 @@
 /*!
* Atomic exchange using intrinsics */ -RAJA_INLINE char builtin_atomicExchange(char *acc, char value) +RAJA_INLINE char builtin_atomicExchange(char* acc, char value) { return _InterlockedExchange8(acc, value); } -RAJA_INLINE short builtin_atomicExchange(short *acc, short value) +RAJA_INLINE short builtin_atomicExchange(short* acc, short value) { return _InterlockedExchange16(acc, value); } -RAJA_INLINE long builtin_atomicExchange(long *acc, long value) +RAJA_INLINE long builtin_atomicExchange(long* acc, long value) { return _InterlockedExchange(acc, value); } #if defined(_WIN64) -RAJA_INLINE long long builtin_atomicExchange(long long *acc, long long value) +RAJA_INLINE long long builtin_atomicExchange(long long* acc, long long value) { return _InterlockedExchange64(acc, value); } @@ -173,7 +176,7 @@ RAJA_INLINE long long builtin_atomicExchange(long long *acc, long long value) */ template ::value, bool> = true> -RAJA_INLINE void builtin_atomicStore(T *acc, T value) +RAJA_INLINE void builtin_atomicStore(T* acc, T value) { builtin_atomicExchange(acc, value); } @@ -182,24 +185,25 @@ RAJA_INLINE void builtin_atomicStore(T *acc, T value) /*! * Atomic compare and swap using intrinsics */ -RAJA_INLINE char builtin_atomicCAS(char *acc, char compare, char value) +RAJA_INLINE char builtin_atomicCAS(char* acc, char compare, char value) { return _InterlockedCompareExchange8(acc, value, compare); } -RAJA_INLINE short builtin_atomicCAS(short *acc, short compare, short value) +RAJA_INLINE short builtin_atomicCAS(short* acc, short compare, short value) { return _InterlockedCompareExchange16(acc, value, compare); } -RAJA_INLINE long builtin_atomicCAS(long *acc, long compare, long value) +RAJA_INLINE long builtin_atomicCAS(long* acc, long compare, long value) { return _InterlockedCompareExchange(acc, value, compare); } #if defined(_WIN64) -RAJA_INLINE long long builtin_atomicCAS(long long *acc, long long compare, long long value) +RAJA_INLINE long long +builtin_atomicCAS(long long* acc, long long compare, long long value) { return _InterlockedCompareExchange64(acc, value, compare); } @@ -210,24 +214,24 @@ RAJA_INLINE long long builtin_atomicCAS(long long *acc, long long compare, long /*! * Atomic addition using intrinsics */ -RAJA_INLINE char builtin_atomicAdd(char *acc, char value) +RAJA_INLINE char builtin_atomicAdd(char* acc, char value) { return _InterlockedExchangeAdd8(acc, value); } -RAJA_INLINE short builtin_atomicAdd(short *acc, short value) +RAJA_INLINE short builtin_atomicAdd(short* acc, short value) { return _InterlockedExchangeAdd16(acc, value); } -RAJA_INLINE long builtin_atomicAdd(long *acc, long value) +RAJA_INLINE long builtin_atomicAdd(long* acc, long value) { return _InterlockedExchangeAdd(acc, value); } #if defined(_WIN64) -RAJA_INLINE long long builtin_atomicAdd(long long *acc, long long value) +RAJA_INLINE long long builtin_atomicAdd(long long* acc, long long value) { return _InterlockedExchangeAdd64(acc, value); } @@ -238,24 +242,24 @@ RAJA_INLINE long long builtin_atomicAdd(long long *acc, long long value) /*! 
* Atomic subtraction using intrinsics */ -RAJA_INLINE char builtin_atomicSub(char *acc, char value) +RAJA_INLINE char builtin_atomicSub(char* acc, char value) { return _InterlockedExchangeAdd8(acc, -value); } -RAJA_INLINE short builtin_atomicSub(short *acc, short value) +RAJA_INLINE short builtin_atomicSub(short* acc, short value) { return _InterlockedExchangeAdd16(acc, -value); } -RAJA_INLINE long builtin_atomicSub(long *acc, long value) +RAJA_INLINE long builtin_atomicSub(long* acc, long value) { return _InterlockedExchangeAdd(acc, -value); } #if defined(_WIN64) -RAJA_INLINE long long builtin_atomicSub(long long *acc, long long value) +RAJA_INLINE long long builtin_atomicSub(long long* acc, long long value) { return _InterlockedExchangeAdd64(acc, -value); } @@ -266,24 +270,24 @@ RAJA_INLINE long long builtin_atomicSub(long long *acc, long long value) /*! * Atomic and using intrinsics */ -RAJA_INLINE char builtin_atomicAnd(char *acc, char value) +RAJA_INLINE char builtin_atomicAnd(char* acc, char value) { return _InterlockedAnd8(acc, value); } -RAJA_INLINE short builtin_atomicAnd(short *acc, short value) +RAJA_INLINE short builtin_atomicAnd(short* acc, short value) { return _InterlockedAnd16(acc, value); } -RAJA_INLINE long builtin_atomicAnd(long *acc, long value) +RAJA_INLINE long builtin_atomicAnd(long* acc, long value) { return _InterlockedAnd(acc, value); } #if defined(_WIN64) -RAJA_INLINE long long builtin_atomicAnd(long long *acc, long long value) +RAJA_INLINE long long builtin_atomicAnd(long long* acc, long long value) { return _InterlockedAnd64(acc, value); } @@ -294,24 +298,24 @@ RAJA_INLINE long long builtin_atomicAnd(long long *acc, long long value) /*! * Atomic xor using intrinsics */ -RAJA_INLINE char builtin_atomicXor(char *acc, char value) +RAJA_INLINE char builtin_atomicXor(char* acc, char value) { return _InterlockedXor8(acc, value); } -RAJA_INLINE short builtin_atomicXor(short *acc, short value) +RAJA_INLINE short builtin_atomicXor(short* acc, short value) { return _InterlockedXor16(acc, value); } -RAJA_INLINE long builtin_atomicXor(long *acc, long value) +RAJA_INLINE long builtin_atomicXor(long* acc, long value) { return _InterlockedXor(acc, value); } #if defined(_WIN64) -RAJA_INLINE long long builtin_atomicXor(long long *acc, long long value) +RAJA_INLINE long long builtin_atomicXor(long long* acc, long long value) { return _InterlockedXor64(acc, value); } @@ -327,10 +331,11 @@ RAJA_INLINE long long builtin_atomicXor(long long *acc, long long value) * using an intrinsic */ template -struct builtin_useIntrinsic { +struct builtin_useIntrinsic +{ static constexpr bool value = - (std::is_integral::value || std::is_enum::value) && - (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8); + (std::is_integral::value || std::is_enum::value) && + (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8); }; @@ -339,54 +344,54 @@ struct builtin_useIntrinsic { * by reinterpreting inputs to types that intrinsics support */ template -struct builtin_useReinterpret { - static constexpr bool value = - !std::is_integral::value && - !std::is_enum::value && - ((sizeof(T) == 1 +struct builtin_useReinterpret +{ + static constexpr bool value = !std::is_integral::value && + !std::is_enum::value && + ((sizeof(T) == 1 #if !defined(UINT8_MAX) - && sizeof(unsigned char) == 1 + && sizeof(unsigned char) == 1 #endif - ) || - (sizeof(T) == 2 + ) || + (sizeof(T) == 2 #if !defined(UINT16_MAX) - && sizeof(unsigned short) == 2 + && sizeof(unsigned short) == 2 #endif - ) || - 
(sizeof(T) == 4 + ) || + (sizeof(T) == 4 #if !defined(UINT32_MAX) - && sizeof(unsigned int) == 4 + && sizeof(unsigned int) == 4 #endif - ) || - (sizeof(T) == 8 + ) || + (sizeof(T) == 8 #if !defined(UINT64_MAX) - && sizeof(unsigned long long) == 8 + && sizeof(unsigned long long) == 8 #endif - )); + )); using type = - std::conditional_t>>; + uint64_t>>>; #else - unsigned long long>>>; + unsigned long long>>>; #endif }; @@ -396,10 +401,11 @@ struct builtin_useReinterpret { * using a compare and swap loop */ template -struct builtin_useCAS { +struct builtin_useCAS +{ static constexpr bool value = - !std::is_integral::value && !std::is_enum::value && - (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8); + !std::is_integral::value && !std::is_enum::value && + (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8); }; @@ -413,7 +419,7 @@ struct builtin_useCAS { */ template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T *acc) +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T* acc) { return __atomic_load_n(acc, __ATOMIC_RELAXED); } @@ -424,7 +430,7 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T *acc) */ template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T* acc, T value) { __atomic_store_n(acc, value, __ATOMIC_RELAXED); } @@ -435,7 +441,7 @@ RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T *acc, T value) */ template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T* acc, T value) { return __atomic_exchange_n(acc, value, __ATOMIC_RELAXED); } @@ -446,10 +452,10 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T *acc, T value) */ template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T *acc, T compare, T value) +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T* acc, T compare, T value) { - __atomic_compare_exchange_n( - acc, &compare, value, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + __atomic_compare_exchange_n(acc, &compare, value, false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); return compare; } @@ -459,7 +465,7 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T *acc, T compare, T value) */ template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T* acc, T value) { return __atomic_fetch_add(acc, value, __ATOMIC_RELAXED); } @@ -470,7 +476,7 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T *acc, T value) */ template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T* acc, T value) { return __atomic_fetch_sub(acc, value, __ATOMIC_RELAXED); } @@ -481,7 +487,7 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T *acc, T value) */ template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T* acc, T value) { return __atomic_fetch_and(acc, value, __ATOMIC_RELAXED); } @@ -492,7 +498,7 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T *acc, T value) */ template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr(T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr(T* acc, T value) { return __atomic_fetch_or(acc, value, __ATOMIC_RELAXED); } @@ -503,7 +509,7 @@ RAJA_DEVICE_HIP RAJA_INLINE T 
builtin_atomicOr(T *acc, T value)
  */
 template <typename T,
           std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
-RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T *acc, T value)
+RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T* acc, T value)
 {
   return __atomic_fetch_xor(acc, value, __ATOMIC_RELAXED);
 }

@@ -529,12 +535,12 @@ using builtin_useReinterpret_t = typename builtin_useReinterpret<T>::type;
  */
 template <typename T,
           std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
-RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T *acc)
+RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T* acc)
 {
   using R = builtin_useReinterpret_t<T>;

   return RAJA::util::reinterp_A_as_B<R, T>(
-      builtin_atomicLoad(reinterpret_cast<R *>(acc)));
+      builtin_atomicLoad(reinterpret_cast<R*>(acc)));
 }

@@ -543,7 +549,7 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicLoad(T *acc)
  */
 template <typename T,
           std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
-RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T *acc, T value)
+RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T* acc, T value)
 {
   using R = builtin_useReinterpret_t<T>;

@@ -557,13 +563,12 @@ RAJA_DEVICE_HIP RAJA_INLINE void builtin_atomicStore(T *acc, T value)
  */
 template <typename T,
           std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
-RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T *acc, T value)
+RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T* acc, T value)
 {
   using R = builtin_useReinterpret_t<T>;

-  return RAJA::util::reinterp_A_as_B<R, T>(
-      builtin_atomicExchange(reinterpret_cast<R *>(acc),
-                             RAJA::util::reinterp_A_as_B<T, R>(value)));
+  return RAJA::util::reinterp_A_as_B<R, T>(builtin_atomicExchange(
+      reinterpret_cast<R*>(acc), RAJA::util::reinterp_A_as_B<T, R>(value)));
 }

@@ -572,14 +577,13 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicExchange(T *acc, T value)
  */
 template <typename T,
           std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
-RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T *acc, T compare, T value)
+RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T* acc, T compare, T value)
 {
   using R = builtin_useReinterpret_t<T>;

-  return RAJA::util::reinterp_A_as_B<R, T>(
-      builtin_atomicCAS(reinterpret_cast<R *>(acc),
-                        RAJA::util::reinterp_A_as_B<T, R>(compare),
-                        RAJA::util::reinterp_A_as_B<T, R>(value)));
+  return RAJA::util::reinterp_A_as_B<R, T>(builtin_atomicCAS(
+      reinterpret_cast<R*>(acc), RAJA::util::reinterp_A_as_B<T, R>(compare),
+      RAJA::util::reinterp_A_as_B<T, R>(value)));
 }

@@ -594,7 +598,7 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS(T *acc, T compare, T value)
  */
 template <typename T,
           std::enable_if_t<builtin_useIntrinsic<T>::value, bool> = true>
-RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T &a, const T &b)
+RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T& a, const T& b)
 {
   return a == b;
 }

@@ -607,7 +611,7 @@ RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T &a, const T &b)
  */
 template <typename T,
           std::enable_if_t<builtin_useReinterpret<T>::value, bool> = true>
-RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T &a, const T &b)
+RAJA_DEVICE_HIP RAJA_INLINE bool builtin_atomicCAS_equal(const T& a, const T& b)
 {
   using R = builtin_useReinterpret_t<T>;

@@ -622,15 +626,15 @@
  * Returns the OLD value that was replaced by the result of this operation.
  */
 template <typename T, typename Oper>
-RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T *acc,
-                                                     Oper &&oper)
+RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T* acc, Oper&& oper)
 {
   T old = builtin_atomicLoad(acc);
   T expected;

-  do {
+  do
+  {
     expected = old;
-    old = builtin_atomicCAS(acc, expected, oper(expected));
+    old      = builtin_atomicCAS(acc, expected, oper(expected));
   } while (!builtin_atomicCAS_equal(old, expected));

   return old;

@@ -644,21 +648,23 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T *acc,
 * that was replaced by the result of this operation.
*/ template -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T *acc, - Oper &&oper, - ShortCircuit &&sc) +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T* acc, + Oper&& oper, + ShortCircuit&& sc) { T old = builtin_atomicLoad(acc); - if (sc(old)) { + if (sc(old)) + { return old; } T expected; - do { + do + { expected = old; - old = builtin_atomicCAS(acc, expected, oper(expected)); + old = builtin_atomicCAS(acc, expected, oper(expected)); } while (!builtin_atomicCAS_equal(old, expected) && !sc(old)); return old; @@ -673,65 +679,50 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicCAS_loop(T *acc, /*! * Atomic addition using compare and swap loop */ -template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T *acc, T value) +template ::value, bool> = true> +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAdd(T* acc, T value) { - return builtin_atomicCAS_loop(acc, [value] (T old) { - return old + value; - }); + return builtin_atomicCAS_loop(acc, [value](T old) { return old + value; }); } /*! * Atomic subtraction using compare and swap loop */ -template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T *acc, T value) +template ::value, bool> = true> +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicSub(T* acc, T value) { - return builtin_atomicCAS_loop(acc, [value] (T old) { - return old - value; - }); + return builtin_atomicCAS_loop(acc, [value](T old) { return old - value; }); } /*! * Atomic and using compare and swap loop */ -template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T *acc, T value) +template ::value, bool> = true> +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicAnd(T* acc, T value) { - return builtin_atomicCAS_loop(acc, [value] (T old) { - return old & value; - }); + return builtin_atomicCAS_loop(acc, [value](T old) { return old & value; }); } /*! * Atomic or using compare and swap loop */ -template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr(T *acc, T value) +template ::value, bool> = true> +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicOr(T* acc, T value) { - return builtin_atomicCAS_loop(acc, [value] (T old) { - return old | value; - }); + return builtin_atomicCAS_loop(acc, [value](T old) { return old | value; }); } /*! 
* Atomic xor using compare and swap loop */ -template ::value, bool> = true> -RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T *acc, T value) +template ::value, bool> = true> +RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T* acc, T value) { - return builtin_atomicCAS_loop(acc, [value] (T old) { - return old ^ value; - }); + return builtin_atomicCAS_loop(acc, [value](T old) { return old ^ value; }); } @@ -739,109 +730,105 @@ RAJA_DEVICE_HIP RAJA_INLINE T builtin_atomicXor(T *acc, T value) template -RAJA_DEVICE_HIP RAJA_INLINE T atomicLoad(builtin_atomic, T *acc) +RAJA_DEVICE_HIP RAJA_INLINE T atomicLoad(builtin_atomic, T* acc) { return detail::builtin_atomicLoad(acc); } template -RAJA_DEVICE_HIP RAJA_INLINE void atomicStore(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE void atomicStore(builtin_atomic, T* acc, T value) { detail::builtin_atomicStore(acc, value); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicAdd(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicAdd(builtin_atomic, T* acc, T value) { return detail::builtin_atomicAdd(acc, value); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicSub(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicSub(builtin_atomic, T* acc, T value) { return detail::builtin_atomicSub(acc, value); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicMin(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicMin(builtin_atomic, T* acc, T value) { return detail::builtin_atomicCAS_loop( - acc, - [value] (T old) { - return value < old ? value : old; - }, - [value] (T current) { - return current <= value; - }); + acc, [value](T old) { return value < old ? value : old; }, + [value](T current) { return current <= value; }); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicMax(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicMax(builtin_atomic, T* acc, T value) { return detail::builtin_atomicCAS_loop( - acc, - [value] (T old) { - return old < value ? value : old; - }, - [value] (T current) { - return value <= current; - }); + acc, [value](T old) { return old < value ? value : old; }, + [value](T current) { return value <= current; }); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicInc(builtin_atomic, T *acc) +RAJA_DEVICE_HIP RAJA_INLINE T atomicInc(builtin_atomic, T* acc) { return detail::builtin_atomicAdd(acc, static_cast(1)); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicInc(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicInc(builtin_atomic, T* acc, T value) { - return detail::builtin_atomicCAS_loop(acc, [value] (T old) { - return value <= old ? static_cast(0) : old + static_cast(1); - }); + return detail::builtin_atomicCAS_loop( + acc, [value](T old) + { return value <= old ? static_cast(0) : old + static_cast(1); }); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicDec(builtin_atomic, T *acc) +RAJA_DEVICE_HIP RAJA_INLINE T atomicDec(builtin_atomic, T* acc) { return detail::builtin_atomicSub(acc, static_cast(1)); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicDec(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicDec(builtin_atomic, T* acc, T value) { - return detail::builtin_atomicCAS_loop(acc, [value] (T old) { - return old == static_cast(0) || value < old ? value : old - static_cast(1); - }); + return detail::builtin_atomicCAS_loop(acc, + [value](T old) + { + return old == static_cast(0) || + value < old + ? 
value + : old - static_cast(1); + }); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicAnd(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicAnd(builtin_atomic, T* acc, T value) { return detail::builtin_atomicAnd(acc, value); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicOr(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicOr(builtin_atomic, T* acc, T value) { return detail::builtin_atomicOr(acc, value); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicXor(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicXor(builtin_atomic, T* acc, T value) { return detail::builtin_atomicXor(acc, value); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicExchange(builtin_atomic, T *acc, T value) +RAJA_DEVICE_HIP RAJA_INLINE T atomicExchange(builtin_atomic, T* acc, T value) { return detail::builtin_atomicExchange(acc, value); } template -RAJA_DEVICE_HIP RAJA_INLINE T atomicCAS(builtin_atomic, T *acc, T compare, T value) +RAJA_DEVICE_HIP RAJA_INLINE T +atomicCAS(builtin_atomic, T* acc, T compare, T value) { return detail::builtin_atomicCAS(acc, compare, value); } diff --git a/include/RAJA/policy/cuda/kernel/Sync.hpp b/include/RAJA/policy/cuda/kernel/Sync.hpp index ae00d346ae..e750c6bfc0 100644 --- a/include/RAJA/policy/cuda/kernel/Sync.hpp +++ b/include/RAJA/policy/cuda/kernel/Sync.hpp @@ -85,7 +85,8 @@ struct CudaStatementExecutor #else void exec(Data&, bool) - {} + { + } #endif static inline LaunchDims diff --git a/include/RAJA/policy/openmp/params/forall.hpp b/include/RAJA/policy/openmp/params/forall.hpp index e22b3d7d59..1f8c2a5e95 100644 --- a/include/RAJA/policy/openmp/params/forall.hpp +++ b/include/RAJA/policy/openmp/params/forall.hpp @@ -98,8 +98,8 @@ forall_impl(const ExecPol& p, RAJA_OMP_DECLARE_REDUCTION_COMBINE; RAJA_EXTRACT_BED_IT(iter); -#pragma omp parallel for schedule(static, ChunkSize) reduction(combine \ - : f_params) +#pragma omp parallel for schedule(static, ChunkSize) \ + reduction(combine : f_params) for (decltype(distance_it) i = 0; i < distance_it; ++i) { RAJA::expt::invoke_body(f_params, loop_body, begin_it[i]); @@ -202,8 +202,8 @@ RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Dynamic& p, RAJA_OMP_DECLARE_REDUCTION_COMBINE; RAJA_EXTRACT_BED_IT(iter); -#pragma omp parallel for schedule(dynamic, ChunkSize) reduction(combine \ - : f_params) +#pragma omp parallel for schedule(dynamic, ChunkSize) \ + reduction(combine : f_params) for (decltype(distance_it) i = 0; i < distance_it; ++i) { RAJA::expt::invoke_body(f_params, loop_body, begin_it[i]); @@ -257,8 +257,8 @@ RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Guided& p, RAJA_OMP_DECLARE_REDUCTION_COMBINE; RAJA_EXTRACT_BED_IT(iter); -#pragma omp parallel for schedule(guided, ChunkSize) reduction(combine \ - : f_params) +#pragma omp parallel for schedule(guided, ChunkSize) \ + reduction(combine : f_params) for (decltype(distance_it) i = 0; i < distance_it; ++i) { RAJA::expt::invoke_body(f_params, loop_body, begin_it[i]); diff --git a/include/RAJA/policy/openmp/sort.hpp b/include/RAJA/policy/openmp/sort.hpp index ea88a7b2ff..91f8c1d2a2 100644 --- a/include/RAJA/policy/openmp/sort.hpp +++ b/include/RAJA/policy/openmp/sort.hpp @@ -177,7 +177,7 @@ inline void sort(Sorter sorter, Iter begin, Iter end, Compare comp) std::min((n + iterates_per_task - 1) / iterates_per_task, max_threads); RAJA_UNUSED_VAR(requested_num_threads); // avoid warning in hip device code -#pragma omp parallel num_threads(static_cast (requested_num_threads)) +#pragma omp parallel 
num_threads(static_cast<int>(requested_num_threads))
 #pragma omp master
     {
       sort_task(sorter, begin, 0, n, iterates_per_task, comp);
@@ -189,7 +189,7 @@ inline void sort(Sorter sorter, Iter begin, Iter end, Compare comp)
         (n + min_iterates_per_task - 1) / min_iterates_per_task, max_threads);
     RAJA_UNUSED_VAR(requested_num_threads); // avoid warning in hip device code

-#pragma omp parallel num_threads(static_cast<int> (requested_num_threads))
+#pragma omp parallel num_threads(static_cast<int>(requested_num_threads))
     {
       sort_parallel_region(sorter, begin, n, comp);
     }
diff --git a/include/RAJA/policy/openmp_target.hpp b/include/RAJA/policy/openmp_target.hpp
index 4c48a12eda..018b3878d8 100644
--- a/include/RAJA/policy/openmp_target.hpp
+++ b/include/RAJA/policy/openmp_target.hpp
@@ -30,7 +30,7 @@
 #include "RAJA/policy/openmp_target/kernel.hpp"
 #include "RAJA/policy/openmp_target/forall.hpp"
 #include "RAJA/policy/openmp_target/reduce.hpp"
-//#include "RAJA/policy/openmp_target/multi_reduce.hpp"
+// #include "RAJA/policy/openmp_target/multi_reduce.hpp"
 #include "RAJA/policy/openmp_target/WorkGroup.hpp"

diff --git a/include/RAJA/policy/openmp_target/forall.hpp b/include/RAJA/policy/openmp_target/forall.hpp
index a142b6a606..c61a7d09f5 100644
--- a/include/RAJA/policy/openmp_target/forall.hpp
+++ b/include/RAJA/policy/openmp_target/forall.hpp
@@ -79,9 +79,7 @@ forall_impl(resources::Omp omp_res,
   auto i = distance_it;
 #pragma omp target teams distribute parallel for num_teams(numteams) \
-    schedule(static, 1) map(to \
-                            : body, begin_it) reduction(combine \
-                                                        : f_params)
+    schedule(static, 1) map(to : body, begin_it) reduction(combine : f_params)
   for (i = 0; i < distance_it; ++i)
   {
     Body ib = body;
@@ -133,8 +131,7 @@ forall_impl(resources::Omp omp_res,
   auto i = distance_it;
 #pragma omp target teams distribute parallel for num_teams(numteams) \
-    schedule(static, 1) map(to \
-                            : body, begin_it)
+    schedule(static, 1) map(to : body, begin_it)
   for (i = 0; i < distance_it; ++i)
   {
     Body ib = body;
@@ -167,8 +164,7 @@ forall_impl(resources::Omp omp_res,
   RAJA_EXTRACT_BED_IT(iter);
 #pragma omp target teams distribute parallel for schedule(static, 1) \
-    firstprivate(body, begin_it) reduction(combine \
-                                           : f_params)
+    firstprivate(body, begin_it) reduction(combine : f_params)
   for (decltype(distance_it) i = 0; i < distance_it; ++i)
   {
     Body ib = body;
diff --git a/include/RAJA/policy/openmp_target/params/reduce.hpp b/include/RAJA/policy/openmp_target/params/reduce.hpp
index 34c23fb5db..0364470945 100644
--- a/include/RAJA/policy/openmp_target/params/reduce.hpp
+++ b/include/RAJA/policy/openmp_target/params/reduce.hpp
@@ -3,37 +3,43 @@

 #include "RAJA/pattern/params/reducer.hpp"

-namespace RAJA {
-namespace expt {
-namespace detail {
+namespace RAJA
+{
+namespace expt
+{
+namespace detail
+{

 #if defined(RAJA_ENABLE_TARGET_OPENMP)

-  // Init
-  template <typename EXEC_POL, typename OP, typename T, typename VOp>
-  camp::concepts::enable_if< type_traits::is_target_openmp_policy<EXEC_POL> >
-  init(Reducer<OP, T, VOp>& red) {
-    red.m_valop.val = OP::identity();
-  }
-
-  // Combine
-  template <typename EXEC_POL, typename OP, typename T, typename VOp>
-  camp::concepts::enable_if< type_traits::is_target_openmp_policy<EXEC_POL> >
-  combine(Reducer<OP, T, VOp>& out, const Reducer<OP, T, VOp>& in) {
-    out.m_valop.val = OP{}(out.m_valop.val, in.m_valop.val);
-  }
-
-  // Resolve
-  template <typename EXEC_POL, typename OP, typename T, typename VOp>
-  camp::concepts::enable_if< type_traits::is_target_openmp_policy<EXEC_POL> >
-  resolve(Reducer<OP, T, VOp>& red) {
-    red.combineTarget(red.m_valop.val);
-  }
+// Init
+template <typename EXEC_POL, typename OP, typename T, typename VOp>
+camp::concepts::enable_if<type_traits::is_target_openmp_policy<EXEC_POL>>
+init(Reducer<OP, T, VOp>& red)
+{
+  red.m_valop.val = OP::identity();
+}
+
+// Combine
+template <typename EXEC_POL, typename OP, typename T, typename VOp>
+camp::concepts::enable_if<type_traits::is_target_openmp_policy<EXEC_POL>>
+combine(Reducer<OP, T, VOp>& out, const Reducer<OP, T, VOp>& in)
+{
+  out.m_valop.val = OP {}(out.m_valop.val, in.m_valop.val);
+}
+
+// Resolve
+template <typename EXEC_POL, typename OP, typename T, typename VOp>
+camp::concepts::enable_if<type_traits::is_target_openmp_policy<EXEC_POL>>
+resolve(Reducer<OP, T, VOp>& red)
+{
+  red.combineTarget(red.m_valop.val);
+}

 #endif

-} // namespace detail
-} // namespace expt
-} // namespace RAJA
+}  // namespace detail
+}  // namespace expt
+}  // namespace RAJA

-#endif //  NEW_REDUCE_OMP_REDUCE_HPP
+#endif  //  NEW_REDUCE_OMP_REDUCE_HPP
diff --git a/include/RAJA/policy/openmp_target/reduce.hpp b/include/RAJA/policy/openmp_target/reduce.hpp
index 8bcbde620d..0470c52136 100644
--- a/include/RAJA/policy/openmp_target/reduce.hpp
+++ b/include/RAJA/policy/openmp_target/reduce.hpp
@@ -12,7 +12,7 @@

 #if defined(RAJA_ENABLE_TARGET_OPENMP)

-//#include <omp.h> // Leaving out until XL is fixed 2/25/2019.
+// #include <omp.h> // Leaving out until XL is fixed 2/25/2019.

 #include

diff --git a/include/RAJA/policy/sycl.hpp b/include/RAJA/policy/sycl.hpp
index 491e39910c..81f16d4918 100644
--- a/include/RAJA/policy/sycl.hpp
+++ b/include/RAJA/policy/sycl.hpp
@@ -29,13 +29,13 @@
 #include "RAJA/policy/sycl/forall.hpp"
 #include "RAJA/policy/sycl/policy.hpp"
 #include "RAJA/policy/sycl/reduce.hpp"
-//#include "RAJA/policy/sycl/multi_reduce.hpp"
-//#include "RAJA/policy/sycl/scan.hpp"
-//#include "RAJA/policy/sycl/sort.hpp"
+// #include "RAJA/policy/sycl/multi_reduce.hpp"
+// #include "RAJA/policy/sycl/scan.hpp"
+// #include "RAJA/policy/sycl/sort.hpp"
 #include "RAJA/policy/sycl/kernel.hpp"
-//#include "RAJA/policy/sycl/synchronize.hpp"
+// #include "RAJA/policy/sycl/synchronize.hpp"
 #include "RAJA/policy/sycl/launch.hpp"
-//#include "RAJA/policy/sycl/WorkGroup.hpp"
+// #include "RAJA/policy/sycl/WorkGroup.hpp"

 #endif // closing endif for if defined(RAJA_ENABLE_SYCL)
diff --git a/include/RAJA/policy/sycl/kernel.hpp b/include/RAJA/policy/sycl/kernel.hpp
index 641c3a9ef3..803bcd49e0 100644
--- a/include/RAJA/policy/sycl/kernel.hpp
+++ b/include/RAJA/policy/sycl/kernel.hpp
@@ -23,11 +23,11 @@
 #include "RAJA/policy/sycl/kernel/SyclKernel.hpp"
 #include "RAJA/policy/sycl/kernel/For.hpp"
 #include "RAJA/policy/sycl/kernel/ForICount.hpp"
-//#include "RAJA/policy/sycl/kernel/Hyperplane.hpp"
-//#include "RAJA/policy/sycl/kernel/InitLocalMem.hpp"
+// #include "RAJA/policy/sycl/kernel/Hyperplane.hpp"
+// #include "RAJA/policy/sycl/kernel/InitLocalMem.hpp"
 #include "RAJA/policy/sycl/kernel/Lambda.hpp"
-//#include "RAJA/policy/sycl/kernel/Reduce.hpp"
-//#include "RAJA/policy/sycl/kernel/Sync.hpp"
+// #include "RAJA/policy/sycl/kernel/Reduce.hpp"
+// #include "RAJA/policy/sycl/kernel/Sync.hpp"
 #include "RAJA/policy/sycl/kernel/Tile.hpp"
 #include "RAJA/policy/sycl/kernel/TileTCount.hpp"
 #include "RAJA/policy/sycl/kernel/internal.hpp"
diff --git a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp
index 4c79a279d9..d36a7fa2af 100644
--- a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp
+++ b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp
@@ -143,12 +143,9 @@ struct SyclLaunchHelper, StmtList, Data, Types>
       qu->submit(
           [&](cl::sycl::handler& h)
           {
-            h.parallel_for(launch_dims.fit_nd_range(qu),
-                           [=](cl::sycl::nd_item<3> item)
-                           {
-                             SyclKernelLauncher<Data, executor_t>(*m_data,
-                                                                  item);
-                           });
+            h.parallel_for(
+                launch_dims.fit_nd_range(qu), [=](cl::sycl::nd_item<3> item)
+                { SyclKernelLauncher<Data, executor_t>(*m_data, item); });
           })
           .wait(); // Need to wait to free memory

@@ -183,9 +180,7 @@ struct SyclLaunchHelper, StmtList, Data, Types>
         {
           h.parallel_for(launch_dims.fit_nd_range(qu),
                          [=](cl::sycl::nd_item<3> item)
-                         {
-                           SyclKernelLauncher<Data, executor_t>(data, item);
-                         });
+                         {
SyclKernelLauncher(data, item); }); }); if (!async) diff --git a/include/RAJA/policy/sycl/launch.hpp b/include/RAJA/policy/sycl/launch.hpp index fcffc88aed..5cef8f570d 100644 --- a/include/RAJA/policy/sycl/launch.hpp +++ b/include/RAJA/policy/sycl/launch.hpp @@ -22,7 +22,7 @@ #include "RAJA/pattern/detail/privatizer.hpp" #include "RAJA/policy/sycl/policy.hpp" #include "RAJA/policy/sycl/MemUtils_SYCL.hpp" -//#include "RAJA/policy/sycl/raja_syclerrchk.hpp" +// #include "RAJA/policy/sycl/raja_syclerrchk.hpp" #include "RAJA/util/resource.hpp" namespace RAJA diff --git a/include/RAJA/util/View.hpp b/include/RAJA/util/View.hpp index d1b15538ae..be3db700a6 100644 --- a/include/RAJA/util/View.hpp +++ b/include/RAJA/util/View.hpp @@ -141,15 +141,16 @@ removenth(Lay lyout, Tup&& tup) -> decltype(selecttuple( // the index into the array-of-pointers to be moved around in the MultiView // operator(); see the operator overload. Default of 0 means that the p2p index // is in the 0th position. -template // removes - // * - >>>>> +template < + typename ValueType, + typename LayoutType, + RAJA::Index_type P2Pidx = 0, + typename PointerType = ValueType**, + typename NonConstPointerType = camp::type::ptr::add< // adds * + camp::type::ptr::add // removes + // * + >>>>> struct MultiView { using value_type = ValueType; diff --git a/include/RAJA/util/types.hpp b/include/RAJA/util/types.hpp index 4e185591f3..b38e82e45b 100644 --- a/include/RAJA/util/types.hpp +++ b/include/RAJA/util/types.hpp @@ -246,7 +246,7 @@ using Real_type = double; #elif defined(RAJA_USE_FLOAT) /// -using Real_type = float; +using Real_type = float; #else #error RAJA Real_type is undefined! @@ -845,8 +845,8 @@ using UnalignedReal_ptr = Real_type*; using const_UnalignedReal_ptr = const Real_type*; #elif defined(RAJA_USE_RESTRICT_PTR) -using Real_ptr = Real_type* RAJA_RESTRICT; -using const_Real_ptr = const Real_type* RAJA_RESTRICT; +using Real_ptr = Real_type* RAJA_RESTRICT; +using const_Real_ptr = const Real_type* RAJA_RESTRICT; #if defined(RAJA_USE_COMPLEX) using Complex_ptr = Complex_type* RAJA_RESTRICT; @@ -857,24 +857,24 @@ using UnalignedReal_ptr = Real_type* RAJA_RESTRICT; using const_UnalignedReal_ptr = const Real_type* RAJA_RESTRICT; #elif defined(RAJA_USE_RESTRICT_ALIGNED_PTR) -using Real_ptr = TDRAReal_ptr; -using const_Real_ptr = const_TDRAReal_ptr; +using Real_ptr = TDRAReal_ptr; +using const_Real_ptr = const_TDRAReal_ptr; #if defined(RAJA_USE_COMPLEX) -using Complex_ptr = Complex_type* RAJA_RESTRICT; -using const_Complex_ptr = const Complex_type* RAJA_RESTRICT; +using Complex_ptr = Complex_type* RAJA_RESTRICT; +using const_Complex_ptr = const Complex_type* RAJA_RESTRICT; #endif using UnalignedReal_ptr = Real_type* RAJA_RESTRICT; using const_UnalignedReal_ptr = const Real_type* RAJA_RESTRICT; #elif defined(RAJA_USE_PTR_CLASS) -using Real_ptr = RestrictAlignedRealPtr; -using const_Real_ptr = ConstRestrictAlignedRealPtr; +using Real_ptr = RestrictAlignedRealPtr; +using const_Real_ptr = ConstRestrictAlignedRealPtr; #if defined(RAJA_USE_COMPLEX) -using Complex_ptr = RestrictComplexPtr; -using const_Complex_ptr = ConstRestrictComplexPtr; +using Complex_ptr = RestrictComplexPtr; +using const_Complex_ptr = ConstRestrictComplexPtr; #endif using UnalignedReal_ptr = RestrictRealPtr; diff --git a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefCAS.hpp b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefCAS.hpp index 0adce05b3d..97dccdfead 100644 --- 
a/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefCAS.hpp +++ b/test/functional/forall/atomic-ref/tests/test-forall-AtomicRefCAS.hpp @@ -64,7 +64,8 @@ struct CompareExchangeWeakOtherOp : all_op { T expect = (T)0; while (!other.compare_exchange_weak(expect, (T)i)) - {} + { + } return expect; } RAJA::AtomicRef other; @@ -92,7 +93,8 @@ struct CompareExchangeStrongOtherOp : all_op { T expect = (T)0; while (!other.compare_exchange_strong(expect, (T)i)) - {} + { + } return expect; } RAJA::AtomicRef other; diff --git a/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambdaParam-impl.hpp b/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambdaParam-impl.hpp index b54eec5b05..bcf51593d6 100644 --- a/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambdaParam-impl.hpp +++ b/test/functional/kernel/nested-loop/tests/nested-loop-MultiLambdaParam-impl.hpp @@ -176,11 +176,12 @@ struct MultiLambdaParamNestedLoopExec typename camp::at>::type, RAJA::statement::Lambda<1> // inner loop: dot += ... >, - RAJA::statement::Lambda<2, RAJA::Segs<0, 1>, RAJA::Params<0>> // set - // C(row, - // col) - // = - // dot + RAJA::statement:: + Lambda<2, RAJA::Segs<0, 1>, RAJA::Params<0>> // set + // C(row, + // col) + // = + // dot >>>; }; diff --git a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DViewTuple.hpp b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DViewTuple.hpp index 699a6ff776..f4dd3648a0 100644 --- a/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DViewTuple.hpp +++ b/test/functional/kernel/reduce-loc/tests/test-kernel-reduceloc-Max2DViewTuple.hpp @@ -56,7 +56,7 @@ void KernelLocMax2DViewTupleTestImpl(const int xdim, const int ydim) work_res.memcpy(work_array, check_array, sizeof(DATA_TYPE) * array_length); #if defined(RAJA_ENABLE_TARGET_OPENMP) - //#pragma omp target data map(to:work_array[0:array_length]) + // #pragma omp target data map(to:work_array[0:array_length]) #endif RAJA::TypedRangeSegment colrange(0, xdim); diff --git a/test/functional/launch/nested_direct/tests/test-launch-nested-Direct.hpp b/test/functional/launch/nested_direct/tests/test-launch-nested-Direct.hpp index ae6d7e384a..bb64d5424b 100644 --- a/test/functional/launch/nested_direct/tests/test-launch-nested-Direct.hpp +++ b/test/functional/launch/nested_direct/tests/test-launch-nested-Direct.hpp @@ -161,9 +161,7 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M) RAJA::loop( ctx, r1, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx)) - { - working_array[0]++; - }); + { working_array[0]++; }); }); }); }); diff --git a/test/functional/launch/nested_loop/tests/test-launch-nested-Loop.hpp b/test/functional/launch/nested_loop/tests/test-launch-nested-Loop.hpp index cd90bf2298..c9192b6718 100644 --- a/test/functional/launch/nested_loop/tests/test-launch-nested-Loop.hpp +++ b/test/functional/launch/nested_loop/tests/test-launch-nested-Loop.hpp @@ -163,9 +163,7 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M) RAJA::loop( ctx, r1, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx)) - { - working_array[0]++; - }); + { working_array[0]++; }); }); }); }); diff --git a/test/functional/launch/nested_tile_direct/tests/test-launch-nested-Tile-Direct.hpp b/test/functional/launch/nested_tile_direct/tests/test-launch-nested-Tile-Direct.hpp index 4a212875f0..20a4e10ac6 100644 --- a/test/functional/launch/nested_tile_direct/tests/test-launch-nested-Tile-Direct.hpp +++ b/test/functional/launch/nested_tile_direct/tests/test-launch-nested-Tile-Direct.hpp @@ -145,9 +145,7 @@ void 
LaunchNestedTileDirectTestImpl(INDEX_TYPE M) RAJA::loop( ctx, x_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx)) - { - working_array[0]++; - }); + { working_array[0]++; }); }); }); }); diff --git a/test/functional/tensor/matrix/test-tensor-matrix-double.hpp b/test/functional/tensor/matrix/test-tensor-matrix-double.hpp index d988dd8e55..4457687cae 100644 --- a/test/functional/tensor/matrix/test-tensor-matrix-double.hpp +++ b/test/functional/tensor/matrix/test-tensor-matrix-double.hpp @@ -45,23 +45,23 @@ using TensorMatrixTypes = ::testing::Types< #endif -//#ifdef __AVX__ -// RAJA::expt::RectMatrixRegister, -// RAJA::expt::RectMatrixRegister, -// RAJA::expt::RectMatrixRegister, -// RAJA::expt::RectMatrixRegister, -// RAJA::expt::RectMatrixRegister, -// RAJA::expt::RectMatrixRegister, -// RAJA::expt::RectMatrixRegister, +// #ifdef __AVX__ +// RAJA::expt::RectMatrixRegister, +// RAJA::expt::RectMatrixRegister, +// RAJA::expt::RectMatrixRegister, +// RAJA::expt::RectMatrixRegister, +// RAJA::expt::RectMatrixRegister, +// RAJA::expt::RectMatrixRegister, +// RAJA::expt::RectMatrixRegister, // -//#endif +// #endif #ifdef __AVX2__ diff --git a/test/functional/tensor/vector/tests/test-tensor-vector-ForallVectorRef2d.hpp b/test/functional/tensor/vector/tests/test-tensor-vector-ForallVectorRef2d.hpp index da498db615..3b1111b6ef 100644 --- a/test/functional/tensor/vector/tests/test-tensor-vector-ForallVectorRef2d.hpp +++ b/test/functional/tensor/vector/tests/test-tensor-vector-ForallVectorRef2d.hpp @@ -110,12 +110,9 @@ ForallVectorRef2dImpl() C[i] = 0.0; } - RAJA::forall(RAJA::TypedRangeSegment(0, M), - [=](index_t j) - { - Z(all, j) = - 3 + (X(all, j) * (5 / Y(all, j))) + 9; - }); + RAJA::forall( + RAJA::TypedRangeSegment(0, M), + [=](index_t j) { Z(all, j) = 3 + (X(all, j) * (5 / Y(all, j))) + 9; }); for (index_t i = 0; i < N * M; i++) { @@ -131,12 +128,9 @@ ForallVectorRef2dImpl() C[i] = 0.0; } - RAJA::forall(RAJA::TypedRangeSegment(0, N), - [=](index_t i) - { - Z(i, all) = - 3 + (X(i, all) * (5 / Y(i, all))) + 9; - }); + RAJA::forall( + RAJA::TypedRangeSegment(0, N), + [=](index_t i) { Z(i, all) = 3 + (X(i, all) * (5 / Y(i, all))) + 9; }); for (index_t i = 0; i < N * M; i++) { diff --git a/test/include/RAJA_gtest.hpp b/test/include/RAJA_gtest.hpp index 4b4c786784..7a96d914ae 100644 --- a/test/include/RAJA_gtest.hpp +++ b/test/include/RAJA_gtest.hpp @@ -81,20 +81,22 @@ #define GPU_TYPED_TEST_P(SuiteName, TestName) \ namespace GTEST_SUITE_NAMESPACE_(SuiteName) \ { \ - template \ - class TestName : public SuiteName \ - { \ - private: \ - typedef SuiteName TestFixture; \ - typedef gtest_TypeParam_ TypeParam; \ + template \ + class TestName : public SuiteName \ + { \ + private: \ + typedef SuiteName TestFixture; \ + typedef gtest_TypeParam_ TypeParam; \ \ - public: \ - void TestBody() override; \ - }; \ - static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \ - GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).AddTestName( \ - __FILE__, __LINE__, GTEST_STRINGIFY_(SuiteName), \ - GTEST_STRINGIFY_(TestName)); \ + public: \ + void TestBody() override; \ + }; \ + static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \ + GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).AddTestName( \ + __FILE__, \ + __LINE__, \ + GTEST_STRINGIFY_(SuiteName), \ + GTEST_STRINGIFY_(TestName)); \ } \ template \ void GTEST_SUITE_NAMESPACE_( \ diff --git a/test/include/RAJA_test-reducepol.hpp b/test/include/RAJA_test-reducepol.hpp index b755677c2e..66fc6f9c7a 100644 --- a/test/include/RAJA_test-reducepol.hpp +++ 
b/test/include/RAJA_test-reducepol.hpp @@ -29,7 +29,7 @@ using OpenMPReducePols = #endif #if defined(RAJA_ENABLE_TARGET_OPENMP) - using OpenMPTargetReducePols = camp::list; +using OpenMPTargetReducePols = camp::list; #endif #if defined(RAJA_ENABLE_CUDA) diff --git a/test/old-tests/unit/test-sharedmem.cpp b/test/old-tests/unit/test-sharedmem.cpp index 702d14c8d8..504a850576 100644 --- a/test/old-tests/unit/test-sharedmem.cpp +++ b/test/old-tests/unit/test-sharedmem.cpp @@ -692,8 +692,8 @@ using CUDATypes = ::testing::Types< RAJA::statement::For<0, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<1>>>, - RAJA::statement::CudaSyncThreads> // close shared memory - // scope + RAJA::statement::CudaSyncThreads> // close shared memory + // scope > // for 2 > // for 3 > // CudaKernel @@ -728,8 +728,8 @@ using CUDATypes = ::testing::Types< RAJA::statement::For<0, RAJA::cuda_thread_x_direct, RAJA::statement::Lambda<1>>>, - RAJA::statement::CudaSyncThreads> // close shared memory - // scope + RAJA::statement::CudaSyncThreads> // close shared memory + // scope > // for 2 > // for 3 > // CudaKernel @@ -772,8 +772,8 @@ using HIPTypes = ::testing::Types< RAJA::statement::For<0, RAJA::hip_thread_x_direct, RAJA::statement::Lambda<1>>>, - RAJA::statement::HipSyncThreads> // close shared memory - // scope + RAJA::statement::HipSyncThreads> // close shared memory + // scope > // for 2 > // for 3 > // HipKernel @@ -808,8 +808,8 @@ using HIPTypes = ::testing::Types< RAJA::statement::For<0, RAJA::hip_thread_x_direct, RAJA::statement::Lambda<1>>>, - RAJA::statement::HipSyncThreads> // close shared memory - // scope + RAJA::statement::HipSyncThreads> // close shared memory + // scope > // for 2 > // for 3 > // HipKernel @@ -918,34 +918,24 @@ GPU_TYPED_TEST_P(MatMultiply, shmem) // Zero out thread local memory for storing dot products [=] RAJA_HOST_DEVICE(int tn, int tp, ThreadPriv& pVal) - { - pVal(tn, tp) = 0.0; - }, + { pVal(tn, tp) = 0.0; }, // Load tile of A [=] RAJA_HOST_DEVICE(int n, int m, int tn, int tm, Shmem& aShared) - { - aShared(tn, tm) = Aview(n, m); - }, + { aShared(tn, tm) = Aview(n, m); }, // Load tile of B [=] RAJA_HOST_DEVICE(int m, int p, int tm, int tp, Shmem& bShared) - { - bShared(tm, tp) = Bview(m, p); - }, + { bShared(tm, tp) = Bview(m, p); }, // Do partial update in shmem [=] RAJA_HOST_DEVICE(int tn, int tm, int tp, Shmem& aShared, Shmem& bShared, ThreadPriv& pVal) - { - pVal(tn, tp) += aShared(tn, tm) * bShared(tm, tp); - }, + { pVal(tn, tp) += aShared(tn, tm) * bShared(tm, tp); }, // Write out complete result [=] RAJA_HOST_DEVICE(int n, int p, int tn, int tp, ThreadPriv& pVal) - { - Cview(n, p) = pVal(tn, tp); - }); + { Cview(n, p) = pVal(tn, tp); }); // copy result back to host (NOP on CPU) TypeParam::copy_d2h(N * P, C, d_C); @@ -1069,10 +1059,11 @@ struct Policy_MatMultiply_cpu RAJA::statement::For< 1, RAJA::seq_exec, - RAJA::statement::For<0, - RAJA::seq_exec, - shmem_Lambda3>>>>, // sliding - // window + RAJA::statement::For< + 0, + RAJA::seq_exec, + shmem_Lambda3>>>>, // sliding + // window // Write memory out to global matrix RAJA::statement::For<
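
Editor's note: the common thread in the atomic_builtin.hpp hunks above is the compare-and-swap retry loop that backs every atomic operation lacking a native intrinsic. The following standalone sketch mirrors that pattern using only the GCC/Clang __atomic builtins. The builtin_* names echo the diff, but the RAJA macros, the MSVC/_Interlocked path, and the builtin_atomicCAS_equal reinterpret machinery are deliberately omitted; the atomicMax-style lambda in main is an illustrative assumption, not library code.

// cas_loop_sketch.cpp -- illustrative only; compile with: g++ -std=c++14 cas_loop_sketch.cpp
#include <cstdio>

// Relaxed atomic load, mirroring builtin_atomicLoad in the diff.
template <typename T>
T builtin_atomicLoad(T* acc)
{
  return __atomic_load_n(acc, __ATOMIC_RELAXED);
}

// Compare-and-swap returning the value actually observed in *acc,
// mirroring builtin_atomicCAS in the diff.
template <typename T>
T builtin_atomicCAS(T* acc, T compare, T value)
{
  __atomic_compare_exchange_n(acc, &compare, value, false, __ATOMIC_RELAXED,
                              __ATOMIC_RELAXED);
  return compare;  // updated to the observed value if the CAS failed
}

// The retry loop the diff reformats: keep applying `oper` to the freshly
// observed value until the CAS lands, then hand back the old value.
template <typename T, typename Oper>
T builtin_atomicCAS_loop(T* acc, Oper&& oper)
{
  T old = builtin_atomicLoad(acc);
  T expected;
  do
  {
    expected = old;
    old      = builtin_atomicCAS(acc, expected, oper(expected));
  } while (old != expected);  // the real code routes this through builtin_atomicCAS_equal
  return old;
}

int main()
{
  int counter = 5;
  // atomicMax-style update expressed through the loop (hypothetical usage)
  int prev = builtin_atomicCAS_loop(&counter,
                                    [](int old) { return old < 9 ? 9 : old; });
  std::printf("previous = %d, current = %d\n", prev, counter);  // prints 5, 9
  return 0;
}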