diff --git a/.clang-format b/.clang-format index 1d2ad9a77f..d01fb458ca 100644 --- a/.clang-format +++ b/.clang-format @@ -1,27 +1,82 @@ -BasedOnStyle : google +BasedOnStyle : LLVM +# Indent formatting IndentWidth : 2 -BreakBeforeBraces : Linux +Language: Cpp +UseTab: Never KeepEmptyLinesAtTheStartOfBlocks : true MaxEmptyLinesToKeep : 2 AccessModifierOffset : -2 -UseTab: Never +# This must be off so that include order in RAJA is preserved +SortIncludes: false + +# Alignment of consecutive declarations, assignments etc +AlignConsecutiveAssignments : true +AlignConsecutiveDeclarations : false +AlignConsecutiveMacros : true +AlignTrailingComments : true + +# Control curly brace placement +BreakBeforeBraces : Custom +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: true + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + BeforeLambdaBody: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false + +# Pointer alignment +DerivePointerAlignment: false +PointerAlignment: Left + +# Single line config AllowShortIfStatementsOnASingleLine : true -ConstructorInitializerAllOnOneLineOrOnePerLine : true -AllowShortFunctionsOnASingleLine : true +AllowShortFunctionsOnASingleLine : false AllowShortLoopsOnASingleLine : false -BinPackParameters : false +AllowShortLambdasOnASingleLine : None +AllowAllArgumentsOnNextLine : true AllowAllParametersOfDeclarationOnNextLine : false -AlignTrailingComments : true +BinPackArguments : true +BinPackParameters : false +ConstructorInitializerAllOnOneLineOrOnePerLine : true ColumnLimit : 80 -PenaltyBreakBeforeFirstCallParameter : 100 -PenaltyReturnTypeOnItsOwnLine : 65000 -PenaltyBreakString : 10 +PenaltyExcessCharacter : 10 -# These improve formatting results but require clang 3.6/7 or higher -BreakBeforeBinaryOperators 
: None -AlignAfterOpenBracket: true -BinPackArguments : false +AlignAfterOpenBracket: Align AlignOperands : true AlwaysBreakTemplateDeclarations : true -Cpp11BracedListStyle : true +AlwaysBreakAfterDefinitionReturnType : None +PenaltyReturnTypeOnItsOwnLine : 10000 +BreakBeforeBinaryOperators : None + +# Indents +IndentCaseLabels: true + +# Lambda body +LambdaBodyIndentation : Signature + +SeparateDefinitionBlocks : Always +SpaceBeforeCpp11BracedList: true +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: false +SpacesInConditionalStatement: false +SpacesInParentheses: false +SpacesInSquareBrackets: false diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b31cbe124..dbe5b3f113 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,7 @@ project(RAJA LANGUAGES CXX C VERSION ${RAJA_LOADED}) set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/thirdparty" ${CMAKE_MODULE_PATH}) - +set(BLT_REQUIRED_CLANGFORMAT_VERSION "14" CACHE STRING "") include(cmake/SetupRajaOptions.cmake) cmake_minimum_required(VERSION 3.23) @@ -136,6 +136,9 @@ include(cmake/SetupCompilers.cmake) # Macros for building executables and libraries include (cmake/RAJAMacros.cmake) +# Configure `style` target for enforcing code style +raja_add_code_checks() + set (raja_sources src/AlignedRangeIndexSetBuilders.cpp src/DepGraphNode.cpp diff --git a/cmake/RAJAMacros.cmake b/cmake/RAJAMacros.cmake index c412593db7..11c4661cc1 100644 --- a/cmake/RAJAMacros.cmake +++ b/cmake/RAJAMacros.cmake @@ -204,3 +204,62 @@ macro(raja_add_benchmark) NUM_OMP_THREADS ${arg_NUM_OMP_THREADS} COMMAND ${TEST_DRIVER} ${arg_NAME}) endmacro(raja_add_benchmark) + +##------------------------------------------------------------------------------ +## raja_add_code_checks() +## +## Adds code checks for all source files 
recursively in the RAJA repository. +## +## This creates the following parent build targets: +## check - Runs a non file changing style check and CppCheck +## style - In-place code formatting +## +## Creates various child build targets that follow this pattern: +## raja_ +## raja__ +##------------------------------------------------------------------------------ +macro(raja_add_code_checks) + + set(options) + set(singleValueArgs) + set(multiValueArgs) + + # Parse the arguments to the macro + cmake_parse_arguments(arg + "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Only do code checks if building raja by itself and not included in + # another project + if ("${PROJECT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}") + # Create file globbing expressions that only include directories that contain source + # TODO(bowen) Add examples, exercises and benchmark to the list below + set(_base_dirs "RAJA" "benchmark" "include" "src" "test") + set(_ext_expressions "*.cpp" "*.hpp" "*.inl" + "*.cxx" "*.hxx" "*.cc" "*.c" "*.h" "*.hh") + + set(_glob_expressions) + foreach(_exp ${_ext_expressions}) + foreach(_base_dir ${_base_dirs}) + list(APPEND _glob_expressions "${PROJECT_SOURCE_DIR}/${_base_dir}/${_exp}") + endforeach() + endforeach() + + # Glob for list of files to run code checks on + set(_sources) + file(GLOB_RECURSE _sources ${_glob_expressions}) + + blt_add_code_checks(PREFIX RAJA + SOURCES ${_sources} + CLANGFORMAT_CFG_FILE ${PROJECT_SOURCE_DIR}/.clang-format + CPPCHECK_FLAGS --enable=all --inconclusive) + + # Set FOLDER property for code check targets (named with the RAJA prefix passed above; this macro parses no PREFIX argument) + foreach(_suffix clangformat_check clangformat_style clang_tidy_check clang_tidy_style) + set(_tgt RAJA_${_suffix}) + if(TARGET ${_tgt}) + set_target_properties(${_tgt} PROPERTIES FOLDER "RAJA/code_checks") + endif() + endforeach() + endif() + +endmacro(raja_add_code_checks) diff --git a/cmake/SetupRajaOptions.cmake b/cmake/SetupRajaOptions.cmake index 9c5fb043e4..09276e18db 100644 ---
a/cmake/SetupRajaOptions.cmake +++ b/cmake/SetupRajaOptions.cmake @@ -28,7 +28,7 @@ option(RAJA_ENABLE_FORCEINLINE_RECURSIVE "Enable Forceinline recursive (only sup option(RAJA_DEPRECATED_TESTS "Test deprecated features" Off) option(RAJA_ENABLE_BOUNDS_CHECK "Enable bounds checking in RAJA::Views/Layouts" Off) option(RAJA_TEST_EXHAUSTIVE "Build RAJA exhaustive tests" Off) -option(RAJA_TEST_OPENMP_TARGET_SUBSET "Build subset of RAJA OpenMP target tests" On) +option(RAJA_TEST_OPENMP_TARGET_SUBSET "Build subset of RAJA OpenMP target tests when it is enabled" On) option(RAJA_ENABLE_RUNTIME_PLUGINS "Enable support for loading plugins at runtime" Off) option(RAJA_ALLOW_INCONSISTENT_OPTIONS "Enable inconsistent values for ENABLE_X and RAJA_ENABLE_X options" Off) diff --git a/include/RAJA/RAJA.hpp b/include/RAJA/RAJA.hpp index 59cca4bf22..aea813237b 100644 --- a/include/RAJA/RAJA.hpp +++ b/include/RAJA/RAJA.hpp @@ -88,7 +88,7 @@ #endif #if defined(RAJA_ENABLE_DESUL_ATOMICS) - #include "RAJA/policy/desul.hpp" +#include "RAJA/policy/desul.hpp" #endif #include "RAJA/index/IndexSet.hpp" @@ -197,11 +197,14 @@ #include "RAJA/pattern/sort.hpp" -namespace RAJA { -namespace expt{} +namespace RAJA +{ +namespace expt +{} + // // provide a RAJA::expt namespace for experimental work, but bring alias // // it into RAJA so it doesn't affect user code // using namespace expt; -} +} // namespace RAJA #endif // closing endif for header file include guard diff --git a/include/RAJA/index/IndexSet.hpp b/include/RAJA/index/IndexSet.hpp index 1a467c8341..b12501c255 100644 --- a/include/RAJA/index/IndexSet.hpp +++ b/include/RAJA/index/IndexSet.hpp @@ -34,10 +34,19 @@ namespace RAJA { -enum PushEnd { PUSH_FRONT, PUSH_BACK }; -enum PushCopy { PUSH_COPY, PUSH_NOCOPY }; +enum PushEnd +{ + PUSH_FRONT, + PUSH_BACK +}; -template +enum PushCopy +{ + PUSH_COPY, + PUSH_NOCOPY +}; + +template class TypedIndexSet; namespace policy @@ -52,11 +61,12 @@ namespace indexset /// over segments. 
The second describes the policy for executing /// each segment. /// -template +template struct ExecPolicy : public RAJA::make_policy_pattern_t { - using seg_it = SEG_ITER_POLICY_T; + RAJA::Pattern::forall> +{ + using seg_it = SEG_ITER_POLICY_T; using seg_exec = SEG_EXEC_POLICY_T; }; @@ -65,7 +75,6 @@ struct ExecPolicy using policy::indexset::ExecPolicy; - /*! ****************************************************************************** * @@ -74,10 +83,10 @@ using policy::indexset::ExecPolicy; * ****************************************************************************** */ -template +template class TypedIndexSet : public TypedIndexSet { - using PARENT = TypedIndexSet; + using PARENT = TypedIndexSet; static const int T0_TypeId = sizeof...(TREST); public: @@ -91,7 +100,7 @@ class TypedIndexSet : public TypedIndexSet //! Construct empty index set #if _MSC_VER < 1910 - // this one instance of constexpr does not work on VS2012 or VS2015 + // this one instance of constexpr does not work on VS2012 or VS2015 RAJA_INLINE TypedIndexSet() : PARENT() {} #else RAJA_INLINE constexpr TypedIndexSet() : PARENT() {} @@ -99,12 +108,12 @@ class TypedIndexSet : public TypedIndexSet //! Copy-constructor for index set RAJA_INLINE - TypedIndexSet(TypedIndexSet const &c) - : PARENT((PARENT const &)c) + TypedIndexSet(TypedIndexSet const& c) : PARENT((PARENT const&)c) { size_t num = c.data.size(); data.resize(num); - for (size_t i = 0; i < num; ++i) { + for (size_t i = 0; i < num; ++i) + { data[i] = c.data[i]; } // mark all as not owned by us @@ -112,9 +121,10 @@ class TypedIndexSet : public TypedIndexSet } //! 
Copy-assignment operator for index set - TypedIndexSet &operator=(const TypedIndexSet &rhs) + TypedIndexSet& operator=(const TypedIndexSet& rhs) { - if (&rhs != this) { + if (&rhs != this) + { TypedIndexSet copy(rhs); this->swap(copy); } @@ -125,19 +135,21 @@ class TypedIndexSet : public TypedIndexSet RAJA_INLINE ~TypedIndexSet() { size_t num_seg = data.size(); - for (size_t i = 0; i < num_seg; ++i) { + for (size_t i = 0; i < num_seg; ++i) + { // Only free segment of we allocated it - if (owner[i]) { + if (owner[i]) + { delete data[i]; } } } //! Swap function for copy-and-swap idiom. - void swap(TypedIndexSet &other) + void swap(TypedIndexSet& other) { // Swap parents data - PARENT::swap((PARENT &)other); + PARENT::swap((PARENT&)other); // Swap our data using std::swap; swap(data, other.data); @@ -149,19 +161,21 @@ class TypedIndexSet : public TypedIndexSet /// /// This is used to implement the == and != operators /// - template + template RAJA_INLINE bool compareSegmentById( size_t segid, - const TypedIndexSet &other) const + const TypedIndexSet& other) const { // drill down our types until we have the right type - if (getSegmentTypes()[segid] != T0_TypeId) { + if (getSegmentTypes()[segid] != T0_TypeId) + { // peel off T0 return PARENT::compareSegmentById(segid, other); } // Check that other's segid is of type T0 - if (!other.template checkSegmentType(segid)) { + if (!other.template checkSegmentType(segid)) + { return false; } @@ -170,35 +184,36 @@ class TypedIndexSet : public TypedIndexSet return *data[offset] == other.template getSegment(segid); } - - template + template RAJA_INLINE bool checkSegmentType(size_t segid) const { - if (getSegmentTypes()[segid] == T0_TypeId) { + if (getSegmentTypes()[segid] == T0_TypeId) + { return std::is_same::value; } return PARENT::template checkSegmentType(segid); } - //! 
get specified segment by ID - template - RAJA_INLINE P0 &getSegment(size_t segid) + template + RAJA_INLINE P0& getSegment(size_t segid) { - if (getSegmentTypes()[segid] == T0_TypeId) { + if (getSegmentTypes()[segid] == T0_TypeId) + { Index_type offset = getSegmentOffsets()[segid]; - return *reinterpret_cast(data[offset]); + return *reinterpret_cast(data[offset]); } return PARENT::template getSegment(segid); } //! get specified segment by ID - template - RAJA_INLINE P0 const &getSegment(size_t segid) const + template + RAJA_INLINE P0 const& getSegment(size_t segid) const { - if (getSegmentTypes()[segid] == T0_TypeId) { + if (getSegmentTypes()[segid] == T0_TypeId) + { Index_type offset = getSegmentOffsets()[segid]; - return *reinterpret_cast(data[offset]); + return *reinterpret_cast(data[offset]); } return PARENT::template getSegment(segid); } @@ -230,43 +245,49 @@ class TypedIndexSet : public TypedIndexSet /// private: - template - RAJA_INLINE void push_into(TypedIndexSet &c, - PushEnd pend = PUSH_BACK, + template + RAJA_INLINE void push_into(TypedIndexSet& c, + PushEnd pend = PUSH_BACK, PushCopy pcopy = PUSH_COPY) { Index_type num = getNumSegments(); - if (pend == PUSH_BACK) { - for (Index_type i = 0; i < num; ++i) { + if (pend == PUSH_BACK) + { + for (Index_type i = 0; i < num; ++i) + { segment_push_into(i, c, pend, pcopy); - } - } else { - for (Index_type i = num-1; i > -1; --i) { + } + } + else + { + for (Index_type i = num - 1; i > -1; --i) + { segment_push_into(i, c, pend, pcopy); - } + } } } - static constexpr int value_for(PushEnd end, PushCopy copy) { return (end == PUSH_BACK) << 1 | (copy == PUSH_COPY); } public: - template + template RAJA_INLINE void segment_push_into(size_t segid, - TypedIndexSet &c, - PushEnd pend = PUSH_BACK, + TypedIndexSet& c, + PushEnd pend = PUSH_BACK, PushCopy pcopy = PUSH_COPY) { - if (getSegmentTypes()[segid] != T0_TypeId) { + if (getSegmentTypes()[segid] != T0_TypeId) + { PARENT::segment_push_into(segid, c, pend, pcopy); 
return; } Index_type offset = getSegmentOffsets()[segid]; - switch (value_for(pend, pcopy)) { + switch (value_for(pend, pcopy)) + { case value_for(PUSH_BACK, PUSH_COPY): c.push_back(*data[offset]); break; @@ -282,41 +303,43 @@ class TypedIndexSet : public TypedIndexSet } } - //! Add segment to back end of index set without making a copy. - template - RAJA_INLINE void push_back_nocopy(Tnew *val) + template + RAJA_INLINE void push_back_nocopy(Tnew* val) { push_internal(val, PUSH_BACK, PUSH_NOCOPY); } //! Add segment to front end of index set without making a copy. - template - RAJA_INLINE void push_front_nocopy(Tnew *val) + template + RAJA_INLINE void push_front_nocopy(Tnew* val) { push_internal(val, PUSH_FRONT, PUSH_NOCOPY); } //! Add copy of segment to back end of index set. - template - RAJA_INLINE void push_back(Tnew &&val) + template + RAJA_INLINE void push_back(Tnew&& val) { - push_internal(new typename std::decay::type(std::forward(val)), PUSH_BACK, PUSH_COPY); + push_internal(new typename std::decay::type(std::forward(val)), + PUSH_BACK, PUSH_COPY); } //! Add copy of segment to front end of index set. - template - RAJA_INLINE void push_front(Tnew &&val) + template + RAJA_INLINE void push_front(Tnew&& val) { - push_internal(new typename std::decay::type(std::forward(val)), PUSH_FRONT, PUSH_COPY); + push_internal(new typename std::decay::type(std::forward(val)), + PUSH_FRONT, PUSH_COPY); } //! Return total length -- sum of lengths of all segments RAJA_INLINE size_t getLength() const { size_t total = PARENT::getLength(); - size_t num = data.size(); - for (size_t i = 0; i < num; ++i) { + size_t num = data.size(); + for (size_t i = 0; i < num; ++i) + { total += data[i]->size(); } return total; @@ -328,7 +351,6 @@ class TypedIndexSet : public TypedIndexSet return data.size() + PARENT::getNumSegments(); } - /// /// Calls the operator "body" with the segment stored at segid. /// @@ -338,14 +360,14 @@ class TypedIndexSet : public TypedIndexSet /// The "args..." 
are passed-thru to the body as arguments AFTER the segment. /// RAJA_SUPPRESS_HD_WARN - template + template RAJA_HOST_DEVICE void segmentCall(size_t segid, - BODY &&body, - ARGS &&... args) const + BODY&& body, + ARGS&&... args) const { - if (getSegmentTypes()[segid] != T0_TypeId) { - PARENT::segmentCall(segid, - std::forward(body), + if (getSegmentTypes()[segid] != T0_TypeId) + { + PARENT::segmentCall(segid, std::forward(body), std::forward(args)...); return; } @@ -355,9 +377,9 @@ class TypedIndexSet : public TypedIndexSet protected: //! Internal logic to add a new segment -- catch invalid type insertion - template - RAJA_INLINE void push_internal(Tnew *val, - PushEnd pend = PUSH_BACK, + template + RAJA_INLINE void push_internal(Tnew* val, + PushEnd pend = PUSH_BACK, PushCopy pcopy = PUSH_COPY) { static_assert(sizeof...(TREST) > 0, "Invalid type for this TypedIndexSet"); @@ -365,15 +387,16 @@ class TypedIndexSet : public TypedIndexSet } //! Internal logic to add a new segment - RAJA_INLINE void push_internal(T0 *val, - PushEnd pend = PUSH_BACK, + RAJA_INLINE void push_internal(T0* val, + PushEnd pend = PUSH_BACK, PushCopy pcopy = PUSH_COPY) { data.push_back(val); owner.push_back(pcopy == PUSH_COPY); // Determine if we push at the front or back of the segment list - if (pend == PUSH_BACK) { + if (pend == PUSH_BACK) + { // Store the segment type getSegmentTypes().push_back(T0_TypeId); @@ -384,7 +407,9 @@ class TypedIndexSet : public TypedIndexSet size_t icount = val->size(); getSegmentIcounts().push_back(getTotalLength()); increaseTotalLength(icount); - } else { + } + else + { // Store the segment type getSegmentTypes().push_front(T0_TypeId); @@ -394,7 +419,8 @@ class TypedIndexSet : public TypedIndexSet // Store the segment icount getSegmentIcounts().push_front(0); size_t icount = val->size(); - for (size_t i = 1; i < getSegmentIcounts().size(); ++i) { + for (size_t i = 1; i < getSegmentIcounts().size(); ++i) + { getSegmentIcounts()[i] += icount; } 
increaseTotalLength(icount); @@ -402,7 +428,7 @@ class TypedIndexSet : public TypedIndexSet } //! Returns the number of indices (the total icount of segments - RAJA_INLINE Index_type &getTotalLength() { return PARENT::getTotalLength(); } + RAJA_INLINE Index_type& getTotalLength() { return PARENT::getTotalLength(); } //! set total length of the indexset RAJA_INLINE void setTotalLength(int n) { return PARENT::setTotalLength(n); } @@ -437,9 +463,10 @@ class TypedIndexSet : public TypedIndexSet { TypedIndexSet retVal; - int minSeg = RAJA::operators::maximum{}(0, begin); - int maxSeg = RAJA::operators::minimum{}(end, getNumSegments()); - for (int i = minSeg; i < maxSeg; ++i) { + int minSeg = RAJA::operators::maximum {}(0, begin); + int maxSeg = RAJA::operators::minimum {}(end, getNumSegments()); + for (int i = minSeg; i < maxSeg; ++i) + { segment_push_into(i, retVal, PUSH_BACK, PUSH_NOCOPY); } return retVal; @@ -452,13 +479,15 @@ class TypedIndexSet : public TypedIndexSet /// This TypedIndexSet will not change and the created "slice" into it /// will not own any of its segments. /// - TypedIndexSet createSlice(const int *segIds, int len) + TypedIndexSet createSlice(const int* segIds, int len) { TypedIndexSet retVal; int numSeg = getNumSegments(); - for (int i = 0; i < len; ++i) { - if (segIds[i] >= 0 && segIds[i] < numSeg) { + for (int i = 0; i < len; ++i) + { + if (segIds[i] >= 0 && segIds[i] < numSeg) + { segment_push_into(segIds[i], retVal, PUSH_BACK, PUSH_NOCOPY); } } @@ -475,13 +504,15 @@ class TypedIndexSet : public TypedIndexSet /// The object must provide methods begin(), end(), and its /// iterator type must de-reference to an integral value. 
/// - template - TypedIndexSet createSlice(const T &segIds) + template + TypedIndexSet createSlice(const T& segIds) { TypedIndexSet retVal; int numSeg = getNumSegments(); - for (auto &seg : segIds) { - if (seg >= 0 && seg < numSeg) { + for (auto& seg : segIds) + { + if (seg >= 0 && seg < numSeg) + { segment_push_into(seg, retVal, PUSH_BACK, PUSH_NOCOPY); } } @@ -492,7 +523,7 @@ class TypedIndexSet : public TypedIndexSet void setSegmentInterval(size_t interval_id, int begin, int end) { m_seg_interval_begin[interval_id] = begin; - m_seg_interval_end[interval_id] = end; + m_seg_interval_end[interval_id] = end; } //! get lower bound of segment identified with interval_id @@ -509,37 +540,37 @@ class TypedIndexSet : public TypedIndexSet protected: //! Returns the mapping of segment_index -> segment_type - RAJA_INLINE RAJA::RAJAVec &getSegmentTypes() + RAJA_INLINE RAJA::RAJAVec& getSegmentTypes() { return PARENT::getSegmentTypes(); } //! Returns the mapping of segment_index -> segment_type - RAJA_INLINE RAJA::RAJAVec const &getSegmentTypes() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentTypes() const { return PARENT::getSegmentTypes(); } //! Returns the mapping of segment_index -> segment_offset - RAJA_INLINE RAJA::RAJAVec &getSegmentOffsets() + RAJA_INLINE RAJA::RAJAVec& getSegmentOffsets() { return PARENT::getSegmentOffsets(); } //! Returns the mapping of segment_index -> segment_offset - RAJA_INLINE RAJA::RAJAVec const &getSegmentOffsets() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentOffsets() const { return PARENT::getSegmentOffsets(); } //! Returns the icount of segments - RAJA_INLINE RAJA::RAJAVec &getSegmentIcounts() + RAJA_INLINE RAJA::RAJAVec& getSegmentIcounts() { return PARENT::getSegmentIcounts(); } //! 
Returns the icount of segments - RAJA_INLINE RAJA::RAJAVec const &getSegmentIcounts() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentIcounts() const { return PARENT::getSegmentIcounts(); } @@ -551,14 +582,16 @@ class TypedIndexSet : public TypedIndexSet /// Note: method does not check equality of anything other than segment /// types and indices; e.g., dependency info not checked. /// - template - RAJA_INLINE bool operator==(const TypedIndexSet &other) const + template + RAJA_INLINE bool operator==(const TypedIndexSet& other) const { size_t num_seg = getNumSegments(); if (num_seg != other.getNumSegments()) return false; - for (size_t segid = 0; segid < num_seg; ++segid) { - if (!compareSegmentById(segid, other)) { + for (size_t segid = 0; segid < num_seg; ++segid) + { + if (!compareSegmentById(segid, other)) + { return false; } } @@ -566,15 +599,15 @@ class TypedIndexSet : public TypedIndexSet } //! Inequality operator returns true if any segment is not equal, else false. - template - RAJA_INLINE bool operator!=(const TypedIndexSet &other) const + template + RAJA_INLINE bool operator!=(const TypedIndexSet& other) const { return (!(*this == other)); } private: //! vector of TypedIndexSet data objects of type T0 - RAJA::RAJAVec data; + RAJA::RAJAVec data; //! vector indicating which segments are owned by the TypedIndexSet RAJA::RAJAVec owner; @@ -586,8 +619,7 @@ class TypedIndexSet : public TypedIndexSet RAJA::RAJAVec m_seg_interval_end; }; - -template <> +template<> class TypedIndexSet<> { public: @@ -603,16 +635,16 @@ class TypedIndexSet<> //! Copy-constructor. RAJA_INLINE - TypedIndexSet(TypedIndexSet const &c) + TypedIndexSet(TypedIndexSet const& c) { - segment_types = c.segment_types; + segment_types = c.segment_types; segment_offsets = c.segment_offsets; segment_icounts = c.segment_icounts; - m_len = c.m_len; + m_len = c.m_len; } //! Swap function for copy-and-swap idiom (deep copy). 
- void swap(TypedIndexSet &other) + void swap(TypedIndexSet& other) { using std::swap; swap(segment_types, other.segment_types); @@ -624,8 +656,8 @@ class TypedIndexSet<> protected: RAJA_INLINE static size_t getNumTypes() { return 0; } - template - RAJA_INLINE constexpr bool isValidSegmentType(T const &) const + template + RAJA_INLINE constexpr bool isValidSegmentType(T const&) const { // Segment type wasn't found return false; @@ -635,89 +667,85 @@ class TypedIndexSet<> RAJA_INLINE static size_t getLength() { return 0; } - template + template RAJA_INLINE void segmentCall(size_t, BODY, ARGS...) const - { - } + {} - RAJA_INLINE RAJA::RAJAVec &getSegmentTypes() + RAJA_INLINE RAJA::RAJAVec& getSegmentTypes() { return segment_types; } - RAJA_INLINE RAJA::RAJAVec const &getSegmentTypes() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentTypes() const { return segment_types; } - RAJA_INLINE RAJA::RAJAVec &getSegmentOffsets() + RAJA_INLINE RAJA::RAJAVec& getSegmentOffsets() { return segment_offsets; } - RAJA_INLINE RAJA::RAJAVec const &getSegmentOffsets() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentOffsets() const { return segment_offsets; } - RAJA_INLINE RAJA::RAJAVec &getSegmentIcounts() + RAJA_INLINE RAJA::RAJAVec& getSegmentIcounts() { return segment_icounts; } - RAJA_INLINE RAJA::RAJAVec const &getSegmentIcounts() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentIcounts() const { return segment_icounts; } - RAJA_INLINE Index_type &getTotalLength() { return m_len; } + RAJA_INLINE Index_type& getTotalLength() { return m_len; } RAJA_INLINE void setTotalLength(int n) { m_len = n; } RAJA_INLINE void increaseTotalLength(int n) { m_len += n; } - template + template RAJA_INLINE bool compareSegmentById(size_t, - const TypedIndexSet &) const + const TypedIndexSet&) const { return false; } - template + template RAJA_INLINE bool checkSegmentType(size_t) const { return false; } - template - RAJA_INLINE P0 &getSegment(size_t) + template + RAJA_INLINE P0& 
getSegment(size_t) { - return *((P0 *)(this - this)); + return *((P0*)(this - this)); } - template - RAJA_INLINE P0 const &getSegment(size_t) const + template + RAJA_INLINE P0 const& getSegment(size_t) const { - return *((P0 *)(this - this)); + return *((P0*)(this - this)); } - template - RAJA_INLINE void push_into(TypedIndexSet &, PushEnd, PushCopy) const - { - } + template + RAJA_INLINE void push_into(TypedIndexSet&, PushEnd, PushCopy) const + {} - template + template RAJA_INLINE void segment_push_into(size_t, - TypedIndexSet &, + TypedIndexSet&, PushEnd, PushCopy) const - { - } + {} - template - RAJA_INLINE void push(Tnew const &, PushEnd, PushCopy) - { - } + template + RAJA_INLINE void push(Tnew const&, PushEnd, PushCopy) + {} public: using iterator = Iterators::numeric_iterator; @@ -756,19 +784,20 @@ class TypedIndexSet<> Index_type m_len; }; - namespace type_traits { -template +template struct is_index_set - : ::RAJA::type_traits::SpecializationOf::type> { -}; + : ::RAJA::type_traits::SpecializationOf::type> +{}; -template +template struct is_indexset_policy - : ::RAJA::type_traits::SpecializationOf::type> { -}; + : ::RAJA::type_traits::SpecializationOf::type> +{}; } // namespace type_traits } // namespace RAJA diff --git a/include/RAJA/index/IndexSetBuilders.hpp b/include/RAJA/index/IndexSetBuilders.hpp index 543524be01..075aecd1d1 100644 --- a/include/RAJA/index/IndexSetBuilders.hpp +++ b/include/RAJA/index/IndexSetBuilders.hpp @@ -37,13 +37,13 @@ namespace RAJA * \brief Generate an index set with aligned Range segments and List segments, * as needed, from given array of indices. * - * Routine does no error-checking on argements and assumes + * Routine does no error-checking on arguments and assumes * RAJA::Index_type array contains valid indices. * - * \param iset reference to index set generated with aligned range segments + * \param iset reference to index set generated with aligned range segments * and list segments.
Method assumes index set is empty (no segments). - * \param work_res camp resource object that identifies the memory space in - * which list segment index data will live (passed to list segment + * \param work_res camp resource object that identifies the memory space in + * which list segment index data will live (passed to list segment * ctor). * \param indices_in pointer to start of input array of indices. * \param length size of input index array. @@ -79,37 +79,36 @@ void RAJASHAREDDLL_API buildIndexSetAligned( ****************************************************************************** * * \brief Generate a lock-free "block" index set (planar division) containing - * range segments. + * range segments. * - * The method chunks a fastDim x midDim x slowDim mesh into blocks that + * The method chunks a fastDim x midDim x slowDim mesh into blocks that * can be dependency-scheduled, removing need for lock constructs. * * \param iset reference to index set generated with range segments. - * Method assumes index set is empty (no segments). + * Method assumes index set is empty (no segments). * \param fastDim "fast" block dimension (see above). * \param midDim "mid" block dimension (see above). * \param slowDim "slow" block dimension (see above). * ****************************************************************************** */ -void buildLockFreeBlockIndexset( - RAJA::TypedIndexSet& iset, - int fastDim, - int midDim, - int slowDim); +void buildLockFreeBlockIndexset(RAJA::TypedIndexSet& iset, + int fastDim, + int midDim, + int slowDim); /*! ****************************************************************************** * * \brief Generate a lock-free "color" index set containing range and list * segments. - * - * TThe domain-set is colored based on connectivity to the range-set. - * All elements in each segment are independent, and no two segments + * + * The domain-set is colored based on connectivity to the range-set.
+ * All elements in each segment are independent, and no two segments * can be executed in parallel. * - * \param iset reference to index set generated. Method assumes index set - * is empty (no segments). + * \param iset reference to index set generated. Method assumes index set + * is empty (no segments). * \param work_res camp resource object that identifies the memory space in * which list segment index data will live (passed to list segment * ctor). @@ -123,7 +122,7 @@ void buildLockFreeColorIndexset( int numEntity, int numRangePerDomain, int numEntityRange, - RAJA::Index_type* elemPermutation = nullptr, + RAJA::Index_type* elemPermutation = nullptr, RAJA::Index_type* ielemPermutation = nullptr); } // namespace RAJA diff --git a/include/RAJA/index/IndexSetUtils.hpp b/include/RAJA/index/IndexSetUtils.hpp index 4baea450fc..a6733e6d68 100644 --- a/include/RAJA/index/IndexSetUtils.hpp +++ b/include/RAJA/index/IndexSetUtils.hpp @@ -31,10 +31,10 @@ namespace RAJA //@{ //! @name Methods to gather indices of segment or index set into a container. //! -//! For each method, the given container must be templated on a data type, -//! have default and copy ctors, push_back method, and value_type. Is is -//! assumed that the container data type and segment or index set data type -//! are compatible in the sense that the index set type can be converted to +//! For each method, the given container must be templated on a data type, +//! have default and copy ctors, push_back method, and value_type. It is +//! assumed that the container data type and segment or index set data type +//! are compatible in the sense that the index set type can be converted to //! the container data type. /*!
@@ -44,16 +44,15 @@ namespace RAJA * ****************************************************************************** */ -template +template RAJA_INLINE void getIndices(CONTAINER_T& con, const TypedIndexSet& iset) { CONTAINER_T tcon; - forall >(iset, - [&](typename CONTAINER_T::value_type idx) { - tcon.push_back(idx); - } - ); + forall>( + iset, [&](typename CONTAINER_T::value_type idx) { + tcon.push_back(idx); + }); con = tcon; } @@ -64,15 +63,13 @@ RAJA_INLINE void getIndices(CONTAINER_T& con, * ****************************************************************************** */ -template +template RAJA_INLINE void getIndices(CONTAINER_T& con, const SEGMENT_T& seg) { CONTAINER_T tcon; - forall(seg, - [&](typename CONTAINER_T::value_type idx) { - tcon.push_back(idx); - } - ); + forall(seg, [&](typename CONTAINER_T::value_type idx) { + tcon.push_back(idx); + }); con = tcon; } @@ -84,17 +81,16 @@ RAJA_INLINE void getIndices(CONTAINER_T& con, const SEGMENT_T& seg) * ****************************************************************************** */ -template +template RAJA_INLINE void getIndicesConditional(CONTAINER_T& con, const TypedIndexSet& iset, CONDITIONAL conditional) { CONTAINER_T tcon; - forall >(iset, - [&](typename CONTAINER_T::value_type idx) { - if (conditional(idx)) tcon.push_back(idx); - } - ); + forall>( + iset, [&](typename CONTAINER_T::value_type idx) { + if (conditional(idx)) tcon.push_back(idx); + }); con = tcon; } @@ -106,17 +102,15 @@ RAJA_INLINE void getIndicesConditional(CONTAINER_T& con, * ****************************************************************************** */ -template +template RAJA_INLINE void getIndicesConditional(CONTAINER_T& con, const SEGMENT_T& seg, CONDITIONAL conditional) { CONTAINER_T tcon; - forall(seg, - [&](typename CONTAINER_T::value_type idx) { - if (conditional(idx)) tcon.push_back(idx); - } - ); + forall(seg, [&](typename CONTAINER_T::value_type idx) { + if (conditional(idx)) tcon.push_back(idx); + }); con = tcon; } diff 
--git a/include/RAJA/index/IndexValue.hpp b/include/RAJA/index/IndexValue.hpp index 44fa143445..15e64c07a2 100644 --- a/include/RAJA/index/IndexValue.hpp +++ b/include/RAJA/index/IndexValue.hpp @@ -28,8 +28,8 @@ namespace RAJA { -struct IndexValueBase { -}; +struct IndexValueBase +{}; /*! * \brief Strongly typed "integer" class. @@ -43,17 +43,18 @@ struct IndexValueBase { * * Yes, this uses the curiously-recurring template pattern. */ -template -struct IndexValue : public IndexValueBase { +template +struct IndexValue : public IndexValueBase +{ using value_type = VALUE; //! Default constructor initializes value to 0. - RAJA_INLINE constexpr IndexValue() = default; - constexpr RAJA_INLINE IndexValue(IndexValue const &) = default; - constexpr RAJA_INLINE IndexValue(IndexValue &&) = default; - RAJA_INLINE IndexValue &operator=(IndexValue const &) = default; - RAJA_INLINE IndexValue &operator=(IndexValue &&) = default; + RAJA_INLINE constexpr IndexValue() = default; + constexpr RAJA_INLINE IndexValue(IndexValue const&) = default; + constexpr RAJA_INLINE IndexValue(IndexValue&&) = default; + RAJA_INLINE IndexValue& operator=(IndexValue const&) = default; + RAJA_INLINE IndexValue& operator=(IndexValue&&) = default; /*! * \brief Explicit constructor. @@ -61,14 +62,13 @@ struct IndexValue : public IndexValueBase { */ RAJA_HOST_DEVICE RAJA_INLINE constexpr explicit IndexValue(value_type v) : value(v) - { - } + {} //! Dereference provides cast-to-integer. - RAJA_HOST_DEVICE RAJA_INLINE value_type &operator*() { return value; } + RAJA_HOST_DEVICE RAJA_INLINE value_type& operator*() { return value; } //! Dereference provides cast-to-integer. - RAJA_HOST_DEVICE RAJA_INLINE const value_type &operator*() const + RAJA_HOST_DEVICE RAJA_INLINE const value_type& operator*() const { return value; } @@ -82,10 +82,10 @@ struct IndexValue : public IndexValueBase { } //! 
preincrement stored index - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator++() + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator++() { value++; - return static_cast(*this); + return static_cast(*this); } //! postdecrement -- returns a copy @@ -97,10 +97,10 @@ struct IndexValue : public IndexValueBase { } //! preincrement stored index - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator--() + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator--() { value--; - return static_cast(*this); + return static_cast(*this); } //! addition to underlying index from an Index_type @@ -163,52 +163,52 @@ struct IndexValue : public IndexValueBase { return TYPE(value % a.value); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator+=(value_type x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator+=(value_type x) { value += x; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator+=(TYPE x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator+=(TYPE x) { value += x.value; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator-=(value_type x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator-=(value_type x) { value -= x; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator-=(TYPE x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator-=(TYPE x) { value -= x.value; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator*=(value_type x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator*=(value_type x) { value *= x; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator*=(TYPE x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator*=(TYPE x) { value *= x.value; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator/=(value_type x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator/=(value_type x) { value /= x; - return static_cast(*this); + return 
static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator/=(TYPE x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator/=(TYPE x) { value /= x.value; - return static_cast(*this); + return static_cast(*this); } RAJA_HOST_DEVICE RAJA_INLINE bool operator<(value_type x) const @@ -282,12 +282,13 @@ struct IndexValue : public IndexValueBase { namespace internal { -template +template constexpr RAJA_HOST_DEVICE RAJA_INLINE TO convertIndex_helper(FROM const val) { return TO(val); } -template + +template constexpr RAJA_HOST_DEVICE RAJA_INLINE TO convertIndex_helper(typename FROM::IndexValueType const val) { @@ -302,19 +303,18 @@ convertIndex_helper(typename FROM::IndexValueType const val) * convert it to another type, possibly another Index or an int. * */ -template +template constexpr RAJA_HOST_DEVICE RAJA_INLINE TO convertIndex(FROM const val) { return internal::convertIndex_helper(val); } - /*! * \brief Function that strips the strongly typed Index<> and returns its * underlying value_type value. */ // This version is enabled if FROM is a strongly typed class -template +template constexpr RAJA_HOST_DEVICE RAJA_INLINE typename std::enable_if::value, typename FROM::value_type>::type @@ -322,10 +322,11 @@ constexpr RAJA_HOST_DEVICE RAJA_INLINE { return *val; } + /* * enabled if FROM is not a strongly typed class */ -template +template constexpr RAJA_HOST_DEVICE RAJA_INLINE typename std::enable_if::value, FROM>::type @@ -334,18 +335,22 @@ constexpr RAJA_HOST_DEVICE RAJA_INLINE return val; } -namespace internal{ +namespace internal +{ template -struct StripIndexTypeT { - using type = FROM; +struct StripIndexTypeT +{ + using type = FROM; }; template -struct StripIndexTypeT::value>::type> +struct StripIndexTypeT< + FROM, + typename std::enable_if::value>::type> { - using type = typename FROM::value_type; + using type = typename FROM::value_type; }; -} // namespace internal +} // namespace internal /*! 
* \brief Strips a strongly typed index to its underlying type @@ -363,11 +368,10 @@ using strip_index_type_t = typename internal::StripIndexTypeT::type; * \param FROM the original type */ template -using make_signed_t = typename std::conditional < - std::is_floating_point::value, - std::common_type, - std::make_signed - >::type::type; +using make_signed_t = + typename std::conditional::value, + std::common_type, + std::make_signed>::type::type; } // namespace RAJA @@ -376,19 +380,18 @@ using make_signed_t = typename std::conditional < * \param TYPE the name of the type * \param NAME a string literal to identify this index type */ -#define RAJA_INDEX_VALUE(TYPE, NAME) \ - class TYPE : public ::RAJA::IndexValue \ - { \ - using parent = ::RAJA::IndexValue; \ - \ - public: \ - using IndexValueType = TYPE; \ - RAJA_HOST_DEVICE RAJA_INLINE TYPE() : parent::IndexValue() {} \ - RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(::RAJA::Index_type v) \ - : parent::IndexValue(v) \ - { \ - } \ - static inline std::string getName() { return NAME; } \ +#define RAJA_INDEX_VALUE(TYPE, NAME) \ + class TYPE : public ::RAJA::IndexValue \ + { \ + using parent = ::RAJA::IndexValue; \ + \ + public: \ + using IndexValueType = TYPE; \ + RAJA_HOST_DEVICE RAJA_INLINE TYPE() : parent::IndexValue() {} \ + RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(::RAJA::Index_type v) \ + : parent::IndexValue(v) \ + {} \ + static inline std::string getName() { return NAME; } \ }; /*! 
@@ -397,17 +400,17 @@ using make_signed_t = typename std::conditional < * \param IDXT the index types value type * \param NAME a string literal to identify this index type */ -#define RAJA_INDEX_VALUE_T(TYPE, IDXT, NAME) \ - class TYPE : public ::RAJA::IndexValue \ - { \ - public: \ - RAJA_HOST_DEVICE RAJA_INLINE TYPE() \ - : RAJA::IndexValue::IndexValue() {} \ - RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(IDXT v) \ - : RAJA::IndexValue::IndexValue(v) \ - { \ - } \ - static inline std::string getName() { return NAME; } \ +#define RAJA_INDEX_VALUE_T(TYPE, IDXT, NAME) \ + class TYPE : public ::RAJA::IndexValue \ + { \ + public: \ + RAJA_HOST_DEVICE RAJA_INLINE TYPE() \ + : RAJA::IndexValue::IndexValue() \ + {} \ + RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(IDXT v) \ + : RAJA::IndexValue::IndexValue(v) \ + {} \ + static inline std::string getName() { return NAME; } \ }; #endif diff --git a/include/RAJA/index/ListSegment.hpp b/include/RAJA/index/ListSegment.hpp index adee46053c..54b2d7e1c4 100644 --- a/include/RAJA/index/ListSegment.hpp +++ b/include/RAJA/index/ListSegment.hpp @@ -81,11 +81,10 @@ namespace RAJA * ****************************************************************************** */ -template +template class TypedListSegment { public: - //@{ //! @name Types used in implementation based on template parameter. @@ -111,7 +110,7 @@ class TypedListSegment * \param values array of indices defining iteration space of segment * \param length number of indices * \param resource camp resource defining memory space where index data live - * \param owned optional enum value indicating whether segment owns indices + * \param owned optional enum value indicating whether segment owns indices * (Owned or Unowned). Default is Owned. 
* * If 'Unowned' is passed as last argument, the segment will not own its @@ -121,7 +120,10 @@ class TypedListSegment Index_type length, camp::resources::Resource resource, IndexOwnership owned = Owned) - : m_resource(nullptr), m_owned(Unowned), m_data(nullptr), m_size(0) + : m_resource(nullptr), + m_owned(Unowned), + m_data(nullptr), + m_size(0) { initIndexData(values, length, resource, owned); } @@ -138,33 +140,37 @@ class TypedListSegment * * Constructor assumes container data lives in host memory space. */ - template + template TypedListSegment(const Container& container, camp::resources::Resource resource) - : m_resource(nullptr), m_owned(Unowned), m_data(nullptr), m_size(container.size()) + : m_resource(nullptr), + m_owned(Unowned), + m_data(nullptr), + m_size(container.size()) { - if (m_size > 0) { + if (m_size > 0) + { - camp::resources::Resource host_res{camp::resources::Host()}; + camp::resources::Resource host_res {camp::resources::Host()}; value_type* tmp = host_res.allocate(m_size); - auto dest = tmp; - auto src = container.begin(); + auto dest = tmp; + auto src = container.begin(); auto const end = container.end(); - while (src != end) { + while (src != end) + { *dest = *src; ++dest; ++src; } m_resource = new camp::resources::Resource(resource); - m_data = m_resource->allocate(m_size); + m_data = m_resource->allocate(m_size); m_resource->memcpy(m_data, tmp, sizeof(value_type) * m_size); m_owned = Owned; host_res.deallocate(tmp); - } } @@ -175,10 +181,11 @@ class TypedListSegment // As this may be called from a lambda in a // RAJA method we perform a shallow copy RAJA_HOST_DEVICE TypedListSegment(const TypedListSegment& other) - : m_resource(nullptr), - m_owned(Unowned), m_data(other.m_data), m_size(other.m_size) - { - } + : m_resource(nullptr), + m_owned(Unowned), + m_data(other.m_data), + m_size(other.m_size) + {} //! 
Copy assignment for list segment // As this may be called from a lambda in a @@ -187,59 +194,59 @@ class TypedListSegment { clear(); m_resource = nullptr; - m_owned = Unowned; - m_data = other.m_data; - m_size = other.m_size; + m_owned = Unowned; + m_data = other.m_data; + m_size = other.m_size; } - //! move assignment for list segment + //! move assignment for list segment // As this may be called from a lambda in a // RAJA method we perform a shallow copy RAJA_HOST_DEVICE TypedListSegment& operator=(TypedListSegment&& rhs) { clear(); m_resource = rhs.m_resource; - m_owned = rhs.m_owned; - m_data = rhs.m_data; - m_size = rhs.m_size; + m_owned = rhs.m_owned; + m_data = rhs.m_data; + m_size = rhs.m_size; rhs.m_resource = nullptr; - rhs.m_owned = Unowned; - rhs.m_data = nullptr; - rhs.m_size = 0; + rhs.m_owned = Unowned; + rhs.m_data = nullptr; + rhs.m_size = 0; } //! Move constructor for list segment RAJA_HOST_DEVICE TypedListSegment(TypedListSegment&& rhs) - : m_resource(rhs.m_resource), - m_owned(rhs.m_owned), m_data(rhs.m_data), m_size(rhs.m_size) + : m_resource(rhs.m_resource), + m_owned(rhs.m_owned), + m_data(rhs.m_data), + m_size(rhs.m_size) { - rhs.m_owned = Unowned; + rhs.m_owned = Unowned; rhs.m_resource = nullptr; - rhs.m_size = 0; - rhs.m_data = nullptr; + rhs.m_size = 0; + rhs.m_data = nullptr; } //! List segment destructor - RAJA_HOST_DEVICE ~TypedListSegment() - { - clear(); - } + RAJA_HOST_DEVICE ~TypedListSegment() { clear(); } //! 
Clear method to be called RAJA_HOST_DEVICE void clear() { #if !defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) - if (m_data != nullptr && m_owned == Owned) { + if (m_data != nullptr && m_owned == Owned) + { m_resource->deallocate(m_data); delete m_resource; } #endif - m_data = nullptr; + m_data = nullptr; m_resource = nullptr; - m_owned = Unowned; - m_size = 0; + m_owned = Unowned; + m_size = 0; } //@} @@ -345,32 +352,35 @@ class TypedListSegment { // empty list segment - if (len <= 0 || container == nullptr) { - m_data = nullptr; - m_size = 0; + if (len <= 0 || container == nullptr) + { + m_data = nullptr; + m_size = 0; m_owned = Unowned; return; } // some non-zero size -- initialize accordingly - m_size = len; + m_size = len; m_owned = container_own; - if (m_owned == Owned) { + if (m_owned == Owned) + { - m_resource = new camp::resources::Resource(resource_); + m_resource = new camp::resources::Resource(resource_); - camp::resources::Resource host_res{camp::resources::Host()}; + camp::resources::Resource host_res {camp::resources::Host()}; - value_type* tmp = host_res.allocate(m_size); + value_type* tmp = host_res.allocate(m_size); - for (Index_type i = 0; i < m_size; ++i) { - tmp[i] = container[i]; - } + for (Index_type i = 0; i < m_size; ++i) + { + tmp[i] = container[i]; + } - m_data = m_resource->allocate(m_size); - m_resource->memcpy(m_data, tmp, sizeof(value_type) * m_size); + m_data = m_resource->allocate(m_size); + m_resource->memcpy(m_data, tmp, sizeof(value_type) * m_size); - host_res.deallocate(tmp); + host_res.deallocate(tmp); return; } @@ -380,9 +390,8 @@ class TypedListSegment m_data = const_cast(container); } - // Copy of camp resource passed to ctor - camp::resources::Resource *m_resource; + camp::resources::Resource* m_resource; // Ownership flag to guide data copying/management IndexOwnership m_owned; @@ -403,7 +412,7 @@ namespace std { //! 
Specialization of std::swap for TypedListSegment -template +template RAJA_INLINE void swap(RAJA::TypedListSegment& a, RAJA::TypedListSegment& b) { diff --git a/include/RAJA/index/RangeSegment.hpp b/include/RAJA/index/RangeSegment.hpp index a41959c583..35a708766c 100644 --- a/include/RAJA/index/RangeSegment.hpp +++ b/include/RAJA/index/RangeSegment.hpp @@ -50,10 +50,10 @@ namespace RAJA * * NOTE: TypedRangeSegment::iterator is a RandomAccessIterator * - * NOTE: TypedRangeSegment supports negative indices; e.g., an interval of + * NOTE: TypedRangeSegment supports negative indices; e.g., an interval of * indices [-5, 3). * - * NOTE: Proper handling of indices strides requires that StorageT is a + * NOTE: Proper handling of indices strides requires that StorageT is a * signed type. * * Usage: @@ -92,15 +92,19 @@ namespace RAJA * ****************************************************************************** */ -template >> -struct TypedRangeSegment { +template>> +struct TypedRangeSegment +{ - // + // // Static asserts to provide some useful error messages during compilation // for incorrect usage. - // - static_assert(std::is_signed::value, "TypedRangeSegment DiffT requires signed type."); - static_assert(!std::is_floating_point::value, "TypedRangeSegment Type must be non floating point."); + // + static_assert(std::is_signed::value, + "TypedRangeSegment DiffT requires signed type."); + static_assert(!std::is_floating_point::value, + "TypedRangeSegment Type must be non floating point."); //@{ //! @name Types used in implementation based on template parameters. @@ -117,20 +121,21 @@ struct TypedRangeSegment { //@} //@{ - //! @name Constructors, destructor, and copy assignment. + //! @name Constructors, destructor, and copy assignment. /*! 
* \brief Construct a range segment repreenting the interval [begin, end) - * + * * \param begin start value (inclusive) for the range * \param end end value (exclusive) for the range */ using StripStorageT = strip_index_type_t; - RAJA_HOST_DEVICE constexpr TypedRangeSegment(StripStorageT begin, StripStorageT end) - : m_begin(iterator(begin)), + + RAJA_HOST_DEVICE constexpr TypedRangeSegment(StripStorageT begin, + StripStorageT end) + : m_begin(iterator(begin)), m_end(begin > end ? m_begin : iterator(end)) - { - } + {} //! Disable compiler generated constructor RAJA_HOST_DEVICE TypedRangeSegment() = delete; @@ -187,7 +192,7 @@ struct TypedRangeSegment { * \brief Compare this segment to another for inequality * * \return true if begin or end does not match, else false - */ + */ RAJA_HOST_DEVICE RAJA_INLINE bool operator!=(TypedRangeSegment const& o) const { return !(operator==(o)); @@ -198,9 +203,9 @@ struct TypedRangeSegment { /*! * \brief Get a new TypedRangeSegment instance representing a slice of * existing segment - * - * \param begin start iterate of new range - * \param length maximum length of new range + * + * \param begin start iterate of new range + * \param length maximum length of new range * \return TypedRangeSegment representing the interval * [ *begin() + begin, min( *begin() + begin + length, *end() ) ) * @@ -213,7 +218,7 @@ struct TypedRangeSegment { * auto r = RAJA::TypedRangeSegment(-4, 4); * * // s repreents the subinterval [-3, 2) - * auto s = r.slice(1, 5); + * auto s = r.slice(1, 5); * * \endverbatim */ @@ -221,9 +226,9 @@ struct TypedRangeSegment { DiffT length) const { StorageT start = m_begin[0] + begin; - StorageT end = start + length > m_end[0] ? m_end[0] : start + length; + StorageT end = start + length > m_end[0] ? m_end[0] : start + length; - return TypedRangeSegment{stripIndexType(start), stripIndexType(end)}; + return TypedRangeSegment {stripIndexType(start), stripIndexType(end)}; } /*! 
@@ -243,12 +248,11 @@ struct TypedRangeSegment { iterator m_end; }; - /*! ****************************************************************************** * - * \class TypedRangeStrideSegment - * + * \class TypedRangeStrideSegment + * * \brief Segment class representing a strided range of typed indices * * \tparam StorageT underlying data type for the segment indices (required) @@ -264,9 +268,9 @@ struct TypedRangeSegment { * * NOTE: TypedRangeStrideSegment::iterator is a RandomAccessIterator * - * NOTE: TypedRangeStrideSegment allows for positive or negative strides and - * indices. This allows for forward (stride > 0) or backward (stride < 0) - * traversal of the iteration space. A stride of zero is undefined and + * NOTE: TypedRangeStrideSegment allows for positive or negative strides and + * indices. This allows for forward (stride > 0) or backward (stride < 0) + * traversal of the iteration space. A stride of zero is undefined and * will cause divide-by-zero errors. * * As with RangeSegment, the iteration space is inclusive of begin() and @@ -275,7 +279,7 @@ struct TypedRangeSegment { * For positive strides, begin() > end() implies size()==0 * For negative strides, begin() < end() implies size()==0 * - * NOTE: Proper handling of negative strides and indices requires that + * NOTE: Proper handling of negative strides and indices requires that * StorageT is a signed type. * * Usage: @@ -321,15 +325,19 @@ struct TypedRangeSegment { * ****************************************************************************** */ -template >> -struct TypedRangeStrideSegment { +template>> +struct TypedRangeStrideSegment +{ // // Static asserts to provide some useful error messages during compilation // for incorrect usage. 
// - static_assert(std::is_signed::value, "TypedRangeStrideSegment DiffT requires signed type."); - static_assert(!std::is_floating_point::value, "TypedRangeStrideSegment Type must be non floating point."); + static_assert(std::is_signed::value, + "TypedRangeStrideSegment DiffT requires signed type."); + static_assert(!std::is_floating_point::value, + "TypedRangeStrideSegment Type must be non floating point."); //@{ //! @name Types used in implementation based on template parameters. @@ -349,7 +357,7 @@ struct TypedRangeStrideSegment { //! @name Constructors, destructor, and copy assignment. /*! - * \brief Construct a range segment for the interval [begin, end) with + * \brief Construct a range segment for the interval [begin, end) with * given stride * * \param begin start value (inclusive) for the range @@ -357,6 +365,7 @@ struct TypedRangeStrideSegment { * \param stride stride value when iterating over the range */ using StripStorageT = strip_index_type_t; + RAJA_HOST_DEVICE TypedRangeStrideSegment(StripStorageT begin, StripStorageT end, DiffT stride) @@ -367,13 +376,16 @@ struct TypedRangeStrideSegment { m_size((end - begin + stride - (stride > 0 ? 1 : -1)) / stride) { // clamp range when end is unreachable from begin without wrapping - if (stride < 0 && end > begin) { + if (stride < 0 && end > begin) + { m_end = m_begin; - } else if (stride > 0 && end < begin) { + } + else if (stride > 0 && end < begin) + { m_end = m_begin; } // m_size initialized as negative indicates a zero iteration space - m_size = m_size < DiffT{0} ? DiffT{0} : m_size; + m_size = m_size < DiffT {0} ? DiffT {0} : m_size; } //! Disable compiler generated constructor @@ -408,8 +420,8 @@ struct TypedRangeStrideSegment { /*! 
* \brief Get size of this segment - * - * The size is the number of iterates in the + * + * The size is the number of iterates in the * interval [begin, end) when striding over it */ RAJA_HOST_DEVICE DiffT size() const { return m_size; } @@ -435,7 +447,8 @@ struct TypedRangeStrideSegment { * * \return true if begin, end, or size does not match, else false */ - RAJA_HOST_DEVICE RAJA_INLINE bool operator!=(TypedRangeStrideSegment const& o) const + RAJA_HOST_DEVICE RAJA_INLINE bool operator!=( + TypedRangeStrideSegment const& o) const { return !(operator==(o)); } @@ -450,7 +463,7 @@ struct TypedRangeStrideSegment { * \param length maximum length of new range * * \return TypedRangeStrideSegment representing the interval - * [ *begin() + begin * stride, + * [ *begin() + begin * stride, * min( *begin() + (begin + length) * stride, *end() ) * * Here's an example of a slice operation on a range segment with a negative @@ -466,24 +479,26 @@ struct TypedRangeStrideSegment { * // 5 indices in r starting at the 6th entry * auto s = r.slice(6, 6); * - * \endverbatim + * \endverbatim */ RAJA_HOST_DEVICE TypedRangeStrideSegment slice(StorageT begin, DiffT length) const { StorageT stride = m_begin.get_stride(); - StorageT start = m_begin[0] + begin * stride; - StorageT end = start + stride * length; + StorageT start = m_begin[0] + begin * stride; + StorageT end = start + stride * length; - if (stride > 0) { + if (stride > 0) + { end = end > m_end[0] ? m_end[0] : end; - } else { + } + else + { end = end < m_end[0] ? m_end[0] : end; } - return TypedRangeStrideSegment{stripIndexType(start), - stripIndexType(end), - m_begin.get_stride()}; + return TypedRangeStrideSegment {stripIndexType(start), stripIndexType(end), + m_begin.get_stride()}; } /*! 
@@ -516,17 +531,18 @@ using RangeStrideSegment = TypedRangeStrideSegment; namespace detail { -template +template struct common_type - : std::common_type::type> { -}; + : std::common_type::type> +{}; -template -struct common_type { +template +struct common_type +{ using type = T; }; -template +template using common_type_t = typename common_type::type; } // namespace detail @@ -539,9 +555,9 @@ using common_type_t = typename common_type::type; * @begin and @end. If there is no common type, then * a compiler error will be produced. */ -template > +template> RAJA_HOST_DEVICE TypedRangeSegment make_range(BeginT&& begin, EndT&& end) { @@ -549,7 +565,7 @@ RAJA_HOST_DEVICE TypedRangeSegment make_range(BeginT&& begin, } /*! - * \brief Function to make a TypedRangeStride Segment for the interval + * \brief Function to make a TypedRangeStride Segment for the interval * [begin, end) with given stride * * \return a newly constructed TypedRangeStrideSegment where @@ -557,32 +573,35 @@ RAJA_HOST_DEVICE TypedRangeSegment make_range(BeginT&& begin, * @begin, @end, and @stride. If there is no common * type, then a compiler error will be produced. 
*/ -template > +template> RAJA_HOST_DEVICE TypedRangeStrideSegment make_strided_range( BeginT&& begin, EndT&& end, StrideT&& stride) { - static_assert(std::is_signed::value, "make_strided_segment : stride must be signed."); - static_assert(std::is_same, StrideT>::value, "make_stride_segment : stride and end must be of similar types."); + static_assert(std::is_signed::value, + "make_strided_segment : stride must be signed."); + static_assert( + std::is_same, StrideT>::value, + "make_stride_segment : stride and end must be of similar types."); return {begin, end, stride}; } namespace concepts { -template +template struct RangeConstructible - : DefineConcept(camp::val>()) { -}; + : DefineConcept(camp::val>()) +{}; -template +template struct RangeStrideConstructible - : DefineConcept(camp::val>()) { -}; + : DefineConcept(camp::val>()) +{}; } // namespace concepts @@ -603,7 +622,7 @@ namespace std { //! Specialization of std::swap for TypedRangeSegment -template +template RAJA_HOST_DEVICE RAJA_INLINE void swap(RAJA::TypedRangeSegment& a, RAJA::TypedRangeSegment& b) { @@ -611,7 +630,7 @@ RAJA_HOST_DEVICE RAJA_INLINE void swap(RAJA::TypedRangeSegment& a, } //! Specialization of std::swap for TypedRangeStrideSegment -template +template RAJA_HOST_DEVICE RAJA_INLINE void swap(RAJA::TypedRangeStrideSegment& a, RAJA::TypedRangeStrideSegment& b) { diff --git a/include/RAJA/internal/DepGraphNode.hpp b/include/RAJA/internal/DepGraphNode.hpp index 8feceae22f..3c6a3a0d91 100644 --- a/include/RAJA/internal/DepGraphNode.hpp +++ b/include/RAJA/internal/DepGraphNode.hpp @@ -56,9 +56,10 @@ class RAJA_ALIGNED_ATTR(256) DepGraphNode /// Default ctor initializes node to default state. 
/// DepGraphNode() - : m_num_dep_tasks(0), m_semaphore_reload_value(0), m_semaphore_value(0) - { - } + : m_num_dep_tasks(0), + m_semaphore_reload_value(0), + m_semaphore_value(0) + {} /// /// Get/set semaphore value; i.e., the current number of (unsatisfied) @@ -82,7 +83,8 @@ class RAJA_ALIGNED_ATTR(256) DepGraphNode /// void satisfyOne() { - if (m_semaphore_value > 0) { + if (m_semaphore_value > 0) + { --m_semaphore_value; } } @@ -92,7 +94,8 @@ class RAJA_ALIGNED_ATTR(256) DepGraphNode /// void wait() { - while (m_semaphore_value > 0) { + while (m_semaphore_value > 0) + { // TODO: an efficient wait would be better here, but the standard // promise/future is not good enough std::this_thread::yield(); diff --git a/include/RAJA/internal/Iterators.hpp b/include/RAJA/internal/Iterators.hpp index 6f32a56e6d..fd838943c2 100644 --- a/include/RAJA/internal/Iterators.hpp +++ b/include/RAJA/internal/Iterators.hpp @@ -38,7 +38,7 @@ namespace Iterators // Containers #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) -template +template std::string overflow_msg(LType lhs, RType rhs) { return "Iterator Overflow detected between operation of :\n\ttype : " + @@ -47,10 +47,11 @@ std::string overflow_msg(LType lhs, RType rhs) "\n"; } -template +template RAJA_HOST_DEVICE bool is_addition_overflow(Type lhs, DifferenceType rhs) { - if (std::is_unsigned::value) { + if (std::is_unsigned::value) + { if ((rhs > 0) && (lhs > std::numeric_limits::max() - rhs)) return true; if ((rhs < 0) && (lhs < std::numeric_limits::min() - rhs)) @@ -59,23 +60,27 @@ RAJA_HOST_DEVICE bool is_addition_overflow(Type lhs, DifferenceType rhs) return false; } -template +template RAJA_HOST_DEVICE bool is_subtraction_overflow(Type lhs, DifferenceType rhs, bool iterator_on_left = true) { - if (iterator_on_left) { + if (iterator_on_left) + { - if (std::is_unsigned::value) { + if (std::is_unsigned::value) + { if ((rhs > 0) && (lhs < std::numeric_limits::min() + rhs)) return true; if ((rhs < 0) && (lhs > 
std::numeric_limits::max() + rhs)) return true; } + } + else + { // Special case where operation is : value(lhs) - iterator(rhs). - } else { // Special case where operation is : value(lhs) - iterator(rhs). - - if (std::is_unsigned::value) { + if (std::is_unsigned::value) + { if ((lhs > 0) && (rhs < std::numeric_limits::min() + lhs)) return true; if ((lhs < 0)) return true; @@ -84,14 +89,14 @@ RAJA_HOST_DEVICE bool is_subtraction_overflow(Type lhs, return false; } -template +template RAJA_HOST_DEVICE void check_is_addition_overflow(Type lhs, DifferenceType rhs) { if (is_addition_overflow(lhs, rhs)) throw std::runtime_error(overflow_msg(lhs, rhs)); } -template +template RAJA_HOST_DEVICE void check_is_subtraction_overflow(Type lhs, DifferenceType rhs) { @@ -100,29 +105,28 @@ RAJA_HOST_DEVICE void check_is_subtraction_overflow(Type lhs, } #endif -template +template class numeric_iterator { public: - using value_type = Type; + using value_type = Type; using stripped_value_type = strip_index_type_t; - using difference_type = DifferenceType; - using pointer = PointerType; - using reference = value_type&; - using iterator_category = std::random_access_iterator_tag; - - constexpr numeric_iterator() noexcept = default; - constexpr numeric_iterator(const numeric_iterator&) noexcept = default; - constexpr numeric_iterator(numeric_iterator&&) noexcept = default; + using difference_type = DifferenceType; + using pointer = PointerType; + using reference = value_type&; + using iterator_category = std::random_access_iterator_tag; + + constexpr numeric_iterator() noexcept = default; + constexpr numeric_iterator(const numeric_iterator&) noexcept = default; + constexpr numeric_iterator(numeric_iterator&&) noexcept = default; numeric_iterator& operator=(const numeric_iterator&) noexcept = default; - numeric_iterator& operator=(numeric_iterator&&) noexcept = default; + numeric_iterator& operator=(numeric_iterator&&) noexcept = default; RAJA_HOST_DEVICE constexpr numeric_iterator(const 
stripped_value_type& rhs) : val(rhs) - { - } + {} RAJA_HOST_DEVICE inline DifferenceType get_stride() const { return 1; } @@ -130,22 +134,27 @@ class numeric_iterator { return val == rhs.val; } + RAJA_HOST_DEVICE inline bool operator!=(const numeric_iterator& rhs) const { return val != rhs.val; } + RAJA_HOST_DEVICE inline bool operator>(const numeric_iterator& rhs) const { return val > rhs.val; } + RAJA_HOST_DEVICE inline bool operator<(const numeric_iterator& rhs) const { return val < rhs.val; } + RAJA_HOST_DEVICE inline bool operator>=(const numeric_iterator& rhs) const { return val >= rhs.val; } + RAJA_HOST_DEVICE inline bool operator<=(const numeric_iterator& rhs) const { return val <= rhs.val; @@ -156,17 +165,20 @@ class numeric_iterator ++val; return *this; } + RAJA_HOST_DEVICE inline numeric_iterator& operator--() { --val; return *this; } + RAJA_HOST_DEVICE inline numeric_iterator operator++(int) { numeric_iterator tmp(*this); ++val; return tmp; } + RAJA_HOST_DEVICE inline numeric_iterator operator--(int) { numeric_iterator tmp(*this); @@ -183,6 +195,7 @@ class numeric_iterator val += rhs; return *this; } + RAJA_HOST_DEVICE inline numeric_iterator& operator-=( const difference_type& rhs) { @@ -192,12 +205,14 @@ class numeric_iterator val -= rhs; return *this; } + RAJA_HOST_DEVICE inline numeric_iterator& operator+=( const numeric_iterator& rhs) { val += rhs.val; return *this; } + RAJA_HOST_DEVICE inline numeric_iterator& operator-=( const numeric_iterator& rhs) { @@ -210,11 +225,13 @@ class numeric_iterator { return val + rhs.val; } + RAJA_HOST_DEVICE inline stripped_value_type operator-( const numeric_iterator& rhs) const { return val - rhs.val; } + RAJA_HOST_DEVICE inline numeric_iterator operator+( const difference_type& rhs) const { @@ -223,6 +240,7 @@ class numeric_iterator #endif return numeric_iterator(val + rhs); } + RAJA_HOST_DEVICE inline numeric_iterator operator-( const difference_type& rhs) const { @@ -231,6 +249,7 @@ class numeric_iterator 
#endif return numeric_iterator(val - rhs); } + RAJA_HOST_DEVICE friend constexpr numeric_iterator operator+( difference_type lhs, const numeric_iterator& rhs) @@ -243,6 +262,7 @@ class numeric_iterator return numeric_iterator(lhs + rhs.val); #endif } + RAJA_HOST_DEVICE friend constexpr numeric_iterator operator-( difference_type lhs, const numeric_iterator& rhs) @@ -260,10 +280,12 @@ class numeric_iterator { return value_type(val); } + RAJA_HOST_DEVICE inline value_type operator->() const { return value_type(val); } + RAJA_HOST_DEVICE constexpr value_type operator[](difference_type rhs) const { return value_type(val + rhs); @@ -273,31 +295,35 @@ class numeric_iterator stripped_value_type val = 0; }; -template +template class strided_numeric_iterator { public: - using value_type = Type; + using value_type = Type; using stripped_value_type = strip_index_type_t; - using difference_type = DifferenceType; - using pointer = DifferenceType*; - using reference = DifferenceType&; - using iterator_category = std::random_access_iterator_tag; + using difference_type = DifferenceType; + using pointer = DifferenceType*; + using reference = DifferenceType&; + using iterator_category = std::random_access_iterator_tag; constexpr strided_numeric_iterator() noexcept = default; - constexpr strided_numeric_iterator(const strided_numeric_iterator&) noexcept = default; - constexpr strided_numeric_iterator(strided_numeric_iterator&&) noexcept = default; - strided_numeric_iterator& operator=(const strided_numeric_iterator&) noexcept = default; - strided_numeric_iterator& operator=(strided_numeric_iterator&&) noexcept = default; + constexpr strided_numeric_iterator(const strided_numeric_iterator&) noexcept = + default; + constexpr strided_numeric_iterator(strided_numeric_iterator&&) noexcept = + default; + strided_numeric_iterator& operator=( + const strided_numeric_iterator&) noexcept = default; + strided_numeric_iterator& operator=(strided_numeric_iterator&&) noexcept = + default; 
RAJA_HOST_DEVICE constexpr strided_numeric_iterator( stripped_value_type rhs, DifferenceType stride_ = DifferenceType(1)) - : val(rhs), stride(stride_) - { - } + : val(rhs), + stride(stride_) + {} RAJA_HOST_DEVICE inline DifferenceType get_stride() const { return stride; } @@ -306,6 +332,7 @@ class strided_numeric_iterator val += stride; return *this; } + RAJA_HOST_DEVICE inline strided_numeric_iterator& operator--() { val -= stride; @@ -321,6 +348,7 @@ class strided_numeric_iterator val += rhs * stride; return *this; } + RAJA_HOST_DEVICE inline strided_numeric_iterator& operator-=( const difference_type& rhs) { @@ -338,16 +366,18 @@ class strided_numeric_iterator (static_cast(rhs.val))) / stride; } + RAJA_HOST_DEVICE inline difference_type operator-( const strided_numeric_iterator& rhs) const { difference_type diff = (static_cast(val) - (static_cast(rhs.val))); - return (diff % stride != difference_type{0}) - ? (difference_type{1} + diff / stride) + return (diff % stride != difference_type {0}) + ? 
(difference_type {1} + diff / stride) : diff / stride; } + RAJA_HOST_DEVICE inline strided_numeric_iterator operator+( const difference_type& rhs) const { @@ -356,6 +386,7 @@ class strided_numeric_iterator #endif return strided_numeric_iterator(val + rhs * stride, stride); } + RAJA_HOST_DEVICE inline strided_numeric_iterator operator-( const difference_type& rhs) const { @@ -372,6 +403,7 @@ class strided_numeric_iterator { return (val - rhs.val) / stride; } + RAJA_HOST_DEVICE inline bool operator==( const strided_numeric_iterator& rhs) const { @@ -383,31 +415,35 @@ class strided_numeric_iterator { return val * stride > rhs.val * stride; } + RAJA_HOST_DEVICE inline bool operator<( const strided_numeric_iterator& rhs) const { return val * stride < rhs.val * stride; } + RAJA_HOST_DEVICE inline bool operator>=( const strided_numeric_iterator& rhs) const { return val * stride >= rhs.val * stride; } + RAJA_HOST_DEVICE inline bool operator<=( const strided_numeric_iterator& rhs) const { return val * stride <= rhs.val * stride; } - RAJA_HOST_DEVICE inline value_type operator*() const { return value_type(val); } + RAJA_HOST_DEVICE inline value_type operator->() const { return value_type(val); } + RAJA_HOST_DEVICE constexpr value_type operator[](difference_type rhs) const { return value_type(val + rhs * stride); @@ -415,7 +451,7 @@ class strided_numeric_iterator private: stripped_value_type val = 0; - DifferenceType stride = 1; + DifferenceType stride = 1; }; diff --git a/include/RAJA/internal/MemUtils_CPU.hpp b/include/RAJA/internal/MemUtils_CPU.hpp index 55015f9ab7..1281c36277 100644 --- a/include/RAJA/internal/MemUtils_CPU.hpp +++ b/include/RAJA/internal/MemUtils_CPU.hpp @@ -27,7 +27,7 @@ #include "RAJA/util/types.hpp" -#if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ +#if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ defined(__MINGW32__) || defined(__BORLANDC__) #define RAJA_PLATFORM_WINDOWS #include @@ -44,7 +44,7 @@ inline void* 
allocate_aligned(size_t alignment, size_t size) #if defined(RAJA_HAVE_POSIX_MEMALIGN) // posix_memalign available void* ret = nullptr; - int err = posix_memalign(&ret, alignment, size); + int err = posix_memalign(&ret, alignment, size); return err ? nullptr : ret; #elif defined(RAJA_HAVE_ALIGNED_ALLOC) return std::aligned_alloc(alignment, size); @@ -53,27 +53,25 @@ inline void* allocate_aligned(size_t alignment, size_t size) #elif defined(RAJA_PLATFORM_WINDOWS) return _aligned_malloc(size, alignment); #else - char *mem = (char *)malloc(size + alignment + sizeof(void *)); + char* mem = (char*)malloc(size + alignment + sizeof(void*)); if (nullptr == mem) return nullptr; - void **ptr = (void **)((std::uintptr_t)(mem + alignment + sizeof(void *)) & - ~(alignment - 1)); + void** ptr = (void**)((std::uintptr_t)(mem + alignment + sizeof(void*)) & + ~(alignment - 1)); // Store the original address one position behind what we give the user. ptr[-1] = mem; return ptr; #endif } - /// /// Portable aligned memory allocation /// -template +template inline T* allocate_aligned_type(size_t alignment, size_t size) { return reinterpret_cast(allocate_aligned(alignment, size)); } - /// /// Portable aligned memory free - required for Windows /// @@ -97,25 +95,23 @@ inline void free_aligned(void* ptr) /// struct FreeAligned { - void operator()(void* ptr) - { - free_aligned(ptr); - } + void operator()(void* ptr) { free_aligned(ptr); } }; /// /// Deleter function object for memory allocated with allocate_aligned_type /// that calls the destructor for the fist size objects in the storage. 
/// -template < typename T, typename index_type > +template struct FreeAlignedType : FreeAligned { index_type size = 0; void operator()(T* ptr) { - for ( index_type i = size; i > 0; --i ) { - ptr[i-1].~T(); + for (index_type i = size; i > 0; --i) + { + ptr[i - 1].~T(); } FreeAligned::operator()(ptr); } diff --git a/include/RAJA/internal/RAJAVec.hpp b/include/RAJA/internal/RAJAVec.hpp index 1d0ec0cbeb..b4366f41a5 100644 --- a/include/RAJA/internal/RAJAVec.hpp +++ b/include/RAJA/internal/RAJAVec.hpp @@ -49,7 +49,7 @@ namespace RAJA * ****************************************************************************** */ -template > +template> class RAJAVec { using allocator_traits_type = std::allocator_traits; @@ -57,26 +57,30 @@ class RAJAVec typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; + public: - using value_type = T; - using allocator_type = Allocator; - using size_type = std::size_t; + using value_type = T; + using allocator_type = Allocator; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using reference = value_type&; + using reference = value_type&; using const_reference = const value_type&; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - using iterator = value_type*; - using const_iterator = const value_type*; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + using iterator = value_type*; + using const_iterator = const value_type*; /// /// Construct empty vector with given capacity. 
/// - explicit RAJAVec(size_type init_cap = 0, + explicit RAJAVec(size_type init_cap = 0, const allocator_type& a = allocator_type()) - : m_data(nullptr), m_allocator(a), m_capacity(0), m_size(0) + : m_data(nullptr), + m_allocator(a), + m_capacity(0), + m_size(0) { reserve(init_cap); } @@ -86,7 +90,9 @@ class RAJAVec /// RAJAVec(const RAJAVec& other) : m_data(nullptr), - m_allocator(allocator_traits_type::select_on_container_copy_construction(other.m_allocator)), + m_allocator( + allocator_traits_type::select_on_container_copy_construction( + other.m_allocator)), m_capacity(0), m_size(0) { @@ -103,9 +109,9 @@ class RAJAVec m_capacity(other.m_capacity), m_size(other.m_size) { - other.m_data = nullptr; + other.m_data = nullptr; other.m_capacity = 0; - other.m_size = 0; + other.m_size = 0; } /// @@ -113,8 +119,9 @@ class RAJAVec /// RAJAVec& operator=(const RAJAVec& rhs) { - if (&rhs != this) { - copy_assign_private(rhs, propagate_on_container_copy_assignment{}); + if (&rhs != this) + { + copy_assign_private(rhs, propagate_on_container_copy_assignment {}); } return *this; } @@ -124,8 +131,10 @@ class RAJAVec /// RAJAVec& operator=(RAJAVec&& rhs) { - if (&rhs != this) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + if (&rhs != this) + { + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment {}); } return *this; } @@ -144,31 +153,36 @@ class RAJAVec /// void swap(RAJAVec& other) { - swap_private(other, propagate_on_container_swap{}); + swap_private(other, propagate_on_container_swap {}); } /// /// Get a pointer to the beginning of the contiguous vector /// - pointer data() { return m_data; } + pointer data() { return m_data; } + /// const_pointer data() const { return m_data; } /// /// Get an iterator to the end. 
/// - iterator end() { return m_data + m_size; } + iterator end() { return m_data + m_size; } + /// - const_iterator end() const { return m_data + m_size; } + const_iterator end() const { return m_data + m_size; } + /// const_iterator cend() const { return m_data + m_size; } /// /// Get an iterator to the beginning. /// - iterator begin() { return m_data; } + iterator begin() { return m_data; } + /// - const_iterator begin() const { return m_data; } + const_iterator begin() const { return m_data; } + /// const_iterator cbegin() const { return m_data; } @@ -200,18 +214,12 @@ class RAJAVec /// /// Shrink the capacity of the vector to the current size. /// - void shrink_to_fit() - { - shrink_cap(m_size); - } + void shrink_to_fit() { shrink_cap(m_size); } /// /// Empty vector of all data. /// - void clear() - { - destroy_items_after(0); - } + void clear() { destroy_items_after(0); } /// /// Change the size of the vector, @@ -221,10 +229,13 @@ class RAJAVec RAJA_INLINE void resize(size_type new_size) { - if (new_size >= size()) { + if (new_size >= size()) + { reserve(new_size); construct_items_back(new_size); - } else { + } + else + { destroy_items_after(new_size); } } @@ -237,10 +248,13 @@ class RAJAVec RAJA_INLINE void resize(size_type new_size, const_reference new_value) { - if (new_size >= size()) { + if (new_size >= size()) + { reserve(new_size); construct_items_back(new_size, new_value); - } else { + } + else + { destroy_items_after(new_size); } } @@ -248,52 +262,62 @@ class RAJAVec /// /// Bracket operator accessor. /// - reference operator[](difference_type i) { return m_data[i]; } + reference operator[](difference_type i) { return m_data[i]; } + /// const_reference operator[](difference_type i) const { return m_data[i]; } /// /// Access the last item of the vector. /// - reference front() { return m_data[0]; } + reference front() { return m_data[0]; } + /// const_reference front() const { return m_data[0]; } /// /// Access the last item of the vector. 
/// - reference back() { return m_data[m_size-1]; } + reference back() { return m_data[m_size - 1]; } + /// - const_reference back() const { return m_data[m_size-1]; } + const_reference back() const { return m_data[m_size - 1]; } /// /// Add item to front end of vector. Note that this operation is unique to /// this class; it is not part of the C++ standard library vector interface. /// void push_front(const_reference item) { emplace_front_private(item); } + /// - void push_front( value_type&& item) { emplace_front_private(std::move(item)); } + void push_front(value_type&& item) { emplace_front_private(std::move(item)); } + /// - template < typename ... Os > - void emplace_front(Os&&... os) { emplace_front_private(std::forward(os)...); } + template + void emplace_front(Os&&... os) + { + emplace_front_private(std::forward(os)...); + } /// /// Add item to back end of vector. /// void push_back(const_reference item) { emplace_back_private(item); } + /// - void push_back( value_type&& item) { emplace_back_private(std::move(item)); } + void push_back(value_type&& item) { emplace_back_private(std::move(item)); } + /// - template < typename ... Os > - void emplace_back(Os&&... os) { emplace_back_private(std::forward(os)...); } + template + void emplace_back(Os&&... os) + { + emplace_back_private(std::forward(os)...); + } /// /// Remove the last item of the vector. 
/// - void pop_back() - { - destroy_items_after(m_size-1); - } + void pop_back() { destroy_items_after(m_size - 1); } private: pointer m_data; @@ -307,13 +331,14 @@ class RAJAVec /// void copy_assign_private(RAJAVec const& rhs, std::true_type) { - if (m_allocator != rhs.m_allocator) { + if (m_allocator != rhs.m_allocator) + { clear(); shrink_to_fit(); m_allocator = rhs.m_allocator; } - copy_assign_private(rhs, std::false_type{}); + copy_assign_private(rhs, std::false_type {}); } /// @@ -323,10 +348,13 @@ class RAJAVec void copy_assign_private(RAJAVec const& rhs, std::false_type) { reserve(rhs.size()); - if (size() < rhs.size()) { + if (size() < rhs.size()) + { copy_assign_items(0, size(), rhs.data()); copy_construct_items_back(rhs.size(), rhs.data()); - } else { + } + else + { copy_assign_items(0, rhs.size(), rhs.data()); destroy_items_after(size()); } @@ -341,14 +369,14 @@ class RAJAVec clear(); shrink_to_fit(); - m_data = rhs.m_data; + m_data = rhs.m_data; m_allocator = std::move(rhs.m_allocator); - m_capacity = rhs.m_capacity; - m_size = rhs.m_size; + m_capacity = rhs.m_capacity; + m_size = rhs.m_size; - rhs.m_data = nullptr; + rhs.m_data = nullptr; rhs.m_capacity = 0; - rhs.m_size = 0; + rhs.m_size = 0; } /// @@ -357,23 +385,29 @@ class RAJAVec /// void move_assign_private(RAJAVec&& rhs, std::false_type) { - if (m_allocator == rhs.m_allocator) { + if (m_allocator == rhs.m_allocator) + { clear(); shrink_to_fit(); - m_data = rhs.m_data; + m_data = rhs.m_data; m_capacity = rhs.m_capacity; - m_size = rhs.m_size; + m_size = rhs.m_size; - rhs.m_data = nullptr; + rhs.m_data = nullptr; rhs.m_capacity = 0; - rhs.m_size = 0; - } else { + rhs.m_size = 0; + } + else + { reserve(rhs.size()); - if (size() < rhs.size()) { + if (size() < rhs.size()) + { move_assign_items(0, size(), rhs.data()); move_construct_items_back(rhs.size(), rhs.data()); - } else { + } + else + { move_assign_items(0, rhs.size(), rhs.data()); destroy_items_after(size()); } @@ -386,10 +420,10 @@ class 
RAJAVec void swap_private(RAJAVec& other, std::true_type) { using std::swap; - swap(m_data, other.m_data); + swap(m_data, other.m_data); swap(m_allocator, other.m_allocator); - swap(m_capacity, other.m_capacity); - swap(m_size, other.m_size); + swap(m_capacity, other.m_capacity); + swap(m_size, other.m_size); } /// @@ -398,9 +432,9 @@ class RAJAVec void swap_private(RAJAVec& other, std::false_type) { using std::swap; - swap(m_data, other.m_data); - swap(m_capacity, other.m_capacity); - swap(m_size, other.m_size); + swap(m_data, other.m_data); + swap(m_capacity, other.m_capacity); + swap(m_size, other.m_size); } // @@ -408,7 +442,8 @@ class RAJAVec // void copy_assign_items(size_type first, size_type last, const_pointer o_data) { - for (size_type i = first; i < last; ++i) { + for (size_type i = first; i < last; ++i) + { m_data[i] = o_data[i]; } } @@ -418,7 +453,8 @@ class RAJAVec // void move_assign_items(size_type first, size_type last, pointer o_data) { - for (size_type i = first; i < last; ++i) { + for (size_type i = first; i < last; ++i) + { m_data[i] = std::move(o_data[i]); } } @@ -426,11 +462,13 @@ class RAJAVec // // Construct items [m_size, new_size) from args. // - template < typename ... Os > + template void construct_items_back(size_type new_size, Os&&... 
os) { - for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, std::forward(os)...); + for (; m_size < new_size; ++m_size) + { + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::forward(os)...); } } @@ -439,8 +477,10 @@ class RAJAVec // void copy_construct_items_back(size_type new_size, const_pointer o_data) { - for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, o_data[m_size]); + for (; m_size < new_size; ++m_size) + { + allocator_traits_type::construct(m_allocator, m_data + m_size, + o_data[m_size]); } } @@ -449,8 +489,10 @@ class RAJAVec // void move_construct_items_back(size_type new_size, pointer o_data) { - for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, std::move(o_data[m_size])); + for (; m_size < new_size; ++m_size) + { + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::move(o_data[m_size])); } } @@ -459,39 +501,45 @@ class RAJAVec // void destroy_items_after(size_type new_end) { - for (; m_size > new_end; --m_size) { - allocator_traits_type::destroy(m_allocator, m_data+m_size-1); + for (; m_size > new_end; --m_size) + { + allocator_traits_type::destroy(m_allocator, m_data + m_size - 1); } } // // Add an item to the front, shifting all existing items back one. // - template < typename ... Os > + template void emplace_front_private(Os&&... 
os) { reserve(m_size + 1); - if (m_size > 0) { + if (m_size > 0) + { size_type i = m_size; - allocator_traits_type::construct(m_allocator, m_data+i, std::move(m_data[i - 1])); - for (--i; i > 0; --i) { + allocator_traits_type::construct(m_allocator, m_data + i, + std::move(m_data[i - 1])); + for (--i; i > 0; --i) + { m_data[i] = std::move(m_data[i - 1]); } allocator_traits_type::destroy(m_allocator, m_data); } - allocator_traits_type::construct(m_allocator, m_data, std::forward(os)...); + allocator_traits_type::construct(m_allocator, m_data, + std::forward(os)...); m_size++; } // // Add an item to the back. // - template < typename ... Os > + template void emplace_back_private(Os&&... os) { reserve(m_size + 1); - allocator_traits_type::construct(m_allocator, m_data+m_size, std::forward(os)...); + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::forward(os)...); m_size++; } @@ -501,7 +549,7 @@ class RAJAVec // relying on STL directly. // static constexpr const size_type s_init_cap = 8; - static constexpr const double s_grow_fac = 1.5; + static constexpr const double s_grow_fac = 1.5; // // Get the next value for capacity given a target and minimum. 
@@ -509,7 +557,8 @@ class RAJAVec size_type get_next_cap(size_type target_size) { size_type next_cap = s_init_cap; - if (m_capacity != 0) { + if (m_capacity != 0) + { next_cap = static_cast(m_capacity * s_grow_fac); } return std::max(target_size, next_cap); @@ -520,7 +569,8 @@ class RAJAVec // void grow_cap(size_type target_size) { - if (m_capacity < target_size) { + if (m_capacity < target_size) + { change_cap(get_next_cap(target_size)); } } @@ -530,7 +580,8 @@ class RAJAVec // void shrink_cap(size_type target_size) { - if (m_capacity > target_size) { + if (m_capacity > target_size) + { change_cap(std::max(m_size, target_size)); } } @@ -542,19 +593,23 @@ class RAJAVec void change_cap(size_type next_cap) { pointer tdata = nullptr; - if (next_cap != 0) { + if (next_cap != 0) + { tdata = allocator_traits_type::allocate(m_allocator, next_cap); } - if (m_data) { - for (size_type i = 0; i < m_size; ++i) { - allocator_traits_type::construct(m_allocator, tdata+i, std::move(m_data[i])); - allocator_traits_type::destroy(m_allocator, m_data+i); + if (m_data) + { + for (size_type i = 0; i < m_size; ++i) + { + allocator_traits_type::construct(m_allocator, tdata + i, + std::move(m_data[i])); + allocator_traits_type::destroy(m_allocator, m_data + i); } allocator_traits_type::deallocate(m_allocator, m_data, m_capacity); } - m_data = tdata; + m_data = tdata; m_capacity = next_cap; } }; diff --git a/include/RAJA/internal/fault_tolerance.hpp b/include/RAJA/internal/fault_tolerance.hpp index cf3a86cede..66d03ca6cd 100644 --- a/include/RAJA/internal/fault_tolerance.hpp +++ b/include/RAJA/internal/fault_tolerance.hpp @@ -37,60 +37,72 @@ #include #include "cycle.h" -#define RAJA_FT_BEGIN \ - extern volatile int fault_type; \ - bool repeat; \ - bool do_time = false; \ - ticks start = 0, stop = 0; \ - if (fault_type != 0) { \ - printf("Uncaught fault %d\n", fault_type); \ - fault_type = 0; \ - } \ - do { \ - repeat = false; \ - if (do_time) { \ - start = getticks(); \ +#define 
RAJA_FT_BEGIN \ + extern volatile int fault_type; \ + bool repeat; \ + bool do_time = false; \ + ticks start = 0, stop = 0; \ + if (fault_type != 0) \ + { \ + printf("Uncaught fault %d\n", fault_type); \ + fault_type = 0; \ + } \ + do \ + { \ + repeat = false; \ + if (do_time) \ + { \ + start = getticks(); \ } -#define RAJA_FT_END \ - if (do_time) { \ - stop = getticks(); \ - printf("recoverable fault clock cycles = %16f\n", elapsed(stop, start)); \ - do_time = false; \ - fault_type = 0; \ - } \ - if (fault_type < 0) { \ - printf("Unrecoverable fault (restart penalty)\n"); \ - fault_type = 0; \ - } \ - if (fault_type > 0) { \ - /* invalidate cache */ \ - repeat = true; \ - do_time = true; \ - } \ - } \ - while (repeat == true) \ +#define RAJA_FT_END \ + if (do_time) \ + { \ + stop = getticks(); \ + printf("recoverable fault clock cycles = %16f\n", elapsed(stop, start)); \ + do_time = false; \ + fault_type = 0; \ + } \ + if (fault_type < 0) \ + { \ + printf("Unrecoverable fault (restart penalty)\n"); \ + fault_type = 0; \ + } \ + if (fault_type > 0) \ + { \ + /* invalidate cache */ \ + repeat = true; \ + do_time = true; \ + } \ + } \ + while (repeat == true) \ ; #else -#define RAJA_FT_BEGIN \ - extern volatile int fault_type; \ - bool repeat; \ - if (fault_type == 0) { \ - do { \ +#define RAJA_FT_BEGIN \ + extern volatile int fault_type; \ + bool repeat; \ + if (fault_type == 0) \ + { \ + do \ + { \ repeat = false; -#define RAJA_FT_END \ - if (fault_type > 0) { \ - /* invalidate cache */ \ - repeat = true; \ - fault_type = 0; \ - } \ - } \ - while (repeat == true) \ - ; \ - } \ - else { fault_type = 0; /* ignore for the simulation */ } +#define RAJA_FT_END \ + if (fault_type > 0) \ + { \ + /* invalidate cache */ \ + repeat = true; \ + fault_type = 0; \ + } \ + } \ + while (repeat == true) \ + ; \ + } \ + else \ + { \ + fault_type = 0; /* ignore for the simulation */ \ + } #endif // RAJA_REPORT_FT diff --git a/include/RAJA/internal/foldl.hpp 
b/include/RAJA/internal/foldl.hpp index af65c05392..844159f82b 100644 --- a/include/RAJA/internal/foldl.hpp +++ b/include/RAJA/internal/foldl.hpp @@ -30,7 +30,6 @@ #include "RAJA/util/macros.hpp" - namespace RAJA { @@ -40,47 +39,54 @@ namespace RAJA namespace detail { // FoldL -template +template struct foldl_impl; -template -struct foldl_impl { +template +struct foldl_impl +{ using Ret = Arg1; }; #if RAJA_HAS_CXX17_IS_INVOCABLE -template -struct foldl_impl { +template +struct foldl_impl +{ using Ret = typename std::invoke_result::type; }; -template -struct foldl_impl { - using Ret = typename foldl_impl< - Op, - typename std::invoke_result::type, - Arg3>::type, - Rest...>::Ret; +template +struct foldl_impl +{ + using Ret = + typename foldl_impl::type, + Arg3>::type, + Rest...>::Ret; }; #else -template -struct foldl_impl { +template +struct foldl_impl +{ using Ret = typename std::result_of::type; }; -template -struct foldl_impl { +template +struct foldl_impl +{ using Ret = typename foldl_impl< Op, typename std::result_of::type, @@ -90,9 +96,9 @@ struct foldl_impl { #endif -} // namespace detail +} // namespace detail -template +template RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl( Op&& RAJA_UNUSED_ARG(operation), Arg1&& arg) -> typename detail::foldl_impl::Ret @@ -100,7 +106,7 @@ RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl( return camp::forward(arg); } -template +template RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl(Op&& operation, Arg1&& arg1, Arg2&& arg2) -> @@ -110,11 +116,11 @@ RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl(Op&& operation, camp::forward(arg2)); } -template +template RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl(Op&& operation, Arg1&& arg1, Arg2&& arg2, @@ -130,27 +136,26 @@ RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl(Op&& operation, camp::forward(rest)...); } - // Convenience folds -template +template RAJA_HOST_DEVICE RAJA_INLINE constexpr Result sum(Args... 
args) { return foldl(RAJA::operators::plus(), args...); } -template +template RAJA_HOST_DEVICE RAJA_INLINE constexpr Result product(Args... args) { return foldl(RAJA::operators::multiplies(), args...); } -template +template RAJA_HOST_DEVICE RAJA_INLINE constexpr Result max(Args... args) { return foldl(RAJA::operators::maximum(), args...); } -template +template RAJA_HOST_DEVICE RAJA_INLINE constexpr Result min(Args... args) { return foldl(RAJA::operators::minimum(), args...); diff --git a/include/RAJA/internal/get_platform.hpp b/include/RAJA/internal/get_platform.hpp index 0354d04bfd..ca568cf221 100644 --- a/include/RAJA/internal/get_platform.hpp +++ b/include/RAJA/internal/get_platform.hpp @@ -8,19 +8,23 @@ namespace RAJA { -namespace policy { -namespace multi { -template +namespace policy +{ +namespace multi +{ +template class MultiPolicy; } -} +} // namespace policy -namespace detail +namespace detail { -struct max_platform { +struct max_platform +{ RAJA_HOST_DEVICE + RAJA_INLINE constexpr RAJA::Platform operator()(const RAJA::Platform& l, const RAJA::Platform& r) const @@ -33,19 +37,20 @@ struct max_platform { * Returns the platform for the specified execution policy. * This is a catch-all, so anything undefined gets Platform::undefined */ -template -struct get_platform { +template +struct get_platform +{ // catch-all: undefined platform static constexpr Platform value = Platform::undefined; }; - /*! * Takes a list of policies, extracts their platforms, and provides the * reduction of them all. */ -template -struct get_platform_from_list { +template +struct get_platform_from_list +{ static constexpr Platform value = foldl(max_platform(), get_platform::value...); }; @@ -53,42 +58,41 @@ struct get_platform_from_list { /*! * Define an empty list as Platform::undefined; */ -template <> -struct get_platform_from_list<> { +template<> +struct get_platform_from_list<> +{ static constexpr Platform value = Platform::undefined; }; - /*! 
* Specialization to define the platform for anything derived from PolicyBase, * which should catch all standard policies. * * (not for MultiPolicy or nested::Policy) */ -template +template struct get_platform::value - && !RAJA::type_traits::is_indexset_policy:: - value>::type> { + typename std::enable_if< + std::is_base_of::value && + !RAJA::type_traits::is_indexset_policy::value>::type> +{ static constexpr Platform value = T::platform; }; - /*! * Specialization to define the platform for an IndexSet execution policy. * * Examines both segment iteration and segment execution policies. */ -template +template struct get_platform> - : public get_platform_from_list { -}; + : public get_platform_from_list +{}; - -template -struct get_statement_platform { +template +struct get_statement_platform +{ static constexpr Platform value = get_platform_from_list::value; @@ -101,8 +105,9 @@ struct get_statement_platform { * This collects the Platform from each of it's statements, recursing into * each of them. */ -template -struct get_platform> { +template +struct get_platform> +{ static constexpr Platform value = foldl(max_platform(), get_statement_platform::value...); }; @@ -110,21 +115,22 @@ struct get_platform> { /*! * Specialize for an empty statement list to be undefined */ -template <> -struct get_platform> { +template<> +struct get_platform> +{ static constexpr Platform value = Platform::undefined; }; - // Top level MultiPolicy shouldn't select a platform // Once a specific policy is selected, that policy will select the correct // platform... 
see policy_invoker in MultiPolicy.hpp -template -struct get_platform> { +template +struct get_platform> +{ static constexpr Platform value = Platform::undefined; }; -} // closing brace for detail namespace -} // closing brace for RAJA namespace +} // namespace detail +} // namespace RAJA -#endif // RAJA_get_platform_HPP +#endif // RAJA_get_platform_HPP diff --git a/include/RAJA/pattern/WorkGroup.hpp b/include/RAJA/pattern/WorkGroup.hpp index 767821b8d8..740e2e64e9 100644 --- a/include/RAJA/pattern/WorkGroup.hpp +++ b/include/RAJA/pattern/WorkGroup.hpp @@ -38,39 +38,44 @@ namespace RAJA * * \verbatim - WorkPool, Allocator> pool(allocator); + WorkPool, Allocator> + pool(allocator); pool.enqueue(..., [=] (Index_type i, int* xarg0, int xarg1) { xarg0[i] = xarg1; }); - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); int* xarg0 = ...; int xarg1 = ...; - WorkSite, Allocator> site = group.run(xarg0, xarg1); + WorkSite, Allocator> site = + group.run(xarg0, xarg1); * \endverbatim * ****************************************************************************** */ -template < typename ... Args > +template using xargs = camp::list; -namespace detail { +namespace detail +{ -template < typename T > -struct is_xargs { +template +struct is_xargs +{ static constexpr bool value = false; }; -template < typename ... Args > -struct is_xargs> { +template +struct is_xargs> +{ static constexpr bool value = true; }; -} - +} // namespace detail // // Forward declarations for WorkPool and WorkGroup templates. 
@@ -102,21 +107,24 @@ struct is_xargs> { data[i] = 1; }); - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); * \endverbatim * ****************************************************************************** */ -template -struct WorkPool { - static_assert(RAJA::pattern_is::value, +template +struct WorkPool +{ + static_assert( + RAJA::pattern_is::value, "WorkPool: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkPool: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkPool: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; /*! @@ -135,23 +143,27 @@ struct WorkPool { * * \verbatim - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); - WorkSite, Allocator> site = group.run(); + WorkSite, Allocator> site = + group.run(); * \endverbatim * ****************************************************************************** */ -template -struct WorkGroup { - static_assert(RAJA::pattern_is::value, +template +struct WorkGroup +{ + static_assert( + RAJA::pattern_is::value, "WorkGroup: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkGroup: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkGroup: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; /*! 
@@ -170,7 +182,8 @@ struct WorkGroup { * * \verbatim - WorkSite, Allocator> site = group.run(); + WorkSite, Allocator> site = + group.run(); site.synchronize(); @@ -178,25 +191,26 @@ struct WorkGroup { * ****************************************************************************** */ -template -struct WorkSite { - static_assert(RAJA::pattern_is::value, +template +struct WorkSite +{ + static_assert( + RAJA::pattern_is::value, "WorkSite: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkSite: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkSite: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; - -template +template struct WorkPool, ALLOCATOR_T> { - using exec_policy = EXEC_POLICY_T; - using order_policy = ORDER_POLICY_T; - using storage_policy = STORAGE_POLICY_T; + using exec_policy = EXEC_POLICY_T; + using order_policy = ORDER_POLICY_T; + using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; - using index_type = INDEX_T; - using xarg_type = xargs; - using Allocator = ALLOCATOR_T; + using policy = WorkGroupPolicy; + using index_type = INDEX_T; + using xarg_type = xargs; + using Allocator = ALLOCATOR_T; using workgroup_type = WorkGroup; - using worksite_type = WorkSite; + using worksite_type = WorkSite; private: - using workrunner_type = detail::WorkRunner< - exec_policy, order_policy, dispatch_policy, Allocator, index_type, Args...>; - using storage_type = detail::WorkStorage< - storage_policy, Allocator, typename workrunner_type::dispatcher_type>; + using workrunner_type = detail::WorkRunner; + using storage_type = + detail::WorkStorage; friend workgroup_type; friend worksite_type; @@ -229,52 +252,45 @@ struct WorkPool + template inline void enqueue(segment_T&& seg, loop_T&& loop_body) { { // ignore zero length loops - using std::begin; using std::end; + using std::begin; + using std::end; if (begin(seg) == end(seg)) return; } - if 
(m_storage.begin() == m_storage.end()) { + if (m_storage.begin() == m_storage.end()) + { // perform auto-reserve on reuse reserve(m_max_num_loops, m_max_storage_bytes); } - util::PluginContext context{util::make_context()}; + util::PluginContext context {util::make_context()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; auto body = trigger_updates_before(loop_body); - m_runner.enqueue( - m_storage, std::forward(seg), std::move(body)); + m_runner.enqueue(m_storage, std::forward(seg), std::move(body)); util::callPostCapturePlugins(context); } @@ -289,26 +305,23 @@ struct WorkPool +template struct WorkGroup, ALLOCATOR_T> { - using exec_policy = EXEC_POLICY_T; - using order_policy = ORDER_POLICY_T; - using storage_policy = STORAGE_POLICY_T; + using exec_policy = EXEC_POLICY_T; + using order_policy = ORDER_POLICY_T; + using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; - using index_type = INDEX_T; - using xarg_type = xargs; - using Allocator = ALLOCATOR_T; + using policy = WorkGroupPolicy; + using index_type = INDEX_T; + using xarg_type = xargs; + using Allocator = ALLOCATOR_T; using workpool_type = WorkPool; using worksite_type = WorkSite; private: - using storage_type = typename workpool_type::storage_type; + using storage_type = typename workpool_type::storage_type; using workrunner_type = typename workpool_type::workrunner_type; friend workpool_type; @@ -339,15 +355,16 @@ struct WorkGroup +template struct WorkSite, ALLOCATOR_T> { - using exec_policy = EXEC_POLICY_T; - using order_policy = ORDER_POLICY_T; - using storage_policy = STORAGE_POLICY_T; + using exec_policy = EXEC_POLICY_T; + using order_policy = ORDER_POLICY_T; + using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; - using index_type = INDEX_T; - using xarg_type = xargs; - using Allocator = ALLOCATOR_T; - - using workpool_type = WorkPool; + 
using policy = WorkGroupPolicy; + using index_type = INDEX_T; + using xarg_type = xargs; + using Allocator = ALLOCATOR_T; + + using workpool_type = WorkPool; using workgroup_type = WorkGroup; private: @@ -412,16 +429,13 @@ struct WorkSite -inline -typename WorkPool< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::workgroup_type -WorkPool< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::instantiate() +template +inline typename WorkPool, + INDEX_T, + xargs, + ALLOCATOR_T>::workgroup_type +WorkPool, + INDEX_T, + xargs, + ALLOCATOR_T>::instantiate() { // update max sizes to auto-reserve on reuse - m_max_num_loops = std::max(m_storage.size(), m_max_num_loops); + m_max_num_loops = std::max(m_storage.size(), m_max_num_loops); m_max_storage_bytes = std::max(m_storage.storage_size(), m_max_storage_bytes); // move storage into workgroup - return workgroup_type{std::move(m_storage), std::move(m_runner)}; + return workgroup_type {std::move(m_storage), std::move(m_runner)}; } -template -inline -typename WorkGroup< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::worksite_type +template +inline typename WorkGroup, + INDEX_T, + xargs, + ALLOCATOR_T>::worksite_type WorkGroup< - WorkGroupPolicy, + WorkGroupPolicy, INDEX_T, xargs, - ALLOCATOR_T>::run(typename WorkGroup< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::resource_type r, + ALLOCATOR_T>::run(typename WorkGroup, + INDEX_T, + xargs, + ALLOCATOR_T>::resource_type r, Args... 
args) { - util::PluginContext context{util::make_context()}; + util::PluginContext context {util::make_context()}; util::callPreLaunchPlugins(context); // move any per run storage into worksite - worksite_type site(r, m_runner.run(m_storage, r, std::forward(args)...)); + worksite_type site(r, + m_runner.run(m_storage, r, std::forward(args)...)); util::callPostLaunchPlugins(context); diff --git a/include/RAJA/pattern/WorkGroup/Dispatcher.hpp b/include/RAJA/pattern/WorkGroup/Dispatcher.hpp index 1eac283f4b..c74d433cf0 100644 --- a/include/RAJA/pattern/WorkGroup/Dispatcher.hpp +++ b/include/RAJA/pattern/WorkGroup/Dispatcher.hpp @@ -29,42 +29,43 @@ #include - namespace RAJA { namespace detail { -template < typename > +template struct DispatcherVoidPtrWrapper { void* ptr; DispatcherVoidPtrWrapper() = default; + // implicit constructor from void* - RAJA_HOST_DEVICE DispatcherVoidPtrWrapper(void* p) : ptr(p) { } + RAJA_HOST_DEVICE DispatcherVoidPtrWrapper(void* p) : ptr(p) {} }; -template < typename > +template struct DispatcherVoidConstPtrWrapper { const void* ptr; DispatcherVoidConstPtrWrapper() = default; + // implicit constructor from const void* - RAJA_HOST_DEVICE DispatcherVoidConstPtrWrapper(const void* p) : ptr(p) { } + RAJA_HOST_DEVICE DispatcherVoidConstPtrWrapper(const void* p) : ptr(p) {} }; - -constexpr bool dispatcher_use_host_invoke(Platform platform) { +constexpr bool dispatcher_use_host_invoke(Platform platform) +{ return !(platform == Platform::cuda || platform == Platform::hip); } // Transforms one dispatch policy into another by creating a dispatch policy // of holder_type objects. See usage in WorkRunner for more explanation. 
-template < typename dispatch_policy, typename holder_type > +template struct dispatcher_transform_types; /// -template < typename dispatch_policy, typename holder_type > +template using dispatcher_transform_types_t = typename dispatcher_transform_types::type; @@ -75,12 +76,16 @@ using dispatcher_transform_types_t = * DispatcherID is used to differentiate function pointers based on their * function signature. */ -template < Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs > +template struct Dispatcher; - -template < typename holder_type > -struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, holder_type> { +template +struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, + holder_type> +{ using type = ::RAJA::indirect_function_call_dispatch; }; @@ -93,38 +98,45 @@ struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, holde * during device linking when functions with high register counts may cause * device linking to fail. */ -template < Platform platform, typename DispatcherID, typename ... 
CallArgs > -struct Dispatcher { +template +struct Dispatcher +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::indirect_function_call_dispatch; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::indirect_function_call_dispatch; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - template < typename T > - static void s_move_construct_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) + template + static void s_move_construct_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) { T* dest_as_T = static_cast(dest.ptr); - T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + T* src_as_T = static_cast(src.ptr); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } /// /// invoke the call operator of the object of type T in obj with args /// - template < typename T > + template static void s_host_invoke(void_cptr_wrapper obj, CallArgs... args) { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } + /// - template < typename T > - static RAJA_DEVICE void s_device_invoke(void_cptr_wrapper obj, CallArgs... args) + template + static RAJA_DEVICE void s_device_invoke(void_cptr_wrapper obj, + CallArgs... args) { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); @@ -133,22 +145,27 @@ struct Dispatcher + template static void s_destroy(void_ptr_wrapper obj) { T* obj_as_T = static_cast(obj.ptr); (*obj_as_T).~T(); } - using mover_type = void(*)(void_ptr_wrapper /*dest*/, void_ptr_wrapper /*src*/); - using invoker_type = void(*)(void_cptr_wrapper /*obj*/, CallArgs... 
/*args*/); - using destroyer_type = void(*)(void_ptr_wrapper /*obj*/); + using mover_type = void (*)(void_ptr_wrapper /*dest*/, + void_ptr_wrapper /*src*/); + using invoker_type = void (*)(void_cptr_wrapper /*obj*/, + CallArgs... /*args*/); + using destroyer_type = void (*)(void_ptr_wrapper /*obj*/); // This can't be a cuda device lambda due to compiler limitations - template < typename T > - struct DeviceInvokerFactory { + template + struct DeviceInvokerFactory + { using value_type = invoker_type; - RAJA_DEVICE value_type operator()() { + + RAJA_DEVICE value_type operator()() + { #if defined(RAJA_ENABLE_HIP) && !defined(RAJA_ENABLE_HIP_INDIRECT_FUNCTION_CALL) return nullptr; #else @@ -160,15 +177,16 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher() { - return { mover_type{&s_move_construct_destroy}, - invoker_type{&s_host_invoke}, - destroyer_type{&s_destroy}, - sizeof(T) - }; + template* = nullptr> + static inline Dispatcher makeDispatcher() + { + return {mover_type {&s_move_construct_destroy}, + invoker_type {&s_host_invoke}, destroyer_type {&s_destroy}, + sizeof(T)}; } + /// /// create a Dispatcher that can be used on the device for objects of type T /// @@ -179,14 +197,16 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) { - return { mover_type{&s_move_construct_destroy}, - invoker_type{std::forward(createOnDevice)(DeviceInvokerFactory{})}, - destroyer_type{&s_destroy}, - sizeof(T) - }; + template* = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) + { + return {mover_type {&s_move_construct_destroy}, + invoker_type {std::forward(createOnDevice)( + DeviceInvokerFactory {})}, + destroyer_type {&s_destroy}, sizeof(T)}; } mover_type move_construct_destroy; @@ -195,9 +215,10 @@ struct Dispatcher -struct dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, holder_type> { +template +struct 
dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, + holder_type> +{ using type = ::RAJA::indirect_virtual_function_dispatch; }; @@ -210,38 +231,48 @@ struct dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, ho * during device linking when functions with high register counts may cause * device linking to fail. */ -template < Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher { +template +struct Dispatcher +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::indirect_virtual_function_dispatch; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::indirect_virtual_function_dispatch; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; - struct impl_base { - virtual void move_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) const = 0; - virtual void destroy(void_ptr_wrapper obj) const = 0; + struct impl_base + { + virtual void move_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) const = 0; + virtual void destroy(void_ptr_wrapper obj) const = 0; }; - struct host_impl_base { + struct host_impl_base + { virtual void invoke(void_cptr_wrapper obj, CallArgs... args) const = 0; }; - struct device_impl_base { - virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, CallArgs... args) const = 0; + struct device_impl_base + { + virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, + CallArgs... 
args) const = 0; }; - template < typename T > + template struct base_impl_type : impl_base { /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - virtual void move_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) const override + virtual void move_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) const override { T* dest_as_T = static_cast(dest.ptr); - T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + T* src_as_T = static_cast(src.ptr); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } @@ -255,7 +286,7 @@ struct Dispatcher + template struct host_impl_type : host_impl_base { /// @@ -268,59 +299,69 @@ struct Dispatcher + template struct device_impl_type : device_impl_base { /// /// invoke the call operator of the object of type T in obj with args /// - virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, CallArgs... args) const override + virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, + CallArgs... args) const override { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - struct mover_type { + struct mover_type + { impl_base* m_impl; + void operator()(void_ptr_wrapper dest, void_ptr_wrapper src) const { m_impl->move_destroy(dest, src); } }; - struct host_invoker_type { + struct host_invoker_type + { host_impl_base* m_impl; + void operator()(void_cptr_wrapper obj, CallArgs... args) const { m_impl->invoke(obj, std::forward(args)...); } }; + /// - struct device_invoker_type { + struct device_invoker_type + { device_impl_base* m_impl; + RAJA_DEVICE void operator()(void_cptr_wrapper obj, CallArgs... 
args) const { m_impl->invoke(obj, std::forward(args)...); } }; - using invoker_type = std::conditional_t; - struct destroyer_type { + using invoker_type = std:: + conditional_t; + + struct destroyer_type + { impl_base* m_impl; - void operator()(void_ptr_wrapper obj) const - { - m_impl->destroy(obj); - } + + void operator()(void_ptr_wrapper obj) const { m_impl->destroy(obj); } }; // This can't be a cuda device lambda due to compiler limitations - template < typename T > - struct DeviceImplTypeFactory { + template + struct DeviceImplTypeFactory + { using value_type = device_impl_type*; - RAJA_DEVICE value_type operator()() { + + RAJA_DEVICE value_type operator()() + { #if defined(RAJA_ENABLE_HIP) && !defined(RAJA_ENABLE_HIP_INDIRECT_FUNCTION_CALL) return nullptr; #else @@ -333,17 +374,17 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher() { + template* = nullptr> + static inline Dispatcher makeDispatcher() + { static base_impl_type s_base_impl; static host_impl_type s_host_impl; - return { mover_type{&s_base_impl}, - host_invoker_type{&s_host_impl}, - destroyer_type{&s_base_impl}, - sizeof(T) - }; + return {mover_type {&s_base_impl}, host_invoker_type {&s_host_impl}, + destroyer_type {&s_base_impl}, sizeof(T)}; } + /// /// create a Dispatcher that can be used on the device for objects of type T /// @@ -354,17 +395,17 @@ struct Dispatcher* = nullptr> - static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) { + template* = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) + { static base_impl_type s_base_impl; - static device_impl_type* s_device_impl_ptr{ - std::forward(createOnDevice)(DeviceImplTypeFactory{}) }; - return { mover_type{&s_base_impl}, - device_invoker_type{s_device_impl_ptr}, - destroyer_type{&s_base_impl}, - sizeof(T) - }; + static device_impl_type* s_device_impl_ptr {std::forward( + createOnDevice)(DeviceImplTypeFactory {})}; + return {mover_type {&s_base_impl}, 
device_invoker_type {s_device_impl_ptr}, + destroyer_type {&s_base_impl}, sizeof(T)}; } mover_type move_construct_destroy; @@ -373,74 +414,87 @@ struct Dispatcher -struct dispatcher_transform_types<::RAJA::direct_dispatch, holder_type> { - using type = ::RAJA::direct_dispatch...>; +template +struct dispatcher_transform_types<::RAJA::direct_dispatch, holder_type> +{ + using type = + ::RAJA::direct_dispatch...>; }; /*! * Version of Dispatcher that does direct dispatch to zero callable types. * It implements the interface with callable objects. */ -template < Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher, DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::direct_dispatch<>; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::direct_dispatch<>; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - struct mover_type { - void operator()(void_ptr_wrapper, void_ptr_wrapper) const - { } + struct mover_type + { + void operator()(void_ptr_wrapper, void_ptr_wrapper) const {} }; /// /// invoke the call operator of the object of type T in obj with args /// - struct host_invoker_type { - void operator()(void_cptr_wrapper, CallArgs...) const - { } + struct host_invoker_type + { + void operator()(void_cptr_wrapper, CallArgs...) const {} }; - struct device_invoker_type { - RAJA_DEVICE void operator()(void_cptr_wrapper, CallArgs...) const - { } + + struct device_invoker_type + { + RAJA_DEVICE void operator()(void_cptr_wrapper, CallArgs...) 
const {} }; - using invoker_type = std::conditional_t; + + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj /// - struct destroyer_type { - void operator()(void_ptr_wrapper) const - { } + struct destroyer_type + { + void operator()(void_ptr_wrapper) const {} }; /// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename T, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { - return {mover_type{}, host_invoker_type{}, destroyer_type{}, sizeof(T)}; + template* = nullptr> + static inline Dispatcher makeDispatcher() + { + return {mover_type {}, host_invoker_type {}, destroyer_type {}, sizeof(T)}; } + /// /// create a Dispatcher that can be used on the device for objects of type T /// /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename T, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { - return {mover_type{}, device_invoker_type{}, destroyer_type{}, sizeof(T)}; + template* = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { + return {mover_type {}, device_invoker_type {}, destroyer_type {}, + sizeof(T)}; } mover_type move_construct_destroy; @@ -453,23 +507,31 @@ struct Dispatcher, DispatcherID, CallArgs... * Version of Dispatcher that does direct dispatch to a single callable type. * It implements the interface with callable objects. */ -template < Platform platform, typename T, typename DispatcherID, typename ... 
CallArgs > -struct Dispatcher, DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::direct_dispatch; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::direct_dispatch; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - struct mover_type { + struct mover_type + { void operator()(void_ptr_wrapper dest, void_ptr_wrapper src) const { T* dest_as_T = static_cast(dest.ptr); - T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + T* src_as_T = static_cast(src.ptr); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } }; @@ -477,28 +539,32 @@ struct Dispatcher, DispatcherID, CallArgs.. /// /// invoke the call operator of the object of type T in obj with args /// - struct host_invoker_type { + struct host_invoker_type + { void operator()(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - struct device_invoker_type { + + struct device_invoker_type + { RAJA_DEVICE void operator()(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - using invoker_type = std::conditional_t; + + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj /// - struct destroyer_type { + struct destroyer_type + { void operator()(void_ptr_wrapper obj) const { T* obj_as_T = static_cast(obj.ptr); @@ -509,23 +575,32 @@ struct Dispatcher, DispatcherID, CallArgs.. 
/// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename U, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { - static_assert(std::is_same::value, "U must be in direct_dispatch types"); - return {mover_type{}, host_invoker_type{}, destroyer_type{}, sizeof(T)}; + template* = nullptr> + static inline Dispatcher makeDispatcher() + { + static_assert(std::is_same::value, + "U must be in direct_dispatch types"); + return {mover_type {}, host_invoker_type {}, destroyer_type {}, sizeof(T)}; } + /// /// create a Dispatcher that can be used on the device for objects of type T /// /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename U, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { - static_assert(std::is_same::value, "U must be in direct_dispatch types"); - return {mover_type{}, device_invoker_type{}, destroyer_type{}, sizeof(T)}; + template* = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { + static_assert(std::is_same::value, + "U must be in direct_dispatch types"); + return {mover_type {}, device_invoker_type {}, destroyer_type {}, + sizeof(T)}; } mover_type move_construct_destroy; @@ -538,46 +613,55 @@ struct Dispatcher, DispatcherID, CallArgs.. * Version of Dispatcher that does direct dispatch to multiple callable types. * It implements the interface with callable objects. */ -template < typename T0, typename T1, typename ... TNs, - Platform platform, typename DispatcherID, typename ... 
CallArgs > -struct Dispatcher, - DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::direct_dispatch; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::direct_dispatch; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; - using id_type = int; - using callable_indices = camp::make_int_seq_t; - using callable_types = camp::list; + using id_type = int; + using callable_indices = camp::make_int_seq_t; + using callable_types = camp::list; /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - struct mover_type { + struct mover_type + { id_type id; void operator()(void_ptr_wrapper dest, void_ptr_wrapper src) const { - impl_helper(callable_indices{}, callable_types{}, - dest, src); + impl_helper(callable_indices {}, callable_types {}, dest, src); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_ptr_wrapper dest, void_ptr_wrapper src) const + template + void impl_helper(camp::int_seq, + camp::list, + void_ptr_wrapper dest, + void_ptr_wrapper src) const { camp::sink(((id_types == id) ? (impl(dest, src), 0) : 0)...); } - template < typename T > + template void impl(void_ptr_wrapper dest, void_ptr_wrapper src) const { T* dest_as_T = static_cast(dest.ptr); - T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + T* src_as_T = static_cast(src.ptr); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } }; @@ -585,79 +669,91 @@ struct Dispatcher, /// /// invoke the call operator of the object of type T in obj with args /// - struct host_invoker_type { + struct host_invoker_type + { id_type id; void operator()(void_cptr_wrapper obj, CallArgs... 
args) const { - impl_helper(callable_indices{}, callable_types{}, - obj, std::forward(args)...); + impl_helper(callable_indices {}, callable_types {}, obj, + std::forward(args)...); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_cptr_wrapper obj, CallArgs... args) const + template + void impl_helper(camp::int_seq, + camp::list, + void_cptr_wrapper obj, + CallArgs... args) const { - camp::sink(((id_types == id) ? (impl(obj, std::forward(args)...), 0) : 0)...); + camp::sink(((id_types == id) + ? (impl(obj, std::forward(args)...), 0) + : 0)...); } - template < typename T > + template void impl(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - struct device_invoker_type { + + struct device_invoker_type + { id_type id; RAJA_DEVICE void operator()(void_cptr_wrapper obj, CallArgs... args) const { - impl_helper(callable_indices{}, callable_types{}, - obj, std::forward(args)...); + impl_helper(callable_indices {}, callable_types {}, obj, + std::forward(args)...); } private: - template < int ... id_types, typename ... Ts > - RAJA_DEVICE void impl_helper(camp::int_seq, camp::list, - void_cptr_wrapper obj, CallArgs... args) const + template + RAJA_DEVICE void impl_helper(camp::int_seq, + camp::list, + void_cptr_wrapper obj, + CallArgs... args) const { - camp::sink(((id_types == id) ? (impl(obj, std::forward(args)...), 0) : 0)...); + camp::sink(((id_types == id) + ? (impl(obj, std::forward(args)...), 0) + : 0)...); } - template < typename T > + template RAJA_DEVICE void impl(void_cptr_wrapper obj, CallArgs... 
args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - using invoker_type = std::conditional_t; + + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj /// - struct destroyer_type { + struct destroyer_type + { id_type id; void operator()(void_ptr_wrapper obj) const { - impl_helper(callable_indices{}, callable_types{}, - obj); + impl_helper(callable_indices {}, callable_types {}, obj); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_ptr_wrapper obj) const + template + void impl_helper(camp::int_seq, + camp::list, + void_ptr_wrapper obj) const { camp::sink(((id_types == id) ? (impl(obj), 0) : 0)...); } - template < typename T > + template void impl(void_ptr_wrapper obj) const { T* obj_as_T = static_cast(obj.ptr); @@ -671,38 +767,50 @@ struct Dispatcher, /// The id is just the index of T in the list of callable_types. /// If T is not in Ts return -1. /// - template < typename T, int ... id_types, typename ... Ts > - static constexpr id_type get_id(camp::int_seq, camp::list) + template + static constexpr id_type get_id(camp::int_seq, + camp::list) { - id_type id{-1}; + id_type id {-1}; // quiet UB warning by sequencing assignment to id with list initialization - int unused[] {0, (std::is_same::value ? ((id = id_types), 0) : 0)...}; - camp::sink(unused); // quiet unused var warning + int unused[] {0, + (std::is_same::value ? 
((id = id_types), 0) : 0)...}; + camp::sink(unused); // quiet unused var warning return id; } /// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename T, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { - static constexpr id_type id = get_id(callable_indices{}, callable_types{}); + template* = nullptr> + static inline Dispatcher makeDispatcher() + { + static constexpr id_type id = + get_id(callable_indices {}, callable_types {}); static_assert(id != id_type(-1), "T must be in direct_dispatch types"); - return {mover_type{id}, host_invoker_type{id}, destroyer_type{id}, sizeof(T)}; + return {mover_type {id}, host_invoker_type {id}, destroyer_type {id}, + sizeof(T)}; } + /// /// create a Dispatcher that can be used on the device for objects of type T /// /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename T, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { - static constexpr id_type id = get_id(callable_indices{}, callable_types{}); + template* = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { + static constexpr id_type id = + get_id(callable_indices {}, callable_types {}); static_assert(id != id_type(-1), "T must be in direct_dispatch types"); - return {mover_type{id}, device_invoker_type{id}, destroyer_type{id}, sizeof(T)}; + return {mover_type {id}, device_invoker_type {id}, destroyer_type {id}, + sizeof(T)}; } mover_type move_construct_destroy; diff --git a/include/RAJA/pattern/WorkGroup/WorkRunner.hpp b/include/RAJA/pattern/WorkGroup/WorkRunner.hpp index 9645f73050..112ad14d38 100644 --- a/include/RAJA/pattern/WorkGroup/WorkRunner.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkRunner.hpp @@ -30,7 +30,6 @@ #include "RAJA/pattern/WorkGroup/Dispatcher.hpp" #include 
"RAJA/policy/WorkGroup.hpp" - namespace RAJA { @@ -40,18 +39,18 @@ namespace detail /*! * A body and args holder for storing loops that are being executed in foralls */ -template +template struct HoldBodyArgs_base { // NOTE: This constructor is disabled when body_in is not LoopBody // to avoid it conflicting with the copy and move constructors - template < typename body_in, - typename = typename std::enable_if< - std::is_same>::value>::type > + template>::value>::type> HoldBodyArgs_base(body_in&& body, Args... args) - : m_body(std::forward(body)) - , m_arg_tuple(std::forward(args)...) - { } + : m_body(std::forward(body)), + m_arg_tuple(std::forward(args)...) + {} protected: LoopBody m_body; @@ -62,7 +61,7 @@ struct HoldBodyArgs_base * A body and args holder for storing loops that are being executed in foralls * that run on the host */ -template +template struct HoldBodyArgs_host : HoldBodyArgs_base { using base = HoldBodyArgs_base; @@ -70,10 +69,10 @@ struct HoldBodyArgs_host : HoldBodyArgs_base RAJA_INLINE void operator()(index_type i) const { - invoke(i, camp::make_idx_seq_t{}); + invoke(i, camp::make_idx_seq_t {}); } - template < camp::idx_t ... Is > + template RAJA_INLINE void invoke(index_type i, camp::idx_seq) const { this->m_body(i, get(this->m_arg_tuple)...); @@ -84,7 +83,7 @@ struct HoldBodyArgs_host : HoldBodyArgs_base * A body and args holder for storing loops that are being executed in foralls * that run on the device */ -template +template struct HoldBodyArgs_device : HoldBodyArgs_base { using base = HoldBodyArgs_base; @@ -92,10 +91,10 @@ struct HoldBodyArgs_device : HoldBodyArgs_base RAJA_DEVICE RAJA_INLINE void operator()(index_type i) const { - invoke(i, camp::make_idx_seq_t{}); + invoke(i, camp::make_idx_seq_t {}); } - template < camp::idx_t ... Is > + template RAJA_DEVICE RAJA_INLINE void invoke(index_type i, camp::idx_seq) const { this->m_body(i, get(this->m_arg_tuple)...); @@ -105,28 +104,29 @@ struct HoldBodyArgs_device : HoldBodyArgs_base /*! 
* A body and segment holder for storing loops that will be executed as foralls */ -template +template struct HoldForall { using resource_type = typename resources::get_resource::type; - using HoldBodyArgs = typename std::conditional< + using HoldBodyArgs = typename std::conditional< !type_traits::is_device_exec_policy::value, HoldBodyArgs_host, - HoldBodyArgs_device >::type; + HoldBodyArgs_device>::type; - template < typename segment_in, typename body_in > + template HoldForall(segment_in&& segment, body_in&& body) - : m_segment(std::forward(segment)) - , m_body(std::forward(body)) - { } + : m_segment(std::forward(segment)), + m_body(std::forward(body)) + {} RAJA_INLINE void operator()(resource_type r, Args... args) const { - wrap::forall(r, - ExecutionPolicy(), - m_segment, - HoldBodyArgs{m_body, std::forward(args)...}); + wrap::forall(r, ExecutionPolicy(), m_segment, + HoldBodyArgs {m_body, std::forward(args)...}); } private: @@ -138,46 +138,50 @@ struct HoldForall /*! * A class that handles running work in a work container */ -template +template struct WorkRunner; - /*! 
* Base class describing storage for ordered runners using forall */ -template +template struct WorkRunnerForallOrdered_base { - using exec_policy = EXEC_POLICY_T; - using order_policy = ORDER_POLICY_T; + using exec_policy = EXEC_POLICY_T; + using order_policy = ORDER_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using Allocator = ALLOCATOR_T; - using index_type = INDEX_T; - using resource_type = typename resources::get_resource::type; + using Allocator = ALLOCATOR_T; + using index_type = INDEX_T; + using resource_type = + typename resources::get_resource::type; using forall_exec_policy = FORALL_EXEC_POLICY; // The type that will hold the segment and loop body in work storage - struct holder_type { - template < typename T > - using type = HoldForall>::type, // segment_type - typename camp::at>::type, // loop_type - index_type, Args...>; + struct holder_type + { + template + using type = + HoldForall>::type, // segment_type + typename camp::at>::type, // loop_type + index_type, + Args...>; }; + /// - template < typename T > + template using holder_type_t = typename holder_type::template type; // The policy indicating where the call function is invoked @@ -186,33 +190,40 @@ struct WorkRunnerForallOrdered_base // The Dispatcher policy with holder_types used internally to handle the // ranges and callables passed in by the user. 
- using dispatcher_holder_policy = dispatcher_transform_types_t; + using dispatcher_holder_policy = + dispatcher_transform_types_t; - using dispatcher_type = Dispatcher; + using dispatcher_type = Dispatcher; WorkRunnerForallOrdered_base() = default; WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base const&) = delete; - WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base const&) = delete; + WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base const&) = + delete; - WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base &&) = default; - WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base &&) = default; + WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base&&) = default; + WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base&&) = + default; // runner interfaces with storage to enqueue so the runner can get // information from the segment and loop at enqueue time - template < typename WorkContainer, typename segment_T, typename loop_T > + template inline void enqueue(WorkContainer& storage, segment_T&& seg, loop_T&& loop) { - using holder = holder_type_t, camp::decay>>; + using holder = + holder_type_t, camp::decay>>; storage.template emplace( - get_Dispatcher(dispatcher_exec_policy{}), + get_Dispatcher(dispatcher_exec_policy {}), std::forward(seg), std::forward(loop)); } // clear any state so ready to be destroyed or reused - void clear() - { } + void clear() {} // no extra storage required here using per_run_storage = int; @@ -221,45 +232,44 @@ struct WorkRunnerForallOrdered_base /*! 
* Runs work in a storage container in order using forall */ -template +template struct WorkRunnerForallOrdered - : WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...> + : WorkRunnerForallOrdered_base { - using base = WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...>; + using base = WorkRunnerForallOrdered_base; using base::base; // run the loops using forall in the order that they were enqueued - template < typename WorkContainer > + template typename base::per_run_storage run(WorkContainer const& storage, typename base::resource_type r, Args... args) const { using value_type = typename WorkContainer::value_type; - typename base::per_run_storage run_storage{}; + typename base::per_run_storage run_storage {}; auto end = storage.end(); - for (auto iter = storage.begin(); iter != end; ++iter) { + for (auto iter = storage.begin(); iter != end; ++iter) + { value_type::host_call(&*iter, r, args...); } @@ -270,46 +280,46 @@ struct WorkRunnerForallOrdered /*! 
* Runs work in a storage container in reverse order using forall */ -template +template struct WorkRunnerForallReverse - : WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...> + : WorkRunnerForallOrdered_base { - using base = WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...>; + using base = WorkRunnerForallOrdered_base; using base::base; - // run the loops using forall in the reverse order to the order they were enqueued - template < typename WorkContainer > + // run the loops using forall in the reverse order to the order they were + // enqueued + template typename base::per_run_storage run(WorkContainer const& storage, typename base::resource_type r, Args... args) const { using value_type = typename WorkContainer::value_type; - typename base::per_run_storage run_storage{}; + typename base::per_run_storage run_storage {}; auto begin = storage.begin(); - for (auto iter = storage.end(); iter != begin; --iter) { - value_type::host_call(&*(iter-1), r, args...); + for (auto iter = storage.end(); iter != begin; --iter) + { + value_type::host_call(&*(iter - 1), r, args...); } return run_storage; diff --git a/include/RAJA/pattern/WorkGroup/WorkStorage.hpp b/include/RAJA/pattern/WorkGroup/WorkStorage.hpp index 52631d108f..20c756e8b3 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStorage.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStorage.hpp @@ -32,7 +32,6 @@ #include "RAJA/pattern/WorkGroup/WorkStruct.hpp" - namespace RAJA { @@ -46,34 +45,30 @@ namespace detail // operator - ( iterator_base const& ) // operator == ( iterator_base const& ) // operator < ( iterator_base const& ) -template < typename iterator_base > +template struct random_access_iterator : iterator_base { - using base = iterator_base; - using value_type = const typename base::value_type; - using pointer = typename 
base::pointer; - using reference = typename base::reference; - using difference_type = typename base::difference_type; + using base = iterator_base; + using value_type = const typename base::value_type; + using pointer = typename base::pointer; + using reference = typename base::reference; + using difference_type = typename base::difference_type; using iterator_category = std::random_access_iterator_tag; using base::base; random_access_iterator(random_access_iterator const&) = default; - random_access_iterator(random_access_iterator &&) = default; + random_access_iterator(random_access_iterator&&) = default; random_access_iterator& operator=(random_access_iterator const&) = default; - random_access_iterator& operator=(random_access_iterator &&) = default; - + random_access_iterator& operator=(random_access_iterator&&) = default; RAJA_HOST_DEVICE reference operator*() const { return *static_cast(*this); } - RAJA_HOST_DEVICE pointer operator->() const - { - return &(*(*this)); - } + RAJA_HOST_DEVICE pointer operator->() const { return &(*(*this)); } RAJA_HOST_DEVICE reference operator[](difference_type i) const { @@ -121,7 +116,8 @@ struct random_access_iterator : iterator_base } RAJA_HOST_DEVICE friend inline random_access_iterator operator+( - random_access_iterator const& lhs, difference_type rhs) + random_access_iterator const& lhs, + difference_type rhs) { random_access_iterator copy = lhs; copy += rhs; @@ -129,7 +125,8 @@ struct random_access_iterator : iterator_base } RAJA_HOST_DEVICE friend inline random_access_iterator operator+( - difference_type lhs, random_access_iterator const& rhs) + difference_type lhs, + random_access_iterator const& rhs) { random_access_iterator copy = rhs; copy += lhs; @@ -137,7 +134,8 @@ struct random_access_iterator : iterator_base } RAJA_HOST_DEVICE friend inline random_access_iterator operator-( - random_access_iterator const& lhs, difference_type rhs) + random_access_iterator const& lhs, + difference_type rhs) { 
random_access_iterator copy = lhs; copy -= rhs; @@ -145,43 +143,50 @@ struct random_access_iterator : iterator_base } RAJA_HOST_DEVICE friend inline difference_type operator-( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) - static_cast(rhs); } RAJA_HOST_DEVICE friend inline bool operator==( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) == static_cast(rhs); } RAJA_HOST_DEVICE friend inline bool operator!=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(lhs == rhs); } RAJA_HOST_DEVICE friend inline bool operator<( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) < static_cast(rhs); } RAJA_HOST_DEVICE friend inline bool operator<=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(rhs < lhs); } RAJA_HOST_DEVICE friend inline bool operator>( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return rhs < lhs; } RAJA_HOST_DEVICE friend inline bool operator>=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(lhs < rhs); } @@ -191,10 +196,10 @@ struct random_access_iterator : iterator_base /*! 
* A storage container for work groups */ -template < typename STORAGE_POLICY_T, typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage { using allocator_traits_type = std::allocator_traits; @@ -202,25 +207,27 @@ class WorkStorage typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; - static_assert(std::is_same::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: - using storage_policy = RAJA::array_of_pointers; + using storage_policy = RAJA::array_of_pointers; using dispatcher_type = Dispatcher_T; - template < typename holder > + template using true_value_type = WorkStruct; - using value_type = GenericWorkStruct; - using allocator_type = ALLOCATOR_T; - using size_type = std::size_t; + using value_type = GenericWorkStruct; + using allocator_type = ALLOCATOR_T; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using reference = value_type&; + using reference = value_type&; using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; + using pointer = value_type*; + using const_pointer = const value_type*; private: // struct used in storage vector to retain pointer and allocation size @@ -231,24 +238,19 @@ class WorkStorage }; public: - - // iterator base class for accessing stored WorkStructs outside of the container + // iterator base class for accessing stored WorkStructs outside of the + // container struct const_iterator_base { - using value_type = const typename WorkStorage::value_type; - using pointer = typename WorkStorage::const_pointer; - using 
reference = typename WorkStorage::const_reference; - using difference_type = typename WorkStorage::difference_type; + using value_type = const typename WorkStorage::value_type; + using pointer = typename WorkStorage::const_pointer; + using reference = typename WorkStorage::const_reference; + using difference_type = typename WorkStorage::difference_type; using iterator_category = std::random_access_iterator_tag; - const_iterator_base(const pointer_and_size* ptrptr) - : m_ptrptr(ptrptr) - { } + const_iterator_base(const pointer_and_size* ptrptr) : m_ptrptr(ptrptr) {} - RAJA_HOST_DEVICE reference operator*() const - { - return *(m_ptrptr->ptr); - } + RAJA_HOST_DEVICE reference operator*() const { return *(m_ptrptr->ptr); } RAJA_HOST_DEVICE const_iterator_base& operator+=(difference_type n) { @@ -257,19 +259,22 @@ class WorkStorage } RAJA_HOST_DEVICE friend inline difference_type operator-( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr - rhs_iter.m_ptrptr; } RAJA_HOST_DEVICE friend inline bool operator==( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr == rhs_iter.m_ptrptr; } RAJA_HOST_DEVICE friend inline bool operator<( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr < rhs_iter.m_ptrptr; } @@ -280,24 +285,25 @@ class WorkStorage using const_iterator = random_access_iterator; - explicit WorkStorage(allocator_type const& aloc) - : m_vec(0, aloc) - , m_aloc(aloc) - { } + : m_vec(0, aloc), + m_aloc(aloc) + {} - WorkStorage(WorkStorage const&) = delete; + WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) 
- : m_vec(std::move(rhs.m_vec)) - , m_aloc(std::move(rhs.m_aloc)) - { } + : m_vec(std::move(rhs.m_vec)), + m_aloc(std::move(rhs.m_aloc)) + {} WorkStorage& operator=(WorkStorage&& rhs) { - if (this != &rhs) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + if (this != &rhs) + { + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment {}); } return *this; } @@ -312,33 +318,26 @@ class WorkStorage } // number of loops stored - size_type size() const - { - return m_vec.size(); - } + size_type size() const { return m_vec.size(); } - const_iterator begin() const - { - return const_iterator(m_vec.begin()); - } + const_iterator begin() const { return const_iterator(m_vec.begin()); } - const_iterator end() const - { - return const_iterator(m_vec.end()); - } + const_iterator end() const { return const_iterator(m_vec.end()); } // number of bytes used for storage of loops size_type storage_size() const { size_type storage_size_nbytes = 0; - for (size_t i = 0; i < m_vec.size(); ++i) { + for (size_t i = 0; i < m_vec.size(); ++i) + { storage_size_nbytes += m_vec[i].size; } return storage_size_nbytes; } - template < typename holder, typename ... holder_ctor_args > - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... 
ctor_args) { m_vec.emplace_back(create_value( dispatcher, std::forward(ctor_args)...)); @@ -347,27 +346,28 @@ class WorkStorage // destroy all stored loops, deallocates all storage void clear() { - while (!m_vec.empty()) { + while (!m_vec.empty()) + { destroy_value(m_vec.back()); m_vec.pop_back(); } m_vec.shrink_to_fit(); } - ~WorkStorage() - { - clear(); - } + ~WorkStorage() { clear(); } private: - RAJAVec> m_vec; + RAJAVec< + pointer_and_size, + typename allocator_traits_type::template rebind_alloc> + m_vec; allocator_type m_aloc; // move assignment if allocator propagates on move assignment void move_assign_private(WorkStorage&& rhs, std::true_type) { clear(); - m_vec = std::move(rhs.m_vec); + m_vec = std::move(rhs.m_vec); m_aloc = std::move(rhs.m_aloc); } @@ -375,12 +375,16 @@ class WorkStorage void move_assign_private(WorkStorage&& rhs, std::false_type) { clear(); - if (m_aloc == rhs.m_aloc) { + if (m_aloc == rhs.m_aloc) + { // take storage if allocators compare equal m_vec = std::move(rhs.m_vec); - } else { + } + else + { // allocate new storage if allocators do not compare equal - for (size_type i = 0; i < rhs.m_vec.size(); ++i) { + for (size_type i = 0; i < rhs.m_vec.size(); ++i) + { m_vec.emplace_back(move_destroy_value(std::move(rhs), rhs.m_vec[i])); } rhs.m_vec.clear(); @@ -389,7 +393,7 @@ class WorkStorage } // allocate and construct value in storage - template < typename holder, typename ... holder_ctor_args > + template pointer_and_size create_value(const dispatcher_type* dispatcher, holder_ctor_args&&... 
ctor_args) { @@ -401,7 +405,7 @@ class WorkStorage value_type::template construct( value_ptr, dispatcher, std::forward(ctor_args)...); - return pointer_and_size{value_ptr, value_size}; + return pointer_and_size {value_ptr, value_size}; } // allocate and move construct object as copy of other value and @@ -414,22 +418,24 @@ class WorkStorage value_type::move_destroy(value_ptr, other_value_and_size.ptr); - allocator_traits_type::deallocate(rhs.m_aloc, - reinterpret_cast(other_value_and_size.ptr), other_value_and_size.size); + allocator_traits_type::deallocate( + rhs.m_aloc, reinterpret_cast(other_value_and_size.ptr), + other_value_and_size.size); - return pointer_and_size{value_ptr, other_value_and_size.size}; + return pointer_and_size {value_ptr, other_value_and_size.size}; } // destroy and deallocate value void destroy_value(pointer_and_size value_and_size_ptr) { value_type::destroy(value_and_size_ptr.ptr); - allocator_traits_type::deallocate(m_aloc, - reinterpret_cast(value_and_size_ptr.ptr), value_and_size_ptr.size); + allocator_traits_type::deallocate( + m_aloc, reinterpret_cast(value_and_size_ptr.ptr), + value_and_size_ptr.size); } }; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage { using allocator_traits_type = std::allocator_traits; @@ -437,44 +443,46 @@ class WorkStorage typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; - static_assert(std::is_same::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: - using storage_policy = RAJA::ragged_array_of_objects; + using storage_policy = RAJA::ragged_array_of_objects; using dispatcher_type = Dispatcher_T; - template < typename holder > + template 
using true_value_type = WorkStruct; - using value_type = GenericWorkStruct; - using allocator_type = ALLOCATOR_T; - using size_type = std::size_t; + using value_type = GenericWorkStruct; + using allocator_type = ALLOCATOR_T; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using reference = value_type&; + using reference = value_type&; using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; + using pointer = value_type*; + using const_pointer = const value_type*; - // iterator base class for accessing stored WorkStructs outside of the container + // iterator base class for accessing stored WorkStructs outside of the + // container struct const_iterator_base { - using value_type = const typename WorkStorage::value_type; - using pointer = typename WorkStorage::const_pointer; - using reference = typename WorkStorage::const_reference; - using difference_type = typename WorkStorage::difference_type; + using value_type = const typename WorkStorage::value_type; + using pointer = typename WorkStorage::const_pointer; + using reference = typename WorkStorage::const_reference; + using difference_type = typename WorkStorage::difference_type; using iterator_category = std::random_access_iterator_tag; const_iterator_base(const char* array_begin, const size_type* offset_iter) - : m_array_begin(array_begin) - , m_offset_iter(offset_iter) - { } + : m_array_begin(array_begin), + m_offset_iter(offset_iter) + {} RAJA_HOST_DEVICE reference operator*() const { - return *reinterpret_cast( - m_array_begin + *m_offset_iter); + return *reinterpret_cast(m_array_begin + *m_offset_iter); } RAJA_HOST_DEVICE const_iterator_base& operator+=(difference_type n) @@ -484,19 +492,22 @@ class WorkStorage } RAJA_HOST_DEVICE friend inline difference_type operator-( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { 
return lhs_iter.m_offset_iter - rhs_iter.m_offset_iter; } RAJA_HOST_DEVICE friend inline bool operator==( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_offset_iter == rhs_iter.m_offset_iter; } RAJA_HOST_DEVICE friend inline bool operator<( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_offset_iter < rhs_iter.m_offset_iter; } @@ -508,31 +519,32 @@ class WorkStorage using const_iterator = random_access_iterator; - explicit WorkStorage(allocator_type const& aloc) - : m_offsets(0, aloc) - , m_aloc(aloc) - { } + : m_offsets(0, aloc), + m_aloc(aloc) + {} - WorkStorage(WorkStorage const&) = delete; + WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) - : m_offsets(std::move(rhs.m_offsets)) - , m_array_begin(rhs.m_array_begin) - , m_array_end(rhs.m_array_end) - , m_array_cap(rhs.m_array_cap) - , m_aloc(std::move(rhs.m_aloc)) + : m_offsets(std::move(rhs.m_offsets)), + m_array_begin(rhs.m_array_begin), + m_array_end(rhs.m_array_end), + m_array_cap(rhs.m_array_cap), + m_aloc(std::move(rhs.m_aloc)) { rhs.m_array_begin = nullptr; - rhs.m_array_end = nullptr; - rhs.m_array_cap = nullptr; + rhs.m_array_end = nullptr; + rhs.m_array_cap = nullptr; } WorkStorage& operator=(WorkStorage&& rhs) { - if (this != &rhs) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + if (this != &rhs) + { + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment {}); } return *this; } @@ -546,10 +558,7 @@ class WorkStorage } // number of loops stored - size_type size() const - { - return m_offsets.size(); - } + size_type size() const { return m_offsets.size(); } const_iterator begin() const { @@ -562,17 +571,15 @@ class 
WorkStorage } // number of bytes used for storage of loops - size_type storage_size() const - { - return m_array_end - m_array_begin; - } + size_type storage_size() const { return m_array_end - m_array_begin; } - template < typename holder, typename ... holder_ctor_args > - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { size_type value_offset = storage_size(); - size_type value_size = create_value(value_offset, - dispatcher, std::forward(ctor_args)...); + size_type value_size = create_value( + value_offset, dispatcher, std::forward(ctor_args)...); m_offsets.emplace_back(value_offset); m_array_end += value_size; } @@ -581,21 +588,22 @@ class WorkStorage void clear() { array_clear(); - if (m_array_begin != nullptr) { - allocator_traits_type::deallocate(m_aloc, m_array_begin, storage_capacity()); + if (m_array_begin != nullptr) + { + allocator_traits_type::deallocate(m_aloc, m_array_begin, + storage_capacity()); m_array_begin = nullptr; m_array_end = nullptr; m_array_cap = nullptr; } } - ~WorkStorage() - { - clear(); - } + ~WorkStorage() { clear(); } private: - RAJAVec> m_offsets; + RAJAVec> + m_offsets; char* m_array_begin = nullptr; char* m_array_end = nullptr; char* m_array_cap = nullptr; @@ -608,8 +616,8 @@ class WorkStorage m_offsets = std::move(rhs.m_offsets); m_array_begin = rhs.m_array_begin; - m_array_end = rhs.m_array_end ; - m_array_cap = rhs.m_array_cap ; + m_array_end = rhs.m_array_end; + m_array_cap = rhs.m_array_cap; m_aloc = std::move(rhs.m_aloc); rhs.m_array_begin = nullptr; @@ -621,25 +629,29 @@ class WorkStorage void move_assign_private(WorkStorage&& rhs, std::false_type) { clear(); - if (m_aloc == rhs.m_aloc) { + if (m_aloc == rhs.m_aloc) + { m_offsets = std::move(rhs.m_offsets); m_array_begin = rhs.m_array_begin; - m_array_end = rhs.m_array_end ; - m_array_cap = rhs.m_array_cap ; + m_array_end = rhs.m_array_end; + 
m_array_cap = rhs.m_array_cap; rhs.m_array_begin = nullptr; rhs.m_array_end = nullptr; rhs.m_array_cap = nullptr; - } else { + } + else + { array_reserve(rhs.storage_size()); - for (size_type i = 0; i < rhs.size(); ++i) { + for (size_type i = 0; i < rhs.size(); ++i) + { m_array_end = m_array_begin + rhs.m_offsets[i]; move_destroy_value(m_array_end, rhs.m_array_begin + rhs.m_offsets[i]); m_offsets.emplace_back(rhs.m_offsets[i]); } - m_array_end = m_array_begin + rhs.storage_size(); + m_array_end = m_array_begin + rhs.storage_size(); rhs.m_array_end = rhs.m_array_begin; rhs.m_offsets.clear(); rhs.clear(); @@ -647,46 +659,45 @@ class WorkStorage } // get loop storage capacity, used and unused in bytes - size_type storage_capacity() const - { - return m_array_cap - m_array_begin; - } + size_type storage_capacity() const { return m_array_cap - m_array_begin; } // get unused loop storage capacity in bytes - size_type storage_unused() const - { - return m_array_cap - m_array_end; - } + size_type storage_unused() const { return m_array_cap - m_array_end; } // reserve space for loop_storage_size bytes of loop storage void array_reserve(size_type loop_storage_size) { - if (loop_storage_size > storage_capacity()) { + if (loop_storage_size > storage_capacity()) + { char* new_array_begin = allocator_traits_type::allocate(m_aloc, loop_storage_size); - char* new_array_end = new_array_begin + storage_size(); - char* new_array_cap = new_array_begin + loop_storage_size; + char* new_array_end = new_array_begin + storage_size(); + char* new_array_cap = new_array_begin + loop_storage_size; - for (size_type i = 0; i < size(); ++i) { + for (size_type i = 0; i < size(); ++i) + { move_destroy_value(new_array_begin + m_offsets[i], - m_array_begin + m_offsets[i]); + m_array_begin + m_offsets[i]); } - if (m_array_begin != nullptr) { - allocator_traits_type::deallocate(m_aloc, m_array_begin, storage_capacity()); + if (m_array_begin != nullptr) + { + allocator_traits_type::deallocate(m_aloc, 
m_array_begin, + storage_capacity()); } m_array_begin = new_array_begin; - m_array_end = new_array_end ; - m_array_cap = new_array_cap ; + m_array_end = new_array_end; + m_array_cap = new_array_cap; } } // destroy loop objects (does not deallocate array storage) void array_clear() { - while (!m_offsets.empty()) { + while (!m_offsets.empty()) + { destroy_value(m_offsets.back()); m_array_end = m_array_begin + m_offsets.back(); m_offsets.pop_back(); @@ -696,15 +707,17 @@ class WorkStorage // ensure there is enough storage to hold the next loop body at value offset // and store the loop body - template < typename holder, typename ... holder_ctor_args > + template size_type create_value(size_type value_offset, const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) { const size_type value_size = sizeof(true_value_type); - if (value_size > storage_unused()) { - array_reserve(std::max(storage_size() + value_size, 2*storage_capacity())); + if (value_size > storage_unused()) + { + array_reserve( + std::max(storage_size() + value_size, 2 * storage_capacity())); } pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); @@ -726,13 +739,12 @@ class WorkStorage // destroy the loop body at value offset void destroy_value(size_type value_offset) { - pointer value_ptr = - reinterpret_cast(m_array_begin + value_offset); + pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); value_type::destroy(value_ptr); } }; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage @@ -742,39 +754,42 @@ class WorkStorage::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: - using storage_policy = RAJA::constant_stride_array_of_objects; + using storage_policy = RAJA::constant_stride_array_of_objects; using dispatcher_type = Dispatcher_T; - template < typename holder > + template using true_value_type = WorkStruct; - using value_type = GenericWorkStruct; - using allocator_type = 
ALLOCATOR_T; - using size_type = std::size_t; + using value_type = GenericWorkStruct; + using allocator_type = ALLOCATOR_T; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using reference = value_type&; + using reference = value_type&; using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; + using pointer = value_type*; + using const_pointer = const value_type*; - // iterator base class for accessing stored WorkStructs outside of the container + // iterator base class for accessing stored WorkStructs outside of the + // container struct const_iterator_base { - using value_type = const typename WorkStorage::value_type; - using pointer = typename WorkStorage::const_pointer; - using reference = typename WorkStorage::const_reference; - using difference_type = typename WorkStorage::difference_type; + using value_type = const typename WorkStorage::value_type; + using pointer = typename WorkStorage::const_pointer; + using reference = typename WorkStorage::const_reference; + using difference_type = typename WorkStorage::difference_type; using iterator_category = std::random_access_iterator_tag; const_iterator_base(const char* array_pos, size_type stride) - : m_array_pos(array_pos) - , m_stride(stride) - { } + : m_array_pos(array_pos), + m_stride(stride) + {} RAJA_HOST_DEVICE reference operator*() const { @@ -788,19 +803,22 @@ class WorkStorage; + explicit WorkStorage(allocator_type const& aloc) : m_aloc(aloc) {} - explicit WorkStorage(allocator_type const& aloc) - : m_aloc(aloc) - { } - - WorkStorage(WorkStorage const&) = delete; + WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) - : m_aloc(std::move(rhs.m_aloc)) - , m_stride(rhs.m_stride) - , m_array_begin(rhs.m_array_begin) - , m_array_end(rhs.m_array_end) - , m_array_cap(rhs.m_array_cap) + : m_aloc(std::move(rhs.m_aloc)), + m_stride(rhs.m_stride), + 
m_array_begin(rhs.m_array_begin), + m_array_end(rhs.m_array_end), + m_array_cap(rhs.m_array_cap) { // do not reset stride, leave it for reuse rhs.m_array_begin = nullptr; @@ -835,8 +850,10 @@ class WorkStorage - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { - create_value(dispatcher, std::forward(ctor_args)...); + create_value(dispatcher, + std::forward(ctor_args)...); m_array_end += m_stride; } @@ -883,22 +893,21 @@ class WorkStorage storage_capacity() || new_stride > m_stride) { + if (loop_storage_size > storage_capacity() || new_stride > m_stride) + { char* new_array_begin = allocator_traits_type::allocate(m_aloc, loop_storage_size); - char* new_array_end = new_array_begin + size() * new_stride; - char* new_array_cap = new_array_begin + loop_storage_size; + char* new_array_end = new_array_begin + size() * new_stride; + char* new_array_cap = new_array_begin + loop_storage_size; - for (size_type i = 0; i < size(); ++i) { + for (size_type i = 0; i < size(); ++i) + { move_destroy_value(new_array_begin + i * new_stride, - m_array_begin + i * m_stride); + m_array_begin + i * m_stride); } - if (m_array_begin != nullptr) { - allocator_traits_type::deallocate(m_aloc, m_array_begin, storage_capacity()); + if (m_array_begin != nullptr) + { + allocator_traits_type::deallocate(m_aloc, m_array_begin, + storage_capacity()); } - m_stride = new_stride ; + m_stride = new_stride; m_array_begin = new_array_begin; - m_array_end = new_array_end ; - m_array_cap = new_array_cap ; + m_array_end = new_array_end; + m_array_cap = new_array_cap; } } // destroy the loops in storage (does not deallocate loop storage) void array_clear() { - for (size_type value_offset = storage_size(); value_offset > 0; value_offset -= m_stride) { + for (size_type value_offset = storage_size(); value_offset > 0; + value_offset -= m_stride) + { destroy_value(value_offset - 
m_stride); m_array_end -= m_stride; } @@ -1002,18 +1015,20 @@ class WorkStorage + template void create_value(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) { const size_type value_size = sizeof(true_value_type); - if (value_size > storage_unused() && value_size <= m_stride) { - array_reserve(std::max(storage_size() + m_stride, 2*storage_capacity()), + if (value_size > storage_unused() && value_size <= m_stride) + { + array_reserve(std::max(storage_size() + m_stride, 2 * storage_capacity()), m_stride); - } else if (value_size > m_stride) { - array_reserve((size()+1)*value_size, - value_size); + } + else if (value_size > m_stride) + { + array_reserve((size() + 1) * value_size, value_size); } size_type value_offset = storage_size(); @@ -1025,8 +1040,7 @@ class WorkStorage(value_ptr), reinterpret_cast(other_value_ptr)); @@ -1035,8 +1049,7 @@ class WorkStorage(m_array_begin + value_offset); + pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); value_type::destroy(value_ptr); } }; diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp index 72e1540c54..0c799efd18 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp @@ -25,7 +25,6 @@ #include "RAJA/pattern/WorkGroup/Dispatcher.hpp" - namespace RAJA { @@ -35,7 +34,7 @@ namespace detail /*! * A struct that gives a generic way to layout memory for different loops */ -template < size_t size, typename Dispatcher_T > +template struct WorkStruct; /*! @@ -44,67 +43,76 @@ struct WorkStruct; * offsetof(GenericWorkStruct<>, obj) == offsetof(WorkStruct, obj) * sizeof(GenericWorkStruct) <= sizeof(WorkStruct) */ -template < typename Dispatcher_T > +template using GenericWorkStruct = WorkStruct; -template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... 
CallArgs > -struct WorkStruct> +template +struct WorkStruct< + size, + Dispatcher> { - using dispatcher_type = Dispatcher; + using dispatcher_type = + Dispatcher; // construct a WorkStruct with a value of type holder from the args and // check a variety of constraints at compile time - template < typename holder, typename ... holder_ctor_args > - static RAJA_INLINE - void construct(void* ptr, const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + static RAJA_INLINE void construct(void* ptr, + const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { using true_value_type = WorkStruct; - using value_type = GenericWorkStruct; + using value_type = GenericWorkStruct; static_assert(sizeof(holder) <= sizeof(true_value_type::obj), - "holder must fit in WorkStruct::obj"); + "holder must fit in WorkStruct::obj"); static_assert(std::is_standard_layout::value, - "WorkStruct must be a standard layout type"); + "WorkStruct must be a standard layout type"); static_assert(std::is_standard_layout::value, - "GenericWorkStruct must be a standard layout type"); - static_assert(offsetof(value_type, obj) == offsetof(true_value_type, obj), + "GenericWorkStruct must be a standard layout type"); + static_assert( + offsetof(value_type, obj) == offsetof(true_value_type, obj), "WorkStruct and GenericWorkStruct must have obj at the same offset"); static_assert(sizeof(value_type) <= sizeof(true_value_type), - "WorkStruct must not be smaller than GenericWorkStruct"); + "WorkStruct must not be smaller than GenericWorkStruct"); true_value_type* value_ptr = static_cast(ptr); value_ptr->dispatcher = dispatcher; - value_ptr->invoke = dispatcher->invoke; - new(&value_ptr->obj) holder(std::forward(ctor_args)...); + value_ptr->invoke = dispatcher->invoke; + new (&value_ptr->obj) holder(std::forward(ctor_args)...); } // move construct in dst from the value in src and destroy the value in src - static RAJA_INLINE - void move_destroy(WorkStruct* value_dst, - 
WorkStruct* value_src) + static RAJA_INLINE void move_destroy(WorkStruct* value_dst, + WorkStruct* value_src) { value_dst->dispatcher = value_src->dispatcher; - value_dst->invoke = value_src->invoke; - value_dst->dispatcher->move_construct_destroy(&value_dst->obj, &value_src->obj); + value_dst->invoke = value_src->invoke; + value_dst->dispatcher->move_construct_destroy(&value_dst->obj, + &value_src->obj); } // destroy the value ptr - static RAJA_INLINE - void destroy(WorkStruct* value_ptr) + static RAJA_INLINE void destroy(WorkStruct* value_ptr) { value_ptr->dispatcher->destroy(&value_ptr->obj); } // invoke the call operator of the value ptr with args - static RAJA_INLINE - void host_call(const WorkStruct* value_ptr, CallArgs... args) + static RAJA_INLINE void host_call(const WorkStruct* value_ptr, + CallArgs... args) { value_ptr->invoke(&value_ptr->obj, std::forward(args)...); } + /// // invoke the call operator of the value ptr with args - static RAJA_DEVICE RAJA_INLINE - void device_call(const WorkStruct* value_ptr, CallArgs... args) + static RAJA_DEVICE RAJA_INLINE void device_call(const WorkStruct* value_ptr, + CallArgs... args) { value_ptr->invoke(&value_ptr->obj, std::forward(args)...); } diff --git a/include/RAJA/pattern/atomic.hpp b/include/RAJA/pattern/atomic.hpp index d5905f7928..846a45fc94 100644 --- a/include/RAJA/pattern/atomic.hpp +++ b/include/RAJA/pattern/atomic.hpp @@ -86,26 +86,24 @@ namespace RAJA * @return Value at acc */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(T *acc) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(T* acc) { - return RAJA::atomicLoad(Policy{}, acc); + return RAJA::atomicLoad(Policy {}, acc); } - /*! 
* @brief Atomic store * @param acc Pointer to location of value * @param value Value to store at *acc */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T* acc, T value) { - RAJA::atomicStore(Policy{}, acc, value); + RAJA::atomicStore(Policy {}, acc, value); } - /*! * @brief Atomic add * @param acc Pointer to location of result value @@ -113,13 +111,12 @@ RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T *acc, T value) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T* acc, T value) { - return RAJA::atomicAdd(Policy{}, acc, value); + return RAJA::atomicAdd(Policy {}, acc, value); } - /*! * @brief Atomic subtract * @param acc Pointer to location of result value @@ -127,13 +124,12 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T *acc, T value) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T* acc, T value) { - return RAJA::atomicSub(Policy{}, acc, value); + return RAJA::atomicSub(Policy {}, acc, value); } - /*! * @brief Atomic minimum equivalent to (*acc) = std::min(*acc, value) * @param acc Pointer to location of result value @@ -141,13 +137,12 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T *acc, T value) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T* acc, T value) { - return RAJA::atomicMin(Policy{}, acc, value); + return RAJA::atomicMin(Policy {}, acc, value); } - /*! 
* @brief Atomic maximum equivalent to (*acc) = std::max(*acc, value) * @param acc Pointer to location of result value @@ -155,26 +150,24 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T *acc, T value) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(T* acc, T value) { - return RAJA::atomicMax(Policy{}, acc, value); + return RAJA::atomicMax(Policy {}, acc, value); } - /*! * @brief Atomic increment * @param acc Pointer to location of value to increment * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T* acc) { - return RAJA::atomicInc(Policy{}, acc); + return RAJA::atomicInc(Policy {}, acc); } - /*! * @brief Atomic increment with bound * Equivalent to *acc = ((*acc >= compare) ? 0 : ((*acc)+1)) @@ -184,26 +177,24 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc, T compare) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T* acc, T compare) { - return RAJA::atomicInc(Policy{}, acc, compare); + return RAJA::atomicInc(Policy {}, acc, compare); } - /*! * @brief Atomic decrement * @param acc Pointer to location of value to decrement * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T* acc) { - return RAJA::atomicDec(Policy{}, acc); + return RAJA::atomicDec(Policy {}, acc); } - /*! 
* @brief Atomic decrement with bound * Equivalent to *acc = (((*acc==0)|(*acc>compare))?compare:((*acc)-1)) @@ -213,13 +204,12 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc, T compare) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T* acc, T compare) { - return RAJA::atomicDec(Policy{}, acc, compare); + return RAJA::atomicDec(Policy {}, acc, compare); } - /*! * @brief Atomic bitwise AND equivalent to (*acc) = (*acc) & value * This only works with integral data types @@ -228,15 +218,14 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc, T compare) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T* acc, T value) { static_assert(std::is_integral::value, "atomicAnd can only be used on integral types"); - return RAJA::atomicAnd(Policy{}, acc, value); + return RAJA::atomicAnd(Policy {}, acc, value); } - /*! * @brief Atomic bitwise OR equivalent to (*acc) = (*acc) | value * This only works with integral data types @@ -245,15 +234,14 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T *acc, T value) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T* acc, T value) { static_assert(std::is_integral::value, "atomicOr can only be used on integral types"); - return RAJA::atomicOr(Policy{}, acc, value); + return RAJA::atomicOr(Policy {}, acc, value); } - /*! 
* @brief Atomic bitwise XOR equivalent to (*acc) = (*acc) ^ value * This only works with integral data types @@ -262,15 +250,14 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T *acc, T value) * @return Returns value at acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T* acc, T value) { static_assert(std::is_integral::value, "atomicXor can only be used on integral types"); - return RAJA::atomicXor(Policy{}, acc, value); + return RAJA::atomicXor(Policy {}, acc, value); } - /*! * @brief Atomic value exchange * @param acc Pointer to location to store value @@ -278,13 +265,12 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T *acc, T value) * @return Returns value at *acc immediately before this operation completed */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T *acc, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T* acc, T value) { - return RAJA::atomicExchange(Policy{}, acc, value); + return RAJA::atomicExchange(Policy {}, acc, value); } - /*! * @brief Atomic compare and swap * @param acc Pointer to location to store value @@ -294,10 +280,10 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T *acc, T compare, T value) +template +RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T* acc, T compare, T value) { - return RAJA::atomicCAS(Policy{}, acc, compare, value); + return RAJA::atomicCAS(Policy {}, acc, compare, value); } /*! 
@@ -309,32 +295,32 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T *acc, T compare, T value) * This object provides an OO interface to the global function calls provided * as RAJA::atomicXXX */ -template +template class AtomicRef { public: using value_type = T; RAJA_INLINE + RAJA_HOST_DEVICE - constexpr explicit AtomicRef(value_type *value_ptr) - : m_value_ptr(value_ptr) {} + constexpr explicit AtomicRef(value_type* value_ptr) : m_value_ptr(value_ptr) + {} RAJA_INLINE + RAJA_HOST_DEVICE - constexpr AtomicRef(AtomicRef const &c) - : m_value_ptr(c.m_value_ptr) {} + constexpr AtomicRef(AtomicRef const& c) : m_value_ptr(c.m_value_ptr) {} AtomicRef& operator=(AtomicRef const&) = delete; RAJA_INLINE + RAJA_HOST_DEVICE - value_type * getPointer() const - { - return m_value_ptr; - } + value_type* getPointer() const { return m_value_ptr; } RAJA_INLINE + RAJA_HOST_DEVICE void store(value_type rhs) const { @@ -342,6 +328,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator=(value_type rhs) const { @@ -350,20 +337,17 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE - value_type load() const - { - return RAJA::atomicLoad(m_value_ptr); - } + value_type load() const { return RAJA::atomicLoad(m_value_ptr); } RAJA_INLINE + RAJA_HOST_DEVICE - operator value_type() const - { - return RAJA::atomicLoad(m_value_ptr); - } + operator value_type() const { return RAJA::atomicLoad(m_value_ptr); } RAJA_INLINE + RAJA_HOST_DEVICE value_type exchange(value_type rhs) const { @@ -371,6 +355,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type CAS(value_type compare, value_type rhs) const { @@ -378,20 +363,25 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE bool compare_exchange_strong(value_type& expect, value_type rhs) const { value_type compare = expect; - value_type old = RAJA::atomicCAS(m_value_ptr, compare, rhs); - if (compare == old) { + value_type old = RAJA::atomicCAS(m_value_ptr, compare, rhs); + if (compare == old) + { return true; - } else { + } + 
else + { expect = old; return false; } } RAJA_INLINE + RAJA_HOST_DEVICE bool compare_exchange_weak(value_type& expect, value_type rhs) const { @@ -399,6 +389,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator++() const { @@ -406,6 +397,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator++(int) const { @@ -413,6 +405,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator--() const { @@ -420,6 +413,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator--(int) const { @@ -427,6 +421,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type fetch_add(value_type rhs) const { @@ -434,6 +429,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator+=(value_type rhs) const { @@ -441,6 +437,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type fetch_sub(value_type rhs) const { @@ -448,6 +445,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator-=(value_type rhs) const { @@ -455,6 +453,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type fetch_min(value_type rhs) const { @@ -462,6 +461,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type min(value_type rhs) const { @@ -470,6 +470,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type fetch_max(value_type rhs) const { @@ -477,6 +478,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type max(value_type rhs) const { @@ -485,6 +487,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type fetch_and(value_type rhs) const { @@ -492,6 +495,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator&=(value_type rhs) const { @@ -499,6 +503,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type fetch_or(value_type rhs) const { @@ -506,6 +511,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator|=(value_type rhs) const { @@ -513,6 +519,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type 
fetch_xor(value_type rhs) const { @@ -520,6 +527,7 @@ class AtomicRef } RAJA_INLINE + RAJA_HOST_DEVICE value_type operator^=(value_type rhs) const { @@ -527,7 +535,7 @@ class AtomicRef } private: - value_type *m_value_ptr; + value_type* m_value_ptr; }; diff --git a/include/RAJA/pattern/detail/algorithm.hpp b/include/RAJA/pattern/detail/algorithm.hpp index 21d266bd21..634cf70ce4 100644 --- a/include/RAJA/pattern/detail/algorithm.hpp +++ b/include/RAJA/pattern/detail/algorithm.hpp @@ -32,47 +32,45 @@ namespace RAJA namespace detail { -template +template using IterVal = typename ::std::iterator_traits::value_type; -template +template using IterRef = typename ::std::iterator_traits::reference; -template +template using IterDiff = typename ::std::iterator_traits::difference_type; -template +template using ContainerIter = camp::iterator_from; -template +template using ContainerVal = camp::decay>())>; -template -using ContainerRef = - decltype(*camp::val>()); +template +using ContainerRef = decltype(*camp::val>()); -template +template using ContainerDiff = - camp::decay>()-camp::val>())>; + camp::decay>() - + camp::val>())>; -template -RAJA_INLINE -DiffType firstIndex(DiffType n, CountType num_threads, CountType thread_id) +template +RAJA_INLINE DiffType firstIndex(DiffType n, + CountType num_threads, + CountType thread_id) { return (static_cast(n) * thread_id) / num_threads; } } // end namespace detail - /*! \brief swap values at iterators lhs and rhs */ -template -RAJA_HOST_DEVICE RAJA_INLINE -void -safe_iter_swap(Iter lhs, Iter rhs) +template +RAJA_HOST_DEVICE RAJA_INLINE void safe_iter_swap(Iter lhs, Iter rhs) { #ifdef RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE using camp::safe_swap; @@ -86,10 +84,8 @@ safe_iter_swap(Iter lhs, Iter rhs) /*! \brief returns iterator to next item */ -template -RAJA_HOST_DEVICE RAJA_INLINE -Iter -next(Iter it) +template +RAJA_HOST_DEVICE RAJA_INLINE Iter next(Iter it) { ++it; return it; @@ -98,10 +94,8 @@ next(Iter it) /*! 
\brief returns iterator to next item */ -template -RAJA_HOST_DEVICE RAJA_INLINE -Iter -prev(Iter it) +template +RAJA_HOST_DEVICE RAJA_INLINE Iter prev(Iter it) { --it; return it; diff --git a/include/RAJA/pattern/detail/forall.hpp b/include/RAJA/pattern/detail/forall.hpp index 3bd5d7ecaf..aa9a3ac888 100644 --- a/include/RAJA/pattern/detail/forall.hpp +++ b/include/RAJA/pattern/detail/forall.hpp @@ -19,12 +19,12 @@ #ifndef RAJA_PATTERN_DETAIL_FORALL_HPP #define RAJA_PATTERN_DETAIL_FORALL_HPP -#define RAJA_EXTRACT_BED_SUFFIXED(CONTAINER, SUFFIX) \ - using std::begin; \ - using std::end; \ - using std::distance; \ - auto begin##SUFFIX = begin(CONTAINER); \ - auto end##SUFFIX = end(CONTAINER); \ +#define RAJA_EXTRACT_BED_SUFFIXED(CONTAINER, SUFFIX) \ + using std::begin; \ + using std::end; \ + using std::distance; \ + auto begin##SUFFIX = begin(CONTAINER); \ + auto end##SUFFIX = end(CONTAINER); \ auto distance##SUFFIX = distance(begin##SUFFIX, end##SUFFIX) #define RAJA_EXTRACT_BED_IT(CONTAINER) RAJA_EXTRACT_BED_SUFFIXED(CONTAINER, _it) diff --git a/include/RAJA/pattern/detail/multi_reduce.hpp b/include/RAJA/pattern/detail/multi_reduce.hpp index 884b9aa989..7e2cdabf7a 100644 --- a/include/RAJA/pattern/detail/multi_reduce.hpp +++ b/include/RAJA/pattern/detail/multi_reduce.hpp @@ -26,32 +26,29 @@ #include "RAJA/util/RepeatView.hpp" -#define RAJA_DECLARE_MULTI_REDUCER(OP_NAME, OP, POL, DATA) \ - template \ - struct MultiReduce##OP_NAME, T> \ - : reduce::detail::BaseMultiReduce##OP_NAME< \ - DATA, tuning>> \ - { \ - using policy = POL; \ - using Base = reduce::detail::BaseMultiReduce##OP_NAME< \ - DATA, tuning>>; \ - using Base::Base; \ - using typename Base::value_type; \ - using typename Base::reference; \ - \ - RAJA_SUPPRESS_HD_WARN \ - RAJA_HOST_DEVICE \ - reference operator[](size_t bin) const \ - { \ - return reference(*this, bin); \ - } \ +#define RAJA_DECLARE_MULTI_REDUCER(OP_NAME, OP, POL, DATA) \ + template \ + struct MultiReduce##OP_NAME, T> \ + : 
reduce::detail::BaseMultiReduce##OP_NAME< \ + DATA, tuning>> \ + { \ + using policy = POL; \ + using Base = reduce::detail::BaseMultiReduce##OP_NAME< \ + DATA, tuning>>; \ + using Base::Base; \ + using typename Base::value_type; \ + using typename Base::reference; \ + \ + RAJA_SUPPRESS_HD_WARN \ + RAJA_HOST_DEVICE \ + reference operator[](size_t bin) const { return reference(*this, bin); } \ }; -#define RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Sum, sum, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Min, min, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Max, max, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(BitOr, or_bit, POL, DATA) \ +#define RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Sum, sum, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Min, min, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Max, max, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(BitOr, or_bit, POL, DATA) \ RAJA_DECLARE_MULTI_REDUCER(BitAnd, and_bit, POL, DATA) namespace RAJA @@ -63,36 +60,40 @@ namespace reduce namespace detail { -template +template struct BaseMultiReduce { using MultiReduceData = t_MultiReduceData; - using MultiReduceOp = typename t_MultiReduceData::MultiReduceOp; - using value_type = typename t_MultiReduceData::value_type; + using MultiReduceOp = typename t_MultiReduceData::MultiReduceOp; + using value_type = typename t_MultiReduceData::value_type; - BaseMultiReduce() : BaseMultiReduce{RepeatView(MultiReduceOp::identity(), 0)} {} + BaseMultiReduce() + : BaseMultiReduce {RepeatView(MultiReduceOp::identity(), 0)} + {} explicit BaseMultiReduce(size_t num_bins, value_type init_val = MultiReduceOp::identity(), value_type identity = MultiReduceOp::identity()) - : BaseMultiReduce{RepeatView(init_val, num_bins), identity} - { } - - template < typename Container, - concepts::enable_if_t, - concepts::negate>, - concepts::negate>>* = nullptr > + : BaseMultiReduce {RepeatView(init_val, num_bins), identity} + {} + + template, + concepts::negate>, 
+ concepts::negate>>* = + nullptr> explicit BaseMultiReduce(Container const& container, value_type identity = MultiReduceOp::identity()) - : data{container, identity} - { } + : data {container, identity} + {} RAJA_SUPPRESS_HD_WARN BaseMultiReduce(BaseMultiReduce const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduce(BaseMultiReduce &&) = default; - BaseMultiReduce &operator=(BaseMultiReduce const&) = delete; - BaseMultiReduce &operator=(BaseMultiReduce &&) = delete; + BaseMultiReduce(BaseMultiReduce&&) = default; + BaseMultiReduce& operator=(BaseMultiReduce const&) = delete; + BaseMultiReduce& operator=(BaseMultiReduce&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduce() = default; @@ -108,24 +109,27 @@ struct BaseMultiReduce reset(RepeatView(init_val, num_bins), identity); } - template < typename Container, - concepts::enable_if_t>* = nullptr > + template>* = nullptr> void reset(Container const& container, value_type identity = MultiReduceOp::identity()) { - for (size_t bin = 0; bin < data.num_bins(); ++bin) { - RAJA_UNUSED_VAR(get(bin)); // automatic get() before reset + for (size_t bin = 0; bin < data.num_bins(); ++bin) + { + RAJA_UNUSED_VAR(get(bin)); // automatic get() before reset } data.reset(container, identity); } RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE size_t size() const { return data.num_bins(); } RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - BaseMultiReduce const& combine(size_t bin, value_type const &other) const + BaseMultiReduce const& combine(size_t bin, value_type const& other) const { data.combine(bin, other); return *this; @@ -135,16 +139,19 @@ struct BaseMultiReduce value_type get(size_t bin) const { return data.get(bin); } //! 
Get the calculated reduced value for each bin and store it in container - template < typename Container, - concepts::enable_if_t>* = nullptr > + template>* = nullptr> void get_all(Container& container) const { RAJA_EXTRACT_BED_IT(container); - if (size_t(distance_it) != data.num_bins()) { - RAJA_ABORT_OR_THROW("MultiReduce::get_all container has different size than multi reducer"); + if (size_t(distance_it) != data.num_bins()) + { + RAJA_ABORT_OR_THROW("MultiReduce::get_all container has different size " + "than multi reducer"); } size_t bin = 0; - for (auto& val : container) { + for (auto& val : container) + { val = data.get(bin); ++bin; } @@ -154,7 +161,6 @@ struct BaseMultiReduce MultiReduceData mutable data; }; - /*! ****************************************************************************** * @@ -162,22 +168,22 @@ struct BaseMultiReduce * ****************************************************************************** */ -template +template class BaseMultiReduceMin : public BaseMultiReduce { public: using Base = BaseMultiReduce; - using typename Base::value_type; using Base::Base; + using typename Base::value_type; RAJA_SUPPRESS_HD_WARN BaseMultiReduceMin(BaseMultiReduceMin const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMin(BaseMultiReduceMin &&) = default; + BaseMultiReduceMin(BaseMultiReduceMin&&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMin &operator=(BaseMultiReduceMin const&) = delete; + BaseMultiReduceMin& operator=(BaseMultiReduceMin const&) = delete; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMin &operator=(BaseMultiReduceMin &&) = delete; + BaseMultiReduceMin& operator=(BaseMultiReduceMin&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceMin() = default; @@ -185,8 +191,9 @@ class BaseMultiReduceMin : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceMin const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), + m_bin(bin) + {} //! 
reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -196,10 +203,7 @@ class BaseMultiReduceMin : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceMin const& m_base; @@ -214,7 +218,7 @@ class BaseMultiReduceMin : public BaseMultiReduce * ************************************************************************** */ -template +template class BaseMultiReduceMax : public BaseMultiReduce { public: @@ -226,9 +230,9 @@ class BaseMultiReduceMax : public BaseMultiReduce RAJA_SUPPRESS_HD_WARN BaseMultiReduceMax(BaseMultiReduceMax const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMax(BaseMultiReduceMax &&) = default; - BaseMultiReduceMax &operator=(BaseMultiReduceMax const&) = delete; - BaseMultiReduceMax &operator=(BaseMultiReduceMax &&) = delete; + BaseMultiReduceMax(BaseMultiReduceMax&&) = default; + BaseMultiReduceMax& operator=(BaseMultiReduceMax const&) = delete; + BaseMultiReduceMax& operator=(BaseMultiReduceMax&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceMax() = default; @@ -236,8 +240,9 @@ class BaseMultiReduceMax : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceMax const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), + m_bin(bin) + {} //! 
reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -247,10 +252,7 @@ class BaseMultiReduceMax : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceMax const& m_base; @@ -265,7 +267,7 @@ class BaseMultiReduceMax : public BaseMultiReduce * ************************************************************************** */ -template +template class BaseMultiReduceSum : public BaseMultiReduce { public: @@ -277,9 +279,9 @@ class BaseMultiReduceSum : public BaseMultiReduce RAJA_SUPPRESS_HD_WARN BaseMultiReduceSum(BaseMultiReduceSum const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceSum(BaseMultiReduceSum &&) = default; - BaseMultiReduceSum &operator=(BaseMultiReduceSum const&) = delete; - BaseMultiReduceSum &operator=(BaseMultiReduceSum &&) = delete; + BaseMultiReduceSum(BaseMultiReduceSum&&) = default; + BaseMultiReduceSum& operator=(BaseMultiReduceSum const&) = delete; + BaseMultiReduceSum& operator=(BaseMultiReduceSum&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceSum() = default; @@ -287,8 +289,9 @@ class BaseMultiReduceSum : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceSum const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), + m_bin(bin) + {} //! 
reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -298,10 +301,7 @@ class BaseMultiReduceSum : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceSum const& m_base; @@ -316,7 +316,7 @@ class BaseMultiReduceSum : public BaseMultiReduce * ************************************************************************** */ -template +template class BaseMultiReduceBitOr : public BaseMultiReduce { public: @@ -328,9 +328,9 @@ class BaseMultiReduceBitOr : public BaseMultiReduce RAJA_SUPPRESS_HD_WARN BaseMultiReduceBitOr(BaseMultiReduceBitOr const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceBitOr(BaseMultiReduceBitOr &&) = default; - BaseMultiReduceBitOr &operator=(BaseMultiReduceBitOr const&) = delete; - BaseMultiReduceBitOr &operator=(BaseMultiReduceBitOr &&) = delete; + BaseMultiReduceBitOr(BaseMultiReduceBitOr&&) = default; + BaseMultiReduceBitOr& operator=(BaseMultiReduceBitOr const&) = delete; + BaseMultiReduceBitOr& operator=(BaseMultiReduceBitOr&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceBitOr() = default; @@ -338,8 +338,9 @@ class BaseMultiReduceBitOr : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceBitOr const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), + m_bin(bin) + {} //! 
reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -349,10 +350,7 @@ class BaseMultiReduceBitOr : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceBitOr const& m_base; @@ -367,7 +365,7 @@ class BaseMultiReduceBitOr : public BaseMultiReduce * ************************************************************************** */ -template +template class BaseMultiReduceBitAnd : public BaseMultiReduce { public: @@ -379,9 +377,9 @@ class BaseMultiReduceBitAnd : public BaseMultiReduce RAJA_SUPPRESS_HD_WARN BaseMultiReduceBitAnd(BaseMultiReduceBitAnd const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceBitAnd(BaseMultiReduceBitAnd &&) = default; - BaseMultiReduceBitAnd &operator=(BaseMultiReduceBitAnd const&) = delete; - BaseMultiReduceBitAnd &operator=(BaseMultiReduceBitAnd &&) = delete; + BaseMultiReduceBitAnd(BaseMultiReduceBitAnd&&) = default; + BaseMultiReduceBitAnd& operator=(BaseMultiReduceBitAnd const&) = delete; + BaseMultiReduceBitAnd& operator=(BaseMultiReduceBitAnd&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceBitAnd() = default; @@ -389,8 +387,9 @@ class BaseMultiReduceBitAnd : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceBitAnd const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), + m_bin(bin) + {} //! 
reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -400,10 +399,7 @@ class BaseMultiReduceBitAnd : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceBitAnd const& m_base; diff --git a/include/RAJA/pattern/detail/privatizer.hpp b/include/RAJA/pattern/detail/privatizer.hpp index 3579027cd3..4d828e07f9 100644 --- a/include/RAJA/pattern/detail/privatizer.hpp +++ b/include/RAJA/pattern/detail/privatizer.hpp @@ -24,30 +24,30 @@ namespace internal // }; // DefineTypeTraitFromConcept(has_privatizer, HasPrivatizer); -template +template class has_privatizer { private: - template + template static auto Test(void*) - -> decltype(camp::val(), camp::true_type{}); + -> decltype(camp::val(), camp::true_type {}); - template + template static camp::false_type Test(...); public: static bool const value = decltype(Test(0))::value; }; - static_assert(!has_privatizer::value, "if this fires, abandon all hope"); -struct GenericWrapperBase { -}; +struct GenericWrapperBase +{}; -template -struct Privatizer { - using value_type = camp::decay; +template +struct Privatizer +{ + using value_type = camp::decay; using reference_type = value_type&; value_type priv; static_assert(!has_privatizer::value, @@ -58,7 +58,7 @@ struct Privatizer { "a bug"); RAJA_SUPPRESS_HD_WARN - RAJA_HOST_DEVICE Privatizer(const T& o) : priv{o} {} + RAJA_HOST_DEVICE Privatizer(const T& o) : priv {o} {} RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE reference_type get_priv() { return priv; } @@ -81,19 +81,19 @@ struct Privatizer { * that does not belong here. 
* */ -template ::value>::type* = nullptr> +template::value>::type* = nullptr> RAJA_HOST_DEVICE auto thread_privatize(const T& item) -> Privatizer { - return Privatizer{item}; + return Privatizer {item}; } RAJA_SUPPRESS_HD_WARN -template ::value>::type* = nullptr> +template::value>::type* = nullptr> RAJA_HOST_DEVICE auto thread_privatize(const T& item) -> typename T::privatizer { - return typename T::privatizer{item}; + return typename T::privatizer {item}; } } // namespace internal diff --git a/include/RAJA/pattern/detail/reduce.hpp b/include/RAJA/pattern/detail/reduce.hpp index 788f3c698d..fb49658c2a 100644 --- a/include/RAJA/pattern/detail/reduce.hpp +++ b/include/RAJA/pattern/detail/reduce.hpp @@ -21,33 +21,33 @@ #include "RAJA/util/Operators.hpp" #include "RAJA/util/types.hpp" -#define RAJA_DECLARE_REDUCER(OP, POL, COMBINER) \ - template \ - class Reduce##OP \ - : public reduce::detail::BaseReduce##OP \ - { \ - public: \ - using Base = reduce::detail::BaseReduce##OP; \ - using Base::Base; \ +#define RAJA_DECLARE_REDUCER(OP, POL, COMBINER) \ + template \ + class Reduce##OP \ + : public reduce::detail::BaseReduce##OP \ + { \ + public: \ + using Base = reduce::detail::BaseReduce##OP; \ + using Base::Base; \ }; -#define RAJA_DECLARE_INDEX_REDUCER(OP, POL, COMBINER) \ - template \ - class Reduce##OP \ - : public reduce::detail::BaseReduce##OP \ - { \ - public: \ - using Base = reduce::detail::BaseReduce##OP; \ - using Base::Base; \ +#define RAJA_DECLARE_INDEX_REDUCER(OP, POL, COMBINER) \ + template \ + class Reduce##OP \ + : public reduce::detail::BaseReduce##OP \ + { \ + public: \ + using Base = reduce::detail::BaseReduce##OP; \ + using Base::Base; \ }; -#define RAJA_DECLARE_ALL_REDUCERS(POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Sum, POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Min, POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Max, POL, COMBINER) \ - RAJA_DECLARE_INDEX_REDUCER(MinLoc, POL, COMBINER) \ - RAJA_DECLARE_INDEX_REDUCER(MaxLoc, POL, COMBINER) \ - 
RAJA_DECLARE_REDUCER(BitOr, POL, COMBINER) \ +#define RAJA_DECLARE_ALL_REDUCERS(POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Sum, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Min, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Max, POL, COMBINER) \ + RAJA_DECLARE_INDEX_REDUCER(MinLoc, POL, COMBINER) \ + RAJA_DECLARE_INDEX_REDUCER(MaxLoc, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(BitOr, POL, COMBINER) \ RAJA_DECLARE_REDUCER(BitAnd, POL, COMBINER) namespace RAJA @@ -63,40 +63,42 @@ namespace reduce namespace detail { -template class Op> -struct op_adapter : private Op { +template class Op> +struct op_adapter : private Op +{ using operator_type = Op; + RAJA_HOST_DEVICE static constexpr T identity() { return operator_type::identity(); } - RAJA_HOST_DEVICE RAJA_INLINE void operator()(T &val, const T v) const + RAJA_HOST_DEVICE RAJA_INLINE void operator()(T& val, const T v) const { val = operator_type::operator()(val, v); } }; } // namespace detail -template -struct sum : detail::op_adapter { -}; +template +struct sum : detail::op_adapter +{}; -template -struct min : detail::op_adapter { -}; +template +struct min : detail::op_adapter +{}; -template -struct max : detail::op_adapter { -}; +template +struct max : detail::op_adapter +{}; -template -struct or_bit : detail::op_adapter { -}; +template +struct or_bit : detail::op_adapter +{}; -template -struct and_bit : detail::op_adapter { -}; +template +struct and_bit : detail::op_adapter +{}; #if defined(RAJA_ENABLE_TARGET_OPENMP) @@ -106,52 +108,71 @@ struct and_bit : detail::op_adapter { namespace detail { -template ::value> -struct DefaultLoc {}; +template::value> +struct DefaultLoc +{}; -template +template struct DefaultLoc // any non-integral type { RAJA_HOST_DEVICE constexpr T value() const { return T(); } }; -template +template struct DefaultLoc { RAJA_HOST_DEVICE constexpr T value() const { return -1; } }; -template +template class ValueLoc { public: T val = doing_min ? 
operators::limits::max() : operators::limits::min(); IndexType loc = DefaultLoc().value(); -#if __NVCC__ && defined(CUDART_VERSION) && CUDART_VERSION < 9020 || defined(__HIPCC__) +#if __NVCC__ && defined(CUDART_VERSION) && CUDART_VERSION < 9020 || \ + defined(__HIPCC__) RAJA_HOST_DEVICE constexpr ValueLoc() {} - RAJA_HOST_DEVICE constexpr ValueLoc(ValueLoc const &other) : val{other.val}, loc{other.loc} {} + + RAJA_HOST_DEVICE constexpr ValueLoc(ValueLoc const& other) + : val {other.val}, + loc {other.loc} + {} + RAJA_HOST_DEVICE - ValueLoc &operator=(ValueLoc const &other) { val = other.val; loc = other.loc; return *this;} + ValueLoc& operator=(ValueLoc const& other) + { + val = other.val; + loc = other.loc; + return *this; + } #else - constexpr ValueLoc() = default; - constexpr ValueLoc(ValueLoc const &) = default; - ValueLoc &operator=(ValueLoc const &) = default; + constexpr ValueLoc() = default; + constexpr ValueLoc(ValueLoc const&) = default; + ValueLoc& operator=(ValueLoc const&) = default; #endif - RAJA_HOST_DEVICE constexpr ValueLoc(T const &val_) : val{val_}, loc{DefaultLoc().value()} {} - RAJA_HOST_DEVICE constexpr ValueLoc(T const &val_, IndexType const &loc_) - : val{val_}, loc{loc_} - { - } + RAJA_HOST_DEVICE constexpr ValueLoc(T const& val_) + : val {val_}, + loc {DefaultLoc().value()} + {} + + RAJA_HOST_DEVICE constexpr ValueLoc(T const& val_, IndexType const& loc_) + : val {val_}, + loc {loc_} + {} RAJA_HOST_DEVICE operator T() const { return val; } + RAJA_HOST_DEVICE IndexType getLoc() { return loc; } - RAJA_HOST_DEVICE bool operator<(ValueLoc const &rhs) const + + RAJA_HOST_DEVICE bool operator<(ValueLoc const& rhs) const { return val < rhs.val; } - RAJA_HOST_DEVICE bool operator>(ValueLoc const &rhs) const + + RAJA_HOST_DEVICE bool operator>(ValueLoc const& rhs) const { return val > rhs.val; } @@ -163,15 +184,19 @@ class ValueLoc namespace operators { -template -struct limits<::RAJA::reduce::detail::ValueLoc> { - RAJA_INLINE RAJA_HOST_DEVICE 
static constexpr - ::RAJA::reduce::detail::ValueLoc min() +template +struct limits<::RAJA::reduce::detail::ValueLoc> +{ + RAJA_INLINE RAJA_HOST_DEVICE static constexpr ::RAJA::reduce::detail:: + ValueLoc + min() { return ::RAJA::reduce::detail::ValueLoc(limits::min()); } - RAJA_INLINE RAJA_HOST_DEVICE static constexpr - ::RAJA::reduce::detail::ValueLoc max() + + RAJA_INLINE RAJA_HOST_DEVICE static constexpr ::RAJA::reduce::detail:: + ValueLoc + max() { return ::RAJA::reduce::detail::ValueLoc(limits::max()); } @@ -184,11 +209,11 @@ namespace reduce namespace detail { -template - class Reduce_, - template - class Combiner_> +template + class Reduce_, + template + class Combiner_> class BaseReduce { using Reduce = Reduce_; @@ -197,50 +222,55 @@ class BaseReduce Combiner_t mutable c; public: - using value_type = T; + using value_type = T; using reduce_type = Reduce; RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - BaseReduce() : c{T(), Reduce::identity()} {} + BaseReduce() : c {T(), Reduce::identity()} {} RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE BaseReduce(T init_val, T identity_ = Reduce::identity()) - : c{init_val, identity_} - { - } + : c {init_val, identity_} + {} RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE void reset(T val, T identity_ = Reduce::identity()) { - operator T(); // automatic get() before reset + operator T(); // automatic get() before reset c.reset(val, identity_); } //! prohibit compiler-generated copy assignment - BaseReduce &operator=(const BaseReduce &) = delete; + BaseReduce& operator=(const BaseReduce&) = delete; //! compiler-generated copy constructor RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - BaseReduce(const BaseReduce ©) : c(copy.c) {} + BaseReduce(const BaseReduce& copy) : c(copy.c) {} //! compiler-generated move constructor RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE + RAJA_INLINE - BaseReduce(BaseReduce &©) : c(std::move(copy.c)) {} + BaseReduce(BaseReduce&& copy) : c(std::move(copy.c)) {} //! 
compiler-generated move assignment - BaseReduce &operator=(BaseReduce &&) = default; + BaseReduce& operator=(BaseReduce&&) = default; RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - void combine(T const &other) const { c.combine(other); } + void combine(T const& other) const { c.combine(other); } - T &local() const { return c.local(); } + T& local() const { return c.local(); } //! Get the calculated reduced value operator T() const { return c.get(); } @@ -249,55 +279,61 @@ class BaseReduce T get() const { return c.get(); } }; -template +template class BaseCombinable { protected: - BaseCombinable const *parent = nullptr; + BaseCombinable const* parent = nullptr; T identity; T mutable my_data; public: RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - constexpr BaseCombinable() : identity{T()}, my_data{T()} {} + constexpr BaseCombinable() : identity {T()}, my_data {T()} {} RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE constexpr BaseCombinable(T init_val, T identity_ = T()) - : identity{identity_}, my_data{init_val} - { - } + : identity {identity_}, + my_data {init_val} + {} RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE void reset(T init_val, T identity_) { - my_data = init_val; + my_data = init_val; identity = identity_; } RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - constexpr BaseCombinable(BaseCombinable const &other) - : parent{other.parent ? other.parent : &other}, - identity{other.identity}, - my_data{identity} - { - } + constexpr BaseCombinable(BaseCombinable const& other) + : parent {other.parent ? other.parent : &other}, + identity {other.identity}, + my_data {identity} + {} RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE ~BaseCombinable() { - if (parent && my_data != identity) { + if (parent && my_data != identity) + { Reduce()(parent->my_data, my_data); } } RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - void combine(T const &other) { Reduce{}(my_data, other); } + void combine(T const& other) { Reduce {}(my_data, other); } /*! 
* \return the calculated reduced value @@ -307,17 +343,18 @@ class BaseCombinable /*! * \return reference to the local value */ - T &local() const { return my_data; } + T& local() const { return my_data; } T get_combined() const { return my_data; } private: // Convenience method for CRTP - const Derived &derived() const + const Derived& derived() const { - return *(static_cast(this)); + return *(static_cast(this)); } - Derived &derived() { return *(static_cast(this)); } + + Derived& derived() { return *(static_cast(this)); } }; /*! @@ -327,7 +364,7 @@ class BaseCombinable * ****************************************************************************** */ -template class Combiner> +template class Combiner> class BaseReduceMin : public BaseReduce { public: @@ -336,7 +373,7 @@ class BaseReduceMin : public BaseReduce //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - const BaseReduceMin &min(T rhs) const + const BaseReduceMin& min(T rhs) const { this->combine(rhs); return *this; @@ -350,36 +387,43 @@ class BaseReduceMin : public BaseReduce * ************************************************************************** */ -template class Combiner> +template + class Combiner> class BaseReduceMinLoc : public BaseReduce, RAJA::reduce::min, Combiner> { public: using Base = BaseReduce, RAJA::reduce::min, Combiner>; - using value_type = typename Base::value_type; + using value_type = typename Base::value_type; using reduce_type = typename Base::reduce_type; using Base::Base; constexpr BaseReduceMinLoc() : Base(value_type(T(), IndexType())) {} - constexpr BaseReduceMinLoc(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), - IndexType identity_loc_ = DefaultLoc().value()) - : Base(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)) - { - } - - void reset(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), + constexpr BaseReduceMinLoc( + T init_val, + IndexType init_idx, + T 
identity_val_ = reduce_type::identity(), + IndexType identity_loc_ = DefaultLoc().value()) + : Base(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)) + {} + + void reset(T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), IndexType identity_loc_ = DefaultLoc().value()) { - operator T(); // automatic get() before reset - Base::reset(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)); + operator T(); // automatic get() before reset + Base::reset(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)); } /// \brief reducer function; updates the current instance's state RAJA_HOST_DEVICE - const BaseReduceMinLoc &minloc(T rhs, IndexType loc) const + const BaseReduceMinLoc& minloc(T rhs, IndexType loc) const { this->combine(value_type(rhs, loc)); return *this; @@ -399,7 +443,7 @@ class BaseReduceMinLoc * ************************************************************************** */ -template class Combiner> +template class Combiner> class BaseReduceMax : public BaseReduce { public: @@ -408,7 +452,7 @@ class BaseReduceMax : public BaseReduce //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - const BaseReduceMax &max(T rhs) const + const BaseReduceMax& max(T rhs) const { this->combine(rhs); return *this; @@ -422,7 +466,7 @@ class BaseReduceMax : public BaseReduce * ************************************************************************** */ -template class Combiner> +template class Combiner> class BaseReduceSum : public BaseReduce { public: @@ -431,8 +475,9 @@ class BaseReduceSum : public BaseReduce //! 
reducer function; updates the current instance's state RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - const BaseReduceSum &operator+=(T rhs) const + const BaseReduceSum& operator+=(T rhs) const { this->combine(rhs); return *this; @@ -446,7 +491,7 @@ class BaseReduceSum : public BaseReduce * ************************************************************************** */ -template class Combiner> +template class Combiner> class BaseReduceBitOr : public BaseReduce { public: @@ -455,8 +500,9 @@ class BaseReduceBitOr : public BaseReduce //! reducer function; updates the current instance's state RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - const BaseReduceBitOr &operator|=(T rhs) const + const BaseReduceBitOr& operator|=(T rhs) const { this->combine(rhs); return *this; @@ -470,7 +516,7 @@ class BaseReduceBitOr : public BaseReduce * ************************************************************************** */ -template class Combiner> +template class Combiner> class BaseReduceBitAnd : public BaseReduce { public: @@ -479,15 +525,15 @@ class BaseReduceBitAnd : public BaseReduce //! reducer function; updates the current instance's state RAJA_SUPPRESS_HD_WARN + RAJA_HOST_DEVICE - const BaseReduceBitAnd &operator&=(T rhs) const + const BaseReduceBitAnd& operator&=(T rhs) const { this->combine(rhs); return *this; } }; - /*! 
************************************************************************** * @@ -495,36 +541,45 @@ class BaseReduceBitAnd : public BaseReduce * ************************************************************************** */ -template class Combiner> -class BaseReduceMaxLoc - : public BaseReduce, RAJA::reduce::max, Combiner> +template + class Combiner> +class BaseReduceMaxLoc : public BaseReduce, + RAJA::reduce::max, + Combiner> { public: - using Base = BaseReduce, RAJA::reduce::max, Combiner>; - using value_type = typename Base::value_type; + using Base = + BaseReduce, RAJA::reduce::max, Combiner>; + using value_type = typename Base::value_type; using reduce_type = typename Base::reduce_type; using Base::Base; constexpr BaseReduceMaxLoc() : Base(value_type(T(), IndexType())) {} - constexpr BaseReduceMaxLoc(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), - IndexType identity_loc_ = DefaultLoc().value()) - : Base(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)) - { - } - - void reset(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), + constexpr BaseReduceMaxLoc( + T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), + IndexType identity_loc_ = DefaultLoc().value()) + : Base(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)) + {} + + void reset(T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), IndexType identity_loc_ = DefaultLoc().value()) { - operator T(); // automatic get() before reset - Base::reset(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)); + operator T(); // automatic get() before reset + Base::reset(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)); } //! 
reducer function; updates the current instance's state RAJA_HOST_DEVICE - const BaseReduceMaxLoc &maxloc(T rhs, IndexType loc) const + const BaseReduceMaxLoc& maxloc(T rhs, IndexType loc) const { this->combine(value_type(rhs, loc)); return *this; diff --git a/include/RAJA/pattern/forall.hpp b/include/RAJA/pattern/forall.hpp index 686f0e8c6b..9ca1046f93 100644 --- a/include/RAJA/pattern/forall.hpp +++ b/include/RAJA/pattern/forall.hpp @@ -97,38 +97,59 @@ namespace RAJA namespace detail { /// Adapter to replace specific implementations for the icount variants -template -struct icount_adapter { +template +struct icount_adapter +{ using index_type = typename std::decay::type; typename std::decay::type body; using container_type = typename std::decay::type; typename container_type::iterator begin_it; Index_type icount; + icount_adapter(Range const& r, Body const& b, IndexT icount_) - : body{b}, icount{icount_} + : body {b}, + icount {icount_} { using std::begin; begin_it = begin(r); } RAJA_SUPPRESS_HD_WARN - template + template RAJA_HOST_DEVICE void operator()(T const& i) const { body(static_cast(i + icount), begin_it[i]); } }; -struct CallForall { - template - RAJA_INLINE camp::resources::EventProxy operator()(T const&, ExecPol, Body, Res, ForallParams) const; +struct CallForall +{ + template + RAJA_INLINE camp::resources::EventProxy operator()(T const&, + ExecPol, + Body, + Res, + ForallParams) const; }; -struct CallForallIcount { +struct CallForallIcount +{ constexpr CallForallIcount(int s); - template - RAJA_INLINE camp::resources::EventProxy operator()(T const&, ExecPol, Body, Res, ForallParams) const; + template + RAJA_INLINE camp::resources::EventProxy operator()(T const&, + ExecPol, + Body, + Res, + ForallParams) const; const int start; }; @@ -152,22 +173,31 @@ namespace wrap * ****************************************************************************** */ -template +template RAJA_INLINE concepts::enable_if_t< RAJA::resources::EventProxy, concepts::negate>, 
type_traits::is_range> -forall(Res r, ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body, ForallParams&& f_params) +forall(Res r, + ExecutionPolicy&& p, + Container&& c, + LoopBody&& loop_body, + ForallParams&& f_params) { RAJA_FORCEINLINE_RECURSIVE - return forall_impl(r, - std::forward(p), - std::forward(c), - std::forward(loop_body), - std::forward(f_params)); + return forall_impl( + r, std::forward(p), std::forward(c), + std::forward(loop_body), std::forward(f_params)); } -template +template RAJA_INLINE concepts::enable_if_t< RAJA::resources::EventProxy, concepts::negate>, @@ -175,14 +205,11 @@ RAJA_INLINE concepts::enable_if_t< forall(Res r, ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) { RAJA_FORCEINLINE_RECURSIVE - return forall_impl(r, - std::forward(p), - std::forward(c), - std::forward(loop_body), - expt::get_empty_forall_param_pack()); + return forall_impl( + r, std::forward(p), std::forward(c), + std::forward(loop_body), expt::get_empty_forall_param_pack()); } - /*! 
****************************************************************************** * @@ -190,29 +217,29 @@ forall(Res r, ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) * ****************************************************************************** */ -template +template RAJA_INLINE resources::EventProxy forall_Icount(Res r, - ExecutionPolicy&& p, - Container&& c, - IndexType&& icount, - LoopBody&& loop_body, - ForallParams&& f_params) + ExecutionPolicy&& p, + Container&& c, + IndexType&& icount, + LoopBody&& loop_body, + ForallParams&& f_params) { using std::begin; using std::distance; using std::end; auto range = RangeSegment(0, distance(begin(c), end(c))); - detail::icount_adapter adapted(c, - loop_body, + detail::icount_adapter adapted(c, loop_body, icount); using policy::sequential::forall_impl; RAJA_FORCEINLINE_RECURSIVE - return forall_impl(r, std::forward(p), range, adapted, std::forward(f_params)); + return forall_impl(r, std::forward(p), range, adapted, + std::forward(f_params)); } /*! 
@@ -224,62 +251,60 @@ RAJA_INLINE resources::EventProxy forall_Icount(Res r, * ****************************************************************************** */ -template -RAJA_INLINE resources::EventProxy forall_Icount(Res r, - ExecPolicy, - const TypedIndexSet& iset, - LoopBody loop_body, - ForallParams f_params) +template +RAJA_INLINE resources::EventProxy forall_Icount( + Res r, + ExecPolicy, + const TypedIndexSet& iset, + LoopBody loop_body, + ForallParams f_params) { // no need for icount variant here - auto segIterRes = resources::get_resource::type::get_default(); + auto segIterRes = + resources::get_resource::type::get_default(); wrap::forall(segIterRes, SegmentIterPolicy(), iset, [=, &r](int segID) { iset.segmentCall(segID, detail::CallForallIcount(iset.getStartingIcount(segID)), - SegmentExecPolicy(), - loop_body, - r, - f_params); + SegmentExecPolicy(), loop_body, r, f_params); }); return RAJA::resources::EventProxy(r); } -template -RAJA_INLINE resources::EventProxy forall(Res r, - ExecPolicy, - const TypedIndexSet& iset, - LoopBody loop_body, - ForallParams f_params) -{ - auto segIterRes = resources::get_resource::type::get_default(); +template +RAJA_INLINE resources::EventProxy forall( + Res r, + ExecPolicy, + const TypedIndexSet& iset, + LoopBody loop_body, + ForallParams f_params) +{ + auto segIterRes = + resources::get_resource::type::get_default(); wrap::forall(segIterRes, SegmentIterPolicy(), iset, [=, &r](int segID) { - iset.segmentCall(segID, detail::CallForall{}, SegmentExecPolicy(), loop_body, r, f_params); + iset.segmentCall(segID, detail::CallForall {}, SegmentExecPolicy(), + loop_body, r, f_params); }); return RAJA::resources::EventProxy(r); } } // end namespace wrap - - /*! ****************************************************************************** * - * \brief The RAJA::policy_by_value_interface forall functions provide an interface with - * value-based policies. 
It also enforces the interface and performs - * static checks as well as triggering plugins and loop body updates. + * \brief The RAJA::policy_by_value_interface forall functions provide an + *interface with value-based policies. It also enforces the interface and + *performs static checks as well as triggering plugins and loop body updates. * ****************************************************************************** */ @@ -294,7 +319,10 @@ inline namespace policy_by_value_interface * ****************************************************************************** */ -template +template RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, Res r, IdxSet&& c, @@ -306,9 +334,10 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, auto f_params = expt::make_forall_param_pack(std::forward(params)...); auto&& loop_body = expt::get_lambda(std::forward(params)...); - //expt::check_forall_optional_args(loop_body, f_params); + // expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context { + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -318,27 +347,25 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, util::callPreLaunchPlugins(context); - RAJA::resources::EventProxy e = wrap::forall_Icount( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + RAJA::resources::EventProxy e = + wrap::forall_Icount(r, std::forward(p), + std::forward(c), std::move(body), f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > + +template::type> RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall_Icount( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } @@ -349,7 
+376,10 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, * ****************************************************************************** */ -template +template RAJA_INLINE concepts::enable_if_t< resources::EventProxy, type_traits::is_indexset_policy> @@ -363,7 +393,8 @@ forall(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) auto&& loop_body = expt::get_lambda(std::forward(params)...); expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context { + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -373,18 +404,18 @@ forall(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + resources::EventProxy e = + wrap::forall(r, std::forward(p), std::forward(c), + std::move(body), f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > + +template::type> RAJA_INLINE concepts::enable_if_t< resources::EventProxy, type_traits::is_indexset_policy> @@ -392,9 +423,7 @@ forall(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } @@ -405,12 +434,13 @@ forall(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) * ****************************************************************************** */ -template ::type > -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - type_traits::is_multi_policy, - type_traits::is_range> +template::type> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_multi_policy, + type_traits::is_range> forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) { 
static_assert(type_traits::is_random_access_range::value, @@ -419,10 +449,9 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) auto r = Res::get_default(); // plugins handled in multipolicy policy_invoker - return forall_impl(r, - std::forward(p), - std::forward(c), - std::forward(loop_body)); + return forall_impl(r, std::forward(p), + std::forward(c), + std::forward(loop_body)); } /*! @@ -432,16 +461,15 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) * ****************************************************************************** */ -template -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - type_traits::is_range, - type_traits::is_integral> +template +RAJA_INLINE concepts::enable_if_t, + type_traits::is_range, + type_traits::is_integral> forall_Icount(ExecutionPolicy&& p, Res r, Container&& c, @@ -452,11 +480,14 @@ forall_Icount(ExecutionPolicy&& p, static_assert(type_traits::is_random_access_range::value, "Container does not model RandomAccessIterator"); - auto f_params = expt::make_forall_param_pack(std::forward(first), std::forward(params)...); - auto&& loop_body = expt::get_lambda(std::forward(first), std::forward(params)...); - //expt::check_forall_optional_args(loop_body, f_params); + auto f_params = expt::make_forall_param_pack(std::forward(first), + std::forward(params)...); + auto&& loop_body = expt::get_lambda(std::forward(first), + std::forward(params)...); + // expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context { + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -467,21 +498,18 @@ forall_Icount(ExecutionPolicy&& p, util::callPreLaunchPlugins(context); resources::EventProxy e = wrap::forall_Icount( - r, - std::forward(p), - std::forward(c), - icount, - std::move(body), - f_params); + r, std::forward(p), std::forward(c), icount, + std::move(body), f_params); 
util::callPostLaunchPlugins(context); return e; } -template ::type > + +template::type> RAJA_INLINE concepts::enable_if_t< resources::EventProxy, type_traits::is_range, @@ -494,10 +522,7 @@ forall_Icount(ExecutionPolicy&& p, { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall_Icount( - std::forward(p), - r, - std::forward(c), - icount, + std::forward(p), r, std::forward(c), icount, std::forward(loop_body)); } @@ -509,7 +534,10 @@ forall_Icount(ExecutionPolicy&& p, ****************************************************************************** */ -template +template RAJA_INLINE concepts::enable_if_t< resources::EventProxy, concepts::negate>, @@ -524,7 +552,8 @@ forall(ExecutionPolicy&& p, Res r, Container&& c, Params&&... params) auto&& loop_body = expt::get_lambda(std::forward(params)...); expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context { + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -534,19 +563,18 @@ forall(ExecutionPolicy&& p, Res r, Container&& c, Params&&... params) util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + resources::EventProxy e = + wrap::forall(r, std::forward(p), + std::forward(c), std::move(body), f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > +template::type> RAJA_INLINE concepts::enable_if_t< resources::EventProxy, concepts::negate>, @@ -556,34 +584,34 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } -} // end inline namespace policy_by_value_interface - +} // namespace policy_by_value_interface /*! 
* \brief Conversion from template-based policy to value-based policy for forall * * this reduces implementation overhead and perfectly forwards all arguments */ -template ::type > +template::type> RAJA_INLINE resources::EventProxy forall(Args&&... args) { Res r = Res::get_default(); - return ::RAJA::policy_by_value_interface::forall( - ExecutionPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::forall(ExecutionPolicy(), r, + std::forward(args)...); } -template -RAJA_INLINE concepts::enable_if_t, type_traits::is_resource> + +template +RAJA_INLINE concepts::enable_if_t, + type_traits::is_resource> forall(Res r, Args&&... args) { - return ::RAJA::policy_by_value_interface::forall( - ExecutionPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::forall(ExecutionPolicy(), r, + std::forward(args)...); } /*! @@ -592,16 +620,19 @@ forall(Res r, Args&&... args) * * this reduces implementation overhead and perfectly forwards all arguments */ -template ::type > +template::type> RAJA_INLINE resources::EventProxy forall_Icount(Args&&... args) { Res r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall_Icount( ExecutionPolicy(), r, std::forward(args)...); } -template -RAJA_INLINE concepts::enable_if_t, type_traits::is_resource> + +template +RAJA_INLINE concepts::enable_if_t, + type_traits::is_resource> forall_Icount(Res r, Args&&... args) { return ::RAJA::policy_by_value_interface::forall_Icount( @@ -611,12 +642,17 @@ forall_Icount(Res r, Args&&... 
args) namespace detail { -template -RAJA_INLINE camp::resources::EventProxy CallForall::operator()(T const& segment, - ExecutionPolicy, - LoopBody body, - Res r, - ForallParams f_params) const +template +RAJA_INLINE camp::resources::EventProxy CallForall::operator()( + T const& segment, + ExecutionPolicy, + LoopBody body, + Res r, + ForallParams f_params) const { // this is only called inside a region, use impl using policy::sequential::forall_impl; @@ -626,15 +662,21 @@ RAJA_INLINE camp::resources::EventProxy CallForall::operator()(T const& seg constexpr CallForallIcount::CallForallIcount(int s) : start(s) {} -template -RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()(T const& segment, - ExecutionPolicy, - LoopBody body, - Res r, - ForallParams f_params) const +template +RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()( + T const& segment, + ExecutionPolicy, + LoopBody body, + Res r, + ForallParams f_params) const { // go through wrap to unwrap icount - return wrap::forall_Icount(r, ExecutionPolicy(), segment, start, body, f_params); + return wrap::forall_Icount(r, ExecutionPolicy(), segment, start, body, + f_params); } } // namespace detail @@ -650,100 +692,112 @@ RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()(T cons namespace expt { - template - struct dynamic_helper +template +struct dynamic_helper +{ + template + static void invoke_forall(const int pol, SEGMENT const& seg, BODY const& body) { - template - static void invoke_forall(const int pol, SEGMENT const &seg, BODY const &body) + if (IDX == pol) { - if(IDX==pol){ - using t_pol = typename camp::at>::type; - RAJA::forall(seg, body); - return; - } - dynamic_helper::invoke_forall(pol, seg, body); + using t_pol = typename camp::at>::type; + RAJA::forall(seg, body); + return; } + dynamic_helper::invoke_forall(pol, seg, body); + } - template - static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, 
BODY const &body) - { - - using t_pol = typename camp::at>::type; - using resource_type = typename resources::get_resource::type; + template + static resources::EventProxy invoke_forall( + RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + BODY const& body) + { - if(IDX==pol){ - RAJA::forall(r.get(), seg, body); + using t_pol = typename camp::at>::type; + using resource_type = typename resources::get_resource::type; - //Return a generic event proxy from r, - //because forall returns a typed event proxy - return {r}; - } + if (IDX == pol) + { + RAJA::forall(r.get(), seg, body); - return dynamic_helper::invoke_forall(r, pol, seg, body); + // Return a generic event proxy from r, + // because forall returns a typed event proxy + return {r}; } - }; + return dynamic_helper::invoke_forall(r, pol, seg, + body); + } +}; - template - struct dynamic_helper<0, POLICY_LIST> +template +struct dynamic_helper<0, POLICY_LIST> +{ + template + static void invoke_forall(const int pol, SEGMENT const& seg, BODY const& body) { - template - static void - invoke_forall(const int pol, SEGMENT const &seg, BODY const &body) + if (0 == pol) { - if(0==pol){ - using t_pol = typename camp::at>::type; - RAJA::forall(seg, body); - return; - } - RAJA_ABORT_OR_THROW("Policy enum not supported "); + using t_pol = typename camp::at>::type; + RAJA::forall(seg, body); + return; } + RAJA_ABORT_OR_THROW("Policy enum not supported "); + } - template - static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body) - { - if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); + template + static resources::EventProxy invoke_forall( + RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + BODY const& body) + { + if (pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); - using t_pol = typename camp::at>::type; - using resource_type = typename resources::get_resource::type; + using t_pol = typename 
camp::at>::type; + using resource_type = typename resources::get_resource::type; - RAJA::forall(r.get(), seg, body); + RAJA::forall(r.get(), seg, body); - //Return a generic event proxy from r, - //because forall returns a typed event proxy - return {r}; - } + // Return a generic event proxy from r, + // because forall returns a typed event proxy + return {r}; + } +}; - }; +template +void dynamic_forall(const int pol, SEGMENT const& seg, BODY const& body) +{ + constexpr int N = camp::size::value; + static_assert(N > 0, "RAJA policy list must not be empty"); - template - void dynamic_forall(const int pol, SEGMENT const &seg, BODY const &body) + if (pol > N - 1) { - constexpr int N = camp::size::value; - static_assert(N > 0, "RAJA policy list must not be empty"); - - if(pol > N-1) { - RAJA_ABORT_OR_THROW("Policy enum not supported"); - } - dynamic_helper::invoke_forall(pol, seg, body); + RAJA_ABORT_OR_THROW("Policy enum not supported"); } + dynamic_helper::invoke_forall(pol, seg, body); +} - template - resources::EventProxy - dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body) - { - constexpr int N = camp::size::value; - static_assert(N > 0, "RAJA policy list must not be empty"); - - if(pol > N-1) { - RAJA_ABORT_OR_THROW("Policy value out of range"); - } +template +resources::EventProxy dynamic_forall( + RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + BODY const& body) +{ + constexpr int N = camp::size::value; + static_assert(N > 0, "RAJA policy list must not be empty"); - return dynamic_helper::invoke_forall(r, pol, seg, body); + if (pol > N - 1) + { + RAJA_ABORT_OR_THROW("Policy value out of range"); } + return dynamic_helper::invoke_forall(r, pol, seg, body); +} + } // namespace expt diff --git a/include/RAJA/pattern/kernel.hpp b/include/RAJA/pattern/kernel.hpp index 1875fe27d9..06387eb0e5 100644 --- a/include/RAJA/pattern/kernel.hpp +++ b/include/RAJA/pattern/kernel.hpp @@ -40,33 +40,32 @@ 
namespace RAJA * * This is just a list of RAJA::kernel statements. */ -template +template using KernelPolicy = internal::StatementList; /// /// Template list of argument indices /// -template +template using ArgList = camp::idx_seq; -template +template struct IterableWrapperTuple; -template -struct IterableWrapperTuple> { +template +struct IterableWrapperTuple> +{ - using type = - camp::tuple::iterator, - typename camp::decay::IndexType>...>; + using type = camp::tuple::iterator, + typename camp::decay::IndexType>...>; }; - namespace internal { -template -RAJA_INLINE constexpr auto make_wrapped_tuple_impl(Tuple &&t, +template +RAJA_INLINE constexpr auto make_wrapped_tuple_impl(Tuple&& t, camp::idx_seq) -> camp::tuple>>::IndexType>...> { return camp::make_tuple( - RAJA::Span< - typename camp::decay< - camp::tuple_element_t>>::iterator, - typename camp::decay>>:: - IndexType>{camp::get(std::forward(t)).begin(), - camp::get(std::forward(t)).end()}...); + RAJA::Span>>::iterator, + typename camp::decay< + camp::tuple_element_t>>::IndexType> { + camp::get(std::forward(t)).begin(), + camp::get(std::forward(t)).end()}...); } } // namespace internal -template -RAJA_INLINE constexpr auto make_wrapped_tuple(Tuple &&t) +template +RAJA_INLINE constexpr auto make_wrapped_tuple(Tuple&& t) -> decltype(internal::make_wrapped_tuple_impl( std::forward(t), - camp::make_idx_seq_t>::value>{})) + camp::make_idx_seq_t>::value> {})) { return internal::make_wrapped_tuple_impl( std::forward(t), - camp::make_idx_seq_t>::value>{}); + camp::make_idx_seq_t>::value> {}); } - -template -RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple &&segments, - ParamTuple &¶ms, - Resource resource, - Bodies &&... bodies) +template +RAJA_INLINE resources::EventProxy kernel_param_resource( + SegmentTuple&& segments, + ParamTuple&& params, + Resource resource, + Bodies&&... 
bodies) { - util::PluginContext context{util::make_context()}; + util::PluginContext context {util::make_context()}; // TODO: test that all policy members model the Executor policy concept // TODO: add a static_assert for functors which cannot be invoked with @@ -119,10 +118,8 @@ RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple & using param_tuple_t = camp::decay; - using loop_data_t = internal::LoopData...>; + using loop_data_t = internal::LoopData...>; util::callPreCapturePlugins(context); @@ -131,11 +128,10 @@ RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple & // our segments, loop bodies, and the tuple of loop indices // it is passed through all of the kernel mechanics by-referenece, // and only copied to provide thread-private instances. - loop_data_t loop_data(make_wrapped_tuple( - std::forward(segments)), - std::forward(params), - resource, - std::forward(bodies)...); + loop_data_t loop_data( + make_wrapped_tuple(std::forward(segments)), + std::forward(params), resource, + std::forward(bodies)...); util::callPostCapturePlugins(context); @@ -152,50 +148,46 @@ RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple & return resources::EventProxy(resource); } -template -RAJA_INLINE resources::EventProxy kernel_resource(SegmentTuple &&segments, - Resource resource, - Bodies &&... bodies) +template +RAJA_INLINE resources::EventProxy kernel_resource( + SegmentTuple&& segments, + Resource resource, + Bodies&&... bodies) { - return RAJA::kernel_param_resource(std::forward(segments), - RAJA::make_tuple(), - resource, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), RAJA::make_tuple(), resource, + std::forward(bodies)...); } -template -RAJA_INLINE resources::EventProxy> kernel_param(SegmentTuple &&segments, - ParamTuple &¶ms, - Bodies &&... bodies) +template +RAJA_INLINE resources::EventProxy> +kernel_param(SegmentTuple&& segments, ParamTuple&& params, Bodies&&... 
bodies) { auto res = resources::get_default_resource(); - return RAJA::kernel_param_resource(std::forward(segments), - std::forward(params), - res, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), std::forward(params), + res, std::forward(bodies)...); } -template -RAJA_INLINE resources::EventProxy> kernel(SegmentTuple &&segments, - Bodies &&... bodies) +template +RAJA_INLINE resources::EventProxy> +kernel(SegmentTuple&& segments, Bodies&&... bodies) { auto res = resources::get_default_resource(); - return RAJA::kernel_param_resource(std::forward(segments), - RAJA::make_tuple(), - res, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), RAJA::make_tuple(), res, + std::forward(bodies)...); } } // end namespace RAJA - #include "RAJA/pattern/kernel/Collapse.hpp" #include "RAJA/pattern/kernel/Conditional.hpp" #include "RAJA/pattern/kernel/For.hpp" diff --git a/include/RAJA/pattern/kernel/Collapse.hpp b/include/RAJA/pattern/kernel/Collapse.hpp index 8efb126397..5d4a0c2308 100644 --- a/include/RAJA/pattern/kernel/Collapse.hpp +++ b/include/RAJA/pattern/kernel/Collapse.hpp @@ -26,11 +26,11 @@ namespace statement { -template +template struct Collapse : public internal::ForList, public internal::CollapseBase, - public internal::Statement { -}; + public internal::Statement +{}; } // namespace statement diff --git a/include/RAJA/pattern/kernel/Conditional.hpp b/include/RAJA/pattern/kernel/Conditional.hpp index 6b7875c4c2..32188c80a7 100644 --- a/include/RAJA/pattern/kernel/Conditional.hpp +++ b/include/RAJA/pattern/kernel/Conditional.hpp @@ -36,20 +36,20 @@ namespace statement * A RAJA::kernel statement that implements conditional control logic * */ -template -struct If : public internal::Statement { -}; - +template +struct If : public internal::Statement +{}; /*! * An expression that returns a compile time literal value. 
* */ -template -struct Value { +template +struct Value +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static long eval(Data const &) + template + RAJA_HOST_DEVICE RAJA_INLINE static long eval(Data const&) { return value; } @@ -59,11 +59,12 @@ struct Value { * An equality expression * */ -template -struct Equals { +template +struct Equals +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) == R::eval(data); } @@ -73,116 +74,117 @@ struct Equals { * A negated equality expression * */ -template -struct NotEquals { +template +struct NotEquals +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) != R::eval(data); } }; - /*! * A logical OR expression * */ -template -struct Or { +template +struct Or +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) || R::eval(data); } }; - /*! * A logical AND expression * */ -template -struct And { +template +struct And +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) && R::eval(data); } }; - /*! * A less than expression * */ -template -struct LessThan { +template +struct LessThan +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) < R::eval(data); } }; - /*! 
* A less or equals than expression * */ -template -struct LessThanEq { +template +struct LessThanEq +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) <= R::eval(data); } }; - /*! * A greater than expression * */ -template -struct GreaterThan { +template +struct GreaterThan +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) > R::eval(data); } }; - /*! * A greater or equals than expression * */ -template -struct GreaterThanEq { +template +struct GreaterThanEq +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) >= R::eval(data); } }; - /*! * A negation expression * */ -template -struct Not { +template +struct Not +{ - template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return !(L::eval(data)); } @@ -195,15 +197,17 @@ namespace internal { -template -struct StatementExecutor, Types> { +template +struct StatementExecutor, Types> +{ - template - static RAJA_INLINE void exec(Data &&data) + template + static RAJA_INLINE void exec(Data&& data) { - if (Condition::eval(data)) { + if (Condition::eval(data)) + { execute_statement_list, Types>( std::forward(data)); } diff --git a/include/RAJA/pattern/kernel/For.hpp b/include/RAJA/pattern/kernel/For.hpp index 539c451673..a4684236f4 100644 --- a/include/RAJA/pattern/kernel/For.hpp +++ b/include/RAJA/pattern/kernel/For.hpp @@ -37,12 +37,13 @@ namespace statement * Assigns the loop iterate to argument ArgumentId * */ -template +template struct For : public internal::ForList, public internal::ForTraitBase, - public internal::Statement { + public 
internal::Statement +{ // TODO: add static_assert for valid policy in Pol using execution_policy_t = ExecPolicy; @@ -59,14 +60,18 @@ namespace internal * Assigns the loop index to offset ArgumentId * */ -template -struct ForWrapper : public GenericWrapper { +template +struct ForWrapper : public GenericWrapper +{ using Base = GenericWrapper; using Base::Base; using privatizer = NestedPrivatizer; - template + template RAJA_INLINE void operator()(InIndexType i) { Base::data.template assign_offset(i); @@ -74,22 +79,23 @@ struct ForWrapper : public GenericWrapper { } }; - /*! * A generic RAJA::kernel forall_impl executor for statement::For * * */ -template +template struct StatementExecutor< - statement::For, Types> { + statement::For, + Types> +{ - template - static RAJA_INLINE void exec(Data &&data) + template + static RAJA_INLINE void exec(Data&& data) { // Set the argument type for this loop @@ -98,12 +104,13 @@ struct StatementExecutor< // Create a wrapper, just in case forall_impl needs to thread_privatize ForWrapper for_wrapper(data); - auto len = segment_length(data); + auto len = segment_length(data); using len_t = decltype(len); auto r = data.res; - forall_impl(r, ExecPolicy{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, ExecPolicy {}, TypedRangeSegment(0, len), for_wrapper, + RAJA::expt::get_empty_forall_param_pack()); } }; @@ -112,15 +119,14 @@ struct StatementExecutor< * * */ -template -struct StatementExecutor< - statement::For, Types> { +template +struct StatementExecutor, + Types> +{ - template - static RAJA_INLINE void exec(Data &&data) + template + static RAJA_INLINE void exec(Data&& data) { // Set the argument type for this loop @@ -129,12 +135,13 @@ struct StatementExecutor< // Create a wrapper, just in case forall_impl needs to thread_privatize ForWrapper for_wrapper(data); - auto len = segment_length(data); + auto len = segment_length(data); using len_t = decltype(len); 
RAJA_EXTRACT_BED_IT(TypedRangeSegment(0, len)); - for (decltype(distance_it) i = 0; i < distance_it; ++i) { + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { for_wrapper(*(begin_it + i)); } } diff --git a/include/RAJA/pattern/kernel/ForICount.hpp b/include/RAJA/pattern/kernel/ForICount.hpp index 18515c7f59..3276c6e0e8 100644 --- a/include/RAJA/pattern/kernel/ForICount.hpp +++ b/include/RAJA/pattern/kernel/ForICount.hpp @@ -39,13 +39,14 @@ namespace statement * Assigns the loop index to param ParamId * */ -template +template struct ForICount : public internal::ForList, - public internal::ForTraitBase, - public internal::Statement { + public internal::ForTraitBase, + public internal::Statement +{ static_assert(std::is_base_of::value, "Inappropriate ParamId, ParamId must be of type " @@ -64,15 +65,19 @@ namespace internal * Assigns the loop index to offset ArgumentId * Assigns the loop index to param ParamId */ -template -struct ForICountWrapper : public GenericWrapper { +template +struct ForICountWrapper : public GenericWrapper +{ using Base = GenericWrapper; using Base::Base; using privatizer = NestedPrivatizer; - template + template RAJA_INLINE void operator()(InIndexType i) { Base::data.template assign_offset(i); @@ -81,38 +86,40 @@ struct ForICountWrapper : public GenericWrapper { } }; - /*! 
* A generic RAJA::kernel forall_impl executor for statement::ForICount * * */ -template +template struct StatementExecutor< - statement::ForICount, Types> { + statement::ForICount, + Types> +{ - template - static RAJA_INLINE void exec(Data &&data) + template + static RAJA_INLINE void exec(Data&& data) { // Set the argument type for this loop using NewTypes = setSegmentTypeFromData; // Create a wrapper, just in case forall_impl needs to thread_privatize - ForICountWrapper for_wrapper(data); + ForICountWrapper + for_wrapper(data); - auto len = segment_length(data); + auto len = segment_length(data); using len_t = decltype(len); auto r = resources::get_resource::type::get_default(); - forall_impl(r, ExecPolicy{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, ExecPolicy {}, TypedRangeSegment(0, len), for_wrapper, + RAJA::expt::get_empty_forall_param_pack()); } }; diff --git a/include/RAJA/pattern/kernel/Hyperplane.hpp b/include/RAJA/pattern/kernel/Hyperplane.hpp index 955afcecc0..41e43333a9 100644 --- a/include/RAJA/pattern/kernel/Hyperplane.hpp +++ b/include/RAJA/pattern/kernel/Hyperplane.hpp @@ -76,15 +76,13 @@ namespace statement * }); * */ -template -struct Hyperplane - : public internal::Statement { -}; +template +struct Hyperplane : public internal::Statement +{}; } // end namespace statement @@ -92,27 +90,27 @@ namespace internal { -template -struct HyperplaneInner - : public internal::Statement { -}; - +template +struct HyperplaneInner : public internal::Statement +{}; -template +template struct StatementExecutor, ExecPolicy, - EnclosedStmts...>, Types> { + EnclosedStmts...>, + Types> +{ - template - static RAJA_INLINE void exec(Data &data) + template + static RAJA_INLINE void exec(Data& data) { // get type of Hp arguments index @@ -126,8 +124,7 @@ struct StatementExecutor, + ExecPolicy, ArgList, HyperplaneInner, EnclosedStmts...>>; // Create a For-loop wrapper for the outer loop @@ -135,9 +132,9 @@ struct 
StatementExecutor(data) + - foldl(RAJA::operators::plus(), - segment_length(data)...); + idx_t hp_len = + segment_length(data) + + foldl(RAJA::operators::plus(), segment_length(data)...); /* Execute the outer loop over hyperplanes * @@ -146,40 +143,40 @@ struct StatementExecutor::type::get_default(); - forall_impl(r, HpExecPolicy{}, - TypedRangeSegment(0, hp_len), - outer_wrapper, - RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, HpExecPolicy {}, TypedRangeSegment(0, hp_len), + outer_wrapper, RAJA::expt::get_empty_forall_param_pack()); } }; - -template +template struct StatementExecutor< - HyperplaneInner, EnclosedStmts...>, Types> { + HyperplaneInner, EnclosedStmts...>, + Types> +{ - template - static RAJA_INLINE void exec(Data &data) + template + static RAJA_INLINE void exec(Data& data) { // get h value - auto h = camp::get(data.offset_tuple); + auto h = camp::get(data.offset_tuple); using idx_t = decltype(h); // compute actual iterate for HpArgumentId // as: i0 = h - (i1 + i2 + i3 + ...) 
idx_t i = h - foldl(RAJA::operators::plus(), - camp::get(data.offset_tuple)...); + camp::get(data.offset_tuple)...); // get length of Hp indexed argument auto len = segment_length(data); // check bounds - if (i >= 0 && i < len) { + if (i >= 0 && i < len) + { // store in tuple data.template assign_offset(i); diff --git a/include/RAJA/pattern/kernel/InitLocalMem.hpp b/include/RAJA/pattern/kernel/InitLocalMem.hpp index 21d9e3cd2a..92ca93579c 100644 --- a/include/RAJA/pattern/kernel/InitLocalMem.hpp +++ b/include/RAJA/pattern/kernel/InitLocalMem.hpp @@ -26,10 +26,9 @@ namespace RAJA { -//Policies for RAJA local arrays +// Policies for RAJA local arrays struct cpu_tile_mem; - namespace statement { @@ -44,13 +43,15 @@ namespace statement * Will intialize the 0th array in the param tuple */ template -struct InitLocalMem : public internal::Statement { -}; +struct InitLocalMem : public internal::Statement +{}; -//Policy Specialization +// Policy Specialization template -struct InitLocalMem, EnclosedStmts...> : public internal::Statement { -}; +struct InitLocalMem, + EnclosedStmts...> : public internal::Statement +{}; } // end namespace statement @@ -58,28 +59,33 @@ struct InitLocalMem, EnclosedStmts namespace internal { -//Statement executor to initalize RAJA local array +// Statement executor to initalize RAJA local array template -struct StatementExecutor, EnclosedStmts...>, Types>{ - - //Execute statement list +struct StatementExecutor, + EnclosedStmts...>, + Types> +{ + + // Execute statement list template - static void RAJA_INLINE exec_expanded(Data && data) + static void RAJA_INLINE exec_expanded(Data&& data) { execute_statement_list, Types>(data); } - - //Intialize local array - //Identifies type + number of elements needed + + // Intialize local array + // Identifies type + number of elements needed template - static void RAJA_INLINE exec_expanded(Data && data) + static void RAJA_INLINE exec_expanded(Data&& data) { - using varType = typename 
camp::tuple_element_t::param_tuple_t>::value_type; + using varType = typename camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::value_type; // Initialize memory #ifdef RAJA_COMPILER_MSVC // MSVC doesn't like taking a pointer to stack allocated data?!?! - varType *ptr = new varType[camp::get(data.param_tuple).size()]; + varType* ptr = new varType[camp::get(data.param_tuple).size()]; camp::get(data.param_tuple).set_data(ptr); #else varType Array[camp::get(data.param_tuple).size()]; @@ -95,16 +101,13 @@ struct StatementExecutor - static RAJA_INLINE void exec(Data &&data) + static RAJA_INLINE void exec(Data&& data) { - //Initalize local arrays + execute statements + cleanup + // Initalize local arrays + execute statements + cleanup exec_expanded(data); } - }; diff --git a/include/RAJA/pattern/kernel/Lambda.hpp b/include/RAJA/pattern/kernel/Lambda.hpp index 29d41b431e..4cbf67f72a 100644 --- a/include/RAJA/pattern/kernel/Lambda.hpp +++ b/include/RAJA/pattern/kernel/Lambda.hpp @@ -49,25 +49,24 @@ struct lambda_arg_offset_t template struct lambda_arg_value_t { - using type = T; + using type = T; }; template struct LambdaArg { - static constexpr camp::idx_t value = V; + static constexpr camp::idx_t value = V; }; -} - - +} // namespace internal /*! * Used in RAJA::statement::Lambda to specify that one or more segment values * should be passed into the lambda as an argument */ -template -using Segs = camp::list...>; +template +using Segs = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more segment offsets @@ -79,16 +78,18 @@ using Segs = camp::list... * In the case of tiling (with Tile) the offset is w.r.t. the beginning of the * current tile. */ -template -using Offsets = camp::list...>; +template +using Offsets = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more parameters that * should be passed into the lambda as an argument. 
*/ -template -using Params = camp::list...>; +template +using Params = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more constant values @@ -103,9 +104,9 @@ using Params = camp::list> * invokes: lambda0( (double)3, (double) 4 ) */ -template -using ValuesT = camp::list, values>...>; - +template +using ValuesT = + camp::list, values>...>; namespace statement { @@ -119,8 +120,9 @@ namespace statement * RAJA::kernel(make_tuple{s0, s1, s2}, lambda0, lambda1); * */ -template -struct Lambda : internal::Statement { +template +struct Lambda : internal::Statement +{ static const camp::idx_t loop_body_index = BodyIdx; }; @@ -130,13 +132,6 @@ namespace internal { - - - - - - - /* * Helper that extracts a segment value for a lambda argument * @@ -150,22 +145,18 @@ template struct LambdaSegExtractor { - static_assert(!std::is_same::value, + static_assert( + !std::is_same::value, "Segment not assigned, but used in Lambda with Segs<> argument"); template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static SegmentType extract(Data &&data) + RAJA_HOST_DEVICE RAJA_INLINE constexpr static SegmentType extract(Data&& data) { - return SegmentType(camp::get(data.segment_tuple).begin()[camp::get(data.offset_tuple)]); + return SegmentType(camp::get(data.segment_tuple) + .begin()[camp::get(data.offset_tuple)]); } - }; - - /* * Helper that extracts a segment value for a lambda argument * @@ -179,22 +170,18 @@ template struct LambdaOffsetExtractor { - static_assert(!std::is_same::value, + static_assert( + !std::is_same::value, "Segment not assigned, but used in Lambda with Offsets<> argument"); template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static OffsetType extract(Data &&data) + RAJA_HOST_DEVICE RAJA_INLINE constexpr static OffsetType extract(Data&& data) { return OffsetType(camp::get(data.offset_tuple)); } - }; - /* * Helper that provides first level of argument extraction * This acts as a switchboard between Segs, Offsets, and Params @@ -205,25 
+192,22 @@ struct LambdaOffsetExtractor template struct LambdaArgSwitchboard; - template struct LambdaArgSwitchboard> { using OffsetType = camp::at_v; - static_assert(!std::is_same::value, + static_assert( + !std::is_same::value, "Offset not assigned, but used in Lambda with Offsets<> argument"); template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static OffsetType extract(Data &&data) + RAJA_HOST_DEVICE RAJA_INLINE constexpr static OffsetType extract(Data&& data) { - return LambdaOffsetExtractor::extract(std::forward(data)); + return LambdaOffsetExtractor::extract( + std::forward(data)); } - }; template @@ -232,110 +216,108 @@ struct LambdaArgSwitchboard> using SegmentType = camp::at_v; - static_assert(!std::is_same::value, + static_assert( + !std::is_same::value, "Segment not assigned, but used in Lambda with Segs<> argument"); template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static SegmentType extract(Data &&data) + RAJA_HOST_DEVICE RAJA_INLINE constexpr static SegmentType extract(Data&& data) { - return LambdaSegExtractor::extract(std::forward(data)); + return LambdaSegExtractor::extract( + std::forward(data)); } - }; template struct LambdaArgSwitchboard> { template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static auto extract(Data &&data)-> - typename std::add_lvalue_reference::param_tuple_t>>::type + RAJA_HOST_DEVICE RAJA_INLINE constexpr static auto extract(Data&& data) -> + typename std::add_lvalue_reference::param_tuple_t>>::type { return camp::get(data.param_tuple); } }; - template struct LambdaArgSwitchboard, value>> { template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static T extract(Data &&) + RAJA_HOST_DEVICE RAJA_INLINE constexpr static T extract(Data&&) { return T(value); } }; - - RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda_with_args(Data &&data, - camp::list const &) +template +RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda_with_args( + Data&& data, + camp::list const&) { 
camp::get(data.bodies)( LambdaArgSwitchboard::extract(data)...); } - - - /*! * A RAJA::kernel statement that invokes a lambda function * with user specified arguments. */ -template -struct StatementExecutor, Types> { +template +struct StatementExecutor, Types> +{ - template - static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data &&data) + template + static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data&& data) { - //Convert SegList, ParamList into Seg, Param types, and store in a list + // Convert SegList, ParamList into Seg, Param types, and store in a list using targList = typename camp::flatten>::type; - invoke_lambda_with_args(std::forward(data), targList{}); + invoke_lambda_with_args(std::forward(data), + targList {}); } }; - - -template -RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda(Data &&data, camp::idx_seq const &, camp::idx_seq const &) +template +RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda(Data&& data, + camp::idx_seq const&, + camp::idx_seq const&) { - using AllSegs = Segs; + using AllSegs = Segs; using AllParams = Params; // invoke the expanded Lambda executor, passing in all segments and params - StatementExecutor, Types>::exec(std::forward(data)); + StatementExecutor, + Types>::exec(std::forward(data)); } +template +struct StatementExecutor, Types> +{ -template -struct StatementExecutor, Types> { - - template - static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data &&data) + template + static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data&& data) { - using Data_t = camp::decay; + using Data_t = camp::decay; using offset_tuple_t = typename Data_t::offset_tuple_t; - using param_tuple_t = typename Data_t::param_tuple_t; + using param_tuple_t = typename Data_t::param_tuple_t; invoke_lambda( std::forward(data), - camp::make_idx_seq_t::value>{}, - camp::make_idx_seq_t::value>{}); - + camp::make_idx_seq_t::value> {}, + camp::make_idx_seq_t::value> {}); } }; diff --git a/include/RAJA/pattern/kernel/Param.hpp b/include/RAJA/pattern/kernel/Param.hpp index 
8e870ebe15..49e8de5710 100644 --- a/include/RAJA/pattern/kernel/Param.hpp +++ b/include/RAJA/pattern/kernel/Param.hpp @@ -31,10 +31,10 @@ namespace RAJA namespace internal { -struct ParamBase { -}; +struct ParamBase +{}; -}// end namespace internal +} // end namespace internal namespace statement { @@ -46,13 +46,14 @@ namespace statement * This allows run-time values to affect the control logic within * RAJA::kernel execution policies. */ -template -struct Param : public internal::ParamBase { +template +struct Param : public internal::ParamBase +{ constexpr static camp::idx_t param_idx = ParamId; - template - RAJA_HOST_DEVICE RAJA_INLINE static auto eval(Data const &data) + template + RAJA_HOST_DEVICE RAJA_INLINE static auto eval(Data const& data) -> decltype(camp::get(data.param_tuple)) { return camp::get(data.param_tuple); diff --git a/include/RAJA/pattern/kernel/Reduce.hpp b/include/RAJA/pattern/kernel/Reduce.hpp index 4de4922ea3..e2ee22cccb 100644 --- a/include/RAJA/pattern/kernel/Reduce.hpp +++ b/include/RAJA/pattern/kernel/Reduce.hpp @@ -38,11 +38,13 @@ namespace statement * the enclosed statements on the thread which contains the reduced value. * */ -template class ReduceOperator, - typename ParamId, - typename... EnclosedStmts> -struct Reduce : public internal::Statement { +template + class ReduceOperator, + typename ParamId, + typename... 
EnclosedStmts> +struct Reduce : public internal::Statement +{ static_assert(std::is_base_of::value, "Inappropriate ParamId, ParamId must be of type " diff --git a/include/RAJA/pattern/kernel/Region.hpp b/include/RAJA/pattern/kernel/Region.hpp index 82b79ae775..9f15e17963 100644 --- a/include/RAJA/pattern/kernel/Region.hpp +++ b/include/RAJA/pattern/kernel/Region.hpp @@ -31,8 +31,8 @@ namespace statement { template -struct Region : public internal::Statement { -}; +struct Region : public internal::Statement +{}; } // end namespace statement @@ -40,23 +40,24 @@ struct Region : public internal::Statement { namespace internal { -//Statement executor to create a region within kernel +// Statement executor to create a region within kernel -//Note: RAJA region's lambda must capture by reference otherwise -//internal function calls are undefined. +// Note: RAJA region's lambda must capture by reference otherwise +// internal function calls are undefined. template -struct StatementExecutor, Types> { - -template -static RAJA_INLINE void exec(Data &&data) +struct StatementExecutor, + Types> { - RAJA::region([&]() { + template + static RAJA_INLINE void exec(Data&& data) + { + + RAJA::region([&]() { using data_t = camp::decay; execute_statement_list, Types>(data_t(data)); }); -} - + } }; diff --git a/include/RAJA/pattern/kernel/Tile.hpp b/include/RAJA/pattern/kernel/Tile.hpp index 43f72e0545..a65d6b326d 100644 --- a/include/RAJA/pattern/kernel/Tile.hpp +++ b/include/RAJA/pattern/kernel/Tile.hpp @@ -34,14 +34,14 @@ namespace RAJA { -struct TileSize { +struct TileSize +{ const camp::idx_t size; RAJA_HOST_DEVICE + RAJA_INLINE - constexpr TileSize(camp::idx_t size_) : size{size_} - { - } + constexpr TileSize(camp::idx_t size_) : size {size_} {} }; namespace statement @@ -52,11 +52,12 @@ namespace statement * A RAJA::kernel statement that implements a tiling (or blocking) loop. 
* */ -template -struct Tile : public internal::Statement { +template +struct Tile : public internal::Statement +{ using tile_policy_t = TilePolicy; using exec_policy_t = ExecPolicy; }; @@ -64,18 +65,18 @@ struct Tile : public internal::Statement { } // end namespace statement ///! tag for a tiling loop -template -struct tile_fixed { +template +struct tile_fixed +{ static constexpr camp::idx_t chunk_size = chunk_size_; }; -template -struct tile_dynamic { +template +struct tile_dynamic +{ static constexpr camp::idx_t id = ArgumentId; }; - - namespace internal { @@ -84,14 +85,18 @@ namespace internal * Assigns the tile segment to segment ArgumentId * */ -template -struct TileWrapper : public GenericWrapper { +template +struct TileWrapper : public GenericWrapper +{ using Base = GenericWrapper; using Base::Base; using privatizer = NestedPrivatizer; - template + template RAJA_INLINE void operator()(InSegmentIndexType si) { // Assign the tile's segment to the tuple @@ -102,9 +107,9 @@ struct TileWrapper : public GenericWrapper { } }; - -template -struct IterableTiler { +template +struct IterableTiler +{ using value_type = camp::decay; struct iterate @@ -120,46 +125,48 @@ struct IterableTiler { const Index_type block_id; public: - using value_type = iterate; - using difference_type = camp::idx_t; - using pointer = value_type *; - using reference = value_type &; + using value_type = iterate; + using difference_type = camp::idx_t; + using pointer = value_type*; + using reference = value_type&; using iterator_category = std::random_access_iterator_tag; RAJA_HOST_DEVICE + RAJA_INLINE - constexpr iterator(IterableTiler const &itiler_, Index_type block_id_) - : itiler{itiler_}, block_id{block_id_} - { - } + constexpr iterator(IterableTiler const& itiler_, Index_type block_id_) + : itiler {itiler_}, + block_id {block_id_} + {} RAJA_HOST_DEVICE + RAJA_INLINE value_type operator*() { auto start = block_id * itiler.block_size; - return iterate{itiler.it.slice(start, 
itiler.block_size), block_id}; + return iterate {itiler.it.slice(start, itiler.block_size), block_id}; } RAJA_HOST_DEVICE - RAJA_INLINE difference_type operator-(const iterator &rhs) const + RAJA_INLINE difference_type operator-(const iterator& rhs) const { return static_cast(block_id) - static_cast(rhs.block_id); } RAJA_HOST_DEVICE - RAJA_INLINE iterator operator-(const difference_type &rhs) const + RAJA_INLINE iterator operator-(const difference_type& rhs) const { return iterator(itiler, block_id - rhs); } RAJA_HOST_DEVICE - RAJA_INLINE iterator operator+(const difference_type &rhs) const + RAJA_INLINE iterator operator+(const difference_type& rhs) const { - return iterator(itiler, - block_id + rhs >= itiler.num_blocks ? itiler.num_blocks - : block_id + rhs); + return iterator(itiler, block_id + rhs >= itiler.num_blocks + ? itiler.num_blocks + : block_id + rhs); } RAJA_HOST_DEVICE @@ -169,39 +176,44 @@ struct IterableTiler { } RAJA_HOST_DEVICE - RAJA_INLINE bool operator!=(const iterator &rhs) const + RAJA_INLINE bool operator!=(const iterator& rhs) const { return block_id != rhs.block_id; } RAJA_HOST_DEVICE - RAJA_INLINE bool operator<(const iterator &rhs) const + RAJA_INLINE bool operator<(const iterator& rhs) const { return block_id < rhs.block_id; } }; RAJA_HOST_DEVICE + RAJA_INLINE - IterableTiler(const Iterable &it_, camp::idx_t block_size_) - : it{it_}, block_size{block_size_} + IterableTiler(const Iterable& it_, camp::idx_t block_size_) + : it {it_}, + block_size {block_size_} { using std::begin; using std::distance; using std::end; - dist = it.end() - it.begin(); // distance(begin(it), end(it)); + dist = it.end() - it.begin(); // distance(begin(it), end(it)); num_blocks = dist / block_size; // if (dist % block_size) num_blocks += 1; - if (dist - num_blocks * block_size > 0) { + if (dist - num_blocks * block_size > 0) + { num_blocks += 1; } } RAJA_HOST_DEVICE + RAJA_INLINE iterator begin() const { return iterator(*this, 0); } RAJA_HOST_DEVICE + 
RAJA_INLINE iterator end() const { return iterator(*this, num_blocks); } @@ -216,19 +228,21 @@ struct IterableTiler { * * */ -template +template struct StatementExecutor< - statement::Tile, EPol, EnclosedStmts...>, Types> { + statement::Tile, EPol, EnclosedStmts...>, + Types> +{ - template - static RAJA_INLINE void exec(Data &data) + template + static RAJA_INLINE void exec(Data& data) { // Get the segment we are going to tile - auto const &segment = camp::get(data.segment_tuple); + auto const& segment = camp::get(data.segment_tuple); // Get the tiling policies chunk size auto chunk_size = tile_fixed::chunk_size; @@ -238,12 +252,12 @@ struct StatementExecutor< IterableTiler tiled_iterable(segment, chunk_size); // Wrap in case forall_impl needs to thread_privatize - TileWrapper tile_wrapper(data); + TileWrapper tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, EPol {}, tiled_iterable, tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; @@ -251,34 +265,38 @@ struct StatementExecutor< }; template + typename EPol, + typename... 
EnclosedStmts, + typename Types> struct StatementExecutor< - statement::Tile, EPol, EnclosedStmts...>, Types> { + statement:: + Tile, EPol, EnclosedStmts...>, + Types> +{ - template - static RAJA_INLINE void exec(Data &data) + template + static RAJA_INLINE void exec(Data& data) { // Get the segment we are going to tile - auto const &segment = camp::get(data.segment_tuple); + auto const& segment = camp::get(data.segment_tuple); // Get the tiling policies chunk size auto chunk_size = camp::get(data.param_tuple); - static_assert(camp::concepts::metalib::is_same::value, - "Extracted parameter must be of type TileSize."); + static_assert( + camp::concepts::metalib::is_same::value, + "Extracted parameter must be of type TileSize."); // Create a tile iterator IterableTiler tiled_iterable(segment, chunk_size.size); // Wrap in case forall_impl needs to thread_privatize - TileWrapper tile_wrapper(data); + TileWrapper tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); - + forall_impl(r, EPol {}, tiled_iterable, tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); + // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; } diff --git a/include/RAJA/pattern/kernel/TileTCount.hpp b/include/RAJA/pattern/kernel/TileTCount.hpp index 2653e992c7..293d1730a2 100644 --- a/include/RAJA/pattern/kernel/TileTCount.hpp +++ b/include/RAJA/pattern/kernel/TileTCount.hpp @@ -42,12 +42,13 @@ namespace statement * Assigns the tile index to param ParamId * */ -template -struct TileTCount : public internal::Statement { +template +struct TileTCount : public internal::Statement +{ static_assert(std::is_base_of::value, "Inappropriate ParamId, ParamId must be of type " "RAJA::Statement::Param< # >"); @@ -66,50 +67,54 @@ namespace internal * Assigns the tile segment to segment ArgumentId * 
Assigns the tile index to param ParamId */ -template -struct TileTCountWrapper : public GenericWrapper { +template +struct TileTCountWrapper : public GenericWrapper +{ using Base = GenericWrapper; using Base::Base; using privatizer = NestedPrivatizer; - template + template RAJA_INLINE void operator()(InSegmentIndexType si) { // Assign the tile's segment to the tuple camp::get(Base::data.segment_tuple) = si.s; - + // Assign the tile's index Base::data.template assign_param(si.i); - + // Execute enclosed statements Base::exec(); } }; - - /*! * A generic RAJA::kernel forall_impl executor for statement::TileTCount * * */ -template +template struct StatementExecutor< - statement::TileTCount, Types> { + statement::TileTCount, + Types> +{ - template - static RAJA_INLINE void exec(Data &data) + template + static RAJA_INLINE void exec(Data& data) { // Get the segment we are going to tile - auto const &segment = camp::get(data.segment_tuple); + auto const& segment = camp::get(data.segment_tuple); // Get the tiling policies chunk size auto chunk_size = TPol::chunk_size; @@ -119,12 +124,13 @@ struct StatementExecutor< IterableTiler tiled_iterable(segment, chunk_size); // Wrap in case forall_impl needs to thread_privatize - TileTCountWrapper tile_wrapper(data); + TileTCountWrapper + tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, EPol {}, tiled_iterable, tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; diff --git a/include/RAJA/pattern/kernel/internal/LoopData.hpp b/include/RAJA/pattern/kernel/internal/LoopData.hpp index 9667a55538..5b57a01070 100644 --- a/include/RAJA/pattern/kernel/internal/LoopData.hpp +++ b/include/RAJA/pattern/kernel/internal/LoopData.hpp @@ -40,73 +40,71 @@ 
namespace internal { +// Universal base of all For wrappers for type traits +struct ForList +{}; +struct ForBase +{}; - // Universal base of all For wrappers for type traits - struct ForList { - }; - struct ForBase { - }; - struct CollapseBase { - }; - template - struct ForTraitBase : public ForBase { - constexpr static camp::idx_t index_val = ArgumentId; - using index = camp::num; - using index_type = camp::nil; // default to invalid type - using policy_type = Policy; - using type = ForTraitBase; // make camp::value compatible - }; - - +struct CollapseBase +{}; +template +struct ForTraitBase : public ForBase +{ + constexpr static camp::idx_t index_val = ArgumentId; + using index = camp::num; + using index_type = camp::nil; // default to invalid type + using policy_type = Policy; + using type = ForTraitBase; // make camp::value compatible +}; -template -struct iterable_difftype_getter { +template +struct iterable_difftype_getter +{ using type = typename std::iterator_traits< typename Iterator::iterator>::difference_type; }; -template +template using difftype_list_from_segments = typename camp::transform::type; -template +template using difftype_tuple_from_segments = typename camp::apply_l, difftype_list_from_segments>::type; - -template -struct iterable_value_type_getter { +template +struct iterable_value_type_getter +{ using type = typename std::iterator_traits::value_type; }; -template +template using value_type_list_from_segments = typename camp::transform::type; -template +template using index_tuple_from_segments = typename camp::apply_l, value_type_list_from_segments>::type; -template +template using index_types_from_segments = typename camp::apply_l, value_type_list_from_segments>::type; - - - -template -struct LoopData { +template +struct LoopData +{ using Self = LoopData; @@ -138,105 +136,97 @@ struct LoopData { using vector_sizes_t = tuple_of_n::value>; vector_sizes_t vector_sizes; - RAJA_INLINE RAJA_HOST_DEVICE constexpr - LoopData(SegmentTuple const &s, 
ParamTuple const &p, Resource r, Bodies const &... b) - : segment_tuple(s), param_tuple(p), res(r), bodies(b...) - { - } - constexpr LoopData(LoopData const &) = default; - constexpr LoopData(LoopData &&) = default; - - template - RAJA_HOST_DEVICE RAJA_INLINE void assign_offset(IndexT const &i) + RAJA_INLINE RAJA_HOST_DEVICE constexpr LoopData(SegmentTuple const& s, + ParamTuple const& p, + Resource r, + Bodies const&... b) + : segment_tuple(s), + param_tuple(p), + res(r), + bodies(b...) + {} + + constexpr LoopData(LoopData const&) = default; + constexpr LoopData(LoopData&&) = default; + + template + RAJA_HOST_DEVICE RAJA_INLINE void assign_offset(IndexT const& i) { camp::get(offset_tuple) = i; } - template - RAJA_HOST_DEVICE RAJA_INLINE void assign_param(IndexT const &i) + template + RAJA_HOST_DEVICE RAJA_INLINE void assign_param(IndexT const& i) { - using param_t = camp::at_v; + using param_t = + camp::at_v; camp::get(param_tuple) = param_t(i); } - template - RAJA_HOST_DEVICE RAJA_INLINE - auto get_param() -> - camp::at_v + template + RAJA_HOST_DEVICE RAJA_INLINE auto get_param() + -> camp::at_v { return camp::get(param_tuple); } - RAJA_HOST_DEVICE RAJA_INLINE - Resource get_resource() - { - return res; - } - - + RAJA_HOST_DEVICE RAJA_INLINE Resource get_resource() { return res; } }; +template +using segment_diff_type = typename std::iterator_traits< + typename camp::at_v::iterator>::difference_type; - - -template -using segment_diff_type = - typename std::iterator_traits< - typename camp::at_v::iterator>::difference_type; - - - - -template -RAJA_INLINE RAJA_HOST_DEVICE auto segment_length(Data const &data) -> - segment_diff_type +template +RAJA_INLINE RAJA_HOST_DEVICE auto segment_length(Data const& data) + -> segment_diff_type { return camp::get(data.segment_tuple).end() - camp::get(data.segment_tuple).begin(); } - - - -template -struct GenericWrapper : GenericWrapperBase { +template +struct GenericWrapper : GenericWrapperBase +{ using data_t = camp::decay; - 
data_t &data; + data_t& data; RAJA_INLINE - constexpr explicit GenericWrapper(data_t &d) : data{d} {} + constexpr explicit GenericWrapper(data_t& d) : data {d} {} RAJA_INLINE - void exec() { execute_statement_list, Types>(data); } + void exec() + { + execute_statement_list, Types>(data); + } }; - /*! * Convenience object used to create a thread-private LoopData object. */ -template -struct NestedPrivatizer { - using data_t = typename T::data_t; - using value_type = camp::decay; - using reference_type = value_type &; +template +struct NestedPrivatizer +{ + using data_t = typename T::data_t; + using value_type = camp::decay; + using reference_type = value_type&; data_t privatized_data; value_type privatized_wrapper; RAJA_INLINE - constexpr NestedPrivatizer(const T &o) - : privatized_data{o.data}, privatized_wrapper(privatized_data) - { - } + constexpr NestedPrivatizer(const T& o) + : privatized_data {o.data}, + privatized_wrapper(privatized_data) + {} RAJA_INLINE reference_type get_priv() { return privatized_wrapper; } }; - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/LoopTypes.hpp b/include/RAJA/pattern/kernel/internal/LoopTypes.hpp index 7f77df4214..3312e8a76a 100644 --- a/include/RAJA/pattern/kernel/internal/LoopTypes.hpp +++ b/include/RAJA/pattern/kernel/internal/LoopTypes.hpp @@ -22,70 +22,75 @@ #include "RAJA/pattern/kernel/internal/Template.hpp" #include "camp/camp.hpp" - namespace RAJA { namespace internal { -template +template struct LoopTypes; -template -struct LoopTypes, camp::list> { +template +struct LoopTypes, camp::list> +{ - using Self = LoopTypes, camp::list>; + using Self = + LoopTypes, camp::list>; static constexpr size_t s_num_segments = sizeof...(SegmentTypes); // This ensures that you don't double-loop over a segment within the same // loop nesting static_assert(s_num_segments == sizeof...(OffsetTypes), - "Number of segments and offsets must match"); + "Number of segments and offsets must 
match"); using segment_types_t = camp::list; - using offset_types_t = camp::list; + using offset_types_t = camp::list; }; - template -using makeInitialLoopTypes = - LoopTypes::value>, - list_of_n::value>>; +using makeInitialLoopTypes = LoopTypes< + list_of_n::value>, + list_of_n::value>>; template struct SetSegmentTypeHelper; -template +template struct SetSegmentTypeHelper> { - using segment_list = typename Types::segment_types_t; - using offset_list = typename Types::offset_types_t; - - static_assert(std::is_same, void>::value, - "Segment was already assigned: Probably looping over same segment in loop nest"); - - using type = LoopTypes< - camp::list>::type...>, - camp::list>::type...>>; - + using segment_list = typename Types::segment_types_t; + using offset_list = typename Types::offset_types_t; + + static_assert(std::is_same, void>::value, + "Segment was already assigned: Probably looping over same " + "segment in loop nest"); + + using type = LoopTypes< + camp::list< + typename std::conditional>::type...>, + camp::list< + typename std::conditional>::type...>>; }; - template -using setSegmentType = - typename SetSegmentTypeHelper>::type; +using setSegmentType = typename SetSegmentTypeHelper< + Types, + Segment, + T, + camp::make_idx_seq_t>::type; template -using setSegmentTypeFromData = - setSegmentType::index_types_t, Segment>>; +using setSegmentTypeFromData = setSegmentType< + Types, + Segment, + camp::at_v::index_types_t, Segment>>; } // end namespace internal diff --git a/include/RAJA/pattern/kernel/internal/Statement.hpp b/include/RAJA/pattern/kernel/internal/Statement.hpp index 48ca828a68..6748948ce0 100644 --- a/include/RAJA/pattern/kernel/internal/Statement.hpp +++ b/include/RAJA/pattern/kernel/internal/Statement.hpp @@ -28,25 +28,24 @@ namespace internal { - -template -struct Statement { - static_assert(std::is_same::value || sizeof...(EnclosedStmts) > 0, - "Executable statement with no enclosed statements, this is almost certainly a bug"); +template 
+struct Statement +{ + static_assert(std::is_same::value || + sizeof...(EnclosedStmts) > 0, + "Executable statement with no enclosed statements, this is " + "almost certainly a bug"); Statement() = delete; using enclosed_statements_t = StatementList; - using execution_policy_t = ExecPolicy; + using execution_policy_t = ExecPolicy; }; - - -template +template struct StatementExecutor; - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/StatementList.hpp b/include/RAJA/pattern/kernel/internal/StatementList.hpp index 5c0d71afb4..c9e005ca1e 100644 --- a/include/RAJA/pattern/kernel/internal/StatementList.hpp +++ b/include/RAJA/pattern/kernel/internal/StatementList.hpp @@ -31,27 +31,26 @@ namespace internal // forward decl -template +template struct StatementExecutor; - - -template +template using StatementList = camp::list; -template +template struct StatementListExecutor; +template +struct StatementListExecutor +{ -template -struct StatementListExecutor { - - template - static RAJA_INLINE void exec(Data &&data) + template + static RAJA_INLINE void exec(Data&& data) { // Get the statement we're going to execute @@ -61,35 +60,32 @@ struct StatementListExecutor { StatementExecutor::exec(std::forward(data)); // call our next statement - StatementListExecutor::exec( - std::forward(data)); + StatementListExecutor::exec(std::forward(data)); } }; - /* * termination case, a NOP. 
*/ -template -struct StatementListExecutor { +template +struct StatementListExecutor +{ - template - static RAJA_INLINE void exec(Data &&) - { - } + template + static RAJA_INLINE void exec(Data&&) + {} }; - -template -RAJA_INLINE void execute_statement_list(Data &&data) +template +RAJA_INLINE void execute_statement_list(Data&& data) { StatementListExecutor<0, camp::size::value, StmtList, Types>::exec( std::forward(data)); } - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/Template.hpp b/include/RAJA/pattern/kernel/internal/Template.hpp index c750b95986..59ba6b8fbf 100644 --- a/include/RAJA/pattern/kernel/internal/Template.hpp +++ b/include/RAJA/pattern/kernel/internal/Template.hpp @@ -20,7 +20,6 @@ #include "camp/camp.hpp" - namespace RAJA { namespace internal @@ -30,32 +29,32 @@ namespace detail { // Helper class to convert a camp::idx_t into some type T // used in template expansion in ListOfNHelper -template +template struct SeqToType { using type = T; }; -template +template struct ListOfNHelper; -template -struct ListOfNHelper > +template +struct ListOfNHelper> { using type = camp::list::type...>; }; -template +template struct TupleOfNHelper; -template -struct TupleOfNHelper > +template +struct TupleOfNHelper> { using type = camp::tuple::type...>; }; -} // namespace detail +} // namespace detail /* * This creates a camp::list with N types, each one being T. 
@@ -63,8 +62,9 @@ struct TupleOfNHelper > * That is, list_of_n == camp::list * */ -template -using list_of_n = typename detail::ListOfNHelper>::type; +template +using list_of_n = + typename detail::ListOfNHelper>::type; /* @@ -73,9 +73,9 @@ using list_of_n = typename detail::ListOfNHelper>::ty * That is, tuple_of_n == camp::tuple * */ -template -using tuple_of_n = typename detail::TupleOfNHelper>::type; - +template +using tuple_of_n = + typename detail::TupleOfNHelper>::type; } // end namespace internal diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index f1d70aeacb..a05629dcd0 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -28,8 +28,8 @@ #include "camp/concepts.hpp" #include "camp/tuple.hpp" -//Odd dependecy with atomics is breaking CI builds -//#include "RAJA/util/View.hpp" +// Odd dependecy with atomics is breaking CI builds +// #include "RAJA/util/View.hpp" #if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && !defined(RAJA_ENABLE_SYCL) #define RAJA_TEAM_SHARED __shared__ @@ -41,95 +41,114 @@ namespace RAJA { // GPU or CPU threads available -//strongly type the ExecPlace (guards agaist errors) -enum struct ExecPlace : int { HOST, DEVICE, NUM_PLACES }; - -struct null_launch_t { +// strongly type the ExecPlace (guards agaist errors) +enum struct ExecPlace : int +{ + HOST, + DEVICE, + NUM_PLACES }; +struct null_launch_t +{}; + // Support for host, and device -template + > -struct LoopPolicy { +struct LoopPolicy +{ using host_policy_t = HOST_POLICY; #if defined(RAJA_GPU_ACTIVE) using device_policy_t = DEVICE_POLICY; #endif }; -template -struct LaunchPolicy { + > +struct LaunchPolicy +{ using host_policy_t = HOST_POLICY; #if defined(RAJA_GPU_ACTIVE) using device_policy_t = DEVICE_POLICY; #endif }; - -struct Teams { +struct Teams +{ int value[3]; RAJA_INLINE + RAJA_HOST_DEVICE - constexpr Teams() : value{1, 1, 1} {} + constexpr Teams() : value {1, 1, 
1} {} RAJA_INLINE + RAJA_HOST_DEVICE - constexpr Teams(int i) : value{i, 1, 1} {} + constexpr Teams(int i) : value {i, 1, 1} {} RAJA_INLINE + RAJA_HOST_DEVICE - constexpr Teams(int i, int j) : value{i, j, 1} {} + constexpr Teams(int i, int j) : value {i, j, 1} {} RAJA_INLINE + RAJA_HOST_DEVICE - constexpr Teams(int i, int j, int k) : value{i, j, k} {} + constexpr Teams(int i, int j, int k) : value {i, j, k} {} }; -struct Threads { +struct Threads +{ int value[3]; RAJA_INLINE - RAJA_HOST_DEVICE - constexpr Threads() : value{1, 1, 1} {} + RAJA_HOST_DEVICE + constexpr Threads() : value {1, 1, 1} {} RAJA_INLINE + RAJA_HOST_DEVICE - constexpr Threads(int i) : value{i, 1, 1} {} + constexpr Threads(int i) : value {i, 1, 1} {} RAJA_INLINE + RAJA_HOST_DEVICE - constexpr Threads(int i, int j) : value{i, j, 1} {} + constexpr Threads(int i, int j) : value {i, j, 1} {} RAJA_INLINE + RAJA_HOST_DEVICE - constexpr Threads(int i, int j, int k) : value{i, j, k} {} + constexpr Threads(int i, int j, int k) : value {i, j, k} {} }; -struct Lanes { +struct Lanes +{ int value; RAJA_INLINE + RAJA_HOST_DEVICE constexpr Lanes() : value(0) {} RAJA_INLINE + RAJA_HOST_DEVICE constexpr Lanes(int i) : value(i) {} }; -struct LaunchParams { +struct LaunchParams +{ public: Teams teams; Threads threads; @@ -138,67 +157,74 @@ struct LaunchParams { RAJA_INLINE LaunchParams() = default; - LaunchParams(Teams in_teams, Threads in_threads, size_t in_shared_mem_size = 0) - : teams(in_teams), threads(in_threads), shared_mem_size(in_shared_mem_size) {}; + LaunchParams(Teams in_teams, + Threads in_threads, + size_t in_shared_mem_size = 0) + : teams(in_teams), + threads(in_threads), + shared_mem_size(in_shared_mem_size) {}; private: RAJA_HOST_DEVICE + RAJA_INLINE - Teams apply(Teams const &a) { return (teams = a); } + Teams apply(Teams const& a) { return (teams = a); } RAJA_HOST_DEVICE + RAJA_INLINE - Threads apply(Threads const &a) { return (threads = a); } + Threads apply(Threads const& a) { return (threads = 
a); } }; class LaunchContext { public: - - //Bump style allocator used to - //get memory from the pool + // Bump style allocator used to + // get memory from the pool size_t shared_mem_offset; - void *shared_mem_ptr; + void* shared_mem_ptr; #if defined(RAJA_ENABLE_SYCL) - mutable cl::sycl::nd_item<3> *itm; + mutable cl::sycl::nd_item<3>* itm; #endif RAJA_HOST_DEVICE LaunchContext() - : shared_mem_offset(0), shared_mem_ptr(nullptr) - { - } + : shared_mem_offset(0), + shared_mem_ptr(nullptr) + {} - //TODO handle alignment + // TODO handle alignment template RAJA_HOST_DEVICE T* getSharedMemory(size_t bytes) { - //Calculate offset in bytes with a char pointer - void* mem_ptr = static_cast(shared_mem_ptr) + shared_mem_offset; + // Calculate offset in bytes with a char pointer + void* mem_ptr = static_cast(shared_mem_ptr) + shared_mem_offset; - shared_mem_offset += bytes*sizeof(T); + shared_mem_offset += bytes * sizeof(T); - //convert to desired type + // convert to desired type return static_cast(mem_ptr); } /* //Odd dependecy with atomics is breaking CI builds - template - RAJA_HOST_DEVICE auto getSharedMemoryView(size_t bytes, arg idx, args... idxs) + template RAJA_HOST_DEVICE auto + getSharedMemoryView(size_t bytes, arg idx, args... idxs) { T * mem_ptr = &((T*) shared_mem_ptr)[shared_mem_offset]; shared_mem_offset += bytes*sizeof(T); - return RAJA::View>(mem_ptr, idx, idxs...); + return RAJA::View>(mem_ptr, idx, + idxs...); } */ RAJA_HOST_DEVICE void releaseSharedMemory() { - //On the cpu/gpu we want to restart the count + // On the cpu/gpu we want to restart the count shared_mem_offset = 0; } @@ -215,22 +241,27 @@ class LaunchContext } }; -template +template struct LaunchExecute; -//Policy based launch with support to new reducers... -template -void launch(LaunchParams const &launch_params, const char *kernel_name, ReduceParams&&... rest_of_launch_args) +// Policy based launch with support to new reducers... 
+template +void launch(LaunchParams const& launch_params, + const char* kernel_name, + ReduceParams&&... rest_of_launch_args) { - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto&& launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); - //Take the first policy as we assume the second policy is not user defined. - //We rely on the user to pair launch and loop policies correctly. - util::PluginContext context{util::make_context()}; + // Take the first policy as we assume the second policy is not user defined. + // We rely on the user to pair launch and loop policies correctly. + util::PluginContext context { + util::make_context()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -242,29 +273,35 @@ void launch(LaunchParams const &launch_params, const char *kernel_name, ReducePa using launch_t = LaunchExecute; - using Res = typename resources::get_resource::type; + using Res = typename resources::get_resource< + typename LAUNCH_POLICY::host_policy_t>::type; - launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, reducers); + launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, + reducers); util::callPostLaunchPlugins(context); } - -//Duplicate of code above on account that we need to support the case in which a kernel_name is not given -template -void launch(LaunchParams const &launch_params, ReduceParams&&... rest_of_launch_args) +// Duplicate of code above on account that we need to support the case in which +// a kernel_name is not given +template +void launch(LaunchParams const& launch_params, + ReduceParams&&... 
rest_of_launch_args) { - const char *kernel_name = nullptr; + const char* kernel_name = nullptr; - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto&& launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); - //Take the first policy as we assume the second policy is not user defined. - //We rely on the user to pair launch and loop policies correctly. - util::PluginContext context{util::make_context()}; + // Take the first policy as we assume the second policy is not user defined. + // We rely on the user to pair launch and loop policies correctly. + util::PluginContext context { + util::make_context()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -276,148 +313,201 @@ void launch(LaunchParams const &launch_params, ReduceParams&&... 
rest_of_launch_ using launch_t = LaunchExecute; - using Res = typename resources::get_resource::type; + using Res = typename resources::get_resource< + typename LAUNCH_POLICY::host_policy_t>::type; - launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, reducers); + launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, + reducers); util::callPostLaunchPlugins(context); } //================================================= -//Run time based policy launch +// Run time based policy launch //================================================= -template -void launch(ExecPlace place, LaunchParams const ¶ms, BODY const &body) +template +void launch(ExecPlace place, LaunchParams const& params, BODY const& body) { launch(place, params, nullptr, body); } -template -void launch(ExecPlace place, const LaunchParams ¶ms, const char *kernel_name, BODY const &body) +template +void launch(ExecPlace place, + const LaunchParams& params, + const char* kernel_name, + BODY const& body) { - //Forward to single policy launch API - simplifies testing of plugins - switch (place) { - case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch>(Res::get_default(), params, kernel_name, body); + // Forward to single policy launch API - simplifies testing of plugins + switch (place) + { + case ExecPlace::HOST: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), params, kernel_name, body); break; } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch>(Res::get_default(), params, kernel_name, body); + case ExecPlace::DEVICE: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), params, kernel_name, body); break; } #endif default: RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -//Run-time 
API for new reducer interface -template -void launch(ExecPlace place, const LaunchParams &launch_params, const char *kernel_name, ReduceParams&&... rest_of_launch_args) +// Run-time API for new reducer interface +template +void launch(ExecPlace place, + const LaunchParams& launch_params, + const char* kernel_name, + ReduceParams&&... rest_of_launch_args) { - //Forward to single policy launch API - simplifies testing of plugins - switch (place) { - case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); + // Forward to single policy launch API - simplifies testing of plugins + switch (place) + { + case ExecPlace::HOST: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), launch_params, kernel_name, + std::forward(rest_of_launch_args)...); break; } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); + case ExecPlace::DEVICE: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), launch_params, kernel_name, + std::forward(rest_of_launch_args)...); break; } #endif default: RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -//Run-time API for new reducer interface with support of the case without a new kernel name -template -void launch(ExecPlace place, const LaunchParams &launch_params, ReduceParams&&... rest_of_launch_args) - //BODY const &body) +// Run-time API for new reducer interface with support of the case without a new +// kernel name +template +void launch(ExecPlace place, + const LaunchParams& launch_params, + ReduceParams&&... 
rest_of_launch_args) +// BODY const &body) { - const char *kernel_name = nullptr; + const char* kernel_name = nullptr; - //Forward to single policy launch API - simplifies testing of plugins - switch (place) { - case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); + // Forward to single policy launch API - simplifies testing of plugins + switch (place) + { + case ExecPlace::HOST: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), launch_params, kernel_name, + std::forward(rest_of_launch_args)...); break; } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); + case ExecPlace::DEVICE: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), launch_params, kernel_name, + std::forward(rest_of_launch_args)...); break; } #endif default: RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -// Helper function to retrieve a resource based on the run-time policy - if a device is active -#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP) || defined(RAJA_ENABLE_SYCL) +// Helper function to retrieve a resource based on the run-time policy - if a +// device is active +#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP) || \ + defined(RAJA_ENABLE_SYCL) template -RAJA::resources::Resource Get_Runtime_Resource(T host_res, U device_res, RAJA::ExecPlace device){ - if(device == RAJA::ExecPlace::DEVICE) {return RAJA::resources::Resource(device_res);} - else { return RAJA::resources::Resource(host_res); } +RAJA::resources::Resource Get_Runtime_Resource(T host_res, + U device_res, + RAJA::ExecPlace device) +{ + if 
(device == RAJA::ExecPlace::DEVICE) + { + return RAJA::resources::Resource(device_res); + } + else + { + return RAJA::resources::Resource(host_res); + } } #endif template -RAJA::resources::Resource Get_Host_Resource(T host_res, RAJA::ExecPlace device){ - if(device == RAJA::ExecPlace::DEVICE) {RAJA_ABORT_OR_THROW("Device is not enabled");} +RAJA::resources::Resource Get_Host_Resource(T host_res, RAJA::ExecPlace device) +{ + if (device == RAJA::ExecPlace::DEVICE) + { + RAJA_ABORT_OR_THROW("Device is not enabled"); + } return RAJA::resources::Resource(host_res); } -//Launch API which takes team resource struct and supports new reducers -template -resources::EventProxy -launch(RAJA::resources::Resource res, LaunchParams const &launch_params, - const char *kernel_name, ReduceParams&&... rest_of_launch_args) +// Launch API which takes team resource struct and supports new reducers +template +resources::EventProxy launch( + RAJA::resources::Resource res, + LaunchParams const& launch_params, + const char* kernel_name, + ReduceParams&&... rest_of_launch_args) { - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto&& launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); ExecPlace place; - if(res.get_platform() == RAJA::Platform::host) { + if (res.get_platform() == RAJA::Platform::host) + { place = RAJA::ExecPlace::HOST; - } else { + } + else + { place = RAJA::ExecPlace::DEVICE; } // - //Configure plugins + // Configure plugins // #if defined(RAJA_GPU_ACTIVE) - util::PluginContext context{place == ExecPlace::HOST ? - util::make_context() : - util::make_context()}; + util::PluginContext context { + place == ExecPlace::HOST + ? 
util::make_context() + : util::make_context()}; #else - util::PluginContext context{util::make_context()}; + util::PluginContext context { + util::make_context()}; #endif util::callPreCapturePlugins(context); @@ -429,22 +519,28 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, util::callPreLaunchPlugins(context); - switch (place) { - case ExecPlace::HOST: { + switch (place) + { + case ExecPlace::HOST: + { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); return e_proxy; } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { + case ExecPlace::DEVICE: + { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); return e_proxy; } #endif - default: { + default: + { RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } } @@ -455,37 +551,45 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, return resources::EventProxy(res); } - -//Duplicate of API above on account that we need to handle the case that a kernel name is not provided -template -resources::EventProxy -launch(RAJA::resources::Resource res, LaunchParams const &launch_params, - ReduceParams&&... rest_of_launch_args) +// Duplicate of API above on account that we need to handle the case that a +// kernel name is not provided +template +resources::EventProxy launch( + RAJA::resources::Resource res, + LaunchParams const& launch_params, + ReduceParams&&... 
rest_of_launch_args) { - const char *kernel_name = nullptr; + const char* kernel_name = nullptr; - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto&& launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); ExecPlace place; - if(res.get_platform() == RAJA::Platform::host) { + if (res.get_platform() == RAJA::Platform::host) + { place = RAJA::ExecPlace::HOST; - } else { + } + else + { place = RAJA::ExecPlace::DEVICE; } // - //Configure plugins + // Configure plugins // #if defined(RAJA_GPU_ACTIVE) - util::PluginContext context{place == ExecPlace::HOST ? - util::make_context() : - util::make_context()}; + util::PluginContext context { + place == ExecPlace::HOST + ? util::make_context() + : util::make_context()}; #else - util::PluginContext context{util::make_context()}; + util::PluginContext context { + util::make_context()}; #endif util::callPreCapturePlugins(context); @@ -497,22 +601,28 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, util::callPreLaunchPlugins(context); - switch (place) { - case ExecPlace::HOST: { + switch (place) + { + case ExecPlace::HOST: + { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); util::callPostLaunchPlugins(context); return e_proxy; } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { + case ExecPlace::DEVICE: + { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); 
util::callPostLaunchPlugins(context); return e_proxy; } #endif - default: { + default: + { RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } } @@ -530,183 +640,163 @@ using loop_policy = typename POLICY_LIST::device_policy_t; using loop_policy = typename POLICY_LIST::host_policy_t; #endif -template +template struct LoopExecute; -template +template struct LoopICountExecute; RAJA_SUPPRESS_HD_WARN -template -RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, - SEGMENT const &segment, - BODY const &body) +template +RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const& ctx, + SEGMENT const& segment, + BODY const& body) { - LoopExecute, SEGMENT>::exec(ctx, - segment, - body); + LoopExecute, SEGMENT>::exec(ctx, segment, body); } -template -RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const &ctx, - SEGMENT const &segment, - BODY const &body) +template +RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const& ctx, + SEGMENT const& segment, + BODY const& body) { - LoopICountExecute, SEGMENT>::exec(ctx, - segment, - body); + LoopICountExecute, SEGMENT>::exec(ctx, segment, + body); } namespace expt { RAJA_SUPPRESS_HD_WARN -template -RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) +template +RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const& ctx, + SEGMENT const& segment0, + SEGMENT const& segment1, + BODY const& body) { - LoopExecute, SEGMENT>::exec(ctx, - segment0, - segment1, + LoopExecute, SEGMENT>::exec(ctx, segment0, segment1, body); } RAJA_SUPPRESS_HD_WARN -template -RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) +template +RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const& ctx, + SEGMENT const& segment0, + SEGMENT const& segment1, + SEGMENT const& segment2, + BODY const& body) { - LoopExecute, SEGMENT>::exec(ctx, - segment0, - 
segment1, - segment2, - body); + LoopExecute, SEGMENT>::exec(ctx, segment0, segment1, + segment2, body); } RAJA_SUPPRESS_HD_WARN -template -RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const &ctx, - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) +template +RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const& ctx, + SEGMENT const& segment0, + SEGMENT const& segment1, + SEGMENT const& segment2, + BODY const& body) { - LoopICountExecute, SEGMENT>::exec(ctx, - segment0, segment1, segment2, body); + LoopICountExecute, SEGMENT>::exec( + ctx, segment0, segment1, segment2, body); } -} //namespace expt +} // namespace expt -template +template struct TileExecute; -template +template struct TileTCountExecute; -template -RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const &ctx, +template +RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const& ctx, TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) + SEGMENT const& segment, + BODY const& body) { - TileExecute, SEGMENT>::exec(ctx, - tile_size, - segment, + TileExecute, SEGMENT>::exec(ctx, tile_size, segment, body); } -template -RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const &ctx, - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) +template +RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const& ctx, + TILE_T tile_size, + SEGMENT const& segment, + BODY const& body) { - TileTCountExecute, SEGMENT>::exec(ctx, - tile_size, - segment, - body); + TileTCountExecute, SEGMENT>::exec(ctx, tile_size, + segment, body); } namespace expt { -template -RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const &ctx, +template +RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const& ctx, TILE_T tile_size0, TILE_T tile_size1, - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) + SEGMENT const& segment0, + SEGMENT const& segment1, + BODY const& body) { - TileExecute, SEGMENT>::exec(ctx, - tile_size0, - tile_size1, - 
segment0, - segment1, - body); + TileExecute, SEGMENT>::exec( + ctx, tile_size0, tile_size1, segment0, segment1, body); } -template -RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const &ctx, - TILE_T tile_size0, - TILE_T tile_size1, - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) +template +RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const& ctx, + TILE_T tile_size0, + TILE_T tile_size1, + SEGMENT const& segment0, + SEGMENT const& segment1, + BODY const& body) { - TileTCountExecute, SEGMENT>::exec(ctx, - tile_size0, - tile_size1, - segment0, - segment1, - body); + TileTCountExecute, SEGMENT>::exec( + ctx, tile_size0, tile_size1, segment0, segment1, body); } -} //namespace expt +} // namespace expt } // namespace RAJA #endif diff --git a/include/RAJA/pattern/multi_reduce.hpp b/include/RAJA/pattern/multi_reduce.hpp index 3fbe36877c..8a1c37db2b 100644 --- a/include/RAJA/pattern/multi_reduce.hpp +++ b/include/RAJA/pattern/multi_reduce.hpp @@ -64,7 +64,7 @@ namespace RAJA * ****************************************************************************** */ -template +template struct MultiReduceMin; /*! @@ -94,7 +94,7 @@ struct MultiReduceMin; * ****************************************************************************** */ -template +template struct MultiReduceMax; /*! @@ -124,7 +124,7 @@ struct MultiReduceMax; * ****************************************************************************** */ -template +template struct MultiReduceSum; /*! @@ -154,9 +154,9 @@ struct MultiReduceSum; * ****************************************************************************** */ -template +template struct MultiReduceBitOr; - + /*! 
****************************************************************************** @@ -171,7 +171,8 @@ struct MultiReduceBitOr; Index_ptr bins = ...; Real_ptr bit_vals = ...; - MultiReduceBitAnd my_bits(num_bins, init_val); + MultiReduceBitAnd my_bits(num_bins, + init_val); forall( ..., [=] (Index_type i) { my_bits[bins[i]] &= (data[i]); @@ -185,10 +186,10 @@ struct MultiReduceBitOr; * ****************************************************************************** */ -template +template struct MultiReduceBitAnd; -} //namespace RAJA +} // namespace RAJA #endif // closing endif for header file include guard diff --git a/include/RAJA/pattern/params/forall.hpp b/include/RAJA/pattern/params/forall.hpp index 5a656206f5..c98e4429a2 100644 --- a/include/RAJA/pattern/params/forall.hpp +++ b/include/RAJA/pattern/params/forall.hpp @@ -21,348 +21,450 @@ namespace RAJA namespace expt { - // - // - // Forall Parameter Packing type - // - // - struct ParamMultiplexer; - - template - struct ForallParamPack { - - friend struct ParamMultiplexer; - - using Base = camp::tuple; - Base param_tup; - - static constexpr size_t param_tup_sz = camp::tuple_size::value; - using params_seq = camp::make_idx_seq_t< param_tup_sz >; - - private: - - // Init - template - static constexpr void detail_init(EXEC_POL, camp::idx_seq, ForallParamPack& f_params, Args&& ...args) { - CAMP_EXPAND(expt::detail::init( camp::get(f_params.param_tup), std::forward(args)... 
)); - } - - // Combine - template - RAJA_HOST_DEVICE - static constexpr void detail_combine(EXEC_POL, camp::idx_seq, ForallParamPack& out, const ForallParamPack& in ) { - CAMP_EXPAND(detail::combine( camp::get(out.param_tup), camp::get(in.param_tup))); - } - - template - RAJA_HOST_DEVICE - static constexpr void detail_combine(EXEC_POL, camp::idx_seq, ForallParamPack& f_params ) { - CAMP_EXPAND(detail::combine( camp::get(f_params.param_tup) )); - } - - // Resolve - template - static constexpr void detail_resolve(EXEC_POL, camp::idx_seq, ForallParamPack& f_params, Args&& ...args) { - CAMP_EXPAND(detail::resolve( camp::get(f_params.param_tup), std::forward(args)... )); - } - - // Used to construct the argument TYPES that will be invoked with the lambda. - template - static constexpr auto LAMBDA_ARG_TUP_T() { return camp::tuple<>{}; }; - template - static constexpr auto LAMBDA_ARG_TUP_T() { return typename First::ARG_TUP_T(); }; - template - static constexpr auto LAMBDA_ARG_TUP_T() { return camp::tuple_cat_pair(typename First::ARG_TUP_T(), LAMBDA_ARG_TUP_T()); }; - - using lambda_arg_tuple_t = decltype(LAMBDA_ARG_TUP_T()); - - //Use the size of param_tup to generate the argument list. 
- RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<0>) { return camp::make_tuple(); } - RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<1>) { return camp::get(param_tup).get_lambda_arg_tup(); } - template - RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num) { - return camp::tuple_cat_pair( camp::get(param_tup).get_lambda_arg_tup(), LAMBDA_ARG_TUP_V(camp::num()) ); - } - - public: - ForallParamPack(){} - - RAJA_HOST_DEVICE constexpr lambda_arg_tuple_t lambda_args() {return LAMBDA_ARG_TUP_V(camp::num());} - - using lambda_arg_seq = camp::make_idx_seq_t::value>; - - template - ForallParamPack(camp::tuple&& t) : param_tup(std::move(t)) {}; - }; // struct ForallParamPack - - - - //=========================================================================== - // - // - // ParamMultiplexer is how we hook into the individual calls within forall_impl. - // - // - struct ParamMultiplexer { - template> - static void constexpr init( ForallParamPack& f_params, Args&& ...args) { - FP::detail_init(EXEC_POL(),typename FP::params_seq(), f_params, std::forward(args)... ); - } - template> - static void constexpr combine(ForallParamPack& f_params, Args&& ...args){ - FP::detail_combine(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)... ); - } - template> - static void constexpr resolve( ForallParamPack& f_params, Args&& ...args){ - FP::detail_resolve(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)... ); - } - }; - //=========================================================================== +// +// +// Forall Parameter Packing type +// +// +struct ParamMultiplexer; + +template +struct ForallParamPack +{ + friend struct ParamMultiplexer; + using Base = camp::tuple; + Base param_tup; - //=========================================================================== - // - // - // ForallParamPack generators. 
- // - // - RAJA_INLINE static auto get_empty_forall_param_pack(){ - static ForallParamPack<> p; - return p; - } + static constexpr size_t param_tup_sz = camp::tuple_size::value; + using params_seq = camp::make_idx_seq_t; - namespace detail { - // all_true trick to perform variadic expansion in static asserts. - // https://stackoverflow.com/questions/36933176/how-do-you-static-assert-the-values-in-a-parameter-pack-of-a-variadic-template - template struct bool_pack; - template - using all_true = std::is_same, bool_pack>; +private: + // Init + template + static constexpr void detail_init(EXEC_POL, + camp::idx_seq, + ForallParamPack& f_params, + Args&&... args) + { + CAMP_EXPAND(expt::detail::init(camp::get(f_params.param_tup), + std::forward(args)...)); + } - template - using check_types_derive_base = all_true::value...>; - } // namespace detail + // Combine + template + RAJA_HOST_DEVICE static constexpr void detail_combine( + EXEC_POL, + camp::idx_seq, + ForallParamPack& out, + const ForallParamPack& in) + { + CAMP_EXPAND(detail::combine(camp::get(out.param_tup), + camp::get(in.param_tup))); + } + template + RAJA_HOST_DEVICE static constexpr void detail_combine( + EXEC_POL, + camp::idx_seq, + ForallParamPack& f_params) + { + CAMP_EXPAND(detail::combine(camp::get(f_params.param_tup))); + } - template - constexpr auto make_forall_param_pack_from_tuple(camp::tuple&& tuple) { - static_assert(detail::check_types_derive_base...>::value, - "Forall optional arguments do not derive ForallParamBase. Please see Reducer, ReducerLoc and KernelName for examples.") ; - return ForallParamPack...>(std::move(tuple)); + // Resolve + template + static constexpr void detail_resolve(EXEC_POL, + camp::idx_seq, + ForallParamPack& f_params, + Args&&... args) + { + CAMP_EXPAND(detail::resolve(camp::get(f_params.param_tup), + std::forward(args)...)); } - + // Used to construct the argument TYPES that will be invoked with the lambda. 
+ template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return camp::tuple<> {}; + }; - namespace detail { - // Maybe we should do a lot of these with structs... - template - constexpr auto tuple_from_seq (const camp::idx_seq&, TupleType&& tuple){ - return camp::forward_as_tuple( camp::get< Seq >(std::forward(tuple))... ); - }; + template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return typename First::ARG_TUP_T(); + }; - template - constexpr auto strip_last_elem(camp::tuple&& tuple){ - return tuple_from_seq(camp::make_idx_seq_t{},std::move(tuple)); - }; - } // namespace detail + template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return camp::tuple_cat_pair(typename First::ARG_TUP_T(), + LAMBDA_ARG_TUP_T()); + }; + using lambda_arg_tuple_t = decltype(LAMBDA_ARG_TUP_T()); - // Make a tuple of the param pack except the final element... - template - constexpr auto make_forall_param_pack(Args&&... args){ - // We assume the last element of the pack is the lambda so we need to strip it from the list. - auto stripped_arg_tuple = detail::strip_last_elem( camp::forward_as_tuple(std::forward(args)...) ); - return make_forall_param_pack_from_tuple(std::move(stripped_arg_tuple)); + // Use the size of param_tup to generate the argument list. + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<0>) + { + return camp::make_tuple(); } - //=========================================================================== - - - - //=========================================================================== - // - // - // Callable should be the last argument in the param pack, just extract it... - // - // - template - constexpr auto&& get_lambda(Args&&... args){ - return camp::get( camp::forward_as_tuple(std::forward(args)...) 
); - } - //=========================================================================== - - - - //=========================================================================== - // - // - // Checking expected argument list against the assumed lambda. - // - // - namespace detail { - - // - // - // Lambda traits Utilities - // - // - template - struct lambda_traits; - - template - struct lambda_traits - { // non-const specialization - using arg_type = First; - }; - template - struct lambda_traits - { // const specialization - using arg_type = First; - }; - - template - typename lambda_traits::arg_type* lambda_arg_helper(T); - - - // - // - // List manipulation Utilities - // - // - template - constexpr auto list_remove_pointer(const camp::list&){ - return camp::list::type>...>{}; - } - - template - constexpr auto list_add_lvalue_ref(const camp::list&){ - return camp::list::type...>{}; - } - - template - constexpr auto tuple_to_list(const camp::tuple&) { - return camp::list{}; - } - - // TODO : Change to std::is_invocable at c++17 - template - struct is_invocable : - std::is_constructible< - std::function, - std::reference_wrapper::type> - >{}; - - template - using void_t = void; - - template - struct has_empty_op : std::false_type{}; - - template - struct has_empty_op)>> : std::true_type{}; - - template - struct get_lambda_index_type { - typedef typename std::remove_pointer< - decltype(lambda_arg_helper( - &camp::decay::operator()) - ) - >::type type; - }; - - // If LAMBDA::operator() is not available this probably isn't a generic lambda and we can't extract and check args. - template - constexpr concepts::enable_if>> check_invocable(LAMBDA&&, const camp::list&) {} - - template - constexpr concepts::enable_if> check_invocable(LAMBDA&&, const camp::list&) { -#if !defined(RAJA_ENABLE_HIP) - static_assert(is_invocable::type, EXPECTED_ARGS...>::value, "LAMBDA Not invocable w/ EXPECTED_ARGS. 
Ordering and types must match between RAJA::expt::Reduce() and ValOp arguments."); -#endif - } - - } // namespace detail + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<1>) + { + return camp::get(param_tup).get_lambda_arg_tup(); + } - template - constexpr - void - check_forall_optional_args(Lambda&& l, ForallParams& fpp) { + template + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num) + { + return camp::tuple_cat_pair( + camp::get(param_tup).get_lambda_arg_tup(), + LAMBDA_ARG_TUP_V(camp::num())); + } - using expected_arg_type_list = decltype( detail::list_add_lvalue_ref( - detail::list_remove_pointer( - detail::tuple_to_list( - fpp.lambda_args() - ) - ) - )); +public: + ForallParamPack() {} - detail::check_invocable(std::forward(l), expected_arg_type_list{}); + RAJA_HOST_DEVICE constexpr lambda_arg_tuple_t lambda_args() + { + return LAMBDA_ARG_TUP_V(camp::num()); } - //=========================================================================== - + using lambda_arg_seq = + camp::make_idx_seq_t::value>; - //=========================================================================== - // - // - // Type trailts for SFINAE work. - // - // - namespace type_traits + template + ForallParamPack(camp::tuple&& t) : param_tup(std::move(t)) {}; +}; // struct ForallParamPack + +//=========================================================================== +// +// +// ParamMultiplexer is how we hook into the individual calls within forall_impl. +// +// +struct ParamMultiplexer +{ + template> + static void constexpr init(ForallParamPack& f_params, + Args&&... 
args) { - template struct is_ForallParamPack : std::false_type {}; - template struct is_ForallParamPack> : std::true_type {}; + FP::detail_init(EXEC_POL(), typename FP::params_seq(), f_params, + std::forward(args)...); + } - template struct is_ForallParamPack_empty : std::true_type {}; - template struct is_ForallParamPack_empty> : std::false_type {}; - template <> struct is_ForallParamPack_empty> : std::true_type {}; + template> + static void constexpr combine(ForallParamPack& f_params, + Args&&... args) + { + FP::detail_combine(EXEC_POL(), typename FP::params_seq(), f_params, + std::forward(args)...); } - //=========================================================================== - - - - //=========================================================================== - // - // - // Invoke Forall with Params. - // - // - namespace detail { - template - RAJA_HOST_DEVICE - constexpr - auto get_lambda_args(FP& fpp) - -> decltype( *camp::get( fpp.lambda_args() ) ) { - return ( *camp::get( fpp.lambda_args() ) ); - } - - CAMP_SUPPRESS_HD_WARN - template - RAJA_HOST_DEVICE constexpr auto invoke_with_order(Params&& params, - Fn&& f, - camp::idx_seq, - Ts&&... extra) - { - return f(std::forward(extra...), ( get_lambda_args(params) )...); - } - } // namespace detail - - //CAMP_SUPPRESS_HD_WARN - template - RAJA_HOST_DEVICE constexpr auto invoke_body(Params&& params, Fn&& f, Ts&&... extra) + + template> + static void constexpr resolve(ForallParamPack& f_params, + Args&&... 
args) { - return detail::invoke_with_order( - camp::forward(params), - camp::forward(f), - typename camp::decay::lambda_arg_seq(), - camp::forward(extra)...); + FP::detail_resolve(EXEC_POL(), typename FP::params_seq(), f_params, + std::forward(args)...); } - //=========================================================================== +}; + +//=========================================================================== + + +//=========================================================================== +// +// +// ForallParamPack generators. +// +// +RAJA_INLINE static auto get_empty_forall_param_pack() +{ + static ForallParamPack<> p; + return p; +} + +namespace detail +{ +// all_true trick to perform variadic expansion in static asserts. +// https://stackoverflow.com/questions/36933176/how-do-you-static-assert-the-values-in-a-parameter-pack-of-a-variadic-template +template +struct bool_pack; +template +using all_true = std::is_same, bool_pack>; + +template +using check_types_derive_base = + all_true::value...>; +} // namespace detail + +template +constexpr auto make_forall_param_pack_from_tuple(camp::tuple&& tuple) +{ + static_assert(detail::check_types_derive_base...>::value, + "Forall optional arguments do not derive ForallParamBase. " + "Please see Reducer, ReducerLoc and KernelName for examples."); + return ForallParamPack...>(std::move(tuple)); +} + +namespace detail +{ +// Maybe we should do a lot of these with structs... +template +constexpr auto tuple_from_seq(const camp::idx_seq&, TupleType&& tuple) +{ + return camp::forward_as_tuple( + camp::get(std::forward(tuple))...); +}; + +template +constexpr auto strip_last_elem(camp::tuple&& tuple) +{ + return tuple_from_seq(camp::make_idx_seq_t {}, + std::move(tuple)); +}; +} // namespace detail + +// Make a tuple of the param pack except the final element... +template +constexpr auto make_forall_param_pack(Args&&... 
args) +{ + // We assume the last element of the pack is the lambda so we need to strip it + // from the list. + auto stripped_arg_tuple = detail::strip_last_elem( + camp::forward_as_tuple(std::forward(args)...)); + return make_forall_param_pack_from_tuple(std::move(stripped_arg_tuple)); +} + +//=========================================================================== + + +//=========================================================================== +// +// +// Callable should be the last argument in the param pack, just extract it... +// +// +template +constexpr auto&& get_lambda(Args&&... args) +{ + return camp::get( + camp::forward_as_tuple(std::forward(args)...)); +} + +//=========================================================================== + + +//=========================================================================== +// +// +// Checking expected argument list against the assumed lambda. +// +// +namespace detail +{ + +// +// +// Lambda traits Utilities +// +// +template +struct lambda_traits; + +template +struct lambda_traits +{ // non-const specialization + using arg_type = First; +}; + +template +struct lambda_traits +{ // const specialization + using arg_type = First; +}; + +template +typename lambda_traits::arg_type* lambda_arg_helper(T); + +// +// +// List manipulation Utilities +// +// +template +constexpr auto list_remove_pointer(const camp::list&) +{ + return camp::list::type>...> {}; +} + +template +constexpr auto list_add_lvalue_ref(const camp::list&) +{ + return camp::list::type...> {}; +} + +template +constexpr auto tuple_to_list(const camp::tuple&) +{ + return camp::list {}; +} + +// TODO : Change to std::is_invocable at c++17 +template +struct is_invocable + : std::is_constructible< + std::function, + std::reference_wrapper::type>> +{}; + +template +using void_t = void; + +template +struct has_empty_op : std::false_type +{}; + +template +struct has_empty_op)>> + : std::true_type +{}; + +template +struct get_lambda_index_type +{ + 
typedef typename std::remove_pointer::operator()))>::type type; +}; + +// If LAMBDA::operator() is not available this probably isn't a generic lambda +// and we can't extract and check args. +template +constexpr concepts::enable_if>> +check_invocable(LAMBDA&&, const camp::list&) +{} + +template +constexpr concepts::enable_if> check_invocable( + LAMBDA&&, + const camp::list&) +{ +#if !defined(RAJA_ENABLE_HIP) + static_assert( + is_invocable::type, + EXPECTED_ARGS...>::value, + "LAMBDA Not invocable w/ EXPECTED_ARGS. Ordering and types must match " + "between RAJA::expt::Reduce() and ValOp arguments."); +#endif +} + +} // namespace detail + +template +constexpr void check_forall_optional_args(Lambda&& l, ForallParams& fpp) +{ + + using expected_arg_type_list = decltype(detail::list_add_lvalue_ref( + detail::list_remove_pointer(detail::tuple_to_list(fpp.lambda_args())))); + + detail::check_invocable(std::forward(l), expected_arg_type_list {}); +} + +//=========================================================================== + + +//=========================================================================== +// +// +// Type trailts for SFINAE work. +// +// +namespace type_traits +{ +template +struct is_ForallParamPack : std::false_type +{}; + +template +struct is_ForallParamPack> : std::true_type +{}; + +template +struct is_ForallParamPack_empty : std::true_type +{}; + +template +struct is_ForallParamPack_empty> + : std::false_type +{}; + +template<> +struct is_ForallParamPack_empty> : std::true_type +{}; +} // namespace type_traits + +//=========================================================================== + + +//=========================================================================== +// +// +// Invoke Forall with Params. 
+// +// +namespace detail +{ +template +RAJA_HOST_DEVICE constexpr auto get_lambda_args(FP& fpp) + -> decltype(*camp::get(fpp.lambda_args())) +{ + return (*camp::get(fpp.lambda_args())); +} + +CAMP_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE constexpr auto invoke_with_order(Params&& params, + Fn&& f, + camp::idx_seq, + Ts&&... extra) +{ + return f(std::forward(extra...), + (get_lambda_args(params))...); +} +} // namespace detail + +// CAMP_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE constexpr auto invoke_body(Params&& params, + Fn&& f, + Ts&&... extra) +{ + return detail::invoke_with_order( + camp::forward(params), camp::forward(f), + typename camp::decay::lambda_arg_seq(), + camp::forward(extra)...); +} + +//=========================================================================== -} // namespace expt -} // namespace RAJA +} // namespace expt +} // namespace RAJA -#endif // FORALL_PARAM_HPP +#endif // FORALL_PARAM_HPP diff --git a/include/RAJA/pattern/params/kernel_name.hpp b/include/RAJA/pattern/params/kernel_name.hpp index e768d8dd59..2d26436c94 100644 --- a/include/RAJA/pattern/params/kernel_name.hpp +++ b/include/RAJA/pattern/params/kernel_name.hpp @@ -10,23 +10,22 @@ namespace expt namespace detail { - struct KernelName : public ForallParamBase { - RAJA_HOST_DEVICE KernelName() {} - KernelName(const char* name_in) : name(name_in) {} - const char* name; - }; +struct KernelName : public ForallParamBase +{ + RAJA_HOST_DEVICE KernelName() {} -} // namespace detail + KernelName(const char* name_in) : name(name_in) {} -inline auto KernelName(const char * n) -{ - return detail::KernelName(n); -} -} // namespace expt + const char* name; +}; + +} // namespace detail +inline auto KernelName(const char* n) { return detail::KernelName(n); } +} // namespace expt -} // namespace RAJA +} // namespace RAJA -#endif // KERNEL_NAME_HPP +#endif // KERNEL_NAME_HPP diff --git a/include/RAJA/pattern/params/params_base.hpp b/include/RAJA/pattern/params/params_base.hpp index 
98380f6ffc..27f2adec7a 100644 --- a/include/RAJA/pattern/params/params_base.hpp +++ b/include/RAJA/pattern/params/params_base.hpp @@ -1,135 +1,278 @@ #ifndef RAJA_PARAMS_BASE #define RAJA_PARAMS_BASE - namespace RAJA { namespace expt { - template - struct ValLoc { - using index_type = IndexType; - using value_type = T; - - ValLoc() = default; - RAJA_HOST_DEVICE constexpr explicit ValLoc(value_type v) : val(v) {} - RAJA_HOST_DEVICE constexpr ValLoc(value_type v, index_type l) : val(v), loc(l) {} - - ValLoc(ValLoc const &) = default; - ValLoc(ValLoc &&) = default; - ValLoc& operator=(ValLoc const &) = default; - ValLoc& operator=(ValLoc &&) = default; - - RAJA_HOST_DEVICE constexpr bool operator<(const ValLoc& rhs) const { return val < rhs.val; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValLoc& rhs) const { return val > rhs.val; } - - RAJA_HOST_DEVICE constexpr const value_type& getVal() const {return val;} - RAJA_HOST_DEVICE constexpr const index_type& getLoc() const {return loc;} - - RAJA_HOST_DEVICE void set(T inval, IndexType inindex) {val = inval; loc = inindex;} - RAJA_HOST_DEVICE void setVal(T inval) {val = inval;} - RAJA_HOST_DEVICE void setLoc(IndexType inindex) {loc = inindex;} - - value_type val; - index_type loc = -1; - }; - - template class Op> - struct ValOp { - using value_type = T; - using op_type = Op; - - ValOp() = default; - RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} - - ValOp(ValOp const &) = default; - ValOp(ValOp &&) = default; - ValOp& operator=(ValOp const &) = default; - ValOp& operator=(ValOp &&) = default; - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & min(value_type v) { if (v < val) { val = v; } return *this; } - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & max(value_type v) { if (v > val) { val = v; } return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator+=(const value_type& rhs) { val += rhs; return *this; } - - 
template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator&=(const value_type& rhs) { val &= rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator|=(const value_type& rhs) { val |= rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE ValOp & operator&=(value_type& rhs) { val &= rhs; return *this; } +template +struct ValLoc +{ + using index_type = IndexType; + using value_type = T; - template >::value> * = nullptr> - RAJA_HOST_DEVICE ValOp & operator|=(value_type& rhs) { val |= rhs; return *this; } + ValLoc() = default; - RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const { val < rhs.val; return *this; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const { val > rhs.val; return *this; } + RAJA_HOST_DEVICE constexpr explicit ValLoc(value_type v) : val(v) {} - value_type val = op_type::identity(); - }; + RAJA_HOST_DEVICE constexpr ValLoc(value_type v, index_type l) : val(v), loc(l) + {} - template class Op> - struct ValOp , Op> { - using index_type = IndexType; - using value_type = ValLoc; - using op_type = Op; - using valloc_value_type = typename value_type::value_type; - using valloc_index_type = typename value_type::index_type; + ValLoc(ValLoc const&) = default; + ValLoc(ValLoc&&) = default; + ValLoc& operator=(ValLoc const&) = default; + ValLoc& operator=(ValLoc&&) = default; - ValOp() = default; - RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} - RAJA_HOST_DEVICE constexpr ValOp(valloc_value_type v, valloc_index_type l) : val(v, l) {} + RAJA_HOST_DEVICE constexpr bool operator<(const ValLoc& rhs) const + { + return val < rhs.val; + } - ValOp(ValOp const &) = default; - ValOp(ValOp &&) = default; - ValOp& operator=(ValOp const &) = default; - ValOp& operator=(ValOp &&) = default; + RAJA_HOST_DEVICE constexpr bool operator>(const ValLoc& rhs) const + { + return val > rhs.val; + } - template >::value> * = nullptr> - 
RAJA_HOST_DEVICE constexpr ValOp & min(value_type v) { if (v < val) { val = v; } return *this; } + RAJA_HOST_DEVICE constexpr const value_type& getVal() const { return val; } - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & max(value_type v) { if (v > val) { val = v; } return *this; } + RAJA_HOST_DEVICE constexpr const index_type& getLoc() const { return loc; } - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & minloc(valloc_value_type v, valloc_index_type l) { return min(value_type(v,l)); } + RAJA_HOST_DEVICE void set(T inval, IndexType inindex) + { + val = inval; + loc = inindex; + } - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & maxloc(valloc_value_type v, valloc_index_type l) { return max(value_type(v,l)); } + RAJA_HOST_DEVICE void setVal(T inval) { val = inval; } - RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const { return val < rhs.val; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const { return val > rhs.val; } + RAJA_HOST_DEVICE void setLoc(IndexType inindex) { loc = inindex; } - value_type val = op_type::identity(); - }; + value_type val; + index_type loc = -1; +}; - template class Op> - using ValLocOp = ValOp, Op>; +template class Op> +struct ValOp +{ + using value_type = T; + using op_type = Op; + + ValOp() = default; + + RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} + + ValOp(ValOp const&) = default; + ValOp(ValOp&&) = default; + ValOp& operator=(ValOp const&) = default; + ValOp& operator=(ValOp&&) = default; + + template< + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& min(value_type v) + { + if (v < val) + { + val = v; + } + return *this; + } + + template< + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& max(value_type v) + { + if (v > val) + { + val = v; + } + return *this; + } + + template< + 
typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator+=(const value_type& rhs) + { + val += rhs; + return *this; + } + + template< + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator&=(const value_type& rhs) + { + val &= rhs; + return *this; + } + + template< + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator|=(const value_type& rhs) + { + val |= rhs; + return *this; + } + + template< + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE ValOp& operator&=(value_type& rhs) + { + val &= rhs; + return *this; + } + + template< + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE ValOp& operator|=(value_type& rhs) + { + val |= rhs; + return *this; + } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const + { + val < rhs.val; + return *this; + } + + RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const + { + val > rhs.val; + return *this; + } + + value_type val = op_type::identity(); +}; + +template + class Op> +struct ValOp, Op> +{ + using index_type = IndexType; + using value_type = ValLoc; + using op_type = Op; + using valloc_value_type = typename value_type::value_type; + using valloc_index_type = typename value_type::index_type; + + ValOp() = default; + + RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} + + RAJA_HOST_DEVICE constexpr ValOp(valloc_value_type v, valloc_index_type l) + : val(v, l) + {} + + ValOp(ValOp const&) = default; + ValOp(ValOp&&) = default; + ValOp& operator=(ValOp const&) = default; + ValOp& operator=(ValOp&&) = default; + + template>:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& min(value_type v) + { + if (v < val) + { + val = v; + } + return *this; + } + + template>:: + value>* = 
nullptr> + RAJA_HOST_DEVICE constexpr ValOp& max(value_type v) + { + if (v > val) + { + val = v; + } + return *this; + } + + template>:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& minloc(valloc_value_type v, + valloc_index_type l) + { + return min(value_type(v, l)); + } + + template>:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& maxloc(valloc_value_type v, + valloc_index_type l) + { + return max(value_type(v, l)); + } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const + { + return val < rhs.val; + } + + RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const + { + return val > rhs.val; + } + + value_type val = op_type::identity(); +}; + +template + class Op> +using ValLocOp = ValOp, Op>; namespace detail { - struct ForallParamBase { +struct ForallParamBase +{ + + // Some of this can be made virtual in c++20, for now must be defined in each + // child class if any arguments to the forall lambda are needed (e.g. + // KernelName is excluded.) + using ARG_TUP_T = camp::tuple<>; + using ARG_LIST_T = typename ARG_TUP_T::TList; + + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(); } - // Some of this can be made virtual in c++20, for now must be defined in each child class - // if any arguments to the forall lambda are needed (e.g. KernelName is excluded.) 
- using ARG_TUP_T = camp::tuple<>; - using ARG_LIST_T = typename ARG_TUP_T::TList; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(); } - static constexpr size_t num_lambda_args = camp::tuple_size::value; - - }; + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; -} // namespace detail +} // namespace detail -} // namespace expt +} // namespace expt -} // namespace RAJA +} // namespace RAJA -#endif // RAJA_PARAMS_BASE +#endif // RAJA_PARAMS_BASE diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index 78b6d7714d..bb8595f621 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -18,21 +18,25 @@ namespace RAJA namespace operators { -template -struct limits> { - RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc min() +template +struct limits> +{ + RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc + min() { return RAJA::expt::ValLoc(RAJA::operators::limits::min()); } - RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc max() + + RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc + max() { return RAJA::expt::ValLoc(RAJA::operators::limits::max()); } }; -} // namespace operators +} // namespace operators -} // namespace RAJA +} // namespace RAJA namespace RAJA { @@ -43,159 +47,201 @@ namespace detail { #if defined(RAJA_CUDA_ACTIVE) - using device_mem_pool_t = RAJA::cuda::device_mempool_type; +using device_mem_pool_t = RAJA::cuda::device_mempool_type; #elif defined(RAJA_HIP_ACTIVE) - using device_mem_pool_t = RAJA::hip::device_mempool_type; +using device_mem_pool_t = RAJA::hip::device_mempool_type; #elif defined(RAJA_SYCL_ACTIVE) - using device_mem_pool_t = RAJA::sycl::device_mempool_type; +using device_mem_pool_t = RAJA::sycl::device_mempool_type; #endif - // - // - // Basic Reducer - // - // - - // Basic data type Reducer - // T must be a basic data type - // VOp must be 
ValOp - template - struct Reducer : public ForallParamBase { - using op = Op; - using value_type = T; // This is a basic data type - - Reducer() = default; - - // Basic data type constructor - RAJA_HOST_DEVICE Reducer(value_type *target_in) : m_valop(VOp{}), target(target_in){} - - Reducer(Reducer const &) = default; - Reducer(Reducer &&) = default; - Reducer& operator=(Reducer const &) = default; - Reducer& operator=(Reducer &&) = default; - - // Internal ValOp object that is used within RAJA::forall/launch - VOp m_valop = VOp{}; - - // Points to the user specified result variable - value_type *target = nullptr; - - // combineTarget() performs the final op on the target data and location in resolve() - RAJA_HOST_DEVICE void combineTarget(value_type in) - { - value_type temp = op{}(*target, in); - *target = temp; - } - - RAJA_HOST_DEVICE - value_type & - getVal() { return m_valop.val; } - -#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || defined(RAJA_SYCL_ACTIVE) - // Device related attributes. 
- value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; - unsigned int * device_count = nullptr; +// +// +// Basic Reducer +// +// + +// Basic data type Reducer +// T must be a basic data type +// VOp must be ValOp +template +struct Reducer : public ForallParamBase +{ + using op = Op; + using value_type = T; // This is a basic data type + + Reducer() = default; + + // Basic data type constructor + RAJA_HOST_DEVICE Reducer(value_type* target_in) + : m_valop(VOp {}), + target(target_in) + {} + + Reducer(Reducer const&) = default; + Reducer(Reducer&&) = default; + Reducer& operator=(Reducer const&) = default; + Reducer& operator=(Reducer&&) = default; + + // Internal ValOp object that is used within RAJA::forall/launch + VOp m_valop = VOp {}; + + // Points to the user specified result variable + value_type* target = nullptr; + + // combineTarget() performs the final op on the target data and location in + // resolve() + RAJA_HOST_DEVICE void combineTarget(value_type in) + { + value_type temp = op {}(*target, in); + *target = temp; + } + + RAJA_HOST_DEVICE + value_type& getVal() { return m_valop.val; } + +#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || \ + defined(RAJA_SYCL_ACTIVE) + // Device related attributes. + value_type* devicetarget = nullptr; + RAJA::detail::SoAPtr device_mem; + unsigned int* device_count = nullptr; #endif - // These are types and parameters extracted from this struct, and given to the forall. 
- using ARG_TUP_T = camp::tuple; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(&m_valop); } - - using ARG_LIST_T = typename ARG_TUP_T::TList; - static constexpr size_t num_lambda_args = camp::tuple_size::value ; - }; - - // Partial specialization of Reducer for ValLoc - // T is a deduced basic data type - // I is a deduced index type - template class Op> - struct Reducer, ValLoc, ValLoc>, ValLoc, ValOp, Op>> : public ForallParamBase { - using target_value_type = T; - using target_index_type = I; - using value_type = ValLoc; - using op = Op; - using VOp = ValOp, Op>; - - Reducer() = default; - - // ValLoc constructor - // Note that the target_ variables point to the val and loc within the user defined target ValLoc - RAJA_HOST_DEVICE Reducer(value_type *target_in) : m_valop(VOp{}), target_value(&target_in->val), target_index(&target_in->loc) {} - - // Dual input constructor for ReduceLoc<>(data, index) case - // The target_ variables point to vars defined by the user - RAJA_HOST_DEVICE Reducer(target_value_type *data_in, target_index_type *index_in) : m_valop(VOp{}), target_value(data_in), target_index(index_in) {} - - Reducer(Reducer const &) = default; - Reducer(Reducer &&) = default; - Reducer& operator=(Reducer const &) = default; - Reducer& operator=(Reducer &&) = default; - - // The ValLoc within m_valop is initialized with data and location values from either a ValLoc, or dual data and location values, passed into the constructor - VOp m_valop = VOp{}; - - // Points to either dual value and index defined by the user, or value and index within a ValLoc defined by the user - target_value_type *target_value = nullptr; - target_index_type *target_index = nullptr; - - // combineTarget() performs the final op on the target data and location in resolve() - RAJA_HOST_DEVICE void combineTarget(value_type in) - { - // Create a different temp ValLoc solely for combining - value_type temp(*target_value, *target_index); - temp = op{}(temp, 
in); - *target_value = temp.val; - *target_index = temp.loc; - } - - RAJA_HOST_DEVICE - value_type & - getVal() { return m_valop.val; } - -#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || defined(RAJA_SYCL_ACTIVE) - // Device related attributes. - value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; - unsigned int * device_count = nullptr; + // These are types and parameters extracted from this struct, and given to the + // forall. + using ARG_TUP_T = camp::tuple; + + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() + { + return camp::make_tuple(&m_valop); + } + + using ARG_LIST_T = typename ARG_TUP_T::TList; + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; + +// Partial specialization of Reducer for ValLoc +// T is a deduced basic data type +// I is a deduced index type +template + class Op> +struct Reducer, ValLoc, ValLoc>, + ValLoc, + ValOp, Op>> : public ForallParamBase +{ + using target_value_type = T; + using target_index_type = I; + using value_type = ValLoc; + using op = Op; + using VOp = ValOp, Op>; + + Reducer() = default; + + // ValLoc constructor + // Note that the target_ variables point to the val and loc within the user + // defined target ValLoc + RAJA_HOST_DEVICE Reducer(value_type* target_in) + : m_valop(VOp {}), + target_value(&target_in->val), + target_index(&target_in->loc) + {} + + // Dual input constructor for ReduceLoc<>(data, index) case + // The target_ variables point to vars defined by the user + RAJA_HOST_DEVICE Reducer(target_value_type* data_in, + target_index_type* index_in) + : m_valop(VOp {}), + target_value(data_in), + target_index(index_in) + {} + + Reducer(Reducer const&) = default; + Reducer(Reducer&&) = default; + Reducer& operator=(Reducer const&) = default; + Reducer& operator=(Reducer&&) = default; + + // The ValLoc within m_valop is initialized with data and location values from + // either a ValLoc, or dual data and location values, passed into the + // constructor + 
VOp m_valop = VOp {}; + + // Points to either dual value and index defined by the user, or value and + // index within a ValLoc defined by the user + target_value_type* target_value = nullptr; + target_index_type* target_index = nullptr; + + // combineTarget() performs the final op on the target data and location in + // resolve() + RAJA_HOST_DEVICE void combineTarget(value_type in) + { + // Create a different temp ValLoc solely for combining + value_type temp(*target_value, *target_index); + temp = op {}(temp, in); + *target_value = temp.val; + *target_index = temp.loc; + } + + RAJA_HOST_DEVICE + value_type& getVal() { return m_valop.val; } + +#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || \ + defined(RAJA_SYCL_ACTIVE) + // Device related attributes. + value_type* devicetarget = nullptr; + RAJA::detail::SoAPtr device_mem; + unsigned int* device_count = nullptr; #endif - // These are types and parameters extracted from this struct, and given to the forall. - using ARG_TUP_T = camp::tuple; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(&m_valop); } + // These are types and parameters extracted from this struct, and given to the + // forall. + using ARG_TUP_T = camp::tuple; - using ARG_LIST_T = typename ARG_TUP_T::TList; - static constexpr size_t num_lambda_args = camp::tuple_size::value ; - }; + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() + { + return camp::make_tuple(&m_valop); + } + + using ARG_LIST_T = typename ARG_TUP_T::TList; + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; -} // namespace detail +} // namespace detail // Standard use case. -template