diff --git a/.clang-format b/.clang-format index 1d2ad9a77f..b50c1facfb 100644 --- a/.clang-format +++ b/.clang-format @@ -1,27 +1,71 @@ -BasedOnStyle : google +BasedOnStyle : LLVM +# Indent formatting IndentWidth : 2 -BreakBeforeBraces : Linux +Language: Cpp +UseTab: Never KeepEmptyLinesAtTheStartOfBlocks : true MaxEmptyLinesToKeep : 2 AccessModifierOffset : -2 -UseTab: Never +# This must be off so that include order in RAJA is preserved +SortIncludes: false + +# Alignment of consecutive declarations, assignments etc +AlignConsecutiveAssignments : true +AlignConsecutiveDeclarations : false +AlignConsecutiveMacros : true +AlignTrailingComments : true +AlwaysBreakAfterDefinitionReturnType: false + +# Control curly brace placement +BreakBeforeBraces : Custom +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: true + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + BeforeLambdaBody: true + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false + +# Pointer alignment +DerivePointerAlignment: false +PointerAlignment: Left + +# Single line config AllowShortIfStatementsOnASingleLine : true -ConstructorInitializerAllOnOneLineOrOnePerLine : true AllowShortFunctionsOnASingleLine : true AllowShortLoopsOnASingleLine : false -BinPackParameters : false +AllowAllArgumentsOnNextLine : true AllowAllParametersOfDeclarationOnNextLine : false -AlignTrailingComments : true +BinPackArguments : true +BinPackParameters : false +ConstructorInitializerAllOnOneLineOrOnePerLine : true ColumnLimit : 80 -PenaltyBreakBeforeFirstCallParameter : 100 -PenaltyReturnTypeOnItsOwnLine : 65000 -PenaltyBreakString : 10 -# These improve formatting results but require clang 3.6/7 or higher -BreakBeforeBinaryOperators : None -AlignAfterOpenBracket: true -BinPackArguments : false +AlignAfterOpenBracket: Align AlignOperands : true AlwaysBreakTemplateDeclarations : true -Cpp11BracedListStyle : true +BreakBeforeBinaryOperators : None +SpaceBeforeCpp11BracedList: true +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: false +SpacesInConditionalStatement: false +SpacesInParentheses: false +SpacesInSquareBrackets: false diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b31cbe124..dbe5b3f113 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,7 @@ project(RAJA LANGUAGES CXX C VERSION ${RAJA_LOADED}) set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/thirdparty" ${CMAKE_MODULE_PATH}) - +set(BLT_REQUIRED_CLANGFORMAT_VERSION "14" CACHE STRING "") include(cmake/SetupRajaOptions.cmake) cmake_minimum_required(VERSION 3.23) @@ -136,6 +136,9 @@ include(cmake/SetupCompilers.cmake) # Macros for building executables and libraries include (cmake/RAJAMacros.cmake) +# Configure `style` target for enforcing code style +raja_add_code_checks() + set (raja_sources src/AlignedRangeIndexSetBuilders.cpp src/DepGraphNode.cpp diff --git a/cmake/RAJAMacros.cmake b/cmake/RAJAMacros.cmake index c412593db7..11c4661cc1 100644 --- a/cmake/RAJAMacros.cmake +++ b/cmake/RAJAMacros.cmake @@ -204,3 +204,62 @@ macro(raja_add_benchmark) NUM_OMP_THREADS ${arg_NUM_OMP_THREADS} COMMAND ${TEST_DRIVER} ${arg_NAME}) endmacro(raja_add_benchmark) + +##------------------------------------------------------------------------------ +## raja_add_code_checks() +## +## Adds code checks for all source files recursively in the RAJA repository. +## +## This creates the following parent build targets: +## check - Runs a non file changing style check and CppCheck +## style - In-place code formatting +## +## Creates various child build targets that follow this pattern: +## raja_ +## raja__ +##------------------------------------------------------------------------------ +macro(raja_add_code_checks) + + set(options) + set(singleValueArgs) + set(multiValueArgs) + + # Parse the arguments to the macro + cmake_parse_arguments(arg + "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Only do code checks if building raja by itself and not included in + # another project + if ("${PROJECT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}") + # Create file globbing expressions that only include directories that contain source + # TODO(bowen) Add examples, exercises and benchmark to the list below + set(_base_dirs "RAJA" "benchmark" "include" "src" "test") + set(_ext_expressions "*.cpp" "*.hpp" "*.inl" + "*.cxx" "*.hxx" "*.cc" "*.c" "*.h" "*.hh") + + set(_glob_expressions) + foreach(_exp ${_ext_expressions}) + foreach(_base_dir ${_base_dirs}) + list(APPEND _glob_expressions "${PROJECT_SOURCE_DIR}/${_base_dir}/${_exp}") + endforeach() + endforeach() + + # Glob for list of files to run code checks on + set(_sources) + file(GLOB_RECURSE _sources ${_glob_expressions}) + + blt_add_code_checks(PREFIX RAJA + SOURCES ${_sources} + CLANGFORMAT_CFG_FILE ${PROJECT_SOURCE_DIR}/.clang-format + CPPCHECK_FLAGS --enable=all --inconclusive) + + # Set FOLDER property for code check targets + foreach(_suffix clangformat_check clangformat_style clang_tidy_check clang_tidy_style) + set(_tgt ${arg_PREFIX}_${_suffix}) + if(TARGET ${_tgt}) + set_target_properties(${_tgt} PROPERTIES FOLDER "RAJA/code_checks") + endif() + endforeach() + endif() + +endmacro(raja_add_code_checks) diff --git a/include/RAJA/RAJA.hpp b/include/RAJA/RAJA.hpp index 59cca4bf22..abc965b0f5 100644 --- a/include/RAJA/RAJA.hpp +++ b/include/RAJA/RAJA.hpp @@ -88,7 +88,7 @@ #endif #if defined(RAJA_ENABLE_DESUL_ATOMICS) - #include "RAJA/policy/desul.hpp" +#include "RAJA/policy/desul.hpp" #endif #include "RAJA/index/IndexSet.hpp" @@ -197,11 +197,13 @@ #include "RAJA/pattern/sort.hpp" -namespace RAJA { -namespace expt{} +namespace RAJA +{ +namespace expt +{} // // provide a RAJA::expt namespace for experimental work, but bring alias // // it into RAJA so it doesn't affect user code // using namespace expt; -} +} // namespace RAJA #endif // closing endif for header file include guard diff --git a/include/RAJA/index/IndexSet.hpp b/include/RAJA/index/IndexSet.hpp index 1a467c8341..3261c27b7a 100644 --- a/include/RAJA/index/IndexSet.hpp +++ b/include/RAJA/index/IndexSet.hpp @@ -34,8 +34,16 @@ namespace RAJA { -enum PushEnd { PUSH_FRONT, PUSH_BACK }; -enum PushCopy { PUSH_COPY, PUSH_NOCOPY }; +enum PushEnd +{ + PUSH_FRONT, + PUSH_BACK +}; +enum PushCopy +{ + PUSH_COPY, + PUSH_NOCOPY +}; template class TypedIndexSet; @@ -55,8 +63,9 @@ namespace indexset template struct ExecPolicy : public RAJA::make_policy_pattern_t { - using seg_it = SEG_ITER_POLICY_T; + RAJA::Pattern::forall> +{ + using seg_it = SEG_ITER_POLICY_T; using seg_exec = SEG_EXEC_POLICY_T; }; @@ -77,7 +86,7 @@ using policy::indexset::ExecPolicy; template class TypedIndexSet : public TypedIndexSet { - using PARENT = TypedIndexSet; + using PARENT = TypedIndexSet; static const int T0_TypeId = sizeof...(TREST); public: @@ -91,7 +100,7 @@ class TypedIndexSet : public TypedIndexSet //! Construct empty index set #if _MSC_VER < 1910 - // this one instance of constexpr does not work on VS2012 or VS2015 + // this one instance of constexpr does not work on VS2012 or VS2015 RAJA_INLINE TypedIndexSet() : PARENT() {} #else RAJA_INLINE constexpr TypedIndexSet() : PARENT() {} @@ -99,12 +108,12 @@ class TypedIndexSet : public TypedIndexSet //! Copy-constructor for index set RAJA_INLINE - TypedIndexSet(TypedIndexSet const &c) - : PARENT((PARENT const &)c) + TypedIndexSet(TypedIndexSet const& c) : PARENT((PARENT const&)c) { size_t num = c.data.size(); data.resize(num); - for (size_t i = 0; i < num; ++i) { + for (size_t i = 0; i < num; ++i) + { data[i] = c.data[i]; } // mark all as not owned by us @@ -112,9 +121,10 @@ class TypedIndexSet : public TypedIndexSet } //! Copy-assignment operator for index set - TypedIndexSet &operator=(const TypedIndexSet &rhs) + TypedIndexSet& operator=(const TypedIndexSet& rhs) { - if (&rhs != this) { + if (&rhs != this) + { TypedIndexSet copy(rhs); this->swap(copy); } @@ -125,19 +135,21 @@ class TypedIndexSet : public TypedIndexSet RAJA_INLINE ~TypedIndexSet() { size_t num_seg = data.size(); - for (size_t i = 0; i < num_seg; ++i) { + for (size_t i = 0; i < num_seg; ++i) + { // Only free segment of we allocated it - if (owner[i]) { + if (owner[i]) + { delete data[i]; } } } //! Swap function for copy-and-swap idiom. - void swap(TypedIndexSet &other) + void swap(TypedIndexSet& other) { // Swap parents data - PARENT::swap((PARENT &)other); + PARENT::swap((PARENT&)other); // Swap our data using std::swap; swap(data, other.data); @@ -150,18 +162,20 @@ class TypedIndexSet : public TypedIndexSet /// This is used to implement the == and != operators /// template - RAJA_INLINE bool compareSegmentById( - size_t segid, - const TypedIndexSet &other) const + RAJA_INLINE bool + compareSegmentById(size_t segid, + const TypedIndexSet& other) const { // drill down our types until we have the right type - if (getSegmentTypes()[segid] != T0_TypeId) { + if (getSegmentTypes()[segid] != T0_TypeId) + { // peel off T0 return PARENT::compareSegmentById(segid, other); } // Check that other's segid is of type T0 - if (!other.template checkSegmentType(segid)) { + if (!other.template checkSegmentType(segid)) + { return false; } @@ -174,7 +188,8 @@ class TypedIndexSet : public TypedIndexSet template RAJA_INLINE bool checkSegmentType(size_t segid) const { - if (getSegmentTypes()[segid] == T0_TypeId) { + if (getSegmentTypes()[segid] == T0_TypeId) + { return std::is_same::value; } return PARENT::template checkSegmentType(segid); @@ -183,22 +198,24 @@ class TypedIndexSet : public TypedIndexSet //! get specified segment by ID template - RAJA_INLINE P0 &getSegment(size_t segid) + RAJA_INLINE P0& getSegment(size_t segid) { - if (getSegmentTypes()[segid] == T0_TypeId) { + if (getSegmentTypes()[segid] == T0_TypeId) + { Index_type offset = getSegmentOffsets()[segid]; - return *reinterpret_cast(data[offset]); + return *reinterpret_cast(data[offset]); } return PARENT::template getSegment(segid); } //! get specified segment by ID template - RAJA_INLINE P0 const &getSegment(size_t segid) const + RAJA_INLINE P0 const& getSegment(size_t segid) const { - if (getSegmentTypes()[segid] == T0_TypeId) { + if (getSegmentTypes()[segid] == T0_TypeId) + { Index_type offset = getSegmentOffsets()[segid]; - return *reinterpret_cast(data[offset]); + return *reinterpret_cast(data[offset]); } return PARENT::template getSegment(segid); } @@ -231,20 +248,25 @@ class TypedIndexSet : public TypedIndexSet private: template - RAJA_INLINE void push_into(TypedIndexSet &c, - PushEnd pend = PUSH_BACK, + RAJA_INLINE void push_into(TypedIndexSet& c, + PushEnd pend = PUSH_BACK, PushCopy pcopy = PUSH_COPY) { Index_type num = getNumSegments(); - if (pend == PUSH_BACK) { - for (Index_type i = 0; i < num; ++i) { + if (pend == PUSH_BACK) + { + for (Index_type i = 0; i < num; ++i) + { segment_push_into(i, c, pend, pcopy); - } - } else { - for (Index_type i = num-1; i > -1; --i) { + } + } + else + { + for (Index_type i = num - 1; i > -1; --i) + { segment_push_into(i, c, pend, pcopy); - } + } } } @@ -257,66 +279,71 @@ class TypedIndexSet : public TypedIndexSet public: template RAJA_INLINE void segment_push_into(size_t segid, - TypedIndexSet &c, - PushEnd pend = PUSH_BACK, + TypedIndexSet& c, + PushEnd pend = PUSH_BACK, PushCopy pcopy = PUSH_COPY) { - if (getSegmentTypes()[segid] != T0_TypeId) { + if (getSegmentTypes()[segid] != T0_TypeId) + { PARENT::segment_push_into(segid, c, pend, pcopy); return; } Index_type offset = getSegmentOffsets()[segid]; - switch (value_for(pend, pcopy)) { - case value_for(PUSH_BACK, PUSH_COPY): - c.push_back(*data[offset]); - break; - case value_for(PUSH_BACK, PUSH_NOCOPY): - c.push_back_nocopy(data[offset]); - break; - case value_for(PUSH_FRONT, PUSH_COPY): - c.push_front(*data[offset]); - break; - case value_for(PUSH_FRONT, PUSH_NOCOPY): - c.push_front_nocopy(data[offset]); - break; + switch (value_for(pend, pcopy)) + { + case value_for(PUSH_BACK, PUSH_COPY): + c.push_back(*data[offset]); + break; + case value_for(PUSH_BACK, PUSH_NOCOPY): + c.push_back_nocopy(data[offset]); + break; + case value_for(PUSH_FRONT, PUSH_COPY): + c.push_front(*data[offset]); + break; + case value_for(PUSH_FRONT, PUSH_NOCOPY): + c.push_front_nocopy(data[offset]); + break; } } //! Add segment to back end of index set without making a copy. template - RAJA_INLINE void push_back_nocopy(Tnew *val) + RAJA_INLINE void push_back_nocopy(Tnew* val) { push_internal(val, PUSH_BACK, PUSH_NOCOPY); } //! Add segment to front end of index set without making a copy. template - RAJA_INLINE void push_front_nocopy(Tnew *val) + RAJA_INLINE void push_front_nocopy(Tnew* val) { push_internal(val, PUSH_FRONT, PUSH_NOCOPY); } //! Add copy of segment to back end of index set. template - RAJA_INLINE void push_back(Tnew &&val) + RAJA_INLINE void push_back(Tnew&& val) { - push_internal(new typename std::decay::type(std::forward(val)), PUSH_BACK, PUSH_COPY); + push_internal(new typename std::decay::type(std::forward(val)), + PUSH_BACK, PUSH_COPY); } //! Add copy of segment to front end of index set. template - RAJA_INLINE void push_front(Tnew &&val) + RAJA_INLINE void push_front(Tnew&& val) { - push_internal(new typename std::decay::type(std::forward(val)), PUSH_FRONT, PUSH_COPY); + push_internal(new typename std::decay::type(std::forward(val)), + PUSH_FRONT, PUSH_COPY); } //! Return total length -- sum of lengths of all segments RAJA_INLINE size_t getLength() const { size_t total = PARENT::getLength(); - size_t num = data.size(); - for (size_t i = 0; i < num; ++i) { + size_t num = data.size(); + for (size_t i = 0; i < num; ++i) + { total += data[i]->size(); } return total; @@ -339,13 +366,12 @@ class TypedIndexSet : public TypedIndexSet /// RAJA_SUPPRESS_HD_WARN template - RAJA_HOST_DEVICE void segmentCall(size_t segid, - BODY &&body, - ARGS &&... args) const + RAJA_HOST_DEVICE void + segmentCall(size_t segid, BODY&& body, ARGS&&... args) const { - if (getSegmentTypes()[segid] != T0_TypeId) { - PARENT::segmentCall(segid, - std::forward(body), + if (getSegmentTypes()[segid] != T0_TypeId) + { + PARENT::segmentCall(segid, std::forward(body), std::forward(args)...); return; } @@ -356,24 +382,23 @@ class TypedIndexSet : public TypedIndexSet protected: //! Internal logic to add a new segment -- catch invalid type insertion template - RAJA_INLINE void push_internal(Tnew *val, - PushEnd pend = PUSH_BACK, - PushCopy pcopy = PUSH_COPY) + RAJA_INLINE void + push_internal(Tnew* val, PushEnd pend = PUSH_BACK, PushCopy pcopy = PUSH_COPY) { static_assert(sizeof...(TREST) > 0, "Invalid type for this TypedIndexSet"); PARENT::push_internal(val, pend, pcopy); } //! Internal logic to add a new segment - RAJA_INLINE void push_internal(T0 *val, - PushEnd pend = PUSH_BACK, - PushCopy pcopy = PUSH_COPY) + RAJA_INLINE void + push_internal(T0* val, PushEnd pend = PUSH_BACK, PushCopy pcopy = PUSH_COPY) { data.push_back(val); owner.push_back(pcopy == PUSH_COPY); // Determine if we push at the front or back of the segment list - if (pend == PUSH_BACK) { + if (pend == PUSH_BACK) + { // Store the segment type getSegmentTypes().push_back(T0_TypeId); @@ -384,7 +409,9 @@ class TypedIndexSet : public TypedIndexSet size_t icount = val->size(); getSegmentIcounts().push_back(getTotalLength()); increaseTotalLength(icount); - } else { + } + else + { // Store the segment type getSegmentTypes().push_front(T0_TypeId); @@ -394,7 +421,8 @@ class TypedIndexSet : public TypedIndexSet // Store the segment icount getSegmentIcounts().push_front(0); size_t icount = val->size(); - for (size_t i = 1; i < getSegmentIcounts().size(); ++i) { + for (size_t i = 1; i < getSegmentIcounts().size(); ++i) + { getSegmentIcounts()[i] += icount; } increaseTotalLength(icount); @@ -402,7 +430,7 @@ class TypedIndexSet : public TypedIndexSet } //! Returns the number of indices (the total icount of segments - RAJA_INLINE Index_type &getTotalLength() { return PARENT::getTotalLength(); } + RAJA_INLINE Index_type& getTotalLength() { return PARENT::getTotalLength(); } //! set total length of the indexset RAJA_INLINE void setTotalLength(int n) { return PARENT::setTotalLength(n); } @@ -437,9 +465,10 @@ class TypedIndexSet : public TypedIndexSet { TypedIndexSet retVal; - int minSeg = RAJA::operators::maximum{}(0, begin); - int maxSeg = RAJA::operators::minimum{}(end, getNumSegments()); - for (int i = minSeg; i < maxSeg; ++i) { + int minSeg = RAJA::operators::maximum {}(0, begin); + int maxSeg = RAJA::operators::minimum {}(end, getNumSegments()); + for (int i = minSeg; i < maxSeg; ++i) + { segment_push_into(i, retVal, PUSH_BACK, PUSH_NOCOPY); } return retVal; @@ -452,13 +481,15 @@ class TypedIndexSet : public TypedIndexSet /// This TypedIndexSet will not change and the created "slice" into it /// will not own any of its segments. /// - TypedIndexSet createSlice(const int *segIds, int len) + TypedIndexSet createSlice(const int* segIds, int len) { TypedIndexSet retVal; int numSeg = getNumSegments(); - for (int i = 0; i < len; ++i) { - if (segIds[i] >= 0 && segIds[i] < numSeg) { + for (int i = 0; i < len; ++i) + { + if (segIds[i] >= 0 && segIds[i] < numSeg) + { segment_push_into(segIds[i], retVal, PUSH_BACK, PUSH_NOCOPY); } } @@ -476,12 +507,14 @@ class TypedIndexSet : public TypedIndexSet /// iterator type must de-reference to an integral value. /// template - TypedIndexSet createSlice(const T &segIds) + TypedIndexSet createSlice(const T& segIds) { TypedIndexSet retVal; int numSeg = getNumSegments(); - for (auto &seg : segIds) { - if (seg >= 0 && seg < numSeg) { + for (auto& seg : segIds) + { + if (seg >= 0 && seg < numSeg) + { segment_push_into(seg, retVal, PUSH_BACK, PUSH_NOCOPY); } } @@ -492,7 +525,7 @@ class TypedIndexSet : public TypedIndexSet void setSegmentInterval(size_t interval_id, int begin, int end) { m_seg_interval_begin[interval_id] = begin; - m_seg_interval_end[interval_id] = end; + m_seg_interval_end[interval_id] = end; } //! get lower bound of segment identified with interval_id @@ -509,37 +542,37 @@ class TypedIndexSet : public TypedIndexSet protected: //! Returns the mapping of segment_index -> segment_type - RAJA_INLINE RAJA::RAJAVec &getSegmentTypes() + RAJA_INLINE RAJA::RAJAVec& getSegmentTypes() { return PARENT::getSegmentTypes(); } //! Returns the mapping of segment_index -> segment_type - RAJA_INLINE RAJA::RAJAVec const &getSegmentTypes() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentTypes() const { return PARENT::getSegmentTypes(); } //! Returns the mapping of segment_index -> segment_offset - RAJA_INLINE RAJA::RAJAVec &getSegmentOffsets() + RAJA_INLINE RAJA::RAJAVec& getSegmentOffsets() { return PARENT::getSegmentOffsets(); } //! Returns the mapping of segment_index -> segment_offset - RAJA_INLINE RAJA::RAJAVec const &getSegmentOffsets() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentOffsets() const { return PARENT::getSegmentOffsets(); } //! Returns the icount of segments - RAJA_INLINE RAJA::RAJAVec &getSegmentIcounts() + RAJA_INLINE RAJA::RAJAVec& getSegmentIcounts() { return PARENT::getSegmentIcounts(); } //! Returns the icount of segments - RAJA_INLINE RAJA::RAJAVec const &getSegmentIcounts() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentIcounts() const { return PARENT::getSegmentIcounts(); } @@ -552,13 +585,15 @@ class TypedIndexSet : public TypedIndexSet /// types and indices; e.g., dependency info not checked. /// template - RAJA_INLINE bool operator==(const TypedIndexSet &other) const + RAJA_INLINE bool operator==(const TypedIndexSet& other) const { size_t num_seg = getNumSegments(); if (num_seg != other.getNumSegments()) return false; - for (size_t segid = 0; segid < num_seg; ++segid) { - if (!compareSegmentById(segid, other)) { + for (size_t segid = 0; segid < num_seg; ++segid) + { + if (!compareSegmentById(segid, other)) + { return false; } } @@ -567,14 +602,14 @@ class TypedIndexSet : public TypedIndexSet //! Inequality operator returns true if any segment is not equal, else false. template - RAJA_INLINE bool operator!=(const TypedIndexSet &other) const + RAJA_INLINE bool operator!=(const TypedIndexSet& other) const { return (!(*this == other)); } private: //! vector of TypedIndexSet data objects of type T0 - RAJA::RAJAVec data; + RAJA::RAJAVec data; //! vector indicating which segments are owned by the TypedIndexSet RAJA::RAJAVec owner; @@ -603,16 +638,16 @@ class TypedIndexSet<> //! Copy-constructor. RAJA_INLINE - TypedIndexSet(TypedIndexSet const &c) + TypedIndexSet(TypedIndexSet const& c) { - segment_types = c.segment_types; + segment_types = c.segment_types; segment_offsets = c.segment_offsets; segment_icounts = c.segment_icounts; - m_len = c.m_len; + m_len = c.m_len; } //! Swap function for copy-and-swap idiom (deep copy). - void swap(TypedIndexSet &other) + void swap(TypedIndexSet& other) { using std::swap; swap(segment_types, other.segment_types); @@ -625,7 +660,7 @@ class TypedIndexSet<> RAJA_INLINE static size_t getNumTypes() { return 0; } template - RAJA_INLINE constexpr bool isValidSegmentType(T const &) const + RAJA_INLINE constexpr bool isValidSegmentType(T const&) const { // Segment type wasn't found return false; @@ -637,40 +672,39 @@ class TypedIndexSet<> template RAJA_INLINE void segmentCall(size_t, BODY, ARGS...) const - { - } + {} - RAJA_INLINE RAJA::RAJAVec &getSegmentTypes() + RAJA_INLINE RAJA::RAJAVec& getSegmentTypes() { return segment_types; } - RAJA_INLINE RAJA::RAJAVec const &getSegmentTypes() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentTypes() const { return segment_types; } - RAJA_INLINE RAJA::RAJAVec &getSegmentOffsets() + RAJA_INLINE RAJA::RAJAVec& getSegmentOffsets() { return segment_offsets; } - RAJA_INLINE RAJA::RAJAVec const &getSegmentOffsets() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentOffsets() const { return segment_offsets; } - RAJA_INLINE RAJA::RAJAVec &getSegmentIcounts() + RAJA_INLINE RAJA::RAJAVec& getSegmentIcounts() { return segment_icounts; } - RAJA_INLINE RAJA::RAJAVec const &getSegmentIcounts() const + RAJA_INLINE RAJA::RAJAVec const& getSegmentIcounts() const { return segment_icounts; } - RAJA_INLINE Index_type &getTotalLength() { return m_len; } + RAJA_INLINE Index_type& getTotalLength() { return m_len; } RAJA_INLINE void setTotalLength(int n) { m_len = n; } @@ -678,7 +712,7 @@ class TypedIndexSet<> template RAJA_INLINE bool compareSegmentById(size_t, - const TypedIndexSet &) const + const TypedIndexSet&) const { return false; } @@ -690,34 +724,29 @@ class TypedIndexSet<> } template - RAJA_INLINE P0 &getSegment(size_t) + RAJA_INLINE P0& getSegment(size_t) { - return *((P0 *)(this - this)); + return *((P0*)(this - this)); } template - RAJA_INLINE P0 const &getSegment(size_t) const + RAJA_INLINE P0 const& getSegment(size_t) const { - return *((P0 *)(this - this)); + return *((P0*)(this - this)); } template - RAJA_INLINE void push_into(TypedIndexSet &, PushEnd, PushCopy) const - { - } + RAJA_INLINE void push_into(TypedIndexSet&, PushEnd, PushCopy) const + {} template - RAJA_INLINE void segment_push_into(size_t, - TypedIndexSet &, - PushEnd, - PushCopy) const - { - } + RAJA_INLINE void + segment_push_into(size_t, TypedIndexSet&, PushEnd, PushCopy) const + {} template - RAJA_INLINE void push(Tnew const &, PushEnd, PushCopy) - { - } + RAJA_INLINE void push(Tnew const&, PushEnd, PushCopy) + {} public: using iterator = Iterators::numeric_iterator; @@ -762,13 +791,15 @@ namespace type_traits template struct is_index_set - : ::RAJA::type_traits::SpecializationOf::type> { -}; + : ::RAJA::type_traits::SpecializationOf::type> +{}; template struct is_indexset_policy - : ::RAJA::type_traits::SpecializationOf::type> { -}; + : ::RAJA::type_traits::SpecializationOf::type> +{}; } // namespace type_traits } // namespace RAJA diff --git a/include/RAJA/index/IndexSetBuilders.hpp b/include/RAJA/index/IndexSetBuilders.hpp index 543524be01..075aecd1d1 100644 --- a/include/RAJA/index/IndexSetBuilders.hpp +++ b/include/RAJA/index/IndexSetBuilders.hpp @@ -37,13 +37,13 @@ namespace RAJA * \brief Generate an index set with aligned Range segments and List segments, * as needed, from given array of indices. * - * Routine does no error-checking on argements and assumes + * Routine does no error-checking on argements and assumes * RAJA::Index_type array contains valid indices. * - * \param iset reference to index set generated with aligned range segments + * \param iset reference to index set generated with aligned range segments * and list segments. Method assumes index set is empty (no segments). - * \param work_res camp resource object that identifies the memory space in - * which list segment index data will live (passed to list segment + * \param work_res camp resource object that identifies the memory space in + * which list segment index data will live (passed to list segment * ctor). * \param indices_in pointer to start of input array of indices. * \param length size of input index array. @@ -79,37 +79,36 @@ void RAJASHAREDDLL_API buildIndexSetAligned( ****************************************************************************** * * \brief Generate a lock-free "block" index set (planar division) containing - * range segments. + * range segments. * - * The method chunks a fastDim x midDim x slowDim mesh into blocks that + * The method chunks a fastDim x midDim x slowDim mesh into blocks that * can be dependency-scheduled, removing need for lock constructs. * * \param iset reference to index set generated with range segments. - * Method assumes index set is empty (no segments). + * Method assumes index set is empty (no segments). * \param fastDim "fast" block dimension (see above). * \param midDim "mid" block dimension (see above). * \param slowDim "slow" block dimension (see above). * ****************************************************************************** */ -void buildLockFreeBlockIndexset( - RAJA::TypedIndexSet& iset, - int fastDim, - int midDim, - int slowDim); +void buildLockFreeBlockIndexset(RAJA::TypedIndexSet& iset, + int fastDim, + int midDim, + int slowDim); /*! ****************************************************************************** * * \brief Generate a lock-free "color" index set containing range and list * segments. - * - * TThe domain-set is colored based on connectivity to the range-set. - * All elements in each segment are independent, and no two segments + * + * TThe domain-set is colored based on connectivity to the range-set. + * All elements in each segment are independent, and no two segments * can be executed in parallel. * - * \param iset reference to index set generated. Method assumes index set - * is empty (no segments). + * \param iset reference to index set generated. Method assumes index set + * is empty (no segments). * \param work_res camp resource object that identifies the memory space in * which list segment index data will live (passed to list segment * ctor). @@ -123,7 +122,7 @@ void buildLockFreeColorIndexset( int numEntity, int numRangePerDomain, int numEntityRange, - RAJA::Index_type* elemPermutation = nullptr, + RAJA::Index_type* elemPermutation = nullptr, RAJA::Index_type* ielemPermutation = nullptr); } // namespace RAJA diff --git a/include/RAJA/index/IndexSetUtils.hpp b/include/RAJA/index/IndexSetUtils.hpp index 4baea450fc..d5da3e9e19 100644 --- a/include/RAJA/index/IndexSetUtils.hpp +++ b/include/RAJA/index/IndexSetUtils.hpp @@ -31,10 +31,10 @@ namespace RAJA //@{ //! @name Methods to gather indices of segment or index set into a container. //! -//! For each method, the given container must be templated on a data type, -//! have default and copy ctors, push_back method, and value_type. Is is -//! assumed that the container data type and segment or index set data type -//! are compatible in the sense that the index set type can be converted to +//! For each method, the given container must be templated on a data type, +//! have default and copy ctors, push_back method, and value_type. Is is +//! assumed that the container data type and segment or index set data type +//! are compatible in the sense that the index set type can be converted to //! the container data type. /*! @@ -49,11 +49,8 @@ RAJA_INLINE void getIndices(CONTAINER_T& con, const TypedIndexSet& iset) { CONTAINER_T tcon; - forall >(iset, - [&](typename CONTAINER_T::value_type idx) { - tcon.push_back(idx); - } - ); + forall>( + iset, [&](typename CONTAINER_T::value_type idx) { tcon.push_back(idx); }); con = tcon; } @@ -68,11 +65,8 @@ template RAJA_INLINE void getIndices(CONTAINER_T& con, const SEGMENT_T& seg) { CONTAINER_T tcon; - forall(seg, - [&](typename CONTAINER_T::value_type idx) { - tcon.push_back(idx); - } - ); + forall(seg, [&](typename CONTAINER_T::value_type idx) + { tcon.push_back(idx); }); con = tcon; } @@ -90,11 +84,12 @@ RAJA_INLINE void getIndicesConditional(CONTAINER_T& con, CONDITIONAL conditional) { CONTAINER_T tcon; - forall >(iset, - [&](typename CONTAINER_T::value_type idx) { - if (conditional(idx)) tcon.push_back(idx); - } - ); + forall>( + iset, + [&](typename CONTAINER_T::value_type idx) + { + if (conditional(idx)) tcon.push_back(idx); + }); con = tcon; } @@ -113,10 +108,10 @@ RAJA_INLINE void getIndicesConditional(CONTAINER_T& con, { CONTAINER_T tcon; forall(seg, - [&](typename CONTAINER_T::value_type idx) { - if (conditional(idx)) tcon.push_back(idx); - } - ); + [&](typename CONTAINER_T::value_type idx) + { + if (conditional(idx)) tcon.push_back(idx); + }); con = tcon; } diff --git a/include/RAJA/index/IndexValue.hpp b/include/RAJA/index/IndexValue.hpp index 44fa143445..7ed94a299e 100644 --- a/include/RAJA/index/IndexValue.hpp +++ b/include/RAJA/index/IndexValue.hpp @@ -28,8 +28,8 @@ namespace RAJA { -struct IndexValueBase { -}; +struct IndexValueBase +{}; /*! * \brief Strongly typed "integer" class. @@ -44,16 +44,17 @@ struct IndexValueBase { * Yes, this uses the curiously-recurring template pattern. */ template -struct IndexValue : public IndexValueBase { +struct IndexValue : public IndexValueBase +{ using value_type = VALUE; //! Default constructor initializes value to 0. - RAJA_INLINE constexpr IndexValue() = default; - constexpr RAJA_INLINE IndexValue(IndexValue const &) = default; - constexpr RAJA_INLINE IndexValue(IndexValue &&) = default; - RAJA_INLINE IndexValue &operator=(IndexValue const &) = default; - RAJA_INLINE IndexValue &operator=(IndexValue &&) = default; + RAJA_INLINE constexpr IndexValue() = default; + constexpr RAJA_INLINE IndexValue(IndexValue const&) = default; + constexpr RAJA_INLINE IndexValue(IndexValue&&) = default; + RAJA_INLINE IndexValue& operator=(IndexValue const&) = default; + RAJA_INLINE IndexValue& operator=(IndexValue&&) = default; /*! * \brief Explicit constructor. @@ -61,14 +62,13 @@ struct IndexValue : public IndexValueBase { */ RAJA_HOST_DEVICE RAJA_INLINE constexpr explicit IndexValue(value_type v) : value(v) - { - } + {} //! Dereference provides cast-to-integer. - RAJA_HOST_DEVICE RAJA_INLINE value_type &operator*() { return value; } + RAJA_HOST_DEVICE RAJA_INLINE value_type& operator*() { return value; } //! Dereference provides cast-to-integer. - RAJA_HOST_DEVICE RAJA_INLINE const value_type &operator*() const + RAJA_HOST_DEVICE RAJA_INLINE const value_type& operator*() const { return value; } @@ -82,10 +82,10 @@ struct IndexValue : public IndexValueBase { } //! preincrement stored index - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator++() + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator++() { value++; - return static_cast(*this); + return static_cast(*this); } //! postdecrement -- returns a copy @@ -97,10 +97,10 @@ struct IndexValue : public IndexValueBase { } //! preincrement stored index - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator--() + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator--() { value--; - return static_cast(*this); + return static_cast(*this); } //! addition to underlying index from an Index_type @@ -163,52 +163,52 @@ struct IndexValue : public IndexValueBase { return TYPE(value % a.value); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator+=(value_type x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator+=(value_type x) { value += x; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator+=(TYPE x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator+=(TYPE x) { value += x.value; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator-=(value_type x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator-=(value_type x) { value -= x; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator-=(TYPE x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator-=(TYPE x) { value -= x.value; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator*=(value_type x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator*=(value_type x) { value *= x; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator*=(TYPE x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator*=(TYPE x) { value *= x.value; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator/=(value_type x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator/=(value_type x) { value /= x; - return static_cast(*this); + return static_cast(*this); } - RAJA_HOST_DEVICE RAJA_INLINE TYPE &operator/=(TYPE x) + RAJA_HOST_DEVICE RAJA_INLINE TYPE& operator/=(TYPE x) { value /= x.value; - return static_cast(*this); + return static_cast(*this); } RAJA_HOST_DEVICE RAJA_INLINE bool operator<(value_type x) const @@ -334,18 +334,22 @@ constexpr RAJA_HOST_DEVICE RAJA_INLINE return val; } -namespace internal{ -template -struct StripIndexTypeT { - using type = FROM; +namespace internal +{ +template +struct StripIndexTypeT +{ + using type = FROM; }; -template -struct StripIndexTypeT::value>::type> +template +struct StripIndexTypeT< + FROM, + typename std::enable_if::value>::type> { - using type = typename FROM::value_type; + using type = typename FROM::value_type; }; -} // namespace internal +} // namespace internal /*! * \brief Strips a strongly typed index to its underlying type @@ -353,7 +357,7 @@ struct StripIndexTypeT +template using strip_index_type_t = typename internal::StripIndexTypeT::type; /*! @@ -362,12 +366,11 @@ using strip_index_type_t = typename internal::StripIndexTypeT::type; * * \param FROM the original type */ -template -using make_signed_t = typename std::conditional < - std::is_floating_point::value, - std::common_type, - std::make_signed - >::type::type; +template +using make_signed_t = + typename std::conditional::value, + std::common_type, + std::make_signed>::type::type; } // namespace RAJA @@ -376,19 +379,18 @@ using make_signed_t = typename std::conditional < * \param TYPE the name of the type * \param NAME a string literal to identify this index type */ -#define RAJA_INDEX_VALUE(TYPE, NAME) \ - class TYPE : public ::RAJA::IndexValue \ - { \ - using parent = ::RAJA::IndexValue; \ - \ - public: \ - using IndexValueType = TYPE; \ - RAJA_HOST_DEVICE RAJA_INLINE TYPE() : parent::IndexValue() {} \ - RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(::RAJA::Index_type v) \ - : parent::IndexValue(v) \ - { \ - } \ - static inline std::string getName() { return NAME; } \ +#define RAJA_INDEX_VALUE(TYPE, NAME) \ + class TYPE : public ::RAJA::IndexValue \ + { \ + using parent = ::RAJA::IndexValue; \ + \ + public: \ + using IndexValueType = TYPE; \ + RAJA_HOST_DEVICE RAJA_INLINE TYPE() : parent::IndexValue() {} \ + RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(::RAJA::Index_type v) \ + : parent::IndexValue(v) \ + {} \ + static inline std::string getName() { return NAME; } \ }; /*! @@ -397,17 +399,17 @@ using make_signed_t = typename std::conditional < * \param IDXT the index types value type * \param NAME a string literal to identify this index type */ -#define RAJA_INDEX_VALUE_T(TYPE, IDXT, NAME) \ - class TYPE : public ::RAJA::IndexValue \ - { \ - public: \ - RAJA_HOST_DEVICE RAJA_INLINE TYPE() \ - : RAJA::IndexValue::IndexValue() {} \ - RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(IDXT v) \ - : RAJA::IndexValue::IndexValue(v) \ - { \ - } \ - static inline std::string getName() { return NAME; } \ +#define RAJA_INDEX_VALUE_T(TYPE, IDXT, NAME) \ + class TYPE : public ::RAJA::IndexValue \ + { \ + public: \ + RAJA_HOST_DEVICE RAJA_INLINE TYPE() \ + : RAJA::IndexValue::IndexValue() \ + {} \ + RAJA_HOST_DEVICE RAJA_INLINE explicit TYPE(IDXT v) \ + : RAJA::IndexValue::IndexValue(v) \ + {} \ + static inline std::string getName() { return NAME; } \ }; #endif diff --git a/include/RAJA/index/ListSegment.hpp b/include/RAJA/index/ListSegment.hpp index adee46053c..187ec05d3f 100644 --- a/include/RAJA/index/ListSegment.hpp +++ b/include/RAJA/index/ListSegment.hpp @@ -85,7 +85,6 @@ template class TypedListSegment { public: - //@{ //! @name Types used in implementation based on template parameter. @@ -111,7 +110,7 @@ class TypedListSegment * \param values array of indices defining iteration space of segment * \param length number of indices * \param resource camp resource defining memory space where index data live - * \param owned optional enum value indicating whether segment owns indices + * \param owned optional enum value indicating whether segment owns indices * (Owned or Unowned). Default is Owned. * * If 'Unowned' is passed as last argument, the segment will not own its @@ -121,7 +120,7 @@ class TypedListSegment Index_type length, camp::resources::Resource resource, IndexOwnership owned = Owned) - : m_resource(nullptr), m_owned(Unowned), m_data(nullptr), m_size(0) + : m_resource(nullptr), m_owned(Unowned), m_data(nullptr), m_size(0) { initIndexData(values, length, resource, owned); } @@ -141,30 +140,34 @@ class TypedListSegment template TypedListSegment(const Container& container, camp::resources::Resource resource) - : m_resource(nullptr), m_owned(Unowned), m_data(nullptr), m_size(container.size()) + : m_resource(nullptr), + m_owned(Unowned), + m_data(nullptr), + m_size(container.size()) { - if (m_size > 0) { + if (m_size > 0) + { - camp::resources::Resource host_res{camp::resources::Host()}; + camp::resources::Resource host_res {camp::resources::Host()}; value_type* tmp = host_res.allocate(m_size); - auto dest = tmp; - auto src = container.begin(); + auto dest = tmp; + auto src = container.begin(); auto const end = container.end(); - while (src != end) { + while (src != end) + { *dest = *src; ++dest; ++src; } m_resource = new camp::resources::Resource(resource); - m_data = m_resource->allocate(m_size); + m_data = m_resource->allocate(m_size); m_resource->memcpy(m_data, tmp, sizeof(value_type) * m_size); m_owned = Owned; host_res.deallocate(tmp); - } } @@ -175,10 +178,11 @@ class TypedListSegment // As this may be called from a lambda in a // RAJA method we perform a shallow copy RAJA_HOST_DEVICE TypedListSegment(const TypedListSegment& other) - : m_resource(nullptr), - m_owned(Unowned), m_data(other.m_data), m_size(other.m_size) - { - } + : m_resource(nullptr), + m_owned(Unowned), + m_data(other.m_data), + m_size(other.m_size) + {} //! Copy assignment for list segment // As this may be called from a lambda in a @@ -187,59 +191,59 @@ class TypedListSegment { clear(); m_resource = nullptr; - m_owned = Unowned; - m_data = other.m_data; - m_size = other.m_size; + m_owned = Unowned; + m_data = other.m_data; + m_size = other.m_size; } - //! move assignment for list segment + //! move assignment for list segment // As this may be called from a lambda in a // RAJA method we perform a shallow copy RAJA_HOST_DEVICE TypedListSegment& operator=(TypedListSegment&& rhs) { clear(); m_resource = rhs.m_resource; - m_owned = rhs.m_owned; - m_data = rhs.m_data; - m_size = rhs.m_size; + m_owned = rhs.m_owned; + m_data = rhs.m_data; + m_size = rhs.m_size; rhs.m_resource = nullptr; - rhs.m_owned = Unowned; - rhs.m_data = nullptr; - rhs.m_size = 0; + rhs.m_owned = Unowned; + rhs.m_data = nullptr; + rhs.m_size = 0; } //! Move constructor for list segment RAJA_HOST_DEVICE TypedListSegment(TypedListSegment&& rhs) - : m_resource(rhs.m_resource), - m_owned(rhs.m_owned), m_data(rhs.m_data), m_size(rhs.m_size) + : m_resource(rhs.m_resource), + m_owned(rhs.m_owned), + m_data(rhs.m_data), + m_size(rhs.m_size) { - rhs.m_owned = Unowned; + rhs.m_owned = Unowned; rhs.m_resource = nullptr; - rhs.m_size = 0; - rhs.m_data = nullptr; + rhs.m_size = 0; + rhs.m_data = nullptr; } //! List segment destructor - RAJA_HOST_DEVICE ~TypedListSegment() - { - clear(); - } + RAJA_HOST_DEVICE ~TypedListSegment() { clear(); } //! Clear method to be called RAJA_HOST_DEVICE void clear() { #if !defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) - if (m_data != nullptr && m_owned == Owned) { + if (m_data != nullptr && m_owned == Owned) + { m_resource->deallocate(m_data); delete m_resource; } #endif - m_data = nullptr; + m_data = nullptr; m_resource = nullptr; - m_owned = Unowned; - m_size = 0; + m_owned = Unowned; + m_size = 0; } //@} @@ -345,32 +349,35 @@ class TypedListSegment { // empty list segment - if (len <= 0 || container == nullptr) { - m_data = nullptr; - m_size = 0; + if (len <= 0 || container == nullptr) + { + m_data = nullptr; + m_size = 0; m_owned = Unowned; return; } // some non-zero size -- initialize accordingly - m_size = len; + m_size = len; m_owned = container_own; - if (m_owned == Owned) { + if (m_owned == Owned) + { - m_resource = new camp::resources::Resource(resource_); + m_resource = new camp::resources::Resource(resource_); - camp::resources::Resource host_res{camp::resources::Host()}; + camp::resources::Resource host_res {camp::resources::Host()}; - value_type* tmp = host_res.allocate(m_size); + value_type* tmp = host_res.allocate(m_size); - for (Index_type i = 0; i < m_size; ++i) { - tmp[i] = container[i]; - } + for (Index_type i = 0; i < m_size; ++i) + { + tmp[i] = container[i]; + } - m_data = m_resource->allocate(m_size); - m_resource->memcpy(m_data, tmp, sizeof(value_type) * m_size); + m_data = m_resource->allocate(m_size); + m_resource->memcpy(m_data, tmp, sizeof(value_type) * m_size); - host_res.deallocate(tmp); + host_res.deallocate(tmp); return; } @@ -382,7 +389,7 @@ class TypedListSegment // Copy of camp resource passed to ctor - camp::resources::Resource *m_resource; + camp::resources::Resource* m_resource; // Ownership flag to guide data copying/management IndexOwnership m_owned; diff --git a/include/RAJA/index/RangeSegment.hpp b/include/RAJA/index/RangeSegment.hpp index a41959c583..57fdb4c55e 100644 --- a/include/RAJA/index/RangeSegment.hpp +++ b/include/RAJA/index/RangeSegment.hpp @@ -50,10 +50,10 @@ namespace RAJA * * NOTE: TypedRangeSegment::iterator is a RandomAccessIterator * - * NOTE: TypedRangeSegment supports negative indices; e.g., an interval of + * NOTE: TypedRangeSegment supports negative indices; e.g., an interval of * indices [-5, 3). * - * NOTE: Proper handling of indices strides requires that StorageT is a + * NOTE: Proper handling of indices strides requires that StorageT is a * signed type. * * Usage: @@ -92,15 +92,19 @@ namespace RAJA * ****************************************************************************** */ -template >> -struct TypedRangeSegment { +template >> +struct TypedRangeSegment +{ - // + // // Static asserts to provide some useful error messages during compilation // for incorrect usage. - // - static_assert(std::is_signed::value, "TypedRangeSegment DiffT requires signed type."); - static_assert(!std::is_floating_point::value, "TypedRangeSegment Type must be non floating point."); + // + static_assert(std::is_signed::value, + "TypedRangeSegment DiffT requires signed type."); + static_assert(!std::is_floating_point::value, + "TypedRangeSegment Type must be non floating point."); //@{ //! @name Types used in implementation based on template parameters. @@ -117,20 +121,19 @@ struct TypedRangeSegment { //@} //@{ - //! @name Constructors, destructor, and copy assignment. + //! @name Constructors, destructor, and copy assignment. /*! * \brief Construct a range segment repreenting the interval [begin, end) - * + * * \param begin start value (inclusive) for the range * \param end end value (exclusive) for the range */ using StripStorageT = strip_index_type_t; - RAJA_HOST_DEVICE constexpr TypedRangeSegment(StripStorageT begin, StripStorageT end) - : m_begin(iterator(begin)), - m_end(begin > end ? m_begin : iterator(end)) - { - } + RAJA_HOST_DEVICE constexpr TypedRangeSegment(StripStorageT begin, + StripStorageT end) + : m_begin(iterator(begin)), m_end(begin > end ? m_begin : iterator(end)) + {} //! Disable compiler generated constructor RAJA_HOST_DEVICE TypedRangeSegment() = delete; @@ -187,7 +190,7 @@ struct TypedRangeSegment { * \brief Compare this segment to another for inequality * * \return true if begin or end does not match, else false - */ + */ RAJA_HOST_DEVICE RAJA_INLINE bool operator!=(TypedRangeSegment const& o) const { return !(operator==(o)); @@ -198,9 +201,9 @@ struct TypedRangeSegment { /*! * \brief Get a new TypedRangeSegment instance representing a slice of * existing segment - * - * \param begin start iterate of new range - * \param length maximum length of new range + * + * \param begin start iterate of new range + * \param length maximum length of new range * \return TypedRangeSegment representing the interval * [ *begin() + begin, min( *begin() + begin + length, *end() ) ) * @@ -213,7 +216,7 @@ struct TypedRangeSegment { * auto r = RAJA::TypedRangeSegment(-4, 4); * * // s repreents the subinterval [-3, 2) - * auto s = r.slice(1, 5); + * auto s = r.slice(1, 5); * * \endverbatim */ @@ -221,9 +224,9 @@ struct TypedRangeSegment { DiffT length) const { StorageT start = m_begin[0] + begin; - StorageT end = start + length > m_end[0] ? m_end[0] : start + length; + StorageT end = start + length > m_end[0] ? m_end[0] : start + length; - return TypedRangeSegment{stripIndexType(start), stripIndexType(end)}; + return TypedRangeSegment {stripIndexType(start), stripIndexType(end)}; } /*! @@ -247,8 +250,8 @@ struct TypedRangeSegment { /*! ****************************************************************************** * - * \class TypedRangeStrideSegment - * + * \class TypedRangeStrideSegment + * * \brief Segment class representing a strided range of typed indices * * \tparam StorageT underlying data type for the segment indices (required) @@ -264,9 +267,9 @@ struct TypedRangeSegment { * * NOTE: TypedRangeStrideSegment::iterator is a RandomAccessIterator * - * NOTE: TypedRangeStrideSegment allows for positive or negative strides and - * indices. This allows for forward (stride > 0) or backward (stride < 0) - * traversal of the iteration space. A stride of zero is undefined and + * NOTE: TypedRangeStrideSegment allows for positive or negative strides and + * indices. This allows for forward (stride > 0) or backward (stride < 0) + * traversal of the iteration space. A stride of zero is undefined and * will cause divide-by-zero errors. * * As with RangeSegment, the iteration space is inclusive of begin() and @@ -275,7 +278,7 @@ struct TypedRangeSegment { * For positive strides, begin() > end() implies size()==0 * For negative strides, begin() < end() implies size()==0 * - * NOTE: Proper handling of negative strides and indices requires that + * NOTE: Proper handling of negative strides and indices requires that * StorageT is a signed type. * * Usage: @@ -321,15 +324,19 @@ struct TypedRangeSegment { * ****************************************************************************** */ -template >> -struct TypedRangeStrideSegment { +template >> +struct TypedRangeStrideSegment +{ // // Static asserts to provide some useful error messages during compilation // for incorrect usage. // - static_assert(std::is_signed::value, "TypedRangeStrideSegment DiffT requires signed type."); - static_assert(!std::is_floating_point::value, "TypedRangeStrideSegment Type must be non floating point."); + static_assert(std::is_signed::value, + "TypedRangeStrideSegment DiffT requires signed type."); + static_assert(!std::is_floating_point::value, + "TypedRangeStrideSegment Type must be non floating point."); //@{ //! @name Types used in implementation based on template parameters. @@ -349,7 +356,7 @@ struct TypedRangeStrideSegment { //! @name Constructors, destructor, and copy assignment. /*! - * \brief Construct a range segment for the interval [begin, end) with + * \brief Construct a range segment for the interval [begin, end) with * given stride * * \param begin start value (inclusive) for the range @@ -357,9 +364,8 @@ struct TypedRangeStrideSegment { * \param stride stride value when iterating over the range */ using StripStorageT = strip_index_type_t; - RAJA_HOST_DEVICE TypedRangeStrideSegment(StripStorageT begin, - StripStorageT end, - DiffT stride) + RAJA_HOST_DEVICE + TypedRangeStrideSegment(StripStorageT begin, StripStorageT end, DiffT stride) : m_begin(iterator(begin, stride)), m_end(iterator(end, stride)), // essentially a ceil((end-begin)/stride) but using integer math, @@ -367,13 +373,16 @@ struct TypedRangeStrideSegment { m_size((end - begin + stride - (stride > 0 ? 1 : -1)) / stride) { // clamp range when end is unreachable from begin without wrapping - if (stride < 0 && end > begin) { + if (stride < 0 && end > begin) + { m_end = m_begin; - } else if (stride > 0 && end < begin) { + } + else if (stride > 0 && end < begin) + { m_end = m_begin; } // m_size initialized as negative indicates a zero iteration space - m_size = m_size < DiffT{0} ? DiffT{0} : m_size; + m_size = m_size < DiffT {0} ? DiffT {0} : m_size; } //! Disable compiler generated constructor @@ -408,8 +417,8 @@ struct TypedRangeStrideSegment { /*! * \brief Get size of this segment - * - * The size is the number of iterates in the + * + * The size is the number of iterates in the * interval [begin, end) when striding over it */ RAJA_HOST_DEVICE DiffT size() const { return m_size; } @@ -435,7 +444,8 @@ struct TypedRangeStrideSegment { * * \return true if begin, end, or size does not match, else false */ - RAJA_HOST_DEVICE RAJA_INLINE bool operator!=(TypedRangeStrideSegment const& o) const + RAJA_HOST_DEVICE RAJA_INLINE bool + operator!=(TypedRangeStrideSegment const& o) const { return !(operator==(o)); } @@ -450,7 +460,7 @@ struct TypedRangeStrideSegment { * \param length maximum length of new range * * \return TypedRangeStrideSegment representing the interval - * [ *begin() + begin * stride, + * [ *begin() + begin * stride, * min( *begin() + (begin + length) * stride, *end() ) * * Here's an example of a slice operation on a range segment with a negative @@ -466,24 +476,26 @@ struct TypedRangeStrideSegment { * // 5 indices in r starting at the 6th entry * auto s = r.slice(6, 6); * - * \endverbatim + * \endverbatim */ RAJA_HOST_DEVICE TypedRangeStrideSegment slice(StorageT begin, DiffT length) const { StorageT stride = m_begin.get_stride(); - StorageT start = m_begin[0] + begin * stride; - StorageT end = start + stride * length; + StorageT start = m_begin[0] + begin * stride; + StorageT end = start + stride * length; - if (stride > 0) { + if (stride > 0) + { end = end > m_end[0] ? m_end[0] : end; - } else { + } + else + { end = end < m_end[0] ? m_end[0] : end; } - return TypedRangeStrideSegment{stripIndexType(start), - stripIndexType(end), - m_begin.get_stride()}; + return TypedRangeStrideSegment {stripIndexType(start), stripIndexType(end), + m_begin.get_stride()}; } /*! @@ -518,11 +530,12 @@ namespace detail template struct common_type - : std::common_type::type> { -}; + : std::common_type::type> +{}; template -struct common_type { +struct common_type +{ using type = T; }; @@ -549,7 +562,7 @@ RAJA_HOST_DEVICE TypedRangeSegment make_range(BeginT&& begin, } /*! - * \brief Function to make a TypedRangeStride Segment for the interval + * \brief Function to make a TypedRangeStride Segment for the interval * [begin, end) with given stride * * \return a newly constructed TypedRangeStrideSegment where @@ -561,13 +574,14 @@ template > -RAJA_HOST_DEVICE TypedRangeStrideSegment make_strided_range( - BeginT&& begin, - EndT&& end, - StrideT&& stride) +RAJA_HOST_DEVICE TypedRangeStrideSegment +make_strided_range(BeginT&& begin, EndT&& end, StrideT&& stride) { - static_assert(std::is_signed::value, "make_strided_segment : stride must be signed."); - static_assert(std::is_same, StrideT>::value, "make_stride_segment : stride and end must be of similar types."); + static_assert(std::is_signed::value, + "make_strided_segment : stride must be signed."); + static_assert( + std::is_same, StrideT>::value, + "make_stride_segment : stride and end must be of similar types."); return {begin, end, stride}; } @@ -576,13 +590,13 @@ namespace concepts template struct RangeConstructible - : DefineConcept(camp::val>()) { -}; + : DefineConcept(camp::val>()) +{}; template struct RangeStrideConstructible - : DefineConcept(camp::val>()) { -}; + : DefineConcept(camp::val>()) +{}; } // namespace concepts diff --git a/include/RAJA/internal/DepGraphNode.hpp b/include/RAJA/internal/DepGraphNode.hpp index 8feceae22f..d2a30ee5ce 100644 --- a/include/RAJA/internal/DepGraphNode.hpp +++ b/include/RAJA/internal/DepGraphNode.hpp @@ -57,8 +57,7 @@ class RAJA_ALIGNED_ATTR(256) DepGraphNode /// DepGraphNode() : m_num_dep_tasks(0), m_semaphore_reload_value(0), m_semaphore_value(0) - { - } + {} /// /// Get/set semaphore value; i.e., the current number of (unsatisfied) @@ -82,7 +81,8 @@ class RAJA_ALIGNED_ATTR(256) DepGraphNode /// void satisfyOne() { - if (m_semaphore_value > 0) { + if (m_semaphore_value > 0) + { --m_semaphore_value; } } @@ -92,7 +92,8 @@ class RAJA_ALIGNED_ATTR(256) DepGraphNode /// void wait() { - while (m_semaphore_value > 0) { + while (m_semaphore_value > 0) + { // TODO: an efficient wait would be better here, but the standard // promise/future is not good enough std::this_thread::yield(); diff --git a/include/RAJA/internal/Iterators.hpp b/include/RAJA/internal/Iterators.hpp index 6f32a56e6d..33cdd3f539 100644 --- a/include/RAJA/internal/Iterators.hpp +++ b/include/RAJA/internal/Iterators.hpp @@ -50,7 +50,8 @@ std::string overflow_msg(LType lhs, RType rhs) template RAJA_HOST_DEVICE bool is_addition_overflow(Type lhs, DifferenceType rhs) { - if (std::is_unsigned::value) { + if (std::is_unsigned::value) + { if ((rhs > 0) && (lhs > std::numeric_limits::max() - rhs)) return true; if ((rhs < 0) && (lhs < std::numeric_limits::min() - rhs)) @@ -64,18 +65,22 @@ RAJA_HOST_DEVICE bool is_subtraction_overflow(Type lhs, DifferenceType rhs, bool iterator_on_left = true) { - if (iterator_on_left) { + if (iterator_on_left) + { - if (std::is_unsigned::value) { + if (std::is_unsigned::value) + { if ((rhs > 0) && (lhs < std::numeric_limits::min() + rhs)) return true; if ((rhs < 0) && (lhs > std::numeric_limits::max() + rhs)) return true; } + } + else + { // Special case where operation is : value(lhs) - iterator(rhs). - } else { // Special case where operation is : value(lhs) - iterator(rhs). - - if (std::is_unsigned::value) { + if (std::is_unsigned::value) + { if ((lhs > 0) && (rhs < std::numeric_limits::min() + lhs)) return true; if ((lhs < 0)) return true; @@ -100,29 +105,28 @@ RAJA_HOST_DEVICE void check_is_subtraction_overflow(Type lhs, } #endif -template + typename PointerType = Type*> class numeric_iterator { public: - using value_type = Type; + using value_type = Type; using stripped_value_type = strip_index_type_t; - using difference_type = DifferenceType; - using pointer = PointerType; - using reference = value_type&; - using iterator_category = std::random_access_iterator_tag; - - constexpr numeric_iterator() noexcept = default; - constexpr numeric_iterator(const numeric_iterator&) noexcept = default; - constexpr numeric_iterator(numeric_iterator&&) noexcept = default; + using difference_type = DifferenceType; + using pointer = PointerType; + using reference = value_type&; + using iterator_category = std::random_access_iterator_tag; + + constexpr numeric_iterator() noexcept = default; + constexpr numeric_iterator(const numeric_iterator&) noexcept = default; + constexpr numeric_iterator(numeric_iterator&&) noexcept = default; numeric_iterator& operator=(const numeric_iterator&) noexcept = default; - numeric_iterator& operator=(numeric_iterator&&) noexcept = default; + numeric_iterator& operator=(numeric_iterator&&) noexcept = default; RAJA_HOST_DEVICE constexpr numeric_iterator(const stripped_value_type& rhs) : val(rhs) - { - } + {} RAJA_HOST_DEVICE inline DifferenceType get_stride() const { return 1; } @@ -174,8 +178,8 @@ class numeric_iterator return tmp; } - RAJA_HOST_DEVICE inline numeric_iterator& operator+=( - const difference_type& rhs) + RAJA_HOST_DEVICE inline numeric_iterator& + operator+=(const difference_type& rhs) { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) check_is_addition_overflow(val, rhs); @@ -183,8 +187,8 @@ class numeric_iterator val += rhs; return *this; } - RAJA_HOST_DEVICE inline numeric_iterator& operator-=( - const difference_type& rhs) + RAJA_HOST_DEVICE inline numeric_iterator& + operator-=(const difference_type& rhs) { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) check_is_subtraction_overflow(val, rhs); @@ -192,48 +196,47 @@ class numeric_iterator val -= rhs; return *this; } - RAJA_HOST_DEVICE inline numeric_iterator& operator+=( - const numeric_iterator& rhs) + RAJA_HOST_DEVICE inline numeric_iterator& + operator+=(const numeric_iterator& rhs) { val += rhs.val; return *this; } - RAJA_HOST_DEVICE inline numeric_iterator& operator-=( - const numeric_iterator& rhs) + RAJA_HOST_DEVICE inline numeric_iterator& + operator-=(const numeric_iterator& rhs) { val -= rhs.val; return *this; } - RAJA_HOST_DEVICE inline stripped_value_type operator+( - const numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline stripped_value_type + operator+(const numeric_iterator& rhs) const { return val + rhs.val; } - RAJA_HOST_DEVICE inline stripped_value_type operator-( - const numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline stripped_value_type + operator-(const numeric_iterator& rhs) const { return val - rhs.val; } - RAJA_HOST_DEVICE inline numeric_iterator operator+( - const difference_type& rhs) const + RAJA_HOST_DEVICE inline numeric_iterator + operator+(const difference_type& rhs) const { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) check_is_addition_overflow(val, rhs); #endif return numeric_iterator(val + rhs); } - RAJA_HOST_DEVICE inline numeric_iterator operator-( - const difference_type& rhs) const + RAJA_HOST_DEVICE inline numeric_iterator + operator-(const difference_type& rhs) const { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) check_is_subtraction_overflow(val, rhs); #endif return numeric_iterator(val - rhs); } - RAJA_HOST_DEVICE friend constexpr numeric_iterator operator+( - difference_type lhs, - const numeric_iterator& rhs) + RAJA_HOST_DEVICE friend constexpr numeric_iterator + operator+(difference_type lhs, const numeric_iterator& rhs) { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) return is_addition_overflow(rhs.val, lhs) @@ -243,9 +246,8 @@ class numeric_iterator return numeric_iterator(lhs + rhs.val); #endif } - RAJA_HOST_DEVICE friend constexpr numeric_iterator operator-( - difference_type lhs, - const numeric_iterator& rhs) + RAJA_HOST_DEVICE friend constexpr numeric_iterator + operator-(difference_type lhs, const numeric_iterator& rhs) { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) return is_subtraction_overflow(rhs.val, lhs, false) @@ -273,31 +275,34 @@ class numeric_iterator stripped_value_type val = 0; }; -template + typename PointerType = Type*> class strided_numeric_iterator { public: - using value_type = Type; + using value_type = Type; using stripped_value_type = strip_index_type_t; - using difference_type = DifferenceType; - using pointer = DifferenceType*; - using reference = DifferenceType&; - using iterator_category = std::random_access_iterator_tag; + using difference_type = DifferenceType; + using pointer = DifferenceType*; + using reference = DifferenceType&; + using iterator_category = std::random_access_iterator_tag; constexpr strided_numeric_iterator() noexcept = default; - constexpr strided_numeric_iterator(const strided_numeric_iterator&) noexcept = default; - constexpr strided_numeric_iterator(strided_numeric_iterator&&) noexcept = default; - strided_numeric_iterator& operator=(const strided_numeric_iterator&) noexcept = default; - strided_numeric_iterator& operator=(strided_numeric_iterator&&) noexcept = default; + constexpr strided_numeric_iterator(const strided_numeric_iterator&) noexcept = + default; + constexpr strided_numeric_iterator(strided_numeric_iterator&&) noexcept = + default; + strided_numeric_iterator& + operator=(const strided_numeric_iterator&) noexcept = default; + strided_numeric_iterator& + operator=(strided_numeric_iterator&&) noexcept = default; RAJA_HOST_DEVICE constexpr strided_numeric_iterator( stripped_value_type rhs, DifferenceType stride_ = DifferenceType(1)) : val(rhs), stride(stride_) - { - } + {} RAJA_HOST_DEVICE inline DifferenceType get_stride() const { return stride; } @@ -312,8 +317,8 @@ class strided_numeric_iterator return *this; } - RAJA_HOST_DEVICE inline strided_numeric_iterator& operator+=( - const difference_type& rhs) + RAJA_HOST_DEVICE inline strided_numeric_iterator& + operator+=(const difference_type& rhs) { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) check_is_addition_overflow(val, rhs * stride); @@ -321,8 +326,8 @@ class strided_numeric_iterator val += rhs * stride; return *this; } - RAJA_HOST_DEVICE inline strided_numeric_iterator& operator-=( - const difference_type& rhs) + RAJA_HOST_DEVICE inline strided_numeric_iterator& + operator-=(const difference_type& rhs) { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) check_is_subtraction_overflow(val, rhs * stride); @@ -331,33 +336,33 @@ class strided_numeric_iterator return *this; } - RAJA_HOST_DEVICE inline difference_type operator+( - const strided_numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline difference_type + operator+(const strided_numeric_iterator& rhs) const { return (static_cast(val) + (static_cast(rhs.val))) / stride; } - RAJA_HOST_DEVICE inline difference_type operator-( - const strided_numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline difference_type + operator-(const strided_numeric_iterator& rhs) const { difference_type diff = (static_cast(val) - (static_cast(rhs.val))); - return (diff % stride != difference_type{0}) - ? (difference_type{1} + diff / stride) + return (diff % stride != difference_type {0}) + ? (difference_type {1} + diff / stride) : diff / stride; } - RAJA_HOST_DEVICE inline strided_numeric_iterator operator+( - const difference_type& rhs) const + RAJA_HOST_DEVICE inline strided_numeric_iterator + operator+(const difference_type& rhs) const { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) check_is_addition_overflow(val, rhs * stride); #endif return strided_numeric_iterator(val + rhs * stride, stride); } - RAJA_HOST_DEVICE inline strided_numeric_iterator operator-( - const difference_type& rhs) const + RAJA_HOST_DEVICE inline strided_numeric_iterator + operator-(const difference_type& rhs) const { #if defined(RAJA_ENABLE_ITERATOR_OVERFLOW_DEBUG) check_is_subtraction_overflow(val, rhs * stride); @@ -367,34 +372,34 @@ class strided_numeric_iterator // Specialized comparison to allow normal iteration to work on off-stride // multiples by adjusting rhs to the nearest *higher* multiple of stride - RAJA_HOST_DEVICE inline bool operator!=( - const strided_numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline bool + operator!=(const strided_numeric_iterator& rhs) const { return (val - rhs.val) / stride; } - RAJA_HOST_DEVICE inline bool operator==( - const strided_numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline bool + operator==(const strided_numeric_iterator& rhs) const { return !((val - rhs.val) / stride); } - RAJA_HOST_DEVICE inline bool operator>( - const strided_numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline bool + operator>(const strided_numeric_iterator& rhs) const { return val * stride > rhs.val * stride; } - RAJA_HOST_DEVICE inline bool operator<( - const strided_numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline bool + operator<(const strided_numeric_iterator& rhs) const { return val * stride < rhs.val * stride; } - RAJA_HOST_DEVICE inline bool operator>=( - const strided_numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline bool + operator>=(const strided_numeric_iterator& rhs) const { return val * stride >= rhs.val * stride; } - RAJA_HOST_DEVICE inline bool operator<=( - const strided_numeric_iterator& rhs) const + RAJA_HOST_DEVICE inline bool + operator<=(const strided_numeric_iterator& rhs) const { return val * stride <= rhs.val * stride; } @@ -415,7 +420,7 @@ class strided_numeric_iterator private: stripped_value_type val = 0; - DifferenceType stride = 1; + DifferenceType stride = 1; }; diff --git a/include/RAJA/internal/MemUtils_CPU.hpp b/include/RAJA/internal/MemUtils_CPU.hpp index 55015f9ab7..a7dee5a77c 100644 --- a/include/RAJA/internal/MemUtils_CPU.hpp +++ b/include/RAJA/internal/MemUtils_CPU.hpp @@ -27,7 +27,7 @@ #include "RAJA/util/types.hpp" -#if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ +#if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ defined(__MINGW32__) || defined(__BORLANDC__) #define RAJA_PLATFORM_WINDOWS #include @@ -44,7 +44,7 @@ inline void* allocate_aligned(size_t alignment, size_t size) #if defined(RAJA_HAVE_POSIX_MEMALIGN) // posix_memalign available void* ret = nullptr; - int err = posix_memalign(&ret, alignment, size); + int err = posix_memalign(&ret, alignment, size); return err ? nullptr : ret; #elif defined(RAJA_HAVE_ALIGNED_ALLOC) return std::aligned_alloc(alignment, size); @@ -53,10 +53,10 @@ inline void* allocate_aligned(size_t alignment, size_t size) #elif defined(RAJA_PLATFORM_WINDOWS) return _aligned_malloc(size, alignment); #else - char *mem = (char *)malloc(size + alignment + sizeof(void *)); + char* mem = (char*)malloc(size + alignment + sizeof(void*)); if (nullptr == mem) return nullptr; - void **ptr = (void **)((std::uintptr_t)(mem + alignment + sizeof(void *)) & - ~(alignment - 1)); + void** ptr = (void**)((std::uintptr_t)(mem + alignment + sizeof(void*)) & + ~(alignment - 1)); // Store the original address one position behind what we give the user. ptr[-1] = mem; return ptr; @@ -97,25 +97,23 @@ inline void free_aligned(void* ptr) /// struct FreeAligned { - void operator()(void* ptr) - { - free_aligned(ptr); - } + void operator()(void* ptr) { free_aligned(ptr); } }; /// /// Deleter function object for memory allocated with allocate_aligned_type /// that calls the destructor for the fist size objects in the storage. /// -template < typename T, typename index_type > +template struct FreeAlignedType : FreeAligned { index_type size = 0; void operator()(T* ptr) { - for ( index_type i = size; i > 0; --i ) { - ptr[i-1].~T(); + for (index_type i = size; i > 0; --i) + { + ptr[i - 1].~T(); } FreeAligned::operator()(ptr); } diff --git a/include/RAJA/internal/RAJAVec.hpp b/include/RAJA/internal/RAJAVec.hpp index 1d0ec0cbeb..7802bda6cd 100644 --- a/include/RAJA/internal/RAJAVec.hpp +++ b/include/RAJA/internal/RAJAVec.hpp @@ -49,7 +49,7 @@ namespace RAJA * ****************************************************************************** */ -template > +template > class RAJAVec { using allocator_traits_type = std::allocator_traits; @@ -57,24 +57,25 @@ class RAJAVec typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; + public: - using value_type = T; - using allocator_type = Allocator; - using size_type = std::size_t; + using value_type = T; + using allocator_type = Allocator; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using reference = value_type&; + using reference = value_type&; using const_reference = const value_type&; - using pointer = typename allocator_traits_type::pointer; - using const_pointer = typename allocator_traits_type::const_pointer; - using iterator = value_type*; - using const_iterator = const value_type*; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + using iterator = value_type*; + using const_iterator = const value_type*; /// /// Construct empty vector with given capacity. /// - explicit RAJAVec(size_type init_cap = 0, + explicit RAJAVec(size_type init_cap = 0, const allocator_type& a = allocator_type()) : m_data(nullptr), m_allocator(a), m_capacity(0), m_size(0) { @@ -86,7 +87,9 @@ class RAJAVec /// RAJAVec(const RAJAVec& other) : m_data(nullptr), - m_allocator(allocator_traits_type::select_on_container_copy_construction(other.m_allocator)), + m_allocator( + allocator_traits_type::select_on_container_copy_construction( + other.m_allocator)), m_capacity(0), m_size(0) { @@ -103,9 +106,9 @@ class RAJAVec m_capacity(other.m_capacity), m_size(other.m_size) { - other.m_data = nullptr; + other.m_data = nullptr; other.m_capacity = 0; - other.m_size = 0; + other.m_size = 0; } /// @@ -113,8 +116,9 @@ class RAJAVec /// RAJAVec& operator=(const RAJAVec& rhs) { - if (&rhs != this) { - copy_assign_private(rhs, propagate_on_container_copy_assignment{}); + if (&rhs != this) + { + copy_assign_private(rhs, propagate_on_container_copy_assignment {}); } return *this; } @@ -124,8 +128,10 @@ class RAJAVec /// RAJAVec& operator=(RAJAVec&& rhs) { - if (&rhs != this) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + if (&rhs != this) + { + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment {}); } return *this; } @@ -144,31 +150,31 @@ class RAJAVec /// void swap(RAJAVec& other) { - swap_private(other, propagate_on_container_swap{}); + swap_private(other, propagate_on_container_swap {}); } /// /// Get a pointer to the beginning of the contiguous vector /// - pointer data() { return m_data; } + pointer data() { return m_data; } /// const_pointer data() const { return m_data; } /// /// Get an iterator to the end. /// - iterator end() { return m_data + m_size; } + iterator end() { return m_data + m_size; } /// - const_iterator end() const { return m_data + m_size; } + const_iterator end() const { return m_data + m_size; } /// const_iterator cend() const { return m_data + m_size; } /// /// Get an iterator to the beginning. /// - iterator begin() { return m_data; } + iterator begin() { return m_data; } /// - const_iterator begin() const { return m_data; } + const_iterator begin() const { return m_data; } /// const_iterator cbegin() const { return m_data; } @@ -200,18 +206,12 @@ class RAJAVec /// /// Shrink the capacity of the vector to the current size. /// - void shrink_to_fit() - { - shrink_cap(m_size); - } + void shrink_to_fit() { shrink_cap(m_size); } /// /// Empty vector of all data. /// - void clear() - { - destroy_items_after(0); - } + void clear() { destroy_items_after(0); } /// /// Change the size of the vector, @@ -221,10 +221,13 @@ class RAJAVec RAJA_INLINE void resize(size_type new_size) { - if (new_size >= size()) { + if (new_size >= size()) + { reserve(new_size); construct_items_back(new_size); - } else { + } + else + { destroy_items_after(new_size); } } @@ -237,10 +240,13 @@ class RAJAVec RAJA_INLINE void resize(size_type new_size, const_reference new_value) { - if (new_size >= size()) { + if (new_size >= size()) + { reserve(new_size); construct_items_back(new_size, new_value); - } else { + } + else + { destroy_items_after(new_size); } } @@ -248,23 +254,23 @@ class RAJAVec /// /// Bracket operator accessor. /// - reference operator[](difference_type i) { return m_data[i]; } + reference operator[](difference_type i) { return m_data[i]; } /// const_reference operator[](difference_type i) const { return m_data[i]; } /// /// Access the last item of the vector. /// - reference front() { return m_data[0]; } + reference front() { return m_data[0]; } /// const_reference front() const { return m_data[0]; } /// /// Access the last item of the vector. /// - reference back() { return m_data[m_size-1]; } + reference back() { return m_data[m_size - 1]; } /// - const_reference back() const { return m_data[m_size-1]; } + const_reference back() const { return m_data[m_size - 1]; } /// /// Add item to front end of vector. Note that this operation is unique to @@ -272,28 +278,31 @@ class RAJAVec /// void push_front(const_reference item) { emplace_front_private(item); } /// - void push_front( value_type&& item) { emplace_front_private(std::move(item)); } + void push_front(value_type&& item) { emplace_front_private(std::move(item)); } /// - template < typename ... Os > - void emplace_front(Os&&... os) { emplace_front_private(std::forward(os)...); } + template + void emplace_front(Os&&... os) + { + emplace_front_private(std::forward(os)...); + } /// /// Add item to back end of vector. /// void push_back(const_reference item) { emplace_back_private(item); } /// - void push_back( value_type&& item) { emplace_back_private(std::move(item)); } + void push_back(value_type&& item) { emplace_back_private(std::move(item)); } /// - template < typename ... Os > - void emplace_back(Os&&... os) { emplace_back_private(std::forward(os)...); } + template + void emplace_back(Os&&... os) + { + emplace_back_private(std::forward(os)...); + } /// /// Remove the last item of the vector. /// - void pop_back() - { - destroy_items_after(m_size-1); - } + void pop_back() { destroy_items_after(m_size - 1); } private: pointer m_data; @@ -307,13 +316,14 @@ class RAJAVec /// void copy_assign_private(RAJAVec const& rhs, std::true_type) { - if (m_allocator != rhs.m_allocator) { + if (m_allocator != rhs.m_allocator) + { clear(); shrink_to_fit(); m_allocator = rhs.m_allocator; } - copy_assign_private(rhs, std::false_type{}); + copy_assign_private(rhs, std::false_type {}); } /// @@ -323,10 +333,13 @@ class RAJAVec void copy_assign_private(RAJAVec const& rhs, std::false_type) { reserve(rhs.size()); - if (size() < rhs.size()) { + if (size() < rhs.size()) + { copy_assign_items(0, size(), rhs.data()); copy_construct_items_back(rhs.size(), rhs.data()); - } else { + } + else + { copy_assign_items(0, rhs.size(), rhs.data()); destroy_items_after(size()); } @@ -341,14 +354,14 @@ class RAJAVec clear(); shrink_to_fit(); - m_data = rhs.m_data; + m_data = rhs.m_data; m_allocator = std::move(rhs.m_allocator); - m_capacity = rhs.m_capacity; - m_size = rhs.m_size; + m_capacity = rhs.m_capacity; + m_size = rhs.m_size; - rhs.m_data = nullptr; + rhs.m_data = nullptr; rhs.m_capacity = 0; - rhs.m_size = 0; + rhs.m_size = 0; } /// @@ -357,23 +370,29 @@ class RAJAVec /// void move_assign_private(RAJAVec&& rhs, std::false_type) { - if (m_allocator == rhs.m_allocator) { + if (m_allocator == rhs.m_allocator) + { clear(); shrink_to_fit(); - m_data = rhs.m_data; + m_data = rhs.m_data; m_capacity = rhs.m_capacity; - m_size = rhs.m_size; + m_size = rhs.m_size; - rhs.m_data = nullptr; + rhs.m_data = nullptr; rhs.m_capacity = 0; - rhs.m_size = 0; - } else { + rhs.m_size = 0; + } + else + { reserve(rhs.size()); - if (size() < rhs.size()) { + if (size() < rhs.size()) + { move_assign_items(0, size(), rhs.data()); move_construct_items_back(rhs.size(), rhs.data()); - } else { + } + else + { move_assign_items(0, rhs.size(), rhs.data()); destroy_items_after(size()); } @@ -386,10 +405,10 @@ class RAJAVec void swap_private(RAJAVec& other, std::true_type) { using std::swap; - swap(m_data, other.m_data); + swap(m_data, other.m_data); swap(m_allocator, other.m_allocator); - swap(m_capacity, other.m_capacity); - swap(m_size, other.m_size); + swap(m_capacity, other.m_capacity); + swap(m_size, other.m_size); } /// @@ -398,9 +417,9 @@ class RAJAVec void swap_private(RAJAVec& other, std::false_type) { using std::swap; - swap(m_data, other.m_data); - swap(m_capacity, other.m_capacity); - swap(m_size, other.m_size); + swap(m_data, other.m_data); + swap(m_capacity, other.m_capacity); + swap(m_size, other.m_size); } // @@ -408,7 +427,8 @@ class RAJAVec // void copy_assign_items(size_type first, size_type last, const_pointer o_data) { - for (size_type i = first; i < last; ++i) { + for (size_type i = first; i < last; ++i) + { m_data[i] = o_data[i]; } } @@ -418,7 +438,8 @@ class RAJAVec // void move_assign_items(size_type first, size_type last, pointer o_data) { - for (size_type i = first; i < last; ++i) { + for (size_type i = first; i < last; ++i) + { m_data[i] = std::move(o_data[i]); } } @@ -426,11 +447,13 @@ class RAJAVec // // Construct items [m_size, new_size) from args. // - template < typename ... Os > + template void construct_items_back(size_type new_size, Os&&... os) { - for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, std::forward(os)...); + for (; m_size < new_size; ++m_size) + { + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::forward(os)...); } } @@ -439,8 +462,10 @@ class RAJAVec // void copy_construct_items_back(size_type new_size, const_pointer o_data) { - for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, o_data[m_size]); + for (; m_size < new_size; ++m_size) + { + allocator_traits_type::construct(m_allocator, m_data + m_size, + o_data[m_size]); } } @@ -449,8 +474,10 @@ class RAJAVec // void move_construct_items_back(size_type new_size, pointer o_data) { - for (; m_size < new_size; ++m_size) { - allocator_traits_type::construct(m_allocator, m_data+m_size, std::move(o_data[m_size])); + for (; m_size < new_size; ++m_size) + { + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::move(o_data[m_size])); } } @@ -459,39 +486,45 @@ class RAJAVec // void destroy_items_after(size_type new_end) { - for (; m_size > new_end; --m_size) { - allocator_traits_type::destroy(m_allocator, m_data+m_size-1); + for (; m_size > new_end; --m_size) + { + allocator_traits_type::destroy(m_allocator, m_data + m_size - 1); } } // // Add an item to the front, shifting all existing items back one. // - template < typename ... Os > + template void emplace_front_private(Os&&... os) { reserve(m_size + 1); - if (m_size > 0) { + if (m_size > 0) + { size_type i = m_size; - allocator_traits_type::construct(m_allocator, m_data+i, std::move(m_data[i - 1])); - for (--i; i > 0; --i) { + allocator_traits_type::construct(m_allocator, m_data + i, + std::move(m_data[i - 1])); + for (--i; i > 0; --i) + { m_data[i] = std::move(m_data[i - 1]); } allocator_traits_type::destroy(m_allocator, m_data); } - allocator_traits_type::construct(m_allocator, m_data, std::forward(os)...); + allocator_traits_type::construct(m_allocator, m_data, + std::forward(os)...); m_size++; } // // Add an item to the back. // - template < typename ... Os > + template void emplace_back_private(Os&&... os) { reserve(m_size + 1); - allocator_traits_type::construct(m_allocator, m_data+m_size, std::forward(os)...); + allocator_traits_type::construct(m_allocator, m_data + m_size, + std::forward(os)...); m_size++; } @@ -501,7 +534,7 @@ class RAJAVec // relying on STL directly. // static constexpr const size_type s_init_cap = 8; - static constexpr const double s_grow_fac = 1.5; + static constexpr const double s_grow_fac = 1.5; // // Get the next value for capacity given a target and minimum. @@ -509,7 +542,8 @@ class RAJAVec size_type get_next_cap(size_type target_size) { size_type next_cap = s_init_cap; - if (m_capacity != 0) { + if (m_capacity != 0) + { next_cap = static_cast(m_capacity * s_grow_fac); } return std::max(target_size, next_cap); @@ -520,7 +554,8 @@ class RAJAVec // void grow_cap(size_type target_size) { - if (m_capacity < target_size) { + if (m_capacity < target_size) + { change_cap(get_next_cap(target_size)); } } @@ -530,7 +565,8 @@ class RAJAVec // void shrink_cap(size_type target_size) { - if (m_capacity > target_size) { + if (m_capacity > target_size) + { change_cap(std::max(m_size, target_size)); } } @@ -542,19 +578,23 @@ class RAJAVec void change_cap(size_type next_cap) { pointer tdata = nullptr; - if (next_cap != 0) { + if (next_cap != 0) + { tdata = allocator_traits_type::allocate(m_allocator, next_cap); } - if (m_data) { - for (size_type i = 0; i < m_size; ++i) { - allocator_traits_type::construct(m_allocator, tdata+i, std::move(m_data[i])); - allocator_traits_type::destroy(m_allocator, m_data+i); + if (m_data) + { + for (size_type i = 0; i < m_size; ++i) + { + allocator_traits_type::construct(m_allocator, tdata + i, + std::move(m_data[i])); + allocator_traits_type::destroy(m_allocator, m_data + i); } allocator_traits_type::deallocate(m_allocator, m_data, m_capacity); } - m_data = tdata; + m_data = tdata; m_capacity = next_cap; } }; diff --git a/include/RAJA/internal/fault_tolerance.hpp b/include/RAJA/internal/fault_tolerance.hpp index cf3a86cede..66d03ca6cd 100644 --- a/include/RAJA/internal/fault_tolerance.hpp +++ b/include/RAJA/internal/fault_tolerance.hpp @@ -37,60 +37,72 @@ #include #include "cycle.h" -#define RAJA_FT_BEGIN \ - extern volatile int fault_type; \ - bool repeat; \ - bool do_time = false; \ - ticks start = 0, stop = 0; \ - if (fault_type != 0) { \ - printf("Uncaught fault %d\n", fault_type); \ - fault_type = 0; \ - } \ - do { \ - repeat = false; \ - if (do_time) { \ - start = getticks(); \ +#define RAJA_FT_BEGIN \ + extern volatile int fault_type; \ + bool repeat; \ + bool do_time = false; \ + ticks start = 0, stop = 0; \ + if (fault_type != 0) \ + { \ + printf("Uncaught fault %d\n", fault_type); \ + fault_type = 0; \ + } \ + do \ + { \ + repeat = false; \ + if (do_time) \ + { \ + start = getticks(); \ } -#define RAJA_FT_END \ - if (do_time) { \ - stop = getticks(); \ - printf("recoverable fault clock cycles = %16f\n", elapsed(stop, start)); \ - do_time = false; \ - fault_type = 0; \ - } \ - if (fault_type < 0) { \ - printf("Unrecoverable fault (restart penalty)\n"); \ - fault_type = 0; \ - } \ - if (fault_type > 0) { \ - /* invalidate cache */ \ - repeat = true; \ - do_time = true; \ - } \ - } \ - while (repeat == true) \ +#define RAJA_FT_END \ + if (do_time) \ + { \ + stop = getticks(); \ + printf("recoverable fault clock cycles = %16f\n", elapsed(stop, start)); \ + do_time = false; \ + fault_type = 0; \ + } \ + if (fault_type < 0) \ + { \ + printf("Unrecoverable fault (restart penalty)\n"); \ + fault_type = 0; \ + } \ + if (fault_type > 0) \ + { \ + /* invalidate cache */ \ + repeat = true; \ + do_time = true; \ + } \ + } \ + while (repeat == true) \ ; #else -#define RAJA_FT_BEGIN \ - extern volatile int fault_type; \ - bool repeat; \ - if (fault_type == 0) { \ - do { \ +#define RAJA_FT_BEGIN \ + extern volatile int fault_type; \ + bool repeat; \ + if (fault_type == 0) \ + { \ + do \ + { \ repeat = false; -#define RAJA_FT_END \ - if (fault_type > 0) { \ - /* invalidate cache */ \ - repeat = true; \ - fault_type = 0; \ - } \ - } \ - while (repeat == true) \ - ; \ - } \ - else { fault_type = 0; /* ignore for the simulation */ } +#define RAJA_FT_END \ + if (fault_type > 0) \ + { \ + /* invalidate cache */ \ + repeat = true; \ + fault_type = 0; \ + } \ + } \ + while (repeat == true) \ + ; \ + } \ + else \ + { \ + fault_type = 0; /* ignore for the simulation */ \ + } #endif // RAJA_REPORT_FT diff --git a/include/RAJA/internal/foldl.hpp b/include/RAJA/internal/foldl.hpp index af65c05392..f16bd9bee4 100644 --- a/include/RAJA/internal/foldl.hpp +++ b/include/RAJA/internal/foldl.hpp @@ -44,14 +44,16 @@ template struct foldl_impl; template -struct foldl_impl { +struct foldl_impl +{ using Ret = Arg1; }; #if RAJA_HAS_CXX17_IS_INVOCABLE template -struct foldl_impl { +struct foldl_impl +{ using Ret = typename std::invoke_result::type; }; @@ -60,18 +62,22 @@ template -struct foldl_impl { - using Ret = typename foldl_impl< - Op, - typename std::invoke_result::type, - Arg3>::type, - Rest...>::Ret; +struct foldl_impl +{ + using Ret = + typename foldl_impl::type, + Arg3>::type, + Rest...>::Ret; }; #else template -struct foldl_impl { +struct foldl_impl +{ using Ret = typename std::result_of::type; }; @@ -80,7 +86,8 @@ template -struct foldl_impl { +struct foldl_impl +{ using Ret = typename foldl_impl< Op, typename std::result_of::type, @@ -90,20 +97,19 @@ struct foldl_impl { #endif -} // namespace detail +} // namespace detail template -RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl( - Op&& RAJA_UNUSED_ARG(operation), - Arg1&& arg) -> typename detail::foldl_impl::Ret +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto +foldl(Op&& RAJA_UNUSED_ARG(operation), Arg1&& arg) -> + typename detail::foldl_impl::Ret { return camp::forward(arg); } template -RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl(Op&& operation, - Arg1&& arg1, - Arg2&& arg2) -> +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto +foldl(Op&& operation, Arg1&& arg1, Arg2&& arg2) -> typename detail::foldl_impl::Ret { return camp::forward(operation)(camp::forward(arg1), @@ -115,11 +121,8 @@ template -RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl(Op&& operation, - Arg1&& arg1, - Arg2&& arg2, - Arg3&& arg3, - Rest&&... rest) -> +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto +foldl(Op&& operation, Arg1&& arg1, Arg2&& arg2, Arg3&& arg3, Rest&&... rest) -> typename detail::foldl_impl::Ret { return foldl(camp::forward(operation), diff --git a/include/RAJA/internal/get_platform.hpp b/include/RAJA/internal/get_platform.hpp index 0354d04bfd..313ef66934 100644 --- a/include/RAJA/internal/get_platform.hpp +++ b/include/RAJA/internal/get_platform.hpp @@ -8,18 +8,21 @@ namespace RAJA { -namespace policy { -namespace multi { +namespace policy +{ +namespace multi +{ template class MultiPolicy; } -} +} // namespace policy -namespace detail +namespace detail { -struct max_platform { +struct max_platform +{ RAJA_HOST_DEVICE RAJA_INLINE constexpr RAJA::Platform operator()(const RAJA::Platform& l, @@ -34,7 +37,8 @@ struct max_platform { * This is a catch-all, so anything undefined gets Platform::undefined */ template -struct get_platform { +struct get_platform +{ // catch-all: undefined platform static constexpr Platform value = Platform::undefined; }; @@ -45,7 +49,8 @@ struct get_platform { * reduction of them all. */ template -struct get_platform_from_list { +struct get_platform_from_list +{ static constexpr Platform value = foldl(max_platform(), get_platform::value...); }; @@ -54,7 +59,8 @@ struct get_platform_from_list { * Define an empty list as Platform::undefined; */ template <> -struct get_platform_from_list<> { +struct get_platform_from_list<> +{ static constexpr Platform value = Platform::undefined; }; @@ -67,10 +73,10 @@ struct get_platform_from_list<> { */ template struct get_platform::value - && !RAJA::type_traits::is_indexset_policy:: - value>::type> { + typename std::enable_if< + std::is_base_of::value && + !RAJA::type_traits::is_indexset_policy::value>::type> +{ static constexpr Platform value = T::platform; }; @@ -83,12 +89,13 @@ struct get_platform struct get_platform> - : public get_platform_from_list { -}; + : public get_platform_from_list +{}; template -struct get_statement_platform { +struct get_statement_platform +{ static constexpr Platform value = get_platform_from_list::value; @@ -102,7 +109,8 @@ struct get_statement_platform { * each of them. */ template -struct get_platform> { +struct get_platform> +{ static constexpr Platform value = foldl(max_platform(), get_statement_platform::value...); }; @@ -111,7 +119,8 @@ struct get_platform> { * Specialize for an empty statement list to be undefined */ template <> -struct get_platform> { +struct get_platform> +{ static constexpr Platform value = Platform::undefined; }; @@ -120,11 +129,12 @@ struct get_platform> { // Once a specific policy is selected, that policy will select the correct // platform... see policy_invoker in MultiPolicy.hpp template -struct get_platform> { +struct get_platform> +{ static constexpr Platform value = Platform::undefined; }; -} // closing brace for detail namespace -} // closing brace for RAJA namespace +} // namespace detail +} // namespace RAJA -#endif // RAJA_get_platform_HPP +#endif // RAJA_get_platform_HPP diff --git a/include/RAJA/pattern/WorkGroup.hpp b/include/RAJA/pattern/WorkGroup.hpp index 767821b8d8..be5abb6848 100644 --- a/include/RAJA/pattern/WorkGroup.hpp +++ b/include/RAJA/pattern/WorkGroup.hpp @@ -38,38 +38,44 @@ namespace RAJA * * \verbatim - WorkPool, Allocator> pool(allocator); + WorkPool, Allocator> + pool(allocator); pool.enqueue(..., [=] (Index_type i, int* xarg0, int xarg1) { xarg0[i] = xarg1; }); - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); int* xarg0 = ...; int xarg1 = ...; - WorkSite, Allocator> site = group.run(xarg0, xarg1); + WorkSite, Allocator> site = + group.run(xarg0, xarg1); * \endverbatim * ****************************************************************************** */ -template < typename ... Args > +template using xargs = camp::list; -namespace detail { +namespace detail +{ -template < typename T > -struct is_xargs { +template +struct is_xargs +{ static constexpr bool value = false; }; -template < typename ... Args > -struct is_xargs> { +template +struct is_xargs> +{ static constexpr bool value = true; }; -} +} // namespace detail // @@ -102,7 +108,8 @@ struct is_xargs> { data[i] = 1; }); - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); * \endverbatim * @@ -112,11 +119,13 @@ template -struct WorkPool { - static_assert(RAJA::pattern_is::value, +struct WorkPool +{ + static_assert( + RAJA::pattern_is::value, "WorkPool: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkPool: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkPool: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; /*! @@ -135,9 +144,11 @@ struct WorkPool { * * \verbatim - WorkGroup, Allocator> group = pool.instantiate(); + WorkGroup, Allocator> group = + pool.instantiate(); - WorkSite, Allocator> site = group.run(); + WorkSite, Allocator> site = + group.run(); * \endverbatim * @@ -147,11 +158,13 @@ template -struct WorkGroup { - static_assert(RAJA::pattern_is::value, +struct WorkGroup +{ + static_assert( + RAJA::pattern_is::value, "WorkGroup: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkGroup: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkGroup: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; /*! @@ -170,7 +183,8 @@ struct WorkGroup { * * \verbatim - WorkSite, Allocator> site = group.run(); + WorkSite, Allocator> site = + group.run(); site.synchronize(); @@ -182,11 +196,13 @@ template -struct WorkSite { - static_assert(RAJA::pattern_is::value, +struct WorkSite +{ + static_assert( + RAJA::pattern_is::value, "WorkSite: WORKGROUP_POLICY_T must be a workgroup policy"); static_assert(detail::is_xargs::value, - "WorkSite: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); + "WorkSite: EXTRA_ARGS_T must be a RAJA::xargs<...> type"); }; @@ -195,7 +211,7 @@ template struct WorkPool, ALLOCATOR_T> { - using exec_policy = EXEC_POLICY_T; - using order_policy = ORDER_POLICY_T; - using storage_policy = STORAGE_POLICY_T; + using exec_policy = EXEC_POLICY_T; + using order_policy = ORDER_POLICY_T; + using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; - using index_type = INDEX_T; - using xarg_type = xargs; - using Allocator = ALLOCATOR_T; + using policy = WorkGroupPolicy; + using index_type = INDEX_T; + using xarg_type = xargs; + using Allocator = ALLOCATOR_T; using workgroup_type = WorkGroup; - using worksite_type = WorkSite; + using worksite_type = WorkSite; private: - using workrunner_type = detail::WorkRunner< - exec_policy, order_policy, dispatch_policy, Allocator, index_type, Args...>; - using storage_type = detail::WorkStorage< - storage_policy, Allocator, typename workrunner_type::dispatcher_type>; + using workrunner_type = detail::WorkRunner; + using storage_type = + detail::WorkStorage; friend workgroup_type; friend worksite_type; @@ -229,52 +254,45 @@ struct WorkPool + template inline void enqueue(segment_T&& seg, loop_T&& loop_body) { { // ignore zero length loops - using std::begin; using std::end; + using std::begin; + using std::end; if (begin(seg) == end(seg)) return; } - if (m_storage.begin() == m_storage.end()) { + if (m_storage.begin() == m_storage.end()) + { // perform auto-reserve on reuse reserve(m_max_num_loops, m_max_storage_bytes); } - util::PluginContext context{util::make_context()}; + util::PluginContext context {util::make_context()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; auto body = trigger_updates_before(loop_body); - m_runner.enqueue( - m_storage, std::forward(seg), std::move(body)); + m_runner.enqueue(m_storage, std::forward(seg), std::move(body)); util::callPostCapturePlugins(context); } @@ -289,14 +307,11 @@ struct WorkPool struct WorkGroup, ALLOCATOR_T> { - using exec_policy = EXEC_POLICY_T; - using order_policy = ORDER_POLICY_T; - using storage_policy = STORAGE_POLICY_T; + using exec_policy = EXEC_POLICY_T; + using order_policy = ORDER_POLICY_T; + using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; - using index_type = INDEX_T; - using xarg_type = xargs; - using Allocator = ALLOCATOR_T; + using policy = WorkGroupPolicy; + using index_type = INDEX_T; + using xarg_type = xargs; + using Allocator = ALLOCATOR_T; using workpool_type = WorkPool; using worksite_type = WorkSite; private: - using storage_type = typename workpool_type::storage_type; + using storage_type = typename workpool_type::storage_type; using workrunner_type = typename workpool_type::workrunner_type; friend workpool_type; @@ -339,15 +357,16 @@ struct WorkGroup struct WorkSite, ALLOCATOR_T> { - using exec_policy = EXEC_POLICY_T; - using order_policy = ORDER_POLICY_T; - using storage_policy = STORAGE_POLICY_T; + using exec_policy = EXEC_POLICY_T; + using order_policy = ORDER_POLICY_T; + using storage_policy = STORAGE_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using policy = WorkGroupPolicy; - using index_type = INDEX_T; - using xarg_type = xargs; - using Allocator = ALLOCATOR_T; - - using workpool_type = WorkPool; + using policy = WorkGroupPolicy; + using index_type = INDEX_T; + using xarg_type = xargs; + using Allocator = ALLOCATOR_T; + + using workpool_type = WorkPool; using workgroup_type = WorkGroup; private: @@ -412,16 +430,13 @@ struct WorkSite -inline -typename WorkPool< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::workgroup_type -WorkPool< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::instantiate() +inline typename WorkPool, + INDEX_T, + xargs, + ALLOCATOR_T>::workgroup_type +WorkPool, + INDEX_T, + xargs, + ALLOCATOR_T>::instantiate() { // update max sizes to auto-reserve on reuse - m_max_num_loops = std::max(m_storage.size(), m_max_num_loops); + m_max_num_loops = std::max(m_storage.size(), m_max_num_loops); m_max_storage_bytes = std::max(m_storage.storage_size(), m_max_storage_bytes); // move storage into workgroup - return workgroup_type{std::move(m_storage), std::move(m_runner)}; + return workgroup_type {std::move(m_storage), std::move(m_runner)}; } template -inline -typename WorkGroup< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::worksite_type +inline typename WorkGroup, + INDEX_T, + xargs, + ALLOCATOR_T>::worksite_type WorkGroup< - WorkGroupPolicy, + WorkGroupPolicy, INDEX_T, xargs, - ALLOCATOR_T>::run(typename WorkGroup< - WorkGroupPolicy, - INDEX_T, - xargs, - ALLOCATOR_T>::resource_type r, + ALLOCATOR_T>::run(typename WorkGroup, + INDEX_T, + xargs, + ALLOCATOR_T>::resource_type r, Args... args) { - util::PluginContext context{util::make_context()}; + util::PluginContext context {util::make_context()}; util::callPreLaunchPlugins(context); // move any per run storage into worksite - worksite_type site(r, m_runner.run(m_storage, r, std::forward(args)...)); + worksite_type site(r, + m_runner.run(m_storage, r, std::forward(args)...)); util::callPostLaunchPlugins(context); diff --git a/include/RAJA/pattern/WorkGroup/Dispatcher.hpp b/include/RAJA/pattern/WorkGroup/Dispatcher.hpp index 1eac283f4b..d7c35feb3d 100644 --- a/include/RAJA/pattern/WorkGroup/Dispatcher.hpp +++ b/include/RAJA/pattern/WorkGroup/Dispatcher.hpp @@ -36,35 +36,36 @@ namespace RAJA namespace detail { -template < typename > +template struct DispatcherVoidPtrWrapper { void* ptr; DispatcherVoidPtrWrapper() = default; // implicit constructor from void* - RAJA_HOST_DEVICE DispatcherVoidPtrWrapper(void* p) : ptr(p) { } + RAJA_HOST_DEVICE DispatcherVoidPtrWrapper(void* p) : ptr(p) {} }; -template < typename > +template struct DispatcherVoidConstPtrWrapper { const void* ptr; DispatcherVoidConstPtrWrapper() = default; // implicit constructor from const void* - RAJA_HOST_DEVICE DispatcherVoidConstPtrWrapper(const void* p) : ptr(p) { } + RAJA_HOST_DEVICE DispatcherVoidConstPtrWrapper(const void* p) : ptr(p) {} }; -constexpr bool dispatcher_use_host_invoke(Platform platform) { +constexpr bool dispatcher_use_host_invoke(Platform platform) +{ return !(platform == Platform::cuda || platform == Platform::hip); } // Transforms one dispatch policy into another by creating a dispatch policy // of holder_type objects. See usage in WorkRunner for more explanation. -template < typename dispatch_policy, typename holder_type > +template struct dispatcher_transform_types; /// -template < typename dispatch_policy, typename holder_type > +template using dispatcher_transform_types_t = typename dispatcher_transform_types::type; @@ -75,12 +76,17 @@ using dispatcher_transform_types_t = * DispatcherID is used to differentiate function pointers based on their * function signature. */ -template < Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs > +template struct Dispatcher; -template < typename holder_type > -struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, holder_type> { +template +struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, + holder_type> +{ using type = ::RAJA::indirect_function_call_dispatch; }; @@ -93,38 +99,44 @@ struct dispatcher_transform_types<::RAJA::indirect_function_call_dispatch, holde * during device linking when functions with high register counts may cause * device linking to fail. */ -template < Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher { +template +struct Dispatcher +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::indirect_function_call_dispatch; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::indirect_function_call_dispatch; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - template < typename T > - static void s_move_construct_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) + template + static void s_move_construct_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) { T* dest_as_T = static_cast(dest.ptr); - T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + T* src_as_T = static_cast(src.ptr); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } /// /// invoke the call operator of the object of type T in obj with args /// - template < typename T > + template static void s_host_invoke(void_cptr_wrapper obj, CallArgs... args) { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } /// - template < typename T > - static RAJA_DEVICE void s_device_invoke(void_cptr_wrapper obj, CallArgs... args) + template + static RAJA_DEVICE void s_device_invoke(void_cptr_wrapper obj, + CallArgs... args) { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); @@ -133,22 +145,26 @@ struct Dispatcher + template static void s_destroy(void_ptr_wrapper obj) { T* obj_as_T = static_cast(obj.ptr); (*obj_as_T).~T(); } - using mover_type = void(*)(void_ptr_wrapper /*dest*/, void_ptr_wrapper /*src*/); - using invoker_type = void(*)(void_cptr_wrapper /*obj*/, CallArgs... /*args*/); - using destroyer_type = void(*)(void_ptr_wrapper /*obj*/); + using mover_type = void (*)(void_ptr_wrapper /*dest*/, + void_ptr_wrapper /*src*/); + using invoker_type = void (*)(void_cptr_wrapper /*obj*/, + CallArgs... /*args*/); + using destroyer_type = void (*)(void_ptr_wrapper /*obj*/); // This can't be a cuda device lambda due to compiler limitations - template < typename T > - struct DeviceInvokerFactory { + template + struct DeviceInvokerFactory + { using value_type = invoker_type; - RAJA_DEVICE value_type operator()() { + RAJA_DEVICE value_type operator()() + { #if defined(RAJA_ENABLE_HIP) && !defined(RAJA_ENABLE_HIP_INDIRECT_FUNCTION_CALL) return nullptr; #else @@ -160,14 +176,14 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher() { - return { mover_type{&s_move_construct_destroy}, - invoker_type{&s_host_invoke}, - destroyer_type{&s_destroy}, - sizeof(T) - }; + template * = nullptr> + static inline Dispatcher makeDispatcher() + { + return {mover_type {&s_move_construct_destroy}, + invoker_type {&s_host_invoke}, destroyer_type {&s_destroy}, + sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -179,14 +195,16 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) { - return { mover_type{&s_move_construct_destroy}, - invoker_type{std::forward(createOnDevice)(DeviceInvokerFactory{})}, - destroyer_type{&s_destroy}, - sizeof(T) - }; + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) + { + return {mover_type {&s_move_construct_destroy}, + invoker_type {std::forward(createOnDevice)( + DeviceInvokerFactory {})}, + destroyer_type {&s_destroy}, sizeof(T)}; } mover_type move_construct_destroy; @@ -196,8 +214,10 @@ struct Dispatcher -struct dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, holder_type> { +template +struct dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, + holder_type> +{ using type = ::RAJA::indirect_virtual_function_dispatch; }; @@ -210,38 +230,48 @@ struct dispatcher_transform_types<::RAJA::indirect_virtual_function_dispatch, ho * during device linking when functions with high register counts may cause * device linking to fail. */ -template < Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher { +template +struct Dispatcher +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::indirect_virtual_function_dispatch; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::indirect_virtual_function_dispatch; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; - struct impl_base { - virtual void move_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) const = 0; - virtual void destroy(void_ptr_wrapper obj) const = 0; + struct impl_base + { + virtual void move_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) const = 0; + virtual void destroy(void_ptr_wrapper obj) const = 0; }; - struct host_impl_base { + struct host_impl_base + { virtual void invoke(void_cptr_wrapper obj, CallArgs... args) const = 0; }; - struct device_impl_base { - virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, CallArgs... args) const = 0; + struct device_impl_base + { + virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, + CallArgs... args) const = 0; }; - template < typename T > + template struct base_impl_type : impl_base { /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - virtual void move_destroy(void_ptr_wrapper dest, void_ptr_wrapper src) const override + virtual void move_destroy(void_ptr_wrapper dest, + void_ptr_wrapper src) const override { T* dest_as_T = static_cast(dest.ptr); - T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + T* src_as_T = static_cast(src.ptr); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } @@ -255,7 +285,7 @@ struct Dispatcher + template struct host_impl_type : host_impl_base { /// @@ -268,20 +298,22 @@ struct Dispatcher + template struct device_impl_type : device_impl_base { /// /// invoke the call operator of the object of type T in obj with args /// - virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, CallArgs... args) const override + virtual RAJA_DEVICE void invoke(void_cptr_wrapper obj, + CallArgs... args) const override { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - struct mover_type { + struct mover_type + { impl_base* m_impl; void operator()(void_ptr_wrapper dest, void_ptr_wrapper src) const { @@ -289,7 +321,8 @@ struct Dispatcherinvoke(obj, std::forward(args)...); } }; - using invoker_type = std::conditional_t; + using invoker_type = std:: + conditional_t; - struct destroyer_type { + struct destroyer_type + { impl_base* m_impl; - void operator()(void_ptr_wrapper obj) const - { - m_impl->destroy(obj); - } + void operator()(void_ptr_wrapper obj) const { m_impl->destroy(obj); } }; // This can't be a cuda device lambda due to compiler limitations - template < typename T > - struct DeviceImplTypeFactory { + template + struct DeviceImplTypeFactory + { using value_type = device_impl_type*; - RAJA_DEVICE value_type operator()() { + RAJA_DEVICE value_type operator()() + { #if defined(RAJA_ENABLE_HIP) && !defined(RAJA_ENABLE_HIP_INDIRECT_FUNCTION_CALL) return nullptr; #else @@ -333,16 +366,15 @@ struct Dispatcher* = nullptr > - static inline Dispatcher makeDispatcher() { + template * = nullptr> + static inline Dispatcher makeDispatcher() + { static base_impl_type s_base_impl; static host_impl_type s_host_impl; - return { mover_type{&s_base_impl}, - host_invoker_type{&s_host_impl}, - destroyer_type{&s_base_impl}, - sizeof(T) - }; + return {mover_type {&s_base_impl}, host_invoker_type {&s_host_impl}, + destroyer_type {&s_base_impl}, sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -354,17 +386,17 @@ struct Dispatcher* = nullptr> - static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) { + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&& createOnDevice) + { static base_impl_type s_base_impl; - static device_impl_type* s_device_impl_ptr{ - std::forward(createOnDevice)(DeviceImplTypeFactory{}) }; - return { mover_type{&s_base_impl}, - device_invoker_type{s_device_impl_ptr}, - destroyer_type{&s_base_impl}, - sizeof(T) - }; + static device_impl_type* s_device_impl_ptr {std::forward( + createOnDevice)(DeviceImplTypeFactory {})}; + return {mover_type {&s_base_impl}, device_invoker_type {s_device_impl_ptr}, + destroyer_type {&s_base_impl}, sizeof(T)}; } mover_type move_construct_destroy; @@ -375,61 +407,68 @@ struct Dispatcher -struct dispatcher_transform_types<::RAJA::direct_dispatch, holder_type> { - using type = ::RAJA::direct_dispatch...>; +template +struct dispatcher_transform_types<::RAJA::direct_dispatch, holder_type> +{ + using type = + ::RAJA::direct_dispatch...>; }; /*! * Version of Dispatcher that does direct dispatch to zero callable types. * It implements the interface with callable objects. */ -template < Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher, DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::direct_dispatch<>; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::direct_dispatch<>; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - struct mover_type { - void operator()(void_ptr_wrapper, void_ptr_wrapper) const - { } + struct mover_type + { + void operator()(void_ptr_wrapper, void_ptr_wrapper) const {} }; /// /// invoke the call operator of the object of type T in obj with args /// - struct host_invoker_type { - void operator()(void_cptr_wrapper, CallArgs...) const - { } + struct host_invoker_type + { + void operator()(void_cptr_wrapper, CallArgs...) const {} }; - struct device_invoker_type { - RAJA_DEVICE void operator()(void_cptr_wrapper, CallArgs...) const - { } + struct device_invoker_type + { + RAJA_DEVICE void operator()(void_cptr_wrapper, CallArgs...) const {} }; - using invoker_type = std::conditional_t; + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj /// - struct destroyer_type { - void operator()(void_ptr_wrapper) const - { } + struct destroyer_type + { + void operator()(void_ptr_wrapper) const {} }; /// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename T, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { - return {mover_type{}, host_invoker_type{}, destroyer_type{}, sizeof(T)}; + template * = nullptr> + static inline Dispatcher makeDispatcher() + { + return {mover_type {}, host_invoker_type {}, destroyer_type {}, sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -437,10 +476,14 @@ struct Dispatcher, DispatcherID, CallArgs... /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename T, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { - return {mover_type{}, device_invoker_type{}, destroyer_type{}, sizeof(T)}; + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { + return {mover_type {}, device_invoker_type {}, destroyer_type {}, + sizeof(T)}; } mover_type move_construct_destroy; @@ -453,23 +496,31 @@ struct Dispatcher, DispatcherID, CallArgs... * Version of Dispatcher that does direct dispatch to a single callable type. * It implements the interface with callable objects. */ -template < Platform platform, typename T, typename DispatcherID, typename ... CallArgs > -struct Dispatcher, DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::direct_dispatch; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::direct_dispatch; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - struct mover_type { + struct mover_type + { void operator()(void_ptr_wrapper dest, void_ptr_wrapper src) const { T* dest_as_T = static_cast(dest.ptr); - T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + T* src_as_T = static_cast(src.ptr); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } }; @@ -477,28 +528,30 @@ struct Dispatcher, DispatcherID, CallArgs.. /// /// invoke the call operator of the object of type T in obj with args /// - struct host_invoker_type { + struct host_invoker_type + { void operator()(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - struct device_invoker_type { + struct device_invoker_type + { RAJA_DEVICE void operator()(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - using invoker_type = std::conditional_t; + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj /// - struct destroyer_type { + struct destroyer_type + { void operator()(void_ptr_wrapper obj) const { T* obj_as_T = static_cast(obj.ptr); @@ -509,11 +562,14 @@ struct Dispatcher, DispatcherID, CallArgs.. /// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename U, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { - static_assert(std::is_same::value, "U must be in direct_dispatch types"); - return {mover_type{}, host_invoker_type{}, destroyer_type{}, sizeof(T)}; + template * = nullptr> + static inline Dispatcher makeDispatcher() + { + static_assert(std::is_same::value, + "U must be in direct_dispatch types"); + return {mover_type {}, host_invoker_type {}, destroyer_type {}, sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -521,11 +577,16 @@ struct Dispatcher, DispatcherID, CallArgs.. /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename U, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { - static_assert(std::is_same::value, "U must be in direct_dispatch types"); - return {mover_type{}, device_invoker_type{}, destroyer_type{}, sizeof(T)}; + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { + static_assert(std::is_same::value, + "U must be in direct_dispatch types"); + return {mover_type {}, device_invoker_type {}, destroyer_type {}, + sizeof(T)}; } mover_type move_construct_destroy; @@ -538,46 +599,55 @@ struct Dispatcher, DispatcherID, CallArgs.. * Version of Dispatcher that does direct dispatch to multiple callable types. * It implements the interface with callable objects. */ -template < typename T0, typename T1, typename ... TNs, - Platform platform, typename DispatcherID, typename ... CallArgs > -struct Dispatcher, - DispatcherID, CallArgs...> { +template +struct Dispatcher, + DispatcherID, + CallArgs...> +{ static constexpr bool use_host_invoke = dispatcher_use_host_invoke(platform); - using dispatch_policy = ::RAJA::direct_dispatch; - using void_ptr_wrapper = DispatcherVoidPtrWrapper; + using dispatch_policy = ::RAJA::direct_dispatch; + using void_ptr_wrapper = DispatcherVoidPtrWrapper; using void_cptr_wrapper = DispatcherVoidConstPtrWrapper; - using id_type = int; - using callable_indices = camp::make_int_seq_t; - using callable_types = camp::list; + using id_type = int; + using callable_indices = camp::make_int_seq_t; + using callable_types = camp::list; /// /// move construct an object of type T in dest as a copy of a T from src and /// destroy the T obj in src /// - struct mover_type { + struct mover_type + { id_type id; void operator()(void_ptr_wrapper dest, void_ptr_wrapper src) const { - impl_helper(callable_indices{}, callable_types{}, - dest, src); + impl_helper(callable_indices {}, callable_types {}, dest, src); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_ptr_wrapper dest, void_ptr_wrapper src) const + template + void impl_helper(camp::int_seq, + camp::list, + void_ptr_wrapper dest, + void_ptr_wrapper src) const { camp::sink(((id_types == id) ? (impl(dest, src), 0) : 0)...); } - template < typename T > + template void impl(void_ptr_wrapper dest, void_ptr_wrapper src) const { T* dest_as_T = static_cast(dest.ptr); - T* src_as_T = static_cast(src.ptr); - new(dest_as_T) T(std::move(*src_as_T)); + T* src_as_T = static_cast(src.ptr); + new (dest_as_T) T(std::move(*src_as_T)); (*src_as_T).~T(); } }; @@ -585,79 +655,89 @@ struct Dispatcher, /// /// invoke the call operator of the object of type T in obj with args /// - struct host_invoker_type { + struct host_invoker_type + { id_type id; void operator()(void_cptr_wrapper obj, CallArgs... args) const { - impl_helper(callable_indices{}, callable_types{}, - obj, std::forward(args)...); + impl_helper(callable_indices {}, callable_types {}, obj, + std::forward(args)...); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_cptr_wrapper obj, CallArgs... args) const + template + void impl_helper(camp::int_seq, + camp::list, + void_cptr_wrapper obj, + CallArgs... args) const { - camp::sink(((id_types == id) ? (impl(obj, std::forward(args)...), 0) : 0)...); + camp::sink(((id_types == id) + ? (impl(obj, std::forward(args)...), 0) + : 0)...); } - template < typename T > + template void impl(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - struct device_invoker_type { + struct device_invoker_type + { id_type id; RAJA_DEVICE void operator()(void_cptr_wrapper obj, CallArgs... args) const { - impl_helper(callable_indices{}, callable_types{}, - obj, std::forward(args)...); + impl_helper(callable_indices {}, callable_types {}, obj, + std::forward(args)...); } private: - template < int ... id_types, typename ... Ts > - RAJA_DEVICE void impl_helper(camp::int_seq, camp::list, - void_cptr_wrapper obj, CallArgs... args) const + template + RAJA_DEVICE void impl_helper(camp::int_seq, + camp::list, + void_cptr_wrapper obj, + CallArgs... args) const { - camp::sink(((id_types == id) ? (impl(obj, std::forward(args)...), 0) : 0)...); + camp::sink(((id_types == id) + ? (impl(obj, std::forward(args)...), 0) + : 0)...); } - template < typename T > + template RAJA_DEVICE void impl(void_cptr_wrapper obj, CallArgs... args) const { const T* obj_as_T = static_cast(obj.ptr); (*obj_as_T)(std::forward(args)...); } }; - using invoker_type = std::conditional_t; + using invoker_type = std:: + conditional_t; /// /// destroy the object of type T in obj /// - struct destroyer_type { + struct destroyer_type + { id_type id; void operator()(void_ptr_wrapper obj) const { - impl_helper(callable_indices{}, callable_types{}, - obj); + impl_helper(callable_indices {}, callable_types {}, obj); } private: - template < int ... id_types, typename ... Ts > - void impl_helper(camp::int_seq, camp::list, - void_ptr_wrapper obj) const + template + void impl_helper(camp::int_seq, + camp::list, + void_ptr_wrapper obj) const { camp::sink(((id_types == id) ? (impl(obj), 0) : 0)...); } - template < typename T > + template void impl(void_ptr_wrapper obj) const { T* obj_as_T = static_cast(obj.ptr); @@ -671,25 +751,31 @@ struct Dispatcher, /// The id is just the index of T in the list of callable_types. /// If T is not in Ts return -1. /// - template < typename T, int ... id_types, typename ... Ts > - static constexpr id_type get_id(camp::int_seq, camp::list) + template + static constexpr id_type get_id(camp::int_seq, + camp::list) { - id_type id{-1}; + id_type id {-1}; // quiet UB warning by sequencing assignment to id with list initialization - int unused[] {0, (std::is_same::value ? ((id = id_types), 0) : 0)...}; - camp::sink(unused); // quiet unused var warning + int unused[] {0, + (std::is_same::value ? ((id = id_types), 0) : 0)...}; + camp::sink(unused); // quiet unused var warning return id; } /// /// create a Dispatcher that can be used on the host for objects of type T /// - template< typename T, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher() { - static constexpr id_type id = get_id(callable_indices{}, callable_types{}); + template * = nullptr> + static inline Dispatcher makeDispatcher() + { + static constexpr id_type id = + get_id(callable_indices {}, callable_types {}); static_assert(id != id_type(-1), "T must be in direct_dispatch types"); - return {mover_type{id}, host_invoker_type{id}, destroyer_type{id}, sizeof(T)}; + return {mover_type {id}, host_invoker_type {id}, destroyer_type {id}, + sizeof(T)}; } /// /// create a Dispatcher that can be used on the device for objects of type T @@ -697,12 +783,17 @@ struct Dispatcher, /// Ignore the CreateOnDevice object as the same invoker object can be used /// on the host and device. /// - template< typename T, typename CreateOnDevice, - bool uhi = use_host_invoke, std::enable_if_t* = nullptr > - static inline Dispatcher makeDispatcher(CreateOnDevice&&) { - static constexpr id_type id = get_id(callable_indices{}, callable_types{}); + template * = nullptr> + static inline Dispatcher makeDispatcher(CreateOnDevice&&) + { + static constexpr id_type id = + get_id(callable_indices {}, callable_types {}); static_assert(id != id_type(-1), "T must be in direct_dispatch types"); - return {mover_type{id}, device_invoker_type{id}, destroyer_type{id}, sizeof(T)}; + return {mover_type {id}, device_invoker_type {id}, destroyer_type {id}, + sizeof(T)}; } mover_type move_construct_destroy; diff --git a/include/RAJA/pattern/WorkGroup/WorkRunner.hpp b/include/RAJA/pattern/WorkGroup/WorkRunner.hpp index 9645f73050..5a666d1c73 100644 --- a/include/RAJA/pattern/WorkGroup/WorkRunner.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkRunner.hpp @@ -40,18 +40,18 @@ namespace detail /*! * A body and args holder for storing loops that are being executed in foralls */ -template +template struct HoldBodyArgs_base { // NOTE: This constructor is disabled when body_in is not LoopBody // to avoid it conflicting with the copy and move constructors - template < typename body_in, - typename = typename std::enable_if< - std::is_same>::value>::type > + template >::value>::type> HoldBodyArgs_base(body_in&& body, Args... args) - : m_body(std::forward(body)) - , m_arg_tuple(std::forward(args)...) - { } + : m_body(std::forward(body)), + m_arg_tuple(std::forward(args)...) + {} protected: LoopBody m_body; @@ -62,7 +62,7 @@ struct HoldBodyArgs_base * A body and args holder for storing loops that are being executed in foralls * that run on the host */ -template +template struct HoldBodyArgs_host : HoldBodyArgs_base { using base = HoldBodyArgs_base; @@ -70,10 +70,10 @@ struct HoldBodyArgs_host : HoldBodyArgs_base RAJA_INLINE void operator()(index_type i) const { - invoke(i, camp::make_idx_seq_t{}); + invoke(i, camp::make_idx_seq_t {}); } - template < camp::idx_t ... Is > + template RAJA_INLINE void invoke(index_type i, camp::idx_seq) const { this->m_body(i, get(this->m_arg_tuple)...); @@ -84,7 +84,7 @@ struct HoldBodyArgs_host : HoldBodyArgs_base * A body and args holder for storing loops that are being executed in foralls * that run on the device */ -template +template struct HoldBodyArgs_device : HoldBodyArgs_base { using base = HoldBodyArgs_base; @@ -92,10 +92,10 @@ struct HoldBodyArgs_device : HoldBodyArgs_base RAJA_DEVICE RAJA_INLINE void operator()(index_type i) const { - invoke(i, camp::make_idx_seq_t{}); + invoke(i, camp::make_idx_seq_t {}); } - template < camp::idx_t ... Is > + template RAJA_DEVICE RAJA_INLINE void invoke(index_type i, camp::idx_seq) const { this->m_body(i, get(this->m_arg_tuple)...); @@ -105,28 +105,29 @@ struct HoldBodyArgs_device : HoldBodyArgs_base /*! * A body and segment holder for storing loops that will be executed as foralls */ -template +template struct HoldForall { using resource_type = typename resources::get_resource::type; - using HoldBodyArgs = typename std::conditional< + using HoldBodyArgs = typename std::conditional< !type_traits::is_device_exec_policy::value, HoldBodyArgs_host, - HoldBodyArgs_device >::type; + HoldBodyArgs_device>::type; - template < typename segment_in, typename body_in > + template HoldForall(segment_in&& segment, body_in&& body) - : m_segment(std::forward(segment)) - , m_body(std::forward(body)) - { } + : m_segment(std::forward(segment)), + m_body(std::forward(body)) + {} RAJA_INLINE void operator()(resource_type r, Args... args) const { - wrap::forall(r, - ExecutionPolicy(), - m_segment, - HoldBodyArgs{m_body, std::forward(args)...}); + wrap::forall(r, ExecutionPolicy(), m_segment, + HoldBodyArgs {m_body, std::forward(args)...}); } private: @@ -143,7 +144,7 @@ template + typename... Args> struct WorkRunner; @@ -156,28 +157,32 @@ template + typename... Args> struct WorkRunnerForallOrdered_base { - using exec_policy = EXEC_POLICY_T; - using order_policy = ORDER_POLICY_T; + using exec_policy = EXEC_POLICY_T; + using order_policy = ORDER_POLICY_T; using dispatch_policy = DISPATCH_POLICY_T; - using Allocator = ALLOCATOR_T; - using index_type = INDEX_T; - using resource_type = typename resources::get_resource::type; + using Allocator = ALLOCATOR_T; + using index_type = INDEX_T; + using resource_type = + typename resources::get_resource::type; using forall_exec_policy = FORALL_EXEC_POLICY; // The type that will hold the segment and loop body in work storage - struct holder_type { - template < typename T > - using type = HoldForall>::type, // segment_type - typename camp::at>::type, // loop_type - index_type, Args...>; + struct holder_type + { + template + using type = + HoldForall>::type, // segment_type + typename camp::at>::type, // loop_type + index_type, + Args...>; }; /// - template < typename T > + template using holder_type_t = typename holder_type::template type; // The policy indicating where the call function is invoked @@ -186,33 +191,40 @@ struct WorkRunnerForallOrdered_base // The Dispatcher policy with holder_types used internally to handle the // ranges and callables passed in by the user. - using dispatcher_holder_policy = dispatcher_transform_types_t; + using dispatcher_holder_policy = + dispatcher_transform_types_t; - using dispatcher_type = Dispatcher; + using dispatcher_type = Dispatcher; WorkRunnerForallOrdered_base() = default; WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base const&) = delete; - WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base const&) = delete; + WorkRunnerForallOrdered_base& + operator=(WorkRunnerForallOrdered_base const&) = delete; - WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base &&) = default; - WorkRunnerForallOrdered_base& operator=(WorkRunnerForallOrdered_base &&) = default; + WorkRunnerForallOrdered_base(WorkRunnerForallOrdered_base&&) = default; + WorkRunnerForallOrdered_base& + operator=(WorkRunnerForallOrdered_base&&) = default; // runner interfaces with storage to enqueue so the runner can get // information from the segment and loop at enqueue time - template < typename WorkContainer, typename segment_T, typename loop_T > + template inline void enqueue(WorkContainer& storage, segment_T&& seg, loop_T&& loop) { - using holder = holder_type_t, camp::decay>>; + using holder = + holder_type_t, camp::decay>>; storage.template emplace( - get_Dispatcher(dispatcher_exec_policy{}), + get_Dispatcher(dispatcher_exec_policy {}), std::forward(seg), std::forward(loop)); } // clear any state so ready to be destroyed or reused - void clear() - { } + void clear() {} // no extra storage required here using per_run_storage = int; @@ -227,39 +239,38 @@ template + typename... Args> struct WorkRunnerForallOrdered - : WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...> + : WorkRunnerForallOrdered_base { - using base = WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...>; + using base = WorkRunnerForallOrdered_base; using base::base; // run the loops using forall in the order that they were enqueued - template < typename WorkContainer > + template typename base::per_run_storage run(WorkContainer const& storage, typename base::resource_type r, Args... args) const { using value_type = typename WorkContainer::value_type; - typename base::per_run_storage run_storage{}; + typename base::per_run_storage run_storage {}; auto end = storage.end(); - for (auto iter = storage.begin(); iter != end; ++iter) { + for (auto iter = storage.begin(); iter != end; ++iter) + { value_type::host_call(&*iter, r, args...); } @@ -276,40 +287,40 @@ template + typename... Args> struct WorkRunnerForallReverse - : WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...> + : WorkRunnerForallOrdered_base { - using base = WorkRunnerForallOrdered_base< - FORALL_EXEC_POLICY, - EXEC_POLICY_T, - ORDER_POLICY_T, - DISPATCH_POLICY_T, - ALLOCATOR_T, - INDEX_T, - Args...>; + using base = WorkRunnerForallOrdered_base; using base::base; - // run the loops using forall in the reverse order to the order they were enqueued - template < typename WorkContainer > + // run the loops using forall in the reverse order to the order they were + // enqueued + template typename base::per_run_storage run(WorkContainer const& storage, typename base::resource_type r, Args... args) const { using value_type = typename WorkContainer::value_type; - typename base::per_run_storage run_storage{}; + typename base::per_run_storage run_storage {}; auto begin = storage.begin(); - for (auto iter = storage.end(); iter != begin; --iter) { - value_type::host_call(&*(iter-1), r, args...); + for (auto iter = storage.end(); iter != begin; --iter) + { + value_type::host_call(&*(iter - 1), r, args...); } return run_storage; diff --git a/include/RAJA/pattern/WorkGroup/WorkStorage.hpp b/include/RAJA/pattern/WorkGroup/WorkStorage.hpp index 52631d108f..d7eceaef7f 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStorage.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStorage.hpp @@ -46,23 +46,23 @@ namespace detail // operator - ( iterator_base const& ) // operator == ( iterator_base const& ) // operator < ( iterator_base const& ) -template < typename iterator_base > +template struct random_access_iterator : iterator_base { - using base = iterator_base; - using value_type = const typename base::value_type; - using pointer = typename base::pointer; - using reference = typename base::reference; - using difference_type = typename base::difference_type; + using base = iterator_base; + using value_type = const typename base::value_type; + using pointer = typename base::pointer; + using reference = typename base::reference; + using difference_type = typename base::difference_type; using iterator_category = std::random_access_iterator_tag; using base::base; random_access_iterator(random_access_iterator const&) = default; - random_access_iterator(random_access_iterator &&) = default; + random_access_iterator(random_access_iterator&&) = default; random_access_iterator& operator=(random_access_iterator const&) = default; - random_access_iterator& operator=(random_access_iterator &&) = default; + random_access_iterator& operator=(random_access_iterator&&) = default; RAJA_HOST_DEVICE reference operator*() const @@ -70,10 +70,7 @@ struct random_access_iterator : iterator_base return *static_cast(*this); } - RAJA_HOST_DEVICE pointer operator->() const - { - return &(*(*this)); - } + RAJA_HOST_DEVICE pointer operator->() const { return &(*(*this)); } RAJA_HOST_DEVICE reference operator[](difference_type i) const { @@ -120,68 +117,75 @@ struct random_access_iterator : iterator_base return *this; } - RAJA_HOST_DEVICE friend inline random_access_iterator operator+( - random_access_iterator const& lhs, difference_type rhs) + RAJA_HOST_DEVICE friend inline random_access_iterator + operator+(random_access_iterator const& lhs, difference_type rhs) { random_access_iterator copy = lhs; copy += rhs; return copy; } - RAJA_HOST_DEVICE friend inline random_access_iterator operator+( - difference_type lhs, random_access_iterator const& rhs) + RAJA_HOST_DEVICE friend inline random_access_iterator + operator+(difference_type lhs, random_access_iterator const& rhs) { random_access_iterator copy = rhs; copy += lhs; return copy; } - RAJA_HOST_DEVICE friend inline random_access_iterator operator-( - random_access_iterator const& lhs, difference_type rhs) + RAJA_HOST_DEVICE friend inline random_access_iterator + operator-(random_access_iterator const& lhs, difference_type rhs) { random_access_iterator copy = lhs; copy -= rhs; return copy; } - RAJA_HOST_DEVICE friend inline difference_type operator-( - random_access_iterator const& lhs, random_access_iterator const& rhs) + RAJA_HOST_DEVICE friend inline difference_type + operator-(random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) - static_cast(rhs); } - RAJA_HOST_DEVICE friend inline bool operator==( - random_access_iterator const& lhs, random_access_iterator const& rhs) + RAJA_HOST_DEVICE friend inline bool + operator==(random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) == static_cast(rhs); } - RAJA_HOST_DEVICE friend inline bool operator!=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + RAJA_HOST_DEVICE friend inline bool + operator!=(random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(lhs == rhs); } - RAJA_HOST_DEVICE friend inline bool operator<( - random_access_iterator const& lhs, random_access_iterator const& rhs) + RAJA_HOST_DEVICE friend inline bool + operator<(random_access_iterator const& lhs, + random_access_iterator const& rhs) { return static_cast(lhs) < static_cast(rhs); } - RAJA_HOST_DEVICE friend inline bool operator<=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + RAJA_HOST_DEVICE friend inline bool + operator<=(random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(rhs < lhs); } - RAJA_HOST_DEVICE friend inline bool operator>( - random_access_iterator const& lhs, random_access_iterator const& rhs) + RAJA_HOST_DEVICE friend inline bool + operator>(random_access_iterator const& lhs, + random_access_iterator const& rhs) { return rhs < lhs; } - RAJA_HOST_DEVICE friend inline bool operator>=( - random_access_iterator const& lhs, random_access_iterator const& rhs) + RAJA_HOST_DEVICE friend inline bool + operator>=(random_access_iterator const& lhs, + random_access_iterator const& rhs) { return !(lhs < rhs); } @@ -191,10 +195,12 @@ struct random_access_iterator : iterator_base /*! * A storage container for work groups */ -template < typename STORAGE_POLICY_T, typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage { using allocator_traits_type = std::allocator_traits; @@ -202,25 +208,27 @@ class WorkStorage typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; - static_assert(std::is_same::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: - using storage_policy = RAJA::array_of_pointers; + using storage_policy = RAJA::array_of_pointers; using dispatcher_type = Dispatcher_T; - template < typename holder > + template using true_value_type = WorkStruct; - using value_type = GenericWorkStruct; - using allocator_type = ALLOCATOR_T; - using size_type = std::size_t; + using value_type = GenericWorkStruct; + using allocator_type = ALLOCATOR_T; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using reference = value_type&; + using reference = value_type&; using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; + using pointer = value_type*; + using const_pointer = const value_type*; private: // struct used in storage vector to retain pointer and allocation size @@ -231,24 +239,19 @@ class WorkStorage }; public: - - // iterator base class for accessing stored WorkStructs outside of the container + // iterator base class for accessing stored WorkStructs outside of the + // container struct const_iterator_base { - using value_type = const typename WorkStorage::value_type; - using pointer = typename WorkStorage::const_pointer; - using reference = typename WorkStorage::const_reference; - using difference_type = typename WorkStorage::difference_type; + using value_type = const typename WorkStorage::value_type; + using pointer = typename WorkStorage::const_pointer; + using reference = typename WorkStorage::const_reference; + using difference_type = typename WorkStorage::difference_type; using iterator_category = std::random_access_iterator_tag; - const_iterator_base(const pointer_and_size* ptrptr) - : m_ptrptr(ptrptr) - { } + const_iterator_base(const pointer_and_size* ptrptr) : m_ptrptr(ptrptr) {} - RAJA_HOST_DEVICE reference operator*() const - { - return *(m_ptrptr->ptr); - } + RAJA_HOST_DEVICE reference operator*() const { return *(m_ptrptr->ptr); } RAJA_HOST_DEVICE const_iterator_base& operator+=(difference_type n) { @@ -256,20 +259,23 @@ class WorkStorage return *this; } - RAJA_HOST_DEVICE friend inline difference_type operator-( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + RAJA_HOST_DEVICE friend inline difference_type + operator-(const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr - rhs_iter.m_ptrptr; } - RAJA_HOST_DEVICE friend inline bool operator==( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + RAJA_HOST_DEVICE friend inline bool + operator==(const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr == rhs_iter.m_ptrptr; } - RAJA_HOST_DEVICE friend inline bool operator<( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + RAJA_HOST_DEVICE friend inline bool + operator<(const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_ptrptr < rhs_iter.m_ptrptr; } @@ -282,22 +288,22 @@ class WorkStorage explicit WorkStorage(allocator_type const& aloc) - : m_vec(0, aloc) - , m_aloc(aloc) - { } + : m_vec(0, aloc), m_aloc(aloc) + {} - WorkStorage(WorkStorage const&) = delete; + WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) - : m_vec(std::move(rhs.m_vec)) - , m_aloc(std::move(rhs.m_aloc)) - { } + : m_vec(std::move(rhs.m_vec)), m_aloc(std::move(rhs.m_aloc)) + {} WorkStorage& operator=(WorkStorage&& rhs) { - if (this != &rhs) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + if (this != &rhs) + { + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment {}); } return *this; } @@ -312,33 +318,26 @@ class WorkStorage } // number of loops stored - size_type size() const - { - return m_vec.size(); - } + size_type size() const { return m_vec.size(); } - const_iterator begin() const - { - return const_iterator(m_vec.begin()); - } + const_iterator begin() const { return const_iterator(m_vec.begin()); } - const_iterator end() const - { - return const_iterator(m_vec.end()); - } + const_iterator end() const { return const_iterator(m_vec.end()); } // number of bytes used for storage of loops size_type storage_size() const { size_type storage_size_nbytes = 0; - for (size_t i = 0; i < m_vec.size(); ++i) { + for (size_t i = 0; i < m_vec.size(); ++i) + { storage_size_nbytes += m_vec[i].size; } return storage_size_nbytes; } - template < typename holder, typename ... holder_ctor_args > - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { m_vec.emplace_back(create_value( dispatcher, std::forward(ctor_args)...)); @@ -347,27 +346,28 @@ class WorkStorage // destroy all stored loops, deallocates all storage void clear() { - while (!m_vec.empty()) { + while (!m_vec.empty()) + { destroy_value(m_vec.back()); m_vec.pop_back(); } m_vec.shrink_to_fit(); } - ~WorkStorage() - { - clear(); - } + ~WorkStorage() { clear(); } private: - RAJAVec> m_vec; + RAJAVec< + pointer_and_size, + typename allocator_traits_type::template rebind_alloc> + m_vec; allocator_type m_aloc; // move assignment if allocator propagates on move assignment void move_assign_private(WorkStorage&& rhs, std::true_type) { clear(); - m_vec = std::move(rhs.m_vec); + m_vec = std::move(rhs.m_vec); m_aloc = std::move(rhs.m_aloc); } @@ -375,12 +375,16 @@ class WorkStorage void move_assign_private(WorkStorage&& rhs, std::false_type) { clear(); - if (m_aloc == rhs.m_aloc) { + if (m_aloc == rhs.m_aloc) + { // take storage if allocators compare equal m_vec = std::move(rhs.m_vec); - } else { + } + else + { // allocate new storage if allocators do not compare equal - for (size_type i = 0; i < rhs.m_vec.size(); ++i) { + for (size_type i = 0; i < rhs.m_vec.size(); ++i) + { m_vec.emplace_back(move_destroy_value(std::move(rhs), rhs.m_vec[i])); } rhs.m_vec.clear(); @@ -389,7 +393,7 @@ class WorkStorage } // allocate and construct value in storage - template < typename holder, typename ... holder_ctor_args > + template pointer_and_size create_value(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) { @@ -401,7 +405,7 @@ class WorkStorage value_type::template construct( value_ptr, dispatcher, std::forward(ctor_args)...); - return pointer_and_size{value_ptr, value_size}; + return pointer_and_size {value_ptr, value_size}; } // allocate and move construct object as copy of other value and @@ -414,22 +418,24 @@ class WorkStorage value_type::move_destroy(value_ptr, other_value_and_size.ptr); - allocator_traits_type::deallocate(rhs.m_aloc, - reinterpret_cast(other_value_and_size.ptr), other_value_and_size.size); + allocator_traits_type::deallocate( + rhs.m_aloc, reinterpret_cast(other_value_and_size.ptr), + other_value_and_size.size); - return pointer_and_size{value_ptr, other_value_and_size.size}; + return pointer_and_size {value_ptr, other_value_and_size.size}; } // destroy and deallocate value void destroy_value(pointer_and_size value_and_size_ptr) { value_type::destroy(value_and_size_ptr.ptr); - allocator_traits_type::deallocate(m_aloc, - reinterpret_cast(value_and_size_ptr.ptr), value_and_size_ptr.size); + allocator_traits_type::deallocate( + m_aloc, reinterpret_cast(value_and_size_ptr.ptr), + value_and_size_ptr.size); } }; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage { using allocator_traits_type = std::allocator_traits; @@ -437,44 +443,45 @@ class WorkStorage typename allocator_traits_type::propagate_on_container_copy_assignment; using propagate_on_container_move_assignment = typename allocator_traits_type::propagate_on_container_move_assignment; - using propagate_on_container_swap = + using propagate_on_container_swap = typename allocator_traits_type::propagate_on_container_swap; - static_assert(std::is_same::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: - using storage_policy = RAJA::ragged_array_of_objects; + using storage_policy = RAJA::ragged_array_of_objects; using dispatcher_type = Dispatcher_T; - template < typename holder > + template using true_value_type = WorkStruct; - using value_type = GenericWorkStruct; - using allocator_type = ALLOCATOR_T; - using size_type = std::size_t; + using value_type = GenericWorkStruct; + using allocator_type = ALLOCATOR_T; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using reference = value_type&; + using reference = value_type&; using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; + using pointer = value_type*; + using const_pointer = const value_type*; - // iterator base class for accessing stored WorkStructs outside of the container + // iterator base class for accessing stored WorkStructs outside of the + // container struct const_iterator_base { - using value_type = const typename WorkStorage::value_type; - using pointer = typename WorkStorage::const_pointer; - using reference = typename WorkStorage::const_reference; - using difference_type = typename WorkStorage::difference_type; + using value_type = const typename WorkStorage::value_type; + using pointer = typename WorkStorage::const_pointer; + using reference = typename WorkStorage::const_reference; + using difference_type = typename WorkStorage::difference_type; using iterator_category = std::random_access_iterator_tag; const_iterator_base(const char* array_begin, const size_type* offset_iter) - : m_array_begin(array_begin) - , m_offset_iter(offset_iter) - { } + : m_array_begin(array_begin), m_offset_iter(offset_iter) + {} RAJA_HOST_DEVICE reference operator*() const { - return *reinterpret_cast( - m_array_begin + *m_offset_iter); + return *reinterpret_cast(m_array_begin + *m_offset_iter); } RAJA_HOST_DEVICE const_iterator_base& operator+=(difference_type n) @@ -483,20 +490,23 @@ class WorkStorage return *this; } - RAJA_HOST_DEVICE friend inline difference_type operator-( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + RAJA_HOST_DEVICE friend inline difference_type + operator-(const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_offset_iter - rhs_iter.m_offset_iter; } - RAJA_HOST_DEVICE friend inline bool operator==( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + RAJA_HOST_DEVICE friend inline bool + operator==(const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_offset_iter == rhs_iter.m_offset_iter; } - RAJA_HOST_DEVICE friend inline bool operator<( - const_iterator_base const& lhs_iter, const_iterator_base const& rhs_iter) + RAJA_HOST_DEVICE friend inline bool + operator<(const_iterator_base const& lhs_iter, + const_iterator_base const& rhs_iter) { return lhs_iter.m_offset_iter < rhs_iter.m_offset_iter; } @@ -510,29 +520,30 @@ class WorkStorage explicit WorkStorage(allocator_type const& aloc) - : m_offsets(0, aloc) - , m_aloc(aloc) - { } + : m_offsets(0, aloc), m_aloc(aloc) + {} - WorkStorage(WorkStorage const&) = delete; + WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) - : m_offsets(std::move(rhs.m_offsets)) - , m_array_begin(rhs.m_array_begin) - , m_array_end(rhs.m_array_end) - , m_array_cap(rhs.m_array_cap) - , m_aloc(std::move(rhs.m_aloc)) + : m_offsets(std::move(rhs.m_offsets)), + m_array_begin(rhs.m_array_begin), + m_array_end(rhs.m_array_end), + m_array_cap(rhs.m_array_cap), + m_aloc(std::move(rhs.m_aloc)) { rhs.m_array_begin = nullptr; - rhs.m_array_end = nullptr; - rhs.m_array_cap = nullptr; + rhs.m_array_end = nullptr; + rhs.m_array_cap = nullptr; } WorkStorage& operator=(WorkStorage&& rhs) { - if (this != &rhs) { - move_assign_private(std::move(rhs), propagate_on_container_move_assignment{}); + if (this != &rhs) + { + move_assign_private(std::move(rhs), + propagate_on_container_move_assignment {}); } return *this; } @@ -546,10 +557,7 @@ class WorkStorage } // number of loops stored - size_type size() const - { - return m_offsets.size(); - } + size_type size() const { return m_offsets.size(); } const_iterator begin() const { @@ -562,17 +570,15 @@ class WorkStorage } // number of bytes used for storage of loops - size_type storage_size() const - { - return m_array_end - m_array_begin; - } + size_type storage_size() const { return m_array_end - m_array_begin; } - template < typename holder, typename ... holder_ctor_args > - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { size_type value_offset = storage_size(); - size_type value_size = create_value(value_offset, - dispatcher, std::forward(ctor_args)...); + size_type value_size = create_value( + value_offset, dispatcher, std::forward(ctor_args)...); m_offsets.emplace_back(value_offset); m_array_end += value_size; } @@ -581,21 +587,22 @@ class WorkStorage void clear() { array_clear(); - if (m_array_begin != nullptr) { - allocator_traits_type::deallocate(m_aloc, m_array_begin, storage_capacity()); + if (m_array_begin != nullptr) + { + allocator_traits_type::deallocate(m_aloc, m_array_begin, + storage_capacity()); m_array_begin = nullptr; m_array_end = nullptr; m_array_cap = nullptr; } } - ~WorkStorage() - { - clear(); - } + ~WorkStorage() { clear(); } private: - RAJAVec> m_offsets; + RAJAVec> + m_offsets; char* m_array_begin = nullptr; char* m_array_end = nullptr; char* m_array_cap = nullptr; @@ -608,8 +615,8 @@ class WorkStorage m_offsets = std::move(rhs.m_offsets); m_array_begin = rhs.m_array_begin; - m_array_end = rhs.m_array_end ; - m_array_cap = rhs.m_array_cap ; + m_array_end = rhs.m_array_end; + m_array_cap = rhs.m_array_cap; m_aloc = std::move(rhs.m_aloc); rhs.m_array_begin = nullptr; @@ -621,25 +628,29 @@ class WorkStorage void move_assign_private(WorkStorage&& rhs, std::false_type) { clear(); - if (m_aloc == rhs.m_aloc) { + if (m_aloc == rhs.m_aloc) + { m_offsets = std::move(rhs.m_offsets); m_array_begin = rhs.m_array_begin; - m_array_end = rhs.m_array_end ; - m_array_cap = rhs.m_array_cap ; + m_array_end = rhs.m_array_end; + m_array_cap = rhs.m_array_cap; rhs.m_array_begin = nullptr; rhs.m_array_end = nullptr; rhs.m_array_cap = nullptr; - } else { + } + else + { array_reserve(rhs.storage_size()); - for (size_type i = 0; i < rhs.size(); ++i) { + for (size_type i = 0; i < rhs.size(); ++i) + { m_array_end = m_array_begin + rhs.m_offsets[i]; move_destroy_value(m_array_end, rhs.m_array_begin + rhs.m_offsets[i]); m_offsets.emplace_back(rhs.m_offsets[i]); } - m_array_end = m_array_begin + rhs.storage_size(); + m_array_end = m_array_begin + rhs.storage_size(); rhs.m_array_end = rhs.m_array_begin; rhs.m_offsets.clear(); rhs.clear(); @@ -647,46 +658,45 @@ class WorkStorage } // get loop storage capacity, used and unused in bytes - size_type storage_capacity() const - { - return m_array_cap - m_array_begin; - } + size_type storage_capacity() const { return m_array_cap - m_array_begin; } // get unused loop storage capacity in bytes - size_type storage_unused() const - { - return m_array_cap - m_array_end; - } + size_type storage_unused() const { return m_array_cap - m_array_end; } // reserve space for loop_storage_size bytes of loop storage void array_reserve(size_type loop_storage_size) { - if (loop_storage_size > storage_capacity()) { + if (loop_storage_size > storage_capacity()) + { char* new_array_begin = allocator_traits_type::allocate(m_aloc, loop_storage_size); - char* new_array_end = new_array_begin + storage_size(); - char* new_array_cap = new_array_begin + loop_storage_size; + char* new_array_end = new_array_begin + storage_size(); + char* new_array_cap = new_array_begin + loop_storage_size; - for (size_type i = 0; i < size(); ++i) { + for (size_type i = 0; i < size(); ++i) + { move_destroy_value(new_array_begin + m_offsets[i], - m_array_begin + m_offsets[i]); + m_array_begin + m_offsets[i]); } - if (m_array_begin != nullptr) { - allocator_traits_type::deallocate(m_aloc, m_array_begin, storage_capacity()); + if (m_array_begin != nullptr) + { + allocator_traits_type::deallocate(m_aloc, m_array_begin, + storage_capacity()); } m_array_begin = new_array_begin; - m_array_end = new_array_end ; - m_array_cap = new_array_cap ; + m_array_end = new_array_end; + m_array_cap = new_array_cap; } } // destroy loop objects (does not deallocate array storage) void array_clear() { - while (!m_offsets.empty()) { + while (!m_offsets.empty()) + { destroy_value(m_offsets.back()); m_array_end = m_array_begin + m_offsets.back(); m_offsets.pop_back(); @@ -696,15 +706,17 @@ class WorkStorage // ensure there is enough storage to hold the next loop body at value offset // and store the loop body - template < typename holder, typename ... holder_ctor_args > + template size_type create_value(size_type value_offset, const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) { const size_type value_size = sizeof(true_value_type); - if (value_size > storage_unused()) { - array_reserve(std::max(storage_size() + value_size, 2*storage_capacity())); + if (value_size > storage_unused()) + { + array_reserve( + std::max(storage_size() + value_size, 2 * storage_capacity())); } pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); @@ -726,13 +738,12 @@ class WorkStorage // destroy the loop body at value offset void destroy_value(size_type value_offset) { - pointer value_ptr = - reinterpret_cast(m_array_begin + value_offset); + pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); value_type::destroy(value_ptr); } }; -template < typename ALLOCATOR_T, typename Dispatcher_T > +template class WorkStorage @@ -742,39 +753,41 @@ class WorkStorage::value, + static_assert( + std::is_same::value, "WorkStorage expects an allocator for 'char's."); + public: - using storage_policy = RAJA::constant_stride_array_of_objects; + using storage_policy = RAJA::constant_stride_array_of_objects; using dispatcher_type = Dispatcher_T; - template < typename holder > + template using true_value_type = WorkStruct; - using value_type = GenericWorkStruct; - using allocator_type = ALLOCATOR_T; - using size_type = std::size_t; + using value_type = GenericWorkStruct; + using allocator_type = ALLOCATOR_T; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using reference = value_type&; + using reference = value_type&; using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; + using pointer = value_type*; + using const_pointer = const value_type*; - // iterator base class for accessing stored WorkStructs outside of the container + // iterator base class for accessing stored WorkStructs outside of the + // container struct const_iterator_base { - using value_type = const typename WorkStorage::value_type; - using pointer = typename WorkStorage::const_pointer; - using reference = typename WorkStorage::const_reference; - using difference_type = typename WorkStorage::difference_type; + using value_type = const typename WorkStorage::value_type; + using pointer = typename WorkStorage::const_pointer; + using reference = typename WorkStorage::const_reference; + using difference_type = typename WorkStorage::difference_type; using iterator_category = std::random_access_iterator_tag; const_iterator_base(const char* array_pos, size_type stride) - : m_array_pos(array_pos) - , m_stride(stride) - { } + : m_array_pos(array_pos), m_stride(stride) + {} RAJA_HOST_DEVICE reference operator*() const { @@ -787,20 +800,23 @@ class WorkStorage; - explicit WorkStorage(allocator_type const& aloc) - : m_aloc(aloc) - { } + explicit WorkStorage(allocator_type const& aloc) : m_aloc(aloc) {} - WorkStorage(WorkStorage const&) = delete; + WorkStorage(WorkStorage const&) = delete; WorkStorage& operator=(WorkStorage const&) = delete; WorkStorage(WorkStorage&& rhs) - : m_aloc(std::move(rhs.m_aloc)) - , m_stride(rhs.m_stride) - , m_array_begin(rhs.m_array_begin) - , m_array_end(rhs.m_array_end) - , m_array_cap(rhs.m_array_cap) + : m_aloc(std::move(rhs.m_aloc)), + m_stride(rhs.m_stride), + m_array_begin(rhs.m_array_begin), + m_array_end(rhs.m_array_end), + m_array_cap(rhs.m_array_cap) { // do not reset stride, leave it for reuse rhs.m_array_begin = nullptr; @@ -835,8 +849,10 @@ class WorkStorage - void emplace(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + void emplace(const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { - create_value(dispatcher, std::forward(ctor_args)...); + create_value(dispatcher, + std::forward(ctor_args)...); m_array_end += m_stride; } @@ -883,22 +892,21 @@ class WorkStorage storage_capacity() || new_stride > m_stride) { + if (loop_storage_size > storage_capacity() || new_stride > m_stride) + { char* new_array_begin = allocator_traits_type::allocate(m_aloc, loop_storage_size); - char* new_array_end = new_array_begin + size() * new_stride; - char* new_array_cap = new_array_begin + loop_storage_size; + char* new_array_end = new_array_begin + size() * new_stride; + char* new_array_cap = new_array_begin + loop_storage_size; - for (size_type i = 0; i < size(); ++i) { + for (size_type i = 0; i < size(); ++i) + { move_destroy_value(new_array_begin + i * new_stride, - m_array_begin + i * m_stride); + m_array_begin + i * m_stride); } - if (m_array_begin != nullptr) { - allocator_traits_type::deallocate(m_aloc, m_array_begin, storage_capacity()); + if (m_array_begin != nullptr) + { + allocator_traits_type::deallocate(m_aloc, m_array_begin, + storage_capacity()); } - m_stride = new_stride ; + m_stride = new_stride; m_array_begin = new_array_begin; - m_array_end = new_array_end ; - m_array_cap = new_array_cap ; + m_array_end = new_array_end; + m_array_cap = new_array_cap; } } // destroy the loops in storage (does not deallocate loop storage) void array_clear() { - for (size_type value_offset = storage_size(); value_offset > 0; value_offset -= m_stride) { + for (size_type value_offset = storage_size(); value_offset > 0; + value_offset -= m_stride) + { destroy_value(value_offset - m_stride); m_array_end -= m_stride; } @@ -1002,18 +1014,20 @@ class WorkStorage + template void create_value(const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) { const size_type value_size = sizeof(true_value_type); - if (value_size > storage_unused() && value_size <= m_stride) { - array_reserve(std::max(storage_size() + m_stride, 2*storage_capacity()), + if (value_size > storage_unused() && value_size <= m_stride) + { + array_reserve(std::max(storage_size() + m_stride, 2 * storage_capacity()), m_stride); - } else if (value_size > m_stride) { - array_reserve((size()+1)*value_size, - value_size); + } + else if (value_size > m_stride) + { + array_reserve((size() + 1) * value_size, value_size); } size_type value_offset = storage_size(); @@ -1025,8 +1039,7 @@ class WorkStorage(value_ptr), reinterpret_cast(other_value_ptr)); @@ -1035,8 +1048,7 @@ class WorkStorage(m_array_begin + value_offset); + pointer value_ptr = reinterpret_cast(m_array_begin + value_offset); value_type::destroy(value_ptr); } }; diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp index 72e1540c54..90792d4037 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp @@ -35,7 +35,7 @@ namespace detail /*! * A struct that gives a generic way to layout memory for different loops */ -template < size_t size, typename Dispatcher_T > +template struct WorkStruct; /*! @@ -44,67 +44,75 @@ struct WorkStruct; * offsetof(GenericWorkStruct<>, obj) == offsetof(WorkStruct, obj) * sizeof(GenericWorkStruct) <= sizeof(WorkStruct) */ -template < typename Dispatcher_T > +template using GenericWorkStruct = WorkStruct; -template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs > -struct WorkStruct> +template +struct WorkStruct< + size, + Dispatcher> { - using dispatcher_type = Dispatcher; + using dispatcher_type = + Dispatcher; // construct a WorkStruct with a value of type holder from the args and // check a variety of constraints at compile time - template < typename holder, typename ... holder_ctor_args > - static RAJA_INLINE - void construct(void* ptr, const dispatcher_type* dispatcher, holder_ctor_args&&... ctor_args) + template + static RAJA_INLINE void construct(void* ptr, + const dispatcher_type* dispatcher, + holder_ctor_args&&... ctor_args) { using true_value_type = WorkStruct; - using value_type = GenericWorkStruct; + using value_type = GenericWorkStruct; static_assert(sizeof(holder) <= sizeof(true_value_type::obj), - "holder must fit in WorkStruct::obj"); + "holder must fit in WorkStruct::obj"); static_assert(std::is_standard_layout::value, - "WorkStruct must be a standard layout type"); + "WorkStruct must be a standard layout type"); static_assert(std::is_standard_layout::value, - "GenericWorkStruct must be a standard layout type"); - static_assert(offsetof(value_type, obj) == offsetof(true_value_type, obj), + "GenericWorkStruct must be a standard layout type"); + static_assert( + offsetof(value_type, obj) == offsetof(true_value_type, obj), "WorkStruct and GenericWorkStruct must have obj at the same offset"); static_assert(sizeof(value_type) <= sizeof(true_value_type), - "WorkStruct must not be smaller than GenericWorkStruct"); + "WorkStruct must not be smaller than GenericWorkStruct"); true_value_type* value_ptr = static_cast(ptr); value_ptr->dispatcher = dispatcher; - value_ptr->invoke = dispatcher->invoke; - new(&value_ptr->obj) holder(std::forward(ctor_args)...); + value_ptr->invoke = dispatcher->invoke; + new (&value_ptr->obj) holder(std::forward(ctor_args)...); } // move construct in dst from the value in src and destroy the value in src - static RAJA_INLINE - void move_destroy(WorkStruct* value_dst, - WorkStruct* value_src) + static RAJA_INLINE void move_destroy(WorkStruct* value_dst, + WorkStruct* value_src) { value_dst->dispatcher = value_src->dispatcher; - value_dst->invoke = value_src->invoke; - value_dst->dispatcher->move_construct_destroy(&value_dst->obj, &value_src->obj); + value_dst->invoke = value_src->invoke; + value_dst->dispatcher->move_construct_destroy(&value_dst->obj, + &value_src->obj); } // destroy the value ptr - static RAJA_INLINE - void destroy(WorkStruct* value_ptr) + static RAJA_INLINE void destroy(WorkStruct* value_ptr) { value_ptr->dispatcher->destroy(&value_ptr->obj); } // invoke the call operator of the value ptr with args - static RAJA_INLINE - void host_call(const WorkStruct* value_ptr, CallArgs... args) + static RAJA_INLINE void host_call(const WorkStruct* value_ptr, + CallArgs... args) { value_ptr->invoke(&value_ptr->obj, std::forward(args)...); } /// // invoke the call operator of the value ptr with args - static RAJA_DEVICE RAJA_INLINE - void device_call(const WorkStruct* value_ptr, CallArgs... args) + static RAJA_DEVICE RAJA_INLINE void device_call(const WorkStruct* value_ptr, + CallArgs... args) { value_ptr->invoke(&value_ptr->obj, std::forward(args)...); } diff --git a/include/RAJA/pattern/atomic.hpp b/include/RAJA/pattern/atomic.hpp index d5905f7928..d56c576710 100644 --- a/include/RAJA/pattern/atomic.hpp +++ b/include/RAJA/pattern/atomic.hpp @@ -87,9 +87,9 @@ namespace RAJA */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(T *acc) +RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(T* acc) { - return RAJA::atomicLoad(Policy{}, acc); + return RAJA::atomicLoad(Policy {}, acc); } @@ -100,9 +100,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicLoad(T *acc) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T* acc, T value) { - RAJA::atomicStore(Policy{}, acc, value); + RAJA::atomicStore(Policy {}, acc, value); } @@ -114,9 +114,9 @@ RAJA_INLINE RAJA_HOST_DEVICE void atomicStore(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T* acc, T value) { - return RAJA::atomicAdd(Policy{}, acc, value); + return RAJA::atomicAdd(Policy {}, acc, value); } @@ -128,9 +128,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicAdd(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T* acc, T value) { - return RAJA::atomicSub(Policy{}, acc, value); + return RAJA::atomicSub(Policy {}, acc, value); } @@ -142,9 +142,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicSub(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T* acc, T value) { - return RAJA::atomicMin(Policy{}, acc, value); + return RAJA::atomicMin(Policy {}, acc, value); } @@ -156,9 +156,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicMin(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(T* acc, T value) { - return RAJA::atomicMax(Policy{}, acc, value); + return RAJA::atomicMax(Policy {}, acc, value); } @@ -169,9 +169,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicMax(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc) +RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T* acc) { - return RAJA::atomicInc(Policy{}, acc); + return RAJA::atomicInc(Policy {}, acc); } @@ -185,9 +185,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc, T compare) +RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T* acc, T compare) { - return RAJA::atomicInc(Policy{}, acc, compare); + return RAJA::atomicInc(Policy {}, acc, compare); } @@ -198,9 +198,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicInc(T *acc, T compare) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc) +RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T* acc) { - return RAJA::atomicDec(Policy{}, acc); + return RAJA::atomicDec(Policy {}, acc); } @@ -214,9 +214,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc, T compare) +RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T* acc, T compare) { - return RAJA::atomicDec(Policy{}, acc, compare); + return RAJA::atomicDec(Policy {}, acc, compare); } @@ -229,11 +229,11 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicDec(T *acc, T compare) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T* acc, T value) { static_assert(std::is_integral::value, "atomicAnd can only be used on integral types"); - return RAJA::atomicAnd(Policy{}, acc, value); + return RAJA::atomicAnd(Policy {}, acc, value); } @@ -246,11 +246,11 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicAnd(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T* acc, T value) { static_assert(std::is_integral::value, "atomicOr can only be used on integral types"); - return RAJA::atomicOr(Policy{}, acc, value); + return RAJA::atomicOr(Policy {}, acc, value); } @@ -263,11 +263,11 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicOr(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T* acc, T value) { static_assert(std::is_integral::value, "atomicXor can only be used on integral types"); - return RAJA::atomicXor(Policy{}, acc, value); + return RAJA::atomicXor(Policy {}, acc, value); } @@ -279,9 +279,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicXor(T *acc, T value) */ RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T *acc, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T* acc, T value) { - return RAJA::atomicExchange(Policy{}, acc, value); + return RAJA::atomicExchange(Policy {}, acc, value); } @@ -295,9 +295,9 @@ RAJA_INLINE RAJA_HOST_DEVICE T atomicExchange(T *acc, T value) RAJA_SUPPRESS_HD_WARN template -RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T *acc, T compare, T value) +RAJA_INLINE RAJA_HOST_DEVICE T atomicCAS(T* acc, T compare, T value) { - return RAJA::atomicCAS(Policy{}, acc, compare, value); + return RAJA::atomicCAS(Policy {}, acc, compare, value); } /*! @@ -317,22 +317,18 @@ class AtomicRef RAJA_INLINE RAJA_HOST_DEVICE - constexpr explicit AtomicRef(value_type *value_ptr) - : m_value_ptr(value_ptr) {} + constexpr explicit AtomicRef(value_type* value_ptr) : m_value_ptr(value_ptr) + {} RAJA_INLINE RAJA_HOST_DEVICE - constexpr AtomicRef(AtomicRef const &c) - : m_value_ptr(c.m_value_ptr) {} + constexpr AtomicRef(AtomicRef const& c) : m_value_ptr(c.m_value_ptr) {} AtomicRef& operator=(AtomicRef const&) = delete; RAJA_INLINE RAJA_HOST_DEVICE - value_type * getPointer() const - { - return m_value_ptr; - } + value_type* getPointer() const { return m_value_ptr; } RAJA_INLINE RAJA_HOST_DEVICE @@ -351,17 +347,11 @@ class AtomicRef RAJA_INLINE RAJA_HOST_DEVICE - value_type load() const - { - return RAJA::atomicLoad(m_value_ptr); - } + value_type load() const { return RAJA::atomicLoad(m_value_ptr); } RAJA_INLINE RAJA_HOST_DEVICE - operator value_type() const - { - return RAJA::atomicLoad(m_value_ptr); - } + operator value_type() const { return RAJA::atomicLoad(m_value_ptr); } RAJA_INLINE RAJA_HOST_DEVICE @@ -382,10 +372,13 @@ class AtomicRef bool compare_exchange_strong(value_type& expect, value_type rhs) const { value_type compare = expect; - value_type old = RAJA::atomicCAS(m_value_ptr, compare, rhs); - if (compare == old) { + value_type old = RAJA::atomicCAS(m_value_ptr, compare, rhs); + if (compare == old) + { return true; - } else { + } + else + { expect = old; return false; } @@ -527,7 +520,7 @@ class AtomicRef } private: - value_type *m_value_ptr; + value_type* m_value_ptr; }; diff --git a/include/RAJA/pattern/detail/algorithm.hpp b/include/RAJA/pattern/detail/algorithm.hpp index 21d266bd21..0a5521e0e3 100644 --- a/include/RAJA/pattern/detail/algorithm.hpp +++ b/include/RAJA/pattern/detail/algorithm.hpp @@ -49,16 +49,17 @@ using ContainerVal = camp::decay>())>; template -using ContainerRef = - decltype(*camp::val>()); +using ContainerRef = decltype(*camp::val>()); template using ContainerDiff = - camp::decay>()-camp::val>())>; + camp::decay>() - + camp::val>())>; template -RAJA_INLINE -DiffType firstIndex(DiffType n, CountType num_threads, CountType thread_id) +RAJA_INLINE DiffType firstIndex(DiffType n, + CountType num_threads, + CountType thread_id) { return (static_cast(n) * thread_id) / num_threads; } @@ -70,9 +71,7 @@ DiffType firstIndex(DiffType n, CountType num_threads, CountType thread_id) \brief swap values at iterators lhs and rhs */ template -RAJA_HOST_DEVICE RAJA_INLINE -void -safe_iter_swap(Iter lhs, Iter rhs) +RAJA_HOST_DEVICE RAJA_INLINE void safe_iter_swap(Iter lhs, Iter rhs) { #ifdef RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE using camp::safe_swap; @@ -87,9 +86,7 @@ safe_iter_swap(Iter lhs, Iter rhs) \brief returns iterator to next item */ template -RAJA_HOST_DEVICE RAJA_INLINE -Iter -next(Iter it) +RAJA_HOST_DEVICE RAJA_INLINE Iter next(Iter it) { ++it; return it; @@ -99,9 +96,7 @@ next(Iter it) \brief returns iterator to next item */ template -RAJA_HOST_DEVICE RAJA_INLINE -Iter -prev(Iter it) +RAJA_HOST_DEVICE RAJA_INLINE Iter prev(Iter it) { --it; return it; diff --git a/include/RAJA/pattern/detail/forall.hpp b/include/RAJA/pattern/detail/forall.hpp index 3bd5d7ecaf..aa9a3ac888 100644 --- a/include/RAJA/pattern/detail/forall.hpp +++ b/include/RAJA/pattern/detail/forall.hpp @@ -19,12 +19,12 @@ #ifndef RAJA_PATTERN_DETAIL_FORALL_HPP #define RAJA_PATTERN_DETAIL_FORALL_HPP -#define RAJA_EXTRACT_BED_SUFFIXED(CONTAINER, SUFFIX) \ - using std::begin; \ - using std::end; \ - using std::distance; \ - auto begin##SUFFIX = begin(CONTAINER); \ - auto end##SUFFIX = end(CONTAINER); \ +#define RAJA_EXTRACT_BED_SUFFIXED(CONTAINER, SUFFIX) \ + using std::begin; \ + using std::end; \ + using std::distance; \ + auto begin##SUFFIX = begin(CONTAINER); \ + auto end##SUFFIX = end(CONTAINER); \ auto distance##SUFFIX = distance(begin##SUFFIX, end##SUFFIX) #define RAJA_EXTRACT_BED_IT(CONTAINER) RAJA_EXTRACT_BED_SUFFIXED(CONTAINER, _it) diff --git a/include/RAJA/pattern/detail/multi_reduce.hpp b/include/RAJA/pattern/detail/multi_reduce.hpp index 884b9aa989..14b655475b 100644 --- a/include/RAJA/pattern/detail/multi_reduce.hpp +++ b/include/RAJA/pattern/detail/multi_reduce.hpp @@ -26,32 +26,29 @@ #include "RAJA/util/RepeatView.hpp" -#define RAJA_DECLARE_MULTI_REDUCER(OP_NAME, OP, POL, DATA) \ - template \ - struct MultiReduce##OP_NAME, T> \ - : reduce::detail::BaseMultiReduce##OP_NAME< \ - DATA, tuning>> \ - { \ - using policy = POL; \ - using Base = reduce::detail::BaseMultiReduce##OP_NAME< \ - DATA, tuning>>; \ - using Base::Base; \ - using typename Base::value_type; \ - using typename Base::reference; \ - \ - RAJA_SUPPRESS_HD_WARN \ - RAJA_HOST_DEVICE \ - reference operator[](size_t bin) const \ - { \ - return reference(*this, bin); \ - } \ +#define RAJA_DECLARE_MULTI_REDUCER(OP_NAME, OP, POL, DATA) \ + template \ + struct MultiReduce##OP_NAME, T> \ + : reduce::detail::BaseMultiReduce##OP_NAME< \ + DATA, tuning>> \ + { \ + using policy = POL; \ + using Base = reduce::detail::BaseMultiReduce##OP_NAME< \ + DATA, tuning>>; \ + using Base::Base; \ + using typename Base::value_type; \ + using typename Base::reference; \ + \ + RAJA_SUPPRESS_HD_WARN \ + RAJA_HOST_DEVICE \ + reference operator[](size_t bin) const { return reference(*this, bin); } \ }; -#define RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Sum, sum, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Min, min, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(Max, max, POL, DATA) \ - RAJA_DECLARE_MULTI_REDUCER(BitOr, or_bit, POL, DATA) \ +#define RAJA_DECLARE_ALL_MULTI_REDUCERS(POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Sum, sum, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Min, min, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(Max, max, POL, DATA) \ + RAJA_DECLARE_MULTI_REDUCER(BitOr, or_bit, POL, DATA) \ RAJA_DECLARE_MULTI_REDUCER(BitAnd, and_bit, POL, DATA) namespace RAJA @@ -67,32 +64,37 @@ template struct BaseMultiReduce { using MultiReduceData = t_MultiReduceData; - using MultiReduceOp = typename t_MultiReduceData::MultiReduceOp; - using value_type = typename t_MultiReduceData::value_type; + using MultiReduceOp = typename t_MultiReduceData::MultiReduceOp; + using value_type = typename t_MultiReduceData::value_type; - BaseMultiReduce() : BaseMultiReduce{RepeatView(MultiReduceOp::identity(), 0)} {} + BaseMultiReduce() + : BaseMultiReduce {RepeatView(MultiReduceOp::identity(), 0)} + {} explicit BaseMultiReduce(size_t num_bins, value_type init_val = MultiReduceOp::identity(), value_type identity = MultiReduceOp::identity()) - : BaseMultiReduce{RepeatView(init_val, num_bins), identity} - { } - - template < typename Container, - concepts::enable_if_t, - concepts::negate>, - concepts::negate>>* = nullptr > + : BaseMultiReduce {RepeatView(init_val, num_bins), identity} + {} + + template < + typename Container, + concepts::enable_if_t< + type_traits::is_range, + concepts::negate>, + concepts::negate>>* = + nullptr> explicit BaseMultiReduce(Container const& container, value_type identity = MultiReduceOp::identity()) - : data{container, identity} - { } + : data {container, identity} + {} RAJA_SUPPRESS_HD_WARN BaseMultiReduce(BaseMultiReduce const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduce(BaseMultiReduce &&) = default; - BaseMultiReduce &operator=(BaseMultiReduce const&) = delete; - BaseMultiReduce &operator=(BaseMultiReduce &&) = delete; + BaseMultiReduce(BaseMultiReduce&&) = default; + BaseMultiReduce& operator=(BaseMultiReduce const&) = delete; + BaseMultiReduce& operator=(BaseMultiReduce&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduce() = default; @@ -108,13 +110,14 @@ struct BaseMultiReduce reset(RepeatView(init_val, num_bins), identity); } - template < typename Container, - concepts::enable_if_t>* = nullptr > + template >* = nullptr> void reset(Container const& container, value_type identity = MultiReduceOp::identity()) { - for (size_t bin = 0; bin < data.num_bins(); ++bin) { - RAJA_UNUSED_VAR(get(bin)); // automatic get() before reset + for (size_t bin = 0; bin < data.num_bins(); ++bin) + { + RAJA_UNUSED_VAR(get(bin)); // automatic get() before reset } data.reset(container, identity); } @@ -125,7 +128,7 @@ struct BaseMultiReduce RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - BaseMultiReduce const& combine(size_t bin, value_type const &other) const + BaseMultiReduce const& combine(size_t bin, value_type const& other) const { data.combine(bin, other); return *this; @@ -135,16 +138,19 @@ struct BaseMultiReduce value_type get(size_t bin) const { return data.get(bin); } //! Get the calculated reduced value for each bin and store it in container - template < typename Container, - concepts::enable_if_t>* = nullptr > + template >* = nullptr> void get_all(Container& container) const { RAJA_EXTRACT_BED_IT(container); - if (size_t(distance_it) != data.num_bins()) { - RAJA_ABORT_OR_THROW("MultiReduce::get_all container has different size than multi reducer"); + if (size_t(distance_it) != data.num_bins()) + { + RAJA_ABORT_OR_THROW("MultiReduce::get_all container has different size " + "than multi reducer"); } size_t bin = 0; - for (auto& val : container) { + for (auto& val : container) + { val = data.get(bin); ++bin; } @@ -167,17 +173,17 @@ class BaseMultiReduceMin : public BaseMultiReduce { public: using Base = BaseMultiReduce; - using typename Base::value_type; using Base::Base; + using typename Base::value_type; RAJA_SUPPRESS_HD_WARN BaseMultiReduceMin(BaseMultiReduceMin const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMin(BaseMultiReduceMin &&) = default; + BaseMultiReduceMin(BaseMultiReduceMin&&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMin &operator=(BaseMultiReduceMin const&) = delete; + BaseMultiReduceMin& operator=(BaseMultiReduceMin const&) = delete; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMin &operator=(BaseMultiReduceMin &&) = delete; + BaseMultiReduceMin& operator=(BaseMultiReduceMin&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceMin() = default; @@ -185,8 +191,8 @@ class BaseMultiReduceMin : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceMin const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), m_bin(bin) + {} //! reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -196,10 +202,7 @@ class BaseMultiReduceMin : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceMin const& m_base; @@ -226,9 +229,9 @@ class BaseMultiReduceMax : public BaseMultiReduce RAJA_SUPPRESS_HD_WARN BaseMultiReduceMax(BaseMultiReduceMax const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceMax(BaseMultiReduceMax &&) = default; - BaseMultiReduceMax &operator=(BaseMultiReduceMax const&) = delete; - BaseMultiReduceMax &operator=(BaseMultiReduceMax &&) = delete; + BaseMultiReduceMax(BaseMultiReduceMax&&) = default; + BaseMultiReduceMax& operator=(BaseMultiReduceMax const&) = delete; + BaseMultiReduceMax& operator=(BaseMultiReduceMax&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceMax() = default; @@ -236,8 +239,8 @@ class BaseMultiReduceMax : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceMax const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), m_bin(bin) + {} //! reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -247,10 +250,7 @@ class BaseMultiReduceMax : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceMax const& m_base; @@ -277,9 +277,9 @@ class BaseMultiReduceSum : public BaseMultiReduce RAJA_SUPPRESS_HD_WARN BaseMultiReduceSum(BaseMultiReduceSum const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceSum(BaseMultiReduceSum &&) = default; - BaseMultiReduceSum &operator=(BaseMultiReduceSum const&) = delete; - BaseMultiReduceSum &operator=(BaseMultiReduceSum &&) = delete; + BaseMultiReduceSum(BaseMultiReduceSum&&) = default; + BaseMultiReduceSum& operator=(BaseMultiReduceSum const&) = delete; + BaseMultiReduceSum& operator=(BaseMultiReduceSum&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceSum() = default; @@ -287,8 +287,8 @@ class BaseMultiReduceSum : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceSum const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), m_bin(bin) + {} //! reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -298,10 +298,7 @@ class BaseMultiReduceSum : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceSum const& m_base; @@ -328,9 +325,9 @@ class BaseMultiReduceBitOr : public BaseMultiReduce RAJA_SUPPRESS_HD_WARN BaseMultiReduceBitOr(BaseMultiReduceBitOr const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceBitOr(BaseMultiReduceBitOr &&) = default; - BaseMultiReduceBitOr &operator=(BaseMultiReduceBitOr const&) = delete; - BaseMultiReduceBitOr &operator=(BaseMultiReduceBitOr &&) = delete; + BaseMultiReduceBitOr(BaseMultiReduceBitOr&&) = default; + BaseMultiReduceBitOr& operator=(BaseMultiReduceBitOr const&) = delete; + BaseMultiReduceBitOr& operator=(BaseMultiReduceBitOr&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceBitOr() = default; @@ -338,8 +335,8 @@ class BaseMultiReduceBitOr : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceBitOr const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), m_bin(bin) + {} //! reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -349,10 +346,7 @@ class BaseMultiReduceBitOr : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceBitOr const& m_base; @@ -379,9 +373,9 @@ class BaseMultiReduceBitAnd : public BaseMultiReduce RAJA_SUPPRESS_HD_WARN BaseMultiReduceBitAnd(BaseMultiReduceBitAnd const&) = default; RAJA_SUPPRESS_HD_WARN - BaseMultiReduceBitAnd(BaseMultiReduceBitAnd &&) = default; - BaseMultiReduceBitAnd &operator=(BaseMultiReduceBitAnd const&) = delete; - BaseMultiReduceBitAnd &operator=(BaseMultiReduceBitAnd &&) = delete; + BaseMultiReduceBitAnd(BaseMultiReduceBitAnd&&) = default; + BaseMultiReduceBitAnd& operator=(BaseMultiReduceBitAnd const&) = delete; + BaseMultiReduceBitAnd& operator=(BaseMultiReduceBitAnd&&) = delete; RAJA_SUPPRESS_HD_WARN ~BaseMultiReduceBitAnd() = default; @@ -389,8 +383,8 @@ class BaseMultiReduceBitAnd : public BaseMultiReduce { RAJA_HOST_DEVICE reference(BaseMultiReduceBitAnd const& base, size_t bin) - : m_base(base), m_bin(bin) - { } + : m_base(base), m_bin(bin) + {} //! reducer function; updates the current instance's state RAJA_HOST_DEVICE @@ -400,10 +394,7 @@ class BaseMultiReduceBitAnd : public BaseMultiReduce return *this; } - value_type get() const - { - return m_base.get(m_bin); - } + value_type get() const { return m_base.get(m_bin); } private: BaseMultiReduceBitAnd const& m_base; diff --git a/include/RAJA/pattern/detail/privatizer.hpp b/include/RAJA/pattern/detail/privatizer.hpp index 3579027cd3..9ca50f308b 100644 --- a/include/RAJA/pattern/detail/privatizer.hpp +++ b/include/RAJA/pattern/detail/privatizer.hpp @@ -30,7 +30,7 @@ class has_privatizer private: template static auto Test(void*) - -> decltype(camp::val(), camp::true_type{}); + -> decltype(camp::val(), camp::true_type {}); template static camp::false_type Test(...); @@ -42,12 +42,13 @@ class has_privatizer static_assert(!has_privatizer::value, "if this fires, abandon all hope"); -struct GenericWrapperBase { -}; +struct GenericWrapperBase +{}; template -struct Privatizer { - using value_type = camp::decay; +struct Privatizer +{ + using value_type = camp::decay; using reference_type = value_type&; value_type priv; static_assert(!has_privatizer::value, @@ -58,7 +59,7 @@ struct Privatizer { "a bug"); RAJA_SUPPRESS_HD_WARN - RAJA_HOST_DEVICE Privatizer(const T& o) : priv{o} {} + RAJA_HOST_DEVICE Privatizer(const T& o) : priv {o} {} RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE reference_type get_priv() { return priv; } @@ -85,7 +86,7 @@ template ::value>::type* = nullptr> RAJA_HOST_DEVICE auto thread_privatize(const T& item) -> Privatizer { - return Privatizer{item}; + return Privatizer {item}; } RAJA_SUPPRESS_HD_WARN @@ -93,7 +94,7 @@ template ::value>::type* = nullptr> RAJA_HOST_DEVICE auto thread_privatize(const T& item) -> typename T::privatizer { - return typename T::privatizer{item}; + return typename T::privatizer {item}; } } // namespace internal diff --git a/include/RAJA/pattern/detail/reduce.hpp b/include/RAJA/pattern/detail/reduce.hpp index 788f3c698d..2f826b590f 100644 --- a/include/RAJA/pattern/detail/reduce.hpp +++ b/include/RAJA/pattern/detail/reduce.hpp @@ -21,33 +21,33 @@ #include "RAJA/util/Operators.hpp" #include "RAJA/util/types.hpp" -#define RAJA_DECLARE_REDUCER(OP, POL, COMBINER) \ - template \ - class Reduce##OP \ - : public reduce::detail::BaseReduce##OP \ - { \ - public: \ - using Base = reduce::detail::BaseReduce##OP; \ - using Base::Base; \ +#define RAJA_DECLARE_REDUCER(OP, POL, COMBINER) \ + template \ + class Reduce##OP \ + : public reduce::detail::BaseReduce##OP \ + { \ + public: \ + using Base = reduce::detail::BaseReduce##OP; \ + using Base::Base; \ }; -#define RAJA_DECLARE_INDEX_REDUCER(OP, POL, COMBINER) \ - template \ - class Reduce##OP \ - : public reduce::detail::BaseReduce##OP \ - { \ - public: \ - using Base = reduce::detail::BaseReduce##OP; \ - using Base::Base; \ +#define RAJA_DECLARE_INDEX_REDUCER(OP, POL, COMBINER) \ + template \ + class Reduce##OP \ + : public reduce::detail::BaseReduce##OP \ + { \ + public: \ + using Base = reduce::detail::BaseReduce##OP; \ + using Base::Base; \ }; -#define RAJA_DECLARE_ALL_REDUCERS(POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Sum, POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Min, POL, COMBINER) \ - RAJA_DECLARE_REDUCER(Max, POL, COMBINER) \ - RAJA_DECLARE_INDEX_REDUCER(MinLoc, POL, COMBINER) \ - RAJA_DECLARE_INDEX_REDUCER(MaxLoc, POL, COMBINER) \ - RAJA_DECLARE_REDUCER(BitOr, POL, COMBINER) \ +#define RAJA_DECLARE_ALL_REDUCERS(POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Sum, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Min, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(Max, POL, COMBINER) \ + RAJA_DECLARE_INDEX_REDUCER(MinLoc, POL, COMBINER) \ + RAJA_DECLARE_INDEX_REDUCER(MaxLoc, POL, COMBINER) \ + RAJA_DECLARE_REDUCER(BitOr, POL, COMBINER) \ RAJA_DECLARE_REDUCER(BitAnd, POL, COMBINER) namespace RAJA @@ -64,14 +64,15 @@ namespace detail { template class Op> -struct op_adapter : private Op { +struct op_adapter : private Op +{ using operator_type = Op; RAJA_HOST_DEVICE static constexpr T identity() { return operator_type::identity(); } - RAJA_HOST_DEVICE RAJA_INLINE void operator()(T &val, const T v) const + RAJA_HOST_DEVICE RAJA_INLINE void operator()(T& val, const T v) const { val = operator_type::operator()(val, v); } @@ -79,24 +80,24 @@ struct op_adapter : private Op { } // namespace detail template -struct sum : detail::op_adapter { -}; +struct sum : detail::op_adapter +{}; template -struct min : detail::op_adapter { -}; +struct min : detail::op_adapter +{}; template -struct max : detail::op_adapter { -}; +struct max : detail::op_adapter +{}; template -struct or_bit : detail::op_adapter { -}; +struct or_bit : detail::op_adapter +{}; template -struct and_bit : detail::op_adapter { -}; +struct and_bit : detail::op_adapter +{}; #if defined(RAJA_ENABLE_TARGET_OPENMP) @@ -107,7 +108,8 @@ namespace detail { template ::value> -struct DefaultLoc {}; +struct DefaultLoc +{}; template struct DefaultLoc // any non-integral type @@ -128,30 +130,39 @@ class ValueLoc T val = doing_min ? operators::limits::max() : operators::limits::min(); IndexType loc = DefaultLoc().value(); -#if __NVCC__ && defined(CUDART_VERSION) && CUDART_VERSION < 9020 || defined(__HIPCC__) +#if __NVCC__ && defined(CUDART_VERSION) && CUDART_VERSION < 9020 || \ + defined(__HIPCC__) RAJA_HOST_DEVICE constexpr ValueLoc() {} - RAJA_HOST_DEVICE constexpr ValueLoc(ValueLoc const &other) : val{other.val}, loc{other.loc} {} + RAJA_HOST_DEVICE constexpr ValueLoc(ValueLoc const& other) + : val {other.val}, loc {other.loc} + {} RAJA_HOST_DEVICE - ValueLoc &operator=(ValueLoc const &other) { val = other.val; loc = other.loc; return *this;} + ValueLoc& operator=(ValueLoc const& other) + { + val = other.val; + loc = other.loc; + return *this; + } #else - constexpr ValueLoc() = default; - constexpr ValueLoc(ValueLoc const &) = default; - ValueLoc &operator=(ValueLoc const &) = default; + constexpr ValueLoc() = default; + constexpr ValueLoc(ValueLoc const&) = default; + ValueLoc& operator=(ValueLoc const&) = default; #endif - RAJA_HOST_DEVICE constexpr ValueLoc(T const &val_) : val{val_}, loc{DefaultLoc().value()} {} - RAJA_HOST_DEVICE constexpr ValueLoc(T const &val_, IndexType const &loc_) - : val{val_}, loc{loc_} - { - } + RAJA_HOST_DEVICE constexpr ValueLoc(T const& val_) + : val {val_}, loc {DefaultLoc().value()} + {} + RAJA_HOST_DEVICE constexpr ValueLoc(T const& val_, IndexType const& loc_) + : val {val_}, loc {loc_} + {} RAJA_HOST_DEVICE operator T() const { return val; } RAJA_HOST_DEVICE IndexType getLoc() { return loc; } - RAJA_HOST_DEVICE bool operator<(ValueLoc const &rhs) const + RAJA_HOST_DEVICE bool operator<(ValueLoc const& rhs) const { return val < rhs.val; } - RAJA_HOST_DEVICE bool operator>(ValueLoc const &rhs) const + RAJA_HOST_DEVICE bool operator>(ValueLoc const& rhs) const { return val > rhs.val; } @@ -164,14 +175,17 @@ class ValueLoc namespace operators { template -struct limits<::RAJA::reduce::detail::ValueLoc> { - RAJA_INLINE RAJA_HOST_DEVICE static constexpr - ::RAJA::reduce::detail::ValueLoc min() +struct limits<::RAJA::reduce::detail::ValueLoc> +{ + RAJA_INLINE RAJA_HOST_DEVICE static constexpr ::RAJA::reduce::detail:: + ValueLoc + min() { return ::RAJA::reduce::detail::ValueLoc(limits::min()); } - RAJA_INLINE RAJA_HOST_DEVICE static constexpr - ::RAJA::reduce::detail::ValueLoc max() + RAJA_INLINE RAJA_HOST_DEVICE static constexpr ::RAJA::reduce::detail:: + ValueLoc + max() { return ::RAJA::reduce::detail::ValueLoc(limits::max()); } @@ -197,50 +211,49 @@ class BaseReduce Combiner_t mutable c; public: - using value_type = T; + using value_type = T; using reduce_type = Reduce; RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - BaseReduce() : c{T(), Reduce::identity()} {} + BaseReduce() : c {T(), Reduce::identity()} {} RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE BaseReduce(T init_val, T identity_ = Reduce::identity()) - : c{init_val, identity_} - { - } + : c {init_val, identity_} + {} RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE void reset(T val, T identity_ = Reduce::identity()) { - operator T(); // automatic get() before reset + operator T(); // automatic get() before reset c.reset(val, identity_); } //! prohibit compiler-generated copy assignment - BaseReduce &operator=(const BaseReduce &) = delete; + BaseReduce& operator=(const BaseReduce&) = delete; //! compiler-generated copy constructor RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - BaseReduce(const BaseReduce ©) : c(copy.c) {} + BaseReduce(const BaseReduce& copy) : c(copy.c) {} //! compiler-generated move constructor RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE RAJA_INLINE - BaseReduce(BaseReduce &©) : c(std::move(copy.c)) {} + BaseReduce(BaseReduce&& copy) : c(std::move(copy.c)) {} //! compiler-generated move assignment - BaseReduce &operator=(BaseReduce &&) = default; + BaseReduce& operator=(BaseReduce&&) = default; RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - void combine(T const &other) const { c.combine(other); } + void combine(T const& other) const { c.combine(other); } - T &local() const { return c.local(); } + T& local() const { return c.local(); } //! Get the calculated reduced value operator T() const { return c.get(); } @@ -253,51 +266,50 @@ template class BaseCombinable { protected: - BaseCombinable const *parent = nullptr; + BaseCombinable const* parent = nullptr; T identity; T mutable my_data; public: RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - constexpr BaseCombinable() : identity{T()}, my_data{T()} {} + constexpr BaseCombinable() : identity {T()}, my_data {T()} {} RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE constexpr BaseCombinable(T init_val, T identity_ = T()) - : identity{identity_}, my_data{init_val} - { - } + : identity {identity_}, my_data {init_val} + {} RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE void reset(T init_val, T identity_) { - my_data = init_val; + my_data = init_val; identity = identity_; } RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - constexpr BaseCombinable(BaseCombinable const &other) - : parent{other.parent ? other.parent : &other}, - identity{other.identity}, - my_data{identity} - { - } + constexpr BaseCombinable(BaseCombinable const& other) + : parent {other.parent ? other.parent : &other}, + identity {other.identity}, + my_data {identity} + {} RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE ~BaseCombinable() { - if (parent && my_data != identity) { + if (parent && my_data != identity) + { Reduce()(parent->my_data, my_data); } } RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - void combine(T const &other) { Reduce{}(my_data, other); } + void combine(T const& other) { Reduce {}(my_data, other); } /*! * \return the calculated reduced value @@ -307,17 +319,17 @@ class BaseCombinable /*! * \return reference to the local value */ - T &local() const { return my_data; } + T& local() const { return my_data; } T get_combined() const { return my_data; } private: // Convenience method for CRTP - const Derived &derived() const + const Derived& derived() const { - return *(static_cast(this)); + return *(static_cast(this)); } - Derived &derived() { return *(static_cast(this)); } + Derived& derived() { return *(static_cast(this)); } }; /*! @@ -336,7 +348,7 @@ class BaseReduceMin : public BaseReduce //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - const BaseReduceMin &min(T rhs) const + const BaseReduceMin& min(T rhs) const { this->combine(rhs); return *this; @@ -350,36 +362,43 @@ class BaseReduceMin : public BaseReduce * ************************************************************************** */ -template class Combiner> +template + class Combiner> class BaseReduceMinLoc : public BaseReduce, RAJA::reduce::min, Combiner> { public: using Base = BaseReduce, RAJA::reduce::min, Combiner>; - using value_type = typename Base::value_type; + using value_type = typename Base::value_type; using reduce_type = typename Base::reduce_type; using Base::Base; constexpr BaseReduceMinLoc() : Base(value_type(T(), IndexType())) {} - constexpr BaseReduceMinLoc(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), - IndexType identity_loc_ = DefaultLoc().value()) - : Base(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)) - { - } - - void reset(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), + constexpr BaseReduceMinLoc( + T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), + IndexType identity_loc_ = DefaultLoc().value()) + : Base(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)) + {} + + void reset(T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), IndexType identity_loc_ = DefaultLoc().value()) { - operator T(); // automatic get() before reset - Base::reset(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)); + operator T(); // automatic get() before reset + Base::reset(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)); } /// \brief reducer function; updates the current instance's state RAJA_HOST_DEVICE - const BaseReduceMinLoc &minloc(T rhs, IndexType loc) const + const BaseReduceMinLoc& minloc(T rhs, IndexType loc) const { this->combine(value_type(rhs, loc)); return *this; @@ -408,7 +427,7 @@ class BaseReduceMax : public BaseReduce //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - const BaseReduceMax &max(T rhs) const + const BaseReduceMax& max(T rhs) const { this->combine(rhs); return *this; @@ -432,7 +451,7 @@ class BaseReduceSum : public BaseReduce //! reducer function; updates the current instance's state RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - const BaseReduceSum &operator+=(T rhs) const + const BaseReduceSum& operator+=(T rhs) const { this->combine(rhs); return *this; @@ -456,7 +475,7 @@ class BaseReduceBitOr : public BaseReduce //! reducer function; updates the current instance's state RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - const BaseReduceBitOr &operator|=(T rhs) const + const BaseReduceBitOr& operator|=(T rhs) const { this->combine(rhs); return *this; @@ -480,7 +499,7 @@ class BaseReduceBitAnd : public BaseReduce //! reducer function; updates the current instance's state RAJA_SUPPRESS_HD_WARN RAJA_HOST_DEVICE - const BaseReduceBitAnd &operator&=(T rhs) const + const BaseReduceBitAnd& operator&=(T rhs) const { this->combine(rhs); return *this; @@ -495,36 +514,45 @@ class BaseReduceBitAnd : public BaseReduce * ************************************************************************** */ -template class Combiner> -class BaseReduceMaxLoc - : public BaseReduce, RAJA::reduce::max, Combiner> +template + class Combiner> +class BaseReduceMaxLoc : public BaseReduce, + RAJA::reduce::max, + Combiner> { public: - using Base = BaseReduce, RAJA::reduce::max, Combiner>; - using value_type = typename Base::value_type; + using Base = + BaseReduce, RAJA::reduce::max, Combiner>; + using value_type = typename Base::value_type; using reduce_type = typename Base::reduce_type; using Base::Base; constexpr BaseReduceMaxLoc() : Base(value_type(T(), IndexType())) {} - constexpr BaseReduceMaxLoc(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), - IndexType identity_loc_ = DefaultLoc().value()) - : Base(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)) - { - } - - void reset(T init_val, IndexType init_idx, - T identity_val_ = reduce_type::identity(), + constexpr BaseReduceMaxLoc( + T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), + IndexType identity_loc_ = DefaultLoc().value()) + : Base(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)) + {} + + void reset(T init_val, + IndexType init_idx, + T identity_val_ = reduce_type::identity(), IndexType identity_loc_ = DefaultLoc().value()) { - operator T(); // automatic get() before reset - Base::reset(value_type(init_val, init_idx), value_type(identity_val_, identity_loc_)); + operator T(); // automatic get() before reset + Base::reset(value_type(init_val, init_idx), + value_type(identity_val_, identity_loc_)); } //! reducer function; updates the current instance's state RAJA_HOST_DEVICE - const BaseReduceMaxLoc &maxloc(T rhs, IndexType loc) const + const BaseReduceMaxLoc& maxloc(T rhs, IndexType loc) const { this->combine(value_type(rhs, loc)); return *this; diff --git a/include/RAJA/pattern/forall.hpp b/include/RAJA/pattern/forall.hpp index 686f0e8c6b..e0b87a5d60 100644 --- a/include/RAJA/pattern/forall.hpp +++ b/include/RAJA/pattern/forall.hpp @@ -98,14 +98,15 @@ namespace detail { /// Adapter to replace specific implementations for the icount variants template -struct icount_adapter { +struct icount_adapter +{ using index_type = typename std::decay::type; typename std::decay::type body; using container_type = typename std::decay::type; typename container_type::iterator begin_it; Index_type icount; icount_adapter(Range const& r, Body const& b, IndexT icount_) - : body{b}, icount{icount_} + : body {b}, icount {icount_} { using std::begin; begin_it = begin(r); @@ -119,16 +120,28 @@ struct icount_adapter { } }; -struct CallForall { - template - RAJA_INLINE camp::resources::EventProxy operator()(T const&, ExecPol, Body, Res, ForallParams) const; +struct CallForall +{ + template + RAJA_INLINE camp::resources::EventProxy + operator()(T const&, ExecPol, Body, Res, ForallParams) const; }; -struct CallForallIcount { +struct CallForallIcount +{ constexpr CallForallIcount(int s); - template - RAJA_INLINE camp::resources::EventProxy operator()(T const&, ExecPol, Body, Res, ForallParams) const; + template + RAJA_INLINE camp::resources::EventProxy + operator()(T const&, ExecPol, Body, Res, ForallParams) const; const int start; }; @@ -152,22 +165,31 @@ namespace wrap * ****************************************************************************** */ -template +template RAJA_INLINE concepts::enable_if_t< RAJA::resources::EventProxy, concepts::negate>, type_traits::is_range> -forall(Res r, ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body, ForallParams&& f_params) +forall(Res r, + ExecutionPolicy&& p, + Container&& c, + LoopBody&& loop_body, + ForallParams&& f_params) { RAJA_FORCEINLINE_RECURSIVE - return forall_impl(r, - std::forward(p), - std::forward(c), - std::forward(loop_body), - std::forward(f_params)); + return forall_impl( + r, std::forward(p), std::forward(c), + std::forward(loop_body), std::forward(f_params)); } -template +template RAJA_INLINE concepts::enable_if_t< RAJA::resources::EventProxy, concepts::negate>, @@ -175,11 +197,9 @@ RAJA_INLINE concepts::enable_if_t< forall(Res r, ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) { RAJA_FORCEINLINE_RECURSIVE - return forall_impl(r, - std::forward(p), - std::forward(c), - std::forward(loop_body), - expt::get_empty_forall_param_pack()); + return forall_impl( + r, std::forward(p), std::forward(c), + std::forward(loop_body), expt::get_empty_forall_param_pack()); } @@ -197,22 +217,22 @@ template RAJA_INLINE resources::EventProxy forall_Icount(Res r, - ExecutionPolicy&& p, - Container&& c, - IndexType&& icount, - LoopBody&& loop_body, - ForallParams&& f_params) + ExecutionPolicy&& p, + Container&& c, + IndexType&& icount, + LoopBody&& loop_body, + ForallParams&& f_params) { using std::begin; using std::distance; using std::end; auto range = RangeSegment(0, distance(begin(c), end(c))); - detail::icount_adapter adapted(c, - loop_body, + detail::icount_adapter adapted(c, loop_body, icount); using policy::sequential::forall_impl; RAJA_FORCEINLINE_RECURSIVE - return forall_impl(r, std::forward(p), range, adapted, std::forward(f_params)); + return forall_impl(r, std::forward(p), range, adapted, + std::forward(f_params)); } /*! @@ -230,23 +250,24 @@ template -RAJA_INLINE resources::EventProxy forall_Icount(Res r, - ExecPolicy, - const TypedIndexSet& iset, - LoopBody loop_body, - ForallParams f_params) +RAJA_INLINE resources::EventProxy +forall_Icount(Res r, + ExecPolicy, + const TypedIndexSet& iset, + LoopBody loop_body, + ForallParams f_params) { // no need for icount variant here - auto segIterRes = resources::get_resource::type::get_default(); - wrap::forall(segIterRes, SegmentIterPolicy(), iset, [=, &r](int segID) { - iset.segmentCall(segID, + auto segIterRes = + resources::get_resource::type::get_default(); + wrap::forall(segIterRes, SegmentIterPolicy(), iset, + [=, &r](int segID) + { + iset.segmentCall( + segID, detail::CallForallIcount(iset.getStartingIcount(segID)), - SegmentExecPolicy(), - loop_body, - r, - f_params); - }); + SegmentExecPolicy(), loop_body, r, f_params); + }); return RAJA::resources::EventProxy(r); } @@ -256,30 +277,33 @@ template -RAJA_INLINE resources::EventProxy forall(Res r, - ExecPolicy, - const TypedIndexSet& iset, - LoopBody loop_body, - ForallParams f_params) -{ - auto segIterRes = resources::get_resource::type::get_default(); - wrap::forall(segIterRes, SegmentIterPolicy(), iset, [=, &r](int segID) { - iset.segmentCall(segID, detail::CallForall{}, SegmentExecPolicy(), loop_body, r, f_params); - }); +RAJA_INLINE resources::EventProxy +forall(Res r, + ExecPolicy, + const TypedIndexSet& iset, + LoopBody loop_body, + ForallParams f_params) +{ + auto segIterRes = + resources::get_resource::type::get_default(); + wrap::forall(segIterRes, SegmentIterPolicy(), iset, + [=, &r](int segID) + { + iset.segmentCall(segID, detail::CallForall {}, + SegmentExecPolicy(), loop_body, r, f_params); + }); return RAJA::resources::EventProxy(r); } } // end namespace wrap - /*! ****************************************************************************** * - * \brief The RAJA::policy_by_value_interface forall functions provide an interface with - * value-based policies. It also enforces the interface and performs - * static checks as well as triggering plugins and loop body updates. + * \brief The RAJA::policy_by_value_interface forall functions provide an + *interface with value-based policies. It also enforces the interface and + *performs static checks as well as triggering plugins and loop body updates. * ****************************************************************************** */ @@ -294,11 +318,12 @@ inline namespace policy_by_value_interface * ****************************************************************************** */ -template -RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, - Res r, - IdxSet&& c, - Params&&... params) +template +RAJA_INLINE resources::EventProxy +forall_Icount(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) { static_assert(type_traits::is_index_set::value, "Expected a TypedIndexSet but did not get one. Are you using " @@ -306,9 +331,10 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, auto f_params = expt::make_forall_param_pack(std::forward(params)...); auto&& loop_body = expt::get_lambda(std::forward(params)...); - //expt::check_forall_optional_args(loop_body, f_params); + // expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context { + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -318,27 +344,24 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, util::callPreLaunchPlugins(context); - RAJA::resources::EventProxy e = wrap::forall_Icount( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + RAJA::resources::EventProxy e = + wrap::forall_Icount(r, std::forward(p), + std::forward(c), std::move(body), f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > -RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, - IdxSet&& c, - LoopBody&& loop_body) +template < + typename ExecutionPolicy, + typename IdxSet, + typename LoopBody, + typename Res = typename resources::get_resource::type> +RAJA_INLINE resources::EventProxy +forall_Icount(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall_Icount( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } @@ -349,11 +372,14 @@ RAJA_INLINE resources::EventProxy forall_Icount(ExecutionPolicy&& p, * ****************************************************************************** */ -template -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - type_traits::is_indexset_policy> -forall(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) +template +RAJA_INLINE + concepts::enable_if_t, + type_traits::is_indexset_policy> + forall(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) { static_assert(type_traits::is_index_set::value, "Expected a TypedIndexSet but did not get one. Are you using " @@ -363,7 +389,8 @@ forall(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) auto&& loop_body = expt::get_lambda(std::forward(params)...); expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context { + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -373,28 +400,26 @@ forall(ExecutionPolicy&& p, Res r, IdxSet&& c, Params&&... params) util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + resources::EventProxy e = + wrap::forall(r, std::forward(p), std::forward(c), + std::move(body), f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - type_traits::is_indexset_policy> -forall(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) +template < + typename ExecutionPolicy, + typename IdxSet, + typename LoopBody, + typename Res = typename resources::get_resource::type> +RAJA_INLINE + concepts::enable_if_t, + type_traits::is_indexset_policy> + forall(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } @@ -405,12 +430,14 @@ forall(ExecutionPolicy&& p, IdxSet&& c, LoopBody&& loop_body) * ****************************************************************************** */ -template ::type > -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - type_traits::is_multi_policy, - type_traits::is_range> +template < + typename ExecutionPolicy, + typename Container, + typename LoopBody, + typename Res = typename resources::get_resource::type> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_multi_policy, + type_traits::is_range> forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) { static_assert(type_traits::is_random_access_range::value, @@ -419,10 +446,9 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) auto r = Res::get_default(); // plugins handled in multipolicy policy_invoker - return forall_impl(r, - std::forward(p), - std::forward(c), - std::forward(loop_body)); + return forall_impl(r, std::forward(p), + std::forward(c), + std::forward(loop_body)); } /*! @@ -438,10 +464,9 @@ template -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - type_traits::is_range, - type_traits::is_integral> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_range, + type_traits::is_integral> forall_Icount(ExecutionPolicy&& p, Res r, Container&& c, @@ -452,11 +477,14 @@ forall_Icount(ExecutionPolicy&& p, static_assert(type_traits::is_random_access_range::value, "Container does not model RandomAccessIterator"); - auto f_params = expt::make_forall_param_pack(std::forward(first), std::forward(params)...); - auto&& loop_body = expt::get_lambda(std::forward(first), std::forward(params)...); - //expt::check_forall_optional_args(loop_body, f_params); + auto f_params = expt::make_forall_param_pack(std::forward(first), + std::forward(params)...); + auto&& loop_body = expt::get_lambda(std::forward(first), + std::forward(params)...); + // expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context { + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -467,21 +495,18 @@ forall_Icount(ExecutionPolicy&& p, util::callPreLaunchPlugins(context); resources::EventProxy e = wrap::forall_Icount( - r, - std::forward(p), - std::forward(c), - icount, - std::move(body), - f_params); + r, std::forward(p), std::forward(c), icount, + std::move(body), f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > +template < + typename ExecutionPolicy, + typename Container, + typename IndexType, + typename LoopBody, + typename Res = typename resources::get_resource::type> RAJA_INLINE concepts::enable_if_t< resources::EventProxy, type_traits::is_range, @@ -494,10 +519,7 @@ forall_Icount(ExecutionPolicy&& p, { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall_Icount( - std::forward(p), - r, - std::forward(c), - icount, + std::forward(p), r, std::forward(c), icount, std::forward(loop_body)); } @@ -509,7 +531,10 @@ forall_Icount(ExecutionPolicy&& p, ****************************************************************************** */ -template +template RAJA_INLINE concepts::enable_if_t< resources::EventProxy, concepts::negate>, @@ -524,7 +549,8 @@ forall(ExecutionPolicy&& p, Res r, Container&& c, Params&&... params) auto&& loop_body = expt::get_lambda(std::forward(params)...); expt::check_forall_optional_args(loop_body, f_params); - util::PluginContext context{util::make_context>()}; + util::PluginContext context { + util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -534,19 +560,19 @@ forall(ExecutionPolicy&& p, Res r, Container&& c, Params&&... params) util::callPreLaunchPlugins(context); - resources::EventProxy e = wrap::forall( - r, - std::forward(p), - std::forward(c), - std::move(body), - f_params); + resources::EventProxy e = + wrap::forall(r, std::forward(p), + std::forward(c), std::move(body), f_params); util::callPostLaunchPlugins(context); return e; } -template ::type > +template < + typename ExecutionPolicy, + typename Container, + typename LoopBody, + typename Res = typename resources::get_resource::type> RAJA_INLINE concepts::enable_if_t< resources::EventProxy, concepts::negate>, @@ -556,13 +582,11 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) { auto r = Res::get_default(); return ::RAJA::policy_by_value_interface::forall( - std::forward(p), - r, - std::forward(c), + std::forward(p), r, std::forward(c), std::forward(loop_body)); } -} // end inline namespace policy_by_value_interface +} // namespace policy_by_value_interface /*! @@ -570,20 +594,23 @@ forall(ExecutionPolicy&& p, Container&& c, LoopBody&& loop_body) * * this reduces implementation overhead and perfectly forwards all arguments */ -template ::type > +template < + typename ExecutionPolicy, + typename... Args, + typename Res = typename resources::get_resource::type> RAJA_INLINE resources::EventProxy forall(Args&&... args) { Res r = Res::get_default(); - return ::RAJA::policy_by_value_interface::forall( - ExecutionPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::forall(ExecutionPolicy(), r, + std::forward(args)...); } template -RAJA_INLINE concepts::enable_if_t, type_traits::is_resource> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_resource> forall(Res r, Args&&... args) { - return ::RAJA::policy_by_value_interface::forall( - ExecutionPolicy(), r, std::forward(args)...); + return ::RAJA::policy_by_value_interface::forall(ExecutionPolicy(), r, + std::forward(args)...); } /*! @@ -592,8 +619,10 @@ forall(Res r, Args&&... args) * * this reduces implementation overhead and perfectly forwards all arguments */ -template ::type > +template < + typename ExecutionPolicy, + typename... Args, + typename Res = typename resources::get_resource::type> RAJA_INLINE resources::EventProxy forall_Icount(Args&&... args) { Res r = Res::get_default(); @@ -601,7 +630,8 @@ RAJA_INLINE resources::EventProxy forall_Icount(Args&&... args) ExecutionPolicy(), r, std::forward(args)...); } template -RAJA_INLINE concepts::enable_if_t, type_traits::is_resource> +RAJA_INLINE concepts::enable_if_t, + type_traits::is_resource> forall_Icount(Res r, Args&&... args) { return ::RAJA::policy_by_value_interface::forall_Icount( @@ -611,12 +641,17 @@ forall_Icount(Res r, Args&&... args) namespace detail { -template -RAJA_INLINE camp::resources::EventProxy CallForall::operator()(T const& segment, - ExecutionPolicy, - LoopBody body, - Res r, - ForallParams f_params) const +template +RAJA_INLINE camp::resources::EventProxy +CallForall::operator()(T const& segment, + ExecutionPolicy, + LoopBody body, + Res r, + ForallParams f_params) const { // this is only called inside a region, use impl using policy::sequential::forall_impl; @@ -626,15 +661,21 @@ RAJA_INLINE camp::resources::EventProxy CallForall::operator()(T const& seg constexpr CallForallIcount::CallForallIcount(int s) : start(s) {} -template -RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()(T const& segment, - ExecutionPolicy, - LoopBody body, - Res r, - ForallParams f_params) const +template +RAJA_INLINE camp::resources::EventProxy +CallForallIcount::operator()(T const& segment, + ExecutionPolicy, + LoopBody body, + Res r, + ForallParams f_params) const { // go through wrap to unwrap icount - return wrap::forall_Icount(r, ExecutionPolicy(), segment, start, body, f_params); + return wrap::forall_Icount(r, ExecutionPolicy(), segment, start, body, + f_params); } } // namespace detail @@ -650,100 +691,112 @@ RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()(T cons namespace expt { - template - struct dynamic_helper +template +struct dynamic_helper +{ + template + static void invoke_forall(const int pol, SEGMENT const& seg, BODY const& body) { - template - static void invoke_forall(const int pol, SEGMENT const &seg, BODY const &body) + if (IDX == pol) { - if(IDX==pol){ - using t_pol = typename camp::at>::type; - RAJA::forall(seg, body); - return; - } - dynamic_helper::invoke_forall(pol, seg, body); + using t_pol = typename camp::at>::type; + RAJA::forall(seg, body); + return; } + dynamic_helper::invoke_forall(pol, seg, body); + } - template - static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body) - { - - using t_pol = typename camp::at>::type; - using resource_type = typename resources::get_resource::type; + template + static resources::EventProxy + invoke_forall(RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + BODY const& body) + { - if(IDX==pol){ - RAJA::forall(r.get(), seg, body); + using t_pol = typename camp::at>::type; + using resource_type = typename resources::get_resource::type; - //Return a generic event proxy from r, - //because forall returns a typed event proxy - return {r}; - } + if (IDX == pol) + { + RAJA::forall(r.get(), seg, body); - return dynamic_helper::invoke_forall(r, pol, seg, body); + // Return a generic event proxy from r, + // because forall returns a typed event proxy + return {r}; } - }; + return dynamic_helper::invoke_forall(r, pol, seg, + body); + } +}; - template - struct dynamic_helper<0, POLICY_LIST> +template +struct dynamic_helper<0, POLICY_LIST> +{ + template + static void invoke_forall(const int pol, SEGMENT const& seg, BODY const& body) { - template - static void - invoke_forall(const int pol, SEGMENT const &seg, BODY const &body) + if (0 == pol) { - if(0==pol){ - using t_pol = typename camp::at>::type; - RAJA::forall(seg, body); - return; - } - RAJA_ABORT_OR_THROW("Policy enum not supported "); + using t_pol = typename camp::at>::type; + RAJA::forall(seg, body); + return; } + RAJA_ABORT_OR_THROW("Policy enum not supported "); + } - template - static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body) - { - if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); + template + static resources::EventProxy + invoke_forall(RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + BODY const& body) + { + if (pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); - using t_pol = typename camp::at>::type; - using resource_type = typename resources::get_resource::type; + using t_pol = typename camp::at>::type; + using resource_type = typename resources::get_resource::type; - RAJA::forall(r.get(), seg, body); + RAJA::forall(r.get(), seg, body); - //Return a generic event proxy from r, - //because forall returns a typed event proxy - return {r}; - } + // Return a generic event proxy from r, + // because forall returns a typed event proxy + return {r}; + } +}; - }; +template +void dynamic_forall(const int pol, SEGMENT const& seg, BODY const& body) +{ + constexpr int N = camp::size::value; + static_assert(N > 0, "RAJA policy list must not be empty"); - template - void dynamic_forall(const int pol, SEGMENT const &seg, BODY const &body) + if (pol > N - 1) { - constexpr int N = camp::size::value; - static_assert(N > 0, "RAJA policy list must not be empty"); - - if(pol > N-1) { - RAJA_ABORT_OR_THROW("Policy enum not supported"); - } - dynamic_helper::invoke_forall(pol, seg, body); + RAJA_ABORT_OR_THROW("Policy enum not supported"); } + dynamic_helper::invoke_forall(pol, seg, body); +} - template - resources::EventProxy - dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body) - { - constexpr int N = camp::size::value; - static_assert(N > 0, "RAJA policy list must not be empty"); - - if(pol > N-1) { - RAJA_ABORT_OR_THROW("Policy value out of range"); - } +template +resources::EventProxy +dynamic_forall(RAJA::resources::Resource r, + const int pol, + SEGMENT const& seg, + BODY const& body) +{ + constexpr int N = camp::size::value; + static_assert(N > 0, "RAJA policy list must not be empty"); - return dynamic_helper::invoke_forall(r, pol, seg, body); + if (pol > N - 1) + { + RAJA_ABORT_OR_THROW("Policy value out of range"); } + return dynamic_helper::invoke_forall(r, pol, seg, body); +} + } // namespace expt diff --git a/include/RAJA/pattern/kernel.hpp b/include/RAJA/pattern/kernel.hpp index 1875fe27d9..d03c8f531f 100644 --- a/include/RAJA/pattern/kernel.hpp +++ b/include/RAJA/pattern/kernel.hpp @@ -55,44 +55,43 @@ template struct IterableWrapperTuple; template -struct IterableWrapperTuple> { +struct IterableWrapperTuple> +{ - using type = - camp::tuple::iterator, - typename camp::decay::IndexType>...>; + using type = camp::tuple::iterator, + typename camp::decay::IndexType>...>; }; namespace internal { template -RAJA_INLINE constexpr auto make_wrapped_tuple_impl(Tuple &&t, - camp::idx_seq) - -> camp::tuple>>::iterator, - typename camp::decay< - camp::tuple_element_t>>::IndexType>...> +RAJA_INLINE constexpr auto +make_wrapped_tuple_impl(Tuple&& t, camp::idx_seq) -> camp::tuple< + RAJA::Span>>::iterator, + typename camp::decay< + camp::tuple_element_t>>::IndexType>...> { return camp::make_tuple( - RAJA::Span< - typename camp::decay< - camp::tuple_element_t>>::iterator, - typename camp::decay>>:: - IndexType>{camp::get(std::forward(t)).begin(), - camp::get(std::forward(t)).end()}...); + RAJA::Span>>::iterator, + typename camp::decay< + camp::tuple_element_t>>::IndexType> { + camp::get(std::forward(t)).begin(), + camp::get(std::forward(t)).end()}...); } } // namespace internal template -RAJA_INLINE constexpr auto make_wrapped_tuple(Tuple &&t) +RAJA_INLINE constexpr auto make_wrapped_tuple(Tuple&& t) -> decltype(internal::make_wrapped_tuple_impl( std::forward(t), - camp::make_idx_seq_t>::value>{})) + camp::make_idx_seq_t>::value> {})) { return internal::make_wrapped_tuple_impl( std::forward(t), - camp::make_idx_seq_t>::value>{}); + camp::make_idx_seq_t>::value> {}); } @@ -101,12 +100,13 @@ template -RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple &&segments, - ParamTuple &¶ms, - Resource resource, - Bodies &&... bodies) +RAJA_INLINE resources::EventProxy +kernel_param_resource(SegmentTuple&& segments, + ParamTuple&& params, + Resource resource, + Bodies&&... bodies) { - util::PluginContext context{util::make_context()}; + util::PluginContext context {util::make_context()}; // TODO: test that all policy members model the Executor policy concept // TODO: add a static_assert for functors which cannot be invoked with @@ -119,10 +119,8 @@ RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple & using param_tuple_t = camp::decay; - using loop_data_t = internal::LoopData...>; + using loop_data_t = internal::LoopData...>; util::callPreCapturePlugins(context); @@ -131,11 +129,10 @@ RAJA_INLINE resources::EventProxy kernel_param_resource(SegmentTuple & // our segments, loop bodies, and the tuple of loop indices // it is passed through all of the kernel mechanics by-referenece, // and only copied to provide thread-private instances. - loop_data_t loop_data(make_wrapped_tuple( - std::forward(segments)), - std::forward(params), - resource, - std::forward(bodies)...); + loop_data_t loop_data( + make_wrapped_tuple(std::forward(segments)), + std::forward(params), resource, + std::forward(bodies)...); util::callPostCapturePlugins(context); @@ -156,40 +153,35 @@ template -RAJA_INLINE resources::EventProxy kernel_resource(SegmentTuple &&segments, - Resource resource, - Bodies &&... bodies) +RAJA_INLINE resources::EventProxy +kernel_resource(SegmentTuple&& segments, Resource resource, Bodies&&... bodies) { - return RAJA::kernel_param_resource(std::forward(segments), - RAJA::make_tuple(), - resource, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), RAJA::make_tuple(), resource, + std::forward(bodies)...); } template -RAJA_INLINE resources::EventProxy> kernel_param(SegmentTuple &&segments, - ParamTuple &¶ms, - Bodies &&... bodies) +RAJA_INLINE resources::EventProxy> +kernel_param(SegmentTuple&& segments, ParamTuple&& params, Bodies&&... bodies) { auto res = resources::get_default_resource(); - return RAJA::kernel_param_resource(std::forward(segments), - std::forward(params), - res, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), std::forward(params), + res, std::forward(bodies)...); } template -RAJA_INLINE resources::EventProxy> kernel(SegmentTuple &&segments, - Bodies &&... bodies) +RAJA_INLINE resources::EventProxy> +kernel(SegmentTuple&& segments, Bodies&&... bodies) { auto res = resources::get_default_resource(); - return RAJA::kernel_param_resource(std::forward(segments), - RAJA::make_tuple(), - res, - std::forward(bodies)...); + return RAJA::kernel_param_resource( + std::forward(segments), RAJA::make_tuple(), res, + std::forward(bodies)...); } diff --git a/include/RAJA/pattern/kernel/Collapse.hpp b/include/RAJA/pattern/kernel/Collapse.hpp index 8efb126397..10afccda53 100644 --- a/include/RAJA/pattern/kernel/Collapse.hpp +++ b/include/RAJA/pattern/kernel/Collapse.hpp @@ -29,8 +29,8 @@ namespace statement template struct Collapse : public internal::ForList, public internal::CollapseBase, - public internal::Statement { -}; + public internal::Statement +{}; } // namespace statement diff --git a/include/RAJA/pattern/kernel/Conditional.hpp b/include/RAJA/pattern/kernel/Conditional.hpp index 6b7875c4c2..1b8f38f76b 100644 --- a/include/RAJA/pattern/kernel/Conditional.hpp +++ b/include/RAJA/pattern/kernel/Conditional.hpp @@ -37,8 +37,8 @@ namespace statement * */ template -struct If : public internal::Statement { -}; +struct If : public internal::Statement +{}; /*! @@ -46,10 +46,11 @@ struct If : public internal::Statement { * */ template -struct Value { +struct Value +{ template - RAJA_HOST_DEVICE RAJA_INLINE static long eval(Data const &) + RAJA_HOST_DEVICE RAJA_INLINE static long eval(Data const&) { return value; } @@ -60,10 +61,11 @@ struct Value { * */ template -struct Equals { +struct Equals +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) == R::eval(data); } @@ -74,10 +76,11 @@ struct Equals { * */ template -struct NotEquals { +struct NotEquals +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) != R::eval(data); } @@ -89,10 +92,11 @@ struct NotEquals { * */ template -struct Or { +struct Or +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) || R::eval(data); } @@ -104,10 +108,11 @@ struct Or { * */ template -struct And { +struct And +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) && R::eval(data); } @@ -119,10 +124,11 @@ struct And { * */ template -struct LessThan { +struct LessThan +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) < R::eval(data); } @@ -134,10 +140,11 @@ struct LessThan { * */ template -struct LessThanEq { +struct LessThanEq +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) <= R::eval(data); } @@ -149,10 +156,11 @@ struct LessThanEq { * */ template -struct GreaterThan { +struct GreaterThan +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) > R::eval(data); } @@ -164,10 +172,11 @@ struct GreaterThan { * */ template -struct GreaterThanEq { +struct GreaterThanEq +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return L::eval(data) >= R::eval(data); } @@ -179,10 +188,11 @@ struct GreaterThanEq { * */ template -struct Not { +struct Not +{ template - RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static bool eval(Data const& data) { return !(L::eval(data)); } @@ -196,14 +206,16 @@ namespace internal template -struct StatementExecutor, Types> { +struct StatementExecutor, Types> +{ template - static RAJA_INLINE void exec(Data &&data) + static RAJA_INLINE void exec(Data&& data) { - if (Condition::eval(data)) { + if (Condition::eval(data)) + { execute_statement_list, Types>( std::forward(data)); } diff --git a/include/RAJA/pattern/kernel/For.hpp b/include/RAJA/pattern/kernel/For.hpp index 539c451673..661fe92868 100644 --- a/include/RAJA/pattern/kernel/For.hpp +++ b/include/RAJA/pattern/kernel/For.hpp @@ -42,7 +42,8 @@ template struct For : public internal::ForList, public internal::ForTraitBase, - public internal::Statement { + public internal::Statement +{ // TODO: add static_assert for valid policy in Pol using execution_policy_t = ExecPolicy; @@ -59,8 +60,12 @@ namespace internal * Assigns the loop index to offset ArgumentId * */ -template -struct ForWrapper : public GenericWrapper { +template +struct ForWrapper : public GenericWrapper +{ using Base = GenericWrapper; using Base::Base; @@ -85,11 +90,13 @@ template struct StatementExecutor< - statement::For, Types> { + statement::For, + Types> +{ template - static RAJA_INLINE void exec(Data &&data) + static RAJA_INLINE void exec(Data&& data) { // Set the argument type for this loop @@ -98,12 +105,13 @@ struct StatementExecutor< // Create a wrapper, just in case forall_impl needs to thread_privatize ForWrapper for_wrapper(data); - auto len = segment_length(data); + auto len = segment_length(data); using len_t = decltype(len); auto r = data.res; - forall_impl(r, ExecPolicy{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, ExecPolicy {}, TypedRangeSegment(0, len), for_wrapper, + RAJA::expt::get_empty_forall_param_pack()); } }; @@ -112,15 +120,14 @@ struct StatementExecutor< * * */ -template -struct StatementExecutor< - statement::For, Types> { +template +struct StatementExecutor, + Types> +{ template - static RAJA_INLINE void exec(Data &&data) + static RAJA_INLINE void exec(Data&& data) { // Set the argument type for this loop @@ -129,12 +136,13 @@ struct StatementExecutor< // Create a wrapper, just in case forall_impl needs to thread_privatize ForWrapper for_wrapper(data); - auto len = segment_length(data); + auto len = segment_length(data); using len_t = decltype(len); RAJA_EXTRACT_BED_IT(TypedRangeSegment(0, len)); - for (decltype(distance_it) i = 0; i < distance_it; ++i) { + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { for_wrapper(*(begin_it + i)); } } diff --git a/include/RAJA/pattern/kernel/ForICount.hpp b/include/RAJA/pattern/kernel/ForICount.hpp index 18515c7f59..c6e75c35aa 100644 --- a/include/RAJA/pattern/kernel/ForICount.hpp +++ b/include/RAJA/pattern/kernel/ForICount.hpp @@ -44,8 +44,9 @@ template struct ForICount : public internal::ForList, - public internal::ForTraitBase, - public internal::Statement { + public internal::ForTraitBase, + public internal::Statement +{ static_assert(std::is_base_of::value, "Inappropriate ParamId, ParamId must be of type " @@ -64,9 +65,13 @@ namespace internal * Assigns the loop index to offset ArgumentId * Assigns the loop index to param ParamId */ -template -struct ForICountWrapper : public GenericWrapper { +struct ForICountWrapper : public GenericWrapper +{ using Base = GenericWrapper; using Base::Base; @@ -93,26 +98,29 @@ template struct StatementExecutor< - statement::ForICount, Types> { + statement::ForICount, + Types> +{ template - static RAJA_INLINE void exec(Data &&data) + static RAJA_INLINE void exec(Data&& data) { // Set the argument type for this loop using NewTypes = setSegmentTypeFromData; // Create a wrapper, just in case forall_impl needs to thread_privatize - ForICountWrapper for_wrapper(data); + ForICountWrapper + for_wrapper(data); - auto len = segment_length(data); + auto len = segment_length(data); using len_t = decltype(len); auto r = resources::get_resource::type::get_default(); - forall_impl(r, ExecPolicy{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, ExecPolicy {}, TypedRangeSegment(0, len), for_wrapper, + RAJA::expt::get_empty_forall_param_pack()); } }; diff --git a/include/RAJA/pattern/kernel/Hyperplane.hpp b/include/RAJA/pattern/kernel/Hyperplane.hpp index 955afcecc0..66be036556 100644 --- a/include/RAJA/pattern/kernel/Hyperplane.hpp +++ b/include/RAJA/pattern/kernel/Hyperplane.hpp @@ -81,10 +81,8 @@ template -struct Hyperplane - : public internal::Statement { -}; +struct Hyperplane : public internal::Statement +{}; } // end namespace statement @@ -93,9 +91,8 @@ namespace internal template -struct HyperplaneInner - : public internal::Statement { -}; +struct HyperplaneInner : public internal::Statement +{}; template , ExecPolicy, - EnclosedStmts...>, Types> { + EnclosedStmts...>, + Types> +{ template - static RAJA_INLINE void exec(Data &data) + static RAJA_INLINE void exec(Data& data) { // get type of Hp arguments index @@ -126,8 +125,7 @@ struct StatementExecutor, + ExecPolicy, ArgList, HyperplaneInner, EnclosedStmts...>>; // Create a For-loop wrapper for the outer loop @@ -135,9 +133,9 @@ struct StatementExecutor(data) + - foldl(RAJA::operators::plus(), - segment_length(data)...); + idx_t hp_len = + segment_length(data) + + foldl(RAJA::operators::plus(), segment_length(data)...); /* Execute the outer loop over hyperplanes * @@ -146,10 +144,8 @@ struct StatementExecutor::type::get_default(); - forall_impl(r, HpExecPolicy{}, - TypedRangeSegment(0, hp_len), - outer_wrapper, - RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, HpExecPolicy {}, TypedRangeSegment(0, hp_len), + outer_wrapper, RAJA::expt::get_empty_forall_param_pack()); } }; @@ -159,27 +155,30 @@ template struct StatementExecutor< - HyperplaneInner, EnclosedStmts...>, Types> { + HyperplaneInner, EnclosedStmts...>, + Types> +{ template - static RAJA_INLINE void exec(Data &data) + static RAJA_INLINE void exec(Data& data) { // get h value - auto h = camp::get(data.offset_tuple); + auto h = camp::get(data.offset_tuple); using idx_t = decltype(h); // compute actual iterate for HpArgumentId // as: i0 = h - (i1 + i2 + i3 + ...) idx_t i = h - foldl(RAJA::operators::plus(), - camp::get(data.offset_tuple)...); + camp::get(data.offset_tuple)...); // get length of Hp indexed argument auto len = segment_length(data); // check bounds - if (i >= 0 && i < len) { + if (i >= 0 && i < len) + { // store in tuple data.template assign_offset(i); diff --git a/include/RAJA/pattern/kernel/InitLocalMem.hpp b/include/RAJA/pattern/kernel/InitLocalMem.hpp index 21d9e3cd2a..25bd0a10df 100644 --- a/include/RAJA/pattern/kernel/InitLocalMem.hpp +++ b/include/RAJA/pattern/kernel/InitLocalMem.hpp @@ -26,7 +26,7 @@ namespace RAJA { -//Policies for RAJA local arrays +// Policies for RAJA local arrays struct cpu_tile_mem; @@ -43,14 +43,16 @@ namespace statement * IntiLocalMem, statements...> * Will intialize the 0th array in the param tuple */ -template -struct InitLocalMem : public internal::Statement { -}; +template +struct InitLocalMem : public internal::Statement +{}; -//Policy Specialization -template -struct InitLocalMem, EnclosedStmts...> : public internal::Statement { -}; +// Policy Specialization +template +struct InitLocalMem, + EnclosedStmts...> : public internal::Statement +{}; } // end namespace statement @@ -58,28 +60,33 @@ struct InitLocalMem, EnclosedStmts namespace internal { -//Statement executor to initalize RAJA local array -template -struct StatementExecutor, EnclosedStmts...>, Types>{ - - //Execute statement list - template - static void RAJA_INLINE exec_expanded(Data && data) +// Statement executor to initalize RAJA local array +template +struct StatementExecutor, + EnclosedStmts...>, + Types> +{ + + // Execute statement list + template + static void RAJA_INLINE exec_expanded(Data&& data) { execute_statement_list, Types>(data); } - - //Intialize local array - //Identifies type + number of elements needed - template - static void RAJA_INLINE exec_expanded(Data && data) + + // Intialize local array + // Identifies type + number of elements needed + template + static void RAJA_INLINE exec_expanded(Data&& data) { - using varType = typename camp::tuple_element_t::param_tuple_t>::value_type; + using varType = typename camp::tuple_element_t< + Pos, typename camp::decay::param_tuple_t>::value_type; // Initialize memory #ifdef RAJA_COMPILER_MSVC // MSVC doesn't like taking a pointer to stack allocated data?!?! - varType *ptr = new varType[camp::get(data.param_tuple).size()]; + varType* ptr = new varType[camp::get(data.param_tuple).size()]; camp::get(data.param_tuple).set_data(ptr); #else varType Array[camp::get(data.param_tuple).size()]; @@ -95,16 +102,14 @@ struct StatementExecutor - static RAJA_INLINE void exec(Data &&data) + + template + static RAJA_INLINE void exec(Data&& data) { - //Initalize local arrays + execute statements + cleanup + // Initalize local arrays + execute statements + cleanup exec_expanded(data); } - }; diff --git a/include/RAJA/pattern/kernel/Lambda.hpp b/include/RAJA/pattern/kernel/Lambda.hpp index 29d41b431e..d9b87bf3d1 100644 --- a/include/RAJA/pattern/kernel/Lambda.hpp +++ b/include/RAJA/pattern/kernel/Lambda.hpp @@ -46,28 +46,28 @@ struct lambda_arg_param_t struct lambda_arg_offset_t {}; -template +template struct lambda_arg_value_t { - using type = T; + using type = T; }; -template +template struct LambdaArg { - static constexpr camp::idx_t value = V; + static constexpr camp::idx_t value = V; }; -} - +} // namespace internal /*! * Used in RAJA::statement::Lambda to specify that one or more segment values * should be passed into the lambda as an argument */ -template -using Segs = camp::list...>; +template +using Segs = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more segment offsets @@ -79,16 +79,18 @@ using Segs = camp::list... * In the case of tiling (with Tile) the offset is w.r.t. the beginning of the * current tile. */ -template -using Offsets = camp::list...>; +template +using Offsets = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more parameters that * should be passed into the lambda as an argument. */ -template -using Params = camp::list...>; +template +using Params = + camp::list...>; /*! * Used in RAJA::statement::Lambda to specify that one or more constant values @@ -103,8 +105,9 @@ using Params = camp::list> * invokes: lambda0( (double)3, (double) 4 ) */ -template -using ValuesT = camp::list, values>...>; +template +using ValuesT = + camp::list, values>...>; namespace statement @@ -119,8 +122,9 @@ namespace statement * RAJA::kernel(make_tuple{s0, s1, s2}, lambda0, lambda1); * */ -template -struct Lambda : internal::Statement { +template +struct Lambda : internal::Statement +{ static const camp::idx_t loop_body_index = BodyIdx; }; @@ -130,13 +134,6 @@ namespace internal { - - - - - - - /* * Helper that extracts a segment value for a lambda argument * @@ -146,26 +143,23 @@ namespace internal * This class allows specialization on the segment type in LoopTypes so that * fancier constructions can happen (ie vector_exec, etc.) */ -template +template struct LambdaSegExtractor { - static_assert(!std::is_same::value, + static_assert( + !std::is_same::value, "Segment not assigned, but used in Lambda with Segs<> argument"); - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static SegmentType extract(Data &&data) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static SegmentType extract(Data&& data) { - return SegmentType(camp::get(data.segment_tuple).begin()[camp::get(data.offset_tuple)]); + return SegmentType(camp::get(data.segment_tuple) + .begin()[camp::get(data.offset_tuple)]); } - }; - /* * Helper that extracts a segment value for a lambda argument * @@ -175,26 +169,22 @@ struct LambdaSegExtractor * This class allows specialization on the segment type in LoopTypes so that * fancier constructions can happen (ie vector_exec, etc.) */ -template +template struct LambdaOffsetExtractor { - static_assert(!std::is_same::value, + static_assert( + !std::is_same::value, "Segment not assigned, but used in Lambda with Offsets<> argument"); - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static OffsetType extract(Data &&data) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static OffsetType extract(Data&& data) { return OffsetType(camp::get(data.offset_tuple)); } - }; - /* * Helper that provides first level of argument extraction * This acts as a switchboard between Segs, Offsets, and Params @@ -202,140 +192,140 @@ struct LambdaOffsetExtractor * It calls LambdaArgExtractor to perform the actual argument extraction. * This allows LambdaArgExtractor to be specialized */ -template +template struct LambdaArgSwitchboard; -template +template struct LambdaArgSwitchboard> { using OffsetType = camp::at_v; - static_assert(!std::is_same::value, + static_assert( + !std::is_same::value, "Offset not assigned, but used in Lambda with Offsets<> argument"); - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static OffsetType extract(Data &&data) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static OffsetType extract(Data&& data) { - return LambdaOffsetExtractor::extract(std::forward(data)); + return LambdaOffsetExtractor::extract( + std::forward(data)); } - }; -template +template struct LambdaArgSwitchboard> { using SegmentType = camp::at_v; - static_assert(!std::is_same::value, + static_assert( + !std::is_same::value, "Segment not assigned, but used in Lambda with Segs<> argument"); - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static SegmentType extract(Data &&data) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static SegmentType extract(Data&& data) { - return LambdaSegExtractor::extract(std::forward(data)); + return LambdaSegExtractor::extract( + std::forward(data)); } - }; -template +template struct LambdaArgSwitchboard> { - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static auto extract(Data &&data)-> - typename std::add_lvalue_reference::param_tuple_t>>::type + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static auto + extract(Data&& data) -> typename std::add_lvalue_reference< + camp::tuple_element_t::param_tuple_t>>::type { return camp::get(data.param_tuple); } }; -template +template struct LambdaArgSwitchboard, value>> { - template - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr - static T extract(Data &&) + template + RAJA_HOST_DEVICE RAJA_INLINE constexpr static T extract(Data&&) { return T(value); } }; - RAJA_SUPPRESS_HD_WARN -template -RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda_with_args(Data &&data, - camp::list const &) +template +RAJA_INLINE RAJA_HOST_DEVICE void +invoke_lambda_with_args(Data&& data, camp::list const&) { camp::get(data.bodies)( LambdaArgSwitchboard::extract(data)...); } - - /*! * A RAJA::kernel statement that invokes a lambda function * with user specified arguments. */ -template -struct StatementExecutor, Types> { +template +struct StatementExecutor, Types> +{ template - static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data &&data) + static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data&& data) { - //Convert SegList, ParamList into Seg, Param types, and store in a list + // Convert SegList, ParamList into Seg, Param types, and store in a list using targList = typename camp::flatten>::type; - invoke_lambda_with_args(std::forward(data), targList{}); + invoke_lambda_with_args(std::forward(data), + targList {}); } }; - -template -RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda(Data &&data, camp::idx_seq const &, camp::idx_seq const &) +template +RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda(Data&& data, + camp::idx_seq const&, + camp::idx_seq const&) { - using AllSegs = Segs; + using AllSegs = Segs; using AllParams = Params; // invoke the expanded Lambda executor, passing in all segments and params - StatementExecutor, Types>::exec(std::forward(data)); + StatementExecutor, + Types>::exec(std::forward(data)); } template -struct StatementExecutor, Types> { +struct StatementExecutor, Types> +{ template - static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data &&data) + static RAJA_INLINE RAJA_HOST_DEVICE void exec(Data&& data) { - using Data_t = camp::decay; + using Data_t = camp::decay; using offset_tuple_t = typename Data_t::offset_tuple_t; - using param_tuple_t = typename Data_t::param_tuple_t; + using param_tuple_t = typename Data_t::param_tuple_t; invoke_lambda( std::forward(data), - camp::make_idx_seq_t::value>{}, - camp::make_idx_seq_t::value>{}); - + camp::make_idx_seq_t::value> {}, + camp::make_idx_seq_t::value> {}); } }; diff --git a/include/RAJA/pattern/kernel/Param.hpp b/include/RAJA/pattern/kernel/Param.hpp index 8e870ebe15..999e1a9ebe 100644 --- a/include/RAJA/pattern/kernel/Param.hpp +++ b/include/RAJA/pattern/kernel/Param.hpp @@ -31,10 +31,10 @@ namespace RAJA namespace internal { -struct ParamBase { -}; +struct ParamBase +{}; -}// end namespace internal +} // end namespace internal namespace statement { @@ -47,12 +47,13 @@ namespace statement * RAJA::kernel execution policies. */ template -struct Param : public internal::ParamBase { +struct Param : public internal::ParamBase +{ constexpr static camp::idx_t param_idx = ParamId; template - RAJA_HOST_DEVICE RAJA_INLINE static auto eval(Data const &data) + RAJA_HOST_DEVICE RAJA_INLINE static auto eval(Data const& data) -> decltype(camp::get(data.param_tuple)) { return camp::get(data.param_tuple); diff --git a/include/RAJA/pattern/kernel/Reduce.hpp b/include/RAJA/pattern/kernel/Reduce.hpp index 4de4922ea3..db45d2dfe4 100644 --- a/include/RAJA/pattern/kernel/Reduce.hpp +++ b/include/RAJA/pattern/kernel/Reduce.hpp @@ -39,10 +39,12 @@ namespace statement * */ template class ReduceOperator, + template + class ReduceOperator, typename ParamId, typename... EnclosedStmts> -struct Reduce : public internal::Statement { +struct Reduce : public internal::Statement +{ static_assert(std::is_base_of::value, "Inappropriate ParamId, ParamId must be of type " diff --git a/include/RAJA/pattern/kernel/Region.hpp b/include/RAJA/pattern/kernel/Region.hpp index 82b79ae775..700df61199 100644 --- a/include/RAJA/pattern/kernel/Region.hpp +++ b/include/RAJA/pattern/kernel/Region.hpp @@ -30,9 +30,9 @@ namespace RAJA namespace statement { -template -struct Region : public internal::Statement { -}; +template +struct Region : public internal::Statement +{}; } // end namespace statement @@ -40,23 +40,27 @@ struct Region : public internal::Statement { namespace internal { -//Statement executor to create a region within kernel - -//Note: RAJA region's lambda must capture by reference otherwise -//internal function calls are undefined. -template -struct StatementExecutor, Types> { +// Statement executor to create a region within kernel -template -static RAJA_INLINE void exec(Data &&data) +// Note: RAJA region's lambda must capture by reference otherwise +// internal function calls are undefined. +template +struct StatementExecutor, + Types> { - RAJA::region([&]() { - using data_t = camp::decay; - execute_statement_list, Types>(data_t(data)); - }); -} - + template + static RAJA_INLINE void exec(Data&& data) + { + + RAJA::region( + [&]() + { + using data_t = camp::decay; + execute_statement_list, Types>( + data_t(data)); + }); + } }; diff --git a/include/RAJA/pattern/kernel/Tile.hpp b/include/RAJA/pattern/kernel/Tile.hpp index 43f72e0545..3b3b3e689d 100644 --- a/include/RAJA/pattern/kernel/Tile.hpp +++ b/include/RAJA/pattern/kernel/Tile.hpp @@ -34,14 +34,13 @@ namespace RAJA { -struct TileSize { +struct TileSize +{ const camp::idx_t size; RAJA_HOST_DEVICE RAJA_INLINE - constexpr TileSize(camp::idx_t size_) : size{size_} - { - } + constexpr TileSize(camp::idx_t size_) : size {size_} {} }; namespace statement @@ -56,7 +55,8 @@ template -struct Tile : public internal::Statement { +struct Tile : public internal::Statement +{ using tile_policy_t = TilePolicy; using exec_policy_t = ExecPolicy; }; @@ -65,17 +65,18 @@ struct Tile : public internal::Statement { ///! tag for a tiling loop template -struct tile_fixed { +struct tile_fixed +{ static constexpr camp::idx_t chunk_size = chunk_size_; }; template -struct tile_dynamic { +struct tile_dynamic +{ static constexpr camp::idx_t id = ArgumentId; }; - namespace internal { @@ -84,8 +85,12 @@ namespace internal * Assigns the tile segment to segment ArgumentId * */ -template -struct TileWrapper : public GenericWrapper { +template +struct TileWrapper : public GenericWrapper +{ using Base = GenericWrapper; using Base::Base; @@ -104,7 +109,8 @@ struct TileWrapper : public GenericWrapper { template -struct IterableTiler { +struct IterableTiler +{ using value_type = camp::decay; struct iterate @@ -120,46 +126,45 @@ struct IterableTiler { const Index_type block_id; public: - using value_type = iterate; - using difference_type = camp::idx_t; - using pointer = value_type *; - using reference = value_type &; + using value_type = iterate; + using difference_type = camp::idx_t; + using pointer = value_type*; + using reference = value_type&; using iterator_category = std::random_access_iterator_tag; RAJA_HOST_DEVICE RAJA_INLINE - constexpr iterator(IterableTiler const &itiler_, Index_type block_id_) - : itiler{itiler_}, block_id{block_id_} - { - } + constexpr iterator(IterableTiler const& itiler_, Index_type block_id_) + : itiler {itiler_}, block_id {block_id_} + {} RAJA_HOST_DEVICE RAJA_INLINE value_type operator*() { auto start = block_id * itiler.block_size; - return iterate{itiler.it.slice(start, itiler.block_size), block_id}; + return iterate {itiler.it.slice(start, itiler.block_size), block_id}; } RAJA_HOST_DEVICE - RAJA_INLINE difference_type operator-(const iterator &rhs) const + RAJA_INLINE difference_type operator-(const iterator& rhs) const { return static_cast(block_id) - static_cast(rhs.block_id); } RAJA_HOST_DEVICE - RAJA_INLINE iterator operator-(const difference_type &rhs) const + RAJA_INLINE iterator operator-(const difference_type& rhs) const { return iterator(itiler, block_id - rhs); } RAJA_HOST_DEVICE - RAJA_INLINE iterator operator+(const difference_type &rhs) const + RAJA_INLINE iterator operator+(const difference_type& rhs) const { - return iterator(itiler, - block_id + rhs >= itiler.num_blocks ? itiler.num_blocks - : block_id + rhs); + return iterator(itiler, block_id + rhs >= itiler.num_blocks + ? itiler.num_blocks + : block_id + rhs); } RAJA_HOST_DEVICE @@ -169,13 +174,13 @@ struct IterableTiler { } RAJA_HOST_DEVICE - RAJA_INLINE bool operator!=(const iterator &rhs) const + RAJA_INLINE bool operator!=(const iterator& rhs) const { return block_id != rhs.block_id; } RAJA_HOST_DEVICE - RAJA_INLINE bool operator<(const iterator &rhs) const + RAJA_INLINE bool operator<(const iterator& rhs) const { return block_id < rhs.block_id; } @@ -183,16 +188,17 @@ struct IterableTiler { RAJA_HOST_DEVICE RAJA_INLINE - IterableTiler(const Iterable &it_, camp::idx_t block_size_) - : it{it_}, block_size{block_size_} + IterableTiler(const Iterable& it_, camp::idx_t block_size_) + : it {it_}, block_size {block_size_} { using std::begin; using std::distance; using std::end; - dist = it.end() - it.begin(); // distance(begin(it), end(it)); + dist = it.end() - it.begin(); // distance(begin(it), end(it)); num_blocks = dist / block_size; // if (dist % block_size) num_blocks += 1; - if (dist - num_blocks * block_size > 0) { + if (dist - num_blocks * block_size > 0) + { num_blocks += 1; } } @@ -222,13 +228,15 @@ template struct StatementExecutor< - statement::Tile, EPol, EnclosedStmts...>, Types> { + statement::Tile, EPol, EnclosedStmts...>, + Types> +{ template - static RAJA_INLINE void exec(Data &data) + static RAJA_INLINE void exec(Data& data) { // Get the segment we are going to tile - auto const &segment = camp::get(data.segment_tuple); + auto const& segment = camp::get(data.segment_tuple); // Get the tiling policies chunk size auto chunk_size = tile_fixed::chunk_size; @@ -238,47 +246,51 @@ struct StatementExecutor< IterableTiler tiled_iterable(segment, chunk_size); // Wrap in case forall_impl needs to thread_privatize - TileWrapper tile_wrapper(data); + TileWrapper tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, EPol {}, tiled_iterable, tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; } }; -template +template struct StatementExecutor< - statement::Tile, EPol, EnclosedStmts...>, Types> { + statement:: + Tile, EPol, EnclosedStmts...>, + Types> +{ template - static RAJA_INLINE void exec(Data &data) + static RAJA_INLINE void exec(Data& data) { // Get the segment we are going to tile - auto const &segment = camp::get(data.segment_tuple); + auto const& segment = camp::get(data.segment_tuple); // Get the tiling policies chunk size auto chunk_size = camp::get(data.param_tuple); - static_assert(camp::concepts::metalib::is_same::value, - "Extracted parameter must be of type TileSize."); + static_assert( + camp::concepts::metalib::is_same::value, + "Extracted parameter must be of type TileSize."); // Create a tile iterator IterableTiler tiled_iterable(segment, chunk_size.size); // Wrap in case forall_impl needs to thread_privatize - TileWrapper tile_wrapper(data); + TileWrapper tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); - + forall_impl(r, EPol {}, tiled_iterable, tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); + // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; } diff --git a/include/RAJA/pattern/kernel/TileTCount.hpp b/include/RAJA/pattern/kernel/TileTCount.hpp index 2653e992c7..d741e0a4b0 100644 --- a/include/RAJA/pattern/kernel/TileTCount.hpp +++ b/include/RAJA/pattern/kernel/TileTCount.hpp @@ -47,7 +47,8 @@ template -struct TileTCount : public internal::Statement { +struct TileTCount : public internal::Statement +{ static_assert(std::is_base_of::value, "Inappropriate ParamId, ParamId must be of type " "RAJA::Statement::Param< # >"); @@ -66,9 +67,13 @@ namespace internal * Assigns the tile segment to segment ArgumentId * Assigns the tile index to param ParamId */ -template -struct TileTCountWrapper : public GenericWrapper { +struct TileTCountWrapper : public GenericWrapper +{ using Base = GenericWrapper; using Base::Base; @@ -79,17 +84,16 @@ struct TileTCountWrapper : public GenericWrapper { // Assign the tile's segment to the tuple camp::get(Base::data.segment_tuple) = si.s; - + // Assign the tile's index Base::data.template assign_param(si.i); - + // Execute enclosed statements Base::exec(); } }; - /*! * A generic RAJA::kernel forall_impl executor for statement::TileTCount * @@ -102,14 +106,16 @@ template struct StatementExecutor< - statement::TileTCount, Types> { + statement::TileTCount, + Types> +{ template - static RAJA_INLINE void exec(Data &data) + static RAJA_INLINE void exec(Data& data) { // Get the segment we are going to tile - auto const &segment = camp::get(data.segment_tuple); + auto const& segment = camp::get(data.segment_tuple); // Get the tiling policies chunk size auto chunk_size = TPol::chunk_size; @@ -119,12 +125,13 @@ struct StatementExecutor< IterableTiler tiled_iterable(segment, chunk_size); // Wrap in case forall_impl needs to thread_privatize - TileTCountWrapper tile_wrapper(data); + TileTCountWrapper + tile_wrapper(data); // Loop over tiles, executing enclosed statement list auto r = resources::get_resource::type::get_default(); - forall_impl(r, EPol{}, tiled_iterable, tile_wrapper, RAJA::expt::get_empty_forall_param_pack()); + forall_impl(r, EPol {}, tiled_iterable, tile_wrapper, + RAJA::expt::get_empty_forall_param_pack()); // Set range back to original values camp::get(data.segment_tuple) = tiled_iterable.it; diff --git a/include/RAJA/pattern/kernel/internal/LoopData.hpp b/include/RAJA/pattern/kernel/internal/LoopData.hpp index 9667a55538..08f72ab91f 100644 --- a/include/RAJA/pattern/kernel/internal/LoopData.hpp +++ b/include/RAJA/pattern/kernel/internal/LoopData.hpp @@ -40,29 +40,27 @@ namespace internal { - - - // Universal base of all For wrappers for type traits - struct ForList { - }; - struct ForBase { - }; - struct CollapseBase { - }; - template - struct ForTraitBase : public ForBase { - constexpr static camp::idx_t index_val = ArgumentId; - using index = camp::num; - using index_type = camp::nil; // default to invalid type - using policy_type = Policy; - using type = ForTraitBase; // make camp::value compatible - }; - - +// Universal base of all For wrappers for type traits +struct ForList +{}; +struct ForBase +{}; +struct CollapseBase +{}; +template +struct ForTraitBase : public ForBase +{ + constexpr static camp::idx_t index_val = ArgumentId; + using index = camp::num; + using index_type = camp::nil; // default to invalid type + using policy_type = Policy; + using type = ForTraitBase; // make camp::value compatible +}; template -struct iterable_difftype_getter { +struct iterable_difftype_getter +{ using type = typename std::iterator_traits< typename Iterator::iterator>::difference_type; }; @@ -79,7 +77,8 @@ using difftype_tuple_from_segments = template -struct iterable_value_type_getter { +struct iterable_value_type_getter +{ using type = typename std::iterator_traits::value_type; }; @@ -100,13 +99,12 @@ using index_types_from_segments = value_type_list_from_segments>::type; - - template -struct LoopData { +struct LoopData +{ using Self = LoopData; @@ -138,78 +136,70 @@ struct LoopData { using vector_sizes_t = tuple_of_n::value>; vector_sizes_t vector_sizes; - RAJA_INLINE RAJA_HOST_DEVICE constexpr - LoopData(SegmentTuple const &s, ParamTuple const &p, Resource r, Bodies const &... b) + RAJA_INLINE RAJA_HOST_DEVICE constexpr LoopData(SegmentTuple const& s, + ParamTuple const& p, + Resource r, + Bodies const&... b) : segment_tuple(s), param_tuple(p), res(r), bodies(b...) - { - } - constexpr LoopData(LoopData const &) = default; - constexpr LoopData(LoopData &&) = default; + {} + constexpr LoopData(LoopData const&) = default; + constexpr LoopData(LoopData&&) = default; template - RAJA_HOST_DEVICE RAJA_INLINE void assign_offset(IndexT const &i) + RAJA_HOST_DEVICE RAJA_INLINE void assign_offset(IndexT const& i) { camp::get(offset_tuple) = i; } template - RAJA_HOST_DEVICE RAJA_INLINE void assign_param(IndexT const &i) + RAJA_HOST_DEVICE RAJA_INLINE void assign_param(IndexT const& i) { - using param_t = camp::at_v; + using param_t = + camp::at_v; camp::get(param_tuple) = param_t(i); } template - RAJA_HOST_DEVICE RAJA_INLINE - auto get_param() -> - camp::at_v + RAJA_HOST_DEVICE RAJA_INLINE auto get_param() + -> camp::at_v { return camp::get(param_tuple); } - RAJA_HOST_DEVICE RAJA_INLINE - Resource get_resource() - { - return res; - } - - + RAJA_HOST_DEVICE RAJA_INLINE Resource get_resource() { return res; } }; - - template -using segment_diff_type = - typename std::iterator_traits< - typename camp::at_v::iterator>::difference_type; - - +using segment_diff_type = typename std::iterator_traits< + typename camp::at_v::iterator>::difference_type; template -RAJA_INLINE RAJA_HOST_DEVICE auto segment_length(Data const &data) -> - segment_diff_type +RAJA_INLINE RAJA_HOST_DEVICE auto segment_length(Data const& data) + -> segment_diff_type { return camp::get(data.segment_tuple).end() - camp::get(data.segment_tuple).begin(); } - - template -struct GenericWrapper : GenericWrapperBase { +struct GenericWrapper : GenericWrapperBase +{ using data_t = camp::decay; - data_t &data; + data_t& data; RAJA_INLINE - constexpr explicit GenericWrapper(data_t &d) : data{d} {} + constexpr explicit GenericWrapper(data_t& d) : data {d} {} RAJA_INLINE - void exec() { execute_statement_list, Types>(data); } + void exec() + { + execute_statement_list, Types>(data); + } }; @@ -217,26 +207,25 @@ struct GenericWrapper : GenericWrapperBase { * Convenience object used to create a thread-private LoopData object. */ template -struct NestedPrivatizer { - using data_t = typename T::data_t; - using value_type = camp::decay; - using reference_type = value_type &; +struct NestedPrivatizer +{ + using data_t = typename T::data_t; + using value_type = camp::decay; + using reference_type = value_type&; data_t privatized_data; value_type privatized_wrapper; RAJA_INLINE - constexpr NestedPrivatizer(const T &o) - : privatized_data{o.data}, privatized_wrapper(privatized_data) - { - } + constexpr NestedPrivatizer(const T& o) + : privatized_data {o.data}, privatized_wrapper(privatized_data) + {} RAJA_INLINE reference_type get_priv() { return privatized_wrapper; } }; - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/LoopTypes.hpp b/include/RAJA/pattern/kernel/internal/LoopTypes.hpp index 7f77df4214..0f334c542b 100644 --- a/include/RAJA/pattern/kernel/internal/LoopTypes.hpp +++ b/include/RAJA/pattern/kernel/internal/LoopTypes.hpp @@ -29,63 +29,71 @@ namespace internal { -template +template struct LoopTypes; -template -struct LoopTypes, camp::list> { +template +struct LoopTypes, camp::list> +{ - using Self = LoopTypes, camp::list>; + using Self = + LoopTypes, camp::list>; static constexpr size_t s_num_segments = sizeof...(SegmentTypes); // This ensures that you don't double-loop over a segment within the same // loop nesting static_assert(s_num_segments == sizeof...(OffsetTypes), - "Number of segments and offsets must match"); + "Number of segments and offsets must match"); using segment_types_t = camp::list; - using offset_types_t = camp::list; + using offset_types_t = camp::list; }; -template -using makeInitialLoopTypes = - LoopTypes::value>, - list_of_n::value>>; +template +using makeInitialLoopTypes = LoopTypes< + list_of_n::value>, + list_of_n::value>>; -template +template struct SetSegmentTypeHelper; -template +template struct SetSegmentTypeHelper> { - using segment_list = typename Types::segment_types_t; - using offset_list = typename Types::offset_types_t; - - static_assert(std::is_same, void>::value, - "Segment was already assigned: Probably looping over same segment in loop nest"); - - using type = LoopTypes< - camp::list>::type...>, - camp::list>::type...>>; - + using segment_list = typename Types::segment_types_t; + using offset_list = typename Types::offset_types_t; + + static_assert(std::is_same, void>::value, + "Segment was already assigned: Probably looping over same " + "segment in loop nest"); + + using type = LoopTypes< + camp::list< + typename std::conditional>::type...>, + camp::list< + typename std::conditional>::type...>>; }; -template -using setSegmentType = - typename SetSegmentTypeHelper>::type; +template +using setSegmentType = typename SetSegmentTypeHelper< + Types, + Segment, + T, + camp::make_idx_seq_t>::type; -template -using setSegmentTypeFromData = - setSegmentType::index_types_t, Segment>>; +template +using setSegmentTypeFromData = setSegmentType< + Types, + Segment, + camp::at_v::index_types_t, Segment>>; } // end namespace internal diff --git a/include/RAJA/pattern/kernel/internal/Statement.hpp b/include/RAJA/pattern/kernel/internal/Statement.hpp index 48ca828a68..c0402edad9 100644 --- a/include/RAJA/pattern/kernel/internal/Statement.hpp +++ b/include/RAJA/pattern/kernel/internal/Statement.hpp @@ -28,25 +28,24 @@ namespace internal { - template -struct Statement { - static_assert(std::is_same::value || sizeof...(EnclosedStmts) > 0, - "Executable statement with no enclosed statements, this is almost certainly a bug"); +struct Statement +{ + static_assert(std::is_same::value || + sizeof...(EnclosedStmts) > 0, + "Executable statement with no enclosed statements, this is " + "almost certainly a bug"); Statement() = delete; using enclosed_statements_t = StatementList; - using execution_policy_t = ExecPolicy; + using execution_policy_t = ExecPolicy; }; - - template struct StatementExecutor; - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/StatementList.hpp b/include/RAJA/pattern/kernel/internal/StatementList.hpp index 5c0d71afb4..f0e5cd5175 100644 --- a/include/RAJA/pattern/kernel/internal/StatementList.hpp +++ b/include/RAJA/pattern/kernel/internal/StatementList.hpp @@ -35,8 +35,6 @@ template struct StatementExecutor; - - template using StatementList = camp::list; @@ -47,11 +45,13 @@ struct StatementListExecutor; template -struct StatementListExecutor { + typename StmtList, + typename Types> +struct StatementListExecutor +{ template - static RAJA_INLINE void exec(Data &&data) + static RAJA_INLINE void exec(Data&& data) { // Get the statement we're going to execute @@ -61,8 +61,8 @@ struct StatementListExecutor { StatementExecutor::exec(std::forward(data)); // call our next statement - StatementListExecutor::exec( - std::forward(data)); + StatementListExecutor::exec(std::forward(data)); } }; @@ -72,24 +72,23 @@ struct StatementListExecutor { */ template -struct StatementListExecutor { +struct StatementListExecutor +{ template - static RAJA_INLINE void exec(Data &&) - { - } + static RAJA_INLINE void exec(Data&&) + {} }; template -RAJA_INLINE void execute_statement_list(Data &&data) +RAJA_INLINE void execute_statement_list(Data&& data) { StatementListExecutor<0, camp::size::value, StmtList, Types>::exec( std::forward(data)); } - } // end namespace internal } // end namespace RAJA diff --git a/include/RAJA/pattern/kernel/internal/Template.hpp b/include/RAJA/pattern/kernel/internal/Template.hpp index c750b95986..7771ae99ee 100644 --- a/include/RAJA/pattern/kernel/internal/Template.hpp +++ b/include/RAJA/pattern/kernel/internal/Template.hpp @@ -39,8 +39,8 @@ struct SeqToType template struct ListOfNHelper; -template -struct ListOfNHelper > +template +struct ListOfNHelper> { using type = camp::list::type...>; }; @@ -49,13 +49,13 @@ struct ListOfNHelper > template struct TupleOfNHelper; -template -struct TupleOfNHelper > +template +struct TupleOfNHelper> { using type = camp::tuple::type...>; }; -} // namespace detail +} // namespace detail /* * This creates a camp::list with N types, each one being T. @@ -64,7 +64,8 @@ struct TupleOfNHelper > * */ template -using list_of_n = typename detail::ListOfNHelper>::type; +using list_of_n = + typename detail::ListOfNHelper>::type; /* @@ -74,8 +75,8 @@ using list_of_n = typename detail::ListOfNHelper>::ty * */ template -using tuple_of_n = typename detail::TupleOfNHelper>::type; - +using tuple_of_n = + typename detail::TupleOfNHelper>::type; } // end namespace internal diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index f1d70aeacb..453dc75a1c 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -28,8 +28,8 @@ #include "camp/concepts.hpp" #include "camp/tuple.hpp" -//Odd dependecy with atomics is breaking CI builds -//#include "RAJA/util/View.hpp" +// Odd dependecy with atomics is breaking CI builds +// #include "RAJA/util/View.hpp" #if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && !defined(RAJA_ENABLE_SYCL) #define RAJA_TEAM_SHARED __shared__ @@ -41,12 +41,17 @@ namespace RAJA { // GPU or CPU threads available -//strongly type the ExecPlace (guards agaist errors) -enum struct ExecPlace : int { HOST, DEVICE, NUM_PLACES }; - -struct null_launch_t { +// strongly type the ExecPlace (guards agaist errors) +enum struct ExecPlace : int +{ + HOST, + DEVICE, + NUM_PLACES }; +struct null_launch_t +{}; + // Support for host, and device template -struct LoopPolicy { +struct LoopPolicy +{ using host_policy_t = HOST_POLICY; #if defined(RAJA_GPU_ACTIVE) using device_policy_t = DEVICE_POLICY; @@ -68,7 +74,8 @@ template -struct LaunchPolicy { +struct LaunchPolicy +{ using host_policy_t = HOST_POLICY; #if defined(RAJA_GPU_ACTIVE) using device_policy_t = DEVICE_POLICY; @@ -76,48 +83,51 @@ struct LaunchPolicy { }; -struct Teams { +struct Teams +{ int value[3]; RAJA_INLINE RAJA_HOST_DEVICE - constexpr Teams() : value{1, 1, 1} {} + constexpr Teams() : value {1, 1, 1} {} RAJA_INLINE RAJA_HOST_DEVICE - constexpr Teams(int i) : value{i, 1, 1} {} + constexpr Teams(int i) : value {i, 1, 1} {} RAJA_INLINE RAJA_HOST_DEVICE - constexpr Teams(int i, int j) : value{i, j, 1} {} + constexpr Teams(int i, int j) : value {i, j, 1} {} RAJA_INLINE RAJA_HOST_DEVICE - constexpr Teams(int i, int j, int k) : value{i, j, k} {} + constexpr Teams(int i, int j, int k) : value {i, j, k} {} }; -struct Threads { +struct Threads +{ int value[3]; RAJA_INLINE RAJA_HOST_DEVICE - constexpr Threads() : value{1, 1, 1} {} + constexpr Threads() : value {1, 1, 1} {} RAJA_INLINE RAJA_HOST_DEVICE - constexpr Threads(int i) : value{i, 1, 1} {} + constexpr Threads(int i) : value {i, 1, 1} {} RAJA_INLINE RAJA_HOST_DEVICE - constexpr Threads(int i, int j) : value{i, j, 1} {} + constexpr Threads(int i, int j) : value {i, j, 1} {} RAJA_INLINE RAJA_HOST_DEVICE - constexpr Threads(int i, int j, int k) : value{i, j, k} {} + constexpr Threads(int i, int j, int k) : value {i, j, k} {} }; -struct Lanes { +struct Lanes +{ int value; RAJA_INLINE @@ -129,7 +139,8 @@ struct Lanes { constexpr Lanes(int i) : value(i) {} }; -struct LaunchParams { +struct LaunchParams +{ public: Teams teams; Threads threads; @@ -138,67 +149,71 @@ struct LaunchParams { RAJA_INLINE LaunchParams() = default; - LaunchParams(Teams in_teams, Threads in_threads, size_t in_shared_mem_size = 0) - : teams(in_teams), threads(in_threads), shared_mem_size(in_shared_mem_size) {}; + LaunchParams(Teams in_teams, + Threads in_threads, + size_t in_shared_mem_size = 0) + : teams(in_teams), + threads(in_threads), + shared_mem_size(in_shared_mem_size) {}; private: RAJA_HOST_DEVICE RAJA_INLINE - Teams apply(Teams const &a) { return (teams = a); } + Teams apply(Teams const& a) { return (teams = a); } RAJA_HOST_DEVICE RAJA_INLINE - Threads apply(Threads const &a) { return (threads = a); } + Threads apply(Threads const& a) { return (threads = a); } }; class LaunchContext { public: - - //Bump style allocator used to - //get memory from the pool + // Bump style allocator used to + // get memory from the pool size_t shared_mem_offset; - void *shared_mem_ptr; + void* shared_mem_ptr; #if defined(RAJA_ENABLE_SYCL) - mutable cl::sycl::nd_item<3> *itm; + mutable cl::sycl::nd_item<3>* itm; #endif RAJA_HOST_DEVICE LaunchContext() - : shared_mem_offset(0), shared_mem_ptr(nullptr) - { - } + : shared_mem_offset(0), shared_mem_ptr(nullptr) + {} - //TODO handle alignment - template + // TODO handle alignment + template RAJA_HOST_DEVICE T* getSharedMemory(size_t bytes) { - //Calculate offset in bytes with a char pointer - void* mem_ptr = static_cast(shared_mem_ptr) + shared_mem_offset; + // Calculate offset in bytes with a char pointer + void* mem_ptr = static_cast(shared_mem_ptr) + shared_mem_offset; - shared_mem_offset += bytes*sizeof(T); + shared_mem_offset += bytes * sizeof(T); - //convert to desired type + // convert to desired type return static_cast(mem_ptr); } /* //Odd dependecy with atomics is breaking CI builds - template - RAJA_HOST_DEVICE auto getSharedMemoryView(size_t bytes, arg idx, args... idxs) + template RAJA_HOST_DEVICE auto + getSharedMemoryView(size_t bytes, arg idx, args... idxs) { T * mem_ptr = &((T*) shared_mem_ptr)[shared_mem_offset]; shared_mem_offset += bytes*sizeof(T); - return RAJA::View>(mem_ptr, idx, idxs...); + return RAJA::View>(mem_ptr, idx, + idxs...); } */ RAJA_HOST_DEVICE void releaseSharedMemory() { - //On the cpu/gpu we want to restart the count + // On the cpu/gpu we want to restart the count shared_mem_offset = 0; } @@ -218,19 +233,24 @@ class LaunchContext template struct LaunchExecute; -//Policy based launch with support to new reducers... -template -void launch(LaunchParams const &launch_params, const char *kernel_name, ReduceParams&&... rest_of_launch_args) +// Policy based launch with support to new reducers... +template +void launch(LaunchParams const& launch_params, + const char* kernel_name, + ReduceParams&&... rest_of_launch_args) { - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto&& launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); - //Take the first policy as we assume the second policy is not user defined. - //We rely on the user to pair launch and loop policies correctly. - util::PluginContext context{util::make_context()}; + // Take the first policy as we assume the second policy is not user defined. + // We rely on the user to pair launch and loop policies correctly. + util::PluginContext context { + util::make_context()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -242,29 +262,36 @@ void launch(LaunchParams const &launch_params, const char *kernel_name, ReducePa using launch_t = LaunchExecute; - using Res = typename resources::get_resource::type; + using Res = typename resources::get_resource< + typename LAUNCH_POLICY::host_policy_t>::type; - launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, reducers); + launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, + reducers); util::callPostLaunchPlugins(context); } -//Duplicate of code above on account that we need to support the case in which a kernel_name is not given -template -void launch(LaunchParams const &launch_params, ReduceParams&&... rest_of_launch_args) +// Duplicate of code above on account that we need to support the case in which +// a kernel_name is not given +template +void launch(LaunchParams const& launch_params, + ReduceParams&&... rest_of_launch_args) { - const char *kernel_name = nullptr; + const char* kernel_name = nullptr; - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto&& launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); - //Take the first policy as we assume the second policy is not user defined. - //We rely on the user to pair launch and loop policies correctly. - util::PluginContext context{util::make_context()}; + // Take the first policy as we assume the second policy is not user defined. + // We rely on the user to pair launch and loop policies correctly. + util::PluginContext context { + util::make_context()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; @@ -276,148 +303,200 @@ void launch(LaunchParams const &launch_params, ReduceParams&&... rest_of_launch_ using launch_t = LaunchExecute; - using Res = typename resources::get_resource::type; + using Res = typename resources::get_resource< + typename LAUNCH_POLICY::host_policy_t>::type; - launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, reducers); + launch_t::exec(Res::get_default(), launch_params, kernel_name, p_body, + reducers); util::callPostLaunchPlugins(context); } //================================================= -//Run time based policy launch +// Run time based policy launch //================================================= template -void launch(ExecPlace place, LaunchParams const ¶ms, BODY const &body) +void launch(ExecPlace place, LaunchParams const& params, BODY const& body) { launch(place, params, nullptr, body); } template -void launch(ExecPlace place, const LaunchParams ¶ms, const char *kernel_name, BODY const &body) +void launch(ExecPlace place, + const LaunchParams& params, + const char* kernel_name, + BODY const& body) { - //Forward to single policy launch API - simplifies testing of plugins - switch (place) { - case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch>(Res::get_default(), params, kernel_name, body); - break; - } + // Forward to single policy launch API - simplifies testing of plugins + switch (place) + { + case ExecPlace::HOST: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), params, kernel_name, body); + break; + } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch>(Res::get_default(), params, kernel_name, body); - break; - } + case ExecPlace::DEVICE: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), params, kernel_name, body); + break; + } #endif - default: - RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); + default: + RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -//Run-time API for new reducer interface +// Run-time API for new reducer interface template -void launch(ExecPlace place, const LaunchParams &launch_params, const char *kernel_name, ReduceParams&&... rest_of_launch_args) +void launch(ExecPlace place, + const LaunchParams& launch_params, + const char* kernel_name, + ReduceParams&&... rest_of_launch_args) { - //Forward to single policy launch API - simplifies testing of plugins - switch (place) { - case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); - break; - } + // Forward to single policy launch API - simplifies testing of plugins + switch (place) + { + case ExecPlace::HOST: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), launch_params, kernel_name, + std::forward(rest_of_launch_args)...); + break; + } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); - break; - } + case ExecPlace::DEVICE: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), launch_params, kernel_name, + std::forward(rest_of_launch_args)...); + break; + } #endif - default: - RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); + default: + RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -//Run-time API for new reducer interface with support of the case without a new kernel name +// Run-time API for new reducer interface with support of the case without a new +// kernel name template -void launch(ExecPlace place, const LaunchParams &launch_params, ReduceParams&&... rest_of_launch_args) - //BODY const &body) +void launch(ExecPlace place, + const LaunchParams& launch_params, + ReduceParams&&... rest_of_launch_args) +// BODY const &body) { - const char *kernel_name = nullptr; + const char* kernel_name = nullptr; - //Forward to single policy launch API - simplifies testing of plugins - switch (place) { - case ExecPlace::HOST: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); - break; - } + // Forward to single policy launch API - simplifies testing of plugins + switch (place) + { + case ExecPlace::HOST: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::host_policy_t>::type; + launch>( + Res::get_default(), launch_params, kernel_name, + std::forward(rest_of_launch_args)...); + break; + } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using Res = typename resources::get_resource::type; - launch> - (Res::get_default(), launch_params, kernel_name, std::forward(rest_of_launch_args)...); - break; - } + case ExecPlace::DEVICE: + { + using Res = typename resources::get_resource< + typename POLICY_LIST::device_policy_t>::type; + launch>( + Res::get_default(), launch_params, kernel_name, + std::forward(rest_of_launch_args)...); + break; + } #endif - default: - RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); + default: + RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - } -// Helper function to retrieve a resource based on the run-time policy - if a device is active -#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP) || defined(RAJA_ENABLE_SYCL) -template -RAJA::resources::Resource Get_Runtime_Resource(T host_res, U device_res, RAJA::ExecPlace device){ - if(device == RAJA::ExecPlace::DEVICE) {return RAJA::resources::Resource(device_res);} - else { return RAJA::resources::Resource(host_res); } +// Helper function to retrieve a resource based on the run-time policy - if a +// device is active +#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP) || \ + defined(RAJA_ENABLE_SYCL) +template +RAJA::resources::Resource +Get_Runtime_Resource(T host_res, U device_res, RAJA::ExecPlace device) +{ + if (device == RAJA::ExecPlace::DEVICE) + { + return RAJA::resources::Resource(device_res); + } + else + { + return RAJA::resources::Resource(host_res); + } } #endif -template -RAJA::resources::Resource Get_Host_Resource(T host_res, RAJA::ExecPlace device){ - if(device == RAJA::ExecPlace::DEVICE) {RAJA_ABORT_OR_THROW("Device is not enabled");} +template +RAJA::resources::Resource Get_Host_Resource(T host_res, RAJA::ExecPlace device) +{ + if (device == RAJA::ExecPlace::DEVICE) + { + RAJA_ABORT_OR_THROW("Device is not enabled"); + } return RAJA::resources::Resource(host_res); } -//Launch API which takes team resource struct and supports new reducers -template +// Launch API which takes team resource struct and supports new reducers +template resources::EventProxy -launch(RAJA::resources::Resource res, LaunchParams const &launch_params, - const char *kernel_name, ReduceParams&&... rest_of_launch_args) +launch(RAJA::resources::Resource res, + LaunchParams const& launch_params, + const char* kernel_name, + ReduceParams&&... rest_of_launch_args) { - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto&& launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); ExecPlace place; - if(res.get_platform() == RAJA::Platform::host) { + if (res.get_platform() == RAJA::Platform::host) + { place = RAJA::ExecPlace::HOST; - } else { + } + else + { place = RAJA::ExecPlace::DEVICE; } // - //Configure plugins + // Configure plugins // #if defined(RAJA_GPU_ACTIVE) - util::PluginContext context{place == ExecPlace::HOST ? - util::make_context() : - util::make_context()}; + util::PluginContext context { + place == ExecPlace::HOST + ? util::make_context() + : util::make_context()}; #else - util::PluginContext context{util::make_context()}; + util::PluginContext context { + util::make_context()}; #endif util::callPreCapturePlugins(context); @@ -429,24 +508,30 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, util::callPreLaunchPlugins(context); - switch (place) { - case ExecPlace::HOST: { - using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); - util::callPostLaunchPlugins(context); - return e_proxy; - } + switch (place) + { + case ExecPlace::HOST: + { + using launch_t = LaunchExecute; + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + util::callPostLaunchPlugins(context); + return e_proxy; + } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); - util::callPostLaunchPlugins(context); - return e_proxy; - } + case ExecPlace::DEVICE: + { + using launch_t = LaunchExecute; + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + util::callPostLaunchPlugins(context); + return e_proxy; + } #endif - default: { - RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); - } + default: + { + RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); + } } RAJA_ABORT_OR_THROW("Unknown launch place"); @@ -456,36 +541,45 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, } -//Duplicate of API above on account that we need to handle the case that a kernel name is not provided -template +// Duplicate of API above on account that we need to handle the case that a +// kernel name is not provided +template resources::EventProxy -launch(RAJA::resources::Resource res, LaunchParams const &launch_params, +launch(RAJA::resources::Resource res, + LaunchParams const& launch_params, ReduceParams&&... rest_of_launch_args) { - const char *kernel_name = nullptr; + const char* kernel_name = nullptr; - //Get reducers - auto reducers = expt::make_forall_param_pack(std::forward(rest_of_launch_args)...); + // Get reducers + auto reducers = expt::make_forall_param_pack( + std::forward(rest_of_launch_args)...); - auto&& launch_body = expt::get_lambda(std::forward(rest_of_launch_args)...); + auto&& launch_body = + expt::get_lambda(std::forward(rest_of_launch_args)...); ExecPlace place; - if(res.get_platform() == RAJA::Platform::host) { + if (res.get_platform() == RAJA::Platform::host) + { place = RAJA::ExecPlace::HOST; - } else { + } + else + { place = RAJA::ExecPlace::DEVICE; } // - //Configure plugins + // Configure plugins // #if defined(RAJA_GPU_ACTIVE) - util::PluginContext context{place == ExecPlace::HOST ? - util::make_context() : - util::make_context()}; + util::PluginContext context { + place == ExecPlace::HOST + ? util::make_context() + : util::make_context()}; #else - util::PluginContext context{util::make_context()}; + util::PluginContext context { + util::make_context()}; #endif util::callPreCapturePlugins(context); @@ -497,24 +591,30 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, util::callPreLaunchPlugins(context); - switch (place) { - case ExecPlace::HOST: { - using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); - util::callPostLaunchPlugins(context); - return e_proxy; - } + switch (place) + { + case ExecPlace::HOST: + { + using launch_t = LaunchExecute; + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + util::callPostLaunchPlugins(context); + return e_proxy; + } #if defined(RAJA_GPU_ACTIVE) - case ExecPlace::DEVICE: { - using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, launch_params, kernel_name, p_body, reducers); - util::callPostLaunchPlugins(context); - return e_proxy; - } + case ExecPlace::DEVICE: + { + using launch_t = LaunchExecute; + resources::EventProxy e_proxy = + launch_t::exec(res, launch_params, kernel_name, p_body, reducers); + util::callPostLaunchPlugins(context); + return e_proxy; + } #endif - default: { - RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); - } + default: + { + RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); + } } RAJA_ABORT_OR_THROW("Unknown launch place"); @@ -523,7 +623,7 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params, return resources::EventProxy(res); } -template +template #if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) using loop_policy = typename POLICY_LIST::device_policy_t; #else @@ -541,28 +641,23 @@ template -RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, - SEGMENT const &segment, - BODY const &body) +RAJA_HOST_DEVICE RAJA_INLINE void +loop(CONTEXT const& ctx, SEGMENT const& segment, BODY const& body) { - LoopExecute, SEGMENT>::exec(ctx, - segment, - body); + LoopExecute, SEGMENT>::exec(ctx, segment, body); } template -RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const &ctx, - SEGMENT const &segment, - BODY const &body) +RAJA_HOST_DEVICE RAJA_INLINE void +loop_icount(CONTEXT const& ctx, SEGMENT const& segment, BODY const& body) { - LoopICountExecute, SEGMENT>::exec(ctx, - segment, - body); + LoopICountExecute, SEGMENT>::exec(ctx, segment, + body); } namespace expt @@ -573,15 +668,13 @@ template -RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) +RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const& ctx, + SEGMENT const& segment0, + SEGMENT const& segment1, + BODY const& body) { - LoopExecute, SEGMENT>::exec(ctx, - segment0, - segment1, + LoopExecute, SEGMENT>::exec(ctx, segment0, segment1, body); } @@ -590,18 +683,15 @@ template -RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const &ctx, - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) +RAJA_HOST_DEVICE RAJA_INLINE void loop(CONTEXT const& ctx, + SEGMENT const& segment0, + SEGMENT const& segment1, + SEGMENT const& segment2, + BODY const& body) { - LoopExecute, SEGMENT>::exec(ctx, - segment0, - segment1, - segment2, - body); + LoopExecute, SEGMENT>::exec(ctx, segment0, segment1, + segment2, body); } RAJA_SUPPRESS_HD_WARN @@ -609,18 +699,18 @@ template -RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const &ctx, - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) +RAJA_HOST_DEVICE RAJA_INLINE void loop_icount(CONTEXT const& ctx, + SEGMENT const& segment0, + SEGMENT const& segment1, + SEGMENT const& segment2, + BODY const& body) { - LoopICountExecute, SEGMENT>::exec(ctx, - segment0, segment1, segment2, body); + LoopICountExecute, SEGMENT>::exec( + ctx, segment0, segment1, segment2, body); } -} //namespace expt +} // namespace expt template struct TileExecute; @@ -633,15 +723,13 @@ template -RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const &ctx, +RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const& ctx, TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) + SEGMENT const& segment, + BODY const& body) { - TileExecute, SEGMENT>::exec(ctx, - tile_size, - segment, + TileExecute, SEGMENT>::exec(ctx, tile_size, segment, body); } @@ -650,15 +738,13 @@ template -RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const &ctx, - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) +RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const& ctx, + TILE_T tile_size, + SEGMENT const& segment, + BODY const& body) { - TileTCountExecute, SEGMENT>::exec(ctx, - tile_size, - segment, - body); + TileTCountExecute, SEGMENT>::exec(ctx, tile_size, + segment, body); } namespace expt @@ -669,20 +755,16 @@ template -RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const &ctx, +RAJA_HOST_DEVICE RAJA_INLINE void tile(CONTEXT const& ctx, TILE_T tile_size0, TILE_T tile_size1, - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) + SEGMENT const& segment0, + SEGMENT const& segment1, + BODY const& body) { - TileExecute, SEGMENT>::exec(ctx, - tile_size0, - tile_size1, - segment0, - segment1, - body); + TileExecute, SEGMENT>::exec( + ctx, tile_size0, tile_size1, segment0, segment1, body); } template -RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const &ctx, - TILE_T tile_size0, - TILE_T tile_size1, - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) +RAJA_HOST_DEVICE RAJA_INLINE void tile_tcount(CONTEXT const& ctx, + TILE_T tile_size0, + TILE_T tile_size1, + SEGMENT const& segment0, + SEGMENT const& segment1, + BODY const& body) { - TileTCountExecute, SEGMENT>::exec(ctx, - tile_size0, - tile_size1, - segment0, - segment1, - body); + TileTCountExecute, SEGMENT>::exec( + ctx, tile_size0, tile_size1, segment0, segment1, body); } -} //namespace expt +} // namespace expt } // namespace RAJA #endif diff --git a/include/RAJA/pattern/multi_reduce.hpp b/include/RAJA/pattern/multi_reduce.hpp index 3fbe36877c..ca3f4e58d0 100644 --- a/include/RAJA/pattern/multi_reduce.hpp +++ b/include/RAJA/pattern/multi_reduce.hpp @@ -156,7 +156,7 @@ struct MultiReduceSum; */ template struct MultiReduceBitOr; - + /*! ****************************************************************************** @@ -171,7 +171,8 @@ struct MultiReduceBitOr; Index_ptr bins = ...; Real_ptr bit_vals = ...; - MultiReduceBitAnd my_bits(num_bins, init_val); + MultiReduceBitAnd my_bits(num_bins, + init_val); forall( ..., [=] (Index_type i) { my_bits[bins[i]] &= (data[i]); @@ -188,7 +189,7 @@ struct MultiReduceBitOr; template struct MultiReduceBitAnd; -} //namespace RAJA +} // namespace RAJA #endif // closing endif for header file include guard diff --git a/include/RAJA/pattern/params/forall.hpp b/include/RAJA/pattern/params/forall.hpp index 5a656206f5..0cb36f597c 100644 --- a/include/RAJA/pattern/params/forall.hpp +++ b/include/RAJA/pattern/params/forall.hpp @@ -21,348 +21,436 @@ namespace RAJA namespace expt { - // - // - // Forall Parameter Packing type - // - // - struct ParamMultiplexer; - - template - struct ForallParamPack { - - friend struct ParamMultiplexer; - - using Base = camp::tuple; - Base param_tup; - - static constexpr size_t param_tup_sz = camp::tuple_size::value; - using params_seq = camp::make_idx_seq_t< param_tup_sz >; - - private: - - // Init - template - static constexpr void detail_init(EXEC_POL, camp::idx_seq, ForallParamPack& f_params, Args&& ...args) { - CAMP_EXPAND(expt::detail::init( camp::get(f_params.param_tup), std::forward(args)... )); - } - - // Combine - template - RAJA_HOST_DEVICE - static constexpr void detail_combine(EXEC_POL, camp::idx_seq, ForallParamPack& out, const ForallParamPack& in ) { - CAMP_EXPAND(detail::combine( camp::get(out.param_tup), camp::get(in.param_tup))); - } - - template - RAJA_HOST_DEVICE - static constexpr void detail_combine(EXEC_POL, camp::idx_seq, ForallParamPack& f_params ) { - CAMP_EXPAND(detail::combine( camp::get(f_params.param_tup) )); - } - - // Resolve - template - static constexpr void detail_resolve(EXEC_POL, camp::idx_seq, ForallParamPack& f_params, Args&& ...args) { - CAMP_EXPAND(detail::resolve( camp::get(f_params.param_tup), std::forward(args)... )); - } - - // Used to construct the argument TYPES that will be invoked with the lambda. - template - static constexpr auto LAMBDA_ARG_TUP_T() { return camp::tuple<>{}; }; - template - static constexpr auto LAMBDA_ARG_TUP_T() { return typename First::ARG_TUP_T(); }; - template - static constexpr auto LAMBDA_ARG_TUP_T() { return camp::tuple_cat_pair(typename First::ARG_TUP_T(), LAMBDA_ARG_TUP_T()); }; - - using lambda_arg_tuple_t = decltype(LAMBDA_ARG_TUP_T()); - - //Use the size of param_tup to generate the argument list. - RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<0>) { return camp::make_tuple(); } - RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<1>) { return camp::get(param_tup).get_lambda_arg_tup(); } - template - RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num) { - return camp::tuple_cat_pair( camp::get(param_tup).get_lambda_arg_tup(), LAMBDA_ARG_TUP_V(camp::num()) ); - } - - public: - ForallParamPack(){} - - RAJA_HOST_DEVICE constexpr lambda_arg_tuple_t lambda_args() {return LAMBDA_ARG_TUP_V(camp::num());} - - using lambda_arg_seq = camp::make_idx_seq_t::value>; - - template - ForallParamPack(camp::tuple&& t) : param_tup(std::move(t)) {}; - }; // struct ForallParamPack - - - - //=========================================================================== - // - // - // ParamMultiplexer is how we hook into the individual calls within forall_impl. - // - // - struct ParamMultiplexer { - template> - static void constexpr init( ForallParamPack& f_params, Args&& ...args) { - FP::detail_init(EXEC_POL(),typename FP::params_seq(), f_params, std::forward(args)... ); - } - template> - static void constexpr combine(ForallParamPack& f_params, Args&& ...args){ - FP::detail_combine(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)... ); - } - template> - static void constexpr resolve( ForallParamPack& f_params, Args&& ...args){ - FP::detail_resolve(EXEC_POL(), typename FP::params_seq(), f_params, std::forward(args)... ); - } - }; - //=========================================================================== +// +// +// Forall Parameter Packing type +// +// +struct ParamMultiplexer; + +template +struct ForallParamPack +{ + + friend struct ParamMultiplexer; + using Base = camp::tuple; + Base param_tup; + static constexpr size_t param_tup_sz = camp::tuple_size::value; + using params_seq = camp::make_idx_seq_t; + +private: + // Init + template + static constexpr void detail_init(EXEC_POL, + camp::idx_seq, + ForallParamPack& f_params, + Args&&... args) + { + CAMP_EXPAND(expt::detail::init(camp::get(f_params.param_tup), + std::forward(args)...)); + } + + // Combine + template + RAJA_HOST_DEVICE static constexpr void + detail_combine(EXEC_POL, + camp::idx_seq, + ForallParamPack& out, + const ForallParamPack& in) + { + CAMP_EXPAND(detail::combine(camp::get(out.param_tup), + camp::get(in.param_tup))); + } - //=========================================================================== - // - // - // ForallParamPack generators. - // - // - RAJA_INLINE static auto get_empty_forall_param_pack(){ - static ForallParamPack<> p; - return p; + template + RAJA_HOST_DEVICE static constexpr void + detail_combine(EXEC_POL, camp::idx_seq, ForallParamPack& f_params) + { + CAMP_EXPAND(detail::combine(camp::get(f_params.param_tup))); } - namespace detail { - // all_true trick to perform variadic expansion in static asserts. - // https://stackoverflow.com/questions/36933176/how-do-you-static-assert-the-values-in-a-parameter-pack-of-a-variadic-template - template struct bool_pack; - template - using all_true = std::is_same, bool_pack>; + // Resolve + template + static constexpr void detail_resolve(EXEC_POL, + camp::idx_seq, + ForallParamPack& f_params, + Args&&... args) + { + CAMP_EXPAND(detail::resolve(camp::get(f_params.param_tup), + std::forward(args)...)); + } - template - using check_types_derive_base = all_true::value...>; - } // namespace detail + // Used to construct the argument TYPES that will be invoked with the lambda. + template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return camp::tuple<> {}; + }; + template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return typename First::ARG_TUP_T(); + }; + template + static constexpr auto LAMBDA_ARG_TUP_T() + { + return camp::tuple_cat_pair(typename First::ARG_TUP_T(), + LAMBDA_ARG_TUP_T()); + }; + using lambda_arg_tuple_t = decltype(LAMBDA_ARG_TUP_T()); - template - constexpr auto make_forall_param_pack_from_tuple(camp::tuple&& tuple) { - static_assert(detail::check_types_derive_base...>::value, - "Forall optional arguments do not derive ForallParamBase. Please see Reducer, ReducerLoc and KernelName for examples.") ; - return ForallParamPack...>(std::move(tuple)); + // Use the size of param_tup to generate the argument list. + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<0>) + { + return camp::make_tuple(); + } + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num<1>) + { + return camp::get(param_tup).get_lambda_arg_tup(); + } + template + RAJA_HOST_DEVICE constexpr auto LAMBDA_ARG_TUP_V(camp::num) + { + return camp::tuple_cat_pair( + camp::get(param_tup).get_lambda_arg_tup(), + LAMBDA_ARG_TUP_V(camp::num())); } - +public: + ForallParamPack() {} - namespace detail { - // Maybe we should do a lot of these with structs... - template - constexpr auto tuple_from_seq (const camp::idx_seq&, TupleType&& tuple){ - return camp::forward_as_tuple( camp::get< Seq >(std::forward(tuple))... ); - }; + RAJA_HOST_DEVICE constexpr lambda_arg_tuple_t lambda_args() + { + return LAMBDA_ARG_TUP_V(camp::num()); + } - template - constexpr auto strip_last_elem(camp::tuple&& tuple){ - return tuple_from_seq(camp::make_idx_seq_t{},std::move(tuple)); - }; - } // namespace detail + using lambda_arg_seq = + camp::make_idx_seq_t::value>; + template + ForallParamPack(camp::tuple&& t) : param_tup(std::move(t)) {}; +}; // struct ForallParamPack - // Make a tuple of the param pack except the final element... - template - constexpr auto make_forall_param_pack(Args&&... args){ - // We assume the last element of the pack is the lambda so we need to strip it from the list. - auto stripped_arg_tuple = detail::strip_last_elem( camp::forward_as_tuple(std::forward(args)...) ); - return make_forall_param_pack_from_tuple(std::move(stripped_arg_tuple)); + +//=========================================================================== +// +// +// ParamMultiplexer is how we hook into the individual calls within forall_impl. +// +// +struct ParamMultiplexer +{ + template > + static void constexpr init(ForallParamPack& f_params, + Args&&... args) + { + FP::detail_init(EXEC_POL(), typename FP::params_seq(), f_params, + std::forward(args)...); + } + template > + static void constexpr combine(ForallParamPack& f_params, + Args&&... args) + { + FP::detail_combine(EXEC_POL(), typename FP::params_seq(), f_params, + std::forward(args)...); + } + template > + static void constexpr resolve(ForallParamPack& f_params, + Args&&... args) + { + FP::detail_resolve(EXEC_POL(), typename FP::params_seq(), f_params, + std::forward(args)...); } - //=========================================================================== - - - - //=========================================================================== - // - // - // Callable should be the last argument in the param pack, just extract it... - // - // - template - constexpr auto&& get_lambda(Args&&... args){ - return camp::get( camp::forward_as_tuple(std::forward(args)...) ); - } - //=========================================================================== - - - - //=========================================================================== - // - // - // Checking expected argument list against the assumed lambda. - // - // - namespace detail { - - // - // - // Lambda traits Utilities - // - // - template - struct lambda_traits; - - template - struct lambda_traits - { // non-const specialization - using arg_type = First; - }; - template - struct lambda_traits - { // const specialization - using arg_type = First; - }; - - template - typename lambda_traits::arg_type* lambda_arg_helper(T); - - - // - // - // List manipulation Utilities - // - // - template - constexpr auto list_remove_pointer(const camp::list&){ - return camp::list::type>...>{}; - } - - template - constexpr auto list_add_lvalue_ref(const camp::list&){ - return camp::list::type...>{}; - } - - template - constexpr auto tuple_to_list(const camp::tuple&) { - return camp::list{}; - } - - // TODO : Change to std::is_invocable at c++17 - template - struct is_invocable : - std::is_constructible< - std::function, - std::reference_wrapper::type> - >{}; - - template - using void_t = void; - - template - struct has_empty_op : std::false_type{}; - - template - struct has_empty_op)>> : std::true_type{}; - - template - struct get_lambda_index_type { - typedef typename std::remove_pointer< - decltype(lambda_arg_helper( - &camp::decay::operator()) - ) - >::type type; - }; - - // If LAMBDA::operator() is not available this probably isn't a generic lambda and we can't extract and check args. - template - constexpr concepts::enable_if>> check_invocable(LAMBDA&&, const camp::list&) {} - - template - constexpr concepts::enable_if> check_invocable(LAMBDA&&, const camp::list&) { +}; +//=========================================================================== + + +//=========================================================================== +// +// +// ForallParamPack generators. +// +// +RAJA_INLINE static auto get_empty_forall_param_pack() +{ + static ForallParamPack<> p; + return p; +} + +namespace detail +{ +// all_true trick to perform variadic expansion in static asserts. +// https://stackoverflow.com/questions/36933176/how-do-you-static-assert-the-values-in-a-parameter-pack-of-a-variadic-template +template +struct bool_pack; +template +using all_true = std::is_same, bool_pack>; + +template +using check_types_derive_base = + all_true::value...>; +} // namespace detail + + +template +constexpr auto make_forall_param_pack_from_tuple(camp::tuple&& tuple) +{ + static_assert(detail::check_types_derive_base...>::value, + "Forall optional arguments do not derive ForallParamBase. " + "Please see Reducer, ReducerLoc and KernelName for examples."); + return ForallParamPack...>(std::move(tuple)); +} + + +namespace detail +{ +// Maybe we should do a lot of these with structs... +template +constexpr auto tuple_from_seq(const camp::idx_seq&, TupleType&& tuple) +{ + return camp::forward_as_tuple( + camp::get(std::forward(tuple))...); +}; + +template +constexpr auto strip_last_elem(camp::tuple&& tuple) +{ + return tuple_from_seq(camp::make_idx_seq_t {}, + std::move(tuple)); +}; +} // namespace detail + + +// Make a tuple of the param pack except the final element... +template +constexpr auto make_forall_param_pack(Args&&... args) +{ + // We assume the last element of the pack is the lambda so we need to strip it + // from the list. + auto stripped_arg_tuple = detail::strip_last_elem( + camp::forward_as_tuple(std::forward(args)...)); + return make_forall_param_pack_from_tuple(std::move(stripped_arg_tuple)); +} +//=========================================================================== + + +//=========================================================================== +// +// +// Callable should be the last argument in the param pack, just extract it... +// +// +template +constexpr auto&& get_lambda(Args&&... args) +{ + return camp::get( + camp::forward_as_tuple(std::forward(args)...)); +} +//=========================================================================== + + +//=========================================================================== +// +// +// Checking expected argument list against the assumed lambda. +// +// +namespace detail +{ + +// +// +// Lambda traits Utilities +// +// +template +struct lambda_traits; + +template +struct lambda_traits +{ // non-const specialization + using arg_type = First; +}; +template +struct lambda_traits +{ // const specialization + using arg_type = First; +}; + +template +typename lambda_traits::arg_type* lambda_arg_helper(T); + + +// +// +// List manipulation Utilities +// +// +template +constexpr auto list_remove_pointer(const camp::list&) +{ + return camp::list::type>...> {}; +} + +template +constexpr auto list_add_lvalue_ref(const camp::list&) +{ + return camp::list::type...> {}; +} + +template +constexpr auto tuple_to_list(const camp::tuple&) +{ + return camp::list {}; +} + +// TODO : Change to std::is_invocable at c++17 +template +struct is_invocable + : std::is_constructible< + std::function, + std::reference_wrapper::type>> +{}; + +template +using void_t = void; + +template +struct has_empty_op : std::false_type +{}; + +template +struct has_empty_op)>> + : std::true_type +{}; + +template +struct get_lambda_index_type +{ + typedef typename std::remove_pointer::operator()))>::type type; +}; + +// If LAMBDA::operator() is not available this probably isn't a generic lambda +// and we can't extract and check args. +template +constexpr concepts::enable_if>> +check_invocable(LAMBDA&&, const camp::list&) +{} + +template +constexpr concepts::enable_if> +check_invocable(LAMBDA&&, const camp::list&) +{ #if !defined(RAJA_ENABLE_HIP) - static_assert(is_invocable::type, EXPECTED_ARGS...>::value, "LAMBDA Not invocable w/ EXPECTED_ARGS. Ordering and types must match between RAJA::expt::Reduce() and ValOp arguments."); + static_assert( + is_invocable::type, + EXPECTED_ARGS...>::value, + "LAMBDA Not invocable w/ EXPECTED_ARGS. Ordering and types must match " + "between RAJA::expt::Reduce() and ValOp arguments."); #endif - } - - } // namespace detail +} +} // namespace detail - template - constexpr - void - check_forall_optional_args(Lambda&& l, ForallParams& fpp) { - using expected_arg_type_list = decltype( detail::list_add_lvalue_ref( - detail::list_remove_pointer( - detail::tuple_to_list( - fpp.lambda_args() - ) - ) - )); +template +constexpr void check_forall_optional_args(Lambda&& l, ForallParams& fpp) +{ - detail::check_invocable(std::forward(l), expected_arg_type_list{}); - } - //=========================================================================== - + using expected_arg_type_list = decltype(detail::list_add_lvalue_ref( + detail::list_remove_pointer(detail::tuple_to_list(fpp.lambda_args())))); + detail::check_invocable(std::forward(l), expected_arg_type_list {}); +} +//=========================================================================== - //=========================================================================== - // - // - // Type trailts for SFINAE work. - // - // - namespace type_traits - { - template struct is_ForallParamPack : std::false_type {}; - template struct is_ForallParamPack> : std::true_type {}; - template struct is_ForallParamPack_empty : std::true_type {}; - template struct is_ForallParamPack_empty> : std::false_type {}; - template <> struct is_ForallParamPack_empty> : std::true_type {}; - } - //=========================================================================== - - - - //=========================================================================== - // - // - // Invoke Forall with Params. - // - // - namespace detail { - template - RAJA_HOST_DEVICE - constexpr - auto get_lambda_args(FP& fpp) - -> decltype( *camp::get( fpp.lambda_args() ) ) { - return ( *camp::get( fpp.lambda_args() ) ); - } - - CAMP_SUPPRESS_HD_WARN - template - RAJA_HOST_DEVICE constexpr auto invoke_with_order(Params&& params, - Fn&& f, - camp::idx_seq, - Ts&&... extra) - { - return f(std::forward(extra...), ( get_lambda_args(params) )...); - } - } // namespace detail - - //CAMP_SUPPRESS_HD_WARN - template - RAJA_HOST_DEVICE constexpr auto invoke_body(Params&& params, Fn&& f, Ts&&... extra) - { - return detail::invoke_with_order( - camp::forward(params), - camp::forward(f), - typename camp::decay::lambda_arg_seq(), - camp::forward(extra)...); - } - //=========================================================================== +//=========================================================================== +// +// +// Type trailts for SFINAE work. +// +// +namespace type_traits +{ +template +struct is_ForallParamPack : std::false_type +{}; +template +struct is_ForallParamPack> : std::true_type +{}; + +template +struct is_ForallParamPack_empty : std::true_type +{}; +template +struct is_ForallParamPack_empty> + : std::false_type +{}; +template <> +struct is_ForallParamPack_empty> : std::true_type +{}; +} // namespace type_traits +//=========================================================================== + + +//=========================================================================== +// +// +// Invoke Forall with Params. +// +// +namespace detail +{ +template +RAJA_HOST_DEVICE constexpr auto get_lambda_args(FP& fpp) + -> decltype(*camp::get(fpp.lambda_args())) +{ + return (*camp::get(fpp.lambda_args())); +} + +CAMP_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE constexpr auto invoke_with_order(Params&& params, + Fn&& f, + camp::idx_seq, + Ts&&... extra) +{ + return f(std::forward(extra...), + (get_lambda_args(params))...); +} +} // namespace detail + +// CAMP_SUPPRESS_HD_WARN +template +RAJA_HOST_DEVICE constexpr auto +invoke_body(Params&& params, Fn&& f, Ts&&... extra) +{ + return detail::invoke_with_order( + camp::forward(params), camp::forward(f), + typename camp::decay::lambda_arg_seq(), + camp::forward(extra)...); +} +//=========================================================================== -} // namespace expt -} // namespace RAJA +} // namespace expt +} // namespace RAJA -#endif // FORALL_PARAM_HPP +#endif // FORALL_PARAM_HPP diff --git a/include/RAJA/pattern/params/kernel_name.hpp b/include/RAJA/pattern/params/kernel_name.hpp index e768d8dd59..e9d2a6e3e1 100644 --- a/include/RAJA/pattern/params/kernel_name.hpp +++ b/include/RAJA/pattern/params/kernel_name.hpp @@ -10,23 +10,20 @@ namespace expt namespace detail { - struct KernelName : public ForallParamBase { - RAJA_HOST_DEVICE KernelName() {} - KernelName(const char* name_in) : name(name_in) {} - const char* name; - }; - -} // namespace detail - -inline auto KernelName(const char * n) +struct KernelName : public ForallParamBase { - return detail::KernelName(n); -} -} // namespace expt + RAJA_HOST_DEVICE KernelName() {} + KernelName(const char* name_in) : name(name_in) {} + const char* name; +}; + +} // namespace detail +inline auto KernelName(const char* n) { return detail::KernelName(n); } +} // namespace expt -} // namespace RAJA +} // namespace RAJA -#endif // KERNEL_NAME_HPP +#endif // KERNEL_NAME_HPP diff --git a/include/RAJA/pattern/params/params_base.hpp b/include/RAJA/pattern/params/params_base.hpp index 98380f6ffc..7347dc521d 100644 --- a/include/RAJA/pattern/params/params_base.hpp +++ b/include/RAJA/pattern/params/params_base.hpp @@ -7,129 +7,259 @@ namespace RAJA namespace expt { - template - struct ValLoc { - using index_type = IndexType; - using value_type = T; - - ValLoc() = default; - RAJA_HOST_DEVICE constexpr explicit ValLoc(value_type v) : val(v) {} - RAJA_HOST_DEVICE constexpr ValLoc(value_type v, index_type l) : val(v), loc(l) {} - - ValLoc(ValLoc const &) = default; - ValLoc(ValLoc &&) = default; - ValLoc& operator=(ValLoc const &) = default; - ValLoc& operator=(ValLoc &&) = default; - - RAJA_HOST_DEVICE constexpr bool operator<(const ValLoc& rhs) const { return val < rhs.val; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValLoc& rhs) const { return val > rhs.val; } - - RAJA_HOST_DEVICE constexpr const value_type& getVal() const {return val;} - RAJA_HOST_DEVICE constexpr const index_type& getLoc() const {return loc;} - - RAJA_HOST_DEVICE void set(T inval, IndexType inindex) {val = inval; loc = inindex;} - RAJA_HOST_DEVICE void setVal(T inval) {val = inval;} - RAJA_HOST_DEVICE void setLoc(IndexType inindex) {loc = inindex;} - - value_type val; - index_type loc = -1; - }; - - template class Op> - struct ValOp { - using value_type = T; - using op_type = Op; - - ValOp() = default; - RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} - - ValOp(ValOp const &) = default; - ValOp(ValOp &&) = default; - ValOp& operator=(ValOp const &) = default; - ValOp& operator=(ValOp &&) = default; - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & min(value_type v) { if (v < val) { val = v; } return *this; } - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & max(value_type v) { if (v > val) { val = v; } return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator+=(const value_type& rhs) { val += rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator&=(const value_type& rhs) { val &= rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & operator|=(const value_type& rhs) { val |= rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE ValOp & operator&=(value_type& rhs) { val &= rhs; return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE ValOp & operator|=(value_type& rhs) { val |= rhs; return *this; } - - RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const { val < rhs.val; return *this; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const { val > rhs.val; return *this; } - - value_type val = op_type::identity(); - }; - - template class Op> - struct ValOp , Op> { - using index_type = IndexType; - using value_type = ValLoc; - using op_type = Op; - using valloc_value_type = typename value_type::value_type; - using valloc_index_type = typename value_type::index_type; - - ValOp() = default; - RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} - RAJA_HOST_DEVICE constexpr ValOp(valloc_value_type v, valloc_index_type l) : val(v, l) {} - - ValOp(ValOp const &) = default; - ValOp(ValOp &&) = default; - ValOp& operator=(ValOp const &) = default; - ValOp& operator=(ValOp &&) = default; - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & min(value_type v) { if (v < val) { val = v; } return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & max(value_type v) { if (v > val) { val = v; } return *this; } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & minloc(valloc_value_type v, valloc_index_type l) { return min(value_type(v,l)); } - - template >::value> * = nullptr> - RAJA_HOST_DEVICE constexpr ValOp & maxloc(valloc_value_type v, valloc_index_type l) { return max(value_type(v,l)); } - - RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const { return val < rhs.val; } - RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const { return val > rhs.val; } - - value_type val = op_type::identity(); - }; - - template class Op> - using ValLocOp = ValOp, Op>; +template +struct ValLoc +{ + using index_type = IndexType; + using value_type = T; + + ValLoc() = default; + RAJA_HOST_DEVICE constexpr explicit ValLoc(value_type v) : val(v) {} + RAJA_HOST_DEVICE constexpr ValLoc(value_type v, index_type l) : val(v), loc(l) + {} + + ValLoc(ValLoc const&) = default; + ValLoc(ValLoc&&) = default; + ValLoc& operator=(ValLoc const&) = default; + ValLoc& operator=(ValLoc&&) = default; + + RAJA_HOST_DEVICE constexpr bool operator<(const ValLoc& rhs) const + { + return val < rhs.val; + } + RAJA_HOST_DEVICE constexpr bool operator>(const ValLoc& rhs) const + { + return val > rhs.val; + } + + RAJA_HOST_DEVICE constexpr const value_type& getVal() const { return val; } + RAJA_HOST_DEVICE constexpr const index_type& getLoc() const { return loc; } + + RAJA_HOST_DEVICE void set(T inval, IndexType inindex) + { + val = inval; + loc = inindex; + } + RAJA_HOST_DEVICE void setVal(T inval) { val = inval; } + RAJA_HOST_DEVICE void setLoc(IndexType inindex) { loc = inindex; } + + value_type val; + index_type loc = -1; +}; + +template class Op> +struct ValOp +{ + using value_type = T; + using op_type = Op; + + ValOp() = default; + RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} + + ValOp(ValOp const&) = default; + ValOp(ValOp&&) = default; + ValOp& operator=(ValOp const&) = default; + ValOp& operator=(ValOp&&) = default; + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& min(value_type v) + { + if (v < val) + { + val = v; + } + return *this; + } + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& max(value_type v) + { + if (v > val) + { + val = v; + } + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator+=(const value_type& rhs) + { + val += rhs; + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator&=(const value_type& rhs) + { + val &= rhs; + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& operator|=(const value_type& rhs) + { + val |= rhs; + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE ValOp& operator&=(value_type& rhs) + { + val &= rhs; + return *this; + } + + template < + typename U = op_type, + std::enable_if_t< + std::is_same>::value>* = nullptr> + RAJA_HOST_DEVICE ValOp& operator|=(value_type& rhs) + { + val |= rhs; + return *this; + } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const + { + val < rhs.val; + return *this; + } + RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const + { + val > rhs.val; + return *this; + } + + value_type val = op_type::identity(); +}; + +template + class Op> +struct ValOp, Op> +{ + using index_type = IndexType; + using value_type = ValLoc; + using op_type = Op; + using valloc_value_type = typename value_type::value_type; + using valloc_index_type = typename value_type::index_type; + + ValOp() = default; + RAJA_HOST_DEVICE constexpr explicit ValOp(value_type v) : val(v) {} + RAJA_HOST_DEVICE constexpr ValOp(valloc_value_type v, valloc_index_type l) + : val(v, l) + {} + + ValOp(ValOp const&) = default; + ValOp(ValOp&&) = default; + ValOp& operator=(ValOp const&) = default; + ValOp& operator=(ValOp&&) = default; + + template >:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& min(value_type v) + { + if (v < val) + { + val = v; + } + return *this; + } + + template >:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& max(value_type v) + { + if (v > val) + { + val = v; + } + return *this; + } + + template >:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& minloc(valloc_value_type v, + valloc_index_type l) + { + return min(value_type(v, l)); + } + + template >:: + value>* = nullptr> + RAJA_HOST_DEVICE constexpr ValOp& maxloc(valloc_value_type v, + valloc_index_type l) + { + return max(value_type(v, l)); + } + + RAJA_HOST_DEVICE constexpr bool operator<(const ValOp& rhs) const + { + return val < rhs.val; + } + RAJA_HOST_DEVICE constexpr bool operator>(const ValOp& rhs) const + { + return val > rhs.val; + } + + value_type val = op_type::identity(); +}; + +template + class Op> +using ValLocOp = ValOp, Op>; namespace detail { - struct ForallParamBase { +struct ForallParamBase +{ - // Some of this can be made virtual in c++20, for now must be defined in each child class - // if any arguments to the forall lambda are needed (e.g. KernelName is excluded.) - using ARG_TUP_T = camp::tuple<>; - using ARG_LIST_T = typename ARG_TUP_T::TList; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(); } - static constexpr size_t num_lambda_args = camp::tuple_size::value; - - }; + // Some of this can be made virtual in c++20, for now must be defined in each + // child class if any arguments to the forall lambda are needed (e.g. + // KernelName is excluded.) + using ARG_TUP_T = camp::tuple<>; + using ARG_LIST_T = typename ARG_TUP_T::TList; + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(); } + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; -} // namespace detail +} // namespace detail -} // namespace expt +} // namespace expt -} // namespace RAJA +} // namespace RAJA -#endif // RAJA_PARAMS_BASE +#endif // RAJA_PARAMS_BASE diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index 78b6d7714d..ee4ac7c7f7 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -19,20 +19,23 @@ namespace operators { template -struct limits> { - RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc min() +struct limits> +{ + RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc + min() { return RAJA::expt::ValLoc(RAJA::operators::limits::min()); } - RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc max() + RAJA_INLINE RAJA_HOST_DEVICE static constexpr RAJA::expt::ValLoc + max() { return RAJA::expt::ValLoc(RAJA::operators::limits::max()); } }; -} // namespace operators +} // namespace operators -} // namespace RAJA +} // namespace RAJA namespace RAJA { @@ -43,159 +46,196 @@ namespace detail { #if defined(RAJA_CUDA_ACTIVE) - using device_mem_pool_t = RAJA::cuda::device_mempool_type; +using device_mem_pool_t = RAJA::cuda::device_mempool_type; #elif defined(RAJA_HIP_ACTIVE) - using device_mem_pool_t = RAJA::hip::device_mempool_type; +using device_mem_pool_t = RAJA::hip::device_mempool_type; #elif defined(RAJA_SYCL_ACTIVE) - using device_mem_pool_t = RAJA::sycl::device_mempool_type; +using device_mem_pool_t = RAJA::sycl::device_mempool_type; #endif - // - // - // Basic Reducer - // - // - - // Basic data type Reducer - // T must be a basic data type - // VOp must be ValOp - template - struct Reducer : public ForallParamBase { - using op = Op; - using value_type = T; // This is a basic data type - - Reducer() = default; - - // Basic data type constructor - RAJA_HOST_DEVICE Reducer(value_type *target_in) : m_valop(VOp{}), target(target_in){} - - Reducer(Reducer const &) = default; - Reducer(Reducer &&) = default; - Reducer& operator=(Reducer const &) = default; - Reducer& operator=(Reducer &&) = default; - - // Internal ValOp object that is used within RAJA::forall/launch - VOp m_valop = VOp{}; - - // Points to the user specified result variable - value_type *target = nullptr; - - // combineTarget() performs the final op on the target data and location in resolve() - RAJA_HOST_DEVICE void combineTarget(value_type in) - { - value_type temp = op{}(*target, in); - *target = temp; - } - - RAJA_HOST_DEVICE - value_type & - getVal() { return m_valop.val; } - -#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || defined(RAJA_SYCL_ACTIVE) - // Device related attributes. - value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; - unsigned int * device_count = nullptr; +// +// +// Basic Reducer +// +// + +// Basic data type Reducer +// T must be a basic data type +// VOp must be ValOp +template +struct Reducer : public ForallParamBase +{ + using op = Op; + using value_type = T; // This is a basic data type + + Reducer() = default; + + // Basic data type constructor + RAJA_HOST_DEVICE Reducer(value_type* target_in) + : m_valop(VOp {}), target(target_in) + {} + + Reducer(Reducer const&) = default; + Reducer(Reducer&&) = default; + Reducer& operator=(Reducer const&) = default; + Reducer& operator=(Reducer&&) = default; + + // Internal ValOp object that is used within RAJA::forall/launch + VOp m_valop = VOp {}; + + // Points to the user specified result variable + value_type* target = nullptr; + + // combineTarget() performs the final op on the target data and location in + // resolve() + RAJA_HOST_DEVICE void combineTarget(value_type in) + { + value_type temp = op {}(*target, in); + *target = temp; + } + + RAJA_HOST_DEVICE + value_type& getVal() { return m_valop.val; } + +#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || \ + defined(RAJA_SYCL_ACTIVE) + // Device related attributes. + value_type* devicetarget = nullptr; + RAJA::detail::SoAPtr device_mem; + unsigned int* device_count = nullptr; #endif - // These are types and parameters extracted from this struct, and given to the forall. - using ARG_TUP_T = camp::tuple; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(&m_valop); } - - using ARG_LIST_T = typename ARG_TUP_T::TList; - static constexpr size_t num_lambda_args = camp::tuple_size::value ; - }; - - // Partial specialization of Reducer for ValLoc - // T is a deduced basic data type - // I is a deduced index type - template class Op> - struct Reducer, ValLoc, ValLoc>, ValLoc, ValOp, Op>> : public ForallParamBase { - using target_value_type = T; - using target_index_type = I; - using value_type = ValLoc; - using op = Op; - using VOp = ValOp, Op>; - - Reducer() = default; - - // ValLoc constructor - // Note that the target_ variables point to the val and loc within the user defined target ValLoc - RAJA_HOST_DEVICE Reducer(value_type *target_in) : m_valop(VOp{}), target_value(&target_in->val), target_index(&target_in->loc) {} - - // Dual input constructor for ReduceLoc<>(data, index) case - // The target_ variables point to vars defined by the user - RAJA_HOST_DEVICE Reducer(target_value_type *data_in, target_index_type *index_in) : m_valop(VOp{}), target_value(data_in), target_index(index_in) {} - - Reducer(Reducer const &) = default; - Reducer(Reducer &&) = default; - Reducer& operator=(Reducer const &) = default; - Reducer& operator=(Reducer &&) = default; - - // The ValLoc within m_valop is initialized with data and location values from either a ValLoc, or dual data and location values, passed into the constructor - VOp m_valop = VOp{}; - - // Points to either dual value and index defined by the user, or value and index within a ValLoc defined by the user - target_value_type *target_value = nullptr; - target_index_type *target_index = nullptr; - - // combineTarget() performs the final op on the target data and location in resolve() - RAJA_HOST_DEVICE void combineTarget(value_type in) - { - // Create a different temp ValLoc solely for combining - value_type temp(*target_value, *target_index); - temp = op{}(temp, in); - *target_value = temp.val; - *target_index = temp.loc; - } - - RAJA_HOST_DEVICE - value_type & - getVal() { return m_valop.val; } - -#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || defined(RAJA_SYCL_ACTIVE) - // Device related attributes. - value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; - unsigned int * device_count = nullptr; + // These are types and parameters extracted from this struct, and given to the + // forall. + using ARG_TUP_T = camp::tuple; + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() + { + return camp::make_tuple(&m_valop); + } + + using ARG_LIST_T = typename ARG_TUP_T::TList; + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; + +// Partial specialization of Reducer for ValLoc +// T is a deduced basic data type +// I is a deduced index type +template + class Op> +struct Reducer, ValLoc, ValLoc>, + ValLoc, + ValOp, Op>> : public ForallParamBase +{ + using target_value_type = T; + using target_index_type = I; + using value_type = ValLoc; + using op = Op; + using VOp = ValOp, Op>; + + Reducer() = default; + + // ValLoc constructor + // Note that the target_ variables point to the val and loc within the user + // defined target ValLoc + RAJA_HOST_DEVICE Reducer(value_type* target_in) + : m_valop(VOp {}), + target_value(&target_in->val), + target_index(&target_in->loc) + {} + + // Dual input constructor for ReduceLoc<>(data, index) case + // The target_ variables point to vars defined by the user + RAJA_HOST_DEVICE Reducer(target_value_type* data_in, + target_index_type* index_in) + : m_valop(VOp {}), target_value(data_in), target_index(index_in) + {} + + Reducer(Reducer const&) = default; + Reducer(Reducer&&) = default; + Reducer& operator=(Reducer const&) = default; + Reducer& operator=(Reducer&&) = default; + + // The ValLoc within m_valop is initialized with data and location values from + // either a ValLoc, or dual data and location values, passed into the + // constructor + VOp m_valop = VOp {}; + + // Points to either dual value and index defined by the user, or value and + // index within a ValLoc defined by the user + target_value_type* target_value = nullptr; + target_index_type* target_index = nullptr; + + // combineTarget() performs the final op on the target data and location in + // resolve() + RAJA_HOST_DEVICE void combineTarget(value_type in) + { + // Create a different temp ValLoc solely for combining + value_type temp(*target_value, *target_index); + temp = op {}(temp, in); + *target_value = temp.val; + *target_index = temp.loc; + } + + RAJA_HOST_DEVICE + value_type& getVal() { return m_valop.val; } + +#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) || \ + defined(RAJA_SYCL_ACTIVE) + // Device related attributes. + value_type* devicetarget = nullptr; + RAJA::detail::SoAPtr device_mem; + unsigned int* device_count = nullptr; #endif - // These are types and parameters extracted from this struct, and given to the forall. - using ARG_TUP_T = camp::tuple; - RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(&m_valop); } + // These are types and parameters extracted from this struct, and given to the + // forall. + using ARG_TUP_T = camp::tuple; + RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() + { + return camp::make_tuple(&m_valop); + } - using ARG_LIST_T = typename ARG_TUP_T::TList; - static constexpr size_t num_lambda_args = camp::tuple_size::value ; - }; + using ARG_LIST_T = typename ARG_TUP_T::TList; + static constexpr size_t num_lambda_args = camp::tuple_size::value; +}; -} // namespace detail +} // namespace detail // Standard use case. template