From 01da58c6e74fc4febeac706554f1883d304379c1 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 31 Oct 2024 16:32:48 -0700 Subject: [PATCH 01/18] add reduction support to dynamic_forall --- examples/dynamic-forall.cpp | 33 +++++++++++++-------------- include/RAJA/pattern/forall.hpp | 40 ++++++++++++++++----------------- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/examples/dynamic-forall.cpp b/examples/dynamic-forall.cpp index 5131010bd6..3c120cb0ea 100644 --- a/examples/dynamic-forall.cpp +++ b/examples/dynamic-forall.cpp @@ -77,28 +77,25 @@ int main(int argc, char *argv[]) //----------------------------------------------------------------------------// - std::cout << "\n Running C-style vector addition...\n"; - - // _cstyle_vector_add_start - for (int i = 0; i < N; ++i) { - c[i] = a[i] + b[i]; - } - // _cstyle_vector_add_end - - checkResult(c, N); -//printResult(c, N); - - -//----------------------------------------------------------------------------// -// Example of dynamic policy selection for forall -//----------------------------------------------------------------------------// + std::cout << "\n Running dynamic forall vector addition and reductions...\n"; + int sum = 0; + using VAL_INT_SUM = RAJA::expt::ValOp; + + RAJA::RangeSegment range(0, N); + //policy is chosen from the list - RAJA::expt::dynamic_forall(pol, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE (int i) { + RAJA::expt::dynamic_forall(pol, range, + RAJA::expt::Reduce(&sum), + RAJA::expt::KernelName("RAJA dynamic forall"), + [=] RAJA_HOST_DEVICE (int i, VAL_INT_SUM &_sum) { + c[i] = a[i] + b[i]; + _sum += 1; }); // _rajaseq_vector_add_end + std::cout<<"Sum = "< struct dynamic_helper { - template - static void invoke_forall(const int pol, SEGMENT const &seg, BODY const &body) + template + static void invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) { if(IDX==pol){ using t_pol = typename camp::at>::type; - RAJA::forall(seg, body); + RAJA::forall(seg, params...); return; } - dynamic_helper::invoke_forall(pol, seg, body); + dynamic_helper::invoke_forall(pol, seg, params...); } - template + template static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body) + invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) { using t_pol = typename camp::at>::type; using resource_type = typename resources::get_resource::type; if(IDX==pol){ - RAJA::forall(r.get(), seg, body); + RAJA::forall(r.get(), seg, params...); //Return a generic event proxy from r, //because forall returns a typed event proxy return {r}; } - return dynamic_helper::invoke_forall(r, pol, seg, body); + return dynamic_helper::invoke_forall(r, pol, seg, params...); } }; @@ -688,28 +688,28 @@ namespace expt template struct dynamic_helper<0, POLICY_LIST> { - template + template static void - invoke_forall(const int pol, SEGMENT const &seg, BODY const &body) + invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) { if(0==pol){ using t_pol = typename camp::at>::type; - RAJA::forall(seg, body); + RAJA::forall(seg, params...); return; } RAJA_ABORT_OR_THROW("Policy enum not supported "); } - template + template static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body) + invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... 
params) { if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); using t_pol = typename camp::at>::type; using resource_type = typename resources::get_resource::type; - RAJA::forall(r.get(), seg, body); + RAJA::forall(r.get(), seg, params...); //Return a generic event proxy from r, //because forall returns a typed event proxy @@ -718,8 +718,8 @@ namespace expt }; - template - void dynamic_forall(const int pol, SEGMENT const &seg, BODY const &body) + template + void dynamic_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) { constexpr int N = camp::size::value; static_assert(N > 0, "RAJA policy list must not be empty"); @@ -727,12 +727,12 @@ namespace expt if(pol > N-1) { RAJA_ABORT_OR_THROW("Policy enum not supported"); } - dynamic_helper::invoke_forall(pol, seg, body); + dynamic_helper::invoke_forall(pol, seg, params...); } - template + template resources::EventProxy - dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body) + dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) { constexpr int N = camp::size::value; static_assert(N > 0, "RAJA policy list must not be empty"); @@ -741,7 +741,7 @@ namespace expt RAJA_ABORT_OR_THROW("Policy value out of range"); } - return dynamic_helper::invoke_forall(r, pol, seg, body); + return dynamic_helper::invoke_forall(r, pol, seg, params...); } } // namespace expt From 41a3765d80b079c2ce0f200ee105da8396806775 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 31 Oct 2024 16:40:48 -0700 Subject: [PATCH 02/18] remove dynamic forall from expt --- examples/dynamic-forall.cpp | 2 +- include/RAJA/pattern/forall.hpp | 141 +++++++++++++++----------------- 2 files changed, 69 insertions(+), 74 deletions(-) diff --git a/examples/dynamic-forall.cpp b/examples/dynamic-forall.cpp index 3c120cb0ea..7c037571e9 100644 --- a/examples/dynamic-forall.cpp +++ b/examples/dynamic-forall.cpp @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) RAJA::RangeSegment range(0, N); //policy is chosen from the list - RAJA::expt::dynamic_forall(pol, range, + RAJA::dynamic_forall(pol, range, RAJA::expt::Reduce(&sum), RAJA::expt::KernelName("RAJA dynamic forall"), [=] RAJA_HOST_DEVICE (int i, VAL_INT_SUM &_sum) { diff --git a/include/RAJA/pattern/forall.hpp b/include/RAJA/pattern/forall.hpp index 28e8897397..e75cc43af7 100644 --- a/include/RAJA/pattern/forall.hpp +++ b/include/RAJA/pattern/forall.hpp @@ -647,68 +647,29 @@ RAJA_INLINE camp::resources::EventProxy CallForallIcount::operator()(T cons // - Returns a generic event proxy only if a resource is provided // avoids overhead of constructing a typed erased resource // -namespace expt +template +struct dynamic_helper { - - template - struct dynamic_helper + template + static void invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) { - template - static void invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) - { - if(IDX==pol){ - using t_pol = typename camp::at>::type; - RAJA::forall(seg, params...); - return; - } - dynamic_helper::invoke_forall(pol, seg, params...); - } - - template - static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... 
params) - { - + if(IDX==pol){ using t_pol = typename camp::at>::type; - using resource_type = typename resources::get_resource::type; - - if(IDX==pol){ - RAJA::forall(r.get(), seg, params...); - - //Return a generic event proxy from r, - //because forall returns a typed event proxy - return {r}; - } - - return dynamic_helper::invoke_forall(r, pol, seg, params...); + RAJA::forall(seg, params...); + return; } + dynamic_helper::invoke_forall(pol, seg, params...); + } - }; - - template - struct dynamic_helper<0, POLICY_LIST> + template + static resources::EventProxy + invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) { - template - static void - invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) - { - if(0==pol){ - using t_pol = typename camp::at>::type; - RAJA::forall(seg, params...); - return; - } - RAJA_ABORT_OR_THROW("Policy enum not supported "); - } - template - static resources::EventProxy - invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) - { - if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); - - using t_pol = typename camp::at>::type; - using resource_type = typename resources::get_resource::type; + using t_pol = typename camp::at>::type; + using resource_type = typename resources::get_resource::type; + if(IDX==pol){ RAJA::forall(r.get(), seg, params...); //Return a generic event proxy from r, @@ -716,35 +677,69 @@ namespace expt return {r}; } - }; + return dynamic_helper::invoke_forall(r, pol, seg, params...); + } - template - void dynamic_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) - { - constexpr int N = camp::size::value; - static_assert(N > 0, "RAJA policy list must not be empty"); +}; - if(pol > N-1) { - RAJA_ABORT_OR_THROW("Policy enum not supported"); +template +struct dynamic_helper<0, POLICY_LIST> +{ + template + static void + invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) + { + if(0==pol){ + using t_pol = typename camp::at>::type; + RAJA::forall(seg, params...); + return; } - dynamic_helper::invoke_forall(pol, seg, params...); + RAJA_ABORT_OR_THROW("Policy enum not supported "); } - template - resources::EventProxy - dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) + template + static resources::EventProxy + invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params) { - constexpr int N = camp::size::value; - static_assert(N > 0, "RAJA policy list must not be empty"); + if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range "); - if(pol > N-1) { - RAJA_ABORT_OR_THROW("Policy value out of range"); - } + using t_pol = typename camp::at>::type; + using resource_type = typename resources::get_resource::type; + + RAJA::forall(r.get(), seg, params...); + + //Return a generic event proxy from r, + //because forall returns a typed event proxy + return {r}; + } + +}; - return dynamic_helper::invoke_forall(r, pol, seg, params...); +template +void dynamic_forall(const int pol, SEGMENT const &seg, PARAMS&&... params) +{ + constexpr int N = camp::size::value; + static_assert(N > 0, "RAJA policy list must not be empty"); + + if(pol > N-1) { + RAJA_ABORT_OR_THROW("Policy enum not supported"); + } + dynamic_helper::invoke_forall(pol, seg, params...); +} + +template +resources::EventProxy +dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... 
params) +{ + constexpr int N = camp::size::value; + static_assert(N > 0, "RAJA policy list must not be empty"); + + if(pol > N-1) { + RAJA_ABORT_OR_THROW("Policy value out of range"); } -} // namespace expt + return dynamic_helper::invoke_forall(r, pol, seg, params...); +} } // namespace RAJA From 36e06139c5f9175c36f860c533b8c024ffa1d8e6 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 31 Oct 2024 16:54:59 -0700 Subject: [PATCH 03/18] bring resource dynamic forall out of expt --- examples/resource-dynamic-forall.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/resource-dynamic-forall.cpp b/examples/resource-dynamic-forall.cpp index 0b35017fac..ac6fd62398 100644 --- a/examples/resource-dynamic-forall.cpp +++ b/examples/resource-dynamic-forall.cpp @@ -121,7 +121,7 @@ int main(int argc, char *argv[]) RAJA::resources::Resource res = RAJA::Get_Host_Resource(host_res, select_cpu_or_gpu); #endif - RAJA::expt::dynamic_forall + RAJA::dynamic_forall (res, pol, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE (int i) { c[i] = a[i] + b[i]; From ff4bbf8b7917546f1a0b8d178bdf19d99f23773d Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 31 Oct 2024 17:03:07 -0700 Subject: [PATCH 04/18] fix tests --- .../tests/test-dynamic-forall-resource-RangeSegment.hpp | 2 +- .../segment/tests/test-dynamic-forall-RangeSegment.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp b/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp index 8c8d051d8f..dde4273abe 100644 --- a/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp +++ b/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp @@ -34,7 +34,7 @@ void DynamicForallResourceRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last std::iota(test_array, test_array + RAJA::stripIndexType(N), rbegin); - RAJA::expt::dynamic_forall(working_res, pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { + RAJA::dynamic_forall(working_res, pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx - rbegin)] = idx; }); diff --git a/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp b/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp index 11168b0e30..6f13f07cf5 100644 --- a/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp +++ b/test/functional/dynamic_forall/segment/tests/test-dynamic-forall-RangeSegment.hpp @@ -40,7 +40,7 @@ void DynamicForallRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last, const std::iota(test_array, test_array + RAJA::stripIndexType(N), rbegin); - RAJA::expt::dynamic_forall(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { + RAJA::dynamic_forall(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { working_array[RAJA::stripIndexType(idx - rbegin)] = idx; }); @@ -50,7 +50,7 @@ void DynamicForallRangeSegmentTestImpl(INDEX_TYPE first, INDEX_TYPE last, const working_res.memcpy(working_array, test_array, sizeof(INDEX_TYPE) * data_len); - RAJA::expt::dynamic_forall(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { + RAJA::dynamic_forall(pol, r1, [=] RAJA_HOST_DEVICE(INDEX_TYPE idx) { (void) idx; working_array[0]++; }); From a586ffa0415cf6cc55967cce948143b43aeb0eca Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 8 Nov 2024 12:11:26 -0800 Subject: 
[PATCH 05/18] Shorten CMake message to be more consistent with others --- cmake/SetupRajaOptions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/SetupRajaOptions.cmake b/cmake/SetupRajaOptions.cmake index 09276e18db..9c5fb043e4 100644 --- a/cmake/SetupRajaOptions.cmake +++ b/cmake/SetupRajaOptions.cmake @@ -28,7 +28,7 @@ option(RAJA_ENABLE_FORCEINLINE_RECURSIVE "Enable Forceinline recursive (only sup option(RAJA_DEPRECATED_TESTS "Test deprecated features" Off) option(RAJA_ENABLE_BOUNDS_CHECK "Enable bounds checking in RAJA::Views/Layouts" Off) option(RAJA_TEST_EXHAUSTIVE "Build RAJA exhaustive tests" Off) -option(RAJA_TEST_OPENMP_TARGET_SUBSET "Build subset of RAJA OpenMP target tests when it is enabled" On) +option(RAJA_TEST_OPENMP_TARGET_SUBSET "Build subset of RAJA OpenMP target tests" On) option(RAJA_ENABLE_RUNTIME_PLUGINS "Enable support for loading plugins at runtime" Off) option(RAJA_ALLOW_INCONSISTENT_OPTIONS "Enable inconsistent values for ENABLE_X and RAJA_ENABLE_X options" Off) From 0d6b3eeb5738c1273a9b94e96bf099c1c00a319f Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 8 Nov 2024 12:12:59 -0800 Subject: [PATCH 06/18] Clean up OpenMP Target teststo get a passing GitLab CI check --- .../forall/CombiningAdapter/CMakeLists.txt | 9 ++++ .../forall/atomic-ref/CMakeLists.txt | 39 +-------------- .../forall/atomic-view/CMakeLists.txt | 36 +------------- .../forall/indexset-view/CMakeLists.txt | 7 +-- .../forall/multi-reduce-basic/CMakeLists.txt | 36 ++------------ .../forall/reduce-basic/CMakeLists.txt | 49 ++----------------- .../reduce-multiple-indexset/CMakeLists.txt | 7 +-- .../reduce-multiple-segment/CMakeLists.txt | 8 +-- .../forall/resource-indexset/CMakeLists.txt | 7 +-- .../basic-fission-fusion-loop/CMakeLists.txt | 9 ++++ .../CMakeLists.txt | 9 ++++ .../kernel/multi-reduce-nested/CMakeLists.txt | 12 ++--- .../nested-loop-reducesum/CMakeLists.txt | 36 ++------------ .../nested-loop-segment-types/CMakeLists.txt | 19 ++----- .../kernel/nested-loop/CMakeLists.txt | 48 +++++++----------- .../CMakeLists.txt | 31 +++++++----- .../kernel/tile-variants/CMakeLists.txt | 15 +++--- test/functional/workgroup/CMakeLists.txt | 28 ++--------- test/integration/CMakeLists.txt | 4 +- test/unit/algorithm/CMakeLists.txt | 3 ++ test/unit/indexing/CMakeLists.txt | 2 +- test/unit/multi_reducer/CMakeLists.txt | 2 +- ...est-reducer-constructors-openmp-target.cpp | 16 ++++++ 23 files changed, 137 insertions(+), 295 deletions(-) diff --git a/test/functional/forall/CombiningAdapter/CMakeLists.txt b/test/functional/forall/CombiningAdapter/CMakeLists.txt index 5773289aa7..fd3ff2cd82 100644 --- a/test/functional/forall/CombiningAdapter/CMakeLists.txt +++ b/test/functional/forall/CombiningAdapter/CMakeLists.txt @@ -10,6 +10,15 @@ # set(DIMENSIONS 1D 2D 3D) +## +## Enable OpenMP Target tests when support for Combining Adapter is fixed +## +if(RAJA_ENABLE_TARGET_OPENMP) + if(RAJA_TEST_OPENMP_TARGET_SUBSET) + list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) + endif() +endif() + # # Generate tests for each enabled RAJA back-end. 
# diff --git a/test/functional/forall/atomic-ref/CMakeLists.txt b/test/functional/forall/atomic-ref/CMakeLists.txt index 6e3f56114a..7e4c2856ec 100644 --- a/test/functional/forall/atomic-ref/CMakeLists.txt +++ b/test/functional/forall/atomic-ref/CMakeLists.txt @@ -10,21 +10,12 @@ # set(TESTTYPES AtomicRefAdd AtomicRefSub AtomicRefLoadStore AtomicRefCAS AtomicRefMinMax AtomicRefLogical) -# -# If building a subset of openmp target tests, remove the back-end from -# from the list of tests to generate here. -# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_ATOMIC_BACKENDS OpenMPTarget) - endif() -endif() - # # Generate atomicref tests for each enabled RAJA back-end. # # Note: FORALL_ATOMIC_BACKENDS is defined in ../CMakeLists.txt # + foreach( ATOMIC_BACKEND ${FORALL_ATOMIC_BACKENDS} ) foreach( TEST ${TESTTYPES} ) configure_file( test-forall-atomicref.cpp.in @@ -33,34 +24,8 @@ foreach( ATOMIC_BACKEND ${FORALL_ATOMIC_BACKENDS} ) SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-forall-${TEST}-${ATOMIC_BACKEND}.cpp ) target_include_directories(test-forall-${TEST}-${ATOMIC_BACKEND}.exe - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) endforeach() endforeach() unset( TESTTYPES ) - -# -# If building a subset of openmp target tests, add tests to build here. -# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - - set(ATOMIC_BACKEND OpenMPTarget) - set(TESTTYPES AtomicRefAdd AtomicRefCAS) - - foreach( TEST ${TESTTYPES} ) - configure_file( test-forall-atomicref.cpp.in - test-forall-${TEST}-${ATOMIC_BACKEND}.cpp ) - raja_add_test( NAME test-forall-${TEST}-${ATOMIC_BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-forall-${TEST}-${ATOMIC_BACKEND}.cpp ) - - target_include_directories(test-forall-${TEST}-${ATOMIC_BACKEND}.exe - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) - endforeach() - - unset( TESTTYPES ) - - endif() -endif() - - diff --git a/test/functional/forall/atomic-view/CMakeLists.txt b/test/functional/forall/atomic-view/CMakeLists.txt index d6aed154eb..3be534bc44 100644 --- a/test/functional/forall/atomic-view/CMakeLists.txt +++ b/test/functional/forall/atomic-view/CMakeLists.txt @@ -10,21 +10,12 @@ # set(TESTTYPES AtomicView AtomicMultiView) -# -# If building a subset of openmp target tests, remove the back-end from -# from the list of tests to generate here. -# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_ATOMIC_BACKENDS OpenMPTarget) - endif() -endif() - # # Generate tests for each enabled RAJA back-end. # # Note: FORALL_ATOMIC_BACKENDS is defined in ../CMakeLists.txt # + foreach( ATOMIC_BACKEND ${FORALL_ATOMIC_BACKENDS} ) foreach( TEST ${TESTTYPES} ) configure_file( test-forall-atomic-view.cpp.in @@ -39,30 +30,6 @@ endforeach() unset( TESTTYPES ) -# -# If building a subset of openmp target tests, add tests to build here. -# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - - set(ATOMIC_BACKEND OpenMPTarget) - set(TESTTYPES AtomicMultiView) - - foreach( TEST ${TESTTYPES} ) - configure_file( test-forall-atomic-view.cpp.in - test-forall-${TEST}-${ATOMIC_BACKEND}.cpp ) - raja_add_test( NAME test-forall-${TEST}-${ATOMIC_BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-forall-${TEST}-${ATOMIC_BACKEND}.cpp ) - - target_include_directories(test-forall-${TEST}-${ATOMIC_BACKEND}.exe - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) - endforeach() - - unset(TESTTYPES) - - endif() -endif() - # # Testing failure cases with only Sequential. 
Failures for various backends differ immensely. # @@ -81,4 +48,3 @@ foreach( ATOMIC_BACKEND ${FORALL_FAIL_ATOMIC_BACKENDS} ) endforeach() unset(FAILTESTS) - diff --git a/test/functional/forall/indexset-view/CMakeLists.txt b/test/functional/forall/indexset-view/CMakeLists.txt index f6b868c2b8..040e61a336 100644 --- a/test/functional/forall/indexset-view/CMakeLists.txt +++ b/test/functional/forall/indexset-view/CMakeLists.txt @@ -10,17 +10,12 @@ # set(INDEXSETTESTTYPES IndexSetView IcountIndexSetView) -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) - endif() -endif() - # # Generate tests for each enabled RAJA back-end. # # Note: FORALL_BACKENDS is defined in ../CMakeLists.txt # + foreach( BACKEND ${FORALL_BACKENDS} ) foreach( INDEXSETTESTTYPE ${INDEXSETTESTTYPES} ) configure_file( test-forall-indexset-view.cpp.in diff --git a/test/functional/forall/multi-reduce-basic/CMakeLists.txt b/test/functional/forall/multi-reduce-basic/CMakeLists.txt index 31ec872c0f..06b7f7b509 100644 --- a/test/functional/forall/multi-reduce-basic/CMakeLists.txt +++ b/test/functional/forall/multi-reduce-basic/CMakeLists.txt @@ -11,18 +11,14 @@ set(REDUCETYPES Sum Min Max BitAnd BitOr) # -# If building openmp target tests, remove the back-end to -# from the list of tests to generate here. +# Do not create tests for OpenMP Target, support not currently implemented # if(RAJA_ENABLE_TARGET_OPENMP) - #if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) - #endif() + list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) endif() # -# If building SYCL tests, remove the back-end to -# from the list of tests to generate here. +# Do not create tests for SYCL, support not currently implemented # if(RAJA_ENABLE_SYCL) list(REMOVE_ITEM FORALL_BACKENDS Sycl) @@ -33,6 +29,7 @@ endif() # # Note: FORALL_BACKENDS is defined in ../CMakeLists.txt # + foreach( BACKEND ${FORALL_BACKENDS} ) foreach( REDUCETYPE ${REDUCETYPES} ) configure_file( test-forall-basic-multi-reduce.cpp.in @@ -46,28 +43,3 @@ foreach( BACKEND ${FORALL_BACKENDS} ) endforeach() unset( REDUCETYPES ) - - -# -# If building a subset of openmp target tests, add tests to build here. -# -#if(RAJA_ENABLE_TARGET_OPENMP) -# if(RAJA_TEST_OPENMP_TARGET_SUBSET) -# -# set(BACKEND OpenMPTarget) -# set(REDUCETYPES ReduceSum) -# -# foreach( REDUCETYPE ${REDUCETYPES} ) -# configure_file( test-forall-basic-multi-reduce.cpp.in -# test-forall-basic-MultiReduce${REDUCETYPE}-${BACKEND}.cpp ) -# raja_add_test( NAME test-forall-basic-MultiReduce${REDUCETYPE}-${BACKEND} -# SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-forall-basic-MultiReduce${REDUCETYPE}-${BACKEND}.cpp ) -# -# target_include_directories(test-forall-basic-MultiReduce${REDUCETYPE}-${BACKEND}.exe -# PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) -# endforeach() -# -# endif() -#endif() - -unset( REDUCETYPES ) diff --git a/test/functional/forall/reduce-basic/CMakeLists.txt b/test/functional/forall/reduce-basic/CMakeLists.txt index de6ce12264..c661c57dfd 100644 --- a/test/functional/forall/reduce-basic/CMakeLists.txt +++ b/test/functional/forall/reduce-basic/CMakeLists.txt @@ -12,22 +12,13 @@ set(REDUCETYPES ReduceSum ReduceMin ReduceMax ReduceMaxLoc ReduceMinLoc ReduceMa set(DATATYPES CoreReductionDataTypeList) -# -# If building a subset of openmp target tests, remove the back-end from -# from the list of tests to generate here. 
-# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) - endif() -endif() - # # Generate core reduction tests for each enabled RAJA back-end # # Note: FORALL_BACKENDS is defined in ../CMakeLists.txt # + foreach( BACKEND ${FORALL_BACKENDS} ) foreach( REDUCETYPE ${REDUCETYPES} ) configure_file( test-forall-basic-expt-reduce.cpp.in @@ -86,14 +77,11 @@ set(REDUCETYPES ReduceSum ReduceMin ReduceMax ReduceMinLoc ReduceMaxLoc) set(DATATYPES CoreReductionDataTypeList) -# -# If building a subset of openmp target tests, remove the back-end from -# from the list of tests to generate here. -# +## +## Do not create OpenMP Target tests for "traditional" RAJA reduction interface +## if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) - endif() + list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) endif() # @@ -160,30 +148,3 @@ endforeach() unset( DATATYPES ) unset( REDUCETYPES ) - - -# -# If building a subset of openmp target tests, add tests to build here. -# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - - set(BACKEND OpenMPTarget) - set(REDUCETYPES ReduceSum) - set(DATATYPES CoreReductionDataTypeList) - - foreach( REDUCETYPE ${REDUCETYPES} ) - configure_file( test-forall-basic-reduce.cpp.in - test-forall-basic-${REDUCETYPE}-${BACKEND}.cpp ) - raja_add_test( NAME test-forall-basic-${REDUCETYPE}-${BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-forall-basic-${REDUCETYPE}-${BACKEND}.cpp ) - - target_include_directories(test-forall-basic-${REDUCETYPE}-${BACKEND}.exe - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) - endforeach() - - endif() -endif() - -unset( DATATYPES ) -unset( REDUCETYPES ) diff --git a/test/functional/forall/reduce-multiple-indexset/CMakeLists.txt b/test/functional/forall/reduce-multiple-indexset/CMakeLists.txt index 257b019b2d..649600ca3d 100644 --- a/test/functional/forall/reduce-multiple-indexset/CMakeLists.txt +++ b/test/functional/forall/reduce-multiple-indexset/CMakeLists.txt @@ -10,10 +10,11 @@ # set(REDUCETYPES ReduceSum ReduceMin ReduceMax ReduceMinLoc ReduceMaxLoc) +## +## Do not create OpenMP Target tests for "traditional" RAJA reduction interface +## if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) - endif() + list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) endif() # diff --git a/test/functional/forall/reduce-multiple-segment/CMakeLists.txt b/test/functional/forall/reduce-multiple-segment/CMakeLists.txt index 6d2234f948..20bfffc5d4 100644 --- a/test/functional/forall/reduce-multiple-segment/CMakeLists.txt +++ b/test/functional/forall/reduce-multiple-segment/CMakeLists.txt @@ -10,10 +10,11 @@ # set(REDUCETYPES ReduceSum ReduceMin ReduceMax ReduceMinLoc ReduceMaxLoc) +## +## Do not create OpenMP Target tests for "traditional" RAJA reduction interface +## if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) - endif() + list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) endif() # @@ -31,6 +32,7 @@ endif() # # Note: FORALL_BACKENDS is defined in ../CMakeLists.txt # + foreach( BACKEND ${FORALL_BACKENDS} ) foreach( REDUCETYPE ${REDUCETYPES} ) configure_file( test-forall-segment-multiple-reduce.cpp.in diff --git a/test/functional/forall/resource-indexset/CMakeLists.txt b/test/functional/forall/resource-indexset/CMakeLists.txt index 827f658537..361be6e5e1 100644 --- 
a/test/functional/forall/resource-indexset/CMakeLists.txt +++ b/test/functional/forall/resource-indexset/CMakeLists.txt @@ -10,17 +10,12 @@ # set(INDEXSETTESTTYPES ResourceIndexSet ResourceIcountIndexSet) -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) - endif() -endif() - # # Generate tests for each enabled RAJA back-end. # # Note: FORALL_BACKENDS is defined in ../CMakeLists.txt # + foreach( BACKEND ${FORALL_BACKENDS} ) foreach( INDEXSETTESTTYPE ${INDEXSETTESTTYPES} ) configure_file( test-forall-resource-indexset.cpp.in diff --git a/test/functional/kernel/basic-fission-fusion-loop/CMakeLists.txt b/test/functional/kernel/basic-fission-fusion-loop/CMakeLists.txt index 5698f46fa5..697bd1ecae 100644 --- a/test/functional/kernel/basic-fission-fusion-loop/CMakeLists.txt +++ b/test/functional/kernel/basic-fission-fusion-loop/CMakeLists.txt @@ -11,6 +11,15 @@ # Note: KERNEL_BACKENDS is defined in ../CMakeLists.txt # +## +## Enable OpenMP Target tests when support for fission-fusion is fixed +## +if(RAJA_ENABLE_TARGET_OPENMP) + if(RAJA_TEST_OPENMP_TARGET_SUBSET) + list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) + endif() +endif() + foreach( BACKEND ${KERNEL_BACKENDS} ) configure_file( test-kernel-basic-fission-fusion-loop.cpp.in test-kernel-basic-fission-fusion-loop-${BACKEND}.cpp ) diff --git a/test/functional/kernel/conditional-fission-fusion-loop/CMakeLists.txt b/test/functional/kernel/conditional-fission-fusion-loop/CMakeLists.txt index 3b5a34f4a3..74b64f00bf 100644 --- a/test/functional/kernel/conditional-fission-fusion-loop/CMakeLists.txt +++ b/test/functional/kernel/conditional-fission-fusion-loop/CMakeLists.txt @@ -20,6 +20,15 @@ # list(APPEND KERNEL_BACKENDS Sycl) #endif() +## +## Enable OpenMP Target tests when support for fission-fusion is fixed +## +if(RAJA_ENABLE_TARGET_OPENMP) + if(RAJA_TEST_OPENMP_TARGET_SUBSET) + list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) + endif() +endif() + foreach( BACKEND ${KERNEL_BACKENDS} ) configure_file( test-kernel-conditional-fission-fusion-loop.cpp.in test-kernel-conditional-fission-fusion-loop-${BACKEND}.cpp ) diff --git a/test/functional/kernel/multi-reduce-nested/CMakeLists.txt b/test/functional/kernel/multi-reduce-nested/CMakeLists.txt index 9efda7d133..1a2db130ed 100644 --- a/test/functional/kernel/multi-reduce-nested/CMakeLists.txt +++ b/test/functional/kernel/multi-reduce-nested/CMakeLists.txt @@ -10,14 +10,11 @@ # set(REDUCETYPES Sum Min Max BitAnd BitOr) -# -# If building openmp target tests, remove the back-end to -# from the list of tests to generate here. 
-# +## +## Disable traditional RAJA reductions test creation for OpenMP Target +## if(RAJA_ENABLE_TARGET_OPENMP) - #if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) - #endif() + list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) endif() # @@ -33,6 +30,7 @@ endif() # # Note: KERNEL_BACKENDS is defined in ../CMakeLists.txt # + foreach( BACKEND ${KERNEL_BACKENDS} ) foreach( REDUCETYPE ${REDUCETYPES} ) configure_file( test-kernel-nested-multi-reduce.cpp.in diff --git a/test/functional/kernel/nested-loop-reducesum/CMakeLists.txt b/test/functional/kernel/nested-loop-reducesum/CMakeLists.txt index e07d55e65e..2a3a02aecf 100644 --- a/test/functional/kernel/nested-loop-reducesum/CMakeLists.txt +++ b/test/functional/kernel/nested-loop-reducesum/CMakeLists.txt @@ -19,14 +19,11 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") endif() endif() -# -# If building a subset of openmp target tests, remove the back-end from -# from the list of tests to generate here. -# +## +## Don't create OpenMP Target tests for "traditional" RAJA reduce interface +## if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) - endif() + list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) endif() # @@ -61,28 +58,3 @@ foreach( NESTED_LOOP_BACKEND ${KERNEL_BACKENDS} ) endforeach() unset( NESTED_LOOPTYPES ) - -# -# If building a subset of openmp target tests, add tests to build here. -# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - - set(NESTED_LOOP_BACKEND OpenMPTarget) - set(NESTED_LOOPTYPES ReduceSum) - - set(RESOURCE "-") - foreach( NESTED_LOOP_TYPE ${NESTED_LOOPTYPES} ) - configure_file( test-kernel-nested-loop.cpp.in - test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.cpp ) - raja_add_test( NAME test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.cpp ) - - target_include_directories(test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.exe - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) - endforeach() - - endif() -endif() - -unset( NESTED_LOOPTYPES ) diff --git a/test/functional/kernel/nested-loop-segment-types/CMakeLists.txt b/test/functional/kernel/nested-loop-segment-types/CMakeLists.txt index c4cffd48bf..5667607cd3 100644 --- a/test/functional/kernel/nested-loop-segment-types/CMakeLists.txt +++ b/test/functional/kernel/nested-loop-segment-types/CMakeLists.txt @@ -11,27 +11,14 @@ # Note: KERNEL_BACKENDS is defined in ../CMakeLists.txt # -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) - endif() -endif() - -# -# Remove SYCL until kernel reduction support is added -# -if(RAJA_ENABLE_SYCL) - list(REMOVE_ITEM KERNEL_BACKENDS Sycl) -endif() - # # While we're adding SYCL tests, enable it for each test set like this. # # Eventually, remove this and enable in the top-level CMakeLists.txt file. 
# -#if(RAJA_ENABLE_SYCL) -# list(APPEND KERNEL_BACKENDS Sycl) -#endif() +if(RAJA_ENABLE_SYCL) + list(APPEND KERNEL_BACKENDS Sycl) +endif() foreach( BACKEND ${KERNEL_BACKENDS} ) configure_file( test-kernel-nested-loop-segments.cpp.in diff --git a/test/functional/kernel/nested-loop/CMakeLists.txt b/test/functional/kernel/nested-loop/CMakeLists.txt index 95f85b2912..fbb2fc872a 100644 --- a/test/functional/kernel/nested-loop/CMakeLists.txt +++ b/test/functional/kernel/nested-loop/CMakeLists.txt @@ -9,16 +9,6 @@ set(NESTED_LOOPTYPES Basic) set( USE_RESOURCE "-resource-" "-" ) -# -# If building a subset of openmp target tests, remove the back-end from -# from the list of tests to generate here. -# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) - endif() -endif() - # # Generate kernel basic tests for each enabled RAJA back-end. # @@ -74,24 +64,24 @@ unset( NESTED_LOOPTYPES ) # # If building a subset of openmp target tests, add tests to build here. # -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - - set(NESTED_LOOP_BACKEND OpenMPTarget) - set(NESTED_LOOPTYPES Basic MultiLambdaParam) - - set(RESOURCE "-") - foreach( NESTED_LOOP_TYPE ${NESTED_LOOPTYPES} ) - configure_file( test-kernel-nested-loop.cpp.in - test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.cpp ) - raja_add_test( NAME test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.cpp ) - - target_include_directories(test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.exe - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) - endforeach() - - endif() -endif() +##if(RAJA_ENABLE_TARGET_OPENMP) +## if(RAJA_TEST_OPENMP_TARGET_SUBSET) +## +## set(NESTED_LOOP_BACKEND OpenMPTarget) +## set(NESTED_LOOPTYPES Basic MultiLambdaParam) +## +## set(RESOURCE "-") +## foreach( NESTED_LOOP_TYPE ${NESTED_LOOPTYPES} ) +## configure_file( test-kernel-nested-loop.cpp.in +## test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.cpp ) +## raja_add_test( NAME test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND} +## SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.cpp ) +## +## target_include_directories(test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.exe +## PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) +## endforeach() +## +## endif() +##endif() unset( NESTED_LOOPTYPES ) diff --git a/test/functional/kernel/single-loop-tile-icount-tcount/CMakeLists.txt b/test/functional/kernel/single-loop-tile-icount-tcount/CMakeLists.txt index adc04fb1bc..b6d02f9da6 100644 --- a/test/functional/kernel/single-loop-tile-icount-tcount/CMakeLists.txt +++ b/test/functional/kernel/single-loop-tile-icount-tcount/CMakeLists.txt @@ -16,20 +16,27 @@ set(TILESIZES 8 32) # # Note: KERNEL_BACKENDS is defined in ../CMakeLists.txt # + +## +## Disable OpenMP Target tests, which cause front-end crash in LLVM based compilers +## +if(RAJA_ENABLE_TARGET_OPENMP) + if(RAJA_TEST_OPENMP_TARGET_SUBSET) + list(REMOVE_ITEM KERNEL_BACKENDS OpenMPTarget) + endif() +endif() + foreach( BACKEND ${KERNEL_BACKENDS} ) - # using omp target crashes the compiler with this one - if( NOT ((BACKEND STREQUAL "OpenMPTarget")) ) - foreach( TESTTYPE ${TESTTYPES} ) - foreach( TILESIZE ${TILESIZES} ) - configure_file( test-kernel-single-loop-tile-count.cpp.in - 
test-kernel-single-loop-${TESTTYPE}-${TILESIZE}-${BACKEND}.cpp ) - raja_add_test( NAME test-kernel-single-loop-${TESTTYPE}-${TILESIZE}-${BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-single-loop-${TESTTYPE}-${TILESIZE}-${BACKEND}.cpp ) - target_include_directories(test-kernel-single-loop-${TESTTYPE}-${TILESIZE}-${BACKEND}.exe - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) - endforeach() + foreach( TESTTYPE ${TESTTYPES} ) + foreach( TILESIZE ${TILESIZES} ) + configure_file( test-kernel-single-loop-tile-count.cpp.in + test-kernel-single-loop-${TESTTYPE}-${TILESIZE}-${BACKEND}.cpp ) + raja_add_test( NAME test-kernel-single-loop-${TESTTYPE}-${TILESIZE}-${BACKEND} + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-single-loop-${TESTTYPE}-${TILESIZE}-${BACKEND}.cpp ) + target_include_directories(test-kernel-single-loop-${TESTTYPE}-${TILESIZE}-${BACKEND}.exe + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) endforeach() - endif() + endforeach() endforeach() unset( TILESIZES ) diff --git a/test/functional/kernel/tile-variants/CMakeLists.txt b/test/functional/kernel/tile-variants/CMakeLists.txt index ac5ba913e9..32f254b42e 100644 --- a/test/functional/kernel/tile-variants/CMakeLists.txt +++ b/test/functional/kernel/tile-variants/CMakeLists.txt @@ -12,12 +12,14 @@ set(TILETYPES Fixed2D Fixed2DSum Fixed2DMinMax) foreach( TILE_BACKEND ${KERNEL_BACKENDS} ) foreach( TILE_TYPE ${TILETYPES} ) - # OpenMPTarget crashes the xl compiler when building this test... - if( NOT((TILE_BACKEND STREQUAL "OpenMPTarget")) ) + # + # OpenMPTarget tests fail for traditional RAJA reductions + # + if( (TILE_TYPE STREQUAL "Fixed2D") OR NOT((TILE_BACKEND STREQUAL "OpenMPTarget")) ) configure_file( test-kernel-tilefixed.cpp.in test-kernel-tile-${TILE_TYPE}-${TILE_BACKEND}.cpp ) raja_add_test( NAME test-kernel-tile-${TILE_TYPE}-${TILE_BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-tile-${TILE_TYPE}-${TILE_BACKEND}.cpp ) + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-tile-${TILE_TYPE}-${TILE_BACKEND}.cpp ) target_include_directories(test-kernel-tile-${TILE_TYPE}-${TILE_BACKEND}.exe PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) @@ -34,9 +36,10 @@ set(TILETYPES Dynamic2D) foreach( TILE_BACKEND ${KERNEL_BACKENDS} ) foreach( TILE_TYPE ${TILETYPES} ) - # Dynamic tiling not yet implemented for Cuda or Hip - # Removing OpenMPTarget because XLC compilation requires ~50 minutes - if( NOT ((TILE_BACKEND STREQUAL "Cuda") OR (TILE_BACKEND STREQUAL "Hip") OR (TILE_BACKEND STREQUAL "OpenMPTarget") OR (TILE_BACKEND STREQUAL "Sycl")) ) + # + # Dynamic tiling not yet implemented for CUDA, HIP, or SYCL + # + if( NOT ((TILE_BACKEND STREQUAL "Cuda") OR (TILE_BACKEND STREQUAL "Hip") OR (TILE_BACKEND STREQUAL "Sycl")) ) configure_file( test-kernel-tiledyn.cpp.in test-kernel-tile-${TILE_TYPE}-${TILE_BACKEND}.cpp ) raja_add_test( NAME test-kernel-tile-${TILE_TYPE}-${TILE_BACKEND} diff --git a/test/functional/workgroup/CMakeLists.txt b/test/functional/workgroup/CMakeLists.txt index 82d03626bf..2fd7cd8bff 100644 --- a/test/functional/workgroup/CMakeLists.txt +++ b/test/functional/workgroup/CMakeLists.txt @@ -39,16 +39,6 @@ if(RAJA_ENABLE_OPENMP) list(APPEND BACKENDS OpenMP) endif() -# -# If building a subset of openmp target tests, do not add the back-end to -# the list of tests to generate here. 
-# -if(RAJA_ENABLE_TARGET_OPENMP) - if(NOT RAJA_TEST_OPENMP_TARGET_SUBSET) - list(APPEND BACKENDS OpenMPTarget) - endif() -endif() - if(RAJA_ENABLE_CUDA) list(APPEND BACKENDS Cuda) endif() @@ -57,6 +47,10 @@ if(RAJA_ENABLE_HIP) list(APPEND BACKENDS Hip) endif() +## +## Do not create OpenMP Target or SYCL tests for workgroup, since not supported yet +## + set(DISPATCHERS IndirectFunction IndirectVirtual Direct) @@ -67,20 +61,6 @@ buildfunctionalworkgrouptest(Ordered "${Ordered_SUBTESTS}" "${DISPATCHERS}" "${B set(Unordered_SUBTESTS Single MultipleReuse) buildfunctionalworkgrouptest(Unordered "${Unordered_SUBTESTS}" "${DISPATCHERS}" "${BACKENDS}") -unset(BACKENDS) - -# -# If building a subset of openmp target tests, add tests to build here. -# -if(RAJA_ENABLE_TARGET_OPENMP) - if(RAJA_TEST_OPENMP_TARGET_SUBSET) - - set(BACKENDS OpenMPTarget) - buildfunctionalworkgrouptest(Unordered "${Unordered_SUBTESTS}" "${DISPATCHERS}" "${BACKENDS}") - - endif() -endif() - unset(DISPATCHERS) unset(BACKENDS) unset(Ordered_SUBTESTS) diff --git a/test/integration/CMakeLists.txt b/test/integration/CMakeLists.txt index 30538469fc..0679da91d4 100644 --- a/test/integration/CMakeLists.txt +++ b/test/integration/CMakeLists.txt @@ -20,7 +20,9 @@ if(RAJA_ENABLE_HIP) endif() if(RAJA_ENABLE_TARGET_OPENMP) - # list(APPEND PLUGIN_BACKENDS OpenMPTarget) + if(RAJA_TEST_OPENMP_TARGET_SUBSET) + list(REMOVE_ITEM FORALL_BACKENDS OpenMPTarget) + endif() endif() add_subdirectory(plugin) diff --git a/test/unit/algorithm/CMakeLists.txt b/test/unit/algorithm/CMakeLists.txt index ea93727d59..1fbb28ea8f 100644 --- a/test/unit/algorithm/CMakeLists.txt +++ b/test/unit/algorithm/CMakeLists.txt @@ -19,6 +19,9 @@ if(RAJA_ENABLE_HIP) list(APPEND SORT_BACKENDS Hip) endif() +## +## OpenMP Target back-end support missing for these tests +## # if(RAJA_ENABLE_TARGET_OPENMP) # list(APPEND SORT_BACKENDS OpenMPTarget) # endif() diff --git a/test/unit/indexing/CMakeLists.txt b/test/unit/indexing/CMakeLists.txt index b1f85247d0..a9a0735515 100644 --- a/test/unit/indexing/CMakeLists.txt +++ b/test/unit/indexing/CMakeLists.txt @@ -25,5 +25,5 @@ foreach( INDEXING_BACKEND ${INDEXING_BACKENDS} ) SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-indexing-global-${INDEXING_BACKEND}.cpp ) target_include_directories(test-indexing-global-${INDEXING_BACKEND}.exe - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) endforeach() diff --git a/test/unit/multi_reducer/CMakeLists.txt b/test/unit/multi_reducer/CMakeLists.txt index 94bbbc68d9..e0f584e474 100644 --- a/test/unit/multi_reducer/CMakeLists.txt +++ b/test/unit/multi_reducer/CMakeLists.txt @@ -37,7 +37,7 @@ if(RAJA_ENABLE_OPENMP) list(APPEND BACKENDS OpenMP) endif() -# Add this back in when OpenMP Target implementation exists for multi-reducer +# Add OpenMP Target tests when implementation exists for multi-reducer #if(RAJA_ENABLE_TARGET_OPENMP) # list(APPEND BACKENDS OpenMPTarget) #endif() diff --git a/test/unit/reducer/test-reducer-constructors-openmp-target.cpp b/test/unit/reducer/test-reducer-constructors-openmp-target.cpp index b3204c7827..a32a8660e4 100644 --- a/test/unit/reducer/test-reducer-constructors-openmp-target.cpp +++ b/test/unit/reducer/test-reducer-constructors-openmp-target.cpp @@ -12,6 +12,22 @@ #include "tests/test-reducer-constructors.hpp" #if defined(RAJA_ENABLE_TARGET_OPENMP) + +#if 0 +// Tests cannot be created since OpenMP Target reduction type constructor is +// explicitly marked deleted, which is inconsistent with other back-ends --RDH +using 
OpenMPTargetBasicReducerConstructorTypes = + Test< camp::cartesian_product< OpenMPTargetReducerPolicyList, + DataTypeList, + OpenMPTargetResourceList > >::Types; +INSTANTIATE_TYPED_TEST_SUITE_P(OpenMPTargetBasicTest, + ReducerBasicConstructorUnitTest, + OpenMPTargetBasicReducerConstructorTypes); +#else +// This is needed to suppress a runtime test error for uninstantiated test +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ReducerBasicConstructorUnitTest); +#endif + using OpenMPTargetInitReducerConstructorTypes = Test< camp::cartesian_product< OpenMPTargetReducerPolicyList, DataTypeList, From 9edc74b408681bca598413473569f8d6447296c8 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 8 Nov 2024 12:14:35 -0800 Subject: [PATCH 07/18] Modify GitLab CI job for OpenMP Target --- .gitlab/jobs/lassen.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 2724d38a6b..95df5f3db3 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -60,12 +60,8 @@ gcc_8_3_1_cuda_11_7_desul_atomics: MODULE_LIST: "cuda/11.7.0" extends: .job_on_lassen -# Warning: Allowed to fail temporarily -# Deactivated due to issues with OpenMP Target and various tests and compilers. -clang_16_0_6_ibm_omptarget: +clang_16_0_6_omptarget: variables: - SPEC: " ~shared +openmp +omptarget +tests %clang@=16.0.6.ibm.gcc.8.3.1" - ON_LASSEN: "OFF" + SPEC: " ~shared +openmp +omptarget +tests %clang@=16.0.6.cuda.11.8.0.gcc.11.2.1 cxxflags==\"-Wunknown-cuda-version\"" extends: .job_on_lassen - allow_failure: true From 969ac4f9f7a418172da6e16c57b8862ee9d13453 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 8 Nov 2024 13:16:55 -0800 Subject: [PATCH 08/18] Repair errant change --- .../kernel/nested-loop-segment-types/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test/functional/kernel/nested-loop-segment-types/CMakeLists.txt b/test/functional/kernel/nested-loop-segment-types/CMakeLists.txt index 5667607cd3..8275a4d4a5 100644 --- a/test/functional/kernel/nested-loop-segment-types/CMakeLists.txt +++ b/test/functional/kernel/nested-loop-segment-types/CMakeLists.txt @@ -12,12 +12,10 @@ # # -# While we're adding SYCL tests, enable it for each test set like this. -# -# Eventually, remove this and enable in the top-level CMakeLists.txt file. +# Eventually, enable SYCL tests when we know they work # if(RAJA_ENABLE_SYCL) - list(APPEND KERNEL_BACKENDS Sycl) + list(REMOVE_ITEM KERNEL_BACKENDS Sycl) endif() foreach( BACKEND ${KERNEL_BACKENDS} ) From ae4c8fb466b321d0af563a9c07af3f0380432d5f Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 8 Nov 2024 14:17:12 -0800 Subject: [PATCH 09/18] Remove compiler flag -- it doesn't work. TRy something else. 
--- .gitlab/jobs/lassen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 95df5f3db3..b9bd8bac8e 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -62,6 +62,6 @@ gcc_8_3_1_cuda_11_7_desul_atomics: clang_16_0_6_omptarget: variables: - SPEC: " ~shared +openmp +omptarget +tests %clang@=16.0.6.cuda.11.8.0.gcc.11.2.1 cxxflags==\"-Wunknown-cuda-version\"" + SPEC: " ~shared +openmp +omptarget +tests %clang@=16.0.6.cuda.11.8.0.gcc.11.2.1+allow-unsupported-compilers" extends: .job_on_lassen From 888d25af1c25f78f29104c61952edb3848ccd0de Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 8 Nov 2024 14:38:46 -0800 Subject: [PATCH 10/18] Remove debris --- .../kernel/nested-loop/CMakeLists.txt | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/test/functional/kernel/nested-loop/CMakeLists.txt b/test/functional/kernel/nested-loop/CMakeLists.txt index fbb2fc872a..bbef4c1f32 100644 --- a/test/functional/kernel/nested-loop/CMakeLists.txt +++ b/test/functional/kernel/nested-loop/CMakeLists.txt @@ -60,28 +60,3 @@ foreach( NESTED_LOOP_BACKEND ${KERNEL_BACKENDS} ) endforeach() unset( NESTED_LOOPTYPES ) - -# -# If building a subset of openmp target tests, add tests to build here. -# -##if(RAJA_ENABLE_TARGET_OPENMP) -## if(RAJA_TEST_OPENMP_TARGET_SUBSET) -## -## set(NESTED_LOOP_BACKEND OpenMPTarget) -## set(NESTED_LOOPTYPES Basic MultiLambdaParam) -## -## set(RESOURCE "-") -## foreach( NESTED_LOOP_TYPE ${NESTED_LOOPTYPES} ) -## configure_file( test-kernel-nested-loop.cpp.in -## test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.cpp ) -## raja_add_test( NAME test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND} -## SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.cpp ) -## -## target_include_directories(test-kernel-nested-loop-${NESTED_LOOP_TYPE}-${NESTED_LOOP_BACKEND}.exe -## PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) -## endforeach() -## -## endif() -##endif() - -unset( NESTED_LOOPTYPES ) From f247c11dacd6c9f692e4bf179f9aa37caa110c90 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Mon, 11 Nov 2024 08:47:26 -0800 Subject: [PATCH 11/18] Remove allow-unsupported-compilers, only applies to nvcc --- .gitlab/jobs/lassen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index b9bd8bac8e..7a8c2a5621 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -62,6 +62,6 @@ gcc_8_3_1_cuda_11_7_desul_atomics: clang_16_0_6_omptarget: variables: - SPEC: " ~shared +openmp +omptarget +tests %clang@=16.0.6.cuda.11.8.0.gcc.11.2.1+allow-unsupported-compilers" + SPEC: " ~shared +openmp +omptarget +tests %clang@=16.0.6.cuda.11.8.0.gcc.11.2.1" extends: .job_on_lassen From 9ee40f72d944af2b0df7f814a17881148e48dbc6 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 14 Nov 2024 13:37:55 -0800 Subject: [PATCH 12/18] Don't enable google becnhmark in corona build script. The dpcpp compiler does not support C++ standards less than 17 and the version of gbench that comes with BLT does not support C++17. 
--- scripts/lc-builds/corona_sycl.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/lc-builds/corona_sycl.sh b/scripts/lc-builds/corona_sycl.sh index 4b636e4da0..815928e434 100755 --- a/scripts/lc-builds/corona_sycl.sh +++ b/scripts/lc-builds/corona_sycl.sh @@ -55,7 +55,6 @@ cmake \ -DBLT_CXX_STD=c++17 \ -DENABLE_TESTS=On \ -DENABLE_EXAMPLES=On \ - -DENABLE_BENCHMARKS=On \ "$@" \ .. From bd9f2fb8f976cce77a2bdbd4375bd48f762fc5a5 Mon Sep 17 00:00:00 2001 From: Sean Miller Date: Fri, 16 Aug 2024 17:03:03 -0500 Subject: [PATCH 13/18] Various tweaks to enable Wave32 for Radeon cards with HIP. --- CMakeLists.txt | 1 + include/RAJA/config.hpp.in | 2 ++ include/RAJA/pattern/kernel/InitLocalMem.hpp | 9 +-------- include/RAJA/policy/hip/policy.hpp | 5 +++-- include/RAJA/policy/tensor/arch/hip/hip_wave.hpp | 9 +++++---- include/RAJA/policy/tensor/arch/hip/traits.hpp | 3 ++- test/include/RAJA_test-tensor.hpp | 4 +++- 7 files changed, 17 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b31cbe124..3eb0dbc8d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,6 +171,7 @@ endif() if(RAJA_ENABLE_HIP) message(STATUS "HIP version: ${hip_VERSION}") + set(RAJA_HIP_WAVESIZE "64" CACHE STRING "Set the wave size for GPU architecture. E.g. MI200/MI300 this is 64.") if("${hip_VERSION}" VERSION_LESS "3.5") message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA requires HIP/ROCm version 3.5 or newer. ") endif() diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 380418efa1..29d97fed69 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -182,6 +182,8 @@ static_assert(RAJA_HAS_SOME_CXX14, #cmakedefine RAJA_ENABLE_NV_TOOLS_EXT #cmakedefine RAJA_ENABLE_ROCTX +#cmakedefine RAJA_HIP_WAVESIZE @RAJA_HIP_WAVESIZE@ + /*! ****************************************************************************** * diff --git a/include/RAJA/pattern/kernel/InitLocalMem.hpp b/include/RAJA/pattern/kernel/InitLocalMem.hpp index 21d9e3cd2a..e4d5cc9567 100644 --- a/include/RAJA/pattern/kernel/InitLocalMem.hpp +++ b/include/RAJA/pattern/kernel/InitLocalMem.hpp @@ -77,23 +77,16 @@ struct StatementExecutor::param_tuple_t>::value_type; // Initialize memory -#ifdef RAJA_COMPILER_MSVC - // MSVC doesn't like taking a pointer to stack allocated data?!?! varType *ptr = new varType[camp::get(data.param_tuple).size()]; camp::get(data.param_tuple).set_data(ptr); -#else - varType Array[camp::get(data.param_tuple).size()]; - camp::get(data.param_tuple).set_data(&Array[0]); -#endif + // Initialize others and execute exec_expanded(data); // Cleanup and return camp::get(data.param_tuple).set_data(nullptr); -#ifdef RAJA_COMPILER_MSVC delete[] ptr; -#endif } diff --git a/include/RAJA/policy/hip/policy.hpp b/include/RAJA/policy/hip/policy.hpp index a9f9027675..040de50f31 100644 --- a/include/RAJA/policy/hip/policy.hpp +++ b/include/RAJA/policy/hip/policy.hpp @@ -324,8 +324,9 @@ struct DeviceConstants // values for HIP warp size and max block size. 
// #if defined(__HIP_PLATFORM_AMD__) -constexpr DeviceConstants device_constants(64, 1024, 64); // MI300A -// constexpr DeviceConstants device_constants(64, 1024, 128); // MI250X +constexpr DeviceConstants device_constants(RAJA_HIP_WAVESIZE, 1024, 64); // MI300A +// constexpr DeviceConstants device_constants(RAJA_HIP_WAVESIZE, 1024, 128); // MI250X + #elif defined(__HIP_PLATFORM_NVIDIA__) constexpr DeviceConstants device_constants(32, 1024, 32); // V100 #endif diff --git a/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp b/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp index 74bbc2f077..3e1cff1d56 100644 --- a/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp +++ b/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp @@ -57,7 +57,7 @@ namespace expt public: - static constexpr int s_num_elem = 64; + static constexpr int s_num_elem = policy::hip::device_constants.WARP_SIZE; /*! * @brief Default constructor, zeros register contents @@ -780,8 +780,8 @@ namespace expt // Third: mask off everything but output_segment // this is because all output segments are valid at this point - // (5-segbits), the 5 is since the warp-width is 32 == 1<<5 - int our_output_segment = get_lane()>>(6-segbits); + constexpr int log2_warp_size = RAJA::log2(RAJA::policy::hip::device_constants.WARP_SIZE); + int our_output_segment = get_lane()>>(log2_warp_size-segbits); bool in_output_segment = our_output_segment == output_segment; if(!in_output_segment){ result.get_raw_value() = 0; @@ -828,8 +828,9 @@ namespace expt // First: tree reduce values within each segment element_type x = m_value; + constexpr int log2_warp_size = RAJA::log2(RAJA::policy::hip::device_constants.WARP_SIZE); RAJA_UNROLL - for(int i = 0;i < 6-segbits; ++ i){ + for(int i = 0;i < log2_warp_size-segbits; ++ i){ // tree shuffle int delta = s_num_elem >> (i+1); diff --git a/include/RAJA/policy/tensor/arch/hip/traits.hpp b/include/RAJA/policy/tensor/arch/hip/traits.hpp index 4c4d959599..1b8a9679bb 100644 --- a/include/RAJA/policy/tensor/arch/hip/traits.hpp +++ b/include/RAJA/policy/tensor/arch/hip/traits.hpp @@ -29,7 +29,8 @@ namespace expt { struct RegisterTraits{ using element_type = T; using register_policy = RAJA::expt::hip_wave_register; - static constexpr camp::idx_t s_num_elem = 64; + static constexpr camp::idx_t s_num_elem = policy::hip::device_constants.WARP_SIZE; + static constexpr camp::idx_t s_num_bits = sizeof(T) * s_num_elem; using int_element_type = int32_t; }; diff --git a/test/include/RAJA_test-tensor.hpp b/test/include/RAJA_test-tensor.hpp index cf633098a9..83ef4fe49f 100644 --- a/test/include/RAJA_test-tensor.hpp +++ b/test/include/RAJA_test-tensor.hpp @@ -87,7 +87,9 @@ struct TensorTestHelper void exec(BODY const &body){ hipDeviceSynchronize(); - RAJA::forall>(RAJA::RangeSegment(0,64), + constexpr int warp_size = RAJA::policy::hip::device_constants.WARP_SIZE; + + RAJA::forall>(RAJA::RangeSegment(0,warp_size), [=] RAJA_HOST_DEVICE (int ){ body(); }); From f352a5f84bd28bcb20b59b6398b21d837bac9b1c Mon Sep 17 00:00:00 2001 From: Sean Miller Date: Mon, 18 Nov 2024 11:33:30 -0600 Subject: [PATCH 14/18] Removing dynamic stack allocation fix --- include/RAJA/pattern/kernel/InitLocalMem.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/RAJA/pattern/kernel/InitLocalMem.hpp b/include/RAJA/pattern/kernel/InitLocalMem.hpp index e4d5cc9567..21d9e3cd2a 100644 --- a/include/RAJA/pattern/kernel/InitLocalMem.hpp +++ b/include/RAJA/pattern/kernel/InitLocalMem.hpp @@ -77,16 +77,23 @@ struct 
StatementExecutor::param_tuple_t>::value_type; // Initialize memory +#ifdef RAJA_COMPILER_MSVC + // MSVC doesn't like taking a pointer to stack allocated data?!?! varType *ptr = new varType[camp::get(data.param_tuple).size()]; camp::get(data.param_tuple).set_data(ptr); - +#else + varType Array[camp::get(data.param_tuple).size()]; + camp::get(data.param_tuple).set_data(&Array[0]); +#endif // Initialize others and execute exec_expanded(data); // Cleanup and return camp::get(data.param_tuple).set_data(nullptr); +#ifdef RAJA_COMPILER_MSVC delete[] ptr; +#endif } From 24e345385f1f4df6b805581dfab26cf188339633 Mon Sep 17 00:00:00 2001 From: "Richard D. Hornung" Date: Mon, 25 Nov 2024 10:44:03 -0800 Subject: [PATCH 15/18] Remove deprecated SYCL usage for newer compilers --- examples/memoryManager.hpp | 2 +- exercises/memoryManager.hpp | 2 +- include/RAJA/pattern/launch/launch_core.hpp | 2 +- include/RAJA/policy/sycl/MemUtils_SYCL.hpp | 4 +- include/RAJA/policy/sycl/forall.hpp | 4 +- .../RAJA/policy/sycl/kernel/Conditional.hpp | 2 +- include/RAJA/policy/sycl/kernel/For.hpp | 14 ++-- include/RAJA/policy/sycl/kernel/ForICount.hpp | 14 ++-- include/RAJA/policy/sycl/kernel/Lambda.hpp | 2 +- .../RAJA/policy/sycl/kernel/SyclKernel.hpp | 18 ++--- include/RAJA/policy/sycl/kernel/Tile.hpp | 10 +-- .../RAJA/policy/sycl/kernel/TileTCount.hpp | 10 +-- include/RAJA/policy/sycl/kernel/internal.hpp | 18 ++--- include/RAJA/policy/sycl/launch.hpp | 32 ++++----- include/RAJA/policy/sycl/policy.hpp | 2 +- include/RAJA/policy/sycl/reduce.hpp | 66 +++++++++---------- src/MemUtils_SYCL.cpp | 4 +- tpl/camp | 2 +- 18 files changed, 104 insertions(+), 104 deletions(-) diff --git a/examples/memoryManager.hpp b/examples/memoryManager.hpp index 62d3d6e3e7..685a572033 100644 --- a/examples/memoryManager.hpp +++ b/examples/memoryManager.hpp @@ -76,7 +76,7 @@ void deallocate(T *&ptr) hipErrchk(hipMalloc((void **)&ptr, sizeof(T) * size)); #elif defined(RAJA_ENABLE_SYCL) auto qu = sycl_res->get().get_queue(); - ptr = cl::sycl::malloc_device(size, *qu); + ptr = ::sycl::malloc_device(size, *qu); #endif return ptr; } diff --git a/exercises/memoryManager.hpp b/exercises/memoryManager.hpp index 62d3d6e3e7..685a572033 100644 --- a/exercises/memoryManager.hpp +++ b/exercises/memoryManager.hpp @@ -76,7 +76,7 @@ void deallocate(T *&ptr) hipErrchk(hipMalloc((void **)&ptr, sizeof(T) * size)); #elif defined(RAJA_ENABLE_SYCL) auto qu = sycl_res->get().get_queue(); - ptr = cl::sycl::malloc_device(size, *qu); + ptr = ::sycl::malloc_device(size, *qu); #endif return ptr; } diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index f1d70aeacb..7ea7ce57ef 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -162,7 +162,7 @@ class LaunchContext void *shared_mem_ptr; #if defined(RAJA_ENABLE_SYCL) - mutable cl::sycl::nd_item<3> *itm; + mutable ::sycl::nd_item<3> *itm; #endif RAJA_HOST_DEVICE LaunchContext() diff --git a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp index 081a88dc23..1a8c9930dd 100644 --- a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp +++ b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp @@ -50,7 +50,7 @@ namespace detail struct syclInfo { sycl_dim_t gridDim{0}; sycl_dim_t blockDim{0}; - cl::sycl::queue qu = cl::sycl::queue(); + ::sycl::queue qu = ::sycl::queue(); bool setup_reducers = false; #if defined(RAJA_ENABLE_OPENMP) syclInfo* thread_states = nullptr; @@ -62,7 +62,7 @@ extern syclInfo 
g_status; extern syclInfo tl_status; -extern std::unordered_map g_queue_info_map; +extern std::unordered_map<::sycl::queue, bool> g_queue_info_map; } // namespace detail diff --git a/include/RAJA/policy/sycl/forall.hpp b/include/RAJA/policy/sycl/forall.hpp index 901cc694f0..1c6876e328 100644 --- a/include/RAJA/policy/sycl/forall.hpp +++ b/include/RAJA/policy/sycl/forall.hpp @@ -206,8 +206,8 @@ resources::EventProxy forall_impl(resources::Sycl &sycl_res, }).wait(); // Need to wait for completion to free memory // Free our device memory - cl::sycl::free(lbody, *q); - cl::sycl::free(beg, *q); + ::sycl::free(lbody, *q); + ::sycl::free(beg, *q); RAJA_FT_END; } diff --git a/include/RAJA/policy/sycl/kernel/Conditional.hpp b/include/RAJA/policy/sycl/kernel/Conditional.hpp index 9149418518..e2e6b09e6d 100644 --- a/include/RAJA/policy/sycl/kernel/Conditional.hpp +++ b/include/RAJA/policy/sycl/kernel/Conditional.hpp @@ -51,7 +51,7 @@ struct SyclStatementExecutor item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { if (Conditional::eval(data)) { diff --git a/include/RAJA/policy/sycl/kernel/For.hpp b/include/RAJA/policy/sycl/kernel/For.hpp index d0976b931f..2019bfa7a9 100644 --- a/include/RAJA/policy/sycl/kernel/For.hpp +++ b/include/RAJA/policy/sycl/kernel/For.hpp @@ -59,7 +59,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i = item.get_global_id(Dim); @@ -124,7 +124,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i = item.get_group(Dim); @@ -187,7 +187,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i0 = item.get_group(Dim); @@ -253,7 +253,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i = item.get_local_id(Dim); @@ -317,7 +317,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i0 = item.get_local_id(Dim); @@ -393,7 +393,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item) { auto len = segment_length(data); auto i = item.get_global_id(0); @@ -454,7 +454,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { using idx_type = 
camp::decay(data.offset_tuple))>; diff --git a/include/RAJA/policy/sycl/kernel/ForICount.hpp b/include/RAJA/policy/sycl/kernel/ForICount.hpp index 9c25bb0ab9..fcd3b1824d 100644 --- a/include/RAJA/policy/sycl/kernel/ForICount.hpp +++ b/include/RAJA/policy/sycl/kernel/ForICount.hpp @@ -63,7 +63,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { diff_t len = segment_length(data); auto i = item.get_local_id(ThreadDim); @@ -121,7 +121,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { diff_t len = segment_length(data); auto i0 = item.get_local_id(0); @@ -181,7 +181,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // masked size strided loop diff_t len = segment_length(data); @@ -243,7 +243,7 @@ struct SyclStatementExecutor< using typename Base::diff_t; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // block stride loop diff_t len = segment_length(data); @@ -300,7 +300,7 @@ struct SyclStatementExecutor< using typename Base::diff_t; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // grid stride loop diff_t len = segment_length(data); @@ -349,7 +349,7 @@ struct SyclStatementExecutor< using typename Base::diff_t; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // grid stride loop diff_t len = segment_length(data); @@ -399,7 +399,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { diff_t len = segment_length(data); diff --git a/include/RAJA/policy/sycl/kernel/Lambda.hpp b/include/RAJA/policy/sycl/kernel/Lambda.hpp index 0542f4b81e..05f4fb3a44 100644 --- a/include/RAJA/policy/sycl/kernel/Lambda.hpp +++ b/include/RAJA/policy/sycl/kernel/Lambda.hpp @@ -46,7 +46,7 @@ template , Types> { static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Only execute the lambda if it hasn't been masked off if(thread_active){ diff --git a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp index 88c789c062..f339bccef5 100644 --- a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp +++ b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp @@ -93,7 +93,7 @@ namespace internal * SYCL global function for launching SyclKernel policies. 
*/ template -void SyclKernelLauncher(Data data, cl::sycl::nd_item<3> item) +void SyclKernelLauncher(Data data, ::sycl::nd_item<3> item) { using data_t = camp::decay; @@ -128,7 +128,7 @@ struct SyclLaunchHelper,StmtList,Data,Types> static void launch(Data &&data, internal::LaunchDims launch_dims, size_t shmem, - cl::sycl::queue* qu) + ::sycl::queue* qu) { // @@ -136,20 +136,20 @@ struct SyclLaunchHelper,StmtList,Data,Types> // Kernel body is nontrivially copyable, create space on device and copy to // Workaround until "is_device_copyable" is supported // - data_t* m_data = (data_t*) cl::sycl::malloc_device(sizeof(data_t), *qu); + data_t* m_data = (data_t*) ::sycl::malloc_device(sizeof(data_t), *qu); qu->memcpy(m_data, &data, sizeof(data_t)).wait(); - qu->submit([&](cl::sycl::handler& h) { + qu->submit([&](::sycl::handler& h) { h.parallel_for(launch_dims.fit_nd_range(qu), - [=] (cl::sycl::nd_item<3> item) { + [=] (::sycl::nd_item<3> item) { SyclKernelLauncher(*m_data, item); }); }).wait(); // Need to wait to free memory - cl::sycl::free(m_data, *qu); + ::sycl::free(m_data, *qu); } }; @@ -172,13 +172,13 @@ struct SyclLaunchHelper,StmtList,Data,Types> static void launch(Data &&data, internal::LaunchDims launch_dims, size_t shmem, - cl::sycl::queue* qu) + ::sycl::queue* qu) { - qu->submit([&](cl::sycl::handler& h) { + qu->submit([&](::sycl::handler& h) { h.parallel_for(launch_dims.fit_nd_range(qu), - [=] (cl::sycl::nd_item<3> item) { + [=] (::sycl::nd_item<3> item) { SyclKernelLauncher(data, item); diff --git a/include/RAJA/policy/sycl/kernel/Tile.hpp b/include/RAJA/policy/sycl/kernel/Tile.hpp index 81a57cdecb..59590b2556 100644 --- a/include/RAJA/policy/sycl/kernel/Tile.hpp +++ b/include/RAJA/policy/sycl/kernel/Tile.hpp @@ -61,7 +61,7 @@ struct SyclStatementExecutor< using enclosed_stmts_t = SyclStatementListExecutor; using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active){ + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active){ // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -139,7 +139,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -231,7 +231,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -321,7 +321,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -409,7 +409,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + static inline RAJA_DEVICE void exec(Data &data, 
::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); diff --git a/include/RAJA/policy/sycl/kernel/TileTCount.hpp b/include/RAJA/policy/sycl/kernel/TileTCount.hpp index b1d263a263..f27dafca80 100644 --- a/include/RAJA/policy/sycl/kernel/TileTCount.hpp +++ b/include/RAJA/policy/sycl/kernel/TileTCount.hpp @@ -70,7 +70,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active){ + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active){ // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -141,7 +141,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -214,7 +214,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -289,7 +289,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -363,7 +363,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); diff --git a/include/RAJA/policy/sycl/kernel/internal.hpp b/include/RAJA/policy/sycl/kernel/internal.hpp index 56e3a9aa1e..3498550c25 100644 --- a/include/RAJA/policy/sycl/kernel/internal.hpp +++ b/include/RAJA/policy/sycl/kernel/internal.hpp @@ -86,7 +86,7 @@ struct LaunchDims { return result; } - cl::sycl::nd_range<3> fit_nd_range(::sycl::queue* q) { + ::sycl::nd_range<3> fit_nd_range(::sycl::queue* q) { sycl_dim_3_t launch_global; @@ -95,9 +95,9 @@ struct LaunchDims { launch_local.y = std::max(launch_local.y, local.y); launch_local.z = std::max(launch_local.z, local.z); - cl::sycl::device dev = q->get_device(); + ::sycl::device dev = q->get_device(); - auto max_work_group_size = dev.get_info< ::cl::sycl::info::device::max_work_group_size>(); + auto max_work_group_size = dev.get_info< ::sycl::info::device::max_work_group_size>(); if(launch_local.x > max_work_group_size) { launch_local.x = max_work_group_size; @@ -160,10 +160,10 @@ struct LaunchDims { launch_global.z = ((launch_global.z / launch_local.z) + 1) * launch_local.z; } - cl::sycl::range<3> ret_th = {launch_local.x, launch_local.y, launch_local.z}; - cl::sycl::range<3> ret_gl = {launch_global.x, launch_global.y, launch_global.z}; + ::sycl::range<3> ret_th = {launch_local.x, launch_local.y, launch_local.z}; + ::sycl::range<3> ret_gl = {launch_global.x, launch_global.y, launch_global.z}; - return cl::sycl::nd_range<3>(ret_gl, ret_th); + return ::sycl::nd_range<3>(ret_gl, ret_th); } }; @@ -176,7 +176,7 @@ struct SyclStatementListExecutorHelper { using cur_stmt_t = camp::at_v; template 
- inline static RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Execute stmt cur_stmt_t::exec(data, item, thread_active); @@ -203,7 +203,7 @@ template struct SyclStatementListExecutorHelper { template - inline static RAJA_DEVICE void exec(Data &, cl::sycl::nd_item<3> item, bool) + inline static RAJA_DEVICE void exec(Data &, ::sycl::nd_item<3> item, bool) { // nop terminator } @@ -233,7 +233,7 @@ struct SyclStatementListExecutor, Types> { static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Execute statements in order with helper class SyclStatementListExecutorHelper<0, num_stmts, enclosed_stmts_t>::exec(data, item, thread_active); diff --git a/include/RAJA/policy/sycl/launch.hpp b/include/RAJA/policy/sycl/launch.hpp index ad9fecc222..c8bc7aab53 100644 --- a/include/RAJA/policy/sycl/launch.hpp +++ b/include/RAJA/policy/sycl/launch.hpp @@ -63,13 +63,13 @@ struct LaunchExecute> { RAJA_FT_BEGIN; - q->submit([&](cl::sycl::handler& h) { + q->submit([&](::sycl::handler& h) { auto s_vec = ::sycl::local_accessor (params.shared_mem_size, h); h.parallel_for - (cl::sycl::nd_range<3>(gridSize, blockSize), - [=] (cl::sycl::nd_item<3> itm) { + (::sycl::nd_range<3>(gridSize, blockSize), + [=] (::sycl::nd_item<3> itm) { LaunchContext ctx; ctx.itm = &itm; @@ -136,14 +136,14 @@ struct LaunchExecute> { RAJA::expt::ParamMultiplexer::init(*res); auto reduction = ::sycl::reduction(res, launch_reducers, combiner); - q->submit([&](cl::sycl::handler& h) { + q->submit([&](::sycl::handler& h) { auto s_vec = ::sycl::local_accessor (launch_params.shared_mem_size, h); h.parallel_for - (cl::sycl::nd_range<3>(gridSize, blockSize), + (::sycl::nd_range<3>(gridSize, blockSize), reduction, - [=] (cl::sycl::nd_item<3> itm, auto & red) { + [=] (::sycl::nd_item<3> itm, auto & red) { LaunchContext ctx; ctx.itm = &itm; @@ -211,16 +211,16 @@ struct LaunchExecute> { // using LOOP_BODY = camp::decay; LOOP_BODY* lbody; - lbody = (LOOP_BODY*) cl::sycl::malloc_device(sizeof(LOOP_BODY), *q); + lbody = (LOOP_BODY*) ::sycl::malloc_device(sizeof(LOOP_BODY), *q); q->memcpy(lbody, &body_in, sizeof(LOOP_BODY)).wait(); - q->submit([&](cl::sycl::handler& h) { + q->submit([&](::sycl::handler& h) { auto s_vec = ::sycl::local_accessor (params.shared_mem_size, h); h.parallel_for - (cl::sycl::nd_range<3>(gridSize, blockSize), - [=] (cl::sycl::nd_item<3> itm) { + (::sycl::nd_range<3>(gridSize, blockSize), + [=] (::sycl::nd_item<3> itm) { LaunchContext ctx; ctx.itm = &itm; @@ -234,7 +234,7 @@ struct LaunchExecute> { }).wait(); // Need to wait for completion to free memory - cl::sycl::free(lbody, *q); + ::sycl::free(lbody, *q); RAJA_FT_END; @@ -290,21 +290,21 @@ struct LaunchExecute> { // using LOOP_BODY = camp::decay; LOOP_BODY* lbody; - lbody = (LOOP_BODY*) cl::sycl::malloc_device(sizeof(LOOP_BODY), *q); + lbody = (LOOP_BODY*) ::sycl::malloc_device(sizeof(LOOP_BODY), *q); q->memcpy(lbody, &body_in, sizeof(LOOP_BODY)).wait(); ReduceParams* res = ::sycl::malloc_shared(1,*q); RAJA::expt::ParamMultiplexer::init(*res); auto reduction = ::sycl::reduction(res, launch_reducers, combiner); - q->submit([&](cl::sycl::handler& h) { + q->submit([&](::sycl::handler& h) { auto s_vec = ::sycl::local_accessor (launch_params.shared_mem_size, h); h.parallel_for - (cl::sycl::nd_range<3>(gridSize, blockSize), + 
(::sycl::nd_range<3>(gridSize, blockSize), reduction, - [=] (cl::sycl::nd_item<3> itm, auto & red) { + [=] (::sycl::nd_item<3> itm, auto & red) { LaunchContext ctx; ctx.itm = &itm; @@ -325,7 +325,7 @@ struct LaunchExecute> { RAJA::expt::ParamMultiplexer::combine( launch_reducers, *res ); ::sycl::free(res, *q); - cl::sycl::free(lbody, *q); + ::sycl::free(lbody, *q); RAJA_FT_END; } diff --git a/include/RAJA/policy/sycl/policy.hpp b/include/RAJA/policy/sycl/policy.hpp index 0f92fe27e1..e850994fef 100644 --- a/include/RAJA/policy/sycl/policy.hpp +++ b/include/RAJA/policy/sycl/policy.hpp @@ -39,7 +39,7 @@ struct uint3 { unsigned long x, y, z; }; -using sycl_dim_t = cl::sycl::range<1>; +using sycl_dim_t = sycl::range<1>; using sycl_dim_3_t = uint3; diff --git a/include/RAJA/policy/sycl/reduce.hpp b/include/RAJA/policy/sycl/reduce.hpp index 49d89b3cd2..8a84d5b412 100644 --- a/include/RAJA/policy/sycl/reduce.hpp +++ b/include/RAJA/policy/sycl/reduce.hpp @@ -107,11 +107,11 @@ struct Reduce_Data Reduce_Data(T initValue, T identityValue, Offload_Info &info) : value(initValue) { - cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); - device = reinterpret_cast(cl::sycl::malloc_device(sycl::MaxNumTeams * sizeof(T), *(q))); - host = reinterpret_cast(cl::sycl::malloc_host(sycl::MaxNumTeams * sizeof(T), *(q))); + device = reinterpret_cast(::sycl::malloc_device(sycl::MaxNumTeams * sizeof(T), *(q))); + host = reinterpret_cast(::sycl::malloc_host(sycl::MaxNumTeams * sizeof(T), *(q))); if (!host) { printf("Unable to allocate space on host\n"); @@ -136,7 +136,7 @@ struct Reduce_Data //! transfers from the host to the device -- exit() is called upon failure RAJA_INLINE void hostToDevice(Offload_Info &info) { - cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); if(!q) { camp::resources::Resource res = camp::resources::Sycl(); @@ -154,7 +154,7 @@ struct Reduce_Data //! transfers from the device to the host -- exit() is called upon failure RAJA_INLINE void deviceToHost(Offload_Info &info) { - cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); if(!q) { camp::resources::Resource res = camp::resources::Sycl(); @@ -172,14 +172,14 @@ struct Reduce_Data //! 
frees all data from the offload information passed RAJA_INLINE void cleanup(Offload_Info &info) { - cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); if (device) { - cl::sycl::free(reinterpret_cast(device), *q); + ::sycl::free(reinterpret_cast(device), *q); device = nullptr; } if (host) { - cl::sycl::free(reinterpret_cast(host), *q); + ::sycl::free(reinterpret_cast(host), *q); //delete[] host; host = nullptr; } @@ -243,8 +243,8 @@ struct TargetReduce TargetReduce &reduce(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(val.device[i]); + auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(val.device[i]); Reducer{}(atm, rhsVal); return *this; #else @@ -257,8 +257,8 @@ struct TargetReduce const TargetReduce &reduce(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(val.device[i]); + auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(val.device[i]); Reducer{}(atm, rhsVal); return *this; #else @@ -356,10 +356,10 @@ struct TargetReduceLoc TargetReduceLoc &reduce(T rhsVal, IndexType rhsLoc) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - cl::sycl::atomic_fence(cl::sycl::memory_order_acquire, cl::sycl::memory_scope::device); + auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + ::sycl::atomic_fence(::sycl::memory_order_acquire, ::sycl::memory_scope::device); Reducer{}(val.device[i], loc.device[i], rhsVal, rhsLoc); - cl::sycl::atomic_fence(cl::sycl::memory_order_release, cl::sycl::memory_scope::device); + ::sycl::atomic_fence(::sycl::memory_order_release, ::sycl::memory_scope::device); return *this; #else Reducer{}(val.value, loc.value, rhsVal, rhsLoc); @@ -415,8 +415,8 @@ class ReduceSum const self &operator+=(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_add(rhsVal); return *this; #else @@ -441,8 +441,8 @@ class ReduceBitOr self &operator|=(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm |= rhsVal; return *this; #else @@ -455,8 +455,8 @@ class ReduceBitOr const self &operator|=(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm |= rhsVal; return *this; #else @@ -481,8 +481,8 @@ class ReduceBitAnd self &operator&=(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = 
::sycl::atomic_ref(parent::val.device[i]); atm &= rhsVal; return *this; #else @@ -495,8 +495,8 @@ class ReduceBitAnd const self &operator&=(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm &= rhsVal; return *this; #else @@ -522,8 +522,8 @@ class ReduceMin self &min(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_min(rhsVal); return *this; #else @@ -536,8 +536,8 @@ class ReduceMin const self &min(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_min(rhsVal); return *this; #else @@ -563,8 +563,8 @@ class ReduceMax self &max(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_max(rhsVal); return *this; #else @@ -577,8 +577,8 @@ class ReduceMax const self &max(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_max(rhsVal); return *this; #else diff --git a/src/MemUtils_SYCL.cpp b/src/MemUtils_SYCL.cpp index 0b5f1b8be6..3a3976c675 100644 --- a/src/MemUtils_SYCL.cpp +++ b/src/MemUtils_SYCL.cpp @@ -49,8 +49,8 @@ syclInfo tl_status; #endif //! 
State of raja sycl queue synchronization for sycl reducer objects -std::unordered_map g_queue_info_map{ - {cl::sycl::queue(), true}}; +std::unordered_map<::sycl::queue, bool> g_queue_info_map{ + {::sycl::queue(), true}}; } // namespace detail diff --git a/tpl/camp b/tpl/camp index 0f07de4240..7866dc34a4 160000 --- a/tpl/camp +++ b/tpl/camp @@ -1 +1 @@ -Subproject commit 0f07de4240c42e0b38a8d872a20440cb4b33d9f5 +Subproject commit 7866dc34a4f86b6c58891f6daf9f6da0f89b3d94 From d0a65e74e7b4b0e0d3d308de011d8d2faefa4be7 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Mon, 25 Nov 2024 11:29:29 -0800 Subject: [PATCH 16/18] Apply suggestions from code review --- include/RAJA/policy/tensor/arch/hip/hip_wave.hpp | 4 ++-- test/include/RAJA_test-tensor.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp b/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp index 3e1cff1d56..f1810807f9 100644 --- a/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp +++ b/include/RAJA/policy/tensor/arch/hip/hip_wave.hpp @@ -780,7 +780,7 @@ namespace expt // Third: mask off everything but output_segment // this is because all output segments are valid at this point - constexpr int log2_warp_size = RAJA::log2(RAJA::policy::hip::device_constants.WARP_SIZE); + static constexpr int log2_warp_size = RAJA::log2(RAJA::policy::hip::device_constants.WARP_SIZE); int our_output_segment = get_lane()>>(log2_warp_size-segbits); bool in_output_segment = our_output_segment == output_segment; if(!in_output_segment){ @@ -828,7 +828,7 @@ namespace expt // First: tree reduce values within each segment element_type x = m_value; - constexpr int log2_warp_size = RAJA::log2(RAJA::policy::hip::device_constants.WARP_SIZE); + static constexpr int log2_warp_size = RAJA::log2(RAJA::policy::hip::device_constants.WARP_SIZE); RAJA_UNROLL for(int i = 0;i < log2_warp_size-segbits; ++ i){ diff --git a/test/include/RAJA_test-tensor.hpp b/test/include/RAJA_test-tensor.hpp index 83ef4fe49f..d836e1463f 100644 --- a/test/include/RAJA_test-tensor.hpp +++ b/test/include/RAJA_test-tensor.hpp @@ -87,7 +87,7 @@ struct TensorTestHelper void exec(BODY const &body){ hipDeviceSynchronize(); - constexpr int warp_size = RAJA::policy::hip::device_constants.WARP_SIZE; + static constexpr int warp_size = RAJA::policy::hip::device_constants.WARP_SIZE; RAJA::forall>(RAJA::RangeSegment(0,warp_size), [=] RAJA_HOST_DEVICE (int ){ From 775e869a0a755edac89f71f2ee75952a74df9bc4 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Tue, 26 Nov 2024 11:03:02 -0800 Subject: [PATCH 17/18] Update examples/dynamic-forall.cpp Co-authored-by: Robert Chen --- examples/dynamic-forall.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/dynamic-forall.cpp b/examples/dynamic-forall.cpp index 7c037571e9..d405408e7f 100644 --- a/examples/dynamic-forall.cpp +++ b/examples/dynamic-forall.cpp @@ -95,7 +95,7 @@ int main(int argc, char *argv[]) }); // _rajaseq_vector_add_end - std::cout<<"Sum = "< Date: Tue, 10 Dec 2024 14:47:11 -0800 Subject: [PATCH 18/18] Global scope for consistency --- include/RAJA/policy/sycl/policy.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/policy/sycl/policy.hpp b/include/RAJA/policy/sycl/policy.hpp index e850994fef..a02c9ea30f 100644 --- a/include/RAJA/policy/sycl/policy.hpp +++ b/include/RAJA/policy/sycl/policy.hpp @@ -39,7 +39,7 @@ struct uint3 { unsigned long x, y, z; }; -using sycl_dim_t = sycl::range<1>; +using sycl_dim_t = ::sycl::range<1>; 
using sycl_dim_3_t = uint3;
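
Note on the "::sycl" qualification in the hunk above: RAJA's SYCL backend code sits inside a namespace that is itself named "sycl" (the hip counterpart "RAJA::policy::hip" appears explicitly in earlier hunks), so an unqualified "sycl::range<1>" written inside that namespace is looked up against the enclosing "sycl" namespace rather than the global SYCL namespace, while the pre-SYCL-2020 "cl::sycl" spelling is deprecated in newer compilers (the motivation for patch 15). The following is a minimal illustrative sketch of that lookup behavior, not part of the patch; the namespace layout is assumed to mirror the RAJA headers shown above, and the header name follows the SYCL 2020 convention.

    // sketch only: shows why the leading "::" is needed when an enclosing
    // namespace is itself named "sycl"
    #include <sycl/sycl.hpp>   // SYCL 2020 header; older code used <CL/sycl.hpp>

    namespace RAJA {
    namespace policy {
    namespace sycl {           // enclosing namespace shares the name "sycl"

    // Unqualified "sycl::range<1>" here would resolve the name "sycl" to this
    // enclosing namespace (found in RAJA::policy), not to the global SYCL
    // namespace, and would fail to find "range". The leading "::" forces
    // lookup to start at global scope:
    using sycl_dim_t = ::sycl::range<1>;

    }  // namespace sycl
    }  // namespace policy
    }  // namespace RAJA

The same reasoning applies to the "cl::sycl" -> "::sycl" replacements throughout patch 15: fully qualifying from global scope keeps the spelling unambiguous regardless of which RAJA namespace the call site lives in.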