From 932a0901c099d7e5cb7a73fc2744109dcbac0e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Long?= Date: Wed, 4 Dec 2024 09:03:52 -0800 Subject: [PATCH 1/7] Rough implementation of scan algorithm. --- include/RAJA/policy/sycl.hpp | 4 +- include/RAJA/policy/sycl/scan.hpp | 167 ++++++++++++++++++++++++++++ test/functional/scan/CMakeLists.txt | 8 +- 3 files changed, 175 insertions(+), 4 deletions(-) create mode 100644 include/RAJA/policy/sycl/scan.hpp diff --git a/include/RAJA/policy/sycl.hpp b/include/RAJA/policy/sycl.hpp index dc4112d8a7..b567ab80a4 100644 --- a/include/RAJA/policy/sycl.hpp +++ b/include/RAJA/policy/sycl.hpp @@ -29,8 +29,8 @@ #include "RAJA/policy/sycl/forall.hpp" #include "RAJA/policy/sycl/policy.hpp" #include "RAJA/policy/sycl/reduce.hpp" -//#include "RAJA/policy/sycl/scan.hpp" -//#include "RAJA/policy/sycl/sort.hpp" +#include "RAJA/policy/sycl/scan.hpp" +#include "RAJA/policy/sycl/sort.hpp" #include "RAJA/policy/sycl/kernel.hpp" //#include "RAJA/policy/sycl/synchronize.hpp" #include "RAJA/policy/sycl/launch.hpp" diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp new file mode 100644 index 0000000000..8464addd87 --- /dev/null +++ b/include/RAJA/policy/sycl/scan.hpp @@ -0,0 +1,167 @@ +/*! +****************************************************************************** +* +* \file +* +* \brief Header file providing RAJA scan declarations. +* +****************************************************************************** +*/ + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-24, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#ifndef RAJA_scan_sycl_HPP +#define RAJA_scan_sycl_HPP + +#include "RAJA/config.hpp" +#include "camp/resource/sycl.hpp" + +#if defined(RAJA_ENABLE_SYCL) + + +#include + +// #include + +// #include + +#include "RAJA/pattern/detail/algorithm.hpp" + +// #include "RAJA/policy/sycl/MemUtils_SYCL.hpp" + +#include "RAJA/policy/sycl/policy.hpp" + +namespace RAJA +{ +namespace impl +{ +namespace scan +{ + +template +RAJA_INLINE +camp::resources::EventProxy +inclusive_inplace( + camp::resources::Sycl sycl_res, + ::RAJA::policy::sycl::sycl_exec, + InputIter begin, + InputIter end, + Function binary_op) +{ + ::sycl::queue* sycl_queue = sycl_res.get_queue(); + + using valueT = typename std::remove_reference::type; + + // Calculate the size of the input range + size_t size = std::distance(begin, end); + + ::sycl::buffer buf(begin, ::sycl::range<1>(size)); + + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + // ::sycl::accessor(size), [=](::sycl::id<1> idx) { + if (idx[0] != 0) { + *(begin + idx[0]) = binary_op(*(begin + idx[0] - 1), *(begin + idx[0])); + } + }); + }); + + sycl_res.wait(); + return camp::resources::EventProxy(sycl_res); +} + +template +RAJA_INLINE +resources::EventProxy +exclusive_inplace( + resources::Sycl sycl_res, + ::RAJA::policy::sycl::sycl_exec exec, + InputIter begin, + InputIter end, + Function binary_op, + TT initVal) +{ + + ::sycl::queue* sycl_queue = sycl_res.get_queue(); + + // Calculate the size of the input range + size_t size = std::distance(begin, end); + using valueT = typename std::remove_reference::type; + valueT agg = *begin; + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::id<1> idx) { + if (idx[0] == 0) { + *begin = binary_op(*begin, initVal); + } else { + // agg = binary_op(agg, begin + idx[0]); + *(begin + idx[0]) = binary_op(*(begin + idx[0] - 1), *(begin + idx[0])); + } + }); + }); + + sycl_res.wait(); + return camp::resources::EventProxy(sycl_res); +} + +template +RAJA_INLINE +resources::EventProxy +inclusive( + resources::Sycl sycl_res, + ::RAJA::policy::sycl::sycl_exec exec, + InputIter begin, + InputIter end, + OutputIter out, + Function binary_op) +{ + // ::sycl::joint_inclusive_scan() + using std::distance; + std::copy(begin, end, out); + return inclusive_inplace(sycl_res, exec, out, out + distance(begin, end), binary_op); +} + +template +RAJA_INLINE +resources::EventProxy +exclusive( + resources::Sycl sycl_res, + ::RAJA::policy::sycl::sycl_exec exec, + InputIter begin, + InputIter end, + OutputIter out, + Function binary_op, + TT initVal) +{ + using std::distance; + std::copy(begin, end, out); + return exclusive_inplace(sycl_res, exec, out, out + distance(begin, end), binary_op, initVal); +} + +} // namespace scan +} // namespace impl +} // namespace RAJA + +#endif // closing endif for RAJA enable Sycl guard + +#endif // closing endif for header include guard \ No newline at end of file diff --git a/test/functional/scan/CMakeLists.txt b/test/functional/scan/CMakeLists.txt index b3d8113f9c..73b4a64dbc 100644 --- a/test/functional/scan/CMakeLists.txt +++ b/test/functional/scan/CMakeLists.txt @@ -5,7 +5,7 @@ # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### -list(APPEND SCAN_BACKENDS Sequential) +# list(APPEND SCAN_BACKENDS Sequential) if(RAJA_ENABLE_OPENMP) list(APPEND SCAN_BACKENDS OpenMP) @@ -19,8 +19,12 @@ if(RAJA_ENABLE_HIP) list(APPEND SCAN_BACKENDS Hip) endif() +if(RAJA_ENABLE_SYCL) + list(APPEND SCAN_BACKENDS Sycl) +endif() + -set(SCAN_TYPES Exclusive ExclusiveInplace Inclusive InclusiveInplace) +set(SCAN_TYPES ExclusiveInplace Exclusive Inclusive InclusiveInplace) # # Generate scan tests for each enabled RAJA back-end. From 5fe4a546c04f45b1b64c35a45f4acd5497593925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Long?= Date: Tue, 17 Dec 2024 13:35:04 -0800 Subject: [PATCH 2/7] Got inclusive scan working --- include/RAJA/policy/sycl/scan.hpp | 119 ++++++++++++++++++++++-------- 1 file changed, 87 insertions(+), 32 deletions(-) diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp index 8464addd87..a4d6cb75e5 100644 --- a/include/RAJA/policy/sycl/scan.hpp +++ b/include/RAJA/policy/sycl/scan.hpp @@ -18,6 +18,7 @@ #ifndef RAJA_scan_sycl_HPP #define RAJA_scan_sycl_HPP +#include #include "RAJA/config.hpp" #include "camp/resource/sycl.hpp" @@ -60,17 +61,47 @@ inclusive_inplace( // Calculate the size of the input range size_t size = std::distance(begin, end); - ::sycl::buffer buf(begin, ::sycl::range<1>(size)); - - // Submit the kernel to the SYCL queue - sycl_queue->submit([&](::sycl::handler& cgh) { - // ::sycl::accessor(size), [=](::sycl::id<1> idx) { - if (idx[0] != 0) { - *(begin + idx[0]) = binary_op(*(begin + idx[0] - 1), *(begin + idx[0])); - } + ::sycl::buffer inBuff(begin, end); + ::sycl::buffer outBuff(begin, ::sycl::range<1>(size)); + + int iterations = 0; + for (size_t ii = size >> 1; ii > 0; ii >>= 1) { + iterations++; + } + if ((size & (size - 1)) != 0) { + iterations++; + } + + auto inPtr = &inBuff; + auto outPtr = &outBuff; + + if (iterations % 2 == 0) { + outPtr = &inBuff; + inPtr = &outBuff; + } + + int ii = 1; + do { + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + auto inAccessor = inPtr->get_access(cgh); + auto outAccessor = outPtr->get_access(cgh); + // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { + size_t td = 1 << (ii - 1); + size_t thisID = idx.get_id(0); + if (thisID < size and thisID >= td) { + outAccessor[thisID] = binary_op(inAccessor[thisID - td], inAccessor[thisID]); + } else { + outAccessor[thisID] = inAccessor[thisID]; + } + }); }); - }); + + std::swap(inPtr, outPtr); + ii++; + } while ( ii <= iterations); sycl_res.wait(); return camp::resources::EventProxy(sycl_res); @@ -91,28 +122,52 @@ exclusive_inplace( Function binary_op, TT initVal) { - - ::sycl::queue* sycl_queue = sycl_res.get_queue(); - - // Calculate the size of the input range - size_t size = std::distance(begin, end); - using valueT = typename std::remove_reference::type; - valueT agg = *begin; - // Submit the kernel to the SYCL queue - sycl_queue->submit([&](::sycl::handler& cgh) { - - cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::id<1> idx) { - if (idx[0] == 0) { - *begin = binary_op(*begin, initVal); - } else { - // agg = binary_op(agg, begin + idx[0]); - *(begin + idx[0]) = binary_op(*(begin + idx[0] - 1), *(begin + idx[0])); - } - }); - }); - - sycl_res.wait(); - return camp::resources::EventProxy(sycl_res); +// ::sycl::queue* sycl_queue = sycl_res.get_queue(); + +// using valueT = typename std::remove_reference::type; + +// // Calculate the size of the input range +// size_t size = std::distance(begin, end); + +// ::sycl::buffer outBuff(begin, ::sycl::range<1>(size)); + +// int iterations = 0; +// for (size_t ii = size >> 1; ii > 0; ii >>= 1) { +// iterations++; +// } +// if ((size & (size - 1)) != 0) { +// iterations++; +// } + +// auto inPtr = begin; +// auto outPtr = &outBuff; + +// if (iterations % 2 != 0) { +// outPtr = begin; +// inPtr = &outBuff; +// } + +// // Submit the kernel to the SYCL queue +// sycl_queue->submit([&](::sycl::handler& cgh) { +// // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + +// // ::sycl::accessor(size), [=](::sycl::nd_item<1> idx) { +// size_t thisID = idx.get_global_id(0); +// if (thisID > 0 ) { +// outPtr[thisID] = inPtr[thisID - 1]; +// } +// else { +// outPtr[thisID] = initVal; +// } +// // if (idx[0] != 0) { +// // *(begin + idx[0]) = binary_op(*(begin + idx[0] - 1), *(begin + idx[0])); +// // } +// }); +// }); + +// sycl_res.wait(); + return camp::resources::EventProxy(sycl_res); } template Date: Wed, 18 Dec 2024 15:55:59 -0800 Subject: [PATCH 3/7] All scan types working! --- include/RAJA/policy/sycl/scan.hpp | 127 ++++++++++++++++++------------ 1 file changed, 76 insertions(+), 51 deletions(-) diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp index a4d6cb75e5..d194365350 100644 --- a/include/RAJA/policy/sycl/scan.hpp +++ b/include/RAJA/policy/sycl/scan.hpp @@ -61,8 +61,8 @@ inclusive_inplace( // Calculate the size of the input range size_t size = std::distance(begin, end); - ::sycl::buffer inBuff(begin, end); - ::sycl::buffer outBuff(begin, ::sycl::range<1>(size)); + ::sycl::buffer buffer(begin, end); + ::sycl::buffer tempAccBuff(begin, ::sycl::range<1>(size)); int iterations = 0; for (size_t ii = size >> 1; ii > 0; ii >>= 1) { @@ -72,34 +72,34 @@ inclusive_inplace( iterations++; } - auto inPtr = &inBuff; - auto outPtr = &outBuff; + auto buffPtr = &buffer; + auto tempPtr = &tempAccBuff; if (iterations % 2 == 0) { - outPtr = &inBuff; - inPtr = &outBuff; + tempPtr = &buffer; + buffPtr = &tempAccBuff; } int ii = 1; do { // Submit the kernel to the SYCL queue sycl_queue->submit([&](::sycl::handler& cgh) { - auto inAccessor = inPtr->get_access(cgh); - auto outAccessor = outPtr->get_access(cgh); + auto buffAccessor = buffPtr->get_access(cgh); + auto tempAccessor = tempPtr->get_access(cgh); // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { size_t td = 1 << (ii - 1); size_t thisID = idx.get_id(0); if (thisID < size and thisID >= td) { - outAccessor[thisID] = binary_op(inAccessor[thisID - td], inAccessor[thisID]); + tempAccessor[thisID] = binary_op(buffAccessor[thisID - td], buffAccessor[thisID]); } else { - outAccessor[thisID] = inAccessor[thisID]; + tempAccessor[thisID] = buffAccessor[thisID]; } }); }); - std::swap(inPtr, outPtr); + std::swap(buffPtr, tempPtr); ii++; } while ( ii <= iterations); @@ -122,52 +122,77 @@ exclusive_inplace( Function binary_op, TT initVal) { -// ::sycl::queue* sycl_queue = sycl_res.get_queue(); - -// using valueT = typename std::remove_reference::type; + ::sycl::queue* sycl_queue = sycl_res.get_queue(); + + using valueT = typename std::remove_reference::type; -// // Calculate the size of the input range -// size_t size = std::distance(begin, end); + // Calculate the size of the input range + size_t size = std::distance(begin, end); -// ::sycl::buffer outBuff(begin, ::sycl::range<1>(size)); + ::sycl::buffer buffer(begin, end); + ::sycl::buffer tempAccBuff(begin, ::sycl::range<1>(size)); -// int iterations = 0; -// for (size_t ii = size >> 1; ii > 0; ii >>= 1) { -// iterations++; -// } -// if ((size & (size - 1)) != 0) { -// iterations++; -// } + int iterations = 0; + for (size_t ii = size >> 1; ii > 0; ii >>= 1) { + iterations++; + } + if ((size & (size - 1)) != 0) { + iterations++; + } -// auto inPtr = begin; -// auto outPtr = &outBuff; + auto buffPtr = &buffer; + auto tempPtr = &tempAccBuff; -// if (iterations % 2 != 0) { -// outPtr = begin; -// inPtr = &outBuff; -// } - -// // Submit the kernel to the SYCL queue -// sycl_queue->submit([&](::sycl::handler& cgh) { -// // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); - -// // ::sycl::accessor(size), [=](::sycl::nd_item<1> idx) { -// size_t thisID = idx.get_global_id(0); -// if (thisID > 0 ) { -// outPtr[thisID] = inPtr[thisID - 1]; -// } -// else { -// outPtr[thisID] = initVal; -// } -// // if (idx[0] != 0) { -// // *(begin + idx[0]) = binary_op(*(begin + idx[0] - 1), *(begin + idx[0])); -// // } -// }); -// }); - -// sycl_res.wait(); + if (iterations % 2 != 0) { + tempPtr = &buffer; + buffPtr = &tempAccBuff; + } + + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + auto inAccessor = buffPtr->get_access(cgh); + auto outAccessor = tempPtr->get_access(cgh); + // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { + size_t thisID = idx.get_id(0); + // size_t td = 1 << (ii - 1); + if (thisID > 0) { + outAccessor[thisID] = inAccessor[thisID - 1]; + } else { + outAccessor[thisID] = initVal; + } + }); + }); + + std::swap(buffPtr, tempPtr); + + int ii = 1; + do { + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + auto buffAccessor = buffPtr->get_access(cgh); + auto tempAccessor = tempPtr->get_access(cgh); + // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { + size_t td = 1 << (ii - 1); + size_t thisID = idx.get_id(0); + if (thisID < size and thisID >= td) { + tempAccessor[thisID] = binary_op(buffAccessor[thisID - td], buffAccessor[thisID]); + } else { + tempAccessor[thisID] = buffAccessor[thisID]; + } + }); + }); + + std::swap(buffPtr, tempPtr); + ii++; + } while ( ii <= iterations); + + sycl_res.wait(); return camp::resources::EventProxy(sycl_res); + return inclusive_inplace(sycl_res, exec, begin, end, binary_op); } template Date: Fri, 20 Dec 2024 07:54:01 -0800 Subject: [PATCH 4/7] Changes based on PR feedback --- include/RAJA/policy/sycl/scan.hpp | 11 ----------- test/functional/scan/CMakeLists.txt | 4 ++-- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp index d194365350..a613714a13 100644 --- a/include/RAJA/policy/sycl/scan.hpp +++ b/include/RAJA/policy/sycl/scan.hpp @@ -24,17 +24,6 @@ #if defined(RAJA_ENABLE_SYCL) - -#include - -// #include - -// #include - -#include "RAJA/pattern/detail/algorithm.hpp" - -// #include "RAJA/policy/sycl/MemUtils_SYCL.hpp" - #include "RAJA/policy/sycl/policy.hpp" namespace RAJA diff --git a/test/functional/scan/CMakeLists.txt b/test/functional/scan/CMakeLists.txt index 73b4a64dbc..a90caa1012 100644 --- a/test/functional/scan/CMakeLists.txt +++ b/test/functional/scan/CMakeLists.txt @@ -5,7 +5,7 @@ # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### -# list(APPEND SCAN_BACKENDS Sequential) +list(APPEND SCAN_BACKENDS Sequential) if(RAJA_ENABLE_OPENMP) list(APPEND SCAN_BACKENDS OpenMP) @@ -24,7 +24,7 @@ if(RAJA_ENABLE_SYCL) endif() -set(SCAN_TYPES ExclusiveInplace Exclusive Inclusive InclusiveInplace) +set(SCAN_TYPES Exclusive ExclusiveInplace Inclusive InclusiveInplace) # # Generate scan tests for each enabled RAJA back-end. From 50d16a6c74271e59e77daa29fc13303be2f278df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Long?= Date: Fri, 3 Jan 2025 13:15:19 -0800 Subject: [PATCH 5/7] Changed std::copy to sycl memcpy. --- include/RAJA/policy/sycl/scan.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp index a613714a13..450fbfe56d 100644 --- a/include/RAJA/policy/sycl/scan.hpp +++ b/include/RAJA/policy/sycl/scan.hpp @@ -19,6 +19,7 @@ #define RAJA_scan_sycl_HPP #include +#include #include "RAJA/config.hpp" #include "camp/resource/sycl.hpp" @@ -199,10 +200,9 @@ inclusive( OutputIter out, Function binary_op) { - // ::sycl::joint_inclusive_scan() - using std::distance; - std::copy(begin, end, out); - return inclusive_inplace(sycl_res, exec, out, out + distance(begin, end), binary_op); + using valueT = typename std::remove_reference::type; + sycl_res.memcpy(out, begin, std::distance(begin, end) * sizeof(valueT)); + return inclusive_inplace(sycl_res, exec, out, out + std::distance(begin, end), binary_op); } template ::type; + sycl_res.memcpy(out, begin, std::distance(begin, end) * sizeof(valueT)); + return exclusive_inplace(sycl_res, exec, out, out + std::distance(begin, end), binary_op, initVal); } } // namespace scan From c1ec6f44a3f4befae334ce33b36f60f5905f439b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Long?= Date: Fri, 3 Jan 2025 13:55:55 -0800 Subject: [PATCH 6/7] use iterator traits instead of decltype --- include/RAJA/policy/sycl/scan.hpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp index 450fbfe56d..c415fd902c 100644 --- a/include/RAJA/policy/sycl/scan.hpp +++ b/include/RAJA/policy/sycl/scan.hpp @@ -46,7 +46,8 @@ inclusive_inplace( { ::sycl::queue* sycl_queue = sycl_res.get_queue(); - using valueT = typename std::remove_reference::type; + // using valueT = typename std::remove_reference::type; + using valueT = typename std::iterator_traits::value_type; // Calculate the size of the input range size_t size = std::distance(begin, end); @@ -114,7 +115,9 @@ exclusive_inplace( { ::sycl::queue* sycl_queue = sycl_res.get_queue(); - using valueT = typename std::remove_reference::type; + // using valueT = typename std::remove_reference::type; + using valueT = typename std::iterator_traits::value_type; + // Calculate the size of the input range size_t size = std::distance(begin, end); @@ -200,7 +203,8 @@ inclusive( OutputIter out, Function binary_op) { - using valueT = typename std::remove_reference::type; + // using valueT = typename std::remove_reference::type; + using valueT = typename std::iterator_traits::value_type; sycl_res.memcpy(out, begin, std::distance(begin, end) * sizeof(valueT)); return inclusive_inplace(sycl_res, exec, out, out + std::distance(begin, end), binary_op); } @@ -222,7 +226,9 @@ exclusive( Function binary_op, TT initVal) { - using valueT = typename std::remove_reference::type; + // using valueT = typename std::remove_reference::type; + using valueT = typename std::iterator_traits::value_type; + sycl_res.memcpy(out, begin, std::distance(begin, end) * sizeof(valueT)); return exclusive_inplace(sycl_res, exec, out, out + std::distance(begin, end), binary_op, initVal); } From 9a43cb59fe4d7df476b39bcb7c350950b57486c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Long?= Date: Mon, 6 Jan 2025 10:09:04 -0800 Subject: [PATCH 7/7] Changed waits to be conditional on policy async --- include/RAJA/policy/sycl/scan.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp index c415fd902c..81e5614b71 100644 --- a/include/RAJA/policy/sycl/scan.hpp +++ b/include/RAJA/policy/sycl/scan.hpp @@ -94,7 +94,7 @@ inclusive_inplace( ii++; } while ( ii <= iterations); - sycl_res.wait(); + if (!Async) { sycl_res.wait(); } return camp::resources::EventProxy(sycl_res); } @@ -183,7 +183,7 @@ exclusive_inplace( ii++; } while ( ii <= iterations); - sycl_res.wait(); + if(!Async) { sycl_res.wait(); } return camp::resources::EventProxy(sycl_res); return inclusive_inplace(sycl_res, exec, begin, end, binary_op); }