From 24e345385f1f4df6b805581dfab26cf188339633 Mon Sep 17 00:00:00 2001 From: "Richard D. Hornung" Date: Mon, 25 Nov 2024 10:44:03 -0800 Subject: [PATCH 1/2] Remove deprecated SYCL usage for newer compilers --- examples/memoryManager.hpp | 2 +- exercises/memoryManager.hpp | 2 +- include/RAJA/pattern/launch/launch_core.hpp | 2 +- include/RAJA/policy/sycl/MemUtils_SYCL.hpp | 4 +- include/RAJA/policy/sycl/forall.hpp | 4 +- .../RAJA/policy/sycl/kernel/Conditional.hpp | 2 +- include/RAJA/policy/sycl/kernel/For.hpp | 14 ++-- include/RAJA/policy/sycl/kernel/ForICount.hpp | 14 ++-- include/RAJA/policy/sycl/kernel/Lambda.hpp | 2 +- .../RAJA/policy/sycl/kernel/SyclKernel.hpp | 18 ++--- include/RAJA/policy/sycl/kernel/Tile.hpp | 10 +-- .../RAJA/policy/sycl/kernel/TileTCount.hpp | 10 +-- include/RAJA/policy/sycl/kernel/internal.hpp | 18 ++--- include/RAJA/policy/sycl/launch.hpp | 32 ++++----- include/RAJA/policy/sycl/policy.hpp | 2 +- include/RAJA/policy/sycl/reduce.hpp | 66 +++++++++---------- src/MemUtils_SYCL.cpp | 4 +- tpl/camp | 2 +- 18 files changed, 104 insertions(+), 104 deletions(-) diff --git a/examples/memoryManager.hpp b/examples/memoryManager.hpp index 62d3d6e3e7..685a572033 100644 --- a/examples/memoryManager.hpp +++ b/examples/memoryManager.hpp @@ -76,7 +76,7 @@ void deallocate(T *&ptr) hipErrchk(hipMalloc((void **)&ptr, sizeof(T) * size)); #elif defined(RAJA_ENABLE_SYCL) auto qu = sycl_res->get().get_queue(); - ptr = cl::sycl::malloc_device(size, *qu); + ptr = ::sycl::malloc_device(size, *qu); #endif return ptr; } diff --git a/exercises/memoryManager.hpp b/exercises/memoryManager.hpp index 62d3d6e3e7..685a572033 100644 --- a/exercises/memoryManager.hpp +++ b/exercises/memoryManager.hpp @@ -76,7 +76,7 @@ void deallocate(T *&ptr) hipErrchk(hipMalloc((void **)&ptr, sizeof(T) * size)); #elif defined(RAJA_ENABLE_SYCL) auto qu = sycl_res->get().get_queue(); - ptr = cl::sycl::malloc_device(size, *qu); + ptr = ::sycl::malloc_device(size, *qu); #endif return ptr; } diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index f1d70aeacb..7ea7ce57ef 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -162,7 +162,7 @@ class LaunchContext void *shared_mem_ptr; #if defined(RAJA_ENABLE_SYCL) - mutable cl::sycl::nd_item<3> *itm; + mutable ::sycl::nd_item<3> *itm; #endif RAJA_HOST_DEVICE LaunchContext() diff --git a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp index 081a88dc23..1a8c9930dd 100644 --- a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp +++ b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp @@ -50,7 +50,7 @@ namespace detail struct syclInfo { sycl_dim_t gridDim{0}; sycl_dim_t blockDim{0}; - cl::sycl::queue qu = cl::sycl::queue(); + ::sycl::queue qu = ::sycl::queue(); bool setup_reducers = false; #if defined(RAJA_ENABLE_OPENMP) syclInfo* thread_states = nullptr; @@ -62,7 +62,7 @@ extern syclInfo g_status; extern syclInfo tl_status; -extern std::unordered_map g_queue_info_map; +extern std::unordered_map<::sycl::queue, bool> g_queue_info_map; } // namespace detail diff --git a/include/RAJA/policy/sycl/forall.hpp b/include/RAJA/policy/sycl/forall.hpp index 901cc694f0..1c6876e328 100644 --- a/include/RAJA/policy/sycl/forall.hpp +++ b/include/RAJA/policy/sycl/forall.hpp @@ -206,8 +206,8 @@ resources::EventProxy forall_impl(resources::Sycl &sycl_res, }).wait(); // Need to wait for completion to free memory // Free our device memory - cl::sycl::free(lbody, *q); - cl::sycl::free(beg, *q); + ::sycl::free(lbody, *q); + ::sycl::free(beg, *q); RAJA_FT_END; } diff --git a/include/RAJA/policy/sycl/kernel/Conditional.hpp b/include/RAJA/policy/sycl/kernel/Conditional.hpp index 9149418518..e2e6b09e6d 100644 --- a/include/RAJA/policy/sycl/kernel/Conditional.hpp +++ b/include/RAJA/policy/sycl/kernel/Conditional.hpp @@ -51,7 +51,7 @@ struct SyclStatementExecutor item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { if (Conditional::eval(data)) { diff --git a/include/RAJA/policy/sycl/kernel/For.hpp b/include/RAJA/policy/sycl/kernel/For.hpp index d0976b931f..2019bfa7a9 100644 --- a/include/RAJA/policy/sycl/kernel/For.hpp +++ b/include/RAJA/policy/sycl/kernel/For.hpp @@ -59,7 +59,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i = item.get_global_id(Dim); @@ -124,7 +124,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i = item.get_group(Dim); @@ -187,7 +187,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i0 = item.get_group(Dim); @@ -253,7 +253,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i = item.get_local_id(Dim); @@ -317,7 +317,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { auto len = segment_length(data); auto i0 = item.get_local_id(Dim); @@ -393,7 +393,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item) { auto len = segment_length(data); auto i = item.get_global_id(0); @@ -454,7 +454,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { using idx_type = camp::decay(data.offset_tuple))>; diff --git a/include/RAJA/policy/sycl/kernel/ForICount.hpp b/include/RAJA/policy/sycl/kernel/ForICount.hpp index 9c25bb0ab9..fcd3b1824d 100644 --- a/include/RAJA/policy/sycl/kernel/ForICount.hpp +++ b/include/RAJA/policy/sycl/kernel/ForICount.hpp @@ -63,7 +63,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { diff_t len = segment_length(data); auto i = item.get_local_id(ThreadDim); @@ -121,7 +121,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { diff_t len = segment_length(data); auto i0 = item.get_local_id(0); @@ -181,7 +181,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // masked size strided loop diff_t len = segment_length(data); @@ -243,7 +243,7 @@ struct SyclStatementExecutor< using typename Base::diff_t; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // block stride loop diff_t len = segment_length(data); @@ -300,7 +300,7 @@ struct SyclStatementExecutor< using typename Base::diff_t; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // grid stride loop diff_t len = segment_length(data); @@ -349,7 +349,7 @@ struct SyclStatementExecutor< using typename Base::diff_t; static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // grid stride loop diff_t len = segment_length(data); @@ -399,7 +399,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { diff_t len = segment_length(data); diff --git a/include/RAJA/policy/sycl/kernel/Lambda.hpp b/include/RAJA/policy/sycl/kernel/Lambda.hpp index 0542f4b81e..05f4fb3a44 100644 --- a/include/RAJA/policy/sycl/kernel/Lambda.hpp +++ b/include/RAJA/policy/sycl/kernel/Lambda.hpp @@ -46,7 +46,7 @@ template , Types> { static - inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Only execute the lambda if it hasn't been masked off if(thread_active){ diff --git a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp index 88c789c062..f339bccef5 100644 --- a/include/RAJA/policy/sycl/kernel/SyclKernel.hpp +++ b/include/RAJA/policy/sycl/kernel/SyclKernel.hpp @@ -93,7 +93,7 @@ namespace internal * SYCL global function for launching SyclKernel policies. */ template -void SyclKernelLauncher(Data data, cl::sycl::nd_item<3> item) +void SyclKernelLauncher(Data data, ::sycl::nd_item<3> item) { using data_t = camp::decay; @@ -128,7 +128,7 @@ struct SyclLaunchHelper,StmtList,Data,Types> static void launch(Data &&data, internal::LaunchDims launch_dims, size_t shmem, - cl::sycl::queue* qu) + ::sycl::queue* qu) { // @@ -136,20 +136,20 @@ struct SyclLaunchHelper,StmtList,Data,Types> // Kernel body is nontrivially copyable, create space on device and copy to // Workaround until "is_device_copyable" is supported // - data_t* m_data = (data_t*) cl::sycl::malloc_device(sizeof(data_t), *qu); + data_t* m_data = (data_t*) ::sycl::malloc_device(sizeof(data_t), *qu); qu->memcpy(m_data, &data, sizeof(data_t)).wait(); - qu->submit([&](cl::sycl::handler& h) { + qu->submit([&](::sycl::handler& h) { h.parallel_for(launch_dims.fit_nd_range(qu), - [=] (cl::sycl::nd_item<3> item) { + [=] (::sycl::nd_item<3> item) { SyclKernelLauncher(*m_data, item); }); }).wait(); // Need to wait to free memory - cl::sycl::free(m_data, *qu); + ::sycl::free(m_data, *qu); } }; @@ -172,13 +172,13 @@ struct SyclLaunchHelper,StmtList,Data,Types> static void launch(Data &&data, internal::LaunchDims launch_dims, size_t shmem, - cl::sycl::queue* qu) + ::sycl::queue* qu) { - qu->submit([&](cl::sycl::handler& h) { + qu->submit([&](::sycl::handler& h) { h.parallel_for(launch_dims.fit_nd_range(qu), - [=] (cl::sycl::nd_item<3> item) { + [=] (::sycl::nd_item<3> item) { SyclKernelLauncher(data, item); diff --git a/include/RAJA/policy/sycl/kernel/Tile.hpp b/include/RAJA/policy/sycl/kernel/Tile.hpp index 81a57cdecb..59590b2556 100644 --- a/include/RAJA/policy/sycl/kernel/Tile.hpp +++ b/include/RAJA/policy/sycl/kernel/Tile.hpp @@ -61,7 +61,7 @@ struct SyclStatementExecutor< using enclosed_stmts_t = SyclStatementListExecutor; using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active){ + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active){ // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -139,7 +139,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -231,7 +231,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -321,7 +321,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -409,7 +409,7 @@ struct SyclStatementExecutor< using diff_t = segment_diff_type; - static inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + static inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); diff --git a/include/RAJA/policy/sycl/kernel/TileTCount.hpp b/include/RAJA/policy/sycl/kernel/TileTCount.hpp index b1d263a263..f27dafca80 100644 --- a/include/RAJA/policy/sycl/kernel/TileTCount.hpp +++ b/include/RAJA/policy/sycl/kernel/TileTCount.hpp @@ -70,7 +70,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active){ + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active){ // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -141,7 +141,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -214,7 +214,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -289,7 +289,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); @@ -363,7 +363,7 @@ struct SyclStatementExecutor< static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Get the segment referenced by this Tile statement auto &segment = camp::get(data.segment_tuple); diff --git a/include/RAJA/policy/sycl/kernel/internal.hpp b/include/RAJA/policy/sycl/kernel/internal.hpp index 56e3a9aa1e..3498550c25 100644 --- a/include/RAJA/policy/sycl/kernel/internal.hpp +++ b/include/RAJA/policy/sycl/kernel/internal.hpp @@ -86,7 +86,7 @@ struct LaunchDims { return result; } - cl::sycl::nd_range<3> fit_nd_range(::sycl::queue* q) { + ::sycl::nd_range<3> fit_nd_range(::sycl::queue* q) { sycl_dim_3_t launch_global; @@ -95,9 +95,9 @@ struct LaunchDims { launch_local.y = std::max(launch_local.y, local.y); launch_local.z = std::max(launch_local.z, local.z); - cl::sycl::device dev = q->get_device(); + ::sycl::device dev = q->get_device(); - auto max_work_group_size = dev.get_info< ::cl::sycl::info::device::max_work_group_size>(); + auto max_work_group_size = dev.get_info< ::sycl::info::device::max_work_group_size>(); if(launch_local.x > max_work_group_size) { launch_local.x = max_work_group_size; @@ -160,10 +160,10 @@ struct LaunchDims { launch_global.z = ((launch_global.z / launch_local.z) + 1) * launch_local.z; } - cl::sycl::range<3> ret_th = {launch_local.x, launch_local.y, launch_local.z}; - cl::sycl::range<3> ret_gl = {launch_global.x, launch_global.y, launch_global.z}; + ::sycl::range<3> ret_th = {launch_local.x, launch_local.y, launch_local.z}; + ::sycl::range<3> ret_gl = {launch_global.x, launch_global.y, launch_global.z}; - return cl::sycl::nd_range<3>(ret_gl, ret_th); + return ::sycl::nd_range<3>(ret_gl, ret_th); } }; @@ -176,7 +176,7 @@ struct SyclStatementListExecutorHelper { using cur_stmt_t = camp::at_v; template - inline static RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + inline static RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Execute stmt cur_stmt_t::exec(data, item, thread_active); @@ -203,7 +203,7 @@ template struct SyclStatementListExecutorHelper { template - inline static RAJA_DEVICE void exec(Data &, cl::sycl::nd_item<3> item, bool) + inline static RAJA_DEVICE void exec(Data &, ::sycl::nd_item<3> item, bool) { // nop terminator } @@ -233,7 +233,7 @@ struct SyclStatementListExecutor, Types> { static inline RAJA_DEVICE - void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active) + void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active) { // Execute statements in order with helper class SyclStatementListExecutorHelper<0, num_stmts, enclosed_stmts_t>::exec(data, item, thread_active); diff --git a/include/RAJA/policy/sycl/launch.hpp b/include/RAJA/policy/sycl/launch.hpp index ad9fecc222..c8bc7aab53 100644 --- a/include/RAJA/policy/sycl/launch.hpp +++ b/include/RAJA/policy/sycl/launch.hpp @@ -63,13 +63,13 @@ struct LaunchExecute> { RAJA_FT_BEGIN; - q->submit([&](cl::sycl::handler& h) { + q->submit([&](::sycl::handler& h) { auto s_vec = ::sycl::local_accessor (params.shared_mem_size, h); h.parallel_for - (cl::sycl::nd_range<3>(gridSize, blockSize), - [=] (cl::sycl::nd_item<3> itm) { + (::sycl::nd_range<3>(gridSize, blockSize), + [=] (::sycl::nd_item<3> itm) { LaunchContext ctx; ctx.itm = &itm; @@ -136,14 +136,14 @@ struct LaunchExecute> { RAJA::expt::ParamMultiplexer::init(*res); auto reduction = ::sycl::reduction(res, launch_reducers, combiner); - q->submit([&](cl::sycl::handler& h) { + q->submit([&](::sycl::handler& h) { auto s_vec = ::sycl::local_accessor (launch_params.shared_mem_size, h); h.parallel_for - (cl::sycl::nd_range<3>(gridSize, blockSize), + (::sycl::nd_range<3>(gridSize, blockSize), reduction, - [=] (cl::sycl::nd_item<3> itm, auto & red) { + [=] (::sycl::nd_item<3> itm, auto & red) { LaunchContext ctx; ctx.itm = &itm; @@ -211,16 +211,16 @@ struct LaunchExecute> { // using LOOP_BODY = camp::decay; LOOP_BODY* lbody; - lbody = (LOOP_BODY*) cl::sycl::malloc_device(sizeof(LOOP_BODY), *q); + lbody = (LOOP_BODY*) ::sycl::malloc_device(sizeof(LOOP_BODY), *q); q->memcpy(lbody, &body_in, sizeof(LOOP_BODY)).wait(); - q->submit([&](cl::sycl::handler& h) { + q->submit([&](::sycl::handler& h) { auto s_vec = ::sycl::local_accessor (params.shared_mem_size, h); h.parallel_for - (cl::sycl::nd_range<3>(gridSize, blockSize), - [=] (cl::sycl::nd_item<3> itm) { + (::sycl::nd_range<3>(gridSize, blockSize), + [=] (::sycl::nd_item<3> itm) { LaunchContext ctx; ctx.itm = &itm; @@ -234,7 +234,7 @@ struct LaunchExecute> { }).wait(); // Need to wait for completion to free memory - cl::sycl::free(lbody, *q); + ::sycl::free(lbody, *q); RAJA_FT_END; @@ -290,21 +290,21 @@ struct LaunchExecute> { // using LOOP_BODY = camp::decay; LOOP_BODY* lbody; - lbody = (LOOP_BODY*) cl::sycl::malloc_device(sizeof(LOOP_BODY), *q); + lbody = (LOOP_BODY*) ::sycl::malloc_device(sizeof(LOOP_BODY), *q); q->memcpy(lbody, &body_in, sizeof(LOOP_BODY)).wait(); ReduceParams* res = ::sycl::malloc_shared(1,*q); RAJA::expt::ParamMultiplexer::init(*res); auto reduction = ::sycl::reduction(res, launch_reducers, combiner); - q->submit([&](cl::sycl::handler& h) { + q->submit([&](::sycl::handler& h) { auto s_vec = ::sycl::local_accessor (launch_params.shared_mem_size, h); h.parallel_for - (cl::sycl::nd_range<3>(gridSize, blockSize), + (::sycl::nd_range<3>(gridSize, blockSize), reduction, - [=] (cl::sycl::nd_item<3> itm, auto & red) { + [=] (::sycl::nd_item<3> itm, auto & red) { LaunchContext ctx; ctx.itm = &itm; @@ -325,7 +325,7 @@ struct LaunchExecute> { RAJA::expt::ParamMultiplexer::combine( launch_reducers, *res ); ::sycl::free(res, *q); - cl::sycl::free(lbody, *q); + ::sycl::free(lbody, *q); RAJA_FT_END; } diff --git a/include/RAJA/policy/sycl/policy.hpp b/include/RAJA/policy/sycl/policy.hpp index 0f92fe27e1..e850994fef 100644 --- a/include/RAJA/policy/sycl/policy.hpp +++ b/include/RAJA/policy/sycl/policy.hpp @@ -39,7 +39,7 @@ struct uint3 { unsigned long x, y, z; }; -using sycl_dim_t = cl::sycl::range<1>; +using sycl_dim_t = sycl::range<1>; using sycl_dim_3_t = uint3; diff --git a/include/RAJA/policy/sycl/reduce.hpp b/include/RAJA/policy/sycl/reduce.hpp index 49d89b3cd2..8a84d5b412 100644 --- a/include/RAJA/policy/sycl/reduce.hpp +++ b/include/RAJA/policy/sycl/reduce.hpp @@ -107,11 +107,11 @@ struct Reduce_Data Reduce_Data(T initValue, T identityValue, Offload_Info &info) : value(initValue) { - cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); - device = reinterpret_cast(cl::sycl::malloc_device(sycl::MaxNumTeams * sizeof(T), *(q))); - host = reinterpret_cast(cl::sycl::malloc_host(sycl::MaxNumTeams * sizeof(T), *(q))); + device = reinterpret_cast(::sycl::malloc_device(sycl::MaxNumTeams * sizeof(T), *(q))); + host = reinterpret_cast(::sycl::malloc_host(sycl::MaxNumTeams * sizeof(T), *(q))); if (!host) { printf("Unable to allocate space on host\n"); @@ -136,7 +136,7 @@ struct Reduce_Data //! transfers from the host to the device -- exit() is called upon failure RAJA_INLINE void hostToDevice(Offload_Info &info) { - cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); if(!q) { camp::resources::Resource res = camp::resources::Sycl(); @@ -154,7 +154,7 @@ struct Reduce_Data //! transfers from the device to the host -- exit() is called upon failure RAJA_INLINE void deviceToHost(Offload_Info &info) { - cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); if(!q) { camp::resources::Resource res = camp::resources::Sycl(); @@ -172,14 +172,14 @@ struct Reduce_Data //! frees all data from the offload information passed RAJA_INLINE void cleanup(Offload_Info &info) { - cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); if (device) { - cl::sycl::free(reinterpret_cast(device), *q); + ::sycl::free(reinterpret_cast(device), *q); device = nullptr; } if (host) { - cl::sycl::free(reinterpret_cast(host), *q); + ::sycl::free(reinterpret_cast(host), *q); //delete[] host; host = nullptr; } @@ -243,8 +243,8 @@ struct TargetReduce TargetReduce &reduce(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(val.device[i]); + auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(val.device[i]); Reducer{}(atm, rhsVal); return *this; #else @@ -257,8 +257,8 @@ struct TargetReduce const TargetReduce &reduce(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(val.device[i]); + auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(val.device[i]); Reducer{}(atm, rhsVal); return *this; #else @@ -356,10 +356,10 @@ struct TargetReduceLoc TargetReduceLoc &reduce(T rhsVal, IndexType rhsLoc) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0; //__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - cl::sycl::atomic_fence(cl::sycl::memory_order_acquire, cl::sycl::memory_scope::device); + auto i = 0; //__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + ::sycl::atomic_fence(::sycl::memory_order_acquire, ::sycl::memory_scope::device); Reducer{}(val.device[i], loc.device[i], rhsVal, rhsLoc); - cl::sycl::atomic_fence(cl::sycl::memory_order_release, cl::sycl::memory_scope::device); + ::sycl::atomic_fence(::sycl::memory_order_release, ::sycl::memory_scope::device); return *this; #else Reducer{}(val.value, loc.value, rhsVal, rhsLoc); @@ -415,8 +415,8 @@ class ReduceSum const self &operator+=(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_add(rhsVal); return *this; #else @@ -441,8 +441,8 @@ class ReduceBitOr self &operator|=(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm |= rhsVal; return *this; #else @@ -455,8 +455,8 @@ class ReduceBitOr const self &operator|=(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm |= rhsVal; return *this; #else @@ -481,8 +481,8 @@ class ReduceBitAnd self &operator&=(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm &= rhsVal; return *this; #else @@ -495,8 +495,8 @@ class ReduceBitAnd const self &operator&=(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm &= rhsVal; return *this; #else @@ -522,8 +522,8 @@ class ReduceMin self &min(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_min(rhsVal); return *this; #else @@ -536,8 +536,8 @@ class ReduceMin const self &min(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_min(rhsVal); return *this; #else @@ -563,8 +563,8 @@ class ReduceMax self &max(T rhsVal) { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_max(rhsVal); return *this; #else @@ -577,8 +577,8 @@ class ReduceMax const self &max(T rhsVal) const { #ifdef __SYCL_DEVICE_ONLY__ - auto i = 0;//__spirv::initLocalInvocationId<1, cl::sycl::id<1>>()[0]; - auto atm = ::sycl::atomic_ref(parent::val.device[i]); + auto i = 0;//__spirv::initLocalInvocationId<1, ::sycl::id<1>>()[0]; + auto atm = ::sycl::atomic_ref(parent::val.device[i]); atm.fetch_max(rhsVal); return *this; #else diff --git a/src/MemUtils_SYCL.cpp b/src/MemUtils_SYCL.cpp index 0b5f1b8be6..3a3976c675 100644 --- a/src/MemUtils_SYCL.cpp +++ b/src/MemUtils_SYCL.cpp @@ -49,8 +49,8 @@ syclInfo tl_status; #endif //! State of raja sycl queue synchronization for sycl reducer objects -std::unordered_map g_queue_info_map{ - {cl::sycl::queue(), true}}; +std::unordered_map<::sycl::queue, bool> g_queue_info_map{ + {::sycl::queue(), true}}; } // namespace detail diff --git a/tpl/camp b/tpl/camp index 0f07de4240..7866dc34a4 160000 --- a/tpl/camp +++ b/tpl/camp @@ -1 +1 @@ -Subproject commit 0f07de4240c42e0b38a8d872a20440cb4b33d9f5 +Subproject commit 7866dc34a4f86b6c58891f6daf9f6da0f89b3d94 From 839764971f685c8b55d2b7b18390aaac498c2e5d Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Tue, 10 Dec 2024 14:47:11 -0800 Subject: [PATCH 2/2] Global scope for consistency --- include/RAJA/policy/sycl/policy.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/policy/sycl/policy.hpp b/include/RAJA/policy/sycl/policy.hpp index e850994fef..a02c9ea30f 100644 --- a/include/RAJA/policy/sycl/policy.hpp +++ b/include/RAJA/policy/sycl/policy.hpp @@ -39,7 +39,7 @@ struct uint3 { unsigned long x, y, z; }; -using sycl_dim_t = sycl::range<1>; +using sycl_dim_t = ::sycl::range<1>; using sycl_dim_3_t = uint3;