diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8aabaf94c1..778f180595 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,7 @@ jobs: build_docker: strategy: matrix: - target: [gcc12, gcc13, clang13, clang15, rocm5.6, rocm5.6_desul, intel2024, intel2024_debug, intel2024_sycl] + target: [gcc12_debug, gcc13, clang13, clang15, rocm5.6, rocm5.6_desul, intel2024, intel2024_debug, intel2024_sycl] runs-on: ubuntu-latest steps: - run: | diff --git a/Dockerfile b/Dockerfile index 4e224371eb..66a50c3794 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,22 +17,24 @@ COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \ make -j 6 &&\ - ctest -T test --output-on-failure + ctest -T test --output-on-failure && \ + make clean FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12 ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \ - make -j 16 &&\ - ctest -T test --output-on-failure + make -j 6 &&\ + ctest -T test --output-on-failure && \ + make clean FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12_debug ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \ - make -j 6 &&\ + make -j 16 &&\ ctest -T test --output-on-failure FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12_desul @@ -41,7 +43,8 @@ COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On -DRAJA_ENABLE_DESUL_ATOMICS=On .. && \ make -j 6 &&\ - ctest -T test --output-on-failure + ctest -T test --output-on-failure && \ + make clean FROM ghcr.io/llnl/radiuss:gcc-13-ubuntu-22.04 AS gcc13 ENV GTEST_COLOR=1 @@ -65,7 +68,8 @@ COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug -DENABLE_OPENMP=On .. && \ make -j 6 &&\ - ctest -T test --output-on-failure + ctest -T test --output-on-failure && \ + make clean FROM ghcr.io/llnl/radiuss:clang-15-ubuntu-22.04 AS clang15 ENV GTEST_COLOR=1 @@ -81,7 +85,8 @@ COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On -DRAJA_ENABLE_DESUL_ATOMICS=On .. && \ make -j 6 &&\ - ctest -T test --output-on-failure + ctest -T test --output-on-failure && \ + make clean ## Test run failure in RAJA launch tests with new reducer interface. ## Need to figure out best way to handle that. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4535138152..e78ed7a10b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -4,8 +4,10 @@ jobs: matrix: shared: SHARED_ARGS: '-DBUILD_SHARED_LIBS=On' - static: - SHARED_ARGS: '-DBUILD_SHARED_LIBS=Off' +########################### +## Windows static build is run on GitHub Actions +## static: +## SHARED_ARGS: '-DBUILD_SHARED_LIBS=Off' pool: vmImage: 'windows-2019' variables: @@ -36,8 +38,8 @@ jobs: matrix: gcc11: docker_target: gcc11 - gcc12_debug: - docker_target: gcc12_debug + gcc12: + docker_target: gcc12 gcc12_desul: docker_target: gcc12_desul clang14_debug: diff --git a/exercises/dot-product.cpp b/exercises/dot-product.cpp index 67ec877f89..c2830c6cb2 100644 --- a/exercises/dot-product.cpp +++ b/exercises/dot-product.cpp @@ -38,11 +38,6 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) std::cout << "\n\nExercise: vector dot product...\n"; -#if defined(RAJA_ENABLE_SYCL) - memoryManager::sycl_res = new camp::resources::Resource{camp::resources::Sycl()}; - ::RAJA::sycl::detail::setQueue(memoryManager::sycl_res); -#endif - // // Define vector length // diff --git a/exercises/dot-product_solution.cpp b/exercises/dot-product_solution.cpp index a1bc7ad419..d0ae458171 100644 --- a/exercises/dot-product_solution.cpp +++ b/exercises/dot-product_solution.cpp @@ -40,11 +40,6 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) std::cout << "\n\nExercise: vector dot product...\n"; -#if defined(RAJA_ENABLE_SYCL) - memoryManager::sycl_res = new camp::resources::Resource{camp::resources::Sycl()}; - ::RAJA::sycl::detail::setQueue(memoryManager::sycl_res); -#endif - // // Define vector length // @@ -113,7 +108,7 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) RAJA::forall(RAJA::RangeSegment(0, N), [=] (int i) { ompdot += a[i] * b[i]; - }); + }); dot = ompdot.get(); // _rajaomp_dotprod_end diff --git a/exercises/vector-addition.cpp b/exercises/vector-addition.cpp index a266402224..dbe5260f6d 100644 --- a/exercises/vector-addition.cpp +++ b/exercises/vector-addition.cpp @@ -62,11 +62,6 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) std::cout << "\n\nExercise: RAJA Vector Addition...\n"; -#if defined(RAJA_ENABLE_SYCL) - memoryManager::sycl_res = new camp::resources::Resource{camp::resources::Sycl()}; - ::RAJA::sycl::detail::setQueue(memoryManager::sycl_res); -#endif - // // Define vector length // diff --git a/exercises/vector-addition_solution.cpp b/exercises/vector-addition_solution.cpp index 31956601e0..3bbc070731 100644 --- a/exercises/vector-addition_solution.cpp +++ b/exercises/vector-addition_solution.cpp @@ -62,11 +62,6 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) std::cout << "\n\nExercise: RAJA Vector Addition...\n"; -#if defined(RAJA_ENABLE_SYCL) - memoryManager::sycl_res = new camp::resources::Resource{camp::resources::Sycl()}; - ::RAJA::sycl::detail::setQueue(memoryManager::sycl_res); -#endif - // // Define vector length // @@ -179,7 +174,7 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) RAJA::TypedRangeSegment(0, N), [=] (int i) { c[i] = a[i] + b[i]; } - ); + ); // _rajaomp_vector_add_end checkResult(c, c_ref, N); diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 0db1dc4e0d..75e5f6902b 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -209,7 +209,8 @@ void launch_global_fcn_fixed(BODY body_in) } template -__global__ void launch_new_reduce_global_fcn_fixed(BODY body_in, ReduceParams reduce_params) +__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__ +void launch_new_reduce_global_fcn_fixed(BODY body_in, ReduceParams reduce_params) { LaunchContext ctx; @@ -298,7 +299,7 @@ struct LaunchExecute; auto func = reinterpret_cast( - &launch_new_reduce_global_fcn>); + &launch_new_reduce_global_fcn_fixed>); resources::Cuda cuda_res = res.get(); diff --git a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp index 27d3209ae3..081a88dc23 100644 --- a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp +++ b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp @@ -64,10 +64,6 @@ extern syclInfo tl_status; extern std::unordered_map g_queue_info_map; -void setQueue(camp::resources::Resource* q); - -cl::sycl::queue* getQueue(); - } // namespace detail //! Allocator for pinned memory for use in basic_mempool @@ -77,7 +73,7 @@ struct PinnedAllocator { void* malloc(size_t nbytes) { void* ptr; - ::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); ptr = ::sycl::malloc_host(nbytes, *q); return ptr; } @@ -86,7 +82,7 @@ struct PinnedAllocator { // Will throw if ptr is not in q's context bool free(void* ptr) { - ::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); ::sycl::free(ptr, *q); return true; } @@ -99,7 +95,7 @@ struct DeviceAllocator { void* malloc(size_t nbytes) { void* ptr; - ::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); ptr = ::sycl::malloc_device(nbytes, *q); return ptr; } @@ -108,7 +104,7 @@ struct DeviceAllocator { // Will throw if ptr is not in q's context bool free(void* ptr) { - ::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); ::sycl::free(ptr, *q); return true; } @@ -122,7 +118,7 @@ struct DeviceZeroedAllocator { void* malloc(size_t nbytes) { void* ptr; - ::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); ptr = ::sycl::malloc_device(nbytes, *q); q->memset(ptr, 0, nbytes); return ptr; @@ -132,7 +128,7 @@ struct DeviceZeroedAllocator { // Will throw if ptr is not in q's context bool free(void* ptr) { - ::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + ::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); ::sycl::free(ptr, *q); return true; } diff --git a/include/RAJA/policy/sycl/reduce.hpp b/include/RAJA/policy/sycl/reduce.hpp index 58cb83d295..49d89b3cd2 100644 --- a/include/RAJA/policy/sycl/reduce.hpp +++ b/include/RAJA/policy/sycl/reduce.hpp @@ -107,12 +107,8 @@ struct Reduce_Data Reduce_Data(T initValue, T identityValue, Offload_Info &info) : value(initValue) { - cl::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); - if(!q) { - camp::resources::Resource res = camp::resources::Sycl(); - q = res.get().get_queue(); - } device = reinterpret_cast(cl::sycl::malloc_device(sycl::MaxNumTeams * sizeof(T), *(q))); host = reinterpret_cast(cl::sycl::malloc_host(sycl::MaxNumTeams * sizeof(T), *(q))); @@ -140,7 +136,7 @@ struct Reduce_Data //! transfers from the host to the device -- exit() is called upon failure RAJA_INLINE void hostToDevice(Offload_Info &info) { - cl::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); if(!q) { camp::resources::Resource res = camp::resources::Sycl(); @@ -158,7 +154,7 @@ struct Reduce_Data //! transfers from the device to the host -- exit() is called upon failure RAJA_INLINE void deviceToHost(Offload_Info &info) { - cl::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); + cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); if(!q) { camp::resources::Resource res = camp::resources::Sycl(); @@ -169,18 +165,15 @@ struct Reduce_Data auto e = q->memcpy(reinterpret_cast(host), reinterpret_cast(device), sycl::MaxNumTeams * sizeof(T)); - + e.wait(); } //! frees all data from the offload information passed RAJA_INLINE void cleanup(Offload_Info &info) { - cl::sycl::queue* q = ::RAJA::sycl::detail::getQueue(); - if(!q) { - camp::resources::Resource res = camp::resources::Sycl(); - q = res.get().get_queue(); - } + cl::sycl::queue* q = ::camp::resources::Sycl::get_default().get_queue(); + if (device) { cl::sycl::free(reinterpret_cast(device), *q); device = nullptr; diff --git a/src/MemUtils_SYCL.cpp b/src/MemUtils_SYCL.cpp index 911ff132b6..0b5f1b8be6 100644 --- a/src/MemUtils_SYCL.cpp +++ b/src/MemUtils_SYCL.cpp @@ -48,24 +48,10 @@ syclInfo tl_status; #pragma omp threadprivate(tl_status) #endif -camp::resources::Resource* app_q = NULL; - -void setQueue(camp::resources::Resource* qu) { - app_q = qu; -} - //! State of raja sycl queue synchronization for sycl reducer objects std::unordered_map g_queue_info_map{ {cl::sycl::queue(), true}}; -cl::sycl::queue* getQueue() { - if (app_q != NULL) { - return app_q->get().get_queue(); - } - - return NULL; -} - } // namespace detail } // namespace sycl