diff --git a/docs/sphinx/user_guide/feature/policies.rst b/docs/sphinx/user_guide/feature/policies.rst index 8ba998f012..cbb43e4774 100644 --- a/docs/sphinx/user_guide/feature/policies.rst +++ b/docs/sphinx/user_guide/feature/policies.rst @@ -309,15 +309,16 @@ policies have the prefix ``hip_``. | | | expression is executed | | | | on the device. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_thread_x_unchecked | kernel (For) | Map loop iterates unchecked to | -| | launch (loop) | GPU threads in x-dimension, one | +| cuda/hip_thread_x_direct_unchecked | kernel (For) | Map loop iterates directly | +| | launch (loop) | without checking loop bounds to | +| | | GPU threads in x-dimension, one | | | | iterate per thread. See note | | | | below about limitations. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_thread_y_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_thread_y_direct_unchecked | kernel (For) | Same as above, but map | | | launch (loop) | to threads in y-dimension. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_thread_z_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_thread_z_direct_unchecked | kernel (For) | Same as above, but map | | | launch (loop) | to threads in z-dimension. | +----------------------------------------------------+---------------+---------------------------------+ | cuda/hip_thread_x_direct | kernel (For) | Map loop iterates directly to | @@ -346,15 +347,16 @@ policies have the prefix ``hip_``. | | launch (loop) | policy, but safe to use | | | | with Cuda/HipSyncThreads. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_thread_size_x_unchecked | kernel (For) | Same as thread_x_unchecked | -| | launch (loop) | policy above but with | +| cuda/hip_thread_size_x_direct_unchecked| kernel (For) | Same as | +| | launch (loop) | thread_x_direct_unchecked | +| | | policy above but with | | | | a compile time number of | | | | threads. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_thread_size_y_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_thread_size_y_direct_unchecked| kernel (For) | Same as above, but map | | | launch (loop) | to threads in y-dimension | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_thread_size_z_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_thread_size_z_direct_unchecked| kernel (For) | Same as above, but map | | | launch (loop) | to threads in z-dimension. | +----------------------------------------------------+---------------+---------------------------------+ | cuda/hip_thread_size_x_direct | kernel (For) | Same as thread_x_direct | @@ -368,7 +370,7 @@ policies have the prefix ``hip_``. | cuda/hip_thread_size_z_direct | kernel (For) | Same as above, but map | | | launch (loop) | to threads in z-dimension. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_flatten_threads_{xyz}_unchecked | launch (loop) | Reshapes threads in a | +| cuda/hip_flatten_threads_{xyz}_direct_unchecked | launch (loop) | Reshapes threads in a | | | | multi-dimensional thread | | | | team into one-dimension. | | | | Accepts any permutation | @@ -381,15 +383,16 @@ policies have the prefix ``hip_``. | cuda/hip_flatten_threads_{xyz}_loop | launch (loop) | Same as above, but with loop | | | | mapping. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_block_x_unchecked | kernel (For) | Map loop iterates unchecked | -| | launch (loop) | to GPU thread blocks in the | +| cuda/hip_block_x_direct_unchecked | kernel (For) | Map loop iterates directly | +| | launch (loop) | without checking loop bounds | +| | | to GPU thread blocks in the | | | | x-dimension, one iterate per | | | | block. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_block_y_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_block_y_direct_unchecked | kernel (For) | Same as above, but map | | | launch (loop) | to blocks in y-dimension | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_block_z_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_block_z_direct_unchecked | kernel (For) | Same as above, but map | | | launch (loop) | to blocks in z-dimension | +----------------------------------------------------+---------------+---------------------------------+ | cuda/hip_block_x_direct | kernel (For) | Map loop iterates directly to | @@ -413,14 +416,15 @@ policies have the prefix ``hip_``. | cuda/hip_block_z_loop | kernel (For) | Same as above, but use | | | launch (loop) | blocks in z-dimension | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_block_size_x_unchecked | kernel (For) | Same as block_x_unchecked | +| cuda/hip_block_size_x_direct_unchecked | kernel (For) | Same as | +| | | block_x_direct_unchecked | | | launch (loop) | policy above but with a | | | | compile time number of blocks | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_block_size_y_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_block_size_y_direct_unchecked | kernel (For) | Same as above, but map | | | launch (loop) | to blocks in y-dim | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_block_size_z_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_block_size_z_direct_unchecked | kernel (For) | Same as above, but map | | | launch (loop) | to blocks in z-dim | +----------------------------------------------------+---------------+---------------------------------+ | cuda/hip_block_size_x_direct | kernel (For) | Same as block_x_direct | @@ -443,8 +447,9 @@ policies have the prefix ``hip_``. | cuda/hip_block_size_z_loop | kernel (For) | Same as above, but map | | | launch (loop) | to blocks in z-dim | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_global_x_unchecked | kernel (For) | Map loop iterates unchecked | -| | launch (loop) | to GPU threads in the grid in | +| cuda/hip_global_x_direct_unchecked | kernel (For) | Map loop iterates directly | +| | launch (loop) | without checking loop bounds | +| | | to GPU threads in the grid in | | | | the x-dimension, one iterate | | | | per thread. Creates a unique | | | | thread id for each thread on | @@ -453,13 +458,14 @@ policies have the prefix ``hip_``. | | | threadIdx.x + | | | | threadDim.x * blockIdx.x. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_global_y_unchecked | kernel (For) | Same as above, but uses | +| cuda/hip_global_y_direct_unchecked | kernel (For) | Same as above, but uses | | | launch (loop) | globals in y-dimension. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_global_z_unchecked | kernel (For) | Same as above, but uses | +| cuda/hip_global_z_direct_unchecked | kernel (For) | Same as above, but uses | | | launch (loop) | globals in z-dimension. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_global_x_direct | kernel (For) | Same as global_x_unchecked | +| cuda/hip_global_x_direct | kernel (For) | Same as | +| | | global_x_direct_unchecked | | | launch (loop) | above, but maps loop iterates | | | launch (loop) | directly to GPU threads in the | | | | grid, one or no iterates per | @@ -482,15 +488,16 @@ policies have the prefix ``hip_``. | cuda/hip_global_z_loop | kernel (For) | Same as above, but use | | | launch (loop) | globals in z-dimension | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_global_size_x_unchecked | kernel (For) | Same as global_x_unchecked | +| cuda/hip_global_size_x_direct_unchecked| kernel (For) | Same as | +| | | global_x_direct_unchecked | | | launch (loop) | policy above but with | | | | a compile time block | | | | size. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_global_size_y_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_global_size_y_direct_unchecked| kernel (For) | Same as above, but map | | | launch (loop) | to globals in y-dim | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_global_size_z_unchecked | kernel (For) | Same as above, but map | +| cuda/hip_global_size_z_direct_unchecked| kernel (For) | Same as above, but map | | | launch (loop) | to globals in z-dim | +----------------------------------------------------+---------------+---------------------------------+ | cuda/hip_global_size_x_direct | kernel (For) | Same as global_x_direct | @@ -515,8 +522,9 @@ policies have the prefix ``hip_``. | cuda/hip_global_size_z_loop | kernel (For) | Same as above, but map | | | launch (loop) | to globals in z-dim | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_warp_unchecked | kernel (For) | Map work to threads in a | -| | | warp unchecked. | +| cuda/hip_warp_direct_unchecked | kernel (For) | Map work to threads in a | +| | | warp directly without checking | +| | | loop bounds. | | | | Cannot be used in conjunction | | | | with cuda/hip_thread_x_* | | | | policies. | @@ -525,7 +533,8 @@ policies have the prefix ``hip_``. | | | cuda/hip_thread_y/z_* | | | | policies. | +----------------------------------------------------+---------------+---------------------------------+ -| cuda/hip_warp_direct | kernel (For) | Similar to warp_unchecked, but | +| cuda/hip_warp_direct | kernel (For) | Similar to | +| | | warp_direct_unchecked, but | | | | map work to threads | | | | in a warp directly. | +----------------------------------------------------+---------------+---------------------------------+ @@ -589,22 +598,22 @@ policies: | | BLOCKS_PER_SM_OFFSET) * sm_per_device | +----------------------------------------------------+-----------------------------------------+ -Several notable constraints apply to RAJA CUDA/HIP *unchecked* policies. +Several notable constraints apply to RAJA CUDA/HIP *direct_unchecked* policies. -.. note:: * Unchecked policies do not mask out threads that are out-of-range. +.. note:: * DirectUnchecked policies do not mask out threads that are out-of-range. So they should only be used when the size of the range matches the size of the block or grid. - * Repeating unchecked policies with the same dimension in perfectly + * Repeating direct_unchecked policies with the same dimension in perfectly nested loops is not recommended. Your code may do something, but likely will not do what you expect and/or be correct. - * If multiple unchecked policies are used in a kernel (using different + * If multiple direct_unchecked policies are used in a kernel (using different dimensions), the product of sizes of the corresponding iteration spaces cannot be greater than the maximum allowable threads per block or blocks per grid. Typically, this is 1024 threads per block. Attempting to execute a kernel with more than the maximum allowed causes the CUDA/HIP runtime to complain about *illegal launch parameters.* - * **Block-unchecked policies are recommended for most tiled loop + * **Block-direct-unchecked policies are recommended for most tiled loop patterns. In these cases the CUDA/HIP kernel is launched with the exact number of blocks needed so no checking is necessary.** @@ -648,9 +657,9 @@ Several notes regarding CUDA/HIP *loop* policies are also good to know. Finally -.. note:: CUDA/HIP block-unchecked or block-direct policies may be preferable +.. note:: CUDA/HIP block-direct-unchecked or block-direct policies may be preferable to block-loop policies in situations where block load balancing may - be an issue as the block-unchecked or block-direct policies may yield + be an issue as the block-direct-unchecked or block-direct policies may yield better performance. Several notes regarding the CUDA/HIP policy implementation that allow you to @@ -661,7 +670,7 @@ write more explicit policies. behavior of the policy. * Policies have a mapping from loop iterations to iterates in the index set via a iteration_mapping enum template parameter. The - possible values are Unchecked, Direct, and StridedLoop. + possible values are DirectUnchecked, Direct, and StridedLoop. * Policies can be safely used with some synchronization constructs via a kernel_sync_requirement enum template parameter. The possible values are none and sync. diff --git a/include/RAJA/policy/cuda/kernel/For.hpp b/include/RAJA/policy/cuda/kernel/For.hpp index abefacd9e5..7a6d10f4ec 100644 --- a/include/RAJA/policy/cuda/kernel/For.hpp +++ b/include/RAJA/policy/cuda/kernel/For.hpp @@ -45,7 +45,7 @@ template , + RAJA::policy::cuda::cuda_indexer, EnclosedStmts...>, Types> { @@ -60,7 +60,7 @@ struct CudaStatementExecutor< using diff_t = segment_diff_type; using DimensionCalculator = RAJA::internal::KernelDimensionCalculator< - RAJA::policy::cuda::cuda_indexer>; + RAJA::policy::cuda::cuda_indexer>; static inline RAJA_DEVICE void exec(Data &data, bool thread_active) diff --git a/include/RAJA/policy/cuda/kernel/ForICount.hpp b/include/RAJA/policy/cuda/kernel/ForICount.hpp index 85b82a9cc6..92a59cb9a8 100644 --- a/include/RAJA/policy/cuda/kernel/ForICount.hpp +++ b/include/RAJA/policy/cuda/kernel/ForICount.hpp @@ -32,7 +32,7 @@ namespace internal /* * Executor for work sharing inside CudaKernel. - * Provides an unchecked mapping. + * Provides a direct unchecked mapping. * Assigns the loop index to offset ArgumentId * Assigns the loop index to param ParamId * Meets all sync requirements @@ -47,20 +47,20 @@ template , + RAJA::policy::cuda::cuda_indexer, EnclosedStmts...>, Types> : CudaStatementExecutor< Data, statement::For, + RAJA::policy::cuda::cuda_indexer, EnclosedStmts...>, Types> { using Base = CudaStatementExecutor< Data, statement::For, + RAJA::policy::cuda::cuda_indexer, EnclosedStmts...>, Types>; diff --git a/include/RAJA/policy/cuda/kernel/Tile.hpp b/include/RAJA/policy/cuda/kernel/Tile.hpp index 865e476da0..a5377f7d7d 100644 --- a/include/RAJA/policy/cuda/kernel/Tile.hpp +++ b/include/RAJA/policy/cuda/kernel/Tile.hpp @@ -58,7 +58,7 @@ struct CudaStatementExecutor< Data, statement::Tile, - RAJA::policy::cuda::cuda_indexer, + RAJA::policy::cuda::cuda_indexer, EnclosedStmts...>, Types> { @@ -69,7 +69,7 @@ struct CudaStatementExecutor< using diff_t = segment_diff_type; - using DimensionCalculator = KernelDimensionCalculator>; + using DimensionCalculator = KernelDimensionCalculator>; static inline RAJA_DEVICE void exec(Data &data, bool thread_active) @@ -116,8 +116,8 @@ struct CudaStatementExecutor< // restrict to first tile segment = segment.slice(0, static_cast(chunk_size)); - // NOTE: We do not detect improper uses of unchecked policies under tiling. - // This happens when using an unchecked policy on a tiled range that is not + // NOTE: We do not detect improper uses of direct_unchecked policies under tiling. + // This happens when using a direct unchecked policy on a tiled range that is not // evenly divisible by chunk_size. LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(private_data); diff --git a/include/RAJA/policy/cuda/kernel/TileTCount.hpp b/include/RAJA/policy/cuda/kernel/TileTCount.hpp index 513b2fded4..a2de5e2bf3 100644 --- a/include/RAJA/policy/cuda/kernel/TileTCount.hpp +++ b/include/RAJA/policy/cuda/kernel/TileTCount.hpp @@ -60,14 +60,14 @@ struct CudaStatementExecutor< Data, statement::TileTCount, - RAJA::policy::cuda::cuda_indexer, + RAJA::policy::cuda::cuda_indexer, EnclosedStmts...>, Types> : public CudaStatementExecutor< Data, statement::Tile, - RAJA::policy::cuda::cuda_indexer, + RAJA::policy::cuda::cuda_indexer, EnclosedStmts...>, Types> { @@ -75,7 +75,7 @@ struct CudaStatementExecutor< Data, statement::Tile, - RAJA::policy::cuda::cuda_indexer, + RAJA::policy::cuda::cuda_indexer, EnclosedStmts...>, Types>; diff --git a/include/RAJA/policy/cuda/kernel/internal.hpp b/include/RAJA/policy/cuda/kernel/internal.hpp index 09be99c506..6e3ddcdde8 100644 --- a/include/RAJA/policy/cuda/kernel/internal.hpp +++ b/include/RAJA/policy/cuda/kernel/internal.hpp @@ -215,9 +215,9 @@ using cuda_statement_list_executor_t = CudaStatementListExecutor< template struct KernelDimensionCalculator; -// specialization for unchecked sequential policies +// specialization for direct unchecked sequential policies template -struct KernelDimensionCalculator>> { @@ -232,9 +232,9 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -250,7 +250,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -262,16 +262,16 @@ struct KernelDimensionCalculator(IndexMapper::block_size) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_cuda_dim(dims.threads, static_cast(IndexMapper::block_size)); set_cuda_dim(min_dims.threads, static_cast(IndexMapper::block_size)); } }; -// specialization for unchecked block policies +// specialization for direct unchecked block policies template -struct KernelDimensionCalculator>> { @@ -286,7 +286,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -298,16 +298,16 @@ struct KernelDimensionCalculator(IndexMapper::grid_size) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_cuda_dim(dims.blocks, static_cast(IndexMapper::grid_size)); set_cuda_dim(min_dims.blocks, static_cast(IndexMapper::grid_size)); } }; -// specialization for unchecked global policies +// specialization for direct unchecked global policies template -struct KernelDimensionCalculator>> { @@ -323,7 +323,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -337,7 +337,7 @@ struct KernelDimensionCalculator(IndexMapper::grid_size)); if ( len != (block_size * static_cast(IndexMapper::grid_size)) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_cuda_dim(dims.threads, block_size); set_cuda_dim(dims.blocks, static_cast(IndexMapper::grid_size)); @@ -347,7 +347,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -360,7 +360,7 @@ struct KernelDimensionCalculator(IndexMapper::block_size)); if ( len != (static_cast(IndexMapper::block_size) * grid_size) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_cuda_dim(dims.threads, static_cast(IndexMapper::block_size)); set_cuda_dim(dims.blocks, grid_size); @@ -370,7 +370,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -384,7 +384,7 @@ struct KernelDimensionCalculator(IndexMapper::block_size) * static_cast(IndexMapper::grid_size)) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_cuda_dim(dims.threads, static_cast(IndexMapper::block_size)); set_cuda_dim(dims.blocks, static_cast(IndexMapper::grid_size)); diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index fea2845e57..574899f408 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -358,7 +358,7 @@ struct LaunchExecute -struct LoopExecute, SEGMENT> { @@ -378,7 +378,7 @@ struct LoopExecute -struct LoopExecute, @@ -401,7 +401,7 @@ struct LoopExecute -struct LoopExecute -struct LoopICountExecute, SEGMENT> { @@ -642,7 +642,7 @@ struct LoopICountExecute -struct LoopICountExecute, @@ -667,7 +667,7 @@ struct LoopICountExecute -struct LoopICountExecute -struct LoopExecute, SEGMENT> - : LoopExecute, SEGMENT> {}; template -struct LoopExecute, @@ -931,7 +931,7 @@ struct LoopExecute -struct LoopExecute -struct TileExecute, SEGMENT> { @@ -1135,7 +1135,7 @@ struct TileExecute -struct TileExecute, @@ -1161,7 +1161,7 @@ struct TileExecute -struct TileExecute -struct TileTCountExecute, SEGMENT> { @@ -1416,7 +1416,7 @@ struct TileTCountExecute -struct TileTCountExecute, @@ -1446,7 +1446,7 @@ struct TileTCountExecute -struct TileTCountExecute>; using cuda_warp_direct = RAJA::policy::cuda::cuda_indexer< @@ -1588,8 +1588,8 @@ using cuda_launch_t = policy::cuda::cuda_launch_explicit_t -using cuda_indexer_unchecked = policy::cuda::cuda_indexer< - iteration_mapping::Unchecked, +using cuda_indexer_direct_unchecked = policy::cuda::cuda_indexer< + iteration_mapping::DirectUnchecked, kernel_sync_requirement::none, indexers...>; @@ -1612,8 +1612,8 @@ using cuda_indexer_syncable_loop = policy::cuda::cuda_indexer< indexers...>; template < typename ... indexers > -using cuda_flatten_indexer_unchecked = policy::cuda::cuda_flatten_indexer< - iteration_mapping::Unchecked, +using cuda_flatten_indexer_direct_unchecked = policy::cuda::cuda_flatten_indexer< + iteration_mapping::DirectUnchecked, kernel_sync_requirement::none, indexers...>; @@ -1683,11 +1683,11 @@ using cuda_flatten_indexer_loop = policy::cuda::cuda_flatten_indexer< * For example, a segment of size 1000 will only fit into 1000 threads, blocks, or global threads, and * triggers a runtime error in some cases. */ -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_THREAD_POLICIES(, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_THREAD_POLICIES(, direct_unchecked) -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_BLOCK_POLICIES(, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_BLOCK_POLICIES(, direct_unchecked) -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_POLICIES(, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_POLICIES(, direct_unchecked) /*! * Maps segment indices to CUDA threads, blocks, or global threads. @@ -1734,11 +1734,11 @@ RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_POLICIES(, syncable_loop) * Reshapes multiple physical threads, blocks, or global threads into a 1D * iteration space */ -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_THREAD_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_THREAD_POLICIES(flatten_, direct_unchecked) -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_BLOCK_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_BLOCK_POLICIES(flatten_, direct_unchecked) -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_POLICIES(flatten_, direct_unchecked) /* * Maps segment indices to flattened CUDA threads, blocks, or global threads. @@ -1888,11 +1888,11 @@ RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_POLICIES(flatten_, loop) * This is the lowest overhead mapping, but requires that there are the same * number of physical threads as the map requests. */ -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_THREAD_SIZE_POLICIES(, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_THREAD_SIZE_POLICIES(, direct_unchecked) -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_BLOCK_SIZE_POLICIES(, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_BLOCK_SIZE_POLICIES(, direct_unchecked) -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(, direct_unchecked) /*! * Maps segment indices to CUDA threads, blocks, or global threads. @@ -1924,11 +1924,11 @@ RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(, loop) * Reshapes multiple physical threads, blocks, or global threads into a 1D * iteration space. */ -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_THREAD_SIZE_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_THREAD_SIZE_POLICIES(flatten_, direct_unchecked) -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_BLOCK_SIZE_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_BLOCK_SIZE_POLICIES(flatten_, direct_unchecked) -RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_CUDA_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(flatten_, direct_unchecked) /* * Maps segment indices to flattened CUDA threads, blocks, or global threads. diff --git a/include/RAJA/policy/hip/kernel/For.hpp b/include/RAJA/policy/hip/kernel/For.hpp index 1f181d4590..addb556b88 100644 --- a/include/RAJA/policy/hip/kernel/For.hpp +++ b/include/RAJA/policy/hip/kernel/For.hpp @@ -45,7 +45,7 @@ template , + RAJA::policy::hip::hip_indexer, EnclosedStmts...>, Types> { @@ -60,7 +60,7 @@ struct HipStatementExecutor< using diff_t = segment_diff_type; using DimensionCalculator = RAJA::internal::KernelDimensionCalculator< - RAJA::policy::hip::hip_indexer>; + RAJA::policy::hip::hip_indexer>; static inline RAJA_DEVICE void exec(Data &data, bool thread_active) diff --git a/include/RAJA/policy/hip/kernel/ForICount.hpp b/include/RAJA/policy/hip/kernel/ForICount.hpp index 30d143c6cf..3342f994e0 100644 --- a/include/RAJA/policy/hip/kernel/ForICount.hpp +++ b/include/RAJA/policy/hip/kernel/ForICount.hpp @@ -32,7 +32,7 @@ namespace internal /* * Executor for work sharing inside HipKernel. - * Provides an unchecked mapping. + * Provides a direct unchecked mapping. * Assigns the loop index to offset ArgumentId * Assigns the loop index to param ParamId * Meets all sync requirements @@ -47,20 +47,20 @@ template , + RAJA::policy::hip::hip_indexer, EnclosedStmts...>, Types> : HipStatementExecutor< Data, statement::For, + RAJA::policy::hip::hip_indexer, EnclosedStmts...>, Types> { using Base = HipStatementExecutor< Data, statement::For, + RAJA::policy::hip::hip_indexer, EnclosedStmts...>, Types>; diff --git a/include/RAJA/policy/hip/kernel/Tile.hpp b/include/RAJA/policy/hip/kernel/Tile.hpp index 90c147329c..55653ddfe5 100644 --- a/include/RAJA/policy/hip/kernel/Tile.hpp +++ b/include/RAJA/policy/hip/kernel/Tile.hpp @@ -58,7 +58,7 @@ struct HipStatementExecutor< Data, statement::Tile, - RAJA::policy::hip::hip_indexer, + RAJA::policy::hip::hip_indexer, EnclosedStmts...>, Types> { @@ -69,7 +69,7 @@ struct HipStatementExecutor< using diff_t = segment_diff_type; - using DimensionCalculator = KernelDimensionCalculator>; + using DimensionCalculator = KernelDimensionCalculator>; static inline RAJA_DEVICE void exec(Data &data, bool thread_active) @@ -116,8 +116,8 @@ struct HipStatementExecutor< // restrict to first tile segment = segment.slice(0, static_cast(chunk_size)); - // NOTE: We do not detect improper uses of unchecked policies under tiling. - // This happens when using an unchecked policy on a tiled range that is not + // NOTE: We do not detect improper uses of direct_unchecked policies under tiling. + // This happens when using a direct unchecked policy on a tiled range that is not // evenly divisible by chunk_size. LaunchDims enclosed_dims = enclosed_stmts_t::calculateDimensions(private_data); diff --git a/include/RAJA/policy/hip/kernel/TileTCount.hpp b/include/RAJA/policy/hip/kernel/TileTCount.hpp index 6975c5a083..d73c71169e 100644 --- a/include/RAJA/policy/hip/kernel/TileTCount.hpp +++ b/include/RAJA/policy/hip/kernel/TileTCount.hpp @@ -60,14 +60,14 @@ struct HipStatementExecutor< Data, statement::TileTCount, - RAJA::policy::hip::hip_indexer, + RAJA::policy::hip::hip_indexer, EnclosedStmts...>, Types> : public HipStatementExecutor< Data, statement::Tile, - RAJA::policy::hip::hip_indexer, + RAJA::policy::hip::hip_indexer, EnclosedStmts...>, Types> { @@ -75,7 +75,7 @@ struct HipStatementExecutor< Data, statement::Tile, - RAJA::policy::hip::hip_indexer, + RAJA::policy::hip::hip_indexer, EnclosedStmts...>, Types>; diff --git a/include/RAJA/policy/hip/kernel/internal.hpp b/include/RAJA/policy/hip/kernel/internal.hpp index c518d67f1f..b8a2f017b6 100644 --- a/include/RAJA/policy/hip/kernel/internal.hpp +++ b/include/RAJA/policy/hip/kernel/internal.hpp @@ -215,9 +215,9 @@ using hip_statement_list_executor_t = HipStatementListExecutor< template struct KernelDimensionCalculator; -// specialization for unchecked sequential policies +// specialization for direct unchecked sequential policies template -struct KernelDimensionCalculator>> { @@ -227,14 +227,14 @@ struct KernelDimensionCalculator(1) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } } }; -// specialization for unchecked thread policies +// specialization for direct unchecked thread policies template -struct KernelDimensionCalculator>> { @@ -250,7 +250,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -262,16 +262,16 @@ struct KernelDimensionCalculator(IndexMapper::block_size) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_hip_dim(dims.threads, static_cast(IndexMapper::block_size)); set_hip_dim(min_dims.threads, static_cast(IndexMapper::block_size)); } }; -// specialization for unchecked block policies +// specialization for direct unchecked block policies template -struct KernelDimensionCalculator>> { @@ -286,7 +286,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -298,16 +298,16 @@ struct KernelDimensionCalculator(IndexMapper::grid_size) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_hip_dim(dims.blocks, static_cast(IndexMapper::grid_size)); set_hip_dim(min_dims.blocks, static_cast(IndexMapper::grid_size)); } }; -// specialization for unchecked global policies +// specialization for direct unchecked global policies template -struct KernelDimensionCalculator>> { @@ -323,7 +323,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -337,7 +337,7 @@ struct KernelDimensionCalculator(IndexMapper::grid_size)); if ( len != (block_size * static_cast(IndexMapper::grid_size)) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_hip_dim(dims.threads, block_size); set_hip_dim(dims.blocks, static_cast(IndexMapper::grid_size)); @@ -347,7 +347,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -360,7 +360,7 @@ struct KernelDimensionCalculator(IndexMapper::block_size)); if ( len != (static_cast(IndexMapper::block_size) * grid_size) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_hip_dim(dims.threads, static_cast(IndexMapper::block_size)); set_hip_dim(dims.blocks, grid_size); @@ -370,7 +370,7 @@ struct KernelDimensionCalculator -struct KernelDimensionCalculator>> { @@ -384,7 +384,7 @@ struct KernelDimensionCalculator(IndexMapper::block_size) * static_cast(IndexMapper::grid_size)) ) { - RAJA_ABORT_OR_THROW("len does not match the size of the unchecked mapped index space"); + RAJA_ABORT_OR_THROW("len does not match the size of the direct_unchecked mapped index space"); } set_hip_dim(dims.threads, static_cast(IndexMapper::block_size)); set_hip_dim(dims.blocks, static_cast(IndexMapper::grid_size)); diff --git a/include/RAJA/policy/hip/launch.hpp b/include/RAJA/policy/hip/launch.hpp index 18ab91526d..f5b4eda529 100644 --- a/include/RAJA/policy/hip/launch.hpp +++ b/include/RAJA/policy/hip/launch.hpp @@ -358,7 +358,7 @@ struct LaunchExecute> { HIP generic loop implementations */ template -struct LoopExecute, SEGMENT> { @@ -378,7 +378,7 @@ struct LoopExecute -struct LoopExecute, @@ -401,7 +401,7 @@ struct LoopExecute -struct LoopExecute -struct LoopICountExecute, SEGMENT> { @@ -642,7 +642,7 @@ struct LoopICountExecute -struct LoopICountExecute, @@ -667,7 +667,7 @@ struct LoopICountExecute -struct LoopICountExecute -struct LoopExecute, SEGMENT> - : LoopExecute, SEGMENT> {}; template -struct LoopExecute, @@ -931,7 +931,7 @@ struct LoopExecute -struct LoopExecute -struct TileExecute, SEGMENT> { @@ -1135,7 +1135,7 @@ struct TileExecute -struct TileExecute, @@ -1161,7 +1161,7 @@ struct TileExecute -struct TileExecute -struct TileTCountExecute, SEGMENT> { @@ -1416,7 +1416,7 @@ struct TileTCountExecute -struct TileTCountExecute, @@ -1446,7 +1446,7 @@ struct TileTCountExecute -struct TileTCountExecute>; using hip_warp_direct = RAJA::policy::hip::hip_indexer< @@ -1490,8 +1490,8 @@ using policy::hip::hip_launch_t; // policies usable with kernel and launch template < typename ... indexers > -using hip_indexer_unchecked = policy::hip::hip_indexer< - iteration_mapping::Unchecked, +using hip_indexer_direct_unchecked = policy::hip::hip_indexer< + iteration_mapping::DirectUnchecked, kernel_sync_requirement::none, indexers...>; @@ -1514,8 +1514,8 @@ using hip_indexer_syncable_loop = policy::hip::hip_indexer< indexers...>; template < typename ... indexers > -using hip_flatten_indexer_unchecked = policy::hip::hip_flatten_indexer< - iteration_mapping::Unchecked, +using hip_flatten_indexer_direct_unchecked = policy::hip::hip_flatten_indexer< + iteration_mapping::DirectUnchecked, kernel_sync_requirement::none, indexers...>; @@ -1585,11 +1585,11 @@ using hip_flatten_indexer_loop = policy::hip::hip_flatten_indexer< * For example, a segment of size 1000 will only fit into 1000 threads, blocks, or global threads, and * triggers a runtime error in some cases. */ -RAJA_INTERNAL_HIP_ALIAS_INDEXER_THREAD_POLICIES(, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_THREAD_POLICIES(, direct_unchecked) -RAJA_INTERNAL_HIP_ALIAS_INDEXER_BLOCK_POLICIES(, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_BLOCK_POLICIES(, direct_unchecked) -RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_POLICIES(, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_POLICIES(, direct_unchecked) /*! * Maps segment indices to HIP threads, blocks, or global threads. @@ -1636,11 +1636,11 @@ RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_POLICIES(, syncable_loop) * Reshapes multiple physical threads, blocks, or global threads into a 1D * iteration space */ -RAJA_INTERNAL_HIP_ALIAS_INDEXER_THREAD_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_THREAD_POLICIES(flatten_, direct_unchecked) -RAJA_INTERNAL_HIP_ALIAS_INDEXER_BLOCK_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_BLOCK_POLICIES(flatten_, direct_unchecked) -RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_POLICIES(flatten_, direct_unchecked) /* * Maps segment indices to flattened HIP threads, blocks, or global threads. @@ -1788,11 +1788,11 @@ RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_POLICIES(flatten_, loop) * This is the lowest overhead mapping, but requires that there are the same * number of physical threads as the map requests. */ -RAJA_INTERNAL_HIP_ALIAS_INDEXER_THREAD_SIZE_POLICIES(, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_THREAD_SIZE_POLICIES(, direct_unchecked) -RAJA_INTERNAL_HIP_ALIAS_INDEXER_BLOCK_SIZE_POLICIES(, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_BLOCK_SIZE_POLICIES(, direct_unchecked) -RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(, direct_unchecked) /*! * Maps segment indices to HIP threads, blocks, or global threads. @@ -1824,11 +1824,11 @@ RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(, loop) * Reshapes multiple physical threads, blocks, or global threads into a 1D * iteration space. */ -RAJA_INTERNAL_HIP_ALIAS_INDEXER_THREAD_SIZE_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_THREAD_SIZE_POLICIES(flatten_, direct_unchecked) -RAJA_INTERNAL_HIP_ALIAS_INDEXER_BLOCK_SIZE_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_BLOCK_SIZE_POLICIES(flatten_, direct_unchecked) -RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(flatten_, unchecked) +RAJA_INTERNAL_HIP_ALIAS_INDEXER_GLOBAL_SIZE_POLICIES(flatten_, direct_unchecked) /* * Maps segment indices to flattened HIP threads, blocks, or global threads. diff --git a/include/RAJA/util/types.hpp b/include/RAJA/util/types.hpp index 53f30fe4cb..0674db71c4 100644 --- a/include/RAJA/util/types.hpp +++ b/include/RAJA/util/types.hpp @@ -70,7 +70,7 @@ enum struct kernel_sync_requirement : int namespace iteration_mapping { -struct UncheckedBase {}; +struct DirectUncheckedBase {}; struct DirectBase {}; struct LoopBase {}; struct ContiguousLoopBase : LoopBase {}; @@ -84,8 +84,9 @@ struct SizedLoopSpecifyingBase : SizedLoopBase }; /// -/// Unchecked assumes the loop has the same number of iterations and indices and -/// maps directly without bounds checking from an iteration to an index. +/// DirectUnchecked assumes the loop has the same number of iterations and +/// indices and maps directly without bounds checking from an iteration to an +/// index. /// /// For example a loop with 4 iterations mapping indices from a range of size 4. /// int iterations = 4; @@ -99,7 +100,7 @@ struct SizedLoopSpecifyingBase : SizedLoopBase /// // 2 -> {2} /// // 3 -> {3} /// -struct Unchecked : UncheckedBase {}; +struct DirectUnchecked : DirectUncheckedBase {}; /// /// Direct assumes the loop has enough iterations for all of the indices and diff --git a/test/functional/kernel/CMakeLists.txt b/test/functional/kernel/CMakeLists.txt index b7951b822d..efaf08e0f7 100644 --- a/test/functional/kernel/CMakeLists.txt +++ b/test/functional/kernel/CMakeLists.txt @@ -6,7 +6,7 @@ ############################################################################### list(APPEND KERNEL_BACKENDS Sequential) -set(KERNEL_UNCHECKED_BACKENDS "") +set(KERNEL_DIRECT_UNCHECKED_BACKENDS "") if(RAJA_ENABLE_OPENMP) list(APPEND KERNEL_BACKENDS OpenMP) @@ -14,12 +14,12 @@ endif() if(RAJA_ENABLE_CUDA) list(APPEND KERNEL_BACKENDS Cuda) - list(APPEND KERNEL_UNCHECKED_BACKENDS Cuda) + list(APPEND KERNEL_DIRECT_UNCHECKED_BACKENDS Cuda) endif() if(RAJA_ENABLE_HIP) list(APPEND KERNEL_BACKENDS Hip) - list(APPEND KERNEL_UNCHECKED_BACKENDS Hip) + list(APPEND KERNEL_DIRECT_UNCHECKED_BACKENDS Hip) endif() if(RAJA_ENABLE_SYCL) @@ -52,7 +52,7 @@ add_subdirectory(nested-loop-view-types) add_subdirectory(reduce-loc) -add_subdirectory(tile-icount-tcount-unchecked) +add_subdirectory(tile-icount-tcount-direct-unchecked) add_subdirectory(tile-icount-tcount-direct) diff --git a/test/functional/kernel/nested-loop-reducesum/test-kernel-nested-loop.cpp.in b/test/functional/kernel/nested-loop-reducesum/test-kernel-nested-loop.cpp.in index 6a493c2d6b..fa27eb90b6 100644 --- a/test/functional/kernel/nested-loop-reducesum/test-kernel-nested-loop.cpp.in +++ b/test/functional/kernel/nested-loop-reducesum/test-kernel-nested-loop.cpp.in @@ -56,7 +56,7 @@ using CudaKernelNestedLoopExecPols = camp::list< // Device Depth 3 ReduceSum Exec Pols NestedLoopData, - NestedLoopData, + NestedLoopData, NestedLoopData >; @@ -71,7 +71,7 @@ using HipKernelNestedLoopExecPols = camp::list< // Device Depth 3 ReduceSum Exec Pols NestedLoopData, - NestedLoopData, + NestedLoopData, NestedLoopData >; diff --git a/test/functional/kernel/nested-loop-segment-types/test-kernel-nested-loop-segments.cpp.in b/test/functional/kernel/nested-loop-segment-types/test-kernel-nested-loop-segments.cpp.in index 9e620aea4b..342c9dae83 100644 --- a/test/functional/kernel/nested-loop-segment-types/test-kernel-nested-loop-segments.cpp.in +++ b/test/functional/kernel/nested-loop-segment-types/test-kernel-nested-loop-segments.cpp.in @@ -83,7 +83,7 @@ using CudaKernelExecPols = camp::list< RAJA::KernelPolicy< RAJA::statement::CudaKernelAsync< RAJA::statement::For<0, RAJA::cuda_block_z_loop, - RAJA::statement::For<1, RAJA::cuda_block_y_unchecked, + RAJA::statement::For<1, RAJA::cuda_block_y_direct_unchecked, RAJA::statement::For<2, RAJA::cuda_thread_x_loop, RAJA::statement::Lambda<0, RAJA::Segs<0, 1, 2>> > @@ -113,7 +113,7 @@ using HipKernelExecPols = camp::list< RAJA::KernelPolicy< RAJA::statement::HipKernelAsync< RAJA::statement::For<0, RAJA::hip_block_z_loop, - RAJA::statement::For<1, RAJA::hip_block_y_unchecked, + RAJA::statement::For<1, RAJA::hip_block_y_direct_unchecked, RAJA::statement::For<2, RAJA::hip_thread_x_loop, RAJA::statement::Lambda<0, RAJA::Segs<0, 1, 2>> > diff --git a/test/functional/kernel/nested-loop-view-types/test-kernel-nested-loop-view.cpp.in b/test/functional/kernel/nested-loop-view-types/test-kernel-nested-loop-view.cpp.in index 601ddfafe7..add4aa2915 100644 --- a/test/functional/kernel/nested-loop-view-types/test-kernel-nested-loop-view.cpp.in +++ b/test/functional/kernel/nested-loop-view-types/test-kernel-nested-loop-view.cpp.in @@ -145,7 +145,7 @@ camp::list< RAJA::statement::Tile<0, RAJA::tile_fixed<8>, RAJA::cuda_block_y_direct, RAJA::statement::Tile<1, RAJA::tile_fixed<8>, - RAJA::cuda_block_x_unchecked, + RAJA::cuda_block_x_direct_unchecked, RAJA::statement::For<0, RAJA::cuda_thread_y_direct, // outer RAJA::statement::For<1, RAJA::cuda_thread_x_direct, // inner RAJA::statement::Lambda<0> @@ -176,11 +176,11 @@ camp::list< RAJA::KernelPolicy< RAJA::statement::CudaKernelFixed<4*8*8, RAJA::statement::Tile<0, RAJA::tile_fixed<4>, - RAJA::cuda_block_z_unchecked, + RAJA::cuda_block_z_direct_unchecked, RAJA::statement::Tile<1, RAJA::tile_fixed<8>, - RAJA::cuda_block_y_unchecked, + RAJA::cuda_block_y_direct_unchecked, RAJA::statement::Tile<2, RAJA::tile_fixed<8>, - RAJA::cuda_block_x_unchecked, + RAJA::cuda_block_x_direct_unchecked, RAJA::statement::For<0, RAJA::cuda_thread_z_direct, // outer RAJA::statement::For<1, RAJA::cuda_thread_y_direct, // middle RAJA::statement::For<2, RAJA::cuda_thread_x_direct, // inner @@ -216,7 +216,7 @@ camp::list< RAJA::statement::Tile<0, RAJA::tile_fixed<8>, RAJA::hip_block_y_direct, RAJA::statement::Tile<1, RAJA::tile_fixed<8>, - RAJA::hip_block_x_unchecked, + RAJA::hip_block_x_direct_unchecked, RAJA::statement::For<0, RAJA::hip_thread_y_direct, // outer RAJA::statement::For<1, RAJA::hip_thread_x_direct, // inner RAJA::statement::Lambda<0> @@ -247,11 +247,11 @@ camp::list< RAJA::KernelPolicy< RAJA::statement::HipKernelFixed<4*8*8, RAJA::statement::Tile<0, RAJA::tile_fixed<4>, - RAJA::hip_block_z_unchecked, + RAJA::hip_block_z_direct_unchecked, RAJA::statement::Tile<1, RAJA::tile_fixed<8>, - RAJA::hip_block_y_unchecked, + RAJA::hip_block_y_direct_unchecked, RAJA::statement::Tile<2, RAJA::tile_fixed<8>, - RAJA::hip_block_x_unchecked, + RAJA::hip_block_x_direct_unchecked, RAJA::statement::For<0, RAJA::hip_thread_z_direct, // outer RAJA::statement::For<1, RAJA::hip_thread_y_direct, // middle RAJA::statement::For<2, RAJA::hip_thread_x_direct, // inner diff --git a/test/functional/kernel/nested-loop/test-kernel-nested-loop.cpp.in b/test/functional/kernel/nested-loop/test-kernel-nested-loop.cpp.in index d0e3166583..be5320c55d 100644 --- a/test/functional/kernel/nested-loop/test-kernel-nested-loop.cpp.in +++ b/test/functional/kernel/nested-loop/test-kernel-nested-loop.cpp.in @@ -73,11 +73,11 @@ using CudaKernelNestedLoopExecPols = camp::list< // Depth 2 Exec Pols NestedLoopData, NestedLoopData, - NestedLoopData, + NestedLoopData, NestedLoopData, RAJA::cuda_global_size_y_loop<8> >, // Depth 3 Exec Pols - NestedLoopData, + NestedLoopData, NestedLoopData, NestedLoopData, RAJA::cuda_global_size_y_direct<16>, RAJA::seq_exec > >; @@ -91,11 +91,11 @@ using HipKernelNestedLoopExecPols = camp::list< // Depth 2 Exec Pols NestedLoopData, NestedLoopData, - NestedLoopData, + NestedLoopData, NestedLoopData, RAJA::hip_global_size_y_loop<4> >, // Depth 3 Exec Pols - NestedLoopData, + NestedLoopData, NestedLoopData, NestedLoopData, RAJA::hip_global_size_y_direct<8>, RAJA::seq_exec > >; diff --git a/test/functional/kernel/tile-icount-tcount-unchecked/CMakeLists.txt b/test/functional/kernel/tile-icount-tcount-unchecked/CMakeLists.txt index be28532b6e..77d9c61a33 100644 --- a/test/functional/kernel/tile-icount-tcount-unchecked/CMakeLists.txt +++ b/test/functional/kernel/tile-icount-tcount-unchecked/CMakeLists.txt @@ -16,16 +16,16 @@ set(TILESIZES 8 32) # # Note: KERNEL_BACKENDS is defined in ../CMakeLists.txt # -foreach( BACKEND ${KERNEL_UNCHECKED_BACKENDS} ) +foreach( BACKEND ${KERNEL_DIRECT_UNCHECKED_BACKENDS} ) # using omp target crashes the compiler with this one if( NOT ((BACKEND STREQUAL "OpenMPTarget")) ) foreach( TESTTYPE ${TESTTYPES} ) foreach( TILESIZE ${TILESIZES} ) - configure_file( test-kernel-tile-count-unchecked.cpp.in - test-kernel-${TESTTYPE}-${TILESIZE}-${BACKEND}-unchecked.cpp ) - raja_add_test( NAME test-kernel-${TESTTYPE}-${TILESIZE}-${BACKEND}-unchecked - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-${TESTTYPE}-${TILESIZE}-${BACKEND}-unchecked.cpp ) - target_include_directories(test-kernel-${TESTTYPE}-${TILESIZE}-${BACKEND}-unchecked.exe + configure_file( test-kernel-tile-count-direct-unchecked.cpp.in + test-kernel-${TESTTYPE}-${TILESIZE}-${BACKEND}-direct-unchecked.cpp ) + raja_add_test( NAME test-kernel-${TESTTYPE}-${TILESIZE}-${BACKEND}-direct-unchecked + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-kernel-${TESTTYPE}-${TILESIZE}-${BACKEND}-direct-unchecked.cpp ) + target_include_directories(test-kernel-${TESTTYPE}-${TILESIZE}-${BACKEND}-direct-unchecked.exe PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) endforeach() endforeach() diff --git a/test/functional/kernel/tile-icount-tcount-unchecked/test-kernel-tile-count-unchecked.cpp.in b/test/functional/kernel/tile-icount-tcount-unchecked/test-kernel-tile-count-unchecked.cpp.in index fe526aa5a6..3680f2be59 100644 --- a/test/functional/kernel/tile-icount-tcount-unchecked/test-kernel-tile-count-unchecked.cpp.in +++ b/test/functional/kernel/tile-icount-tcount-unchecked/test-kernel-tile-count-unchecked.cpp.in @@ -18,7 +18,7 @@ // Header for tests in ./tests directory // // Note: CMake adds ./tests as an include dir for these tests. -#include "test-kernel-tile-@TESTTYPE@-unchecked.hpp" +#include "test-kernel-tile-@TESTTYPE@-direct-unchecked.hpp" // // Note that a separate test file/executable is generated for each tile size @@ -42,8 +42,8 @@ using CudaKernelForICountExecPols = camp::list< RAJA::KernelPolicy< RAJA::statement::CudaKernel< - RAJA::statement::Tile<0, RAJA::tile_fixed<@TILESIZE@>, RAJA::cuda_block_x_unchecked, - RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::cuda_thread_x_unchecked, + RAJA::statement::Tile<0, RAJA::tile_fixed<@TILESIZE@>, RAJA::cuda_block_x_direct_unchecked, + RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::cuda_thread_x_direct_unchecked, RAJA::statement::Lambda<0> > > @@ -56,8 +56,8 @@ using CudaKernelTileTCountExecPols = camp::list< RAJA::KernelPolicy< RAJA::statement::CudaKernel< - RAJA::statement::TileTCount<0, RAJA::statement::Param<0>, RAJA::tile_fixed<@TILESIZE@>, RAJA::cuda_block_x_unchecked, - RAJA::statement::For<0, RAJA::cuda_thread_x_unchecked, + RAJA::statement::TileTCount<0, RAJA::statement::Param<0>, RAJA::tile_fixed<@TILESIZE@>, RAJA::cuda_block_x_direct_unchecked, + RAJA::statement::For<0, RAJA::cuda_thread_x_direct_unchecked, RAJA::statement::Lambda<0> > > @@ -77,8 +77,8 @@ using HipKernelForICountExecPols = camp::list< RAJA::KernelPolicy< RAJA::statement::HipKernel< - RAJA::statement::Tile<0, RAJA::tile_fixed<@TILESIZE@>, RAJA::hip_block_x_unchecked, - RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::hip_thread_x_unchecked, + RAJA::statement::Tile<0, RAJA::tile_fixed<@TILESIZE@>, RAJA::hip_block_x_direct_unchecked, + RAJA::statement::ForICount<0, RAJA::statement::Param<0>, RAJA::hip_thread_x_direct_unchecked, RAJA::statement::Lambda<0> > > @@ -91,8 +91,8 @@ using HipKernelTileTCountExecPols = camp::list< RAJA::KernelPolicy< RAJA::statement::HipKernel< - RAJA::statement::TileTCount<0, RAJA::statement::Param<0>, RAJA::tile_fixed<@TILESIZE@>, RAJA::hip_block_x_unchecked, - RAJA::statement::For<0, RAJA::hip_thread_x_unchecked, + RAJA::statement::TileTCount<0, RAJA::statement::Param<0>, RAJA::tile_fixed<@TILESIZE@>, RAJA::hip_block_x_direct_unchecked, + RAJA::statement::For<0, RAJA::hip_thread_x_direct_unchecked, RAJA::statement::Lambda<0> > > @@ -115,5 +115,5 @@ using @BACKEND@KernelTile@TESTTYPE@Types = // Instantiate parameterized tests // INSTANTIATE_TYPED_TEST_SUITE_P(@BACKEND@, - KernelTile@TESTTYPE@UncheckedTest, + KernelTile@TESTTYPE@DirectUncheckedTest, @BACKEND@KernelTile@TESTTYPE@Types); diff --git a/test/functional/kernel/tile-icount-tcount-unchecked/tests/test-kernel-tile-ForICount-unchecked.hpp b/test/functional/kernel/tile-icount-tcount-unchecked/tests/test-kernel-tile-ForICount-unchecked.hpp index 1a831c3f12..c173369481 100644 --- a/test/functional/kernel/tile-icount-tcount-unchecked/tests/test-kernel-tile-ForICount-unchecked.hpp +++ b/test/functional/kernel/tile-icount-tcount-unchecked/tests/test-kernel-tile-ForICount-unchecked.hpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -#ifndef __TEST_KERNEL_TILE_FORICOUNT_UNCHECKED_HPP__ -#define __TEST_KERNEL_TILE_FORICOUNT_UNCHECKED_HPP__ +#ifndef __TEST_KERNEL_TILE_FORICOUNT_DIRECT_UNCHECKED_HPP__ +#define __TEST_KERNEL_TILE_FORICOUNT_DIRECT_UNCHECKED_HPP__ // // Value struct for manipulating tile sizes in parameterized tests. @@ -18,7 +18,7 @@ struct Value { template -void KernelTileForICountUncheckedTestImpl(IDX_TYPE N, IDX_TYPE tsize) +void KernelTileForICountDirectUncheckedTestImpl(IDX_TYPE N, IDX_TYPE tsize) { RAJA::ReduceSum trip_count(0); @@ -55,14 +55,14 @@ void KernelTileForICountUncheckedTestImpl(IDX_TYPE N, IDX_TYPE tsize) } -TYPED_TEST_SUITE_P(KernelTileForICountUncheckedTest); +TYPED_TEST_SUITE_P(KernelTileForICountDirectUncheckedTest); template -class KernelTileForICountUncheckedTest : public ::testing::Test +class KernelTileForICountDirectUncheckedTest : public ::testing::Test { }; -TYPED_TEST_P(KernelTileForICountUncheckedTest, ForICountTileKernel) +TYPED_TEST_P(KernelTileForICountDirectUncheckedTest, ForICountTileKernel) { using IDX_TYPE = typename camp::at>::type; using EXEC_POLICY = typename camp::at>::type; @@ -70,16 +70,16 @@ TYPED_TEST_P(KernelTileForICountUncheckedTest, ForICountTileKernel) IDX_TYPE tsize = camp::at_v::value; - KernelTileForICountUncheckedTestImpl( + KernelTileForICountDirectUncheckedTestImpl( IDX_TYPE(0), tsize); - KernelTileForICountUncheckedTestImpl( + KernelTileForICountDirectUncheckedTestImpl( IDX_TYPE(tsize), tsize); - KernelTileForICountUncheckedTestImpl( + KernelTileForICountDirectUncheckedTestImpl( IDX_TYPE(13*tsize), tsize); } -REGISTER_TYPED_TEST_SUITE_P(KernelTileForICountUncheckedTest, +REGISTER_TYPED_TEST_SUITE_P(KernelTileForICountDirectUncheckedTest, ForICountTileKernel); -#endif // __TEST_KERNEL_TILE_FORICOUNT_UNCHECKED_HPP__ +#endif // __TEST_KERNEL_TILE_FORICOUNT_DIRECT_UNCHECKED_HPP__ diff --git a/test/functional/kernel/tile-icount-tcount-unchecked/tests/test-kernel-tile-TileTCount-unchecked.hpp b/test/functional/kernel/tile-icount-tcount-unchecked/tests/test-kernel-tile-TileTCount-unchecked.hpp index ef56efd788..9407741025 100644 --- a/test/functional/kernel/tile-icount-tcount-unchecked/tests/test-kernel-tile-TileTCount-unchecked.hpp +++ b/test/functional/kernel/tile-icount-tcount-unchecked/tests/test-kernel-tile-TileTCount-unchecked.hpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -#ifndef __TEST_KERNEL_TILE_TILETCOUNT_UNCHECKED_HPP_ -#define __TEST_KERNEL_TILE_TILETCOUNT_UNCHECKED_HPP_ +#ifndef __TEST_KERNEL_TILE_TILETCOUNT_DIRECT_UNCHECKED_HPP_ +#define __TEST_KERNEL_TILE_TILETCOUNT_DIRECT_UNCHECKED_HPP_ // // Value struct for manipulating tile sizes in parameterized tests. @@ -18,7 +18,7 @@ struct Value { template -void KernelTileTileTCountUncheckedTestImpl(IDX_TYPE N, IDX_TYPE tsize) +void KernelTileTileTCountDirectUncheckedTestImpl(IDX_TYPE N, IDX_TYPE tsize) { IDX_TYPE NT = (N + tsize - 1) / tsize; @@ -57,14 +57,14 @@ void KernelTileTileTCountUncheckedTestImpl(IDX_TYPE N, IDX_TYPE tsize) } -TYPED_TEST_SUITE_P(KernelTileTileTCountUncheckedTest); +TYPED_TEST_SUITE_P(KernelTileTileTCountDirectUncheckedTest); template -class KernelTileTileTCountUncheckedTest : public ::testing::Test +class KernelTileTileTCountDirectUncheckedTest : public ::testing::Test { }; -TYPED_TEST_P(KernelTileTileTCountUncheckedTest, TileTCountTileKernel) +TYPED_TEST_P(KernelTileTileTCountDirectUncheckedTest, TileTCountTileKernel) { using IDX_TYPE = typename camp::at>::type; using EXEC_POLICY = typename camp::at>::type; @@ -72,16 +72,16 @@ TYPED_TEST_P(KernelTileTileTCountUncheckedTest, TileTCountTileKernel) IDX_TYPE tsize = camp::at_v::value; - KernelTileTileTCountUncheckedTestImpl( + KernelTileTileTCountDirectUncheckedTestImpl( IDX_TYPE(0), tsize); - KernelTileTileTCountUncheckedTestImpl( + KernelTileTileTCountDirectUncheckedTestImpl( IDX_TYPE(tsize), tsize); - KernelTileTileTCountUncheckedTestImpl( + KernelTileTileTCountDirectUncheckedTestImpl( IDX_TYPE(13*tsize), tsize); } -REGISTER_TYPED_TEST_SUITE_P(KernelTileTileTCountUncheckedTest, +REGISTER_TYPED_TEST_SUITE_P(KernelTileTileTCountDirectUncheckedTest, TileTCountTileKernel); -#endif // __TEST_KERNEL_TILE_TILETCOUNT_UNCHECKED_HPP_ +#endif // __TEST_KERNEL_TILE_TILETCOUNT_DIRECT_UNCHECKED_HPP_ diff --git a/test/functional/kernel/tile-variants/test-kernel-tiledyn.cpp.in b/test/functional/kernel/tile-variants/test-kernel-tiledyn.cpp.in index 401561065c..822d3d089b 100644 --- a/test/functional/kernel/tile-variants/test-kernel-tiledyn.cpp.in +++ b/test/functional/kernel/tile-variants/test-kernel-tiledyn.cpp.in @@ -147,7 +147,7 @@ using OpenMPTargetKernelTileExecPols = // RAJA::KernelPolicy< // RAJA::statement::CudaKernel< // RAJA::statement::Tile<1, RAJA::tile_dynamic<1>, RAJA::seq_exec, -// RAJA::statement::Tile<0, RAJA::tile_dynamic<0>, RAJA::cuda_block_x_unchecked, +// RAJA::statement::Tile<0, RAJA::tile_dynamic<0>, RAJA::cuda_block_x_direct_unchecked, // RAJA::statement::For<1, RAJA::seq_exec, // RAJA::statement::For<0, RAJA::cuda_thread_x_loop, // RAJA::statement::Lambda<0, RAJA::Segs<0,1>, RAJA::Params<>> @@ -198,7 +198,7 @@ using OpenMPTargetKernelTileExecPols = // RAJA::KernelPolicy< // RAJA::statement::HipKernel< // RAJA::statement::Tile<1, RAJA::tile_dynamic<1>, RAJA::seq_exec, -// RAJA::statement::Tile<0, RAJA::tile_dynamic<0>, RAJA::hip_block_x_unchecked, +// RAJA::statement::Tile<0, RAJA::tile_dynamic<0>, RAJA::hip_block_x_direct_unchecked, // RAJA::statement::For<1, RAJA::seq_exec, // RAJA::statement::For<0, RAJA::hip_thread_x_loop, // RAJA::statement::Lambda<0, RAJA::Segs<0,1>, RAJA::Params<>> diff --git a/test/functional/kernel/tile-variants/test-kernel-tilefixed.cpp.in b/test/functional/kernel/tile-variants/test-kernel-tilefixed.cpp.in index 9204ab4548..4e3fca9704 100644 --- a/test/functional/kernel/tile-variants/test-kernel-tilefixed.cpp.in +++ b/test/functional/kernel/tile-variants/test-kernel-tilefixed.cpp.in @@ -147,7 +147,7 @@ using CudaKernelTileExecPols = RAJA::KernelPolicy< RAJA::statement::CudaKernel< RAJA::statement::Tile<1, RAJA::tile_fixed, RAJA::seq_exec, - RAJA::statement::Tile<0, RAJA::tile_fixed, RAJA::cuda_block_x_unchecked, + RAJA::statement::Tile<0, RAJA::tile_fixed, RAJA::cuda_block_x_direct_unchecked, RAJA::statement::For<1, RAJA::seq_exec, RAJA::statement::For<0, RAJA::cuda_thread_x_loop, RAJA::statement::Lambda<0> @@ -198,7 +198,7 @@ using HipKernelTileExecPols = RAJA::KernelPolicy< RAJA::statement::HipKernel< RAJA::statement::Tile<1, RAJA::tile_fixed, RAJA::seq_exec, - RAJA::statement::Tile<0, RAJA::tile_fixed, RAJA::hip_block_x_unchecked, + RAJA::statement::Tile<0, RAJA::tile_fixed, RAJA::hip_block_x_direct_unchecked, RAJA::statement::For<1, RAJA::seq_exec, RAJA::statement::For<0, RAJA::hip_thread_x_loop, RAJA::statement::Lambda<0> diff --git a/test/functional/kernel/tile-variants/test-kernel-tilelocal.cpp.in b/test/functional/kernel/tile-variants/test-kernel-tilelocal.cpp.in index b2a1b6a9bd..c2cb62180b 100644 --- a/test/functional/kernel/tile-variants/test-kernel-tilelocal.cpp.in +++ b/test/functional/kernel/tile-variants/test-kernel-tilelocal.cpp.in @@ -95,8 +95,8 @@ using CudaKernelTileExecPols = RAJA::KernelPolicy< RAJA::statement::CudaKernel< - RAJA::statement::Tile<1, RAJA::tile_fixed, RAJA::cuda_block_x_unchecked, - RAJA::statement::Tile<0, RAJA::tile_fixed, RAJA::cuda_block_y_unchecked, + RAJA::statement::Tile<1, RAJA::tile_fixed, RAJA::cuda_block_x_direct_unchecked, + RAJA::statement::Tile<0, RAJA::tile_fixed, RAJA::cuda_block_y_direct_unchecked, RAJA::statement::InitLocalMem, @@ -132,8 +132,8 @@ using HipKernelTileExecPols = RAJA::KernelPolicy< RAJA::statement::HipKernel< - RAJA::statement::Tile<1, RAJA::tile_fixed, RAJA::hip_block_x_unchecked, - RAJA::statement::Tile<0, RAJA::tile_fixed, RAJA::hip_block_y_unchecked, + RAJA::statement::Tile<1, RAJA::tile_fixed, RAJA::hip_block_x_direct_unchecked, + RAJA::statement::Tile<0, RAJA::tile_fixed, RAJA::hip_block_y_direct_unchecked, RAJA::statement::InitLocalMem, diff --git a/test/functional/launch/CMakeLists.txt b/test/functional/launch/CMakeLists.txt index 25a0bf9379..b6cd1b1755 100644 --- a/test/functional/launch/CMakeLists.txt +++ b/test/functional/launch/CMakeLists.txt @@ -6,7 +6,7 @@ ############################################################################### list(APPEND LAUNCH_BACKENDS Sequential) -set(LAUNCH_UNCHECKED_BACKENDS "") +set(LAUNCH_DIRECT_UNCHECKED_BACKENDS "") if(RAJA_ENABLE_OPENMP) list(APPEND LAUNCH_BACKENDS OpenMP) @@ -14,12 +14,12 @@ endif() if(RAJA_ENABLE_CUDA) list(APPEND LAUNCH_BACKENDS Cuda) - list(APPEND LAUNCH_UNCHECKED_BACKENDS Cuda) + list(APPEND LAUNCH_DIRECT_UNCHECKED_BACKENDS Cuda) endif() if(RAJA_ENABLE_HIP) list(APPEND LAUNCH_BACKENDS Hip) - list(APPEND LAUNCH_UNCHECKED_BACKENDS Hip) + list(APPEND LAUNCH_DIRECT_UNCHECKED_BACKENDS Hip) endif() if(RAJA_ENABLE_SYCL) @@ -39,24 +39,24 @@ add_subdirectory(segment) add_subdirectory(shared_mem) -add_subdirectory(nested_unchecked) +add_subdirectory(nested_direct_unchecked) add_subdirectory(nested_direct) add_subdirectory(nested_loop) -add_subdirectory(tile_icount_tcount_unchecked) +add_subdirectory(tile_icount_tcount_direct_unchecked) add_subdirectory(tile_icount_tcount_direct) add_subdirectory(tile_icount_tcount_loop) -add_subdirectory(nested_tile_unchecked) +add_subdirectory(nested_tile_direct_unchecked) add_subdirectory(nested_tile_direct) add_subdirectory(nested_tile_loop) unset( LAUNCH_BACKENDS ) -unset( LAUNCH_UNCHECKED_BACKENDS ) +unset( LAUNCH_DIRECT_UNCHECKED_BACKENDS ) diff --git a/test/functional/launch/nested_tile_unchecked/CMakeLists.txt b/test/functional/launch/nested_tile_unchecked/CMakeLists.txt index 6152dfa2ad..ef87cf263c 100644 --- a/test/functional/launch/nested_tile_unchecked/CMakeLists.txt +++ b/test/functional/launch/nested_tile_unchecked/CMakeLists.txt @@ -10,13 +10,13 @@ # # -foreach( BACKEND ${LAUNCH_UNCHECKED_BACKENDS} ) - configure_file( test-launch-nested-tile-unchecked.cpp.in - test-launch-nested-Tile-Unchecked-${BACKEND}.cpp ) - raja_add_test( NAME test-launch-nested-Tile-Unchecked-${BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-launch-nested-Tile-Unchecked-${BACKEND}.cpp ) +foreach( BACKEND ${LAUNCH_DIRECT_UNCHECKED_BACKENDS} ) + configure_file( test-launch-nested-tile-direct-unchecked.cpp.in + test-launch-nested-Tile-DirectUnchecked-${BACKEND}.cpp ) + raja_add_test( NAME test-launch-nested-Tile-DirectUnchecked-${BACKEND} + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-launch-nested-Tile-DirectUnchecked-${BACKEND}.cpp ) - target_include_directories(test-launch-nested-Tile-Unchecked-${BACKEND}.exe + target_include_directories(test-launch-nested-Tile-DirectUnchecked-${BACKEND}.exe PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) endforeach() diff --git a/test/functional/launch/nested_tile_unchecked/test-launch-nested-tile-unchecked.cpp.in b/test/functional/launch/nested_tile_unchecked/test-launch-nested-tile-unchecked.cpp.in index 85595ac970..8239f01441 100644 --- a/test/functional/launch/nested_tile_unchecked/test-launch-nested-tile-unchecked.cpp.in +++ b/test/functional/launch/nested_tile_unchecked/test-launch-nested-tile-unchecked.cpp.in @@ -13,14 +13,14 @@ #include "RAJA_test-index-types.hpp" #include "RAJA_test-forall-data.hpp" -#include "RAJA_test-launch-unchecked-teams-threads-3D-execpol.hpp" +#include "RAJA_test-launch-direct-unchecked-teams-threads-3D-execpol.hpp" // // Header for tests in ./tests directory // // Note: CMake adds ./tests as an include dir for these tests. // -#include "test-launch-nested-Tile-Unchecked.hpp" +#include "test-launch-nested-Tile-DirectUnchecked.hpp" // @@ -35,5 +35,5 @@ using @BACKEND@LaunchNestedTypes = // Instantiate parameterized test // INSTANTIATE_TYPED_TEST_SUITE_P(@BACKEND@, - LaunchNestedTileUncheckedTest, + LaunchNestedTileDirectUncheckedTest, @BACKEND@LaunchNestedTypes); diff --git a/test/functional/launch/nested_tile_unchecked/tests/test-launch-nested-Tile-Unchecked.hpp b/test/functional/launch/nested_tile_unchecked/tests/test-launch-nested-Tile-Unchecked.hpp index c50a5fb267..3dcbea06e8 100644 --- a/test/functional/launch/nested_tile_unchecked/tests/test-launch-nested-Tile-Unchecked.hpp +++ b/test/functional/launch/nested_tile_unchecked/tests/test-launch-nested-Tile-Unchecked.hpp @@ -5,15 +5,15 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -#ifndef __TEST_LAUNCH_NESTED_TILE_UNCHECKED_HPP__ -#define __TEST_LAUNCH_NESTED_TILE_UNCHECKED_HPP__ +#ifndef __TEST_LAUNCH_NESTED_TILE_DIRECT_UNCHECKED_HPP__ +#define __TEST_LAUNCH_NESTED_TILE_DIRECT_UNCHECKED_HPP__ #include template -void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M) +void LaunchNestedTileDirectUncheckedTestImpl(INDEX_TYPE M) { const int tile_size_x = 2; @@ -104,14 +104,14 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M) } -TYPED_TEST_SUITE_P(LaunchNestedTileUncheckedTest); +TYPED_TEST_SUITE_P(LaunchNestedTileDirectUncheckedTest); template -class LaunchNestedTileUncheckedTest : public ::testing::Test +class LaunchNestedTileDirectUncheckedTest : public ::testing::Test { }; -TYPED_TEST_P(LaunchNestedTileUncheckedTest, RangeSegmentTeams) +TYPED_TEST_P(LaunchNestedTileDirectUncheckedTest, RangeSegmentTeams) { using INDEX_TYPE = typename camp::at>::type; @@ -128,13 +128,13 @@ TYPED_TEST_P(LaunchNestedTileUncheckedTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedTileUncheckedTestImpl (INDEX_TYPE(0)); - //Keep at one since we are doing a unchecked thread test - LaunchNestedTileUncheckedTestImpl (INDEX_TYPE(1)); @@ -142,7 +142,7 @@ TYPED_TEST_P(LaunchNestedTileUncheckedTest, RangeSegmentTeams) } -REGISTER_TYPED_TEST_SUITE_P(LaunchNestedTileUncheckedTest, +REGISTER_TYPED_TEST_SUITE_P(LaunchNestedTileDirectUncheckedTest, RangeSegmentTeams); -#endif // __TEST_LAUNCH_NESTED_TILE_UNCHECKED_HPP__ +#endif // __TEST_LAUNCH_NESTED_TILE_DIRECT_UNCHECKED_HPP__ diff --git a/test/functional/launch/nested_unchecked/CMakeLists.txt b/test/functional/launch/nested_unchecked/CMakeLists.txt index cb67616db9..cc9cd2a7cf 100644 --- a/test/functional/launch/nested_unchecked/CMakeLists.txt +++ b/test/functional/launch/nested_unchecked/CMakeLists.txt @@ -8,14 +8,14 @@ # # List of segment types for generating test files. # -set(NESTEDTYPES Unchecked) +set(NESTEDTYPES DirectUnchecked) # # Generate tests for each enabled RAJA back-end. # # -foreach( BACKEND ${LAUNCH_UNCHECKED_BACKENDS} ) +foreach( BACKEND ${LAUNCH_DIRECT_UNCHECKED_BACKENDS} ) foreach( NESTEDTYPES ${NESTEDTYPES} ) configure_file( test-launch-nested.cpp.in test-launch-nested-${NESTEDTYPES}-${BACKEND}.cpp ) diff --git a/test/functional/launch/nested_unchecked/test-launch-nested.cpp.in b/test/functional/launch/nested_unchecked/test-launch-nested.cpp.in index 08ec672089..066143508a 100644 --- a/test/functional/launch/nested_unchecked/test-launch-nested.cpp.in +++ b/test/functional/launch/nested_unchecked/test-launch-nested.cpp.in @@ -13,7 +13,7 @@ #include "RAJA_test-index-types.hpp" #include "RAJA_test-forall-data.hpp" -#include "RAJA_test-launch-unchecked-teams-threads-3D-execpol.hpp" +#include "RAJA_test-launch-direct-unchecked-teams-threads-3D-execpol.hpp" // // Header for tests in ./tests directory diff --git a/test/functional/launch/nested_unchecked/tests/test-launch-nested-Unchecked.hpp b/test/functional/launch/nested_unchecked/tests/test-launch-nested-Unchecked.hpp index ed2a096c39..49e799b2e3 100644 --- a/test/functional/launch/nested_unchecked/tests/test-launch-nested-Unchecked.hpp +++ b/test/functional/launch/nested_unchecked/tests/test-launch-nested-Unchecked.hpp @@ -5,15 +5,15 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -#ifndef __TEST_LAUNCH_NESTED_UNCHECKED_HPP__ -#define __TEST_LAUNCH_NESTED_UNCHECKED_HPP__ +#ifndef __TEST_LAUNCH_NESTED_DIRECT_UNCHECKED_HPP__ +#define __TEST_LAUNCH_NESTED_DIRECT_UNCHECKED_HPP__ #include template -void LaunchNestedUncheckedTestImpl(INDEX_TYPE M) +void LaunchNestedDirectUncheckedTestImpl(INDEX_TYPE M) { RAJA::TypedRangeSegment r1(0, 2*M); @@ -108,14 +108,14 @@ void LaunchNestedUncheckedTestImpl(INDEX_TYPE M) } -TYPED_TEST_SUITE_P(LaunchNestedUncheckedTest); +TYPED_TEST_SUITE_P(LaunchNestedDirectUncheckedTest); template -class LaunchNestedUncheckedTest : public ::testing::Test +class LaunchNestedDirectUncheckedTest : public ::testing::Test { }; -TYPED_TEST_P(LaunchNestedUncheckedTest, RangeSegmentTeams) +TYPED_TEST_P(LaunchNestedDirectUncheckedTest, RangeSegmentTeams) { using INDEX_TYPE = typename camp::at>::type; @@ -133,13 +133,13 @@ TYPED_TEST_P(LaunchNestedUncheckedTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedUncheckedTestImpl (INDEX_TYPE(0)); - //Keep at one since we are doing a unchecked thread test - LaunchNestedUncheckedTestImpl (INDEX_TYPE(1)); @@ -147,7 +147,7 @@ TYPED_TEST_P(LaunchNestedUncheckedTest, RangeSegmentTeams) } -REGISTER_TYPED_TEST_SUITE_P(LaunchNestedUncheckedTest, +REGISTER_TYPED_TEST_SUITE_P(LaunchNestedDirectUncheckedTest, RangeSegmentTeams); -#endif // __TEST_LAUNCH_NESTED_UNCHECKED_HPP__ +#endif // __TEST_LAUNCH_NESTED_DIRECT_UNCHECKED_HPP__ diff --git a/test/functional/launch/tile_icount_tcount_unchecked/CMakeLists.txt b/test/functional/launch/tile_icount_tcount_unchecked/CMakeLists.txt index cb01f0b926..b81a48b86e 100644 --- a/test/functional/launch/tile_icount_tcount_unchecked/CMakeLists.txt +++ b/test/functional/launch/tile_icount_tcount_unchecked/CMakeLists.txt @@ -10,13 +10,13 @@ # # -foreach( BACKEND ${LAUNCH_UNCHECKED_BACKENDS} ) - configure_file( test-launch-nested-tile-icount-tcount-unchecked.cpp.in - test-launch-nested-Tile-iCount-tCount-Unchecked-${BACKEND}.cpp ) - raja_add_test( NAME test-launch-nested-Tile-iCount-tCount-Unchecked-${BACKEND} - SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-launch-nested-Tile-iCount-tCount-Unchecked-${BACKEND}.cpp ) +foreach( BACKEND ${LAUNCH_DIRECT_UNCHECKED_BACKENDS} ) + configure_file( test-launch-nested-tile-icount-tcount-direct-unchecked.cpp.in + test-launch-nested-Tile-iCount-tCount-DirectUnchecked-${BACKEND}.cpp ) + raja_add_test( NAME test-launch-nested-Tile-iCount-tCount-DirectUnchecked-${BACKEND} + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-launch-nested-Tile-iCount-tCount-DirectUnchecked-${BACKEND}.cpp ) - target_include_directories(test-launch-nested-Tile-iCount-tCount-Unchecked-${BACKEND}.exe + target_include_directories(test-launch-nested-Tile-iCount-tCount-DirectUnchecked-${BACKEND}.exe PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) endforeach() diff --git a/test/functional/launch/tile_icount_tcount_unchecked/test-launch-nested-tile-icount-tcount-unchecked.cpp.in b/test/functional/launch/tile_icount_tcount_unchecked/test-launch-nested-tile-icount-tcount-unchecked.cpp.in index 3ddaeff554..eff627c848 100644 --- a/test/functional/launch/tile_icount_tcount_unchecked/test-launch-nested-tile-icount-tcount-unchecked.cpp.in +++ b/test/functional/launch/tile_icount_tcount_unchecked/test-launch-nested-tile-icount-tcount-unchecked.cpp.in @@ -13,14 +13,14 @@ #include "RAJA_test-index-types.hpp" #include "RAJA_test-forall-data.hpp" -#include "RAJA_test-launch-unchecked-teams-threads-1D-execpol.hpp" +#include "RAJA_test-launch-direct-unchecked-teams-threads-1D-execpol.hpp" // // Header for tests in ./tests directory // // Note: CMake adds ./tests as an include dir for these tests. // -#include "test-launch-nested-Tile-iCount-tCount-Unchecked.hpp" +#include "test-launch-nested-Tile-iCount-tCount-DirectUnchecked.hpp" // @@ -35,5 +35,5 @@ using @BACKEND@LaunchNestedTypes = // Instantiate parameterized test // INSTANTIATE_TYPED_TEST_SUITE_P(@BACKEND@, - LaunchNestedTileUncheckedTest, + LaunchNestedTileDirectUncheckedTest, @BACKEND@LaunchNestedTypes); diff --git a/test/functional/launch/tile_icount_tcount_unchecked/tests/test-launch-nested-Tile-iCount-tCount-Unchecked.hpp b/test/functional/launch/tile_icount_tcount_unchecked/tests/test-launch-nested-Tile-iCount-tCount-Unchecked.hpp index d9b19a83a7..0cfcb3121e 100644 --- a/test/functional/launch/tile_icount_tcount_unchecked/tests/test-launch-nested-Tile-iCount-tCount-Unchecked.hpp +++ b/test/functional/launch/tile_icount_tcount_unchecked/tests/test-launch-nested-Tile-iCount-tCount-Unchecked.hpp @@ -5,14 +5,14 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -#ifndef __TEST_LAUNCH_NESTED_TILE_ICOUNT_TCOUNT_UNCHECKED_HPP__ -#define __TEST_LAUNCH_NESTED_TILE_ICOUNT_TCOUNT_UNCHECKED_HPP__ +#ifndef __TEST_LAUNCH_NESTED_TILE_ICOUNT_TCOUNT_DIRECT_UNCHECKED_HPP__ +#define __TEST_LAUNCH_NESTED_TILE_ICOUNT_TCOUNT_DIRECT_UNCHECKED_HPP__ #include template -void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M) +void LaunchNestedTileDirectUncheckedTestImpl(INDEX_TYPE M) { constexpr int threads_x = 4; @@ -99,14 +99,14 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M) } -TYPED_TEST_SUITE_P(LaunchNestedTileUncheckedTest); +TYPED_TEST_SUITE_P(LaunchNestedTileDirectUncheckedTest); template -class LaunchNestedTileUncheckedTest : public ::testing::Test +class LaunchNestedTileDirectUncheckedTest : public ::testing::Test { }; -TYPED_TEST_P(LaunchNestedTileUncheckedTest, RangeSegmentTeams) +TYPED_TEST_P(LaunchNestedTileDirectUncheckedTest, RangeSegmentTeams) { using INDEX_TYPE = typename camp::at>::type; @@ -118,23 +118,23 @@ TYPED_TEST_P(LaunchNestedTileUncheckedTest, RangeSegmentTeams) // test zero-length range segment - LaunchNestedTileUncheckedTestImpl (INDEX_TYPE(0)); - //Keep at one since we are doing a unchecked thread test - LaunchNestedTileUncheckedTestImpl (INDEX_TYPE(1)); - LaunchNestedTileUncheckedTestImpl (INDEX_TYPE(2)); } -REGISTER_TYPED_TEST_SUITE_P(LaunchNestedTileUncheckedTest, +REGISTER_TYPED_TEST_SUITE_P(LaunchNestedTileDirectUncheckedTest, RangeSegmentTeams); -#endif // __TEST_LAUNCH_NESTED_TILE_ICOUNT_TCOUNT_UNCHECKED_HPP__ +#endif // __TEST_LAUNCH_NESTED_TILE_ICOUNT_TCOUNT_DIRECT_UNCHECKED_HPP__ diff --git a/test/include/RAJA_test-launch-direct-teams-threads-1D-execpol.hpp b/test/include/RAJA_test-launch-direct-teams-threads-1D-execpol.hpp index 5b756e00bf..da87899469 100644 --- a/test/include/RAJA_test-launch-direct-teams-threads-1D-execpol.hpp +++ b/test/include/RAJA_test-launch-direct-teams-threads-1D-execpol.hpp @@ -45,7 +45,7 @@ using OpenMP_launch_policies = camp::list; using cuda_direct_policies = camp::list< RAJA::LaunchPolicy>, - RAJA::LoopPolicy, + RAJA::LoopPolicy, RAJA::LoopPolicy >; @@ -68,7 +68,7 @@ using Cuda_launch_policies = using hip_direct_policies = camp::list< RAJA::LaunchPolicy>, - RAJA::LoopPolicy, + RAJA::LoopPolicy, RAJA::LoopPolicy >; diff --git a/test/include/RAJA_test-launch-unchecked-teams-threads-1D-execpol.hpp b/test/include/RAJA_test-launch-unchecked-teams-threads-1D-execpol.hpp index da7eac0553..dccb077a2b 100644 --- a/test/include/RAJA_test-launch-unchecked-teams-threads-1D-execpol.hpp +++ b/test/include/RAJA_test-launch-unchecked-teams-threads-1D-execpol.hpp @@ -9,8 +9,8 @@ // Execution policy lists used throughout launch tests // -#ifndef __RAJA_TEST_LAUNCH_UNCHECKED_TEAMS_THREADS_1D_EXECPOL_HPP__ -#define __RAJA_TEST_LAUNCH_UNCHECKED_TEAMS_THREADS_1D_EXECPOL_HPP__ +#ifndef __RAJA_TEST_LAUNCH_DIRECT_UNCHECKED_TEAMS_THREADS_1D_EXECPOL_HPP__ +#define __RAJA_TEST_LAUNCH_DIRECT_UNCHECKED_TEAMS_THREADS_1D_EXECPOL_HPP__ #include "RAJA/RAJA.hpp" #include "camp/list.hpp" @@ -18,39 +18,39 @@ #if defined(RAJA_ENABLE_CUDA) -using cuda_unchecked_policies = +using cuda_direct_unchecked_policies = camp::list< RAJA::LaunchPolicy>, - RAJA::LoopPolicy, - RAJA::LoopPolicy + RAJA::LoopPolicy, + RAJA::LoopPolicy >; -using cuda_unchecked_explicit_policies = +using cuda_direct_unchecked_explicit_policies = camp::list< RAJA::LaunchPolicy>, - RAJA::LoopPolicy, - RAJA::LoopPolicy + RAJA::LoopPolicy, + RAJA::LoopPolicy >; using Cuda_launch_policies = camp::list< - cuda_unchecked_policies, - cuda_unchecked_explicit_policies + cuda_direct_unchecked_policies, + cuda_direct_unchecked_explicit_policies >; #endif // RAJA_ENABLE_CUDA #if defined(RAJA_ENABLE_HIP) -using hip_unchecked_policies = +using hip_direct_unchecked_policies = camp::list< RAJA::LaunchPolicy>, - RAJA::LoopPolicy, - RAJA::LoopPolicy + RAJA::LoopPolicy, + RAJA::LoopPolicy >; -using Hip_launch_policies = camp::list; +using Hip_launch_policies = camp::list; #endif // RAJA_ENABLE_HIP -#endif // __RAJA_TEST_LAUNCH_UNCHECKED_TEAMS_THREADS_1D_EXECPOL_HPP__ +#endif // __RAJA_TEST_LAUNCH_DIRECT_UNCHECKED_TEAMS_THREADS_1D_EXECPOL_HPP__ diff --git a/test/include/RAJA_test-launch-unchecked-teams-threads-3D-execpol.hpp b/test/include/RAJA_test-launch-unchecked-teams-threads-3D-execpol.hpp index 59e16ab3e8..125e84dd49 100644 --- a/test/include/RAJA_test-launch-unchecked-teams-threads-3D-execpol.hpp +++ b/test/include/RAJA_test-launch-unchecked-teams-threads-3D-execpol.hpp @@ -9,60 +9,60 @@ // Execution policy lists used throughout launch tests // -#ifndef __RAJA_TEST_LAUNCH_UNCHECKED_TEAM_THREADS_3D_EXECPOL_HPP__ -#define __RAJA_TEST_LAUNCH_UNCHECKED_TEAM_THREADS_3D_EXECPOL_HPP__ +#ifndef __RAJA_TEST_LAUNCH_DIRECT_UNCHECKED_TEAM_THREADS_3D_EXECPOL_HPP__ +#define __RAJA_TEST_LAUNCH_DIRECT_UNCHECKED_TEAM_THREADS_3D_EXECPOL_HPP__ #include "RAJA/RAJA.hpp" #include "camp/list.hpp" #if defined(RAJA_ENABLE_CUDA) -using cuda_unchecked_policies = +using cuda_direct_unchecked_policies = camp::list< RAJA::LaunchPolicy>, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy >; -using cuda_unchecked_explicit_policies = +using cuda_direct_unchecked_explicit_policies = camp::list< RAJA::LaunchPolicy>, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy >; using Cuda_launch_policies = camp::list< - cuda_unchecked_policies, - cuda_unchecked_explicit_policies + cuda_direct_unchecked_policies, + cuda_direct_unchecked_explicit_policies >; #endif // RAJA_ENABLE_CUDA #if defined(RAJA_ENABLE_HIP) -using hip_unchecked_policies = +using hip_direct_unchecked_policies = camp::list< RAJA::LaunchPolicy>, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy, - RAJA::LoopPolicy + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy, + RAJA::LoopPolicy >; -using Hip_launch_policies = camp::list; +using Hip_launch_policies = camp::list; #endif // RAJA_ENABLE_HIP -#endif //__RAJA_TEST_LAUNCH_UNCHECKED_TEAM_THREADS_3D_EXECPOL_HPP__ +#endif //__RAJA_TEST_LAUNCH_DIRECT_UNCHECKED_TEAM_THREADS_3D_EXECPOL_HPP__