Skip to content

Commit

Permalink
Merge branch 'develop' into feature/bowen/add-clang-format-v2
Browse files Browse the repository at this point in the history
  • Loading branch information
johnbowen42 committed Dec 12, 2024
2 parents f279264 + 0e40820 commit 11cc30b
Show file tree
Hide file tree
Showing 30 changed files with 208 additions and 209 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ endif()

if(RAJA_ENABLE_HIP)
message(STATUS "HIP version: ${hip_VERSION}")
set(RAJA_HIP_WAVESIZE "64" CACHE STRING "Set the wave size for GPU architecture. E.g. MI200/MI300 this is 64.")
if("${hip_VERSION}" VERSION_LESS "3.5")
message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA requires HIP/ROCm version 3.5 or newer. ")
endif()
Expand Down
33 changes: 15 additions & 18 deletions examples/dynamic-forall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,28 +77,25 @@ int main(int argc, char *argv[])

//----------------------------------------------------------------------------//

std::cout << "\n Running C-style vector addition...\n";

// _cstyle_vector_add_start
for (int i = 0; i < N; ++i) {
c[i] = a[i] + b[i];
}
// _cstyle_vector_add_end

checkResult(c, N);
//printResult(c, N);


//----------------------------------------------------------------------------//
// Example of dynamic policy selection for forall
//----------------------------------------------------------------------------//
std::cout << "\n Running dynamic forall vector addition and reductions...\n";

int sum = 0;
using VAL_INT_SUM = RAJA::expt::ValOp<int, RAJA::operators::plus>;

RAJA::RangeSegment range(0, N);

//policy is chosen from the list
RAJA::expt::dynamic_forall<policy_list>(pol, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE (int i) {
RAJA::dynamic_forall<policy_list>(pol, range,
RAJA::expt::Reduce<RAJA::operators::plus>(&sum),
RAJA::expt::KernelName("RAJA dynamic forall"),
[=] RAJA_HOST_DEVICE (int i, VAL_INT_SUM &_sum) {

c[i] = a[i] + b[i];
_sum += 1;
});
// _rajaseq_vector_add_end

std::cout << "Sum = " << sum << ", expected sum: " << N << std::endl;
checkResult(c, N);
//printResult(c, N);

Expand Down Expand Up @@ -126,9 +123,9 @@ void checkResult(int* res, int len)
if ( res[i] != 0 ) { correct = false; }
}
if ( correct ) {
std::cout << "\n\t result -- PASS\n";
std::cout << "\n\t Vector sum result -- PASS\n";
} else {
std::cout << "\n\t result -- FAIL\n";
std::cout << "\n\t Vector sum result -- FAIL\n";
}
}

Expand Down
2 changes: 1 addition & 1 deletion examples/memoryManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void deallocate(T *&ptr)
hipErrchk(hipMalloc((void **)&ptr, sizeof(T) * size));
#elif defined(RAJA_ENABLE_SYCL)
auto qu = sycl_res->get<camp::resources::Sycl>().get_queue();
ptr = cl::sycl::malloc_device<T>(size, *qu);
ptr = ::sycl::malloc_device<T>(size, *qu);
#endif
return ptr;
}
Expand Down
2 changes: 1 addition & 1 deletion examples/resource-dynamic-forall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ int main(int argc, char *argv[])
RAJA::resources::Resource res = RAJA::Get_Host_Resource(host_res, select_cpu_or_gpu);
#endif

RAJA::expt::dynamic_forall<policy_list>
RAJA::dynamic_forall<policy_list>
(res, pol, RAJA::RangeSegment(0, N), [=] RAJA_HOST_DEVICE (int i) {

c[i] = a[i] + b[i];
Expand Down
2 changes: 1 addition & 1 deletion exercises/memoryManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void deallocate(T *&ptr)
hipErrchk(hipMalloc((void **)&ptr, sizeof(T) * size));
#elif defined(RAJA_ENABLE_SYCL)
auto qu = sycl_res->get<camp::resources::Sycl>().get_queue();
ptr = cl::sycl::malloc_device<T>(size, *qu);
ptr = ::sycl::malloc_device<T>(size, *qu);
#endif
return ptr;
}
Expand Down
2 changes: 2 additions & 0 deletions include/RAJA/config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ static_assert(RAJA_HAS_SOME_CXX14,
#cmakedefine RAJA_ENABLE_NV_TOOLS_EXT
#cmakedefine RAJA_ENABLE_ROCTX

#cmakedefine RAJA_HIP_WAVESIZE @RAJA_HIP_WAVESIZE@

/*!
******************************************************************************
*
Expand Down
143 changes: 69 additions & 74 deletions include/RAJA/pattern/forall.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,104 +647,99 @@ RAJA_INLINE camp::resources::EventProxy<Res> CallForallIcount::operator()(T cons
// - Returns a generic event proxy only if a resource is provided
// avoids overhead of constructing a typed erased resource
//
namespace expt
template<camp::idx_t IDX, typename POLICY_LIST>
struct dynamic_helper
{

template<camp::idx_t IDX, typename POLICY_LIST>
struct dynamic_helper
template<typename SEGMENT, typename... PARAMS>
static void invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params)
{
template<typename SEGMENT, typename BODY>
static void invoke_forall(const int pol, SEGMENT const &seg, BODY const &body)
{
if(IDX==pol){
using t_pol = typename camp::at<POLICY_LIST,camp::num<IDX>>::type;
RAJA::forall<t_pol>(seg, body);
return;
}
dynamic_helper<IDX-1, POLICY_LIST>::invoke_forall(pol, seg, body);
}

template<typename SEGMENT, typename BODY>
static resources::EventProxy<resources::Resource>
invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body)
{

if(IDX==pol){
using t_pol = typename camp::at<POLICY_LIST,camp::num<IDX>>::type;
using resource_type = typename resources::get_resource<t_pol>::type;

if(IDX==pol){
RAJA::forall<t_pol>(r.get<resource_type>(), seg, body);

//Return a generic event proxy from r,
//because forall returns a typed event proxy
return {r};
}

return dynamic_helper<IDX-1, POLICY_LIST>::invoke_forall(r, pol, seg, body);
RAJA::forall<t_pol>(seg, params...);
return;
}
dynamic_helper<IDX-1, POLICY_LIST>::invoke_forall(pol, seg, params...);
}

};

template<typename POLICY_LIST>
struct dynamic_helper<0, POLICY_LIST>
template<typename SEGMENT, typename... PARAMS>
static resources::EventProxy<resources::Resource>
invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params)
{
template<typename SEGMENT, typename BODY>
static void
invoke_forall(const int pol, SEGMENT const &seg, BODY const &body)
{
if(0==pol){
using t_pol = typename camp::at<POLICY_LIST,camp::num<0>>::type;
RAJA::forall<t_pol>(seg, body);
return;
}
RAJA_ABORT_OR_THROW("Policy enum not supported ");
}

template<typename SEGMENT, typename BODY>
static resources::EventProxy<resources::Resource>
invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body)
{
if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range ");

using t_pol = typename camp::at<POLICY_LIST,camp::num<0>>::type;
using resource_type = typename resources::get_resource<t_pol>::type;
using t_pol = typename camp::at<POLICY_LIST,camp::num<IDX>>::type;
using resource_type = typename resources::get_resource<t_pol>::type;

RAJA::forall<t_pol>(r.get<resource_type>(), seg, body);
if(IDX==pol){
RAJA::forall<t_pol>(r.get<resource_type>(), seg, params...);

//Return a generic event proxy from r,
//because forall returns a typed event proxy
return {r};
}

};
return dynamic_helper<IDX-1, POLICY_LIST>::invoke_forall(r, pol, seg, params...);
}

template<typename POLICY_LIST, typename SEGMENT, typename BODY>
void dynamic_forall(const int pol, SEGMENT const &seg, BODY const &body)
{
constexpr int N = camp::size<POLICY_LIST>::value;
static_assert(N > 0, "RAJA policy list must not be empty");
};

if(pol > N-1) {
RAJA_ABORT_OR_THROW("Policy enum not supported");
template<typename POLICY_LIST>
struct dynamic_helper<0, POLICY_LIST>
{
template<typename SEGMENT, typename... PARAMS>
static void
invoke_forall(const int pol, SEGMENT const &seg, PARAMS&&... params)
{
if(0==pol){
using t_pol = typename camp::at<POLICY_LIST,camp::num<0>>::type;
RAJA::forall<t_pol>(seg, params...);
return;
}
dynamic_helper<N-1, POLICY_LIST>::invoke_forall(pol, seg, body);
RAJA_ABORT_OR_THROW("Policy enum not supported ");
}

template<typename POLICY_LIST, typename SEGMENT, typename BODY>
resources::EventProxy<resources::Resource>
dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, BODY const &body)
template<typename SEGMENT, typename... PARAMS>
static resources::EventProxy<resources::Resource>
invoke_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params)
{
constexpr int N = camp::size<POLICY_LIST>::value;
static_assert(N > 0, "RAJA policy list must not be empty");
if(pol != 0) RAJA_ABORT_OR_THROW("Policy value out of range ");

if(pol > N-1) {
RAJA_ABORT_OR_THROW("Policy value out of range");
}
using t_pol = typename camp::at<POLICY_LIST,camp::num<0>>::type;
using resource_type = typename resources::get_resource<t_pol>::type;

RAJA::forall<t_pol>(r.get<resource_type>(), seg, params...);

//Return a generic event proxy from r,
//because forall returns a typed event proxy
return {r};
}

};

return dynamic_helper<N-1, POLICY_LIST>::invoke_forall(r, pol, seg, body);
template<typename POLICY_LIST, typename SEGMENT, typename... PARAMS>
void dynamic_forall(const int pol, SEGMENT const &seg, PARAMS&&... params)
{
constexpr int N = camp::size<POLICY_LIST>::value;
static_assert(N > 0, "RAJA policy list must not be empty");

if(pol > N-1) {
RAJA_ABORT_OR_THROW("Policy enum not supported");
}
dynamic_helper<N-1, POLICY_LIST>::invoke_forall(pol, seg, params...);
}

template<typename POLICY_LIST, typename SEGMENT, typename... PARAMS>
resources::EventProxy<resources::Resource>
dynamic_forall(RAJA::resources::Resource r, const int pol, SEGMENT const &seg, PARAMS&&... params)
{
constexpr int N = camp::size<POLICY_LIST>::value;
static_assert(N > 0, "RAJA policy list must not be empty");

if(pol > N-1) {
RAJA_ABORT_OR_THROW("Policy value out of range");
}

} // namespace expt
return dynamic_helper<N-1, POLICY_LIST>::invoke_forall(r, pol, seg, params...);
}


} // namespace RAJA
Expand Down
2 changes: 1 addition & 1 deletion include/RAJA/pattern/launch/launch_core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class LaunchContext
void *shared_mem_ptr;

#if defined(RAJA_ENABLE_SYCL)
mutable cl::sycl::nd_item<3> *itm;
mutable ::sycl::nd_item<3> *itm;
#endif

RAJA_HOST_DEVICE LaunchContext()
Expand Down
5 changes: 3 additions & 2 deletions include/RAJA/policy/hip/policy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,9 @@ struct DeviceConstants
// values for HIP warp size and max block size.
//
#if defined(__HIP_PLATFORM_AMD__)
constexpr DeviceConstants device_constants(64, 1024, 64); // MI300A
// constexpr DeviceConstants device_constants(64, 1024, 128); // MI250X
constexpr DeviceConstants device_constants(RAJA_HIP_WAVESIZE, 1024, 64); // MI300A
// constexpr DeviceConstants device_constants(RAJA_HIP_WAVESIZE, 1024, 128); // MI250X

#elif defined(__HIP_PLATFORM_NVIDIA__)
constexpr DeviceConstants device_constants(32, 1024, 32); // V100
#endif
Expand Down
4 changes: 2 additions & 2 deletions include/RAJA/policy/sycl/MemUtils_SYCL.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ namespace detail
struct syclInfo {
sycl_dim_t gridDim{0};
sycl_dim_t blockDim{0};
cl::sycl::queue qu = cl::sycl::queue();
::sycl::queue qu = ::sycl::queue();
bool setup_reducers = false;
#if defined(RAJA_ENABLE_OPENMP)
syclInfo* thread_states = nullptr;
Expand All @@ -62,7 +62,7 @@ extern syclInfo g_status;

extern syclInfo tl_status;

extern std::unordered_map<cl::sycl::queue, bool> g_queue_info_map;
extern std::unordered_map<::sycl::queue, bool> g_queue_info_map;

} // namespace detail

Expand Down
4 changes: 2 additions & 2 deletions include/RAJA/policy/sycl/forall.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ resources::EventProxy<resources::Sycl> forall_impl(resources::Sycl &sycl_res,
}).wait(); // Need to wait for completion to free memory

// Free our device memory
cl::sycl::free(lbody, *q);
cl::sycl::free(beg, *q);
::sycl::free(lbody, *q);
::sycl::free(beg, *q);

RAJA_FT_END;
}
Expand Down
2 changes: 1 addition & 1 deletion include/RAJA/policy/sycl/kernel/Conditional.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct SyclStatementExecutor<Data,
static
inline
RAJA_DEVICE
void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
{
if (Conditional::eval(data)) {

Expand Down
14 changes: 7 additions & 7 deletions include/RAJA/policy/sycl/kernel/For.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ struct SyclStatementExecutor<
using diff_t = segment_diff_type<ArgumentId, Data>;

static
inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
{
auto len = segment_length<ArgumentId>(data);
auto i = item.get_global_id(Dim);
Expand Down Expand Up @@ -124,7 +124,7 @@ struct SyclStatementExecutor<
using diff_t = segment_diff_type<ArgumentId, Data>;

static
inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
{
auto len = segment_length<ArgumentId>(data);
auto i = item.get_group(Dim);
Expand Down Expand Up @@ -187,7 +187,7 @@ struct SyclStatementExecutor<
using diff_t = segment_diff_type<ArgumentId, Data>;

static
inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
{
auto len = segment_length<ArgumentId>(data);
auto i0 = item.get_group(Dim);
Expand Down Expand Up @@ -253,7 +253,7 @@ struct SyclStatementExecutor<
using diff_t = segment_diff_type<ArgumentId, Data>;

static
inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
{
auto len = segment_length<ArgumentId>(data);
auto i = item.get_local_id(Dim);
Expand Down Expand Up @@ -317,7 +317,7 @@ struct SyclStatementExecutor<
using diff_t = segment_diff_type<ArgumentId, Data>;

static
inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
{
auto len = segment_length<ArgumentId>(data);
auto i0 = item.get_local_id(Dim);
Expand Down Expand Up @@ -393,7 +393,7 @@ struct SyclStatementExecutor<
using diff_t = segment_diff_type<ArgumentId, Data>;

static
inline RAJA_DEVICE void exec(Data &data, cl::sycl::nd_item<3> item)
inline RAJA_DEVICE void exec(Data &data, ::sycl::nd_item<3> item)
{
auto len = segment_length<ArgumentId>(data);
auto i = item.get_global_id(0);
Expand Down Expand Up @@ -454,7 +454,7 @@ struct SyclStatementExecutor<
static
inline
RAJA_DEVICE
void exec(Data &data, cl::sycl::nd_item<3> item, bool thread_active)
void exec(Data &data, ::sycl::nd_item<3> item, bool thread_active)
{

using idx_type = camp::decay<decltype(camp::get<ArgumentId>(data.offset_tuple))>;
Expand Down
Loading

0 comments on commit 11cc30b

Please sign in to comment.