kernel reduction api #1523

Draft: wants to merge 2 commits into base: develop
4 changes: 4 additions & 0 deletions examples/CMakeLists.txt
@@ -3,6 +3,10 @@
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

raja_add_executable(
NAME kernel-param-reductions
SOURCES kernel-param-reductions.cpp)

raja_add_executable(
NAME tut_launch_basic
SOURCES tut_launch_basic.cpp)
99 changes: 99 additions & 0 deletions examples/kernel-param-reductions.cpp
@@ -0,0 +1,99 @@
#include "RAJA/RAJA.hpp"

#include "memoryManager.hpp"

int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[]))
{

std::cout << "\n\nRAJA kernel reductions example...\n";

constexpr int N = 100;
constexpr int Nsq = N * N;

using DATA_TYPE = double;
DATA_TYPE* a = memoryManager::allocate<DATA_TYPE>(Nsq);

using OUTER_LOOP_EXEC = RAJA::omp_parallel_for_exec;
using REDUCE_POL = RAJA::omp_reduce;
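// Note: these OpenMP policies assume RAJA was built with OpenMP enabled
// (RAJA_ENABLE_OPENMP); RAJA::loop_exec and RAJA::seq_reduce could be
// substituted for a purely sequential run.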

// Populate the array with values and compute a sequential reference sum.

DATA_TYPE a_red_sol = 0;
for (int i=0; i < Nsq; i++){
a[i] = i * 0.1;
a_red_sol += a[i];
}

using EXEC_POL =
RAJA::KernelPolicy<
RAJA::statement::For<1, OUTER_LOOP_EXEC,
RAJA::statement::For<0, RAJA::loop_exec,
RAJA::statement::Lambda<0>
>
>
>;
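// With this policy, the outer loop iterates over segment 1 (lambda argument j)
// using the OpenMP policy above, the inner loop iterates over segment 0
// (lambda argument i) sequentially, and Lambda<0> invokes the loop body.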

//Current Implementation
{
std::cout << "\n\n RAJA::ReduceSum example...\n";

RAJA::ReduceSum<REDUCE_POL, DATA_TYPE> work_sum(0);
DATA_TYPE race_sum(0);
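// work_sum is a RAJA reduction object and is safe to update from multiple
// OpenMP threads; race_sum is a plain double captured by reference, so its
// update below is a deliberate data race shown for comparison.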

RAJA::kernel<EXEC_POL>(
RAJA::make_tuple(
RAJA::TypedRangeSegment<int>(0, N),
RAJA::TypedRangeSegment<int>(0, N)
),

[=, &race_sum](int i, int j) {
work_sum += a[i * N + j];
race_sum += a[i * N + j];
}
);

std::cout << "Seq Solution : " << a_red_sol << "\n";
std::cout << "ReduceSum Solution : "<< work_sum.get() << "\n";
std::cout << "Race Sum Solution : "<< race_sum << "\n";
}

//Param Implementation
{
std::cout << "\n\n RAJA::expt::Reduce example...\n";

RAJA::ReduceSum<REDUCE_POL, DATA_TYPE> work_sum(0);
DATA_TYPE race_sum(0);
DATA_TYPE expt_sum(0);

//using EXPT_REDUCE = RAJA::expt::Reduce< RAJA::operators::plus<DATA_TYPE> >;

RAJA::kernel_param<EXEC_POL>(
RAJA::make_tuple(
RAJA::TypedRangeSegment<int>(0, N),
RAJA::TypedRangeSegment<int>(0, N)
),

RAJA::make_tuple(
DATA_TYPE(0),
//EXPT_REDUCE(&expt_sum)
RAJA::expt::Reduce< RAJA::operators::plus >(&expt_sum)
),
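// The lambda receives one argument per range segment (i, j) followed by one
// argument per parameter tuple entry: r corresponds to the plain DATA_TYPE(0)
// value and r2 to the value exposed by the expt::Reduce parameter (see the
// param_helper logic added in Lambda.hpp).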

[=, &race_sum](int i, int j, DATA_TYPE& r, DATA_TYPE& r2) {
work_sum += a[i * N + j];
race_sum += a[i * N + j];
//r += a[i * N + j];   // would accumulate into the plain scalar parameter
//r2 += a[i * N + j];  // would accumulate into the value exposed by expt::Reduce (left disabled in this draft)
}
);

std::cout << "Seq Solution : " << a_red_sol << "\n";
std::cout << "ReduceSum Solution : "<< work_sum.get() << "\n";
std::cout << "Race Sum Solution : "<< race_sum << "\n";
std::cout << "Expt Sun Solution : "<< expt_sum << "\n";
}

memoryManager::deallocate(a);

return EXIT_SUCCESS;
}
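For comparison, the experimental value-parameter reduction interface already available through RAJA::forall, which this draft mirrors for RAJA::kernel_param, looks roughly as follows. This is a sketch only: it reuses a and Nsq from the example above, the forall_sum name is illustrative, and the exact lambda argument type may differ between RAJA versions.

double forall_sum = 0.0;
RAJA::forall<RAJA::omp_parallel_for_exec>(
  RAJA::TypedRangeSegment<int>(0, Nsq),
  RAJA::expt::Reduce<RAJA::operators::plus>(&forall_sum),
  [=] (int i, double& s) {
    s += a[i];   // s accumulates into forall_sum once the forall completes
  });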
70 changes: 65 additions & 5 deletions include/RAJA/pattern/kernel/Lambda.hpp
@@ -246,21 +246,81 @@ struct LambdaArgSwitchboard<Types, LambdaArg<lambda_arg_seg_t, id>>

};

//
//////////////////////////////////////////////////////////////////////
//
// Lambda parameter interpretation: executable parameter arguments,
// such as expt::Reduce, need to be treated differently from plain
// parameters when they are handed to the lambda.
//
//////////////////////////////////////////////////////////////////////
//
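// The specialization below unwraps such parameters before they reach the
// lambda: a plain parameter is forwarded by reference, while an
// expt::Reduce-style parameter is replaced by a reference to its stored value.
// Intended call site (sketch based on the example added in this PR):
//
//   RAJA::kernel_param<EXEC_POL>(segments,
//       RAJA::make_tuple(RAJA::expt::Reduce<RAJA::operators::plus>(&sum)),
//       [=](int i, int j, double& s) { s += a[i * N + j]; });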
using ExecParamBase = ::RAJA::expt::detail::ForallParamBase;
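// ParamReturnHelper picks the type handed to the lambda for a parameter T:
// a reference to T's underlying value_type when T is convertible to
// ExecParamBase (i.e. an executable parameter such as expt::Reduce),
// otherwise a reference to T itself.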

template<typename T, bool>
struct ParamReturnHelper{};

template<typename T>
struct ParamReturnHelper<T, true>
{
using type = typename std::add_lvalue_reference<typename camp::decay<T>::value_type>::type;
};

template<typename T>
struct ParamReturnHelper<T, false>
{
using type = typename std::add_lvalue_reference<T>::type;

};

template<typename T>
using ParamReturnType = typename ParamReturnHelper<T, std::is_convertible<T, ExecParamBase>::value>::type;

template<typename Types, camp::idx_t id>
struct LambdaArgSwitchboard<Types, LambdaArg<lambda_arg_param_t, id>>
{

private:
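// param_helper has two SFINAE-selected overloads: one forwards ordinary
// parameters unchanged, the other unwraps executable parameters (currently
// only expt::Reduce) into a reference to their stored value.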

template<typename T>
RAJA_HOST_DEVICE
RAJA_INLINE
constexpr
static
::camp::concepts::enable_if_t<ParamReturnType<T>,
::camp::concepts::negate<std::is_convertible<T, ExecParamBase>>>
param_helper(T&& t)
{
return t;
}

template<typename T>
RAJA_HOST_DEVICE
RAJA_INLINE
constexpr
static
::camp::concepts::enable_if_t<ParamReturnType<T>,
std::is_convertible<T, ExecParamBase>>
param_helper(T&& t)
{
// This currently handles only expt::Reduce; more general logic will be needed for other executable parameter types.
return t.val;
}

public:
template<typename Data>
RAJA_HOST_DEVICE
RAJA_INLINE
constexpr
-static auto extract(Data &&data)->
-typename std::add_lvalue_reference<camp::tuple_element_t<id,typename camp::decay<Data>::param_tuple_t>>::type
+static auto& extract(Data &&data)//->
+//typename std::add_lvalue_reference<camp::tuple_element_t<id,typename camp::decay<Data>::param_tuple_t>>::type
{
-return camp::get<id>(data.param_tuple);
+//return camp::get<id>(data.param_tuple);
+return param_helper(camp::get<id>(data.param_tuple));
}
};



template<typename Types, typename T, camp::idx_t value>
struct LambdaArgSwitchboard<Types, LambdaArg<lambda_arg_value_t<T>, value>>
{
@@ -277,11 +337,11 @@ struct LambdaArgSwitchboard<Types, LambdaArg<lambda_arg_value_t<T>, value>>


RAJA_SUPPRESS_HD_WARN
-template<camp::idx_t LoopIndex, typename Types, typename Data, typename... targLists>
+template<camp::idx_t LambdaIndex, typename Types, typename Data, typename... targLists>
RAJA_INLINE RAJA_HOST_DEVICE void invoke_lambda_with_args(Data &&data,
camp::list<targLists...> const &)
{
-camp::get<LoopIndex>(data.bodies)(
+camp::get<LambdaIndex>(data.bodies)(
LambdaArgSwitchboard<Types, targLists>::extract(data)...);
}
