Skip to content

Commit

Permalink
generic: sycl: fix accessor types
Browse files Browse the repository at this point in the history
  • Loading branch information
t4c1 authored and sgeor255 committed Oct 29, 2024
1 parent 188ae7f commit b4fa638
Show file tree
Hide file tree
Showing 11 changed files with 39 additions and 29 deletions.
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/binary_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct binary_kernel_vec_t {
: conf_(conf)
, src0_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_0))
, src1_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_1))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, src0_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
, src1_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
Expand Down Expand Up @@ -200,7 +200,7 @@ struct binary_kernel_vec_t {

xpu::sycl::in_memory_arg_t src0_;
xpu::sycl::in_memory_arg_t src1_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
xpu::sycl::in_memory_arg_t src0_scale_;
xpu::sycl::in_memory_arg_t src1_scale_;
data_type_t scales_dt_;
Expand Down
8 changes: 4 additions & 4 deletions src/gpu/generic/sycl/convolution_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct convolution_kernel_fwd_t {
, data_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_0))
, weights_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_WEIGHTS))
, bias_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_BIAS))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, data_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
, weights_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
Expand Down Expand Up @@ -244,7 +244,7 @@ struct convolution_kernel_fwd_t {
xpu::sycl::in_memory_arg_t data_;
xpu::sycl::in_memory_arg_t weights_;
xpu::sycl::in_memory_arg_t bias_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
xpu::sycl::in_memory_arg_t data_scale_;
xpu::sycl::in_memory_arg_t weights_scale_;
xpu::sycl::in_memory_arg_t dst_scale_;
Expand All @@ -262,7 +262,7 @@ struct convolution_kernel_bwd_data_t {
convolution_kernel_bwd_data_t(const sycl_convolution_conf_t &conf,
::sycl::handler &cgh, const exec_ctx_t &ctx)
: conf_(conf)
, diff_data_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DIFF_SRC))
, diff_data_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DIFF_SRC))
, weights_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_WEIGHTS))
, bias_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_BIAS))
, diff_dst_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_DIFF_DST))
Expand Down Expand Up @@ -473,7 +473,7 @@ struct convolution_kernel_bwd_data_t {

sycl_convolution_conf_t conf_;

xpu::sycl::out_memory_arg_t diff_data_;
xpu::sycl::inout_memory_arg_t diff_data_;
xpu::sycl::in_memory_arg_t weights_;
xpu::sycl::in_memory_arg_t bias_;
xpu::sycl::in_memory_arg_t diff_dst_;
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/eltwise_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ struct eltwise_fwd_kernel_vec_t {
: conf_(conf)
, src_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC))
, po_args_(cgh, ctx)
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST)) {}
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST)) {}

void operator()(::sycl::nd_item<1> item) const {
memory_tensor_t src_mem(src_, conf_.src_md);
Expand Down Expand Up @@ -204,7 +204,7 @@ struct eltwise_fwd_kernel_vec_t {
sycl_eltwise_conf_t conf_;
xpu::sycl::in_memory_arg_t src_;
post_op_input_args po_args_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
};

struct eltwise_bwd_kernel_vec_t {
Expand Down
14 changes: 7 additions & 7 deletions src/gpu/generic/sycl/matmul_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ struct matmul_kernel_fwd_t {
}

static void store_vec_helper(
out_memory_tensor_t &output, Vec data, int offset) {
inout_memory_tensor_t &output, Vec data, int offset) {
data_type_t type = output.md().data_type();
char *offset_ptr = static_cast<char *>(output.ptr())
+ data_type_size(type) * offset;
Expand Down Expand Up @@ -189,7 +189,7 @@ struct matmul_kernel_fwd_t {
}
}

void store(out_memory_tensor_t &output, int offset, int row_stride) {
void store(inout_memory_tensor_t &output, int offset, int row_stride) {
for (int row = 0; row < Rows; row++) {
for (int col = 0; col < Cols / vec_len; col++) {
store_vec_helper(output, data[row][col],
Expand All @@ -198,8 +198,8 @@ struct matmul_kernel_fwd_t {
}
}

void store_edge(out_memory_tensor_t &output, int offset, int row_stride,
int rows, int cols) {
void store_edge(inout_memory_tensor_t &output, int offset,
int row_stride, int rows, int cols) {
for (int row = 0; row < rows; row++) {
int col;
for (col = 0; col < cols / vec_len; col++) {
Expand All @@ -215,7 +215,7 @@ struct matmul_kernel_fwd_t {
}
}

void store_generic(out_memory_tensor_t &output, int offset,
void store_generic(inout_memory_tensor_t &output, int offset,
int row_stride, bool transpose, bool is_edge_block, int rows,
int cols) {
if (is_edge_block) {
Expand Down Expand Up @@ -331,7 +331,7 @@ struct matmul_kernel_fwd_t {
, data_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_0))
, weights_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_WEIGHTS))
, bias_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_BIAS))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, data_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
, data_scales_dt_((conf_.do_scale_data)
Expand Down Expand Up @@ -618,7 +618,7 @@ struct matmul_kernel_fwd_t {
xpu::sycl::in_memory_arg_t data_;
xpu::sycl::in_memory_arg_t weights_;
xpu::sycl::in_memory_arg_t bias_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
xpu::sycl::in_memory_arg_t data_scale_;
data_type_t data_scales_dt_;
xpu::sycl::in_memory_arg_t weights_scale_;
Expand Down
1 change: 1 addition & 0 deletions src/gpu/generic/sycl/ref_pooling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ struct ref_pooling_fwd_t : public gpu::generic::sycl::primitive_t {
src_md(0)->data_type != dst_md(0)->data_type,
desc()->prop_kind == forward_inference))
&& attr()->has_default_values(sm::post_ops)
&& sycl_post_ops_t::post_ops_ok(attr(), true, false)
&& attr_.set_default_formats(dst_md(0)) == status::success
&& md_dims_in_range(src_md());
if (!ok) return status::unimplemented;
Expand Down
1 change: 1 addition & 0 deletions src/gpu/generic/sycl/ref_resampling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ struct ref_resampling_fwd_t : public gpu::generic::sycl::primitive_t {
&& utils::one_of(
dst_md(0)->data_type, f32, bf16, f16, s32, s8, u8)
&& attr()->has_default_values(sm::post_ops)
&& sycl_post_ops_t::post_ops_ok(attr())
&& set_default_params() == status::success
&& attr_.set_default_formats(dst_md(0)) == status::success
&& (src_md(0)->format_desc.blocking.inner_nblks == 0)
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/reorder_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ struct reorder_kernel_t {
const exec_ctx_t &ctx)
: conf_(conf)
, src_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_0))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, src_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
, dst_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
Expand Down Expand Up @@ -153,7 +153,7 @@ struct reorder_kernel_t {
sycl_reorder_conf_t conf_;

xpu::sycl::in_memory_arg_t src_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
xpu::sycl::in_memory_arg_t src_scale_;
xpu::sycl::in_memory_arg_t dst_scale_;
data_type_t scales_src_dt_;
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/resampling_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ struct resampling_kernel_fwd_vec_t {
::sycl::handler &cgh, const exec_ctx_t &ctx)
: conf_(conf)
, src_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, po_args_(cgh, ctx) {}

void operator()(::sycl::nd_item<1> item) const {
Expand Down Expand Up @@ -142,7 +142,7 @@ struct resampling_kernel_fwd_vec_t {
sycl_resampling_conf_t conf_;

xpu::sycl::in_memory_arg_t src_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
post_op_input_args po_args_;
};

Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/softmax_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ struct softmax_fwd_kernel_vec_t {
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC))
, scale_dst_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, po_args_(cgh, ctx) {}

void operator()(::sycl::nd_item<1> item) const {
Expand Down Expand Up @@ -150,7 +150,7 @@ struct softmax_fwd_kernel_vec_t {
xpu::sycl::in_memory_arg_t src_;
xpu::sycl::in_memory_arg_t scale_src_;
xpu::sycl::in_memory_arg_t scale_dst_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
post_op_input_args po_args_;
};

Expand Down
16 changes: 8 additions & 8 deletions src/gpu/generic/sycl/sycl_post_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ struct ref_sum_op_t {
ref_sum_op_t(float scale, float zeropoint)
: scale_(scale), zeropoint_(zeropoint) {}

float load_and_compute(float acc, const xpu::sycl::out_memory_arg_t &dst,
float load_and_compute(float acc, const xpu::sycl::inout_memory_arg_t &dst,
dnnl::impl::data_type_t sum_dt_,
dim_t offset) const { // TODO dims32_t
memory_plain_t dst_mem(dst, sum_dt_);
Expand Down Expand Up @@ -265,14 +265,14 @@ struct sycl_post_ops_t {
n_post_ops_ = attr_po.len();
}

inline float apply(float acc, const xpu::sycl::out_memory_arg_t &dst,
inline float apply(float acc, const xpu::sycl::inout_memory_arg_t &dst,
dim_t dst_offset, const post_op_input_args &po_args,
dims_t src_offset) const;
inline float apply(float acc, float dst, const post_op_input_args &po_args,
dims_t src_offset) const;
inline float apply(float acc, const post_op_input_args &po_args,
dims_t src_offset) const;
inline float apply(float acc, const xpu::sycl::out_memory_arg_t &dst,
inline float apply(float acc, const xpu::sycl::inout_memory_arg_t &dst,
dim_t dst_offset) const;

inline int get_post_op() const { return n_post_ops_; }
Expand Down Expand Up @@ -311,9 +311,9 @@ struct post_op_input_args {
xpu::sycl::in_memory_arg_t args_[sycl_post_ops_t::max_post_ops];
};

float sycl_post_ops_t::apply(float acc, const xpu::sycl::out_memory_arg_t &dst,
dim_t dst_offset, const post_op_input_args &po_args,
dims_t src_offset) const {
float sycl_post_ops_t::apply(float acc,
const xpu::sycl::inout_memory_arg_t &dst, dim_t dst_offset,
const post_op_input_args &po_args, dims_t src_offset) const {
using namespace primitive_kind;

for (auto i = 0; i < n_post_ops_; ++i) {
Expand Down Expand Up @@ -368,8 +368,8 @@ float sycl_post_ops_t::apply(
return acc;
}

float sycl_post_ops_t::apply(float acc, const xpu::sycl::out_memory_arg_t &dst,
dim_t dst_offset) const {
float sycl_post_ops_t::apply(float acc,
const xpu::sycl::inout_memory_arg_t &dst, dim_t dst_offset) const {
using namespace primitive_kind;

for (auto i = 0; i < n_post_ops_; ++i) {
Expand Down
8 changes: 8 additions & 0 deletions src/xpu/sycl/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ namespace sycl {
&CTX_OUT_STORAGE(arg)) \
->get_out_memory_arg(ctx.stream(), cgh)

#define CTX_INOUT_SYCL_KERNEL_MEMORY(arg) \
CTX_OUT_STORAGE(arg).is_null() \
? xpu::sycl::memory_storage_base_t::empty_inout_memory_arg( \
ctx.stream(), cgh) \
: utils::downcast<const xpu::sycl::memory_storage_base_t *>( \
&CTX_OUT_STORAGE(arg)) \
->get_inout_memory_arg(ctx.stream(), cgh)

#define CHECK_SYCL_KERNEL_ARG_TYPE(type) \
static_assert(::sycl::is_device_copyable_v<type>)

Expand Down

0 comments on commit b4fa638

Please sign in to comment.