Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

generic: sycl: fix accessor types #2189

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/binary_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct binary_kernel_vec_t {
: conf_(conf)
, src0_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_0))
, src1_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_1))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, src0_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
, src1_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
Expand Down Expand Up @@ -200,7 +200,7 @@ struct binary_kernel_vec_t {

xpu::sycl::in_memory_arg_t src0_;
xpu::sycl::in_memory_arg_t src1_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
xpu::sycl::in_memory_arg_t src0_scale_;
xpu::sycl::in_memory_arg_t src1_scale_;
data_type_t scales_dt_;
Expand Down
8 changes: 4 additions & 4 deletions src/gpu/generic/sycl/convolution_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct convolution_kernel_fwd_t {
, data_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_0))
, weights_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_WEIGHTS))
, bias_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_BIAS))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, data_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
, weights_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
Expand Down Expand Up @@ -244,7 +244,7 @@ struct convolution_kernel_fwd_t {
xpu::sycl::in_memory_arg_t data_;
xpu::sycl::in_memory_arg_t weights_;
xpu::sycl::in_memory_arg_t bias_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
xpu::sycl::in_memory_arg_t data_scale_;
xpu::sycl::in_memory_arg_t weights_scale_;
xpu::sycl::in_memory_arg_t dst_scale_;
Expand All @@ -262,7 +262,7 @@ struct convolution_kernel_bwd_data_t {
convolution_kernel_bwd_data_t(const sycl_convolution_conf_t &conf,
::sycl::handler &cgh, const exec_ctx_t &ctx)
: conf_(conf)
, diff_data_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DIFF_SRC))
, diff_data_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DIFF_SRC))
, weights_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_WEIGHTS))
, bias_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_BIAS))
, diff_dst_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_DIFF_DST))
Expand Down Expand Up @@ -473,7 +473,7 @@ struct convolution_kernel_bwd_data_t {

sycl_convolution_conf_t conf_;

xpu::sycl::out_memory_arg_t diff_data_;
xpu::sycl::inout_memory_arg_t diff_data_;
xpu::sycl::in_memory_arg_t weights_;
xpu::sycl::in_memory_arg_t bias_;
xpu::sycl::in_memory_arg_t diff_dst_;
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/eltwise_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ struct eltwise_fwd_kernel_vec_t {
: conf_(conf)
, src_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC))
, po_args_(cgh, ctx)
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST)) {}
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST)) {}

void operator()(::sycl::nd_item<1> item) const {
memory_tensor_t src_mem(src_, conf_.src_md);
Expand Down Expand Up @@ -204,7 +204,7 @@ struct eltwise_fwd_kernel_vec_t {
sycl_eltwise_conf_t conf_;
xpu::sycl::in_memory_arg_t src_;
post_op_input_args po_args_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
};

struct eltwise_bwd_kernel_vec_t {
Expand Down
14 changes: 7 additions & 7 deletions src/gpu/generic/sycl/matmul_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ struct matmul_kernel_fwd_t {
}

static void store_vec_helper(
out_memory_tensor_t &output, Vec data, int offset) {
inout_memory_tensor_t &output, Vec data, int offset) {
data_type_t type = output.md().data_type();
char *offset_ptr = static_cast<char *>(output.ptr())
+ data_type_size(type) * offset;
Expand Down Expand Up @@ -189,7 +189,7 @@ struct matmul_kernel_fwd_t {
}
}

void store(out_memory_tensor_t &output, int offset, int row_stride) {
void store(inout_memory_tensor_t &output, int offset, int row_stride) {
for (int row = 0; row < Rows; row++) {
for (int col = 0; col < Cols / vec_len; col++) {
store_vec_helper(output, data[row][col],
Expand All @@ -198,8 +198,8 @@ struct matmul_kernel_fwd_t {
}
}

void store_edge(out_memory_tensor_t &output, int offset, int row_stride,
int rows, int cols) {
void store_edge(inout_memory_tensor_t &output, int offset,
int row_stride, int rows, int cols) {
for (int row = 0; row < rows; row++) {
int col;
for (col = 0; col < cols / vec_len; col++) {
Expand All @@ -215,7 +215,7 @@ struct matmul_kernel_fwd_t {
}
}

void store_generic(out_memory_tensor_t &output, int offset,
void store_generic(inout_memory_tensor_t &output, int offset,
int row_stride, bool transpose, bool is_edge_block, int rows,
int cols) {
if (is_edge_block) {
Expand Down Expand Up @@ -331,7 +331,7 @@ struct matmul_kernel_fwd_t {
, data_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_0))
, weights_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_WEIGHTS))
, bias_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_BIAS))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, data_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
, data_scales_dt_((conf_.do_scale_data)
Expand Down Expand Up @@ -618,7 +618,7 @@ struct matmul_kernel_fwd_t {
xpu::sycl::in_memory_arg_t data_;
xpu::sycl::in_memory_arg_t weights_;
xpu::sycl::in_memory_arg_t bias_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
xpu::sycl::in_memory_arg_t data_scale_;
data_type_t data_scales_dt_;
xpu::sycl::in_memory_arg_t weights_scale_;
Expand Down
1 change: 1 addition & 0 deletions src/gpu/generic/sycl/ref_pooling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ struct ref_pooling_fwd_t : public gpu::generic::sycl::primitive_t {
src_md(0)->data_type != dst_md(0)->data_type,
desc()->prop_kind == forward_inference))
&& attr()->has_default_values(sm::post_ops)
&& sycl_post_ops_t::post_ops_ok(attr(), true, false)
&& attr_.set_default_formats(dst_md(0)) == status::success
&& md_dims_in_range(src_md());
if (!ok) return status::unimplemented;
Expand Down
1 change: 1 addition & 0 deletions src/gpu/generic/sycl/ref_resampling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ struct ref_resampling_fwd_t : public gpu::generic::sycl::primitive_t {
&& utils::one_of(
dst_md(0)->data_type, f32, bf16, f16, s32, s8, u8)
&& attr()->has_default_values(sm::post_ops)
&& sycl_post_ops_t::post_ops_ok(attr())
&& set_default_params() == status::success
&& attr_.set_default_formats(dst_md(0)) == status::success
&& (src_md(0)->format_desc.blocking.inner_nblks == 0)
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/reorder_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ struct reorder_kernel_t {
const exec_ctx_t &ctx)
: conf_(conf)
, src_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC_0))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, src_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0))
, dst_scale_(CTX_IN_SYCL_KERNEL_MEMORY(
Expand Down Expand Up @@ -153,7 +153,7 @@ struct reorder_kernel_t {
sycl_reorder_conf_t conf_;

xpu::sycl::in_memory_arg_t src_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
xpu::sycl::in_memory_arg_t src_scale_;
xpu::sycl::in_memory_arg_t dst_scale_;
data_type_t scales_src_dt_;
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/resampling_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ struct resampling_kernel_fwd_vec_t {
::sycl::handler &cgh, const exec_ctx_t &ctx)
: conf_(conf)
, src_(CTX_IN_SYCL_KERNEL_MEMORY(DNNL_ARG_SRC))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, po_args_(cgh, ctx) {}

void operator()(::sycl::nd_item<1> item) const {
Expand Down Expand Up @@ -142,7 +142,7 @@ struct resampling_kernel_fwd_vec_t {
sycl_resampling_conf_t conf_;

xpu::sycl::in_memory_arg_t src_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
post_op_input_args po_args_;
};

Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/softmax_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ struct softmax_fwd_kernel_vec_t {
DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC))
, scale_dst_(CTX_IN_SYCL_KERNEL_MEMORY(
DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST))
, dst_(CTX_OUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, dst_(CTX_INOUT_SYCL_KERNEL_MEMORY(DNNL_ARG_DST))
, po_args_(cgh, ctx) {}

void operator()(::sycl::nd_item<1> item) const {
Expand Down Expand Up @@ -150,7 +150,7 @@ struct softmax_fwd_kernel_vec_t {
xpu::sycl::in_memory_arg_t src_;
xpu::sycl::in_memory_arg_t scale_src_;
xpu::sycl::in_memory_arg_t scale_dst_;
xpu::sycl::out_memory_arg_t dst_;
xpu::sycl::inout_memory_arg_t dst_;
post_op_input_args po_args_;
};

Expand Down
16 changes: 8 additions & 8 deletions src/gpu/generic/sycl/sycl_post_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ struct ref_sum_op_t {
ref_sum_op_t(float scale, float zeropoint)
: scale_(scale), zeropoint_(zeropoint) {}

float load_and_compute(float acc, const xpu::sycl::out_memory_arg_t &dst,
float load_and_compute(float acc, const xpu::sycl::inout_memory_arg_t &dst,
dnnl::impl::data_type_t sum_dt_,
dim_t offset) const { // TODO dims32_t
memory_plain_t dst_mem(dst, sum_dt_);
Expand Down Expand Up @@ -265,14 +265,14 @@ struct sycl_post_ops_t {
n_post_ops_ = attr_po.len();
}

inline float apply(float acc, const xpu::sycl::out_memory_arg_t &dst,
inline float apply(float acc, const xpu::sycl::inout_memory_arg_t &dst,
dim_t dst_offset, const post_op_input_args &po_args,
dims_t src_offset) const;
inline float apply(float acc, float dst, const post_op_input_args &po_args,
dims_t src_offset) const;
inline float apply(float acc, const post_op_input_args &po_args,
dims_t src_offset) const;
inline float apply(float acc, const xpu::sycl::out_memory_arg_t &dst,
inline float apply(float acc, const xpu::sycl::inout_memory_arg_t &dst,
dim_t dst_offset) const;

inline int get_post_op() const { return n_post_ops_; }
Expand Down Expand Up @@ -311,9 +311,9 @@ struct post_op_input_args {
xpu::sycl::in_memory_arg_t args_[sycl_post_ops_t::max_post_ops];
};

float sycl_post_ops_t::apply(float acc, const xpu::sycl::out_memory_arg_t &dst,
dim_t dst_offset, const post_op_input_args &po_args,
dims_t src_offset) const {
float sycl_post_ops_t::apply(float acc,
const xpu::sycl::inout_memory_arg_t &dst, dim_t dst_offset,
const post_op_input_args &po_args, dims_t src_offset) const {
using namespace primitive_kind;

for (auto i = 0; i < n_post_ops_; ++i) {
Expand Down Expand Up @@ -368,8 +368,8 @@ float sycl_post_ops_t::apply(
return acc;
}

float sycl_post_ops_t::apply(float acc, const xpu::sycl::out_memory_arg_t &dst,
dim_t dst_offset) const {
float sycl_post_ops_t::apply(float acc,
const xpu::sycl::inout_memory_arg_t &dst, dim_t dst_offset) const {
using namespace primitive_kind;

for (auto i = 0; i < n_post_ops_; ++i) {
Expand Down
8 changes: 8 additions & 0 deletions src/xpu/sycl/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ namespace sycl {
&CTX_OUT_STORAGE(arg)) \
->get_out_memory_arg(ctx.stream(), cgh)

#define CTX_INOUT_SYCL_KERNEL_MEMORY(arg) \
CTX_OUT_STORAGE(arg).is_null() \
? xpu::sycl::memory_storage_base_t::empty_inout_memory_arg( \
ctx.stream(), cgh) \
: utils::downcast<const xpu::sycl::memory_storage_base_t *>( \
&CTX_OUT_STORAGE(arg)) \
->get_inout_memory_arg(ctx.stream(), cgh)

#define CHECK_SYCL_KERNEL_ARG_TYPE(type) \
static_assert(::sycl::is_device_copyable_v<type>)

Expand Down