Skip to content

Commit

Permalink
new intrinsic: sub group clustered broadcast
Browse files Browse the repository at this point in the history
Adds new intrinsic: sub group clustered broadcast.
Initial implementation has a set of restrictions:
 * Supports only cluster size 8 or 16.
 * Supports only constant cluster size and cluster lane.
  • Loading branch information
pkwasnie-intel authored and igcbot committed Nov 28, 2024
1 parent 4ebc428 commit 0ae060e
Show file tree
Hide file tree
Showing 13 changed files with 274 additions and 0 deletions.
9 changes: 9 additions & 0 deletions IGC/BiFModule/Implementation/IGCBiF_Intrinsics.cl
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,15 @@ half __builtin_IB_simd_broadcast_h( half, uint );
double __builtin_IB_simd_broadcast_df( double, uint );
void __builtin_IB_sub_group_barrier();

// SubGroup clustered broadcast - for internal use
uint __builtin_IB_simd_clustered_broadcast( uint, uint, uint );
bool __builtin_IB_simd_clustered_broadcast_b( bool, uint, uint );
uchar __builtin_IB_simd_clustered_broadcast_c( uchar, uint, uint );
ushort __builtin_IB_simd_clustered_broadcast_us( ushort, uint, uint );
float __builtin_IB_simd_clustered_broadcast_f( float, uint, uint );
half __builtin_IB_simd_clustered_broadcast_h( half, uint, uint );
double __builtin_IB_simd_clustered_broadcast_df( double, uint, uint );

// Block read : global address space
uint __builtin_IB_simd_block_read_1_global( const __global uint* );
uint2 __builtin_IB_simd_block_read_2_global( const __global uint* );
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/CodeSinking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2489,6 +2489,7 @@ namespace IGC {
// Wave intrinsics
case GenISAIntrinsic::GenISA_WaveShuffleIndex:
case GenISAIntrinsic::GenISA_WaveBroadcast:
case GenISAIntrinsic::GenISA_WaveClusteredBroadcast:
case GenISAIntrinsic::GenISA_WaveBallot:
case GenISAIntrinsic::GenISA_WaveInverseBallot:
case GenISAIntrinsic::GenISA_WaveAll:
Expand Down
45 changes: 45 additions & 0 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5732,6 +5732,48 @@ void EmitPass::emitSimdShuffle(llvm::Instruction* inst)
}
}

// Emits the move sequence for a GenISA_WaveClusteredBroadcast call.
//
// Operands read from `inst`:
//   0 - the value to broadcast.
//   1 - the cluster size; asserted to be a constant equal to 8 or 16 and
//       smaller than the lane count of the current dispatch size.
//   2 - the lane inside the cluster to read from; asserted to be a constant
//       smaller than the cluster size.
//
// A uniform value is copied to the destination as-is. For a non-uniform value
// the destination is asserted to be non-uniform and the copy reads source 0
// through a region built from the cluster size, offset by the cluster lane.
void EmitPass::emitSimdClusteredBroadcast(llvm::Instruction* inst)
{
    CVariable* const value = GetSymbol(inst->getOperand(0));

    // Shared tail of both paths: copy into the destination, optionally repeat
    // the copy with the encoder switched to the second half, then call Push().
    // Source modifiers set on the encoder before the call are left in place
    // for both copies, exactly as in a straight-line sequence.
    const auto copyAndPush = [this, value](const bool alsoSecondHalf)
    {
        m_encoder->Copy(m_destination, value);
        if (alsoSecondHalf)
        {
            m_encoder->SetSecondHalf(true);
            m_encoder->Copy(m_destination, value);
            m_encoder->SetSecondHalf(false);
        }
        m_encoder->Push();
    };

    // Uniform input: a plain copy to all lanes is enough. The second-half copy
    // is only emitted when the destination itself is not uniform.
    if (value->IsUniform())
    {
        copyAndPush(!m_destination->IsUniform() && m_currShader->m_numberInstance > 1);
        return;
    }

    IGC_ASSERT_MESSAGE(!m_destination->IsUniform(), "Unsupported: dst must be non-uniform");

    auto* const sizeOperand = inst->getOperand(1);
    IGC_ASSERT_MESSAGE(isa<llvm::ConstantInt>(sizeOperand), "Unsupported: cluster size must be constant");
    const unsigned clusterWidth = int_cast<uint32_t>(cast<llvm::ConstantInt>(sizeOperand)->getZExtValue());

    auto* const laneOperand = inst->getOperand(2);
    IGC_ASSERT_MESSAGE(isa<llvm::ConstantInt>(laneOperand), "Unsupported: cluster lane must be constant");
    const unsigned laneInCluster = int_cast<uint32_t>(cast<llvm::ConstantInt>(laneOperand)->getZExtValue());

    IGC_ASSERT_MESSAGE(clusterWidth < numLanes(m_currShader->m_dispatchSize), "cluster size must be smaller than SIMD");
    IGC_ASSERT_MESSAGE(clusterWidth == 8 || clusterWidth == 16, "cluster size must be 8 or 16");
    IGC_ASSERT_MESSAGE(laneInCluster < clusterWidth, "cluster lane does not fit in cluster size");

    // Region arguments are (clusterWidth, clusterWidth, 0) - presumably
    // vertical stride, width and horizontal stride; TODO confirm against the
    // encoder's SetSrcRegion signature. The sub-register offset picks the
    // requested lane.
    m_encoder->SetSrcRegion(0, clusterWidth, clusterWidth, 0);
    m_encoder->SetSrcSubReg(0, laneInCluster);
    copyAndPush(m_currShader->m_numberInstance > 1);
}

void EmitPass::emitSimdShuffleDown(llvm::Instruction* inst)
{
CVariable* pCurrentData = GetSymbol(inst->getOperand(0));
Expand Down Expand Up @@ -9133,6 +9175,9 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
case GenISAIntrinsic::GenISA_WaveBroadcast:
emitSimdShuffle(inst);
break;
case GenISAIntrinsic::GenISA_WaveClusteredBroadcast:
emitSimdClusteredBroadcast(inst);
break;
case GenISAIntrinsic::GenISA_WavePrefix:
emitWavePrefix(cast<WavePrefixIntrinsic>(inst));
break;
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ class EmitPass : public llvm::FunctionPass
void emitSimdLaneIdReplicate(llvm::Instruction* inst);
void emitSimdSize(llvm::Instruction* inst);
void emitSimdShuffle(llvm::Instruction* inst);
void emitSimdClusteredBroadcast(llvm::Instruction* inst);
void emitCrossInstanceMov(const SSource& source, const DstModifier& modifier);
void emitSimdShuffleDown(llvm::Instruction* inst);
void emitSimdShuffleXor(llvm::Instruction* inst);
Expand Down
4 changes: 4 additions & 0 deletions IGC/Compiler/CISACodeGen/PromoteInt8Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,7 @@ void PromoteInt8Type::promoteIntrinsic()
continue;
if (GII->isGenIntrinsic(GenISAIntrinsic::GenISA_WaveShuffleIndex) ||
GII->isGenIntrinsic(GenISAIntrinsic::GenISA_WaveBroadcast) ||
GII->isGenIntrinsic(GenISAIntrinsic::GenISA_WaveClusteredBroadcast) ||
GII->isGenIntrinsic(GenISAIntrinsic::GenISA_simdShuffleDown))
{
// Those are mov insts. Need to promote if its operand is
Expand Down Expand Up @@ -1166,6 +1167,7 @@ void PromoteInt8Type::promoteIntrinsic()
gid == GenISAIntrinsic::GenISA_QuadPrefix ||
gid == GenISAIntrinsic::GenISA_WaveShuffleIndex ||
gid == GenISAIntrinsic::GenISA_WaveBroadcast ||
gid == GenISAIntrinsic::GenISA_WaveClusteredBroadcast ||
gid == GenISAIntrinsic::GenISA_simdShuffleDown)
{
//
Expand Down Expand Up @@ -1204,10 +1206,12 @@ void PromoteInt8Type::promoteIntrinsic()
}
case GenISAIntrinsic::GenISA_WaveClustered:
case GenISAIntrinsic::GenISA_WaveInterleave:
case GenISAIntrinsic::GenISA_WaveClusteredBroadcast:
{
// prototype:
// Ty <clustered> (Ty, char, int, int)
// Ty <interleave> (Ty, char, int, int)
// Ty <clusteredbroadcast> (Ty, int, int, int)
iArgs.push_back(GII->getArgOperand(1));
iArgs.push_back(GII->getArgOperand(2));
iArgs.push_back(GII->getArgOperand(3));
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/WIAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1411,6 +1411,7 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
intrinsic_name == llvm_cycleCounter ||
intrinsic_name == llvm_waveShuffleIndex ||
intrinsic_name == llvm_waveBroadcast ||
intrinsic_name == llvm_waveClusteredBroadcast ||
intrinsic_name == llvm_waveBallot ||
intrinsic_name == llvm_waveAll ||
intrinsic_name == llvm_waveClustered ||
Expand Down
2 changes: 2 additions & 0 deletions IGC/Compiler/CISACodeGen/helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1594,6 +1594,7 @@ namespace IGC
{
case GenISAIntrinsic::GenISA_WaveShuffleIndex:
case GenISAIntrinsic::GenISA_WaveBroadcast:
case GenISAIntrinsic::GenISA_WaveClusteredBroadcast:
case GenISAIntrinsic::GenISA_simdShuffleDown:
case GenISAIntrinsic::GenISA_simdShuffleXor:
case GenISAIntrinsic::GenISA_simdBlockRead:
Expand Down Expand Up @@ -1882,6 +1883,7 @@ namespace IGC
opcode == llvm_wavePrefix ||
opcode == llvm_waveShuffleIndex ||
opcode == llvm_waveBroadcast ||
opcode == llvm_waveClusteredBroadcast ||
opcode == llvm_waveBallot ||
opcode == llvm_simdShuffleDown ||
opcode == llvm_simdBlockRead||
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/opCode.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ DECLARE_OPCODE(GenISA_WavePrefix, GenISAIntrinsic, llvm_wavePrefix, false, false
DECLARE_OPCODE(GenISA_QuadPrefix, GenISAIntrinsic, llvm_quadPrefix, false, false, false, false, false, false, false)
DECLARE_OPCODE(GenISA_WaveShuffleIndex, GenISAIntrinsic, llvm_waveShuffleIndex, false, false, false, false, false, false, false)
DECLARE_OPCODE(GenISA_WaveBroadcast, GenISAIntrinsic, llvm_waveBroadcast, false, false, false, false, false, false, false)
DECLARE_OPCODE(GenISA_WaveClusteredBroadcast, GenISAIntrinsic, llvm_waveClusteredBroadcast, false, false, false, false, false, false, false)

// Unmasked region
DECLARE_OPCODE(GenISA_UnmaskedRegionBegin, GenISAIntrinsic, llvm_unmaskedBegin, false, false, false, false, false, false, false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_BROADCAST_US = "__built
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_BROADCAST_F = "__builtin_IB_simd_broadcast_f";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_BROADCAST_H = "__builtin_IB_simd_broadcast_h";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_BROADCAST_DF = "__builtin_IB_simd_broadcast_df";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST = "__builtin_IB_simd_clustered_broadcast";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_B = "__builtin_IB_simd_clustered_broadcast_b";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_C = "__builtin_IB_simd_clustered_broadcast_c";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_US = "__builtin_IB_simd_clustered_broadcast_us";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_F = "__builtin_IB_simd_clustered_broadcast_f";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_H = "__builtin_IB_simd_clustered_broadcast_h";
const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_DF = "__builtin_IB_simd_clustered_broadcast_df";
const llvm::StringRef SubGroupFuncsResolution::SIMD_BLOCK_READ_1_GBL = "__builtin_IB_simd_block_read_1_global";
const llvm::StringRef SubGroupFuncsResolution::SIMD_BLOCK_READ_2_GBL = "__builtin_IB_simd_block_read_2_global";
const llvm::StringRef SubGroupFuncsResolution::SIMD_BLOCK_READ_4_GBL = "__builtin_IB_simd_block_read_4_global";
Expand Down Expand Up @@ -680,6 +687,41 @@ void SubGroupFuncsResolution::visitCallInst(CallInst& CI)
CI.replaceAllUsesWith(simdBroadcast);
CI.eraseFromParent();
}
// Clustered broadcast builtins: every typed variant of
// __builtin_IB_simd_clustered_broadcast* is rewritten into a single call to the
// GenISA_WaveClusteredBroadcast intrinsic, overloaded on the type of the
// broadcast value.
else if (funcName.equals(SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST) ||
funcName.equals(SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_US) ||
funcName.equals(SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_F) ||
funcName.equals(SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_H) ||
funcName.equals(SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_C) ||
funcName.equals(SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_B) ||
funcName.equals(SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_DF)
)
{
// Build the replacement intrinsic call; the intrinsic itself is lowered later in CodeGen.
IRBuilder<> IRB(&CI);
// Intrinsic operands: value, cluster size, lane inside the cluster, plus a
// fourth i32 operand that the builtin does not have and that is always 0 here
// (the intrinsic definition names it helperLaneMode).
Value* args[4];
args[0] = CI.getArgOperand(0);
args[1] = CI.getArgOperand(1);
args[2] = CI.getArgOperand(2);
args[3] = IRB.getInt32(0);

// Cluster size must be a compile-time constant. On failure an error is
// reported and the original call is left in place.
if (!isa<ConstantInt>(args[1]))
{
m_pCtx->EmitError("cluster_size argument in clustered_broadcast must be constant.", &CI);
return;
}
// The same restriction applies to the lane inside the cluster.
if (!isa<ConstantInt>(args[2]))
{
m_pCtx->EmitError("in_cluster_lane argument in clustered_broadcast must be constant.", &CI);
return;
}

// Declare the intrinsic in the callee's module, overloaded on the value type.
Function* simdClusteredBroadcastFunc = GenISAIntrinsic::getDeclaration(CI.getCalledFunction()->getParent(),
GenISAIntrinsic::GenISA_WaveClusteredBroadcast, args[0]->getType());
// Insert the new call before the builtin call, carry over its debug location,
// redirect all users to the new call, then delete the builtin call.
Instruction* simdClusteredBroadcast = CallInst::Create(simdClusteredBroadcastFunc, args, "simdClusteredBroadcast", &CI);
updateDebugLoc(&CI, simdClusteredBroadcast);
CI.replaceAllUsesWith(simdClusteredBroadcast);
CI.eraseFromParent();
}
else if (funcName.equals(SubGroupFuncsResolution::SUB_GROUP_SHUFFLE_DOWN) ||
funcName.equals(SubGroupFuncsResolution::SUB_GROUP_SHUFFLE_DOWN_US) ||
funcName.equals(SubGroupFuncsResolution::SUB_GROUP_SHUFFLE_DOWN_UC))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@ namespace IGC
static const llvm::StringRef SUB_GROUP_BROADCAST_C;
static const llvm::StringRef SUB_GROUP_BROADCAST_B;
static const llvm::StringRef SUB_GROUP_BROADCAST_DF;
static const llvm::StringRef SUB_GROUP_CLUSTERED_BROADCAST;
static const llvm::StringRef SUB_GROUP_CLUSTERED_BROADCAST_US;
static const llvm::StringRef SUB_GROUP_CLUSTERED_BROADCAST_F;
static const llvm::StringRef SUB_GROUP_CLUSTERED_BROADCAST_H;
static const llvm::StringRef SUB_GROUP_CLUSTERED_BROADCAST_C;
static const llvm::StringRef SUB_GROUP_CLUSTERED_BROADCAST_B;
static const llvm::StringRef SUB_GROUP_CLUSTERED_BROADCAST_DF;

static const llvm::StringRef SIMD_BLOCK_READ_1_GBL;
static const llvm::StringRef SIMD_BLOCK_READ_2_GBL;
Expand Down
29 changes: 29 additions & 0 deletions IGC/Compiler/tests/PromoteInt8Type/intrinsic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
;
; RUN: igc_opt -debugify --igc-promoteint8type -check-debugify -S < %s 2>&1 | FileCheck %s
; ------------------------------------------------
; PromoteInt8Type
; ------------------------------------------------

; Debug-info related check
; CHECK-NOT: WARNING
; CHECK: CheckModuleDebugify: PASS

define i8 @test_clustered_broadcast(i8 %src1, i32 %lane) {
; CHECK-LABEL: @test_clustered_broadcast(
; CHECK: [[B2S1:%.*]] = sext i8 [[SRC1:%.*]] to i16
; CHECK: [[B2S2:%.*]] = call i16 @llvm.genx.GenISA.WaveClusteredBroadcast.i16(i16 [[B2S1]], i32 8, i32 %lane, i32 0)
; CHECK: [[TMP1:%.*]] = trunc i16 [[B2S2]] to i8
; CHECK: ret i8 [[TMP1]]
;
%1 = call i8 @llvm.genx.GenISA.WaveClusteredBroadcast.i8(i8 %src1, i32 8, i32 %lane, i32 0)
ret i8 %1
}

declare i8 @llvm.genx.GenISA.WaveClusteredBroadcast.i8(i8, i32, i32, i32)
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
;
; RUN: igc_opt -debugify --igc-sub-group-func-resolution -check-debugify -S < %s 2>&1 | FileCheck %s
; ------------------------------------------------
; SubGroupFuncsResolution
; ------------------------------------------------

; Debug-info related check
; CHECK-NOT: WARNING
; CHECK: CheckModuleDebugify: PASS

define i1 @test_clustered_broadcast_i1(i1 %src) {
; CHECK-LABEL: @test_clustered_broadcast_i1(
; CHECK: [[TMP1:%.*]] = call i1 @llvm.genx.GenISA.WaveClusteredBroadcast.i1(i1 %src, i32 8, i32 5, i32 0)
; CHECK: ret i1 [[TMP1]]
;
%1 = call spir_func i1 @__builtin_IB_simd_clustered_broadcast_b(i1 %src, i32 8, i32 5)
ret i1 %1
}

define i8 @test_clustered_broadcast_i8(i8 %src) {
; CHECK-LABEL: @test_clustered_broadcast_i8(
; CHECK: [[TMP1:%.*]] = call i8 @llvm.genx.GenISA.WaveClusteredBroadcast.i8(i8 %src, i32 8, i32 5, i32 0)
; CHECK: ret i8 [[TMP1]]
;
%1 = call spir_func i8 @__builtin_IB_simd_clustered_broadcast_c(i8 %src, i32 8, i32 5)
ret i8 %1
}

define i16 @test_clustered_broadcast_i16(i16 %src) {
; CHECK-LABEL: @test_clustered_broadcast_i16(
; CHECK: [[TMP1:%.*]] = call i16 @llvm.genx.GenISA.WaveClusteredBroadcast.i16(i16 %src, i32 8, i32 5, i32 0)
; CHECK: ret i16 [[TMP1]]
;
%1 = call spir_func i16 @__builtin_IB_simd_clustered_broadcast_us(i16 %src, i32 8, i32 5)
ret i16 %1
}

define i32 @test_clustered_broadcast_i32(i32 %src) {
; CHECK-LABEL: @test_clustered_broadcast_i32(
; CHECK: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveClusteredBroadcast.i32(i32 %src, i32 8, i32 5, i32 0)
; CHECK: ret i32 [[TMP1]]
;
%1 = call spir_func i32 @__builtin_IB_simd_clustered_broadcast(i32 %src, i32 8, i32 5)
ret i32 %1
}

define float @test_clustered_broadcast_float(float %src) {
; CHECK-LABEL: @test_clustered_broadcast_float(
; CHECK: [[TMP1:%.*]] = call float @llvm.genx.GenISA.WaveClusteredBroadcast.f32(float %src, i32 8, i32 5, i32 0)
; CHECK: ret float [[TMP1]]
;
%1 = call spir_func float @__builtin_IB_simd_clustered_broadcast_f(float %src, i32 8, i32 5)
ret float %1
}

define half @test_clustered_broadcast_half(half %src) {
; CHECK-LABEL: @test_clustered_broadcast_half(
; CHECK: [[TMP1:%.*]] = call half @llvm.genx.GenISA.WaveClusteredBroadcast.f16(half %src, i32 8, i32 5, i32 0)
; CHECK: ret half [[TMP1]]
;
%1 = call spir_func half @__builtin_IB_simd_clustered_broadcast_h(half %src, i32 8, i32 5)
ret half %1
}

define double @test_clustered_broadcast_double(double %src) {
; CHECK-LABEL: @test_clustered_broadcast_double(
; CHECK: [[TMP1:%.*]] = call double @llvm.genx.GenISA.WaveClusteredBroadcast.f64(double %src, i32 8, i32 5, i32 0)
; CHECK: ret double [[TMP1]]
;
%1 = call spir_func double @__builtin_IB_simd_clustered_broadcast_df(double %src, i32 8, i32 5)
ret double %1
}

define float @test_clustered_broadcast_invalid_cluster_size(float %src, i32 %arg) {
; CHECK: error: cluster_size argument in clustered_broadcast must be constant.
; CHECK: in function: 'test_clustered_broadcast_invalid_cluster_size'
%1 = call spir_func float @__builtin_IB_simd_clustered_broadcast_f(float %src, i32 %arg, i32 5)
ret float %1
}

define float @test_clustered_broadcast_invalid_cluster_lane(float %src, i32 %arg) {
; CHECK: error: in_cluster_lane argument in clustered_broadcast must be constant.
; CHECK: in function: 'test_clustered_broadcast_invalid_cluster_lane'
%1 = call spir_func float @__builtin_IB_simd_clustered_broadcast_f(float %src, i32 8, i32 %arg)
ret float %1
}

declare spir_func i1 @__builtin_IB_simd_clustered_broadcast_b(i1, i32, i32)
declare spir_func i8 @__builtin_IB_simd_clustered_broadcast_c(i8, i32, i32)
declare spir_func i16 @__builtin_IB_simd_clustered_broadcast_us(i16, i32, i32)
declare spir_func i32 @__builtin_IB_simd_clustered_broadcast(i32, i32, i32)
declare spir_func float @__builtin_IB_simd_clustered_broadcast_f(float, i32, i32)
declare spir_func half @__builtin_IB_simd_clustered_broadcast_h(half, i32, i32)
declare spir_func double @__builtin_IB_simd_clustered_broadcast_df(double, i32, i32)
31 changes: 31 additions & 0 deletions IGC/GenISAIntrinsics/generator/input/Intrinsic_definitions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2383,6 +2383,37 @@ intrinsics:
memory_effects:
- !<MemoryRestriction>
memory_access: !MemoryAccessType NoModRef
- !<IntrinsicDefinition>
name: "GenISA_WaveClusteredBroadcast"
comment: "Broadcasts from specific lane to all lanes in cluster.\
\ Works in non-uniform threads, but result is undefined if\
\ broadcasted lane is inactive."
return_definition: !<ReturnDefinition>
type_definition: *any_int
comment: "result"
arguments:
- !<ArgumentDefinition>
name: Arg0
type_definition: *ref_0_
comment: "value"
- !<ArgumentDefinition>
name: Arg1
type_definition: *i32
comment: "cluster size - must be a compile time constant 8 or 16"
- !<ArgumentDefinition>
name: Arg2
type_definition: *i32
comment: "cluster lane - lane inside cluster to broadcast"
- !<ArgumentDefinition>
name: Arg3
type_definition: *i32
comment: "helperLaneMode"
attributes:
- !AttributeID "Convergent"
- !AttributeID "NoUnwind"
memory_effects:
- !<MemoryRestriction>
memory_access: !MemoryAccessType NoModRef
- !<IntrinsicDefinition>
name: "GenISA_WorkGroupAny"
comment: "This intrinsic implies a barrier"
Expand Down

0 comments on commit 0ae060e

Please sign in to comment.