Skip to content

Commit

Permalink
[AMDGPU] Rename amdgcn_wwm to amdgcn_strict_wwm
Browse files Browse the repository at this point in the history
 * Introduce the new intrinsic amdgcn_strict_wwm
 * Deprecate the old intrinsic amdgcn_wwm

The change is made for consistency, as the "strict"
prefix will become an important distinguishing factor
between amdgcn_wqm and amdgcn_strict_wqm in the future.

The "strict" prefix indicates that inactive lanes do not
take part in control flow; specifically, an inactive lane
enabled by a strict mode will always be enabled, irrespective
of control flow decisions.

The old amdgcn_wwm intrinsic will be removed later; doing the rename
in two steps gives users time to switch to the new name at their own pace.

Reviewed By: critson

Differential Revision: https://reviews.llvm.org/D96257
  • Loading branch information
piotrAMD authored and memfrob committed Oct 4, 2022
1 parent b5c4944 commit 7fe75b0
Show file tree
Hide file tree
Showing 19 changed files with 774 additions and 135 deletions.
7 changes: 6 additions & 1 deletion llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1610,8 +1610,13 @@ def int_amdgcn_wqm_demote : Intrinsic<[],
// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
// enabled, with a few exceptions: - Phi nodes with require WWM return an
// enabled, with a few exceptions: - Phi nodes which require WWM return an
// undefined value.
def int_amdgcn_strict_wwm : Intrinsic<[llvm_any_ty],
[LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable,
IntrConvergent, IntrWillReturn]
>;
// Deprecated. Use int_amdgcn_strict_wwm instead.
def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
[LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable,
IntrConvergent, IntrWillReturn]
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
}

// Finally mark the readlanes in the WWM section.
NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
NewV = B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, NewV);
} else {
switch (Op) {
default:
Expand Down Expand Up @@ -621,7 +621,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// from the first lane, to get our lane's index into the atomic result.
Value *LaneOffset = nullptr;
if (ValDivergent) {
LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan);
LaneOffset =
B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, ExclScan);
} else {
switch (Op) {
default:
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2642,7 +2642,8 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
Opcode = AMDGPU::SOFT_WQM;
break;
case Intrinsic::amdgcn_wwm:
Opcode = AMDGPU::WWM;
case Intrinsic::amdgcn_strict_wwm:
Opcode = AMDGPU::STRICT_WWM;
break;
case Intrinsic::amdgcn_interp_p1_f16:
SelectInterpP1F16(N);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -927,8 +927,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return constrainCopyLikeIntrin(I, AMDGPU::WQM);
case Intrinsic::amdgcn_softwqm:
return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
case Intrinsic::amdgcn_strict_wwm:
case Intrinsic::amdgcn_wwm:
return constrainCopyLikeIntrin(I, AMDGPU::WWM);
return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
case Intrinsic::amdgcn_writelane:
return selectWritelane(I);
case Intrinsic::amdgcn_div_scale:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3956,6 +3956,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_update_dpp:
case Intrinsic::amdgcn_mov_dpp8:
case Intrinsic::amdgcn_mov_dpp:
case Intrinsic::amdgcn_strict_wwm:
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_wqm:
case Intrinsic::amdgcn_softwqm:
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::COPY:
case AMDGPU::WQM:
case AMDGPU::SOFT_WQM:
case AMDGPU::WWM: {
case AMDGPU::STRICT_WWM: {
Register DstReg = MI.getOperand(0).getReg();

const TargetRegisterClass *SrcRC, *DstRC;
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1942,16 +1942,16 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
case AMDGPU::ENTER_WWM: {
case AMDGPU::ENTER_STRICT_WWM: {
// This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
// WWM is entered.
// Whole Wave Mode is entered.
MI.setDesc(get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
: AMDGPU::S_OR_SAVEEXEC_B64));
break;
}
case AMDGPU::EXIT_WWM: {
case AMDGPU::EXIT_STRICT_WWM: {
// This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
// WWM is exited.
// Whole Wave Mode is exited.
MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
break;
}
Expand Down Expand Up @@ -4406,7 +4406,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
case AMDGPU::WQM: return AMDGPU::WQM;
case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
case AMDGPU::WWM: return AMDGPU::WWM;
case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
case AMDGPU::S_MOV_B32: {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
return MI.getOperand(1).isReg() ||
Expand Down Expand Up @@ -6642,7 +6642,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
case AMDGPU::COPY:
case AMDGPU::WQM:
case AMDGPU::SOFT_WQM:
case AMDGPU::WWM:
case AMDGPU::STRICT_WWM:
case AMDGPU::REG_SEQUENCE:
case AMDGPU::PHI:
case AMDGPU::INSERT_SUBREG:
Expand Down Expand Up @@ -6800,7 +6800,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::INSERT_SUBREG:
case AMDGPU::WQM:
case AMDGPU::SOFT_WQM:
case AMDGPU::WWM: {
case AMDGPU::STRICT_WWM: {
const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
if (RI.hasAGPRs(SrcRC)) {
if (RI.hasAGPRs(NewDstRC))
Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -119,25 +119,25 @@ def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
// turned into a copy by WQM pass, but does not seed WQM requirements.
def SOFT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;

// Pseudoinstruction for @llvm.amdgcn.wwm. It is turned into a copy post-RA, so
// Pseudoinstruction for @llvm.amdgcn.strict.wwm. It is turned into a copy post-RA, so
// that the @earlyclobber is respected. The @earlyclobber is to make sure that
// the instruction that defines $src0 (which is run in WWM) doesn't
// the instruction that defines $src0 (which is run in Whole Wave Mode) doesn't
// accidentally clobber inactive channels of $vdst.
let Constraints = "@earlyclobber $vdst" in {
def WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
def STRICT_WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
}

} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]

def ENTER_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
def ENTER_STRICT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
let Uses = [EXEC];
let Defs = [EXEC, SCC];
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
}

def EXIT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
def EXIT_STRICT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,13 +185,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
RegsAssigned |= processDef(MI.getOperand(0));

if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM) {
LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
InWWM = true;
continue;
}

if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM) {
LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
InWWM = false;
}
Expand Down
Loading

0 comments on commit 7fe75b0

Please sign in to comment.