Skip to content

Commit

Permalink
[Autobackout][FuncReg]Revert of change: c564d49
Browse files Browse the repository at this point in the history
 Avoid reciprocal round-trip error in FDIV expansion

If x == y, then x/y == 1, so skip the FDIV expansion computation to avoid
the reciprocal round-trip error.
  • Loading branch information
michalpaszkowski authored and igcbot committed Sep 17, 2023
1 parent c564d49 commit f5b78ad
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 68 deletions.
39 changes: 6 additions & 33 deletions IGC/Compiler/LegalizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2710,38 +2710,20 @@ static bool needsNoScaling(Value* Val)
bool IGC::expandFDIVInstructions(llvm::Function& F)
{
bool Changed = false;
for (auto BBIter = F.begin(); BBIter != F.end();) {
BasicBlock* BB = &*BBIter++;

for (auto Iter = BB->begin(); Iter != BB->end();) {
for (auto& BB : F.getBasicBlockList()) {
for (auto Iter = BB.begin(); Iter != BB.end();) {
Instruction* Inst = &*Iter++;
if (!isCandidateFDiv(Inst))
continue;

IRBuilder<> Builder(Inst);
Builder.setFastMathFlags(Inst->getFastMathFlags());

auto& Ctx = Inst->getContext();
Value* X = Inst->getOperand(0);
Value* Y = Inst->getOperand(1);
Value* V = nullptr;

BasicBlock* PreFDIVExpBB = BB;
BasicBlock* PostFDIVExpBB = BB->splitBasicBlock(Inst->getNextNode());
BasicBlock* FDIVExpBB = BB->splitBasicBlock(Inst);

IRBuilder<> Builder(FDIVExpBB->getPrevNode()->getTerminator());
Builder.setFastMathFlags(Inst->getFastMathFlags());

// If x == y then x/y == 1, skip FDIV expansion basic block to avoid
// reciprocal round-trip error, break to post-FDIV-expansion basic
// block.
Value* CmpXY = Builder.CreateFCmp(CmpInst::FCMP_OEQ, X, Y);
Builder.CreateCondBr(CmpXY, PostFDIVExpBB, FDIVExpBB)->getNextNode()->eraseFromParent();

// Update iterators after creating BBs.
BBIter = PostFDIVExpBB->getIterator();
BB = FDIVExpBB;
Iter = ++FDIVExpBB->begin();
Builder.SetInsertPoint(Inst);

if (Inst->getType()->isHalfTy()) {
if (Inst->hasAllowReciprocal()) {
APFloat Val(1.0f);
Expand Down Expand Up @@ -2789,16 +2771,7 @@ bool IGC::expandFDIVInstructions(llvm::Function& F)
V = Builder.CreateFMul(V, Scale);
}

Builder.SetInsertPoint(&*PostFDIVExpBB->begin());
PHINode* Phi = Builder.CreatePHI(V->getType(), 2);
APFloat VConstOne(1.0f);
if (V->getType()->isHalfTy()) {
bool ignored;
VConstOne.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &ignored);
}
Phi->addIncoming(ConstantFP::get(Ctx, VConstOne), PreFDIVExpBB);
Phi->addIncoming(V, FDIVExpBB);
Inst->replaceAllUsesWith(Phi);
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
Changed = true;
}
Expand Down
55 changes: 20 additions & 35 deletions IGC/Compiler/tests/GenFDIVEmulation/basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,17 @@

define void @test_fdiv(float %a, float %b) {
; CHECK-LABEL: @test_fdiv(
; CHECK: [[TMP1:%[A-z0-9]*]] = fcmp oeq float [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]]
; CHECK: br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]]
; CHECK: [[BB2]]:
; CHECK: [[TMP2:%[A-z0-9]*]] = bitcast float [[B]] to i32
; CHECK: [[TMP3:%[A-z0-9]*]] = and i32 [[TMP2]], 2139095040
; CHECK: [[TMP4:%[A-z0-9]*]] = icmp eq i32 [[TMP3]], 0
; CHECK: [[TMP5:%[A-z0-9]*]] = select i1 [[TMP4]], float 0x41F0000000000000, float 1.000000e+00
; CHECK: [[TMP6:%[A-z0-9]*]] = icmp uge i32 [[TMP3]], 1677721600
; CHECK: [[TMP7:%[A-z0-9]*]] = select i1 [[TMP6]], float 0x3DF0000000000000, float [[TMP5]]
; CHECK: [[TMP8:%[A-z0-9]*]] = fmul float [[B]], [[TMP7]]
; CHECK: [[TMP9:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP8]]
; CHECK: [[TMP10:%[A-z0-9]*]] = fmul float [[TMP9]], [[A]]
; CHECK: [[TMP11:%[A-z0-9]*]] = fmul float [[TMP10]], [[TMP7]]
; CHECK: [[BB3]]:
; CHECK: [[TMP12:%[A-z0-9]*]] = phi float [ 1.000000e+00, %[[BB1:[A-z0-9]*]] ], [ [[TMP11]], %[[BB2]] ]
; CHECK: call void @use.f32(float [[TMP12]])
; CHECK: [[TMP1:%[A-z0-9]*]] = bitcast float [[B:%[A-z0-9]*]] to i32
; CHECK: [[TMP2:%[A-z0-9]*]] = and i32 [[TMP1]], 2139095040
; CHECK: [[TMP3:%[A-z0-9]*]] = icmp eq i32 [[TMP2]], 0
; CHECK: [[TMP4:%[A-z0-9]*]] = select i1 [[TMP3]], float 0x41F0000000000000, float 1.000000e+00
; CHECK: [[TMP5:%[A-z0-9]*]] = icmp uge i32 [[TMP2]], 1677721600
; CHECK: [[TMP6:%[A-z0-9]*]] = select i1 [[TMP5]], float 0x3DF0000000000000, float [[TMP4]]
; CHECK: [[TMP7:%[A-z0-9]*]] = fmul float [[B]], [[TMP6]]
; CHECK: [[TMP8:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP7]]
; CHECK: [[TMP9:%[A-z0-9]*]] = fmul float [[TMP8]], [[A:%[A-z0-9]*]]
; CHECK: [[TMP10:%[A-z0-9]*]] = fmul float [[TMP9]], [[TMP6]]
; CHECK: call void @use.f32(float [[TMP10]])
; CHECK: ret void
;
%1 = fdiv float %a, %b
Expand All @@ -42,14 +37,9 @@ define void @test_fdiv(float %a, float %b) {

define void @test_fdiv_arcp(float %a, float %b) {
; CHECK-LABEL: @test_fdiv_arcp(
; CHECK: [[TMP1:%[A-z0-9]*]] = fcmp arcp oeq float [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]]
; CHECK: br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]]
; CHECK: [[BB2]]:
; CHECK: [[TMP2:%[A-z0-9]*]] = fdiv arcp float 1.000000e+00, [[B]]
; CHECK: [[TMP3:%[A-z0-9]*]] = fmul arcp float [[TMP2]], [[A]]
; CHECK: [[BB3]]:
; CHECK: [[TMP4:%[A-z0-9]*]] = phi {{(arcp )?}}float [ 1.000000e+00, %[[BB1:[A-z0-9]*]] ], [ [[TMP3]], %[[BB2]] ]
; CHECK: call void @use.f32(float [[TMP4]])
; CHECK: [[TMP1:%[A-z0-9]*]] = fdiv arcp float 1.000000e+00, [[B:%[A-z0-9]*]]
; CHECK: [[TMP2:%[A-z0-9]*]] = fmul arcp float [[TMP1]], [[A:%[A-z0-9]*]]
; CHECK: call void @use.f32(float [[TMP2]])
; CHECK: ret void
;
%1 = fdiv arcp float %a, %b
Expand All @@ -59,17 +49,12 @@ define void @test_fdiv_arcp(float %a, float %b) {

define void @test_fdiv_half(half %a, half %b) {
; CHECK-LABEL: @test_fdiv_half(
; CHECK: [[TMP1:%[A-z0-9]*]] = fcmp oeq half [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]]
; CHECK: br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]]
; CHECK: [[BB2]]:
; CHECK: [[TMP2:%[A-z0-9]*]] = fpext half [[B]] to float
; CHECK: [[TMP3:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP2]]
; CHECK: [[TMP4:%[A-z0-9]*]] = fpext half [[A]] to float
; CHECK: [[TMP5:%[A-z0-9]*]] = fmul float [[TMP3]], [[TMP4]]
; CHECK: [[TMP6:%[A-z0-9]*]] = fptrunc float [[TMP5]] to half
; CHECK: [[BB3]]:
; CHECK: [[TMP7:%[A-z0-9]*]] = phi half [ 0xH3C00, %[[BB1:[A-z0-9]*]] ], [ [[TMP6]], %[[BB2]] ]
; CHECK: call void @use.f16(half [[TMP7]])
; CHECK: [[TMP1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
; CHECK: [[TMP2:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP1]]
; CHECK: [[TMP3:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
; CHECK: [[TMP4:%[A-z0-9]*]] = fmul float [[TMP2]], [[TMP3]]
; CHECK: [[TMP5:%[A-z0-9]*]] = fptrunc float [[TMP4]] to half
; CHECK: call void @use.f16(half [[TMP5]])
; CHECK: ret void
;
%1 = fdiv half %a, %b
Expand Down

0 comments on commit f5b78ad

Please sign in to comment.