From f5b78ad15e22e2e0eb6aeddabd5903a826d8092d Mon Sep 17 00:00:00 2001
From: Michal Paszkowski <michal.paszkowski@intel.com>
Date: Wed, 13 Sep 2023 13:25:16 -0700
Subject: [PATCH] [Autobackout][FuncReg]Revert of change:
 c564d49dd5c728fddc6d3dd3feb8af8e0353ecfc  Avoid reciprocal round-trip error
 in FDIV expansion

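This reverts the change that, when x == y, skipped the FDIV expansion
computation (treating x/y as 1) to avoid reciprocal round-trip error.
The expansion is once again emitted unconditionally, in place.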
---
 IGC/Compiler/LegalizationPass.cpp            | 39 +++-----------
 IGC/Compiler/tests/GenFDIVEmulation/basic.ll | 55 +++++++-------------
 2 files changed, 26 insertions(+), 68 deletions(-)

diff --git a/IGC/Compiler/LegalizationPass.cpp b/IGC/Compiler/LegalizationPass.cpp
index 8788c14ebe82..9315f3279aeb 100644
--- a/IGC/Compiler/LegalizationPass.cpp
+++ b/IGC/Compiler/LegalizationPass.cpp
@@ -2710,38 +2710,20 @@ static bool needsNoScaling(Value* Val)
 bool IGC::expandFDIVInstructions(llvm::Function& F)
 {
     bool Changed = false;
-    for (auto BBIter = F.begin(); BBIter != F.end();) {
-        BasicBlock* BB = &*BBIter++;
-
-        for (auto Iter = BB->begin(); Iter != BB->end();) {
+    for (auto& BB : F.getBasicBlockList()) {
+        for (auto Iter = BB.begin(); Iter != BB.end();) {
             Instruction* Inst = &*Iter++;
             if (!isCandidateFDiv(Inst))
                 continue;
 
+            IRBuilder<> Builder(Inst);
+            Builder.setFastMathFlags(Inst->getFastMathFlags());
+
             auto& Ctx = Inst->getContext();
             Value* X = Inst->getOperand(0);
             Value* Y = Inst->getOperand(1);
             Value* V = nullptr;
 
-            BasicBlock* PreFDIVExpBB = BB;
-            BasicBlock* PostFDIVExpBB = BB->splitBasicBlock(Inst->getNextNode());
-            BasicBlock* FDIVExpBB = BB->splitBasicBlock(Inst);
-
-            IRBuilder<> Builder(FDIVExpBB->getPrevNode()->getTerminator());
-            Builder.setFastMathFlags(Inst->getFastMathFlags());
-
-            // If x == y then x/y == 1, skip FDIV expansion basic block to avoid
-            // reciprocal round-trip error, break to post-FDIV-expansion basic
-            // block.
-            Value* CmpXY = Builder.CreateFCmp(CmpInst::FCMP_OEQ, X, Y);
-            Builder.CreateCondBr(CmpXY, PostFDIVExpBB, FDIVExpBB)->getNextNode()->eraseFromParent();
-
-            // Update iterators after creating BBs.
-            BBIter = PostFDIVExpBB->getIterator();
-            BB = FDIVExpBB;
-            Iter = ++FDIVExpBB->begin();
-            Builder.SetInsertPoint(Inst);
-
             if (Inst->getType()->isHalfTy()) {
                 if (Inst->hasAllowReciprocal()) {
                     APFloat Val(1.0f);
@@ -2789,16 +2771,7 @@ bool IGC::expandFDIVInstructions(llvm::Function& F)
                 V = Builder.CreateFMul(V, Scale);
             }
 
-            Builder.SetInsertPoint(&*PostFDIVExpBB->begin());
-            PHINode* Phi = Builder.CreatePHI(V->getType(), 2);
-            APFloat VConstOne(1.0f);
-            if (V->getType()->isHalfTy()) {
-                bool ignored;
-                VConstOne.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &ignored);
-            }
-            Phi->addIncoming(ConstantFP::get(Ctx, VConstOne), PreFDIVExpBB);
-            Phi->addIncoming(V, FDIVExpBB);
-            Inst->replaceAllUsesWith(Phi);
+            Inst->replaceAllUsesWith(V);
             Inst->eraseFromParent();
             Changed = true;
         }
diff --git a/IGC/Compiler/tests/GenFDIVEmulation/basic.ll b/IGC/Compiler/tests/GenFDIVEmulation/basic.ll
index c29ac849be4b..f418f49e5b6e 100644
--- a/IGC/Compiler/tests/GenFDIVEmulation/basic.ll
+++ b/IGC/Compiler/tests/GenFDIVEmulation/basic.ll
@@ -17,22 +17,17 @@
 
 define void @test_fdiv(float %a, float %b) {
 ; CHECK-LABEL: @test_fdiv(
-; CHECK:    [[TMP1:%[A-z0-9]*]] = fcmp oeq float [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]]
-; CHECK:    br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]]
-; CHECK:  [[BB2]]:
-; CHECK:    [[TMP2:%[A-z0-9]*]] = bitcast float [[B]] to i32
-; CHECK:    [[TMP3:%[A-z0-9]*]] = and i32 [[TMP2]], 2139095040
-; CHECK:    [[TMP4:%[A-z0-9]*]] = icmp eq i32 [[TMP3]], 0
-; CHECK:    [[TMP5:%[A-z0-9]*]] = select i1 [[TMP4]], float 0x41F0000000000000, float 1.000000e+00
-; CHECK:    [[TMP6:%[A-z0-9]*]] = icmp uge i32 [[TMP3]], 1677721600
-; CHECK:    [[TMP7:%[A-z0-9]*]] = select i1 [[TMP6]], float 0x3DF0000000000000, float [[TMP5]]
-; CHECK:    [[TMP8:%[A-z0-9]*]] = fmul float [[B]], [[TMP7]]
-; CHECK:    [[TMP9:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP8]]
-; CHECK:    [[TMP10:%[A-z0-9]*]] = fmul float [[TMP9]], [[A]]
-; CHECK:    [[TMP11:%[A-z0-9]*]] = fmul float [[TMP10]], [[TMP7]]
-; CHECK:  [[BB3]]:
-; CHECK:    [[TMP12:%[A-z0-9]*]] = phi float [ 1.000000e+00, %[[BB1:[A-z0-9]*]] ], [ [[TMP11]], %[[BB2]] ]
-; CHECK:    call void @use.f32(float [[TMP12]])
+; CHECK:    [[TMP1:%[A-z0-9]*]] = bitcast float [[B:%[A-z0-9]*]] to i32
+; CHECK:    [[TMP2:%[A-z0-9]*]] = and i32 [[TMP1]], 2139095040
+; CHECK:    [[TMP3:%[A-z0-9]*]] = icmp eq i32 [[TMP2]], 0
+; CHECK:    [[TMP4:%[A-z0-9]*]] = select i1 [[TMP3]], float 0x41F0000000000000, float 1.000000e+00
+; CHECK:    [[TMP5:%[A-z0-9]*]] = icmp uge i32 [[TMP2]], 1677721600
+; CHECK:    [[TMP6:%[A-z0-9]*]] = select i1 [[TMP5]], float 0x3DF0000000000000, float [[TMP4]]
+; CHECK:    [[TMP7:%[A-z0-9]*]] = fmul float [[B]], [[TMP6]]
+; CHECK:    [[TMP8:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP7]]
+; CHECK:    [[TMP9:%[A-z0-9]*]] = fmul float [[TMP8]], [[A:%[A-z0-9]*]]
+; CHECK:    [[TMP10:%[A-z0-9]*]] = fmul float [[TMP9]], [[TMP6]]
+; CHECK:    call void @use.f32(float [[TMP10]])
 ; CHECK:    ret void
 ;
   %1 = fdiv float %a, %b
@@ -42,14 +37,9 @@ define void @test_fdiv(float %a, float %b) {
 
 define void @test_fdiv_arcp(float %a, float %b) {
 ; CHECK-LABEL: @test_fdiv_arcp(
-; CHECK:    [[TMP1:%[A-z0-9]*]] = fcmp arcp oeq float [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]]
-; CHECK:    br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]]
-; CHECK:  [[BB2]]:
-; CHECK:    [[TMP2:%[A-z0-9]*]] = fdiv arcp float 1.000000e+00, [[B]]
-; CHECK:    [[TMP3:%[A-z0-9]*]] = fmul arcp float [[TMP2]], [[A]]
-; CHECK:  [[BB3]]:
-; CHECK:    [[TMP4:%[A-z0-9]*]] = phi {{(arcp )?}}float [ 1.000000e+00, %[[BB1:[A-z0-9]*]] ], [ [[TMP3]], %[[BB2]] ]
-; CHECK:    call void @use.f32(float [[TMP4]])
+; CHECK:    [[TMP1:%[A-z0-9]*]] = fdiv arcp float 1.000000e+00, [[B:%[A-z0-9]*]]
+; CHECK:    [[TMP2:%[A-z0-9]*]] = fmul arcp float [[TMP1]], [[A:%[A-z0-9]*]]
+; CHECK:    call void @use.f32(float [[TMP2]])
 ; CHECK:    ret void
 ;
   %1 = fdiv arcp float %a, %b
@@ -59,17 +49,12 @@ define void @test_fdiv_arcp(float %a, float %b) {
 
 define void @test_fdiv_half(half %a, half %b) {
 ; CHECK-LABEL: @test_fdiv_half(
-; CHECK:    [[TMP1:%[A-z0-9]*]] = fcmp oeq half [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]]
-; CHECK:    br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]]
-; CHECK:  [[BB2]]:
-; CHECK:    [[TMP2:%[A-z0-9]*]] = fpext half [[B]] to float
-; CHECK:    [[TMP3:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP2]]
-; CHECK:    [[TMP4:%[A-z0-9]*]] = fpext half [[A]] to float
-; CHECK:    [[TMP5:%[A-z0-9]*]] = fmul float [[TMP3]], [[TMP4]]
-; CHECK:    [[TMP6:%[A-z0-9]*]] = fptrunc float [[TMP5]] to half
-; CHECK:  [[BB3]]:
-; CHECK:    [[TMP7:%[A-z0-9]*]] = phi half [ 0xH3C00, %[[BB1:[A-z0-9]*]] ], [ [[TMP6]], %[[BB2]] ]
-; CHECK:    call void @use.f16(half [[TMP7]])
+; CHECK:    [[TMP1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float
+; CHECK:    [[TMP2:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP1]]
+; CHECK:    [[TMP3:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float
+; CHECK:    [[TMP4:%[A-z0-9]*]] = fmul float [[TMP2]], [[TMP3]]
+; CHECK:    [[TMP5:%[A-z0-9]*]] = fptrunc float [[TMP4]] to half
+; CHECK:    call void @use.f16(half [[TMP5]])
 ; CHECK:    ret void
 ;
   %1 = fdiv half %a, %b