diff --git a/IGC/Compiler/LegalizationPass.cpp b/IGC/Compiler/LegalizationPass.cpp index 8788c14ebe82..9315f3279aeb 100644 --- a/IGC/Compiler/LegalizationPass.cpp +++ b/IGC/Compiler/LegalizationPass.cpp @@ -2710,38 +2710,20 @@ static bool needsNoScaling(Value* Val) bool IGC::expandFDIVInstructions(llvm::Function& F) { bool Changed = false; - for (auto BBIter = F.begin(); BBIter != F.end();) { - BasicBlock* BB = &*BBIter++; - - for (auto Iter = BB->begin(); Iter != BB->end();) { + for (auto& BB : F.getBasicBlockList()) { + for (auto Iter = BB.begin(); Iter != BB.end();) { Instruction* Inst = &*Iter++; if (!isCandidateFDiv(Inst)) continue; + IRBuilder<> Builder(Inst); + Builder.setFastMathFlags(Inst->getFastMathFlags()); + auto& Ctx = Inst->getContext(); Value* X = Inst->getOperand(0); Value* Y = Inst->getOperand(1); Value* V = nullptr; - BasicBlock* PreFDIVExpBB = BB; - BasicBlock* PostFDIVExpBB = BB->splitBasicBlock(Inst->getNextNode()); - BasicBlock* FDIVExpBB = BB->splitBasicBlock(Inst); - - IRBuilder<> Builder(FDIVExpBB->getPrevNode()->getTerminator()); - Builder.setFastMathFlags(Inst->getFastMathFlags()); - - // If x == y then x/y == 1, skip FDIV expansion basic block to avoid - // reciprocal round-trip error, break to post-FDIV-expansion basic - // block. - Value* CmpXY = Builder.CreateFCmp(CmpInst::FCMP_OEQ, X, Y); - Builder.CreateCondBr(CmpXY, PostFDIVExpBB, FDIVExpBB)->getNextNode()->eraseFromParent(); - - // Update iterators after creating BBs. - BBIter = PostFDIVExpBB->getIterator(); - BB = FDIVExpBB; - Iter = ++FDIVExpBB->begin(); - Builder.SetInsertPoint(Inst); - if (Inst->getType()->isHalfTy()) { if (Inst->hasAllowReciprocal()) { APFloat Val(1.0f); @@ -2789,16 +2771,7 @@ bool IGC::expandFDIVInstructions(llvm::Function& F) V = Builder.CreateFMul(V, Scale); } - Builder.SetInsertPoint(&*PostFDIVExpBB->begin()); - PHINode* Phi = Builder.CreatePHI(V->getType(), 2); - APFloat VConstOne(1.0f); - if (V->getType()->isHalfTy()) { - bool ignored; - VConstOne.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &ignored); - } - Phi->addIncoming(ConstantFP::get(Ctx, VConstOne), PreFDIVExpBB); - Phi->addIncoming(V, FDIVExpBB); - Inst->replaceAllUsesWith(Phi); + Inst->replaceAllUsesWith(V); Inst->eraseFromParent(); Changed = true; } diff --git a/IGC/Compiler/tests/GenFDIVEmulation/basic.ll b/IGC/Compiler/tests/GenFDIVEmulation/basic.ll index c29ac849be4b..f418f49e5b6e 100644 --- a/IGC/Compiler/tests/GenFDIVEmulation/basic.ll +++ b/IGC/Compiler/tests/GenFDIVEmulation/basic.ll @@ -17,22 +17,17 @@ define void @test_fdiv(float %a, float %b) { ; CHECK-LABEL: @test_fdiv( -; CHECK: [[TMP1:%[A-z0-9]*]] = fcmp oeq float [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]] -; CHECK: br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]] -; CHECK: [[BB2]]: -; CHECK: [[TMP2:%[A-z0-9]*]] = bitcast float [[B]] to i32 -; CHECK: [[TMP3:%[A-z0-9]*]] = and i32 [[TMP2]], 2139095040 -; CHECK: [[TMP4:%[A-z0-9]*]] = icmp eq i32 [[TMP3]], 0 -; CHECK: [[TMP5:%[A-z0-9]*]] = select i1 [[TMP4]], float 0x41F0000000000000, float 1.000000e+00 -; CHECK: [[TMP6:%[A-z0-9]*]] = icmp uge i32 [[TMP3]], 1677721600 -; CHECK: [[TMP7:%[A-z0-9]*]] = select i1 [[TMP6]], float 0x3DF0000000000000, float [[TMP5]] -; CHECK: [[TMP8:%[A-z0-9]*]] = fmul float [[B]], [[TMP7]] -; CHECK: [[TMP9:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP8]] -; CHECK: [[TMP10:%[A-z0-9]*]] = fmul float [[TMP9]], [[A]] -; CHECK: [[TMP11:%[A-z0-9]*]] = fmul float [[TMP10]], [[TMP7]] -; CHECK: [[BB3]]: -; CHECK: [[TMP12:%[A-z0-9]*]] = phi float [ 1.000000e+00, %[[BB1:[A-z0-9]*]] ], [ [[TMP11]], %[[BB2]] ] -; CHECK: call void @use.f32(float [[TMP12]]) +; CHECK: [[TMP1:%[A-z0-9]*]] = bitcast float [[B:%[A-z0-9]*]] to i32 +; CHECK: [[TMP2:%[A-z0-9]*]] = and i32 [[TMP1]], 2139095040 +; CHECK: [[TMP3:%[A-z0-9]*]] = icmp eq i32 [[TMP2]], 0 +; CHECK: [[TMP4:%[A-z0-9]*]] = select i1 [[TMP3]], float 0x41F0000000000000, float 1.000000e+00 +; CHECK: [[TMP5:%[A-z0-9]*]] = icmp uge i32 [[TMP2]], 1677721600 +; CHECK: [[TMP6:%[A-z0-9]*]] = select i1 [[TMP5]], float 0x3DF0000000000000, float [[TMP4]] +; CHECK: [[TMP7:%[A-z0-9]*]] = fmul float [[B]], [[TMP6]] +; CHECK: [[TMP8:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP7]] +; CHECK: [[TMP9:%[A-z0-9]*]] = fmul float [[TMP8]], [[A:%[A-z0-9]*]] +; CHECK: [[TMP10:%[A-z0-9]*]] = fmul float [[TMP9]], [[TMP6]] +; CHECK: call void @use.f32(float [[TMP10]]) ; CHECK: ret void ; %1 = fdiv float %a, %b @@ -42,14 +37,9 @@ define void @test_fdiv(float %a, float %b) { define void @test_fdiv_arcp(float %a, float %b) { ; CHECK-LABEL: @test_fdiv_arcp( -; CHECK: [[TMP1:%[A-z0-9]*]] = fcmp arcp oeq float [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]] -; CHECK: br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]] -; CHECK: [[BB2]]: -; CHECK: [[TMP2:%[A-z0-9]*]] = fdiv arcp float 1.000000e+00, [[B]] -; CHECK: [[TMP3:%[A-z0-9]*]] = fmul arcp float [[TMP2]], [[A]] -; CHECK: [[BB3]]: -; CHECK: [[TMP4:%[A-z0-9]*]] = phi {{(arcp )?}}float [ 1.000000e+00, %[[BB1:[A-z0-9]*]] ], [ [[TMP3]], %[[BB2]] ] -; CHECK: call void @use.f32(float [[TMP4]]) +; CHECK: [[TMP1:%[A-z0-9]*]] = fdiv arcp float 1.000000e+00, [[B:%[A-z0-9]*]] +; CHECK: [[TMP2:%[A-z0-9]*]] = fmul arcp float [[TMP1]], [[A:%[A-z0-9]*]] +; CHECK: call void @use.f32(float [[TMP2]]) ; CHECK: ret void ; %1 = fdiv arcp float %a, %b @@ -59,17 +49,12 @@ define void @test_fdiv_arcp(float %a, float %b) { define void @test_fdiv_half(half %a, half %b) { ; CHECK-LABEL: @test_fdiv_half( -; CHECK: [[TMP1:%[A-z0-9]*]] = fcmp oeq half [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]] -; CHECK: br i1 [[TMP1]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]] -; CHECK: [[BB2]]: -; CHECK: [[TMP2:%[A-z0-9]*]] = fpext half [[B]] to float -; CHECK: [[TMP3:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP2]] -; CHECK: [[TMP4:%[A-z0-9]*]] = fpext half [[A]] to float -; CHECK: [[TMP5:%[A-z0-9]*]] = fmul float [[TMP3]], [[TMP4]] -; CHECK: [[TMP6:%[A-z0-9]*]] = fptrunc float [[TMP5]] to half -; CHECK: [[BB3]]: -; CHECK: [[TMP7:%[A-z0-9]*]] = phi half [ 0xH3C00, %[[BB1:[A-z0-9]*]] ], [ [[TMP6]], %[[BB2]] ] -; CHECK: call void @use.f16(half [[TMP7]]) +; CHECK: [[TMP1:%[A-z0-9]*]] = fpext half [[B:%[A-z0-9]*]] to float +; CHECK: [[TMP2:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP1]] +; CHECK: [[TMP3:%[A-z0-9]*]] = fpext half [[A:%[A-z0-9]*]] to float +; CHECK: [[TMP4:%[A-z0-9]*]] = fmul float [[TMP2]], [[TMP3]] +; CHECK: [[TMP5:%[A-z0-9]*]] = fptrunc float [[TMP4]] to half +; CHECK: call void @use.f16(half [[TMP5]]) ; CHECK: ret void ; %1 = fdiv half %a, %b