Skip to content

Commit

Permalink
Avoid reciprocal round-trip error in FDIV emulation
Browse files Browse the repository at this point in the history
If x == y, then x/y == 1 (assuming x and y are normal values, i.e.
neither is +/-0, +/-NaN, +/-Inf, nor subnormal). In that case, skip the
FDIV expansion computation to avoid reciprocal round-trip error.
  • Loading branch information
michalpaszkowski authored and igcbot committed Sep 25, 2023
1 parent 0d6a129 commit 549f098
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 22 deletions.
57 changes: 47 additions & 10 deletions IGC/Compiler/LegalizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2710,8 +2710,10 @@ static bool needsNoScaling(Value* Val)
bool IGC::expandFDIVInstructions(llvm::Function& F)
{
bool Changed = false;
for (auto& BB : F.getBasicBlockList()) {
for (auto Iter = BB.begin(); Iter != BB.end();) {
for (auto BBIter = F.begin(); BBIter != F.end();) {
BasicBlock *BB = &*BBIter++;

for (auto Iter = BB->begin(); Iter != BB->end();) {
Instruction* Inst = &*Iter++;
if (!isCandidateFDiv(Inst))
continue;
Expand Down Expand Up @@ -2747,28 +2749,63 @@ bool IGC::expandFDIVInstructions(llvm::Function& F)
V = Builder.CreateFMul(Y, X);
}
else {
BasicBlock *PreFDIVExpBB = BB;
BasicBlock *PostFDIVExpBB = BB->splitBasicBlock(Inst->getNextNode());
BasicBlock *FDIVExpBB = BB->splitBasicBlock(Inst);

Builder.SetInsertPoint(
FDIVExpBB->getPrevNode()->getTerminator());

// If x == y then x/y == 1 (assuming x and y are normal values,
// i.e. neither is +/-0, +/-NaN, +/-Inf, or subnormal), skip
// FDIV expansion basic block to avoid reciprocal
// round-trip error, break to post-FDIV-expansion basic block.
Value *CmpXY = Builder.CreateFCmp(CmpInst::FCMP_OEQ, X, Y);
Value *YAsInt32 = Builder.CreateBitCast(Y, Builder.getInt32Ty());
Value *YExp = Builder.CreateAnd(YAsInt32,
Builder.getInt32(0x7f800000));
Value *YMantissa = Builder.CreateAnd(YAsInt32,
Builder.getInt32(0x007fffff));
Value *CmpYExpZero =
Builder.CreateICmpNE(YExp, Builder.getInt32(0));
Value *CmpYMantissaZero =
Builder.CreateICmpNE(YMantissa, Builder.getInt32(0));
Value *CmpXYandYZero =
Builder.CreateAnd({CmpXY, CmpYExpZero, CmpYMantissaZero});
Builder.CreateCondBr(CmpXYandYZero, PostFDIVExpBB, FDIVExpBB)
->getNextNode()
->eraseFromParent();

// Update iterators after creating BBs.
BBIter = PostFDIVExpBB->getIterator();
BB = FDIVExpBB;
Iter = ++FDIVExpBB->begin();
Builder.SetInsertPoint(Inst);

float S32 = uint64_t(1) << 32;
ConstantFP* C0 = ConstantFP::get(Ctx, APFloat(S32));
ConstantFP* C1 = ConstantFP::get(Ctx, APFloat(1.0f));
ConstantFP* C2 = ConstantFP::get(Ctx, APFloat(1.0f / S32));

Value* Exp = Builder.CreateAnd(
Builder.CreateBitCast(Y, Builder.getInt32Ty()),
Builder.getInt32(0x7f800000));

// Check if B's exponent is 0, scale up.
Value* P1 = Builder.CreateICmpEQ(Exp, Builder.getInt32(0));
// Check if y's exponent is 0, scale up.
Value* P1 = Builder.CreateICmpEQ(YExp, Builder.getInt32(0));
Value* Scale = Builder.CreateSelect(P1, C0, C1);

// Check if B's exponent >= 200, scale down.
Value* P2 = Builder.CreateICmpUGE(Exp, Builder.getInt32(200 << 23));
// Check if y's exponent >= 200, scale down.
Value* P2 = Builder.CreateICmpUGE(YExp, Builder.getInt32(200 << 23));
Scale = Builder.CreateSelect(P2, C2, Scale);

// Compute rcp(y * S) * x * S
V = Builder.CreateFMul(Y, Scale);
V = Builder.CreateFDiv(C1, V);
V = Builder.CreateFMul(V, X);
V = Builder.CreateFMul(V, Scale);

Builder.SetInsertPoint(&*PostFDIVExpBB->begin());
PHINode *Phi = Builder.CreatePHI(V->getType(), 2);
Phi->addIncoming(ConstantFP::get(Ctx, APFloat(1.0f)), PreFDIVExpBB);
Phi->addIncoming(V, FDIVExpBB);
V = Phi;
}

Inst->replaceAllUsesWith(V);
Expand Down
2 changes: 1 addition & 1 deletion IGC/Compiler/Optimizer/SynchronizationObjectCoalescing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ enum InstructionMask : uint32_t
EndOfThreadOperation = (1 << 9),
};
constexpr InstructionMask AllNoAtomicMask =
InstructionMask{ ((EndOfThreadOperation << 1) - 1) & ~InstructionMask::AtomicOperation };
InstructionMask{ ((1 << 9) - 1) & ~InstructionMask::AtomicOperation };

inline constexpr InstructionMask operator|(InstructionMask a, InstructionMask b)
{
Expand Down
32 changes: 21 additions & 11 deletions IGC/Compiler/tests/GenFDIVEmulation/basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,27 @@

define void @test_fdiv(float %a, float %b) {
; CHECK-LABEL: @test_fdiv(
; CHECK: [[TMP1:%[A-z0-9]*]] = bitcast float [[B:%[A-z0-9]*]] to i32
; CHECK: [[TMP2:%[A-z0-9]*]] = and i32 [[TMP1]], 2139095040
; CHECK: [[TMP3:%[A-z0-9]*]] = icmp eq i32 [[TMP2]], 0
; CHECK: [[TMP4:%[A-z0-9]*]] = select i1 [[TMP3]], float 0x41F0000000000000, float 1.000000e+00
; CHECK: [[TMP5:%[A-z0-9]*]] = icmp uge i32 [[TMP2]], 1677721600
; CHECK: [[TMP6:%[A-z0-9]*]] = select i1 [[TMP5]], float 0x3DF0000000000000, float [[TMP4]]
; CHECK: [[TMP7:%[A-z0-9]*]] = fmul float [[B]], [[TMP6]]
; CHECK: [[TMP8:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP7]]
; CHECK: [[TMP9:%[A-z0-9]*]] = fmul float [[TMP8]], [[A:%[A-z0-9]*]]
; CHECK: [[TMP10:%[A-z0-9]*]] = fmul float [[TMP9]], [[TMP6]]
; CHECK: call void @use.f32(float [[TMP10]])
; CHECK: [[TMP1:%[A-z0-9]*]] = fcmp oeq float [[A:%[A-z0-9]*]], [[B:%[A-z0-9]*]]
; CHECK: [[TMP2:%[A-z0-9]*]] = bitcast float [[B]] to i32
; CHECK: [[TMP3:%[A-z0-9]*]] = and i32 [[TMP2]], 2139095040
; CHECK: [[TMP4:%[A-z0-9]*]] = and i32 [[TMP2]], 8388607
; CHECK: [[TMP5:%[A-z0-9]*]] = icmp ne i32 [[TMP3]], 0
; CHECK: [[TMP6:%[A-z0-9]*]] = icmp ne i32 [[TMP4]], 0
; CHECK: [[TMP7:%[A-z0-9]*]] = and i1 [[TMP1]], [[TMP5]], !dbg !11
; CHECK: [[TMP8:%[A-z0-9]*]] = and i1 [[TMP7]], [[TMP6]], !dbg !11
; CHECK: br i1 [[TMP8]], label %[[BB3:[A-z0-9]*]], label %[[BB2:[A-z0-9]*]]
; CHECK: [[BB2]]:
; CHECK: [[TMP9:%[A-z0-9]*]] = icmp eq i32 [[TMP3]], 0
; CHECK: [[TMP10:%[A-z0-9]*]] = select i1 [[TMP9]], float 0x41F0000000000000, float 1.000000e+00
; CHECK: [[TMP11:%[A-z0-9]*]] = icmp uge i32 [[TMP3]], 1677721600
; CHECK: [[TMP12:%[A-z0-9]*]] = select i1 [[TMP11]], float 0x3DF0000000000000, float [[TMP10]]
; CHECK: [[TMP13:%[A-z0-9]*]] = fmul float [[B]], [[TMP12]]
; CHECK: [[TMP14:%[A-z0-9]*]] = fdiv float 1.000000e+00, [[TMP13]]
; CHECK: [[TMP15:%[A-z0-9]*]] = fmul float [[TMP14]], [[A]]
; CHECK: [[TMP16:%[A-z0-9]*]] = fmul float [[TMP15]], [[TMP12]]
; CHECK: [[BB3]]:
; CHECK: [[TMP14:%[A-z0-9]*]] = phi float [ 1.000000e+00, %[[BB1:[A-z0-9]*]] ], [ [[TMP16]], %[[BB2]] ]
; CHECK: call void @use.f32(float [[TMP14]])
; CHECK: ret void
;
%1 = fdiv float %a, %b
Expand Down

0 comments on commit 549f098

Please sign in to comment.