Skip to content

Commit

Permalink
Refactor fdiv-pattern in PatternMatch
Browse files Browse the repository at this point in the history
.
  • Loading branch information
igorban-intel authored and igcbot committed Sep 25, 2023
1 parent 549f098 commit c1b1059
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 61 deletions.
50 changes: 6 additions & 44 deletions IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2022,58 +2022,19 @@ findOptimalInsertionPos(Instruction *I, Instruction *Ref, DominatorTree *DT,
return Pos;
}

// For the specified constant, calculate its reciprocal if it's safe;
// otherwise, return null.
//
// C must have a floating-point scalar or floating-point vector type.
// HasAllowReciprocal additionally permits folding when the division result
// had to be rounded (APFloat::opInexact); without it only exact results
// (APFloat::opOK) are folded.
//
// Returns C itself for undef, the folded scalar for a ConstantFP, a vector
// constant assembled from per-element reciprocals for a fixed vector, and
// nullptr for anything that cannot be folded safely.
static Constant *getReciprocal(Constant *C, bool HasAllowReciprocal) {
  IGC_ASSERT_MESSAGE(C->getType()->isFPOrFPVectorTy(),
                     "Floating point value is expected!");

  // TODO: remove this and use ConstantExpr::getFDiv.

  // Reciprocal of undef can be undef.
  if (isa<UndefValue>(C))
    return C;

  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
    // Compute the reciprocal of C.
    const APFloat &Divisor = CFP->getValueAPF();
    APFloat Rcp(Divisor.getSemantics(), 1U);
    APFloat::opStatus Status =
        Rcp.divide(Divisor, APFloat::rmNearestTiesToEven);
    // Only fold it if it's safe.
    if (Status == APFloat::opOK ||
        (HasAllowReciprocal && Status == APFloat::opInexact))
      return ConstantFP::get(C->getType()->getContext(), Rcp);
    return nullptr;
  }

  // Anything that reaches this point and is not a fixed vector (e.g. a scalar
  // constant expression that was not folded to a ConstantFP) cannot be
  // processed element-wise. Report "not foldable" instead of asserting inside
  // an unconditional cast<>.
  auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(C->getType());
  if (!VTy)
    return nullptr;
  IntegerType *ITy = Type::getInt32Ty(VTy->getContext());

  SmallVector<Constant *, 16> Result;
  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
    Constant *Elt =
        ConstantExpr::getExtractElement(C, ConstantInt::get(ITy, i));
    Constant *Rcp = getReciprocal(Elt, HasAllowReciprocal);
    // Skip if any of elements fails to be folded as reciprocal.
    if (!Rcp)
      return nullptr;
    Result.push_back(Rcp);
  }
  return ConstantVector::get(Result);
}

// For the given value, calculate its reciprocal and perform constant
// folding if allowed.
static Value *getReciprocal(IRBuilder<> &IRB, Value *V,
bool HasAllowReciprocal = true) {
Module *M = IRB.GetInsertBlock()->getModule();
if (Constant *C = dyn_cast<Constant>(V))
return getReciprocal(C, HasAllowReciprocal);
return ConstantFoldBinaryOpOperands(Instruction::FDiv,
ConstantFP::get(C->getType(), 1.0), C,
M->getDataLayout());

if (!HasAllowReciprocal)
return nullptr;

Module *M = IRB.GetInsertBlock()->getModule();
Twine Name = V->getName() + ".inv";
auto Func = GenXIntrinsic::getGenXDeclaration(M, GenXIntrinsic::genx_inv,
V->getType());
Expand Down Expand Up @@ -2112,7 +2073,8 @@ void GenXPatternMatch::visitFDiv(BinaryOperator &I) {
Value *Op1 = I.getOperand(1);
// Constant folding Op1 if it's safe.
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Rcp = getReciprocal(C1, I.hasAllowReciprocal());
Constant *Rcp = ConstantFoldBinaryOpOperands(
Instruction::FDiv, ConstantFP::get(C1->getType(), 1.0), C1, *DL);
if (!Rcp)
return;
IRB.setFastMathFlags(I.getFastMathFlags());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,13 @@ bool GenXCloneIndirectFunctions::runOnModule(Module &M) {

auto &&BECfg = getAnalysis<GenXBackendConfig>();
IGC_ASSERT_MESSAGE(
llvm::none_of(M.functions(),
[&](const Function& F) { return F.hasAddressTaken() && BECfg.directCallsOnly(F.getName()); }),
"A function has address taken inside the module that contradicts "
"DirectCallsOnly option");
llvm::none_of(M.functions(),
[&](const Function &F) {
return F.hasAddressTaken() &&
BECfg.directCallsOnly(F.getName());
}),
"A function has address taken inside the module that contradicts "
"DirectCallsOnly option");

// If direct calls are forced for all functions.
if (BECfg.directCallsOnly()) {
Expand All @@ -161,7 +164,8 @@ bool GenXCloneIndirectFunctions::runOnModule(Module &M) {
bool Modified = false;

for (auto [F, IsExternal] : IndirectFuncs) {
if (BECfg.directCallsOnly(F->getName())) continue;
if (BECfg.directCallsOnly(F->getName()))
continue;

auto CheckDirectCall = [Func = F](User *U) {
auto *CI = dyn_cast<CallInst>(U);
Expand Down
17 changes: 12 additions & 5 deletions IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXLinkageCorruptor.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2021 Intel Corporation
Copyright (C) 2021-2023 Intel Corporation
SPDX-License-Identifier: MIT
Expand Down Expand Up @@ -68,6 +68,8 @@ bool GenXLinkageCorruptor::runOnModule(Module &M) {

// Indirect functions are always stack calls.
if (F.hasAddressTaken()) {
LLVM_DEBUG(dbgs() << "Adding stack call to indirect function: "
<< F.getName() << "\n");
F.addFnAttr(genx::FunctionMD::CMStackCall);
Changed = true;
IGC_ASSERT(vc::isIndirect(F));
Expand All @@ -80,10 +82,15 @@ bool GenXLinkageCorruptor::runOnModule(Module &M) {
Changed = true;
}

// Do not change stack calls linkage as we may have both types of stack
// calls.
if (vc::requiresStackCall(&F) && SaveStackCallLinkage)
continue;
// Remove alwaysinline attribute and keep unchanged stack calls linkage as
// we may have both types of stack calls.
if (vc::requiresStackCall(&F)) {
F.removeFnAttr(Attribute::AlwaysInline);
Changed = true;

if (SaveStackCallLinkage)
continue;
}

F.setLinkage(GlobalValue::InternalLinkage);
Changed = true;
Expand Down
3 changes: 2 additions & 1 deletion IGC/VectorCompiler/test/CloneIndirectFunctions/basic.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2022 Intel Corporation
; Copyright (C) 2022-2023 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
Expand Down Expand Up @@ -31,6 +31,7 @@ define dllexport void @kernel() {

; COM: direct with internal linkage type
; CHECK: define internal spir_func void @foo_direct
; CHECK-SAME: ) {
; CHECK-NEXT: %vec.ref.ld = load <8 x i32>, <8 x i32>* %vec.ref
; CHECK-NEXT: ret void

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ define internal spir_func float @bar() {
}

; COM: function with taken address shouldn't change
define internal spir_func void @indirect() {
define internal spir_func void @indirect() #0 {
; CHECK: define internal spir_func void @indirect() #[[ATTR:[0-9]]] {
%indirect.get.ptr = call i64 @get_printf_ptr()
store i64 %indirect.get.ptr, i64* @__imparg_llvm.vc.internal.print.buffer, align 8
Expand All @@ -50,6 +50,7 @@ define dllexport spir_kernel void @foo_kernel() {
ret void
}

attributes #0 = { alwaysinline }
; CHECK: attributes #[[ATTR]] = { "CMStackCall" }

!genx.kernels = !{!0}
Expand Down
12 changes: 7 additions & 5 deletions IGC/VectorCompiler/test/LinkageCorruptor/stackcall_conv_new.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2021 Intel Corporation
; Copyright (C) 2021-2023 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
Expand All @@ -13,23 +13,25 @@

target datalayout = "e-p:64:64-i64:64-n8:16:32"

; Function Attrs: noinline nounwind
define spir_func void @foo(<8 x i32>* %vec.ref) {

; CHECK: define spir_func void @foo(<8 x i32>* %vec.ref) [[ATTR:#[0-9]+]] {
define spir_func void @foo(<8 x i32>* %vec.ref) #0 {
%vec.ref.ld = load <8 x i32>, <8 x i32>* %vec.ref
ret void
}

; Function Attrs: noinline nounwind
define dllexport void @kernel() {
%kernel.vec.ref = alloca <8 x i32>, align 32

call spir_func void @foo(<8 x i32>* nonnull %kernel.vec.ref)
; CHECK: call spir_func void @foo
; CHECK-SAME: <8 x i32>* nonnull
; CHECK: CMStackCall

ret void
}

; CHECK: [[ATTR]] = { "CMStackCall" }
attributes #0 = { alwaysinline }

!genx.kernels = !{!0}
!0 = !{void ()* @kernel}
43 changes: 43 additions & 0 deletions IGC/VectorCompiler/test/PatternMatch/fdiv-patt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2023 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; RUN: opt %use_old_pass_manager% -GenXPatternMatch -march=genx64 -mcpu=Gen9 -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s

; Test, based on laplace cm-test
; CHECK-LABEL: @laplace_genx
define spir_kernel void @laplace_genx(<4 x float> %0, <144 x float> %1, <24 x float> %2) {
.preheader764:
; Reduced all uitofp and fdiv
; CHECK-NOT: uitofp
; CHECK-NOT: fdiv
; CHECK: fmul <144 x float> {{.*}}, <
; CHECK-COUNT-144: float 0x3F70101020000000,
; CHECK: fmul <4 x float> {{.*}}, <
; CHECK-COUNT-4: float 0x3F70101020000000,
; CHECK: fmul <24 x float> {{.*}}, <
; CHECK-COUNT-24: float 0x3F70101020000000,
%3 = fdiv <144 x float> %1, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 
2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
%4 = fdiv <4 x float> %0, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
%5 = fdiv <4 x float> %0, zeroinitializer
%6 = fdiv <24 x float> %2, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
%7 = fdiv <24 x float> %2, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
%8 = fdiv <24 x float> %2, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
%.regioncollapsed1042 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %7, i32 0, i32 0, i32 0, i16 0, i32 0)
%9 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %8, i32 0, i32 0, i32 0, i16 0, i32 0)
%.regioncollapsed1039 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v144f32.i16(<144 x float> %3, i32 0, i32 0, i32 0, i16 0, i32 0)
%.regioncollapsed1033 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %6, i32 0, i32 0, i32 0, i16 0, i32 0)
%10 = tail call <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float> %5, <1 x float> zeroinitializer, i32 0, i32 0, i32 0, i16 0, i32 0, i1 false)
%11 = tail call <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float> %4, <1 x float> zeroinitializer, i32 0, i32 0, i32 0, i16 0, i32 0, i1 false)
ret void
}

declare <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float>, <1 x float>, i32, i32, i32, i16, i32, i1)

declare <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float>, i32, i32, i32, i16, i32)

declare <1 x float> @llvm.genx.rdregionf.v1f32.v144f32.i16(<144 x float>, i32, i32, i32, i16, i32)

0 comments on commit c1b1059

Please sign in to comment.