Refactor fdiv-pattern in PatternMatch

.
intel · Sep 25, 2023 · c1b1059 · c1b1059
1 parent 549f098
commit c1b1059
Show file tree

Hide file tree

Showing 7 changed files with 81 additions and 61 deletions.
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp
@@ -2022,58 +2022,19 @@ findOptimalInsertionPos(Instruction *I, Instruction *Ref, DominatorTree *DT,
   return Pos;
 }
 
-// For the specified constant, calculate its reciprocal if it's safe;
-// otherwise, return null.
-static Constant *getReciprocal(Constant *C, bool HasAllowReciprocal) {
-  IGC_ASSERT_MESSAGE(C->getType()->isFPOrFPVectorTy(),
-                     "Floating point value is expected!");
-
-  // TODO: remove this and use ConstantExpr::getFDiv.
-
-  // Reciprocal of undef can be undef.
-  if (isa<UndefValue>(C))
-    return C;
-
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
-    // Compute the reciprocal of C.
-    const APFloat &Divisor = CFP->getValueAPF();
-    APFloat Rcp(Divisor.getSemantics(), 1U);
-    APFloat::opStatus Status =
-        Rcp.divide(Divisor, APFloat::rmNearestTiesToEven);
-    // Only fold it if it's safe.
-    if (Status == APFloat::opOK ||
-        (HasAllowReciprocal && Status == APFloat::opInexact))
-      return ConstantFP::get(C->getType()->getContext(), Rcp);
-    return nullptr;
-  }
-
-  auto *VTy = cast<IGCLLVM::FixedVectorType>(C->getType());
-  IntegerType *ITy = Type::getInt32Ty(VTy->getContext());
-
-  SmallVector<Constant *, 16> Result;
-  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
-    Constant *Elt =
-        ConstantExpr::getExtractElement(C, ConstantInt::get(ITy, i));
-    Constant *Rcp = getReciprocal(Elt, HasAllowReciprocal);
-    // Skip if any of elements fails to be folded as reciprocal.
-    if (!Rcp)
-      return nullptr;
-    Result.push_back(Rcp);
-  }
-  return ConstantVector::get(Result);
-}
-
 // For the given value, calculate its reciprocal and perform constant
 // folding if allowed.
 static Value *getReciprocal(IRBuilder<> &IRB, Value *V,
                             bool HasAllowReciprocal = true) {
+  Module *M = IRB.GetInsertBlock()->getModule();
   if (Constant *C = dyn_cast<Constant>(V))
-    return getReciprocal(C, HasAllowReciprocal);
+    return ConstantFoldBinaryOpOperands(Instruction::FDiv,
+                                        ConstantFP::get(C->getType(), 1.0), C,
+                                        M->getDataLayout());
 
   if (!HasAllowReciprocal)
     return nullptr;
 
-  Module *M = IRB.GetInsertBlock()->getModule();
   Twine Name = V->getName() + ".inv";
   auto Func = GenXIntrinsic::getGenXDeclaration(M, GenXIntrinsic::genx_inv,
                                                 V->getType());
@@ -2112,7 +2073,8 @@ void GenXPatternMatch::visitFDiv(BinaryOperator &I) {
   Value *Op1 = I.getOperand(1);
   // Constant folding Op1 if it's safe.
   if (Constant *C1 = dyn_cast<Constant>(Op1)) {
-    Constant *Rcp = getReciprocal(C1, I.hasAllowReciprocal());
+    Constant *Rcp = ConstantFoldBinaryOpOperands(
+        Instruction::FDiv, ConstantFP::get(C1->getType(), 1.0), C1, *DL);
     if (!Rcp)
       return;
     IRB.setFastMathFlags(I.getFastMathFlags());

diff --git a/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXCloneIndirectFunctions.cpp b/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXCloneIndirectFunctions.cpp
@@ -146,10 +146,13 @@ bool GenXCloneIndirectFunctions::runOnModule(Module &M) {
 
   auto &&BECfg = getAnalysis<GenXBackendConfig>();
   IGC_ASSERT_MESSAGE(
-    llvm::none_of(M.functions(),
-      [&](const Function& F) { return F.hasAddressTaken() && BECfg.directCallsOnly(F.getName()); }),
-    "A function has address taken inside the module that contradicts "
-    "DirectCallsOnly option");
+      llvm::none_of(M.functions(),
+                    [&](const Function &F) {
+                      return F.hasAddressTaken() &&
+                             BECfg.directCallsOnly(F.getName());
+                    }),
+      "A function has address taken inside the module that contradicts "
+      "DirectCallsOnly option");
 
   // If direct calls are forced for all functions.
   if (BECfg.directCallsOnly()) {
@@ -161,7 +164,8 @@ bool GenXCloneIndirectFunctions::runOnModule(Module &M) {
   bool Modified = false;
 
   for (auto [F, IsExternal] : IndirectFuncs) {
-    if (BECfg.directCallsOnly(F->getName())) continue;
+    if (BECfg.directCallsOnly(F->getName()))
+      continue;
 
     auto CheckDirectCall = [Func = F](User *U) {
       auto *CI = dyn_cast<CallInst>(U);

diff --git a/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXLinkageCorruptor.cpp b/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXLinkageCorruptor.cpp
@@ -1,6 +1,6 @@
 /*========================== begin_copyright_notice ============================
 
-Copyright (C) 2021 Intel Corporation
+Copyright (C) 2021-2023 Intel Corporation
 
 SPDX-License-Identifier: MIT
 
@@ -68,6 +68,8 @@ bool GenXLinkageCorruptor::runOnModule(Module &M) {
 
     // Indirect functions are always stack calls.
     if (F.hasAddressTaken()) {
+      LLVM_DEBUG(dbgs() << "Adding stack call to indirect function: "
+                        << F.getName() << "\n");
       F.addFnAttr(genx::FunctionMD::CMStackCall);
       Changed = true;
       IGC_ASSERT(vc::isIndirect(F));
@@ -80,10 +82,15 @@ bool GenXLinkageCorruptor::runOnModule(Module &M) {
       Changed = true;
     }
 
-    // Do not change stack calls linkage as we may have both types of stack
-    // calls.
-    if (vc::requiresStackCall(&F) && SaveStackCallLinkage)
-      continue;
+    // Remove the alwaysinline attribute from stack calls and, when requested,
+    // keep their linkage unchanged, as we may have both types of stack calls.
+    if (vc::requiresStackCall(&F)) {
+      F.removeFnAttr(Attribute::AlwaysInline);
+      Changed = true;
+
+      if (SaveStackCallLinkage)
+        continue;
+    }
 
     F.setLinkage(GlobalValue::InternalLinkage);
     Changed = true;

diff --git a/IGC/VectorCompiler/test/CloneIndirectFunctions/basic.ll b/IGC/VectorCompiler/test/CloneIndirectFunctions/basic.ll
@@ -1,6 +1,6 @@
 ;=========================== begin_copyright_notice ============================
 ;
-; Copyright (C) 2022 Intel Corporation
+; Copyright (C) 2022-2023 Intel Corporation
 ;
 ; SPDX-License-Identifier: MIT
 ;
@@ -31,6 +31,7 @@ define dllexport void @kernel() {
 
 ; COM: direct with internal linkage type
 ; CHECK: define internal spir_func void @foo_direct
+; CHECK-SAME: ) {
 ; CHECK-NEXT: %vec.ref.ld = load <8 x i32>, <8 x i32>* %vec.ref
 ; CHECK-NEXT: ret void
 

diff --git a/IGC/VectorCompiler/test/LinkageCorruptor/func_with_taken_addr.ll b/IGC/VectorCompiler/test/LinkageCorruptor/func_with_taken_addr.ll
@@ -34,7 +34,7 @@ define internal spir_func float @bar() {
 }
 
 ; COM: function with taken address shouldn't change
-define internal spir_func void @indirect() {
+define internal spir_func void @indirect() #0 {
 ; CHECK: define internal spir_func void @indirect() #[[ATTR:[0-9]]] {
   %indirect.get.ptr = call i64 @get_printf_ptr()
   store i64 %indirect.get.ptr, i64* @__imparg_llvm.vc.internal.print.buffer, align 8
@@ -50,6 +50,7 @@ define dllexport spir_kernel void @foo_kernel() {
   ret void
 }
 
+attributes #0 = { alwaysinline }
 ; CHECK: attributes #[[ATTR]] = { "CMStackCall" }
 
 !genx.kernels = !{!0}

diff --git a/IGC/VectorCompiler/test/LinkageCorruptor/stackcall_conv_new.ll b/IGC/VectorCompiler/test/LinkageCorruptor/stackcall_conv_new.ll
@@ -1,6 +1,6 @@
 ;=========================== begin_copyright_notice ============================
 ;
-; Copyright (C) 2021 Intel Corporation
+; Copyright (C) 2021-2023 Intel Corporation
 ;
 ; SPDX-License-Identifier: MIT
 ;
@@ -13,23 +13,25 @@
 
 target datalayout = "e-p:64:64-i64:64-n8:16:32"
 
-; Function Attrs: noinline nounwind
-define spir_func void @foo(<8 x i32>* %vec.ref) {
+
+; CHECK: define spir_func void @foo(<8 x i32>* %vec.ref) [[ATTR:#[0-9]+]] {
+define spir_func void @foo(<8 x i32>* %vec.ref) #0 {
   %vec.ref.ld = load <8 x i32>, <8 x i32>* %vec.ref
   ret void
 }
 
-; Function Attrs: noinline nounwind
 define dllexport void @kernel() {
   %kernel.vec.ref = alloca <8 x i32>, align 32
 
   call spir_func void @foo(<8 x i32>* nonnull %kernel.vec.ref)
 ; CHECK: call spir_func void @foo
 ; CHECK-SAME: <8 x i32>* nonnull
-; CHECK: CMStackCall
 
   ret void
 }
 
+; CHECK: [[ATTR]] = { "CMStackCall" }
+attributes #0 = { alwaysinline }
+
 !genx.kernels = !{!0}
 !0 = !{void ()* @kernel}
diff --git a/IGC/VectorCompiler/test/PatternMatch/fdiv-patt.ll b/IGC/VectorCompiler/test/PatternMatch/fdiv-patt.ll
@@ -0,0 +1,43 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2023 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+
+; RUN: opt %use_old_pass_manager% -GenXPatternMatch -march=genx64 -mcpu=Gen9 -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s
+
+; Test, based on laplace cm-test
+; CHECK-LABEL: @laplace_genx
+define spir_kernel void @laplace_genx(<4 x float> %0, <144 x float> %1, <24 x float> %2) {
+.preheader764:
+; Reduced all uitofp and fdiv
+; CHECK-NOT: uitofp
+; CHECK-NOT: fdiv
+; CHECK: fmul <144 x float> {{.*}}, <
+; CHECK-COUNT-144: float 0x3F70101020000000,
+; CHECK: fmul <4 x float> {{.*}}, <
+; CHECK-COUNT-4: float 0x3F70101020000000,
+; CHECK: fmul <24 x float> {{.*}}, <
+; CHECK-COUNT-24: float 0x3F70101020000000,
+  %3 = fdiv <144 x float> %1, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 
2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
+  %4 = fdiv <4 x float> %0, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
+  %5 = fdiv <4 x float> %0, zeroinitializer
+  %6 = fdiv <24 x float> %2, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
+  %7 = fdiv <24 x float> %2, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
+  %8 = fdiv <24 x float> %2, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
+  %.regioncollapsed1042 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %7, i32 0, i32 0, i32 0, i16 0, i32 0)
+  %9 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %8, i32 0, i32 0, i32 0, i16 0, i32 0)
+  %.regioncollapsed1039 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v144f32.i16(<144 x float> %3, i32 0, i32 0, i32 0, i16 0, i32 0)
+  %.regioncollapsed1033 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %6, i32 0, i32 0, i32 0, i16 0, i32 0)
+  %10 = tail call <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float> %5, <1 x float> zeroinitializer, i32 0, i32 0, i32 0, i16 0, i32 0, i1 false)
+  %11 = tail call <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float> %4, <1 x float> zeroinitializer, i32 0, i32 0, i32 0, i16 0, i32 0, i1 false)
+  ret void
+}
+
+declare <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float>, <1 x float>, i32, i32, i32, i16, i32, i1)
+
+declare <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float>, i32, i32, i32, i16, i32)
+
+declare <1 x float> @llvm.genx.rdregionf.v1f32.v144f32.i16(<144 x float>, i32, i32, i32, i16, i32)