From c1b10596723486eb5ecc740730081f765997935e Mon Sep 17 00:00:00 2001 From: "Gorban, Igor" Date: Mon, 25 Sep 2023 21:38:13 +0000 Subject: [PATCH] Refactor fdiv-pattern in PatternMatch . --- .../lib/GenXCodeGen/GenXPatternMatch.cpp | 50 +++---------------- .../CMTrans/GenXCloneIndirectFunctions.cpp | 14 ++++-- .../GenXOpts/CMTrans/GenXLinkageCorruptor.cpp | 17 +++++-- .../test/CloneIndirectFunctions/basic.ll | 3 +- .../LinkageCorruptor/func_with_taken_addr.ll | 3 +- .../LinkageCorruptor/stackcall_conv_new.ll | 12 +++-- .../test/PatternMatch/fdiv-patt.ll | 43 ++++++++++++++++ 7 files changed, 81 insertions(+), 61 deletions(-) create mode 100644 IGC/VectorCompiler/test/PatternMatch/fdiv-patt.ll diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp index 45466d683dce..1d15e1f5b78d 100644 --- a/IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp +++ b/IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp @@ -2022,58 +2022,19 @@ findOptimalInsertionPos(Instruction *I, Instruction *Ref, DominatorTree *DT, return Pos; } -// For the specified constant, calculate its reciprocal if it's safe; -// otherwise, return null. -static Constant *getReciprocal(Constant *C, bool HasAllowReciprocal) { - IGC_ASSERT_MESSAGE(C->getType()->isFPOrFPVectorTy(), - "Floating point value is expected!"); - - // TODO: remove this and use ConstantExpr::getFDiv. - - // Reciprocal of undef can be undef. - if (isa(C)) - return C; - - if (ConstantFP *CFP = dyn_cast(C)) { - // Compute the reciprocal of C. - const APFloat &Divisor = CFP->getValueAPF(); - APFloat Rcp(Divisor.getSemantics(), 1U); - APFloat::opStatus Status = - Rcp.divide(Divisor, APFloat::rmNearestTiesToEven); - // Only fold it if it's safe. - if (Status == APFloat::opOK || - (HasAllowReciprocal && Status == APFloat::opInexact)) - return ConstantFP::get(C->getType()->getContext(), Rcp); - return nullptr; - } - - auto *VTy = cast(C->getType()); - IntegerType *ITy = Type::getInt32Ty(VTy->getContext()); - - SmallVector Result; - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Elt = - ConstantExpr::getExtractElement(C, ConstantInt::get(ITy, i)); - Constant *Rcp = getReciprocal(Elt, HasAllowReciprocal); - // Skip if any of elements fails to be folded as reciprocal. - if (!Rcp) - return nullptr; - Result.push_back(Rcp); - } - return ConstantVector::get(Result); -} - // For the given value, calculate its reciprocal and performance constant // folding if allowed. static Value *getReciprocal(IRBuilder<> &IRB, Value *V, bool HasAllowReciprocal = true) { + Module *M = IRB.GetInsertBlock()->getModule(); if (Constant *C = dyn_cast(V)) - return getReciprocal(C, HasAllowReciprocal); + return ConstantFoldBinaryOpOperands(Instruction::FDiv, + ConstantFP::get(C->getType(), 1.0), C, + M->getDataLayout()); if (!HasAllowReciprocal) return nullptr; - Module *M = IRB.GetInsertBlock()->getModule(); Twine Name = V->getName() + ".inv"; auto Func = GenXIntrinsic::getGenXDeclaration(M, GenXIntrinsic::genx_inv, V->getType()); @@ -2112,7 +2073,8 @@ void GenXPatternMatch::visitFDiv(BinaryOperator &I) { Value *Op1 = I.getOperand(1); // Constant folding Op1 if it's safe. if (Constant *C1 = dyn_cast(Op1)) { - Constant *Rcp = getReciprocal(C1, I.hasAllowReciprocal()); + Constant *Rcp = ConstantFoldBinaryOpOperands( + Instruction::FDiv, ConstantFP::get(C1->getType(), 1.0), C1, *DL); if (!Rcp) return; IRB.setFastMathFlags(I.getFastMathFlags()); diff --git a/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXCloneIndirectFunctions.cpp b/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXCloneIndirectFunctions.cpp index 6efc26c126e7..0e36425f74fd 100644 --- a/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXCloneIndirectFunctions.cpp +++ b/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXCloneIndirectFunctions.cpp @@ -146,10 +146,13 @@ bool GenXCloneIndirectFunctions::runOnModule(Module &M) { auto &&BECfg = getAnalysis(); IGC_ASSERT_MESSAGE( - llvm::none_of(M.functions(), - [&](const Function& F) { return F.hasAddressTaken() && BECfg.directCallsOnly(F.getName()); }), - "A function has address taken inside the module that contradicts " - "DirectCallsOnly option"); + llvm::none_of(M.functions(), + [&](const Function &F) { + return F.hasAddressTaken() && + BECfg.directCallsOnly(F.getName()); + }), + "A function has address taken inside the module that contradicts " + "DirectCallsOnly option"); // If direct calls are forced for all functions. if (BECfg.directCallsOnly()) { @@ -161,7 +164,8 @@ bool GenXCloneIndirectFunctions::runOnModule(Module &M) { bool Modified = false; for (auto [F, IsExternal] : IndirectFuncs) { - if (BECfg.directCallsOnly(F->getName())) continue; + if (BECfg.directCallsOnly(F->getName())) + continue; auto CheckDirectCall = [Func = F](User *U) { auto *CI = dyn_cast(U); diff --git a/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXLinkageCorruptor.cpp b/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXLinkageCorruptor.cpp index d5549e653337..0a3a539ca08a 100644 --- a/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXLinkageCorruptor.cpp +++ b/IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXLinkageCorruptor.cpp @@ -1,6 +1,6 @@ /*========================== begin_copyright_notice ============================ -Copyright (C) 2021 Intel Corporation +Copyright (C) 2021-2023 Intel Corporation SPDX-License-Identifier: MIT @@ -68,6 +68,8 @@ bool GenXLinkageCorruptor::runOnModule(Module &M) { // Indirect functions are always stack calls. if (F.hasAddressTaken()) { + LLVM_DEBUG(dbgs() << "Adding stack call to indirect function: " + << F.getName() << "\n"); F.addFnAttr(genx::FunctionMD::CMStackCall); Changed = true; IGC_ASSERT(vc::isIndirect(F)); @@ -80,10 +82,15 @@ bool GenXLinkageCorruptor::runOnModule(Module &M) { Changed = true; } - // Do not change stack calls linkage as we may have both types of stack - // calls. - if (vc::requiresStackCall(&F) && SaveStackCallLinkage) - continue; + // Remove alwaysinline attribute and keep unchanged stack calls linkage as + // we may have both types of stack calls. + if (vc::requiresStackCall(&F)) { + F.removeFnAttr(Attribute::AlwaysInline); + Changed = true; + + if (SaveStackCallLinkage) + continue; + } F.setLinkage(GlobalValue::InternalLinkage); Changed = true; diff --git a/IGC/VectorCompiler/test/CloneIndirectFunctions/basic.ll b/IGC/VectorCompiler/test/CloneIndirectFunctions/basic.ll index e90ea8996ae6..d28e5b049330 100644 --- a/IGC/VectorCompiler/test/CloneIndirectFunctions/basic.ll +++ b/IGC/VectorCompiler/test/CloneIndirectFunctions/basic.ll @@ -1,6 +1,6 @@ ;=========================== begin_copyright_notice ============================ ; -; Copyright (C) 2022 Intel Corporation +; Copyright (C) 2022-2023 Intel Corporation ; ; SPDX-License-Identifier: MIT ; @@ -31,6 +31,7 @@ define dllexport void @kernel() { ; COM: direct with internal linkage type ; CHECK: define internal spir_func void @foo_direct +; CHECK-SAME: ) { ; CHECK-NEXT: %vec.ref.ld = load <8 x i32>, <8 x i32>* %vec.ref ; CHECK-NEXT: ret void diff --git a/IGC/VectorCompiler/test/LinkageCorruptor/func_with_taken_addr.ll b/IGC/VectorCompiler/test/LinkageCorruptor/func_with_taken_addr.ll index a6de638e4c40..9ce9ac0eb682 100644 --- a/IGC/VectorCompiler/test/LinkageCorruptor/func_with_taken_addr.ll +++ b/IGC/VectorCompiler/test/LinkageCorruptor/func_with_taken_addr.ll @@ -34,7 +34,7 @@ define internal spir_func float @bar() { } ; COM: function with taken address shouldn't change -define internal spir_func void @indirect() { +define internal spir_func void @indirect() #0 { ; CHECK: define internal spir_func void @indirect() #[[ATTR:[0-9]]] { %indirect.get.ptr = call i64 @get_printf_ptr() store i64 %indirect.get.ptr, i64* @__imparg_llvm.vc.internal.print.buffer, align 8 @@ -50,6 +50,7 @@ define dllexport spir_kernel void @foo_kernel() { ret void } +attributes #0 = { alwaysinline } ; CHECK: attributes #[[ATTR]] = { "CMStackCall" } !genx.kernels = !{!0} diff --git a/IGC/VectorCompiler/test/LinkageCorruptor/stackcall_conv_new.ll b/IGC/VectorCompiler/test/LinkageCorruptor/stackcall_conv_new.ll index 7e4fd11057f1..f7fa7b7ec526 100644 --- a/IGC/VectorCompiler/test/LinkageCorruptor/stackcall_conv_new.ll +++ b/IGC/VectorCompiler/test/LinkageCorruptor/stackcall_conv_new.ll @@ -1,6 +1,6 @@ ;=========================== begin_copyright_notice ============================ ; -; Copyright (C) 2021 Intel Corporation +; Copyright (C) 2021-2023 Intel Corporation ; ; SPDX-License-Identifier: MIT ; @@ -13,23 +13,25 @@ target datalayout = "e-p:64:64-i64:64-n8:16:32" -; Function Attrs: noinline nounwind -define spir_func void @foo(<8 x i32>* %vec.ref) { + +; CHECK: define spir_func void @foo(<8 x i32>* %vec.ref) [[ATTR:#[0-9]+]] { +define spir_func void @foo(<8 x i32>* %vec.ref) #0 { %vec.ref.ld = load <8 x i32>, <8 x i32>* %vec.ref ret void } -; Function Attrs: noinline nounwind define dllexport void @kernel() { %kernel.vec.ref = alloca <8 x i32>, align 32 call spir_func void @foo(<8 x i32>* nonnull %kernel.vec.ref) ; CHECK: call spir_func void @foo ; CHECK-SAME: <8 x i32>* nonnull -; CHECK: CMStackCall ret void } +; CHECK: [[ATTR]] = { "CMStackCall" } +attributes #0 = { alwaysinline } + !genx.kernels = !{!0} !0 = !{void ()* @kernel} diff --git a/IGC/VectorCompiler/test/PatternMatch/fdiv-patt.ll b/IGC/VectorCompiler/test/PatternMatch/fdiv-patt.ll new file mode 100644 index 000000000000..39454c171cd8 --- /dev/null +++ b/IGC/VectorCompiler/test/PatternMatch/fdiv-patt.ll @@ -0,0 +1,43 @@ +;=========================== begin_copyright_notice ============================ +; +; Copyright (C) 2023 Intel Corporation +; +; SPDX-License-Identifier: MIT +; +;============================ end_copyright_notice ============================= + +; RUN: opt %use_old_pass_manager% -GenXPatternMatch -march=genx64 -mcpu=Gen9 -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s + +; Test, based on laplace cm-test +; CHECK-LABEL: @laplace_genx +define spir_kernel void @laplace_genx(<4 x float> %0, <144 x float> %1, <24 x float> %2) { +.preheader764: +; Reduced all uitofp and fdiv +; CHECK-NOT: uitofp +; CHECK-NOT: fdiv +; CHECK: fmul <144 x float> {{.*}}, < +; CHECK-COUNT-144: float 0x3F70101020000000, +; CHECK: fmul <4 x float> {{.*}}, < +; CHECK-COUNT-4: float 0x3F70101020000000, +; CHECK: fmul <24 x float> {{.*}}, < +; CHECK-COUNT-24: float 0x3F70101020000000, + %3 = fdiv <144 x float> %1, + %4 = fdiv <4 x float> %0, + %5 = fdiv <4 x float> %0, zeroinitializer + %6 = fdiv <24 x float> %2, + %7 = fdiv <24 x float> %2, + %8 = fdiv <24 x float> %2, + %.regioncollapsed1042 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %7, i32 0, i32 0, i32 0, i16 0, i32 0) + %9 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %8, i32 0, i32 0, i32 0, i16 0, i32 0) + %.regioncollapsed1039 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v144f32.i16(<144 x float> %3, i32 0, i32 0, i32 0, i16 0, i32 0) + %.regioncollapsed1033 = tail call <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float> %6, i32 0, i32 0, i32 0, i16 0, i32 0) + %10 = tail call <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float> %5, <1 x float> zeroinitializer, i32 0, i32 0, i32 0, i16 0, i32 0, i1 false) + %11 = tail call <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float> %4, <1 x float> zeroinitializer, i32 0, i32 0, i32 0, i16 0, i32 0, i1 false) + ret void +} + +declare <4 x float> @llvm.genx.wrregionf.v4f32.v1f32.i16.i1(<4 x float>, <1 x float>, i32, i32, i32, i16, i32, i1) + +declare <1 x float> @llvm.genx.rdregionf.v1f32.v24f32.i16(<24 x float>, i32, i32, i32, i16, i32) + +declare <1 x float> @llvm.genx.rdregionf.v1f32.v144f32.i16(<144 x float>, i32, i32, i32, i16, i32)