From 32adf24c4fcbfb649a06d5aa6fd052196bedf5a5 Mon Sep 17 00:00:00 2001
From: Bartlomiej Gajda
Date: Tue, 3 Dec 2024 16:44:29 +0000
Subject: [PATCH] Optimize trunc on WaveShuffle on zext

Emit optimized pattern.
---
 IGC/Compiler/CISACodeGen/helper.cpp           | 19 ++++++++++
 IGC/Compiler/CISACodeGen/helper.h             |  1 +
 IGC/Compiler/CustomSafeOptPass.cpp            | 36 +++++++++++++++++++
 IGC/Compiler/CustomSafeOptPass.hpp            |  1 +
 .../CustomSafeOptPass/trunc_shuffle_zext.ll   | 33 +++++++++++++++++
 5 files changed, 90 insertions(+)
 create mode 100644 IGC/Compiler/tests/CustomSafeOptPass/trunc_shuffle_zext.ll

diff --git a/IGC/Compiler/CISACodeGen/helper.cpp b/IGC/Compiler/CISACodeGen/helper.cpp
index 0c0efd771796..ef1d4476844d 100644
--- a/IGC/Compiler/CISACodeGen/helper.cpp
+++ b/IGC/Compiler/CISACodeGen/helper.cpp
@@ -1627,6 +1627,25 @@ namespace IGC
         }
     }
 
+    // Returns true if I is one of the sub-group shuffle/broadcast GenISA intrinsics listed below; false for anything else.
+    bool isSubGroupShuffleVariant(const llvm::Instruction* I)
+    {
+        const GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
+        if(!GII)
+            return false;
+
+        switch(GII->getIntrinsicID())
+        {
+        case GenISAIntrinsic::GenISA_WaveShuffleIndex:
+        case GenISAIntrinsic::GenISA_WaveBroadcast:
+        case GenISAIntrinsic::GenISA_WaveClusteredBroadcast:
+        case GenISAIntrinsic::GenISA_simdShuffleXor:
+            return true;
+        default:
+            return false;
+        }
+    }
+
     bool hasSubGroupIntrinsicPVC(llvm::Function& F)
     {
         for (auto& BB : F)
diff --git a/IGC/Compiler/CISACodeGen/helper.h b/IGC/Compiler/CISACodeGen/helper.h
index a8fe5e2bcaf4..f903aa0895c6 100644
--- a/IGC/Compiler/CISACodeGen/helper.h
+++ b/IGC/Compiler/CISACodeGen/helper.h
@@ -185,6 +185,7 @@ namespace IGC
     bool IsSIMDBlockIntrinsic(const llvm::Instruction* inst);
     bool isSubGroupIntrinsic(const llvm::Instruction* I);
     bool isSubGroupIntrinsicPVC(const llvm::Instruction* I);
+    bool isSubGroupShuffleVariant(const llvm::Instruction* I);
     bool hasSubGroupIntrinsicPVC(llvm::Function& F);
     bool isBarrierIntrinsic(const llvm::Instruction* I);
 
diff --git a/IGC/Compiler/CustomSafeOptPass.cpp b/IGC/Compiler/CustomSafeOptPass.cpp
index f0f8424699a0..86bd32adf825 100644
--- a/IGC/Compiler/CustomSafeOptPass.cpp
+++ b/IGC/Compiler/CustomSafeOptPass.cpp
@@ -1928,6 +1928,42 @@ void CustomSafeOptPass::visitBinaryOperator(BinaryOperator& I)
     }
 }
 
+void CustomSafeOptPass::visitTruncInst(TruncInst& I)
+{
+    /*
+    From:
+        %334 = zext i16 %orig to i32
+        %335 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %334, i32 %333, i32 0)
+        %336 = trunc i32 %335 to i16
+    To:
+        %335 = call i16 @llvm.genx.GenISA.WaveShuffleIndex.i16(i16 %orig, i32 %333, i32 0)
+    */
+    if( I.getSrcTy()->isIntegerTy( 32 ) && I.getDestTy()->isIntegerTy( 16 ) )
+    {
+        // We know all variants of shuffle from zext are safe to demote (unlike WaveAll which might not be). The call must have no other user, since it is erased below.
+        if( auto* genIntr = dyn_cast<GenIntrinsicInst>( I.getOperand( 0 ) ); genIntr && isSubGroupShuffleVariant( genIntr ) && genIntr->hasOneUse() )
+        {
+            if( auto* ZI = dyn_cast<ZExtInst>( genIntr->getOperand( 0 ) ); ZI && ZI->getSrcTy()->isIntegerTy( 16 ) && ZI->getDestTy()->isIntegerTy( 32 ) )
+            {
+                IRBuilder<> builder( &I );
+
+                llvm::SmallVector<llvm::Value*, 4> newArgs( genIntr->args().begin(), genIntr->args().end() );
+
+                // Override first arg (same position for all enabled intrinsics here) with lower type
+                newArgs[ 0 ] = ZI->getOperand( 0 );
+
+                // We do it this way, so that module will get proper func declaration of demoted type
+                Function* demotedFuncDeclaration = GenISAIntrinsic::getDeclaration( I.getModule(), genIntr->getIntrinsicID(), builder.getInt16Ty() );
+                Value* replacementCall = builder.CreateCall( demotedFuncDeclaration, newArgs );
+
+                I.replaceAllUsesWith( replacementCall );
+                I.eraseFromParent();
+                genIntr->eraseFromParent();
+            }
+        }
+    }
+}
+
 void IGC::CustomSafeOptPass::visitLdptr(llvm::SamplerLoadIntrinsic* inst)
 {
     if (!IGC_IS_FLAG_ENABLED(UseHDCTypedReadForAllTextures) &&
diff --git a/IGC/Compiler/CustomSafeOptPass.hpp b/IGC/Compiler/CustomSafeOptPass.hpp
index ad293e615c9e..20e3c2850a40 100644
--- a/IGC/Compiler/CustomSafeOptPass.hpp
+++ b/IGC/Compiler/CustomSafeOptPass.hpp
@@ -76,6 +76,7 @@ namespace IGC
         void visitShuffleIndex(llvm::CallInst* I);
         void visitSelectInst(llvm::SelectInst& S);
         void mergeDotAddToDp4a(llvm::CallInst* I);
+        void visitTruncInst( llvm::TruncInst& I );
 
         //
         // IEEE Floating point arithmetic is not associative.  Any pattern
diff --git a/IGC/Compiler/tests/CustomSafeOptPass/trunc_shuffle_zext.ll b/IGC/Compiler/tests/CustomSafeOptPass/trunc_shuffle_zext.ll
new file mode 100644
index 000000000000..e0102cce587f
--- /dev/null
+++ b/IGC/Compiler/tests/CustomSafeOptPass/trunc_shuffle_zext.ll
@@ -0,0 +1,33 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2020-2021 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+; REQUIRES: llvm-14-plus
+; RUN: igc_opt -igc-custom-safe-opt -dce -verify -S < %s | FileCheck %s
+;
+; Test checks that we demote WaveShuffle when used on zext value.
+
+define i16 @sample_test(i16 %x, i32 %index) nounwind {
+; CHECK-LABEL: @sample_test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call i16 @llvm.genx.GenISA.WaveShuffleIndex.i16(i16 [[X:%.*]], i32 [[INDEX:%.*]], i32 0)
+; CHECK-NEXT:    ret i16 [[SHUFFLE]]
+;
+entry:
+  %zext = zext i16 %x to i32
+  %shuffle = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %zext, i32 %index, i32 0)
+  %trunc = trunc i32 %shuffle to i16
+  ret i16 %trunc
+}
+
+
+declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32)
+
+!IGCMetadata = !{!0}
+
+!0 = !{!"ModuleMD", !1}
+!1 = !{!"compOpt", !2}
+!2 = !{!"FastRelaxedMath", i1 true}