Skip to content

Commit

Permalink
Optimize trunc on WaveShuffle on zext
Browse files Browse the repository at this point in the history
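Emit the optimized pattern: when an i16 value is zero-extended to i32, passed through a sub-group shuffle, and truncated back to i16, perform the shuffle directly on the i16 value.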
  • Loading branch information
bgajdaINTC authored and igcbot committed Dec 11, 2024
1 parent fd3672e commit 32adf24
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 0 deletions.
19 changes: 19 additions & 0 deletions IGC/Compiler/CISACodeGen/helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1627,6 +1627,25 @@ namespace IGC
}
}

// Reports whether I is a call to one of the sub-group shuffle-style GenISA
// intrinsics: WaveShuffleIndex, WaveBroadcast, WaveClusteredBroadcast or
// simdShuffleXor. Any other instruction (including non-GenISA ones) yields false.
bool isSubGroupShuffleVariant(const llvm::Instruction* I)
{
    if (const GenIntrinsicInst* intrinsicCall = dyn_cast<GenIntrinsicInst>(I))
    {
        const auto id = intrinsicCall->getIntrinsicID();
        return id == GenISAIntrinsic::GenISA_WaveShuffleIndex ||
               id == GenISAIntrinsic::GenISA_WaveBroadcast ||
               id == GenISAIntrinsic::GenISA_WaveClusteredBroadcast ||
               id == GenISAIntrinsic::GenISA_simdShuffleXor;
    }

    // Not a GenISA intrinsic call at all.
    return false;
}

bool hasSubGroupIntrinsicPVC(llvm::Function& F)
{
for (auto& BB : F)
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ namespace IGC
bool IsSIMDBlockIntrinsic(const llvm::Instruction* inst);
bool isSubGroupIntrinsic(const llvm::Instruction* I);
bool isSubGroupIntrinsicPVC(const llvm::Instruction* I);
// Returns true when I is a WaveShuffleIndex, WaveBroadcast, WaveClusteredBroadcast
// or simdShuffleXor GenISA intrinsic call; false for any other instruction.
bool isSubGroupShuffleVariant(const llvm::Instruction* I);
bool hasSubGroupIntrinsicPVC(llvm::Function& F);

bool isBarrierIntrinsic(const llvm::Instruction* I);
Expand Down
36 changes: 36 additions & 0 deletions IGC/Compiler/CustomSafeOptPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1928,6 +1928,42 @@ void CustomSafeOptPass::visitBinaryOperator(BinaryOperator& I)
}
}

void CustomSafeOptPass::visitTruncInst(TruncInst& I)
{
    /*
    Demotes a sub-group shuffle that is sandwiched between a zext and a trunc
    so that it operates directly on the narrow type.

    Before:
        %334 = zext i16 %orig to i32
        %335 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %334, i32 %333, i32 0)
        %336 = trunc i32 %335 to i16
    After:
        %335 = call i16 @llvm.genx.GenISA.WaveShuffleIndex.i16(i16 %orig, i32 %333, i32 0)

    The trunc and the wide shuffle call are erased; the zext itself is not
    erased by this function.
    */

    // Only an i32 -> i16 truncation is considered.
    if( !I.getSrcTy()->isIntegerTy( 32 ) || !I.getDestTy()->isIntegerTy( 16 ) )
        return;

    // All shuffle variants fed by a zext are known to be safe to demote
    // (unlike WaveAll, which might not be). The shuffle must have no other
    // user, because it is erased below.
    auto* wideShuffle = dyn_cast<GenIntrinsicInst>( I.getOperand( 0 ) );
    if( !wideShuffle || !isSubGroupShuffleVariant( wideShuffle ) || !wideShuffle->hasOneUse() )
        return;

    // The shuffled value has to be an i16 that was zero-extended to i32.
    auto* widening = dyn_cast<ZExtInst>( wideShuffle->getOperand( 0 ) );
    if( !widening || !widening->getSrcTy()->isIntegerTy( 16 ) || !widening->getDestTy()->isIntegerTy( 32 ) )
        return;

    IRBuilder<> builder( &I );

    // Reuse every argument of the wide call, except the shuffled value, which
    // sits at position 0 for all intrinsics accepted above and is swapped for
    // the original narrow value.
    llvm::SmallVector<Value*> narrowArgs( wideShuffle->args().begin(), wideShuffle->args().end() );
    narrowArgs[ 0 ] = widening->getOperand( 0 );

    // Requesting the declaration through GenISAIntrinsic makes sure the module
    // gets a proper function declaration for the demoted (i16) type.
    Function* narrowDeclaration = GenISAIntrinsic::getDeclaration( I.getModule(), wideShuffle->getIntrinsicID(), builder.getInt16Ty() );
    Value* narrowShuffle = builder.CreateCall( narrowDeclaration, narrowArgs );

    // The trunc goes first so that the wide shuffle has no users left when it
    // is erased.
    I.replaceAllUsesWith( narrowShuffle );
    I.eraseFromParent();
    wideShuffle->eraseFromParent();
}

void IGC::CustomSafeOptPass::visitLdptr(llvm::SamplerLoadIntrinsic* inst)
{
if (!IGC_IS_FLAG_ENABLED(UseHDCTypedReadForAllTextures) &&
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CustomSafeOptPass.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ namespace IGC
void visitShuffleIndex(llvm::CallInst* I);
void visitSelectInst(llvm::SelectInst& S);
void mergeDotAddToDp4a(llvm::CallInst* I);
// Demotes an i32 sub-group shuffle of a zero-extended i16 value to an i16
// shuffle when its only user is a trunc back to i16.
void visitTruncInst( llvm::TruncInst& I );

//
// IEEE Floating point arithmetic is not associative. Any pattern
Expand Down
33 changes: 33 additions & 0 deletions IGC/Compiler/tests/CustomSafeOptPass/trunc_shuffle_zext.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2020-2021 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; REQUIRES: llvm-14-plus
; RUN: igc_opt -igc-custom-safe-opt -dce -verify -S < %s | FileCheck %s
;
; Checks that a WaveShuffleIndex applied to a zero-extended i16 value and then
; truncated back to i16 is demoted to an i16 WaveShuffleIndex.

; Input pattern: %x is widened to i32 with zext, shuffled as i32, and the result
; is truncated back to i16. The expected output below is a single i16 shuffle
; call on %x whose result is returned directly.
define i16 @sample_test(i16 %x, i32 %index) nounwind {
; CHECK-LABEL: @sample_test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = call i16 @llvm.genx.GenISA.WaveShuffleIndex.i16(i16 [[X:%.*]], i32 [[INDEX:%.*]], i32 0)
; CHECK-NEXT: ret i16 [[SHUFFLE]]
;
entry:
; Widen the i16 input so it can feed the i32 flavour of the shuffle.
%zext = zext i16 %x to i32
%shuffle = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %zext, i32 %index, i32 0)
; Narrow the shuffled value back to the original i16 width.
%trunc = trunc i32 %shuffle to i16
ret i16 %trunc
}


declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32)

!IGCMetadata = !{!0}

!0 = !{!"ModuleMD", !1}
!1 = !{!"compOpt", !2}
!2 = !{!"FastRelaxedMath", i1 true}

0 comments on commit 32adf24

Please sign in to comment.