From 76c73122859802a664cfc7d9ac2965fed67b5517 Mon Sep 17 00:00:00 2001 From: Bartlomiej Gajda Date: Tue, 29 Oct 2024 18:26:42 +0000 Subject: [PATCH] Extend capability of wavePrefix. Extend capability of wavePrefix. --- IGC/Compiler/CISACodeGen/EmitVISAPass.cpp | 27 ++++++++++++++++------- IGC/Compiler/CISACodeGen/EmitVISAPass.hpp | 2 +- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp index 4a0507de2869..6d45b6f44978 100644 --- a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp +++ b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp @@ -21755,7 +21755,10 @@ static void GetReductionOp(WaveOps op, Type* opndTy, uint64_t& identity, e_opcod void EmitPass::emitWavePrefix(WavePrefixIntrinsic* I) { - bool disableHelperLanes = int_cast(cast(I->getArgOperand(4))->getSExtValue()) == 2; + auto helperLanes = int_cast( cast( I->getArgOperand( 4 ) )->getSExtValue() ); + bool disableHelperLanes = (helperLanes == 2); + bool getFullPrefix = ( helperLanes == 4 ); + if (disableHelperLanes) { ForceDMask(); @@ -21771,7 +21774,7 @@ void EmitPass::emitWavePrefix(WavePrefixIntrinsic* I) } m_encoder->SetSubSpanDestination(false); emitScan( - I->getSrc(), I->getOpKind(), I->isInclusiveScan(), Mask, false); + I->getSrc(), I->getOpKind(), I->isInclusiveScan(), Mask, false, getFullPrefix ); if (disableHelperLanes) { ResetVMask(); @@ -21786,7 +21789,7 @@ void EmitPass::emitQuadPrefix(QuadPrefixIntrinsic* I) void EmitPass::emitScan( Value* Src, IGC::WaveOps Op, - bool isInclusiveScan, Value* Mask, bool isQuad) + bool isInclusiveScan, Value* Mask, bool isQuad, bool noMask) { VISA_Type type; e_opcode opCode; @@ -21801,11 +21804,19 @@ void EmitPass::emitScan( false, src, dst, Flag, !isInclusiveScan, isQuad); - // Now that we've computed the result in temporary registers, - // make sure we only write the results to lanes participating in the - // scan as specified by 'mask'. - if (Flag) - m_encoder->SetPredicate(Flag); + if(noMask) + { + m_encoder->SetNoMask(); + } + else + { + // Now that we've computed the result in temporary registers, + // make sure we only write the results to lanes participating in the + // scan as specified by 'mask'. + if (Flag) + m_encoder->SetPredicate(Flag); + } + m_encoder->Copy(m_destination, dst[0]); if (m_currShader->m_numberInstance == 2) { diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp index 9ee9f66201df..ef20f5772c4d 100644 --- a/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp +++ b/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp @@ -1069,7 +1069,7 @@ class EmitPass : public llvm::FunctionPass void emitSetMessagePhaseType_legacy(llvm::GenIntrinsicInst* inst, VISA_Type type); void emitScan(llvm::Value* Src, IGC::WaveOps Op, - bool isInclusiveScan, llvm::Value* Mask, bool isQuad); + bool isInclusiveScan, llvm::Value* Mask, bool isQuad, bool noMask = false); // Cached per lane offset variables. This is a per basic block data // structure. For each entry, the first item is the scalar type size in