NOTE(review): reconstructed from a copy of this patch whose line breaks were
collapsed and whose <...> template arguments were stripped. Indentation, the
restored template arguments and the blank context lines are inferred; confirm
against the target tree before applying. The post-image blob hashes on the
"index" lines predate the added comments and the StringRef change below.

diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
index 5aafa92fbcca..67cc07fee681 100644
--- a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
+++ b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
@@ -1537,10 +1537,42 @@ void EmitPass::MovPhiSources(llvm::BasicBlock* aBB)
         Value* dstRootV; // root value of dst (dessa)
         Value* srcRootV; // root value of src (dessa)
     };
+
+    // Vector-type summary of a phi source value: its element count and, when
+    // the value is a recognized constant splat, the bits of the splat element.
+    struct VTyInfo {
+        unsigned numElt;     // element count; 0 when the value is not a fixed vector
+        bool isSplat;        // true for zeroinitializer or an int/fp constant splat
+        uint64_t splatValue; // zero-extended bit pattern of the splat element
+
+        VTyInfo() : numElt(0), isSplat(false), splatValue(0) { }
+        explicit VTyInfo(Value * V) : numElt(0), isSplat(false), splatValue(0) {
+            if (IGCLLVM::FixedVectorType * vTy = dyn_cast<IGCLLVM::FixedVectorType>(V->getType())) {
+                numElt = vTy->getNumElements();
+                if (isa<ConstantAggregateZero>(V)) {
+                    isSplat = true;
+                    splatValue = 0;
+                }
+                else if (ConstantDataVector * CDV = dyn_cast<ConstantDataVector>(V)) {
+                    if (Constant * C = CDV->getSplatValue()) {
+                        if (ConstantInt * CInt = dyn_cast<ConstantInt>(C)) {
+                            isSplat = true;
+                            splatValue = CInt->getZExtValue();
+                        }
+                        else if (ConstantFP * CFP = dyn_cast<ConstantFP>(C)) {
+                            isSplat = true;
+                            splatValue = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+                        }
+                    }
+                }
+            }
+        }
+    };
+
     BumpPtrAllocator phiAllocator;
     std::list<PhiSrcMoveInfo*> phiSrcDstList;
     std::vector<std::pair<CVariable*, CVariable*>> emitList;
-    std::map<CVariable*, unsigned int> dstVTyMap;
+    std::map<CVariable*, VTyInfo> dstVTyMap;
     llvm::BasicBlock* bb = aBB;
     IGCLLVM::TerminatorInst* TI = aBB->getTerminator();
     IGC_ASSERT(nullptr != TI);
@@ -1565,7 +1597,6 @@ void EmitPass::MovPhiSources(llvm::BasicBlock* aBB)
                 if (PN->getIncomingBlock(i) == bb)
                 {
                     Value* Src = PN->getOperand(i);
-
                     Value* dstRootV = m_deSSA ? m_deSSA->getRootValue(PN) : PN;
                     Value* srcRootV = m_deSSA ? m_deSSA->getRootValue(Src) : Src;
                     dstRootV = dstRootV ? dstRootV : PN;
@@ -1575,19 +1606,14 @@ void EmitPass::MovPhiSources(llvm::BasicBlock* aBB)
                     // might have the same variable with two different CVariable.
                     if (dstRootV != srcRootV)
                     {
+                        VTyInfo vTyInfo(Src);
                         PhiSrcMoveInfo* phiInfo = new (phiAllocator) PhiSrcMoveInfo();
+                        phiInfo->srcCVar = vTyInfo.isSplat ? nullptr : m_currShader->GetSymbol(Src);
                         phiInfo->dstCVar = m_currShader->GetSymbol(PN);
-                        phiInfo->srcCVar = m_currShader->GetSymbol(Src);
                         phiInfo->dstRootV = dstRootV;
                         phiInfo->srcRootV = srcRootV;
                         phiSrcDstList.push_back(phiInfo);
-
-                        int numElt = 0;
-                        if (IGCLLVM::FixedVectorType * vTy = dyn_cast<IGCLLVM::FixedVectorType>(PN->getType()))
-                        {
-                            numElt = int_cast<int>(vTy->getNumElements());
-                        }
-                        dstVTyMap.insert(std::pair<CVariable*, unsigned int>(phiInfo->dstCVar, numElt));
+                        dstVTyMap.insert(std::make_pair(phiInfo->dstCVar, vTyInfo));
                     }
                 }
             }
@@ -1651,14 +1677,16 @@ void EmitPass::MovPhiSources(llvm::BasicBlock* aBB)
                         if (phiinfo->srcRootV == dRootV) {
                             CVariable* sVar = phiinfo->srcCVar;
                             CVariable* nVar;
-                            if (sVar->GetType() != T->GetType()) {
-                                nVar = m_currShader->GetNewAlias(
-                                    T, sVar->GetType(), 0, sVar->GetNumberElement());
-                            }
-                            else {
-                                nVar = T;
+                            if (sVar) { // sVar is null if srcCVar is a constant splat vector
+                                if (sVar->GetType() != T->GetType()) {
+                                    nVar = m_currShader->GetNewAlias(
+                                        T, sVar->GetType(), 0, sVar->GetNumberElement());
+                                }
+                                else {
+                                    nVar = T;
+                                }
+                                phiinfo->srcCVar = nVar;
                             }
-                            phiinfo->srcCVar = nVar;
                         }
                     }
                 }
@@ -1688,13 +1716,14 @@ void EmitPass::MovPhiSources(llvm::BasicBlock* aBB)
         for (uint instance = 0; instance < dst->GetNumberInstance(); instance++)
         {
             m_encoder->SetSecondHalf(instance == 1 ? true : false);
-            unsigned int numVTyElt = dstVTyMap[dst];
-            if (numVTyElt > 0)
-            {
-                emitVectorCopy(dst, src, numVTyElt);
+            const VTyInfo & vTyInfo = dstVTyMap[dst];
+            if (vTyInfo.isSplat) {
+                emitConstantVector(dst, vTyInfo.splatValue);
             }
-            else
-            {
+            else if (vTyInfo.numElt > 0) {
+                emitVectorCopy(dst, src, vTyInfo.numElt);
+            }
+            else {
                 m_encoder->Copy(dst, src);
                 m_encoder->Push();
             }
@@ -20103,6 +20132,22 @@ void EmitPass::emitVectorCopy(CVariable* Dst, CVariable* Src, uint32_t nElts,
     }
 }
 
+// emitConstantVector()
+//   Fills Dst with the immediate 'value' (typed as Dst's type): one Copy per
+//   sub-register offset, stepping by 1 when Dst is uniform and by the SIMD
+//   lane count otherwise, until Dst->GetNumberElement() is reached.
+void EmitPass::emitConstantVector(CVariable* Dst, uint64_t value)
+{
+    uint16_t width = Dst->IsUniform() ? 1 : numLanes(m_currShader->m_SIMDSize);
+    CVariable * constant = m_currShader->ImmToVariable(value, Dst->GetType());
+    for (uint16_t i = 0; width * i < Dst->GetNumberElement(); ++i)
+    {
+        m_encoder->SetDstSubReg(width * i);
+        m_encoder->Copy(Dst, constant);
+        m_encoder->Push();
+    }
+}
+
 //emitVectorCopyToOrFromAOS()
 // To implement the follow two functions.
 // emitVectorCopyToAOS(): IsToAOS = true
diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp
index 04f56888d745..16a550b18250 100644
--- a/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp
+++ b/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp
@@ -490,6 +490,8 @@ class EmitPass : public llvm::FunctionPass
 
     void emitVectorCopy(CVariable* Dst, CVariable* Src, uint32_t nElts,
         uint32_t DstSubRegOffset = 0, uint32_t SrcSubRegOffset = 0, bool allowLargerSIMDSize = false);
+    // Fills Dst with the immediate 'value', interpreted with Dst's element type.
+    void emitConstantVector(CVariable* Dst, uint64_t value = 0);
     void emitCopyAll(CVariable* Dst, CVariable* Src, llvm::Type* Ty);
 
     void emitPushFrameToStack(Function* ParentFunction, unsigned& pushSize);
diff --git a/IGC/Compiler/CISACodeGen/VariableReuseAnalysis.cpp b/IGC/Compiler/CISACodeGen/VariableReuseAnalysis.cpp
index fbc78a4e2d58..5c951f18cbcc 100644
--- a/IGC/Compiler/CISACodeGen/VariableReuseAnalysis.cpp
+++ b/IGC/Compiler/CISACodeGen/VariableReuseAnalysis.cpp
@@ -1168,7 +1168,13 @@ void VariableReuseAnalysis::InsertElementAliasing(Function* F)
     // =0x2: subvec aliasing for both isolated and non-isolated value)
     const auto control = (m_pCtx->getVectorCoalescingControl() & 0x3);
     // To avoid increasing GRF pressure, skip if F is too large or not an entry
-    const int32_t NumBBThreshold = (int)IGC_GET_FLAG_VALUE(VectorAliasBBThreshold);
+    // We remove the threshold when the code comes from CUTLASS (function name
+    // starts with "_Z" and contains "cutlass"), which often generates a large
+    // number of basic blocks.
+    const llvm::StringRef FnName = F->getName(); // inspect in place, no std::string copies
+    const int32_t NumBBThreshold =
+        FnName.substr(0, 2) == "_Z" && FnName.find("cutlass") != llvm::StringRef::npos
+        ? std::numeric_limits<int32_t>::max()
+        : static_cast<int32_t>(IGC_GET_FLAG_VALUE(VectorAliasBBThreshold));
     MetaDataUtils* pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
     if (control == 0 || !isEntryFunc(pMdUtils, F) || getNumBBs(F) > NumBBThreshold) {
         return;