diff --git a/IGC/Compiler/CISACodeGen/CodeSinking.cpp b/IGC/Compiler/CISACodeGen/CodeSinking.cpp index 0aa9f929002a..b29e298b33a1 100644 --- a/IGC/Compiler/CISACodeGen/CodeSinking.cpp +++ b/IGC/Compiler/CISACodeGen/CodeSinking.cpp @@ -44,6 +44,7 @@ See LICENSE.TXT for details. #include "llvm/IR/Verifier.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvmWrapper/IR/Function.h" +#include "llvmWrapper/IR/Value.h" #include "common/LLVMWarningsPop.hpp" #include "Compiler/CodeGenPublic.h" #include "Compiler/CISACodeGen/CodeSinking.hpp" @@ -67,10 +68,12 @@ namespace IGC { IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) IGC_INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) + IGC_INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) + IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis) IGC_INITIALIZE_PASS_END(CodeSinking, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS) CodeSinking::CodeSinking(bool generalSinking) : FunctionPass(ID) { - generalCodeSinking = generalSinking; + generalCodeSinking = generalSinking || IGC_IS_FLAG_ENABLED(ForceLoopSink); initializeCodeSinkingPass(*PassRegistry::getPassRegistry()); } @@ -207,7 +210,7 @@ namespace IGC { return false; } if (IGC_IS_FLAG_ENABLED(DisableCodeSinking) || - numInsts(F) < CODE_SINKING_MIN_SIZE) + numInsts(F) < IGC_GET_FLAG_VALUE(CodeSinkingMinSize)) { return false; } @@ -215,8 +218,13 @@ namespace IGC { DT = &getAnalysis().getDomTree(); PDT = &getAnalysis().getPostDomTree(); LI = &getAnalysis().getLoopInfo(); + AA = &getAnalysis().getAAResults(); + WI = &getAnalysis(); DL = &F.getParent()->getDataLayout(); + MemoizedStoresInLoops.clear(); + BlacklistedLoops.clear(); + bool changed = hoistCongruentPhi(F); bool madeChange, everMadeChange = false; @@ -237,26 +245,19 @@ namespace IGC { } while (madeChange /*diagnosis: && numChanges < sinkLimit*/); everMadeChange = madeChange; - for (SmallPtrSet::iterator BI = localBlkSet.begin(), BE = localBlkSet.end(); BI 
!= BE; BI++) + for (auto BI = LocalBlkSet.begin(), BE = LocalBlkSet.end(); BI != BE; BI++) { madeChange = LocalSink(*BI); everMadeChange |= madeChange; } - localBlkSet.clear(); - localInstSet.clear(); + LocalBlkSet.clear(); + LocalInstSet.clear(); CTX->m_numGradientSinked = totalGradientMoved; - uint32_t GRFThresholdDelta = IGC_GET_FLAG_VALUE(LoopSinkThresholdDelta); - uint32_t ngrf = CTX->getNumGRFPerThread(); for (unsigned i = 0, n = m_fatLoops.size(); i < n; ++i) { auto FatLoop = m_fatLoops[i]; - auto Pressure = m_fatLoopPressures[i]; - // Enable multiple-level loop sink if pressure is high enough - bool sinkMultiLevel = (Pressure > (2*ngrf + 2 * GRFThresholdDelta)); - if (loopSink(FatLoop, sinkMultiLevel)) { - changed = true; - } + changed |= loopSink(FatLoop); } m_fatLoopPressures.clear(); m_fatLoops.clear(); @@ -364,7 +365,7 @@ namespace IGC { pressure0 = EstimateLiveOutPressure(&blk, DL); uint32_t GRFThresholdDelta = IGC_GET_FLAG_VALUE(LoopSinkThresholdDelta); uint32_t ngrf = CTX->getNumGRFPerThread(); - if (pressure0 > (2*ngrf + GRFThresholdDelta) && + if ((pressure0 > (2*ngrf + GRFThresholdDelta) || IGC_IS_FLAG_ENABLED(ForceLoopSink)) && CTX->type == ShaderType::OPENCL_SHADER) { if (auto L = findLoopAsPreheader(blk)) @@ -457,7 +458,7 @@ namespace IGC { return madeChange; } - static bool reduceRP(Instruction* Inst) + static bool isCastInstrReducingPressure(Instruction* Inst) { if (auto CI = dyn_cast(Inst)) { @@ -468,23 +469,7 @@ namespace IGC { // Non-primitive types. return false; } - if (SrcSize == 1) - { - // i1 -> i32, reduces GRF pressure but increases flag pressure. - // Do not consider it as reduce. - return false; - } - else if (DstSize == 1) - { - // i32 -> i1, reduces flag pressure but increases grf pressure. - // Consider it as reduce. - return true; - } - else if (SrcSize < DstSize) - { - // sext i32 to i64. 
- return true; - } + return SrcSize <= DstSize; } return false; @@ -515,8 +500,8 @@ namespace IGC { isa(inst)) { hasAliasConcern = false; - // sink CmpInst to make the flag-register lifetime short - reducePressure = (reduceRP(inst) || isa(inst)); + // sink CmpInst to make the flag-register lifetime short only if it's uniform + reducePressure = (isCastInstrReducingPressure(inst) || (isa(inst) && WI->isUniform(inst))); return true; } } @@ -614,47 +599,145 @@ namespace IGC { return false; } + const CodeSinking::StoresVec CodeSinking::getAllStoresInLoop(Loop *L) + { + IGC_ASSERT(!BlacklistedLoops.count(L)); + + // if all the stores for this loop are not memoized yet, do it first + if (!MemoizedStoresInLoops.count(L)) + { + llvm::SmallVector& StoresInLoop = MemoizedStoresInLoops[L]; + for (BasicBlock *BB: L->blocks()) + { + for (Instruction &I : *BB) + { + if (I.mayWriteToMemory()) + { + StoresInLoop.push_back(&I); + } + } + } + } + return MemoizedStoresInLoops[L]; + } + + /// isSafeToLoopSinkLoad - Determine whether it is safe to sink the load + /// instruction in the loop, using alias information + bool CodeSinking::isSafeToLoopSinkLoad(Instruction *InstToSink, Loop *L, AliasAnalysis *AA) + { + if (!L || !AA) + return false; + + if (BlacklistedLoops.count(L)) + return false; + + // Only load instructions are supported for now + if (!isa(InstToSink)) + return false; + + IGC_ASSERT(InstToSink->getParent() == L->getLoopPreheader()); + + auto getRemainingStoresInBB = [](Instruction *I) + { + StoresVec Stores; + BasicBlock *BB = I->getParent(); + Instruction *Last = BB->getTerminator(); + for ( ; I != Last ; I = I->getNextNode()) + { + if (I->mayWriteToMemory()) + { + Stores.push_back(I); + } + } + return Stores; + }; + + StoresVec RemainingStores = getRemainingStoresInBB(InstToSink); + StoresVec LoopStores = getAllStoresInLoop(L); + MemoryLocation A = MemoryLocation::get(InstToSink); + for (auto Stores : { &RemainingStores, &LoopStores }) + { + for (Instruction *I: 
*Stores) + { + if (StoreInst *SI = dyn_cast(I)) + { + MemoryLocation B = MemoryLocation::get(SI); + if (!A.Ptr || !B.Ptr || AA->alias(A, B)) + { + return false; + } + continue; + } + if (GenIntrinsicInst *Intr = dyn_cast(I)) + { + if (Intr->getIntrinsicID() == GenISAIntrinsic::GenISA_LSCPrefetch) + { + continue; + } + } + + // unsupported store + if (L->contains(I->getParent())) + BlacklistedLoops.insert(L); + return false; + } + } + + return true; + } + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool CodeSinking::SinkInstruction( - Instruction* inst, - SmallPtrSetImpl& Stores, - bool ForceToReducePressure) + Instruction *InstToSink, + SmallPtrSetImpl &Stores, + bool IsLoopSink) { // Check if it's safe to move the instruction. - bool hasAliasConcern =false; - bool reducePressure = false; - if (!isSafeToMove(inst, reducePressure, hasAliasConcern, Stores/*, AA*/)) + bool HasAliasConcern = false; + bool ReducePressure = false; + + if (!isSafeToMove(InstToSink, ReducePressure, HasAliasConcern, Stores/*, AA*/)) return false; - if (ForceToReducePressure) { - reducePressure = true; + + if (IsLoopSink) + { + // forcing that we reduce pressure + // as we already checked it is beneficial to sink in the loop + ReducePressure = true; } // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. 
- BasicBlock* succToSinkTo = 0; - SmallPtrSet usesInBlk; - if (!hasAliasConcern) + BasicBlock *SuccToSinkTo = nullptr; + SmallPtrSet UsesInBB; + + if (!HasAliasConcern || IsLoopSink) { // find the lowest common dominator of all uses - BasicBlock* tgtBlk = 0x0; - bool outerLoop = false; - if (FindLowestSinkTarget(inst, tgtBlk, usesInBlk, outerLoop, ForceToReducePressure)) + BasicBlock *TgtBB = nullptr; + bool IsOuterLoop = false; + if (FindLowestSinkTarget(InstToSink, TgtBB, UsesInBB, IsOuterLoop, IsLoopSink)) { - // heuristic, avoid code-motion that does not reduce execution frequency but may increase register usage - if (reducePressure || - (tgtBlk && (outerLoop || !PDT->dominates(tgtBlk, inst->getParent())))) + // heuristic, avoid code-motion that does not reduce execution frequency + // but may increase register usage + if (ReducePressure || + (TgtBB && (IsOuterLoop || !PDT->dominates(TgtBB, InstToSink->getParent())))) { - succToSinkTo = tgtBlk; + if (!HasAliasConcern || + (IsLoopSink && isSafeToLoopSinkLoad(InstToSink, LI->getLoopFor(TgtBB), AA))) + { + SuccToSinkTo = TgtBB; + } } } else { // local code motion for cases like cmp and pln - if (reducePressure) + if (ReducePressure) { - localBlkSet.insert(inst->getParent()); - localInstSet.insert(inst); + LocalBlkSet.insert(InstToSink->getParent()); + LocalInstSet.insert(InstToSink); } return false; } @@ -663,93 +746,127 @@ namespace IGC { { // when aliasing is a concern, only look at all the immed successors and // decide which one we should sink to, if any. - BasicBlock* curBlk = inst->getParent(); - for (succ_iterator I = succ_begin(inst->getParent()), - E = succ_end(inst->getParent()); I != E && succToSinkTo == 0; ++I) + BasicBlock *CurBB = InstToSink->getParent(); + for (succ_iterator I = succ_begin(InstToSink->getParent()), + E = succ_end(InstToSink->getParent()); I != E && SuccToSinkTo == 0; ++I) { // avoid sinking an instruction into its own block. This can // happen with loops. 
- if ((*I) == curBlk) + if ((*I) == CurBB) continue; // punt on it because of alias concern - if ((*I)->getUniquePredecessor() != curBlk) + if ((*I)->getUniquePredecessor() != CurBB) continue; // Don't move instruction across a loop. - Loop* succLoop = LI->getLoopFor((*I)); - Loop* currLoop = LI->getLoopFor(curBlk); + Loop *succLoop = LI->getLoopFor((*I)); + Loop *currLoop = LI->getLoopFor(CurBB); if (succLoop != currLoop) continue; - if (AllUsesDominatedByBlock(inst, (*I), usesInBlk)) - succToSinkTo = *I; + if (AllUsesDominatedByBlock(InstToSink, (*I), UsesInBB)) + SuccToSinkTo = *I; } } // If we couldn't find a block to sink to, ignore this instruction. - if (succToSinkTo == 0) + if (!SuccToSinkTo) { return false; } - if (ComputesGradient(inst)) + if (ComputesGradient(InstToSink)) { numGradientMovedOutBB++; } - if (!reducePressure || hasAliasConcern) + if (!IsLoopSink && HasAliasConcern) { - inst->moveBefore(&(*succToSinkTo->getFirstInsertionPt())); + InstToSink->moveBefore(&(*SuccToSinkTo->getFirstInsertionPt())); } // when alasing is not an issue and reg-pressure is not an issue // move it as close to the uses as possible - else if (usesInBlk.empty()) + else if (UsesInBB.empty()) { - inst->moveBefore(succToSinkTo->getTerminator()); + InstToSink->moveBefore(SuccToSinkTo->getTerminator()); } - else if (usesInBlk.size() == 1) + else if (UsesInBB.size() == 1) { - Instruction* use = *(usesInBlk.begin()); - inst->moveBefore(use); + InstToSink->moveBefore(*(UsesInBB.begin())); } else { // first move to the beginning of the target block - inst->moveBefore(&(*succToSinkTo->getFirstInsertionPt())); + InstToSink->moveBefore(&(*SuccToSinkTo->getFirstInsertionPt())); // later on, move it close to the use - localBlkSet.insert(succToSinkTo); - localInstSet.insert(inst); + LocalBlkSet.insert(SuccToSinkTo); + LocalInstSet.insert(InstToSink); } return true; } - bool CodeSinking::LocalSink(BasicBlock* blk) + bool CodeSinking::LocalSink(BasicBlock *BB) { - bool madeChange = false; 
- for (BasicBlock::iterator I = blk->begin(), E = blk->end(); I != E; ++I) + auto getInsertPointBeforeUse = [&](Instruction *Use) { - Instruction* use = &(*I); - for (unsigned i = 0; i < use->getNumOperands(); ++i) + // Try scheduling the instruction earlier than the use. + // Useful for loads to cover some latency. + int Cnt = IGC_GET_FLAG_VALUE(CodeSinkingLoadSchedulingInstr); + Instruction *InsertPoint = Use; + Instruction *I = Use->getPrevNode(); + for (;;) { + if (I == nullptr) + break; + if (isa(I)) + break; + if (I->mayWriteToMemory()) + { + // At this point of the program we might have lost some information + // About aliasing so don't schedule anything before possible stores + // But it's OK to alias with prefetch + GenIntrinsicInst *Intr = dyn_cast(I); + if (!(Intr && Intr->getIntrinsicID() == GenISAIntrinsic::GenISA_LSCPrefetch)) + { + break; + } + } + if (--Cnt <= 0) + break; + InsertPoint = I; + I = I->getPrevNode(); + } + return InsertPoint; + }; + + bool Changed = false; + for (auto &I : *BB) + { + Instruction *Use = &I; + for (unsigned i = 0; i < Use->getNumOperands(); ++i) { - Instruction* def = dyn_cast(use->getOperand(i)); - if (def && def->getParent() == blk && localInstSet.count(def)) + Instruction *Def = dyn_cast(Use->getOperand(i)); + if (Def && Def->getParent() == BB && LocalInstSet.count(Def)) { - // "use" can be a phi-node for a single-block loop, + // "Use" can be a phi-node for a single-block loop, // which is not really a local-code-motion - if (def->getNextNode() != use && !isa(use)) + if (Def->getNextNode() != Use && !isa(Use)) { - if (!def->getMetadata("implicitGlobalID")) + if (!Def->getMetadata("implicitGlobalID")) { - def->moveBefore(use); - madeChange = true; + // If it's a load we'll try scheduling earlier than the use + // to cover latency + Instruction *InsertPoint = + isa(Def) ? 
getInsertPointBeforeUse(Use) : Use; + Def->moveBefore(InsertPoint); + Changed = true; } } - localInstSet.erase(def); + LocalInstSet.erase(Def); } } } - if (madeChange) { - ProcessDbgValueInst(*blk); + if (Changed) { + ProcessDbgValueInst(*BB); } - return madeChange; + return Changed; } /////////////////////////////////////////////////////////////////////////// @@ -1109,126 +1226,153 @@ namespace IGC { return changed; } - bool CodeSinking::loopSink(Loop* LoopWithPressure, bool SinkMultipleLevel) + bool CodeSinking::loopSink(Loop *L) { // Sink loop invariants back into the loop body if register // pressure can be reduced. - // L0 is inner loop - Loop* const L0 = LoopWithPressure; - IGC_ASSERT(L0); + IGC_ASSERT(L); - // L1 is parent loop - Loop* L1 = nullptr; - if (SinkMultipleLevel) { - L1 = L0->getParentLoop(); - } + // No Preheader, stop! + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; - // At most, do two-level loop sink - // x = ... - // ParentLoop - // y = ... - // Loop: - // = x - // = y - // Normally, only y can be sinked. When multiLevel is true, - // x can be sinked into Loop (inner) as well. - bool changed = false; - for (int i = 0; i < 2; ++i) - { - Loop* L = (i == 0) ? L0 : L1; - if (!L) { - break; - } - // No Preheader, stop! - BasicBlock* Preheader = L->getLoopPreheader(); - if (!Preheader) - break; + bool EverChanged = false; - // Find LIs in preheader that would definitely reduce - // register pressure after moving those LIs inside the loop - SmallPtrSet stores; - SmallVector sinkCandidates; + // Find LIs in preheader that would definitely reduce + // register pressure after moving those LIs inside the loop + SmallPtrSet Stores; + SmallVector SinkCandidates; + SmallPtrSet LoadChains; + bool IterChanged = false; + do + { // Moving LI back to the loop. Here we only consider to move LIs into // the single BB (BBWithPressure). 
// // Go over instructions in reverse order and sink the noOp instructions // on-the-fly first, so that their dependent instructions can be added // into candidate lists for further sinking. + + Stores.clear(); + SinkCandidates.clear(); + + // If we sinked something we could allow sinking of the previous instructions as well + // on the next iteration of do-loop + // + // For example, here we sink 2 EE first and need one more iteration to sink load: + // preheader: + // %l = load <2 x double> + // extractelement 1, %l + // extractelement 2, %l + // loop: + // ... + IterChanged = false; + for (auto II = Preheader->rbegin(), IE = Preheader->rend(); II != IE;) { - Instruction* I = &*II++; + Instruction *I = &*II++; - if (I->mayWriteToMemory()) { - stores.insert(I); - } - if (!canLoopSink(I, L)) - continue; + if (I->mayWriteToMemory()) + Stores.insert(I); - // Sink noOp instruction. - if (isNoOpInst(I, CTX) || reduceRP(I)) { - if (SinkInstruction(I, stores, true)) { - changed = true; - } - continue; - } - - sinkCandidates.push_back(I); + if (isLoopSinkCandidate(I, L)) + SinkCandidates.push_back(I); } - bool t = LoopSinkInstructions(sinkCandidates, L); - changed |= t; - - if (changed) { + IterChanged |= loopSinkInstructions(SinkCandidates, LoadChains, L); + if (IterChanged) + { + EverChanged = true; ProcessDbgValueInst(*Preheader); } - } + } while (IterChanged); // Invoke LocalSink() to move def to its first use // (Currently, it should be no opt as LoopSink only // sinks singleUse instructions, which should be done // completely within sinkInstruction. 
- if (localBlkSet.size() > 0) + if (LocalBlkSet.size() > 0) { - for (auto BI = localBlkSet.begin(), BE = localBlkSet.end(); BI != BE; BI++) + for (auto BI = LocalBlkSet.begin(), BE = LocalBlkSet.end(); BI != BE; BI++) { - BasicBlock* BB = *BI; - bool t = LocalSink(BB); - changed |= t; + BasicBlock *BB = *BI; + EverChanged |= LocalSink(BB); } - localBlkSet.clear(); - localInstSet.clear(); + LocalBlkSet.clear(); + LocalInstSet.clear(); } - return changed; + return EverChanged; + } + + bool CodeSinking::isAlwaysSinkInstruction(Instruction *I) + { + return (isa(I) || + isa(I) || + isa(I) || + isa(I)); } - bool CodeSinking::canLoopSink(Instruction* I, Loop* L) + bool CodeSinking::isLoopSinkCandidate(Instruction *I, Loop *L) { // Limit sinking for the following case for now. - for (const User* UserInst : I->users()) + for (const User *UserInst : I->users()) { if (!isa(UserInst)) return false; if (!L->contains(cast(UserInst))) return false; } - return (isNoOpInst(I, CTX) || reduceRP(I) || - isa(I) /*|| isa(I)*/); + + if (isAlwaysSinkInstruction(I) || isa(I) || isa(I)) + return true; + if (isa(I) && IGC_IS_FLAG_ENABLED(EnableLoadsLoopSink)) + return true; + + return false; } - bool CodeSinking::LoopSinkInstructions( - SmallVector sinkCandidates, - Loop* L) + bool CodeSinking::loopSinkInstructions( + SmallVector &SinkCandidates, + SmallPtrSet &LoadChains, + Loop *L) { - auto IsUsedInLoop = [](Value* V, Loop* L) -> bool { - if (isa(V)) { + struct OperandUseGroup { + SmallPtrSet Operands; + SmallVector Users; + + void print(raw_ostream& OS) + { + OS << "OUG " << Operands.size() << " -> " << Users.size() << "\n"; + OS << " Operands:\n"; + for (Value* V : Operands) + { + OS << " "; + V->print(OS); + OS << "\n"; + } + OS << " Users:\n"; + for (Instruction* I : Users) + { + OS << " "; + I->print(OS); + OS << "\n"; + } + } + }; + + auto isUsedInLoop = [](Value *V, Loop *L) -> bool { + if (isa(V)) + { // Ignore constant return false; } - for (auto UI : V->users()) { - if 
(Instruction * User = dyn_cast(UI)) + for (auto UI : V->users()) + { + if (Instruction *User = dyn_cast(UI)) { if (L->contains(User)) return true; @@ -1237,9 +1381,11 @@ namespace IGC { return false; }; - auto IsSameSet = [](SmallPtrSet & S0, SmallPtrSet & S1)-> bool { - if (S0.size() == S1.size()) { - for (auto I : S1) { + auto isSameSet = [](SmallPtrSet &S0, SmallPtrSet &S1) -> bool { + if (S0.size() == S1.size()) + { + for (auto I : S1) + { Value* V = I; if (!S0.count(V)) { return false; @@ -1250,6 +1396,89 @@ namespace IGC { return false; }; + // Check that this instruction is a part of address calc + // chain of an already sinked load + auto isLoadChain = [&LoadChains](Instruction *I) -> bool + { + if (!isa(I)) + return false; + User *InstrUser = IGCLLVM::getUniqueUndroppableUser(I); + if (!InstrUser) + return false; + Instruction *UI = dyn_cast(InstrUser); + return UI && LoadChains.count(UI); + }; + + auto isBeneficialToSink = [&](OperandUseGroup *OUG)-> bool + { + auto getDstSize = [this](Value *V) -> int + { + int DstSize = 0; + Type* Ty = V->getType(); + if (Ty->isPointerTy()) + { + uint32_t addrSpace = cast(Ty)->getAddressSpace(); + int PtrSize = (int) CTX->getRegisterPointerSizeInBits(addrSpace); + DstSize = PtrSize; + } + else + { + DstSize = (int) Ty->getPrimitiveSizeInBits(); + } + return DstSize; + }; + + IGC_ASSERT(OUG); + + // All instructions are safe to sink always or consume larger type than produce + if (std::all_of(OUG->Users.begin(), OUG->Users.end(), + [this](Instruction *I) + { + return isAlwaysSinkInstruction(I) || isCastInstrReducingPressure(I); + })) + { + return true; + } + + // Estimate how much regpressure we save (in bytes). + // Don't count uniform values. 
This way if every operand that is used only in the loop + // is uniform, but the User (instruction to sink) is uniform, we'll decide it's beneficial to sink + int AccSave = 0; + + for (Value *V : OUG->Operands) + { + int DstSize = getDstSize(V); + if (!DstSize) + return false; + if (WI->isUniform(V)) + continue; + AccSave -= DstSize / 8; + } + + for (Value *V : OUG->Users) + { + int DstSize = getDstSize(V); + if (!DstSize) + return false; + if (WI->isUniform(V)) + continue; + AccSave += DstSize / 8; + } + + // All instructions are part of a chain to already sinked load and don't + // increase pressure too much. It simplifies the code a little and without + // adding remat pass for simple cases + if (AccSave >= 0 && std::all_of(OUG->Users.begin(), OUG->Users.end(), isLoadChain)) + { + return true; + } + + // Compare estimated saved regpressure with the specified threshold + // Number 4 here is just a constant multiplicator of the option to make the numbers more human-friendly, + // as the typical minimum data size is usually 32-bit. 1 (=4b) means roughly 1 register of saved regpressure + return AccSave >= (int)(IGC_GET_FLAG_VALUE(LoopSinkMinSave) * 4); + }; + // For each candidate like the following: // preheader: // x = add y, z @@ -1280,101 +1509,100 @@ namespace IGC { // Here we group all candidates based on its operands and select ones that definitely // reduce the pressure. 
// - struct OperandUseGroup { - SmallPtrSet Operands; - SmallVector Users; - }; - OperandUseGroup* allGroups = new OperandUseGroup[sinkCandidates.size()]; - SmallVector InstUseInfo; - for (uint32_t i = 0, e = (uint32_t)sinkCandidates.size(); i < e; ++i) + OperandUseGroup *AllGroups = new OperandUseGroup[SinkCandidates.size()]; + SmallVector InstUseInfo; + for (uint32_t i = 0, e = (uint32_t)SinkCandidates.size(); i < e; ++i) { - Instruction* I = sinkCandidates[i]; - SmallPtrSet theUses; - for (Use& U : I->operands()) + Instruction *I = SinkCandidates[i]; + SmallPtrSet theUses; + for (Use &U : I->operands()) { - Value* V = U; - if (isa(V) || IsUsedInLoop(V, L)) + Value *V = U; + if (isa(V) || isUsedInLoop(V, L)) continue; theUses.insert(V); } + + if (theUses.empty()) + continue; + // If this set of uses have been referenced by other instructions, // put this inst in the same group. Note that we don't union sets // that intersect each other. uint32_t j, je = (uint32_t)InstUseInfo.size(); for (j = 0; j < je; ++j) { - OperandUseGroup* OUG = InstUseInfo[j]; - if (IsSameSet(OUG->Operands, theUses)) { + OperandUseGroup *OUG = InstUseInfo[j]; + if (isSameSet(OUG->Operands, theUses)) { OUG->Users.push_back(I); break; } } - if (j == je) { // No match found, create the new one. - OperandUseGroup& OUG = allGroups[i]; + OperandUseGroup &OUG = AllGroups[i]; OUG.Operands = theUses; OUG.Users.push_back(I); InstUseInfo.push_back(&OUG); } } - bool changed = false; + bool EverChanged = false; // Just a placeholder, all LIs considered here are ALUs. 
- SmallPtrSet stores; - const int SaveThreshold = IGC_GET_FLAG_VALUE(LoopSinkMinSave); - bool keepLooping; - uint32_t N = (uint32_t)InstUseInfo.size(); + SmallPtrSet Stores; + bool IterChanged; + uint32_t N = (uint32_t) InstUseInfo.size(); do { - keepLooping = false; + IterChanged = false; for (uint32_t i = 0; i < N; ++i) { - OperandUseGroup* OUG = InstUseInfo[i]; + OperandUseGroup *OUG = InstUseInfo[i]; if (!OUG) continue; - int sz1 = (int)OUG->Users.size(); - int save = sz1 - (int)(OUG->Operands.size()); - if (save >= SaveThreshold) + if (!isBeneficialToSink(OUG)) + continue; + + bool GroupChanged = false; + for (int j = 0; j < (int)(OUG->Users.size()); ++j) { - // Sink - bool t = false; - for (int j = 0; j < sz1; ++j) + Instruction *I = OUG->Users[j]; + bool UserChanged = SinkInstruction(I, Stores, true); + if (UserChanged && (isa(I) || isLoadChain(I))) { - Instruction* I = OUG->Users[j]; - bool t1 = SinkInstruction(I, stores, true); - t |= t1; + LoadChains.insert(I); } - if (t) { - changed = true; - keepLooping = true; + GroupChanged |= UserChanged; + } + if (GroupChanged) { + IterChanged = true; + EverChanged = true; - // Since those operands become global already, remove - // them from the sets in the vector. - for (uint32_t k = 0; k < N; ++k) - { - OperandUseGroup* OUG1 = InstUseInfo[k]; - if (k == i || !OUG1) - continue; + // Since those operands become global already, remove + // them from the sets in the vector. + for (uint32_t k = 0; k < N; ++k) + { + OperandUseGroup *OUG1 = InstUseInfo[k]; + if (k == i || !OUG1) + continue; - for (auto I : OUG->Operands) { - Value* V = I; - OUG1->Operands.erase(V); - } + for (auto I : OUG->Operands) { + Value *V = I; + OUG1->Operands.erase(V); } } - - // Just set it to nullptr (erasing it would be more expensive). - InstUseInfo[i] = nullptr; } + + // Just set it to nullptr (erasing it would be more expensive). 
+ InstUseInfo[i] = nullptr; } - } while (keepLooping); + } while (IterChanged); - delete[] allGroups; + delete[] AllGroups; - return changed; + return EverChanged; } // Move referenced DbgValueInst intrinsics calls after defining instructions diff --git a/IGC/Compiler/CISACodeGen/CodeSinking.hpp b/IGC/Compiler/CISACodeGen/CodeSinking.hpp index 7192d8c9d4e8..9362604155f1 100644 --- a/IGC/Compiler/CISACodeGen/CodeSinking.hpp +++ b/IGC/Compiler/CISACodeGen/CodeSinking.hpp @@ -14,6 +14,7 @@ See LICENSE.TXT for details. ============================= end_copyright_notice ===========================*/ #pragma once +#include "Compiler/CISACodeGen/WIAnalysis.hpp" #include "common/LLVMWarningsPush.hpp" #include #include @@ -21,12 +22,13 @@ See LICENSE.TXT for details. namespace IGC { -#define CODE_SINKING_MIN_SIZE 32 - class CodeSinking : public llvm::FunctionPass { llvm::DominatorTree* DT; llvm::PostDominatorTree* PDT; llvm::LoopInfo* LI; + llvm::AliasAnalysis* AA; + WIAnalysis* WI; + const llvm::DataLayout* DL; // to estimate register pressure CodeGenContext* CTX; public: @@ -38,13 +40,19 @@ namespace IGC { virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override { AU.setPreservesCFG(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreservedID(WIAnalysis::ID); } private: bool ProcessBlock(llvm::BasicBlock& blk); @@ -61,12 +69,14 @@ namespace IGC { bool isSafeToMove(llvm::Instruction* inst, bool& reducePressure, bool& hasAliasConcern, llvm::SmallPtrSetImpl& Stores); + bool isSafeToLoopSinkLoad(llvm::Instruction* I, llvm::Loop* Loop, llvm::AliasAnalysis* AA); + bool isAlwaysSinkInstruction(llvm::Instruction* I); /// local processing bool LocalSink(llvm::BasicBlock* blk); /// data members for local-sinking - llvm::SmallPtrSet localBlkSet; - llvm::SmallPtrSet localInstSet; + llvm::SmallPtrSet LocalBlkSet; + 
llvm::SmallPtrSet LocalInstSet; /// data members for undo std::vector movedInsts; std::vector undoLocas; @@ -88,6 +98,12 @@ namespace IGC { typedef std::pair InstPair; typedef smallvector InstVec; + // memoize all possible stores for every loop that is a candidate for sinking + typedef llvm::SmallVector StoresVec; + llvm::DenseMap MemoizedStoresInLoops; + llvm::SmallPtrSet BlacklistedLoops; + const StoresVec getAllStoresInLoop(llvm::Loop* L); + void appendIfNotExist(InstPair src, std::vector &instMap) { if (std::find(instMap.begin(), instMap.end(), src) == instMap.end()) @@ -123,12 +139,14 @@ namespace IGC { bool hoistCongruentPhi(llvm::Function& F); llvm::Loop* findLoopAsPreheader(llvm::BasicBlock& blk); - // move LI back into loops - bool loopSink(llvm::Loop* LoopWithPressure, bool SinkMultipleLevel); + // move LI back into loop + bool loopSink(llvm::Loop* LoopWithPressure); // pre-condition to sink an instruction into a loop - bool canLoopSink(llvm::Instruction* I, llvm::Loop* L); - bool LoopSinkInstructions( - llvm::SmallVector sinkCandidates, llvm::Loop* L); + bool isLoopSinkCandidate(llvm::Instruction* I, llvm::Loop* L); + bool loopSinkInstructions( + llvm::SmallVector& SinkCandidates, + llvm::SmallPtrSet& LoadChains, + llvm::Loop* L); // Move referencing DbgValueInst intrinsics calls after defining instructions void ProcessDbgValueInst(llvm::BasicBlock& blk); diff --git a/IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking-uniform.ll b/IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking-uniform.ll new file mode 100644 index 000000000000..8d0bc590d7bd --- /dev/null +++ b/IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking-uniform.ll @@ -0,0 +1,89 @@ +;=========================== begin_copyright_notice ============================ +; +; Copyright (C) 2023 Intel Corporation +; +; SPDX-License-Identifier: MIT +; +;============================ end_copyright_notice ============================= + +; REQUIRES: regkeys +; RUN: igc_opt --regkey 
LoopSinkMinSave=1 --regkey ForceLoopSink=1 --regkey CodeSinkingMinSize=10 %enable-basic-aa% --igc-wi-analysis --igc-code-sinking -S %s | FileCheck %s +define spir_kernel void @foo(float addrspace(1)* %in0, float addrspace(1)* %in1, float addrspace(1)* %out0, i32 %count, i16 %localIdX, i16 %localIdY, i16 %localIdZ) #0 { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCALIDX32:%.*]] = zext i16 [[LOCALIDX:%.*]] to i32 +; CHECK-NEXT: [[NON_UNIFORM_ADDR_1:%.*]] = getelementptr float, float addrspace(1)* [[IN0:%.*]], i32 [[LOCALIDX32]] +; CHECK-NEXT: [[UNIFORM_ADDR_2:%.*]] = getelementptr float, float addrspace(1)* [[IN1:%.*]], i32 0 + +; CHECK: entry_preheader: +; CHECK-NEXT: [[NON_UNIFORM_LOAD_1:%.*]] = load float, float addrspace(1)* [[NON_UNIFORM_ADDR_1]], align 16 +; CHECK-NEXT: [[UNIFORM_LOAD_2:%.*]] = load float, float addrspace(1)* [[UNIFORM_ADDR_2]], align 16 + +; this fadd should not be sinked - NON_UNIFORM_LOAD_1 already has uses in the loop + +; CHECK: [[ADDF_1:%.*]] = fadd float [[NON_UNIFORM_LOAD_1]], 1.000000e+00 +; CHECK: br label [[LOOP:%.*]] + +; CHECK: loop: + +; this ADDFF_1 should be sinked: +; it's i32,i32->i32, but the only parameter that is not used in the loop, is uniform +; and the fadd is not, so we remove register pressure by sinking it + +; It wouldn't be sinked if didn't prove it's uniform +; CHECK: [[ADDFF_1:%.*]] = fadd float [[ADDF_1]], [[UNIFORM_LOAD_2]] + +; CHECK: afterloop: +; +entry: + %localIdX32 = zext i16 %localIdX to i32 + %addr_1 = getelementptr float, float addrspace(1)* %in0, i32 %localIdX32 + %addr_2 = getelementptr float, float addrspace(1)* %in1, i32 0 + br label %entry_preheader + +entry_preheader: ; preds = %entry + %l_1 = load float, float addrspace(1)* %addr_1, align 16 + %l_2 = load float, float addrspace(1)* %addr_2, align 16 + %addf_1 = fadd float %l_1, 1.0 + %addff_1 = fadd float %addf_1, %l_2 + br label %loop + +loop: ; preds = %loop, %entry_preheader + %index = phi i32 [ 0, %entry_preheader ], [ 
%inc, %loop ] + %addf_2 = fadd float %l_1, 2.0 + %acc0 = fadd float %addf_1, %addf_2 + %acc1 = fadd float %addf_2, %addff_1 + + %out0_shifted = getelementptr float, float addrspace(1)* %out0, i32 %index + store float %acc1, float addrspace(1)* %out0_shifted, align 8 + %cmptmp = icmp ult i32 %index, %count + %inc = add i32 %index, 1 + br i1 %cmptmp, label %loop, label %afterloop + +afterloop: ; preds = %loop + ret void +} + +!IGCMetadata = !{!2} +!igc.functions = !{!13} + +!2 = !{!"ModuleMD", !3} +!3 = !{!"FuncMD", !4, !5} +!4 = !{!"FuncMDMap[0]", void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i32, i16, i16, i16)* @foo} +!5 = !{!"FuncMDValue[0]", !6, !7, !11, !12} +!6 = !{!"localOffsets"} +!7 = !{!"workGroupWalkOrder", !8, !9, !10} +!8 = !{!"dim0", i32 0} +!9 = !{!"dim1", i32 1} +!10 = !{!"dim2", i32 2} +!11 = !{!"funcArgs"} +!12 = !{!"functionType", !"KernelFunction"} +!13 = !{void (float addrspace(1)*, float addrspace(1)*, float addrspace(1)*, i32, i16, i16, i16)* @foo, !14} +!14 = !{!15, !16} +!15 = !{!"function_type", i32 0} +!16 = !{!"implicit_arg_desc", !17, !18, !19, !20, !21} +!17 = !{i32 0} +!18 = !{i32 1} +!19 = !{i32 7} +!20 = !{i32 8} +!21 = !{i32 9} diff --git a/IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking.ll b/IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking.ll new file mode 100644 index 000000000000..2059924e777c --- /dev/null +++ b/IGC/Compiler/tests/CodeSinking/LoopSinking/adds-sinking.ll @@ -0,0 +1,87 @@ +;=========================== begin_copyright_notice ============================ +; +; Copyright (C) 2023 Intel Corporation +; +; SPDX-License-Identifier: MIT +; +;============================ end_copyright_notice ============================= + +; REQUIRES: regkeys +; RUN: igc_opt --regkey LoopSinkMinSave=4 --regkey ForceLoopSink=1 --regkey CodeSinkingMinSize=10 %enable-basic-aa% --igc-code-sinking -S %s | FileCheck %s +define void @foo(float addrspace(1)* %in0, double addrspace(1)* %in1, float 
addrspace(1)* noalias %out0, i32 %count, i32 %offsetIn0, i32 %offsetIn2) { +; CHECK-LABEL: @foo( +; CHECK: entry: +; CHECK: [[ADDR_1:%.*]] = getelementptr float, float addrspace(1)* [[IN0:%.*]], i32 0 +; CHECK: [[ADDR_3:%.*]] = getelementptr float, float addrspace(1)* [[IN0]], i32 2 +; CHECK: br label [[ENTRY_PREHEADER:%.*]] +; CHECK: entry_preheader: +; CHECK: [[L_1:%.*]] = load float, float addrspace(1)* [[ADDR_1]], align 16 +; CHECK: [[L_3:%.*]] = load float, float addrspace(1)* [[ADDR_3]], align 16 + +; this add is not beneficial to sink + +; CHECK: [[ADDFF_1:%.*]] = fadd float [[L_3]], 1.000000e+00 +; CHECK: br label [[LOOP:%.*]] +; CHECK: loop: + +; These 5 adds are beneficial to sink at once, because now only one value is alive in the loop (L_1), instead of 5 + +; CHECK: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY_PREHEADER]] ], [ [[INC:%.*]], [[LOOP]] ] +; CHECK: [[ADDF_2:%.*]] = fadd float [[L_1]], 2.000000e+00 +; CHECK: [[ADDF_1:%.*]] = fadd float [[L_1]], 1.000000e+00 +; CHECK: [[ACC0:%.*]] = fadd float [[ADDF_1]], [[ADDF_2]] +; CHECK: [[ADDF_3:%.*]] = fadd float [[L_1]], 3.000000e+00 +; CHECK: [[ACC1:%.*]] = fadd float [[ACC0]], [[ADDF_3]] +; CHECK: [[ADDF_4:%.*]] = fadd float [[L_1]], 4.000000e+00 +; CHECK: [[ACC2:%.*]] = fadd float [[ACC1]], [[ADDF_4]] +; CHECK: [[ADDF_5:%.*]] = fadd float [[L_1]], 5.000000e+00 +; CHECK: [[ACC3:%.*]] = fadd float [[ACC2]], [[ADDF_5]] +; CHECK: [[ACC4:%.*]] = fadd float [[ACC2]], [[ADDFF_1]] +; CHECK: [[OUT0_SHIFTED:%.*]] = getelementptr float, float addrspace(1)* [[OUT0:%.*]], i32 [[INDEX]] +; CHECK: store float [[ACC4]], float addrspace(1)* [[OUT0_SHIFTED]], align 8 +; CHECK: [[INC]] = add i32 [[INDEX]], 1 +; CHECK: [[CMPTMP:%.*]] = icmp ult i32 [[INDEX]], [[COUNT:%.*]] +; CHECK: br i1 [[CMPTMP]], label [[LOOP]], label [[AFTERLOOP:%.*]] +; CHECK: afterloop: +; CHECK: ret void +; +entry: + %addr_1 = getelementptr float, float addrspace(1)* %in0, i32 0 + %addr_3 = getelementptr float, float addrspace(1)* %in0, i32 2 + + %l_1 
= load float, float addrspace(1)* %addr_1, align 16 + %l_3 = load float, float addrspace(1)* %addr_3, align 16 + + br label %entry_preheader + +entry_preheader: ; preds = %entry + %addf_1 = fadd float %l_1, 1.0 + %addf_2 = fadd float %l_1, 2.0 + %addf_3 = fadd float %l_1, 3.0 + %addf_4 = fadd float %l_1, 4.0 + %addf_5 = fadd float %l_1, 5.0 + + %addff_1 = fadd float %l_3, 1.0 + + br label %loop + +loop: ; preds = %loop, %entry_preheader + %index = phi i32 [ 0, %entry_preheader ], [ %inc, %loop ] + %acc0 = fadd float %addf_1, %addf_2 + %acc1 = fadd float %acc0, %addf_3 + %acc2 = fadd float %acc1, %addf_4 + %acc3 = fadd float %acc2, %addf_5 + + %acc4 = fadd float %acc2, %addff_1 + + %out0_shifted = getelementptr float, float addrspace(1)* %out0, i32 %index + store float %acc4, float addrspace(1)* %out0_shifted, align 8 + %cmptmp = icmp ult i32 %index, %count + %inc = add i32 %index, 1 + br i1 %cmptmp, label %loop, label %afterloop + +afterloop: ; preds = %loop + ret void +} + +!igc.functions = !{} diff --git a/IGC/Compiler/tests/CodeSinking/LoopSinking/load-loopsink.ll b/IGC/Compiler/tests/CodeSinking/LoopSinking/load-loopsink.ll new file mode 100644 index 000000000000..2383edc1d487 --- /dev/null +++ b/IGC/Compiler/tests/CodeSinking/LoopSinking/load-loopsink.ll @@ -0,0 +1,71 @@ +;=========================== begin_copyright_notice ============================ +; +; Copyright (C) 2023 Intel Corporation +; +; SPDX-License-Identifier: MIT +; +;============================ end_copyright_notice ============================= + +; REQUIRES: regkeys +; RUN: igc_opt --regkey CodeSinkingLoadSchedulingInstr=1 --regkey LoopSinkMinSave=1 --regkey EnableLoadsLoopSink=1 --regkey ForceLoopSink=1 --regkey CodeSinkingMinSize=10 %enable-basic-aa% --igc-code-sinking -S %s | FileCheck %s +define void @foo(<2 x double> addrspace(3)* %in0, double addrspace(3)* %in1, double addrspace(3)* noalias %out0, i32 %count, i32 %offsetIn0, i32 %offsetIn2) { +; CHECK-LABEL: @foo( +; +; CHECK-NEXT: 
entry: +entry: +; CHECK-NEXT: [[IN0_SHIFTED:%.*]] = getelementptr <2 x double>, <2 x double> addrspace(3)* [[IN0:%.*]], i32 [[OFFSETIN0:%.*]] + %in0_shifted = getelementptr <2 x double>, <2 x double> addrspace(3)* %in0, i32 %offsetIn0 +; CHECK-NEXT: [[IN2_SHIFTED:%.*]] = getelementptr <2 x double>, <2 x double> addrspace(3)* [[IN0]], i32 [[OFFSETIN2:%.*]] + %in2_shifted = getelementptr <2 x double>, <2 x double> addrspace(3)* %in0, i32 %offsetIn2 + %in0_add = add i32 %offsetIn0, 10 + %in0_shifted_for_store = getelementptr <2 x double>, <2 x double> addrspace(3)* %in0, i32 %in0_add + %in0_bc_for_store = bitcast <2 x double> addrspace(3)* %in0_shifted_for_store to double addrspace(3)* + br label %entry_preheader + +; CHECK: entry_preheader: +entry_preheader: ; preds = %entry + %l0 = load <2 x double>, <2 x double> addrspace(3)* %in0_shifted, align 16 + %l0e0 = extractelement <2 x double> %l0, i32 0 + %l0e1 = extractelement <2 x double> %l0, i32 1 +; CHECK: [[L1:%.*]] = load double, double addrspace(3)* %[[_:.*]], align 8 + %l1 = load double, double addrspace(3)* %in1, align 8 + store double 5.6, double addrspace(3)* %out0, align 8 +; check not sinked +; CHECK: [[L2:%.*]] = load <2 x double>, <2 x double> addrspace(3)* [[IN2_SHIFTED]], align 16 + %l2 = load <2 x double>, <2 x double> addrspace(3)* %in2_shifted, align 16 + %l2e0 = extractelement <2 x double> %l2, i32 0 + %l2e1 = extractelement <2 x double> %l2, i32 1 + br label %loop + +; CHECK: loop: +loop: ; preds = %loop, %entry_preheader + %index = phi i32 [ 0, %entry_preheader ], [ %inc, %loop ] +; CHECK: store double 8.600000e+00, double addrspace(3)* [[IN0_BC_FOR_STORE:%.*]], align 8 + store double 8.6, double addrspace(3)* %in0_bc_for_store, align 8 + +; check that sinked here +; CHECK: [[L0:%.*]] = load <2 x double>, <2 x double> addrspace(3)* [[IN0_SHIFTED]], align 16 + +; CHECK: [[L0E0:%.*]] = extractelement <2 x double> [[L0]], i32 0 +; CHECK: [[L0E1:%.*]] = extractelement <2 x double> [[L0]], i32 1 + + %a0 
= fadd double %l0e0, 2.000000e+00 + %a1 = fadd double %l1, 2.000000e+00 + %a3 = fadd double %l0e1, 2.000000e+00 + %a4 = fadd double %l2e0, 2.000000e+00 + %a5 = fadd double %l2e1, 4.000000e+00 + %combine = fadd double %a0, %a1 + %combine2 = fadd double %a3, %combine + %combine3 = fadd double %a4, %a5 + %toStore = fadd double %combine2, %combine3 + %out0_shifted = getelementptr double, double addrspace(3)* %out0, i32 %index + store double %toStore, double addrspace(3)* %out0_shifted, align 8 + %cmptmp = icmp ult i32 %index, %count + %inc = add i32 %index, 1 + br i1 %cmptmp, label %loop, label %afterloop + +afterloop: ; preds = %loop + ret void +} + +!igc.functions = !{} diff --git a/IGC/common/igc_flags.h b/IGC/common/igc_flags.h index ca45de7deae4..07e83d07e19f 100644 --- a/IGC/common/igc_flags.h +++ b/IGC/common/igc_flags.h @@ -125,7 +125,11 @@ DECLARE_IGC_REGKEY(bool, DisableIGCOptimizations, false, "Setting this to DECLARE_IGC_REGKEY(bool, DisableLLVMGenericOptimizations, false, "Disable LLVM generic optimization passes", false) DECLARE_IGC_REGKEY(bool, DisableCodeSinking, false, "Setting this to 1/true adds a compiler switch to disable code-sinking", false) DECLARE_IGC_REGKEY(bool, DisableCodeSinkingInputVec, false, "Setting this to 1/true disable sinking inputVec inst (test)", false) -DECLARE_IGC_REGKEY(DWORD, LoopSinkMinSave, 5, "If loop sink can have save more than this Minimum, do it; otherwise, skip", false) +DECLARE_IGC_REGKEY(bool, ForceLoopSink, false, "Force sinking in all loops", false) +DECLARE_IGC_REGKEY(bool, EnableLoadsLoopSink, true, "Allow sinking of loads in the loop", false) +DECLARE_IGC_REGKEY(DWORD, CodeSinkingMinSize, 32, "Don't sink if the number of instructions in the kernel is less", false) +DECLARE_IGC_REGKEY(DWORD, CodeSinkingLoadSchedulingInstr, 5, "Instructions number to step to schedule loads in advance before the load use to cover latency. 
1 to insert it immediately before use", false) +DECLARE_IGC_REGKEY(DWORD, LoopSinkMinSave, 1, "If loop sink can save more 32-bit values than this Minimum, do it; otherwise, skip", false) DECLARE_IGC_REGKEY(DWORD, LoopSinkThresholdDelta, 50, "Do loop sink If the estimated register pressure is higher than this + #avaialble registers", false) DECLARE_IGC_REGKEY(bool, EnableLoopHoistConstant, false, "Enables pass to check for specific loop patterns where variables are constant across all but the last iteration, and hoist them out of the loop.", false) DECLARE_IGC_REGKEY(bool, DisableCodeHoisting, false, "Setting this to 1/true adds a compiler switch to disable code-hoisting", false)