Skip to content

Commit

Permalink
[WorkSpanAnalysis, LoopStripMine] Improve work-span cost analysis and…
Browse files Browse the repository at this point in the history
… loop stripmining in several ways:

- Use branch probabilities when estimating the cost of a loop body.
- Use unit grainsizes more eagerly, whenever cost-analysis indicates the loop body is sufficiently expensive.
- Avoid modifying loops when the pass does not stripmine them.
- When loops do not appear to be profitable to parallelize, just serialize them.
  • Loading branch information
neboat committed Apr 25, 2024
1 parent dd4beba commit 6a58f18
Show file tree
Hide file tree
Showing 11 changed files with 193 additions and 141 deletions.
9 changes: 6 additions & 3 deletions llvm/include/llvm/Analysis/WorkSpanAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
// Tapir.

namespace llvm {
class BlockFrequencyInfo;
class Loop;
class LoopInfo;
class OptimizationRemarkEmitter;
class ScalarEvolution;
class TargetLibraryInfo;
class TargetTransformInfo;
Expand All @@ -50,8 +52,9 @@ unsigned getConstTripCount(const Loop *L, ScalarEvolution &SE);

void estimateLoopCost(WSCost &LoopCost, const Loop *L, LoopInfo *LI,
ScalarEvolution *SE, const TargetTransformInfo &TTI,
TargetLibraryInfo *TLI,
const SmallPtrSetImpl<const Value *> &EphValues);
}
TargetLibraryInfo *TLI, BlockFrequencyInfo *BFI,
const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE);
} // namespace llvm

#endif // LLVM_ANALYSIS_WORKSPANANALYSIS_H_
1 change: 0 additions & 1 deletion llvm/include/llvm/Transforms/Tapir/LoopStripMine.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#define LLVM_TRANSFORMS_TAPIR_LOOPSTRIPMINE_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/InstructionCost.h"

Expand Down
95 changes: 67 additions & 28 deletions llvm/lib/Analysis/WorkSpanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,18 @@
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/WorkSpanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/InstructionCost.h"

using namespace llvm;

Expand All @@ -49,15 +47,24 @@ unsigned llvm::getConstTripCount(const Loop *L, ScalarEvolution &SE) {
/// Recursive helper routine to estimate the amount of work in a loop.
static void estimateLoopCostHelper(const Loop *L, CodeMetrics &Metrics,
WSCost &LoopCost, LoopInfo *LI,
ScalarEvolution *SE) {
ScalarEvolution *SE, BlockFrequencyInfo *BFI,
OptimizationRemarkEmitter *ORE) {
if (LoopCost.UnknownCost)
return;

// TODO: Handle control flow within the loop intelligently, using
// BlockFrequencyInfo.
BlockFrequency LoopEntryFreq =
BFI ? BFI->getBlockFreq(L->getHeader()) : BlockFrequency();

for (Loop *SubL : *L) {
WSCost SubLoopCost;
estimateLoopCostHelper(SubL, Metrics, SubLoopCost, LI, SE);
BlockFrequency SubloopEntryFreq =
BFI ? BFI->getBlockFreq(SubL->getHeader()) : BlockFrequency();

estimateLoopCostHelper(SubL, Metrics, SubLoopCost, LI, SE, BFI, ORE);
if (LoopEntryFreq.getFrequency() && SubloopEntryFreq.getFrequency() &&
SubloopEntryFreq < LoopEntryFreq)
SubLoopCost.Work /=
(LoopEntryFreq.getFrequency() / SubloopEntryFreq.getFrequency());
// Quit early if the size of this subloop is already too big.
if (InstructionCost::getMax() == SubLoopCost.Work)
LoopCost.Work = InstructionCost::getMax();
Expand All @@ -67,45 +74,77 @@ static void estimateLoopCostHelper(const Loop *L, CodeMetrics &Metrics,
// TODO: Use a more precise analysis to account for non-constant trip
// counts.
if (!ConstTripCount) {
LoopCost.UnknownCost = true;
// If we cannot compute a constant trip count, assume this subloop
// executes at least once.
ConstTripCount = 1;
if (ORE)
ORE->emit([&]() {
return OptimizationRemark("work-span-analysis", "NoConstTripCount",
SubL->getStartLoc(), SubL->getHeader())
<< "Could not determine constant trip count for subloop.";
});
if (BFI && SubloopEntryFreq.getFrequency() &&
LoopEntryFreq.getFrequency()) {
ConstTripCount =
SubloopEntryFreq.getFrequency() / LoopEntryFreq.getFrequency();
ConstTripCount |= (ConstTripCount == 0);
} else {
// If we cannot compute a constant trip count, assume this subloop
// executes at least once.
LoopCost.UnknownCost = true;
ConstTripCount = 1;
}
} else if (BFI && SubloopEntryFreq.getFrequency() &&
LoopEntryFreq.getFrequency()) {
LLVM_DEBUG(dbgs() << "ConstTripCount " << ConstTripCount
<< ", BFI estimate "
<< SubloopEntryFreq.getFrequency() /
LoopEntryFreq.getFrequency()
<< "\n");
}

// Check if the total size of this subloop is huge.
if (InstructionCost::getMax() / ConstTripCount > SubLoopCost.Work)
LoopCost.Work = InstructionCost::getMax();

// Check if this subloop suffices to make loop L huge.
if (InstructionCost::getMax() - LoopCost.Work <
(SubLoopCost.Work * ConstTripCount))
(SubLoopCost.Work * ConstTripCount)) {
if (ORE)
ORE->emit([&]() {
return OptimizationRemark("work-span-analysis", "LargeSubloop",
SubL->getStartLoc(), SubL->getHeader())
<< "Subloop work makes this loop huge.";
});
LoopCost.Work = InstructionCost::getMax();
}

// Add in the size of this subloop.
LoopCost.Work += (SubLoopCost.Work * ConstTripCount);
if (LoopCost.Work < InstructionCost::getMax())
// Add in the size of this subloop.
LoopCost.Work += (SubLoopCost.Work * ConstTripCount);
}

// After looking at all subloops, if we've concluded we have a huge loop size,
// return early.
if (InstructionCost::getMax() == LoopCost.Work)
return;

for (BasicBlock *BB : L->blocks())
for (BasicBlock *BB : L->blocks()) {
if (LI->getLoopFor(BB) == L) {
InstructionCost BBCost = Metrics.NumBBInsts[BB];
BlockFrequency BBFreq = BFI ? BFI->getBlockFreq(BB) : BlockFrequency();
if (LoopEntryFreq.getFrequency() && BBFreq.getFrequency() &&
BBFreq < LoopEntryFreq) {
BBCost /= (LoopEntryFreq.getFrequency() / BBFreq.getFrequency());
}
// Check if this BB suffices to make loop L huge.
if (InstructionCost::getMax() - LoopCost.Work < Metrics.NumBBInsts[BB]) {
if (InstructionCost::getMax() - LoopCost.Work < BBCost) {
LoopCost.Work = InstructionCost::getMax();
return;
}
LoopCost.Work += Metrics.NumBBInsts[BB];
LoopCost.Work += BBCost;
}
}
}

void llvm::estimateLoopCost(WSCost &LoopCost, const Loop *L, LoopInfo *LI,
ScalarEvolution *SE, const TargetTransformInfo &TTI,
TargetLibraryInfo *TLI,
const SmallPtrSetImpl<const Value *> &EphValues) {
TargetLibraryInfo *TLI, BlockFrequencyInfo *BFI,
const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE) {
// TODO: Use more precise analysis to estimate the work in each call.
// TODO: Use vectorizability to enhance cost analysis.

Expand All @@ -114,5 +153,5 @@ void llvm::estimateLoopCost(WSCost &LoopCost, const Loop *L, LoopInfo *LI,
LoopCost.Metrics.analyzeBasicBlock(BB, TTI, EphValues,
/*PrepareForLTO*/ false, TLI);

estimateLoopCostHelper(L, LoopCost.Metrics, LoopCost, LI, SE);
estimateLoopCostHelper(L, LoopCost.Metrics, LoopCost, LI, SE, BFI, ORE);
}
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Tapir/LoopStripMine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ bool llvm::computeStripMineCount(
DetachI, TargetTransformInfo::TCK_SizeAndLatency) /
LoopCost)
.getValue());

// Make sure the stripmine count is at least 1.
SMP.Count |= (SMP.Count == 0);
return false;
}

Expand Down
Loading

0 comments on commit 6a58f18

Please sign in to comment.