diff --git a/bolt/Passes/HFSort.h b/bolt/Passes/HFSort.h
index 7c837e029397..2329ec171417 100644
--- a/bolt/Passes/HFSort.h
+++ b/bolt/Passes/HFSort.h
@@ -103,9 +103,7 @@ std::vector<Cluster> clusterize(const CallGraph &Cg);
 /*
  * Optimize function placement for iTLB cache and i-cache.
  */
-std::vector<Cluster> hfsortPlus(CallGraph &Cg,
-                                bool UseGainCache = true,
-                                bool UseShortCallCache = true);
+std::vector<Cluster> hfsortPlus(CallGraph &Cg, bool UseGainCache = true);
 
 /*
  * Pettis-Hansen code layout algorithm
diff --git a/bolt/Passes/HFSortPlus.cpp b/bolt/Passes/HFSortPlus.cpp
index d7006af2d005..fb8f2cbcf2c2 100644
--- a/bolt/Passes/HFSortPlus.cpp
+++ b/bolt/Passes/HFSortPlus.cpp
@@ -46,7 +46,7 @@ using namespace llvm;
 using namespace bolt;
 
 namespace opts {
-extern cl::OptionCategory BoltCategory;
+extern cl::OptionCategory BoltOptCategory;
 
 extern cl::opt<unsigned> Verbosity;
 
@@ -92,17 +92,6 @@ int32_t ITLBPageSize;
 // while smaller values result in better i-cache performance
 int32_t ITLBEntries;
 
-const char* cacheKindString(bool UseGainCache, bool UseShortCallCache) {
-  if (UseGainCache && UseShortCallCache)
-    return "gain + short call cache";
-  else if (UseGainCache)
-    return "gain cache";
-  else if (UseShortCallCache)
-    return "short call cache";
-  else
-    return "no cache";
-}
-
 // This class maintains adjacency information for all Clusters being
 // processed. It is used to invalidate cache entries when merging
 // Clusters and for visiting all neighbors of any given Cluster.
@@ -215,17 +204,16 @@ class PrecomputedResults {
     Valid[Index] = true;
   }
 
-  void invalidate(const AdjacencyMatrix &Adjacent, const Cluster *C) {
-    invalidate(C);
-    Adjacent.forallAdjacent(C, [&](const Cluster *A) { invalidate(A); });
-  }
-
- private:
   void invalidate(const Cluster *C) {
     Valid.reset(C->id() * Size, (C->id() + 1) * Size);
+    for (size_t Id = 0; Id < Size; Id++) {
+      Valid.reset(Id * Size + C->id());
+    }
   }
 
+ private:
   size_t index(const Cluster *First, const Cluster *Second) const {
-    return (First->id() * Size) + Second->id();
+    return First->id() * Size + Second->id();
   }
 
   size_t Size;
@@ -347,12 +335,6 @@ class HFSortPlus {
    * the same cache page
    */
   double shortCalls(const Cluster *Cluster) const {
-    if (UseShortCallCache) {
-      auto Itr = ShortCallCache.find(Cluster);
-      if (Itr != ShortCallCache.end())
-        return Itr->second;
-    }
-
     double Calls = 0;
     for (auto TargetId : Cluster->targets()) {
       for (auto Succ : Cg.successors(TargetId)) {
@@ -367,10 +349,6 @@ class HFSortPlus {
       }
     }
 
-    if (UseShortCallCache) {
-      ShortCallCache[Cluster] = Calls;
-    }
-
     return Calls;
   }
 
@@ -380,11 +358,6 @@ class HFSortPlus {
    */
   double shortCalls(const Cluster *ClusterPred,
                     const Cluster *ClusterSucc) const {
-    if (UseShortCallCache &&
-        ShortCallPairCache.contains(ClusterPred, ClusterSucc)) {
-      return ShortCallPairCache.get(ClusterPred, ClusterSucc);
-    }
-
     double Calls = 0;
     for (auto TargetId : ClusterPred->targets()) {
       for (auto Succ : Cg.successors(TargetId)) {
@@ -413,10 +386,6 @@ class HFSortPlus {
       }
     }
 
-    if (UseShortCallCache) {
-      ShortCallPairCache.set(ClusterPred, ClusterSucc, Calls);
-    }
-
     return Calls;
   }
 
@@ -434,8 +403,8 @@ class HFSortPlus {
    */
   double mergeGain(const Cluster *ClusterPred,
                    const Cluster *ClusterSucc) const {
-    if (UseGainCache && Cache.contains(ClusterPred, ClusterSucc)) {
-      return Cache.get(ClusterPred, ClusterSucc);
+    if (UseGainCache && GainCache.contains(ClusterPred, ClusterSucc)) {
+      return GainCache.get(ClusterPred, ClusterSucc);
     }
 
     // cache misses on the first cluster
@@ -460,7 +429,7 @@ class HFSortPlus {
     Gain /= std::min(ClusterPred->size(), ClusterSucc->size());
 
     if (UseGainCache) {
-      Cache.set(ClusterPred, ClusterSucc, Gain);
+      GainCache.set(ClusterPred, ClusterSucc, Gain);
     }
 
     return Gain;
@@ -513,7 +482,7 @@ class HFSortPlus {
     const double ProbOut =
       CallsFromPred > 0 ? CallsPredSucc / CallsFromPred : 0;
     assert(0.0 <= ProbOut && ProbOut <= 1.0 && "incorrect probability");
-    
+
     // probability that the second cluster is called from the first one
     const double ProbIn =
       CallsToSucc > 0 ? CallsPredSucc / CallsToSucc : 0;
@@ -601,13 +570,12 @@ class HFSortPlus {
   */
   std::vector<Cluster> run() {
     DEBUG(dbgs() << "Starting hfsort+ w/"
-                 << cacheKindString(UseGainCache, UseShortCallCache)
+                 << (UseGainCache ? "gain cache" : "no cache")
                  << " for " << Clusters.size() << " clusters "
                  << "with ITLBPageSize = " << ITLBPageSize << ", "
                  << "ITLBEntries = " << ITLBEntries << ", "
                  << "and MergeProbability = " << opts::MergeProbability << "\n");
 
-
     // Pass 1
     runPassOne();
 
@@ -628,9 +596,7 @@ class HFSortPlus {
     return Result;
   }
 
-  HFSortPlus(const CallGraph &Cg,
-             bool UseGainCache,
-             bool UseShortCallCache)
+  HFSortPlus(const CallGraph &Cg, bool UseGainCache)
   : Cg(Cg),
     FuncCluster(Cg.numNodes(), nullptr),
     Addr(Cg.numNodes(), InvalidAddr),
@@ -638,9 +604,7 @@ class HFSortPlus {
     Clusters(initializeClusters()),
     Adjacent(Cg, Clusters, FuncCluster),
     UseGainCache(UseGainCache),
-    UseShortCallCache(UseShortCallCache),
-    Cache(Clusters.size()),
-    ShortCallPairCache(Clusters.size()) {
+    GainCache(Clusters.size()) {
   }
 
 private:
@@ -696,31 +660,16 @@ class HFSortPlus {
       CurAddr = ((CurAddr + Align - 1) / Align) * Align;
     }
 
-    // Update caches
-    invalidateCaches(Into);
+    // Invalidate all cache entries associated with cluster Into
+    if (UseGainCache) {
+      GainCache.invalidate(Into);
+    }
 
     // Remove cluster From from the list of active clusters
    auto Iter = std::remove(Clusters.begin(), Clusters.end(), From);
    Clusters.erase(Iter, Clusters.end());
   }
 
-  /*
-   * Invalidate all cache entries associated with cluster C and its neighbors.
-   */
-  void invalidateCaches(const Cluster *C) {
-    if (UseShortCallCache) {
-      maybeErase(ShortCallCache, C);
-      Adjacent.forallAdjacent(C,
-        [this](const Cluster *A) {
-          maybeErase(ShortCallCache, A);
-        });
-      ShortCallPairCache.invalidate(Adjacent, C);
-    }
-    if (UseGainCache) {
-      Cache.invalidate(Adjacent, C);
-    }
-  }
-
   // The call graph
   const CallGraph &Cg;
 
@@ -746,32 +695,21 @@ class HFSortPlus {
   // Use cache for mergeGain results
   bool UseGainCache;
 
-  // Use caches for shortCalls results
-  bool UseShortCallCache;
-
   // A cache that keeps precomputed values of mergeGain for pairs of clusters;
   // when a pair of clusters (x,y) gets merged, we need to invalidate the pairs
   // containing both x and y and all clusters adjacent to x and y (and recompute
   // them on the next iteration).
-  mutable PrecomputedResults Cache;
-
-  // Cache for shortCalls for a single cluster.
-  mutable std::unordered_map<const Cluster *, double> ShortCallCache;
-
-  // Cache for shortCalls for a pair of Clusters
-  mutable PrecomputedResults ShortCallPairCache;
+  mutable PrecomputedResults GainCache;
 };
 
 }
 
-std::vector<Cluster> hfsortPlus(CallGraph &Cg,
-                                bool UseGainCache,
-                                bool UseShortCallCache) {
+std::vector<Cluster> hfsortPlus(CallGraph &Cg, bool UseGainCache) {
   // It is required that the sum of incoming arc weights is not greater
   // than the number of samples for every function.
   // Ensuring the call graph obeys the property before running the algorithm.
   Cg.adjustArcWeights();
-  return HFSortPlus(Cg, UseGainCache, UseShortCallCache).run();
+  return HFSortPlus(Cg, UseGainCache).run();
 }
 
 }}
diff --git a/bolt/Passes/ReorderFunctions.cpp b/bolt/Passes/ReorderFunctions.cpp
index 4676c1c2fa8a..bf4f178e2259 100644
--- a/bolt/Passes/ReorderFunctions.cpp
+++ b/bolt/Passes/ReorderFunctions.cpp
@@ -119,14 +119,6 @@ UseGainCache("hfsort+-use-cache",
   llvm::cl::Hidden,
   llvm::cl::cat(BoltOptCategory));
 
-static llvm::cl::opt<bool>
-UseShortCallCache("hfsort+-use-short-call-cache",
-  llvm::cl::desc("Use a cache for shortCall results when computing hfsort+."),
-  llvm::cl::ZeroOrMore,
-  llvm::cl::init(true),
-  llvm::cl::Hidden,
-  llvm::cl::cat(BoltOptCategory));
-
 } // namespace opts
 
 namespace llvm {
@@ -353,7 +345,7 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
     Clusters = clusterize(Cg);
     break;
   case BinaryFunction::RT_HFSORT_PLUS:
-    Clusters = hfsortPlus(Cg, opts::UseGainCache, opts::UseShortCallCache);
+    Clusters = hfsortPlus(Cg, opts::UseGainCache);
     break;
   case BinaryFunction::RT_PETTIS_HANSEN:
     Clusters = pettisAndHansen(Cg);
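
Note on the PrecomputedResults change above: the removed invalidate(Adjacent, C) overload cleared C and then walked the AdjacencyMatrix to clear every neighbor, which is why the cache needed adjacency information at all. The surviving invalidate(C) instead clears C's whole row (the ranged Valid.reset) and, with the newly added loop, C's whole column, so every cached pair that mentions C is dropped without any adjacency walk. Below is a minimal standalone sketch of that row-plus-column scheme; the PairGainCache name, std::vector<bool> (in place of llvm::BitVector), and plain uint32_t ids (in place of Cluster pointers) are all illustrative assumptions, not BOLT code.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Illustrative stand-in for PrecomputedResults: caches a double per
    // ordered pair of ids, with a flat Size x Size validity mask.
    class PairGainCache {
    public:
      explicit PairGainCache(size_t N)
        : Size(N), Valid(N * N, false), Value(N * N, 0.0) {}

      bool contains(uint32_t A, uint32_t B) const { return Valid[index(A, B)]; }
      double get(uint32_t A, uint32_t B) const { return Value[index(A, B)]; }

      void set(uint32_t A, uint32_t B, double Gain) {
        const size_t I = index(A, B);
        Value[I] = Gain;
        Valid[I] = true;
      }

      // Drop every cached pair involving C: the whole row (C, *) and the
      // whole column (*, C). No adjacency information is needed.
      void invalidate(uint32_t C) {
        for (size_t Id = 0; Id < Size; Id++) {
          Valid[C * Size + Id] = false; // row entry (C, Id)
          Valid[Id * Size + C] = false; // column entry (Id, C)
        }
      }

    private:
      size_t index(uint32_t A, uint32_t B) const { return A * Size + B; }

      size_t Size;
      std::vector<bool> Valid;
      std::vector<double> Value;
    };

This self-contained invalidation is also what lets mergeClusters collapse the old invalidateCaches(Into) helper into the single guarded call GainCache.invalidate(Into) once the short-call caches are deleted.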