Skip to content

Commit

Permalink
[LoopSpawning][TaskSimplify] Properly enclose the body of a Tapir loop within a taskframe when transforming the Tapir loop to implement parallel recursive divide-and-conquer spawning.
Browse files Browse the repository at this point in the history
Fix optimization in task-simplify to prevent erroneous deletion of taskframes. Clean up code and improve naming of split basic blocks.
  • Loading branch information
neboat committed Mar 2, 2024
1 parent 3a60e32 commit f4d6301
Show file tree
Hide file tree
Showing 11 changed files with 2,217 additions and 48 deletions.
5 changes: 4 additions & 1 deletion llvm/include/llvm/Transforms/Tapir/LoweringUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,10 @@ class LoopOutlineProcessor {
/// a common post-processing step for outlined helper functions.
void addSyncToOutlineReturns(TapirLoopInfo &TL, TaskOutlineInfo &Out,
ValueToValueMapTy &VMap);

/// Enclose the task of the Tapir loop in a taskframe, if the Tapir loop
/// contains nested spawns.
void maybeEncloseInTaskFrame(TapirLoopInfo &TL, TaskOutlineInfo &Out,
ValueToValueMapTy &VMap);
/// Move Cilksan instrumentation out of cloned loop.
void moveCilksanInstrumentation(TapirLoopInfo &TL, TaskOutlineInfo &Out,
ValueToValueMapTy &VMap);
Expand Down
145 changes: 113 additions & 32 deletions llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TapirTaskInfo.h"
Expand All @@ -25,15 +25,13 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
Expand All @@ -43,7 +41,6 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Tapir.h"
#include "llvm/Transforms/Tapir/LoweringUtils.h"
Expand All @@ -53,14 +50,14 @@
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/TapirUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

#include <utility>

using namespace llvm;

#define LS_NAME "loop-spawning-ti"
#define LS_NAME "loop-spawning"
#define DEBUG_TYPE LS_NAME

STATISTIC(TapirLoopsFound,
Expand Down Expand Up @@ -91,6 +88,7 @@ class DACSpawning : public LoopOutlineProcessor {
void postProcessOutline(TapirLoopInfo &TL, TaskOutlineInfo &Out,
ValueToValueMapTy &VMap) override final {
LoopOutlineProcessor::postProcessOutline(TL, Out, VMap);
maybeEncloseInTaskFrame(TL, Out, VMap);
implementDACIterSpawnOnHelper(TL, Out, VMap);
++LoopsConvertedToDAC;

Expand All @@ -110,7 +108,7 @@ static bool isSRetInput(const Value *V, const Function &F) {
if (!isa<Argument>(V))
return false;

auto ArgIter = F.arg_begin();
const auto *ArgIter = F.arg_begin();
if (F.hasParamAttribute(0, Attribute::StructRet) && V == &*ArgIter)
return true;
++ArgIter;
Expand Down Expand Up @@ -186,7 +184,9 @@ void LoopOutlineProcessor::addSyncToOutlineReturns(TapirLoopInfo &TL,
continue;

BasicBlock *Exit = AtExit->GetInsertBlock();
BasicBlock *NewExit = SplitBlock(Exit, Exit->getTerminator());
BasicBlock *NewExit =
SplitBlock(Exit, Exit->getTerminator(), (DomTreeUpdater *)nullptr,
nullptr, nullptr, Exit->getName() + ".synced");
SyncInst *NewSync = SyncInst::Create(NewExit, SyncRegion);
ReplaceInstWithInst(Exit->getTerminator(), NewSync);

Expand All @@ -206,6 +206,77 @@ void LoopOutlineProcessor::addSyncToOutlineReturns(TapirLoopInfo &TL,
}
}

/// Wrap the outlined body of Tapir loop \p TL in a taskframe, but only when
/// the loop's task has subtasks.
///
/// A taskframe.create call is placed at the top of the entry block of the
/// outlined function \p Out.Outline.  Each return in that function is preceded
/// by a taskframe.end call on that taskframe.  Each resume -- other than one in
/// the block that \p VMap maps the loop's unwind destination to -- is replaced
/// with an invoke of taskframe.resume whose normal destination is a shared
/// unreachable block and whose unwind destination is a shared, newly created
/// cleanup landing pad that resumes.
void LoopOutlineProcessor::maybeEncloseInTaskFrame(TapirLoopInfo &TL,
                                                   TaskOutlineInfo &Out,
                                                   ValueToValueMapTy &VMap) {
  // A taskframe is only inserted when the loop's task has subtasks.
  if (TL.getTask()->subtasks().empty())
    return;

  Function *Helper = Out.Outline;
  BasicBlock &EntryBB = Helper->getEntryBlock();

  // Look up the taskframe intrinsics.
  Function *CreateFn =
      Intrinsic::getDeclaration(&M, Intrinsic::taskframe_create);
  Function *EndFn = Intrinsic::getDeclaration(&M, Intrinsic::taskframe_end);

  // Emit the taskframe.create as the first instruction of the entry block.
  IRBuilder<> EntryBuilder(&EntryBB, EntryBB.begin());
  Instruction *TFCreate = EntryBuilder.CreateCall(CreateFn, {}, "ls.tf");
  TFCreate->setDebugLoc(EntryBB.getTerminator()->getDebugLoc());

  // Blocks shared by every rewritten resume.  Both are created lazily, by the
  // first resume that needs them.
  BasicBlock *UnreachableBB = nullptr;
  BasicBlock *ResumeBB = nullptr;
  // Resumes to rewrite once the enumeration of function exits is finished.
  SmallVector<ResumeInst *, 1> WorkList;

  EscapeEnumerator Escapes(*Helper, "ls.tfend", false);
  while (IRBuilder<> *ExitBuilder = Escapes.Next()) {
    // On a return, simply end the taskframe.
    if (isa<ReturnInst>(*ExitBuilder->GetInsertPoint())) {
      ExitBuilder->CreateCall(EndFn, TFCreate);
      continue;
    }

    BasicBlock *ExitBB = ExitBuilder->GetInsertBlock();
    // Leave the block that VMap associates with the loop's unwind destination
    // untouched.
    if (TL.getUnwindDest() &&
        ExitBB == cast<BasicBlock>(VMap[TL.getUnwindDest()]))
      continue;
    // Skip the resume block created below (presumably the enumerator can reach
    // it, since it is added to the same function while enumerating).
    if (ExitBB == ResumeBB)
      continue;

    ResumeInst *Resume = cast<ResumeInst>(ExitBB->getTerminator());

    if (!UnreachableBB) {
      // Create the placeholder block, which holds only an unreachable.
      UnreachableBB = BasicBlock::Create(
          M.getContext(), ExitBB->getName() + ".unreachable", Helper);
      IRBuilder<>(UnreachableBB).CreateUnreachable();
    }

    if (!ResumeBB) {
      // Create the block that taskframe.resume unwinds to: a cleanup landing
      // pad, of the same type as this resume's operand, followed by a resume.
      ResumeBB = BasicBlock::Create(M.getContext(),
                                    ExitBB->getName() + ".tfunwind", Helper);
      IRBuilder<> ResumeBuilder(ResumeBB);
      ResumeBuilder.SetCurrentDebugLocation(Resume->getDebugLoc());
      LandingPadInst *Pad =
          ResumeBuilder.CreateLandingPad(Resume->getValue()->getType(), 0);
      Pad->setCleanup(true);
      ResumeBuilder.CreateResume(Pad);
    }

    WorkList.push_back(Resume);
  }

  // Replace each collected resume with an invoke of taskframe.resume.
  for (ResumeInst *Resume : WorkList) {
    Value *Exn = Resume->getValue();
    Function *ResumeFn = Intrinsic::getDeclaration(
        &M, Intrinsic::taskframe_resume, {Exn->getType()});
    ReplaceInstWithInst(Resume,
                        InvokeInst::Create(ResumeFn, UnreachableBB, ResumeBB,
                                           {TFCreate, Exn}));
  }
}

static void getDependenciesInSameBlock(Instruction *I,
SmallPtrSetImpl<Instruction *> &Deps) {
const BasicBlock *Block = I->getParent();
Expand Down Expand Up @@ -303,18 +374,15 @@ void LoopOutlineProcessor::moveCilksanInstrumentation(TapirLoopInfo &TL,
}

// Move __csan_detach and __csan_task to the Preheader.
moveInstrumentation("__csan_detach", *Header, *Preheader,
Preheader->getTerminator());
moveInstrumentation("__csan_task", *TaskEntry, *Preheader,
Preheader->getTerminator());

// Move __csan_detach_continue and __csan_task_exit on the normal exit path to
// LatchExit.
moveInstrumentation("__csan_detach_continue", *Latch, *LatchExit);
moveInstrumentation("__csan_task", *TaskEntry, *Preheader);
moveInstrumentation("__csan_detach", *Header, *Preheader);

// Move __csan_task_exit on the normal exit path to LatchExit.
if (TaskExit)
// There's only one block with __csan_task_exit instrumentation to move, so
// move it from that block.
moveInstrumentation("__csan_task_exit", *TaskExit, *LatchExit);
moveInstrumentation("__csan_task_exit", *TaskExit, *LatchExit,
LatchExit->getTerminator());
else {
// We need to create PHI nodes for the arguments of a new instrumentation
// call in LatchExit.
Expand Down Expand Up @@ -364,12 +432,15 @@ void LoopOutlineProcessor::moveCilksanInstrumentation(TapirLoopInfo &TL,

// Insert new instrumentation call before the terminator of LatchExit.
CallInst::Create(InstrFunc->getFunctionType(), InstrFunc, InstrArgs, "",
&*LatchExit->getFirstInsertionPt());
LatchExit->getTerminator());

// Remove old instrumentation calls from predecessors
for (BasicBlock *Pred : predecessors(Latch))
Instrumentation[Pred]->eraseFromParent();
}
// Move __csan_detach_continue on the normal exit path to LatchExit.
moveInstrumentation("__csan_detach_continue", *Latch, *LatchExit,
LatchExit->getTerminator());
}

namespace {
Expand Down Expand Up @@ -598,7 +669,7 @@ void DACSpawning::implementDACIterSpawnOnHelper(
// Get end and grainsize arguments
Argument *End, *Grainsize;
{
auto OutlineArgsIter = Helper->arg_begin();
auto *OutlineArgsIter = Helper->arg_begin();
if (Helper->hasParamAttribute(0, Attribute::StructRet))
++OutlineArgsIter;
// End argument is second LC input.
Expand All @@ -611,7 +682,9 @@ void DACSpawning::implementDACIterSpawnOnHelper(
if (&(Helper->getEntryBlock()) == Preheader) {
// Split the entry block. We'll want to create a backedge into
// the split block later.
DACHead = SplitBlock(Preheader, &Preheader->front());
DACHead =
SplitBlock(Preheader, &Preheader->front(), (DomTreeUpdater *)nullptr,
nullptr, nullptr, Preheader->getName() + ".dac.head");

// Move any syncregion_start's in DACHead into Preheader.
BasicBlock::iterator InsertPoint = Preheader->begin();
Expand Down Expand Up @@ -690,8 +763,12 @@ void DACSpawning::implementDACIterSpawnOnHelper(
/*BranchWeights=*/nullptr);
RecurHead = RecurTerm->getParent();
// Create RecurHead, RecurDet, and RecurCont, with appropriate branches.
RecurDet = SplitBlock(RecurHead, RecurHead->getTerminator());
RecurCont = SplitBlock(RecurDet, RecurDet->getTerminator());
RecurDet = SplitBlock(RecurHead, RecurHead->getTerminator(),
(DomTreeUpdater *)nullptr, nullptr, nullptr,
Preheader->getName() + ".dac.detach");
RecurCont = SplitBlock(RecurDet, RecurDet->getTerminator(),
(DomTreeUpdater *)nullptr, nullptr, nullptr,
Preheader->getName() + ".dac.cont");
RecurCont->getTerminator()->replaceUsesOfWith(RecurTerm->getSuccessor(0),
DACHead);
}
Expand Down Expand Up @@ -758,7 +835,9 @@ void DACSpawning::implementDACIterSpawnOnHelper(
RecurCall->setDoesNotThrow();
} else {
InvokeInst *RecurCall;
BasicBlock *CallDest = SplitBlock(RecurDet, RecurDet->getTerminator());
BasicBlock *CallDest = SplitBlock(RecurDet, RecurDet->getTerminator(),
(DomTreeUpdater *)nullptr, nullptr,
nullptr, RecurDet->getName() + ".noexc");
BasicBlock *CallUnwind =
createTaskUnwind(Helper, UnwindDest, SyncRegion,
RecurDet->getName()+".unwind");
Expand Down Expand Up @@ -866,7 +945,9 @@ Task *LoopSpawningImpl::getTaskIfTapirLoop(const Loop *L) {
emitMissedWarning(L, Hints, &ORE);
}
return nullptr;
} else if (!isa<BranchInst>(Preheader->getTerminator())) {
}

if (!isa<BranchInst>(Preheader->getTerminator())) {
LLVM_DEBUG(dbgs() << "Loop preheader is not terminated by a branch.\n");
if (hintsDemandOutlining(Hints)) {
ORE.emit(TapirLoopInfo::createMissedAnalysis(LS_NAME, "ComplexPreheader",
Expand Down Expand Up @@ -1251,7 +1332,7 @@ class ArgEndMaterializer final : public OutlineMaterializer {
return OutlineMaterializer::materialize(V);
}
};
}
} // namespace

/// Outline Tapir loop \p TL into a helper function. The \p Args set specifies
/// the arguments to that helper function. The map \p VMap will store the
Expand Down Expand Up @@ -1428,9 +1509,9 @@ TaskOutlineMapTy LoopSpawningImpl::outlineAllTapirLoops() {
for (Task *T : post_order(TI.getRootTask())) {
if (TapirLoopInfo *TL = getTapirLoop(T)) {
PredicatedScalarEvolution PSE(SE, *TL->getLoop());
bool canOutline = TL->prepareForOutlining(DT, LI, TI, PSE, AC, LS_NAME,
bool CanOutline = TL->prepareForOutlining(DT, LI, TI, PSE, AC, LS_NAME,
ORE, TTI);
if (!canOutline) {
if (!CanOutline) {
const Loop *L = TL->getLoop();
TapirLoopHints Hints(L);
emitMissedWarning(L, Hints, &ORE);
Expand Down Expand Up @@ -1743,10 +1824,10 @@ struct LoopSpawningTI : public FunctionPass {
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
};
}
} // namespace

char LoopSpawningTI::ID = 0;
static const char ls_name[] = "Loop Spawning with Task Info";
static const char LsName[] = "Loop Spawning with Task Info";
INITIALIZE_PASS_BEGIN(LoopSpawningTI, LS_NAME, ls_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
Expand All @@ -1758,10 +1839,10 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopSpawningTI, LS_NAME, ls_name, false, false)
INITIALIZE_PASS_END(LoopSpawningTI, LS_NAME, LsName, false, false)

namespace llvm {
Pass *createLoopSpawningTIPass() {
return new LoopSpawningTI();
}
}
} // namespace llvm
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/TaskSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ static bool canRemoveTaskFrame(const Spindle *TF, MaybeParallelTasks &MPTasks,
continue;

// Skip spindles in nested taskframes.
if (S != TF && S->getTaskFrameParent() != TF)
if (S != TF && S->getTaskFrameParent() && S->getTaskFrameParent() != TF)
continue;

// Filter the task list of S to exclude tasks in parallel with the entry.
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/Tapir/loop-remark-iv.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: opt < %s -passes='loop-spawning' -pass-remarks-analysis=loop-spawning-ti -disable-output 2>&1 | FileCheck %s
; RUN: opt < %s -passes='loop-spawning' -pass-remarks-analysis=loop-spawning -disable-output 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,8 @@ pfor.cond.cleanup: ; preds = %pfor.inc
; CHECK: _Z10initializePdS_S_S_S_mm.exit.ls1:
; CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start()

; CHECK: _Z10initializePdS_S_S_S_mm.exit.ls1.split:
; CHECK-NEXT: %[[DACIV:.+]] = phi i64 [ %indvars.iv376.start.ls1, %_Z10initializePdS_S_S_S_mm.exit.ls1 ], [ %[[DACIVINC:.+]], %.split.split ]
; CHECK: _Z10initializePdS_S_S_S_mm.exit.ls1.dac.head:
; CHECK-NEXT: %[[DACIV:.+]] = phi i64 [ %indvars.iv376.start.ls1, %_Z10initializePdS_S_S_S_mm.exit.ls1 ], [ %[[DACIVINC:.+]], %_Z10initializePdS_S_S_S_mm.exit.ls1.dac.cont ]
; CHECK: %[[DACIVSTART:.+]] = trunc i64 %[[DACIV]] to i32
; CHECK: %[[ITERCOUNT:.+]] = sub i32 %[[END]], %[[DACIVSTART]]
; CHECK: %[[CMP:.+]] = icmp ugt i32 %[[ITERCOUNT]], %[[GRAINSIZE]]
Expand All @@ -230,7 +230,7 @@ pfor.cond.cleanup: ; preds = %pfor.inc

; CHECK: [[CONTIN]]:
; CHECK: %[[DACIVINC]] = zext i32 %[[MIDITER]] to i64
; CHECK: br label %_Z10initializePdS_S_S_S_mm.exit.ls1.split
; CHECK: br label %_Z10initializePdS_S_S_S_mm.exit.ls1.dac.head

declare dso_local double @sqrt(double) local_unnamed_addr #0

Expand Down
Loading

0 comments on commit f4d6301

Please sign in to comment.