From 0fbc0b926a3b5023fb5bd20b1041b54a77aad3a7 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Mon, 8 Jul 2024 07:24:45 -0400 Subject: [PATCH] [Tapir] Update OpenCilkABI to pass parent stack frame to spawn helpers. --- llvm/include/llvm/Transforms/Tapir/CilkABI.h | 2 +- .../llvm/Transforms/Tapir/LoweringUtils.h | 115 +++++++++-------- .../llvm/Transforms/Tapir/OpenCilkABI.h | 12 +- llvm/lib/Transforms/Tapir/CilkABI.cpp | 2 +- llvm/lib/Transforms/Tapir/LoweringUtils.cpp | 32 ++--- llvm/lib/Transforms/Tapir/OpenCilkABI.cpp | 120 +++++++++++++++--- llvm/lib/Transforms/Tapir/TapirToTarget.cpp | 54 +------- 7 files changed, 193 insertions(+), 144 deletions(-) diff --git a/llvm/include/llvm/Transforms/Tapir/CilkABI.h b/llvm/include/llvm/Transforms/Tapir/CilkABI.h index a803d7c2597f..9f94b3ec2250 100644 --- a/llvm/include/llvm/Transforms/Tapir/CilkABI.h +++ b/llvm/include/llvm/Transforms/Tapir/CilkABI.h @@ -127,7 +127,7 @@ class CilkABI : public TapirTarget { DominatorTree &DT) override final; LoopOutlineProcessor * - getLoopOutlineProcessor(const TapirLoopInfo *TL) const override final; + getLoopOutlineProcessor(const TapirLoopInfo *TL) override final; }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h b/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h index 54f69af873d1..af71de32cc6d 100644 --- a/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h +++ b/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h @@ -241,11 +241,10 @@ class TapirTarget { /// for the containing function, i.e., after the task has been outlined. virtual void lowerTaskFrameAddrCall(CallInst *TaskFrameAddrCall); - /// Lower a Tapir sync instruction SI. + /// Lower a Tapir sync instruction \p SI. virtual void lowerSync(SyncInst &SI) = 0; - virtual void lowerReducerOperation(CallBase *Call) { - } + virtual void lowerReducerOperation(CallBase *Call) {} /// Lower calls to the tapir.runtime.{start,end} intrinsics. Only /// tapir.runtime.start intrinsics are stored; uses of those intrinsics @@ -258,17 +257,30 @@ class TapirTarget { /// Returns true if Function F should be processed. virtual bool shouldProcessFunction(const Function &F) const; - /// Returns true if tasks in Function F should be outlined into their own + /// Returns true if tasks in Function \p F should be outlined into their own /// functions. Such outlining is a common step for many Tapir backends. virtual bool shouldDoOutlining(const Function &F) const { return true; } - /// Process Function F before any function outlining is performed. This + /// Process Function \p F before any function outlining is performed. This /// routine should not modify the CFG structure, unless it processes all Tapir - /// instructions in F itself. Returns true if it modifies the CFG, false + /// instructions in \p F itself. Returns true if it modifies the CFG, false /// otherwise. virtual bool preProcessFunction(Function &F, TaskInfo &TI, bool ProcessingTapirLoops = false) = 0; + /// Prepares the set \p HelperArgs of function arguments for the outlined + /// helper function. Also prepares the list \p HelperInputs of input values + /// passed to a call to Helper. + virtual void setupTaskOutlineArgs(Function &F, ValueSet &HelperArgs, + SmallVectorImpl &HelperInputs, + const ValueSet &TaskHelperArgs) { + // By default, simply copy the helper arguments into HelperInputs in order. + for (Value *V : TaskHelperArgs) { + HelperArgs.insert(V); + HelperInputs.push_back(V); + } + } + /// Returns an ArgStructMode enum value describing how inputs to a task should /// be passed to the task, e.g., directly as arguments to the outlined /// function, or marshalled in a structure. @@ -282,51 +294,51 @@ class TapirTarget { /// Get the Module where outlined Helper will be placed. Module &getDestinationModule() const { return DestM; } - // Add attributes to the Function Helper produced from outlining a task. + /// Add attributes to the Function \p Helper produced from outlining a task. virtual void addHelperAttributes(Function &Helper) {} - // Remap any Target-local structures after taskframe starting at TFEntry is - // outlined. + /// Remap any Target-local structures after taskframe starting at \p TFEntry + /// is outlined. virtual void remapAfterOutlining(BasicBlock *TFEntry, ValueToValueMapTy &VMap) {} - // Pre-process the Function F that has just been outlined from a task. This - // routine is executed on each outlined function by traversing in post-order - // the tasks in the original function. + /// Pre-process the Function \p F that has just been outlined from a task. + /// This routine is executed on each outlined function by traversing in + /// post-order the tasks in the original function. virtual void preProcessOutlinedTask(Function &F, Instruction *DetachPt, Instruction *TaskFrameCreate, bool IsSpawner, BasicBlock *TFEntry) = 0; - // Post-process the Function F that has just been outlined from a task. This - // routine is executed on each outlined function by traversing in post-order - // the tasks in the original function. + /// Post-process the Function \p F that has just been outlined from a task. + /// This routine is executed on each outlined function by traversing in + /// post-order the tasks in the original function. virtual void postProcessOutlinedTask(Function &F, Instruction *DetachPt, Instruction *TaskFrameCreate, bool IsSpawner, BasicBlock *TFEntry) = 0; - // Pre-process the root Function F as a function that can spawn subtasks. + /// Pre-process the root Function \p F as a function that can spawn subtasks. virtual void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) = 0; - // Post-process the root Function F as a function that can spawn subtasks. + /// Post-process the root Function \p F as a function that can spawn subtasks. virtual void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) = 0; - // Process the invocation of a task for an outlined function. This routine is - // invoked after processSpawner once for each child subtask. + /// Process the invocation of a task for an outlined function. This routine + /// is invoked after processSpawner once for each child subtask. virtual void processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) = 0; - // Process Function F at the end of the lowering process. + /// Process Function \p F at the end of the lowering process. virtual void postProcessFunction(Function &F, bool ProcessingTapirLoops = false) = 0; - // Process a generated helper Function F produced via outlining, at the end of - // the lowering process. + /// Process a generated helper Function \p F produced via outlining, at the + /// end of the lowering process. virtual void postProcessHelper(Function &F) = 0; virtual bool processOrdinaryFunction(Function &F, BasicBlock *TFEntry); - // Get the LoopOutlineProcessor associated with this Tapir target. + /// Get the LoopOutlineProcessor associated with this Tapir target. virtual LoopOutlineProcessor * - getLoopOutlineProcessor(const TapirLoopInfo *TL) const { + getLoopOutlineProcessor(const TapirLoopInfo *TL) { return nullptr; } }; @@ -395,12 +407,12 @@ class LoopOutlineProcessor { return ArgStructMode::None; } - /// Prepares the set HelperArgs of function arguments for the outlined helper - /// function Helper for a Tapir loop. Also prepares the list HelperInputs of - /// input values passed to a call to Helper. HelperArgs and HelperInputs are - /// derived from the loop-control arguments LCArgs and loop-control inputs - /// LCInputs for the Tapir loop, as well the set TLInputsFixed of arguments to - /// the task underlying the Tapir loop. + /// Prepares the set \p HelperArgs of function arguments for the outlined + /// helper function Helper for a Tapir loop. Also prepares the list \p + /// HelperInputs of input values passed to a call to Helper. \p HelperArgs + /// and \p HelperInputs are derived from the loop-control arguments \p LCArgs + /// and loop-control inputs \p LCInputs for the Tapir loop, as well the set + /// \p TLInputsFixed of arguments to the task underlying the Tapir loop. virtual void setupLoopOutlineArgs( Function &F, ValueSet &HelperArgs, SmallVectorImpl &HelperInputs, ValueSet &InputSet, const SmallVectorImpl &LCArgs, @@ -415,7 +427,7 @@ class LoopOutlineProcessor { virtual unsigned getIVArgIndex(const Function &F, const ValueSet &Args) const; /// Returns an integer identifying the index of the helper-function argument - /// in Args that specifies the ending iteration number. This return value + /// in \p Args that specifies the ending iteration number. This return value /// must complement the behavior of setupLoopOutlineArgs(). virtual unsigned getLimitArgIndex(const Function &F, const ValueSet &Args) const { @@ -529,25 +541,27 @@ Function *createHelperForTaskFrame( Instruction *replaceTaskFrameWithCallToOutline( Spindle *TF, TaskOutlineInfo &Out, SmallVectorImpl &OutlineInputs); -/// Outlines a task \p T into a helper function that accepts the inputs \p -/// Inputs. The map \p VMap is updated with the mapping of instructions in \p T -/// to instructions in the new helper function. Information about the helper -/// function is returned as a TaskOutlineInfo structure. -TaskOutlineInfo outlineTask( - Task *T, ValueSet &Inputs, SmallVectorImpl &HelperInputs, - Module *DestM, ValueToValueMapTy &VMap, - TapirTarget::ArgStructMode UseArgStruct, Type *ReturnType, - ValueToValueMapTy &InputMap, OutlineAnalysis &OA); - -/// Outlines a taskframe \p TF into a helper function that accepts the inputs \p -/// Inputs. The map \p VMap is updated with the mapping of instructions in \p -/// TF to instructions in the new helper function. Information about the helper -/// function is returned as a TaskOutlineInfo structure. -TaskOutlineInfo outlineTaskFrame( - Spindle *TF, ValueSet &Inputs, SmallVectorImpl &HelperInputs, - Module *DestM, ValueToValueMapTy &VMap, - TapirTarget::ArgStructMode UseArgStruct, Type *ReturnType, - ValueToValueMapTy &InputMap, OutlineAnalysis &OA); +/// Outlines a task \p T into a helper function that accepts the inputs +/// \p Inputs. The map \p VMap is updated with the mapping of instructions in +/// \p T to instructions in the new helper function. Information about the +/// helper function is returned as a TaskOutlineInfo structure. +TaskOutlineInfo outlineTask(Task *T, ValueSet &Inputs, + SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode UseArgStruct, + Type *ReturnType, ValueToValueMapTy &InputMap, + OutlineAnalysis &OA, TapirTarget *Target); + +/// Outlines a taskframe \p TF into a helper function that accepts the inputs +/// \p Inputs. The map \p VMap is updated with the mapping of instructions in +/// \p TF to instructions in the new helper function. Information about the +/// helper function is returned as a TaskOutlineInfo structure. +TaskOutlineInfo outlineTaskFrame(Spindle *TF, ValueSet &Inputs, + SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode UseArgStruct, + Type *ReturnType, ValueToValueMapTy &InputMap, + OutlineAnalysis &OA); //----------------------------------------------------------------------------// // Methods for lowering Tapir loops @@ -556,7 +570,6 @@ TaskOutlineInfo outlineTaskFrame( /// returns the set of inputs for the Tapir loop itself. ValueSet getTapirLoopInputs(TapirLoopInfo *TL, ValueSet &TaskInputs); - /// Replaces the Tapir loop \p TL, with associated TaskOutlineInfo \p Out, with /// a call or invoke to the outlined helper function created for \p TL. Instruction *replaceLoopWithCallToOutline( diff --git a/llvm/include/llvm/Transforms/Tapir/OpenCilkABI.h b/llvm/include/llvm/Transforms/Tapir/OpenCilkABI.h index a4bae8a902ca..24cccd19130e 100644 --- a/llvm/include/llvm/Transforms/Tapir/OpenCilkABI.h +++ b/llvm/include/llvm/Transforms/Tapir/OpenCilkABI.h @@ -140,11 +140,10 @@ class OpenCilkABI final : public TapirTarget { Value *CreateStackFrame(Function &F); Value *GetOrCreateCilkStackFrame(Function &F); - CallInst *InsertStackFramePush(Function &F, - Instruction *TaskFrameCreate = nullptr, - bool Helper = false); + CallInst *InsertStackFramePush(Function &F, Instruction *TaskFrameCreate, + bool Helper, bool Spawner); void InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, - bool InsertPauseFrame, bool Helper); + bool InsertPauseFrame, bool Helper, bool Spawner); void InsertDetach(Function &F, Instruction *DetachPt); @@ -166,6 +165,9 @@ class OpenCilkABI final : public TapirTarget { ArgStructMode getArgStructMode() const override final { return ArgStructMode::None; } + void setupTaskOutlineArgs(Function &F, ValueSet &HelperArgs, + SmallVectorImpl &HelperInputs, + const ValueSet &TaskHelperArgs) override final; void addHelperAttributes(Function &F) override final; void remapAfterOutlining(BasicBlock *TFEntry, @@ -190,7 +192,7 @@ class OpenCilkABI final : public TapirTarget { bool processOrdinaryFunction(Function &F, BasicBlock *TFEntry) override final; LoopOutlineProcessor * - getLoopOutlineProcessor(const TapirLoopInfo *TL) const override final; + getLoopOutlineProcessor(const TapirLoopInfo *TL) override final; }; } // namespace llvm diff --git a/llvm/lib/Transforms/Tapir/CilkABI.cpp b/llvm/lib/Transforms/Tapir/CilkABI.cpp index 887b4034749a..328d1023ba43 100644 --- a/llvm/lib/Transforms/Tapir/CilkABI.cpp +++ b/llvm/lib/Transforms/Tapir/CilkABI.cpp @@ -1784,7 +1784,7 @@ void CilkABI::postProcessFunction(Function &F, bool ProcessingTapirLoops) { void CilkABI::postProcessHelper(Function &F) {} LoopOutlineProcessor * -CilkABI::getLoopOutlineProcessor(const TapirLoopInfo *TL) const { +CilkABI::getLoopOutlineProcessor(const TapirLoopInfo *TL) { if (UseRuntimeCilkFor) return new RuntimeCilkFor(M); return nullptr; diff --git a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp index 3132a58f855d..031e14ef43a2 100644 --- a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp +++ b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp @@ -975,9 +975,8 @@ TaskOutlineInfo llvm::outlineTaskFrame( Module *DestM, ValueToValueMapTy &VMap, TapirTarget::ArgStructMode UseArgStruct, Type *ReturnType, ValueToValueMapTy &InputMap, OutlineAnalysis &OA) { - if (Task *T = TF->getTaskFromTaskFrame()) - return outlineTask(T, Inputs, HelperInputs, DestM, VMap, UseArgStruct, - ReturnType, InputMap, OA); + assert(!TF->getTaskFromTaskFrame() && + "outlineTaskFrame called to outline task."); Function &F = *TF->getEntry()->getParent(); BasicBlock *Entry = TF->getEntry(); @@ -1062,15 +1061,16 @@ Instruction *llvm::replaceTaskFrameWithCallToOutline( return TopCall; } -/// Outlines a task \p T into a helper function that accepts the inputs \p -/// Inputs. The map \p VMap is updated with the mapping of instructions in \p T -/// to instructions in the new helper function. Information about the helper -/// function is returned as a TaskOutlineInfo structure. -TaskOutlineInfo llvm::outlineTask( - Task *T, ValueSet &Inputs, SmallVectorImpl &HelperInputs, - Module *DestM, ValueToValueMapTy &VMap, - TapirTarget::ArgStructMode UseArgStruct, Type *ReturnType, - ValueToValueMapTy &InputMap, OutlineAnalysis &OA) { +/// Outlines a task \p T into a helper function that accepts the inputs +/// \p Inputs. The map \p VMap is updated with the mapping of instructions in +/// \p T to instructions in the new helper function. Information about the +/// helper function is returned as a TaskOutlineInfo structure. +TaskOutlineInfo llvm::outlineTask(Task *T, ValueSet &Inputs, + SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode UseArgStruct, + Type *ReturnType, ValueToValueMapTy &InputMap, + OutlineAnalysis &OA, TapirTarget *Target) { assert(!T->isRootTask() && "Cannot outline the root task."); Function &F = *T->getEntry()->getParent(); DetachInst *DI = T->getDetach(); @@ -1094,11 +1094,11 @@ TaskOutlineInfo llvm::outlineTask( } // Convert the inputs of the task to inputs to the helper. - ValueSet HelperArgs; - Instruction *ArgsStart = fixupHelperInputs(F, T, Inputs, HelperArgs, StorePt, + ValueSet TaskHelperArgs; + Instruction *ArgsStart = fixupHelperInputs(F, T, Inputs, TaskHelperArgs, StorePt, LoadPt, UseArgStruct, InputMap); - for (Value *V : HelperArgs) - HelperInputs.push_back(V); + ValueSet HelperArgs; + Target->setupTaskOutlineArgs(F, HelperArgs, HelperInputs, TaskHelperArgs); // Clone the blocks into a helper function. Function *Helper = createHelperForTask(F, T, HelperArgs, DestM, VMap, diff --git a/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp b/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp index 3d6b4d1c4de2..b5815cbc5e8f 100644 --- a/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp +++ b/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp @@ -17,20 +17,24 @@ #include "llvm/ADT/StringSet.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ModRef.h" #include "llvm/Transforms/Tapir/CilkRTSCilkFor.h" +#include "llvm/Transforms/Tapir/LoweringUtils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" @@ -212,16 +216,23 @@ void OpenCilkABI::prepareModule() { PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy); Type *VoidTy = Type::getVoidTy(C); Type *VoidPtrTy = PointerType::getUnqual(C); + Type *BoolTy = Type::getInt1Ty(C); // Define the types of the CilkRTS functions. FunctionType *CilkRTSFnTy = FunctionType::get(VoidTy, {StackFramePtrTy}, false); + FunctionType *CilkRTSHelperFnTy = FunctionType::get( + VoidTy, {StackFramePtrTy, StackFramePtrTy, BoolTy}, false); FunctionType *CilkPrepareSpawnFnTy = FunctionType::get(Int32Ty, {StackFramePtrTy}, false); + FunctionType *CilkRTSDetachFnTy = + FunctionType::get(VoidTy, {StackFramePtrTy, StackFramePtrTy}, false); FunctionType *CilkRTSEnterLandingpadFnTy = FunctionType::get(VoidTy, {StackFramePtrTy, Int32Ty}, false); FunctionType *CilkRTSPauseFrameFnTy = FunctionType::get( - VoidTy, {StackFramePtrTy, PointerType::getUnqual(C)}, false); + VoidTy, + {StackFramePtrTy, StackFramePtrTy, PointerType::getUnqual(C), BoolTy}, + false); FunctionType *Grainsize8FnTy = FunctionType::get(Int8Ty, {Int8Ty}, false); FunctionType *Grainsize16FnTy = FunctionType::get(Int16Ty, {Int16Ty}, false); FunctionType *Grainsize32FnTy = FunctionType::get(Int32Ty, {Int32Ty}, false); @@ -240,15 +251,17 @@ void OpenCilkABI::prepareModule() { // FunctionCallee member variables in the OpenCilkABI class. CilkRTSFnDesc CilkRTSFunctions[] = { {"__cilkrts_enter_frame", CilkRTSFnTy, CilkRTSEnterFrame}, - {"__cilkrts_enter_frame_helper", CilkRTSFnTy, CilkRTSEnterFrameHelper}, - {"__cilkrts_detach", CilkRTSFnTy, CilkRTSDetach}, + {"__cilkrts_enter_frame_helper", CilkRTSHelperFnTy, + CilkRTSEnterFrameHelper}, + {"__cilkrts_detach", CilkRTSDetachFnTy, CilkRTSDetach}, {"__cilkrts_leave_frame", CilkRTSFnTy, CilkRTSLeaveFrame}, - {"__cilkrts_leave_frame_helper", CilkRTSFnTy, CilkRTSLeaveFrameHelper}, + {"__cilkrts_leave_frame_helper", CilkRTSHelperFnTy, + CilkRTSLeaveFrameHelper}, {"__cilk_prepare_spawn", CilkPrepareSpawnFnTy, CilkPrepareSpawn}, {"__cilk_sync", CilkRTSFnTy, CilkSync}, {"__cilk_sync_nothrow", CilkRTSFnTy, CilkSyncNoThrow}, {"__cilk_parent_epilogue", CilkRTSFnTy, CilkParentEpilogue}, - {"__cilk_helper_epilogue", CilkRTSFnTy, CilkHelperEpilogue}, + {"__cilk_helper_epilogue", CilkRTSHelperFnTy, CilkHelperEpilogue}, {"__cilkrts_enter_landingpad", CilkRTSEnterLandingpadFnTy, CilkRTSEnterLandingpad}, {"__cilkrts_pause_frame", CilkRTSPauseFrameFnTy, CilkRTSPauseFrame}, @@ -333,6 +346,47 @@ void OpenCilkABI::prepareModule() { } } +static bool isSRetInput(const Value *V, const Function &F) { + if (!isa(V)) + return false; + + const auto *ArgIter = F.arg_begin(); + if (F.hasParamAttribute(0, Attribute::StructRet) && V == &*ArgIter) + return true; + ++ArgIter; + if (F.hasParamAttribute(1, Attribute::StructRet) && V == &*ArgIter) + return true; + + return false; +} + +void OpenCilkABI::setupTaskOutlineArgs(Function &F, ValueSet &HelperArgs, + SmallVectorImpl &HelperInputs, + const ValueSet &TaskHelperArgs) { + PointerType *SFPtrTy = PointerType::getUnqual(F.getContext()); + + // First add the sret task input, if it exists. + ValueSet::iterator TaskInputIter = TaskHelperArgs.begin(); + if ((TaskInputIter != TaskHelperArgs.end()) && isSRetInput(*TaskInputIter, F)) { + HelperArgs.insert(*TaskInputIter); + HelperInputs.push_back(*TaskInputIter); + ++TaskInputIter; + } + + // Add a pointer for the parent stack frame. This pointer will be replaced + // later in the call to the helper. + Value *ParentSFArg = ConstantPointerNull::get(SFPtrTy); + HelperArgs.insert(ParentSFArg); + HelperInputs.push_back(ParentSFArg); + + // Add the remaining task input arguments. + while (TaskInputIter != TaskHelperArgs.end()) { + Value *V = *TaskInputIter++; + HelperArgs.insert(V); + HelperInputs.push_back(V); + } +} + void OpenCilkABI::addHelperAttributes(Function &Helper) { // Use a fast calling convention for the helper. Helper.setCallingConv(CallingConv::Fast); @@ -451,6 +505,10 @@ Value* OpenCilkABI::GetOrCreateCilkStackFrame(Function &F) { return SF; } +static unsigned getParentSFArgNum(Function &H) { + return isSRetInput(H.getArg(0), H) ? 1 : 0; +} + // Helper function to add a debug location to an IRBuilder if it otherwise lacks // a debug location. static void ensureDebugLocation(IRBuilder<> &B, Function &F) { @@ -465,7 +523,6 @@ static void ensureDebugLocation(IRBuilder<> &B, Function &F) { void OpenCilkABI::InsertDetach(Function &F, Instruction *DetachPt) { Instruction *SF = cast(GetOrCreateCilkStackFrame(F)); assert(SF && "No Cilk stack frame for Cilk function."); - Value *Args[1] = {SF}; // Scan function to see if it detaches. LLVM_DEBUG({ @@ -477,7 +534,7 @@ void OpenCilkABI::InsertDetach(Function &F, Instruction *DetachPt) { // Call __cilkrts_detach IRBuilder<> IRB(DetachPt); ensureDebugLocation(IRB, F); - IRB.CreateCall(CILKRTS_FUNC(detach), Args); + IRB.CreateCall(CILKRTS_FUNC(detach), {SF, F.getArg(getParentSFArgNum(F))}); } // Insert a call in Function F to __cilkrts_enter_frame{_helper} to initialize @@ -485,7 +542,7 @@ void OpenCilkABI::InsertDetach(Function &F, Instruction *DetachPt) { // __cilkrts_enter_frame{_helper} is inserted at TaskFramecreate. CallInst *OpenCilkABI::InsertStackFramePush(Function &F, Instruction *TaskFrameCreate, - bool Helper) { + bool Helper, bool Spawner) { Instruction *SF = cast(GetOrCreateCilkStackFrame(F)); BasicBlock::iterator InsertPt = ++SF->getIterator(); @@ -494,10 +551,12 @@ CallInst *OpenCilkABI::InsertStackFramePush(Function &F, B.SetInsertPoint(TaskFrameCreate); ensureDebugLocation(B, F); - Value *Args[1] = {SF}; if (Helper) - return B.CreateCall(CILKRTS_FUNC(enter_frame_helper), Args); - return B.CreateCall(CILKRTS_FUNC(enter_frame), Args); + return B.CreateCall( + CILKRTS_FUNC(enter_frame_helper), + {SF, F.getArg(getParentSFArgNum(F)), + ConstantInt::getBool(Type::getInt1Ty(F.getContext()), Spawner)}); + return B.CreateCall(CILKRTS_FUNC(enter_frame), {SF}); } // Helper method to copy the debug location from RI to CI or add an empty debug @@ -526,7 +585,8 @@ static void copyDebugLocation(Instruction *CI, const Instruction *RI, // promoted to invoke instructions prior to inserting the epilogue-function // calls. void OpenCilkABI::InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, - bool InsertPauseFrame, bool Helper) { + bool InsertPauseFrame, bool Helper, + bool Spawner) { Value *SF = GetOrCreateCilkStackFrame(F); SmallPtrSet Returns; SmallPtrSet Resumes; @@ -552,7 +612,11 @@ void OpenCilkABI::InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, for (ReturnInst *RI : Returns) { CallInst *CI; if (Helper) { - CI = CallInst::Create(GetCilkHelperEpilogueFn(), {SF}, "", RI); + CI = CallInst::Create( + GetCilkHelperEpilogueFn(), + {SF, F.getArg(getParentSFArgNum(F)), + ConstantInt::getBool(Type::getInt1Ty(F.getContext()), Spawner)}, + "", RI); } else { CI = CallInst::Create(GetCilkParentEpilogueFn(), {SF}, "", RI); } @@ -563,7 +627,11 @@ void OpenCilkABI::InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, Value *Exn = ExtractValueInst::Create(RI->getValue(), {0}, "", RI); // If throwing an exception, pass the exception object to the epilogue // function. - CallInst *CI = CallInst::Create(GetCilkHelperEpilogueExnFn(), {SF, Exn}, "", RI); + CallInst *CI = CallInst::Create( + GetCilkHelperEpilogueExnFn(), + {SF, F.getArg(getParentSFArgNum(F)), Exn, + ConstantInt::getBool(Type::getInt1Ty(F.getContext()), Spawner)}, + "", RI); copyDebugLocation(CI, RI, F); } } @@ -749,8 +817,8 @@ void OpenCilkABI::preProcessOutlinedTask(Function &F, Instruction *DetachPt, MarkSpawner(F); CallInst *EnterFrame = - InsertStackFramePush(F, TaskFrameCreate, /*Helper*/ true); - InsertDetach(F, (DetachPt ? DetachPt : &*(++EnterFrame->getIterator()))); + InsertStackFramePush(F, TaskFrameCreate, /*Helper*/ true, IsSpawner); + InsertDetach(F, (DetachPt ? DetachPt : EnterFrame->getNextNode())); } void OpenCilkABI::postProcessOutlinedTask(Function &F, Instruction *DetachPt, @@ -762,7 +830,7 @@ void OpenCilkABI::postProcessOutlinedTask(Function &F, Instruction *DetachPt, // the parent was stolen, in which case we want to save the exception for // later reduction. InsertStackFramePop(F, /*PromoteCallsToInvokes*/ true, - /*InsertPauseFrame*/ true, /*Helper*/ true); + /*InsertPauseFrame*/ true, /*Helper*/ true, IsSpawner); // TODO: If F is itself a spawner, see if we need to ensure that the Cilk // personality function does not pop an already-popped frame. We might be @@ -773,7 +841,7 @@ void OpenCilkABI::postProcessOutlinedTask(Function &F, Instruction *DetachPt, void OpenCilkABI::preProcessRootSpawner(Function &F, BasicBlock *TFEntry) { MarkSpawner(F); if (TapirRTCalls[TFEntry].empty()) { - InsertStackFramePush(F); + InsertStackFramePush(F, nullptr, false, true); } else { LowerTapirRTCalls(F, TFEntry); } @@ -813,7 +881,8 @@ void OpenCilkABI::postProcessRootSpawner(Function &F, BasicBlock *TFEntry) { // popping the frame if no landingpad exists for a given call. if (TapirRTCalls[TFEntry].empty()) InsertStackFramePop(F, /*PromoteCallsToInvokes*/ false, - /*InsertPauseFrame*/ false, /*Helper*/ false); + /*InsertPauseFrame*/ false, /*Helper*/ false, + /*Spawner*/ true); } void OpenCilkABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { @@ -821,9 +890,18 @@ void OpenCilkABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { Instruction *ReplCall = TOI.ReplCall; Function &F = *ReplCall->getFunction(); + LLVMContext &C = F.getContext(); Value *SF = DetachCtxToStackFrame[&F]; assert(SF && "No frame found for spawning task"); + const unsigned ParentSFArgNum = getParentSFArgNum(*TOI.Outline); + assert(ReplCall->getOperand(ParentSFArgNum) == + ConstantPointerNull::get(PointerType::getUnqual(C))); + ReplCall->setOperand(ParentSFArgNum, SF); + Argument *ParentSFArg = TOI.Outline->getArg(ParentSFArgNum); + ParentSFArg->addAttr( + Attribute::getWithAlignment(C, StackFrameAlign.valueOrOne())); + // Split the basic block containing the detach replacement just before the // start of the detach-replacement instructions. BasicBlock *DetBlock = ReplStart->getParent(); @@ -1110,8 +1188,8 @@ bool OpenCilkABI::processOrdinaryFunction(Function &F, BasicBlock *TFEntry) { void OpenCilkABI::postProcessHelper(Function &F) {} -LoopOutlineProcessor *OpenCilkABI::getLoopOutlineProcessor( - const TapirLoopInfo *TL) const { +LoopOutlineProcessor * +OpenCilkABI::getLoopOutlineProcessor(const TapirLoopInfo *TL) { if (UseRuntimeCilkFor) return new RuntimeCilkFor(M); return nullptr; diff --git a/llvm/lib/Transforms/Tapir/TapirToTarget.cpp b/llvm/lib/Transforms/Tapir/TapirToTarget.cpp index 0e87a441fd18..ceaf9d13e579 100644 --- a/llvm/lib/Transforms/Tapir/TapirToTarget.cpp +++ b/llvm/lib/Transforms/Tapir/TapirToTarget.cpp @@ -62,7 +62,6 @@ class TapirToTargetImpl { bool run(); private: - bool unifyReturns(Function &F); bool processFunction(Function &F, SmallVectorImpl &NewHelpers); TFOutlineMapTy outlineAllTasks(Function &F, SmallVectorImpl &AllTaskFrames, @@ -87,46 +86,6 @@ class TapirToTargetImpl { function_ref GetTLI; }; -bool TapirToTargetImpl::unifyReturns(Function &F) { - NamedRegionTimer NRT("unifyReturns", "Unify returns", TimerGroupName, - TimerGroupDescription, TimePassesIsEnabled); - SmallVector ReturningBlocks; - for (BasicBlock &BB : F) - if (isa(BB.getTerminator())) - ReturningBlocks.push_back(&BB); - - // If this function already has no returns or a single return, then terminate - // early. - if (ReturningBlocks.size() <= 1) - return false; - - BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), - "UnifiedReturnBlock", &F); - PHINode *PN = nullptr; - if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); - } else { - // If the function doesn't return void... add a PHI node to the block... - PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), - "UnifiedRetVal", NewRetBlock); - ReturnInst::Create(F.getContext(), PN, NewRetBlock); - } - - // Loop over all of the blocks, replacing the return instruction with an - // unconditional branch. - // - for (BasicBlock *BB : ReturningBlocks) { - // Add an incoming element to the PHI node for every return instruction that - // is merging into this new block... - if (PN) - PN->addIncoming(BB->getTerminator()->getOperand(0), BB); - - BB->back().eraseFromParent(); // Remove the return insn - BranchInst::Create(NewRetBlock, BB); - } - return true; -} - /// Outline all tasks in this function in post order. TFOutlineMapTy TapirToTargetImpl::outlineAllTasks(Function &F, @@ -150,12 +109,9 @@ TapirToTargetImpl::outlineAllTasks(Function &F, replaceTaskFrameWithCallToOutline(SubTF, TFToOutline[SubTF], HelperInputs[SubTF])); - // TODO: Add support for outlining taskframes with no associated task. Such - // a facility would allow the frontend to create nested sync regions that - // are properly outlined. - Task *T = TF->getTaskFromTaskFrame(); if (!T) { + // Outline taskframe with no associated task. ValueToValueMapTy VMap; ValueToValueMapTy InputMap; TFToOutline[TF] = outlineTaskFrame(TF, TFInputs[TF], HelperInputs[TF], @@ -187,10 +143,10 @@ TapirToTargetImpl::outlineAllTasks(Function &F, ValueToValueMapTy VMap; ValueToValueMapTy InputMap; - TFToOutline[TF] = outlineTask(T, TFInputs[TF], HelperInputs[TF], - &Target->getDestinationModule(), VMap, - Target->getArgStructMode(), - Target->getReturnType(), InputMap, OA); + TFToOutline[TF] = outlineTask( + T, TFInputs[TF], HelperInputs[TF], &Target->getDestinationModule(), + VMap, Target->getArgStructMode(), Target->getReturnType(), InputMap, OA, + Target); // If the detach for task T does not catch an exception from the task, then // the outlined function cannot throw. if (F.doesNotThrow() && !T->getDetach()->hasUnwindDest())