From b313da335ad0532bab977a1bd538d0cadd0ff477 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Fri, 6 Dec 2024 20:46:55 -0500 Subject: [PATCH 1/5] [Verifier] Improve verifier checks of Tapir taskframes and intrinsics. --- llvm/lib/IR/Verifier.cpp | 90 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 58df691c4d74..e64f4ce0f8ee 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -593,6 +593,8 @@ class Verifier : public InstVisitor, VerifierSupport { void verifyTask(const DetachInst *DI); void visitDetachInst(DetachInst &DI); void visitReattachInst(ReattachInst &RI); + void visitSyncInst(SyncInst &SI); + void verifyTaskFrame(const CallBase *TF); void verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal); void verifySwiftErrorValue(const Value *SwiftErrorVal); @@ -3175,6 +3177,73 @@ void Verifier::visitReattachInst(ReattachInst &RI) { visitTerminator(RI); } +void Verifier::visitSyncInst(SyncInst &SI) { + visitTerminator(SI); +} + +void Verifier::verifyTaskFrame(const CallBase *TF) { + // Gather endpoints of the taskframe. + SmallPtrSet TFEnds; + bool IsDead = true; + for (const User *U : TF->users()) { + if (const CallBase *CB = dyn_cast(U)) { + if (const Function *Called = CB->getCalledFunction()) { + // All taskframe.end and taskframe.resume users directly indicate + // taskframe ends. All taskframe.use users identify spawned tasks. + if (Intrinsic::taskframe_end == Called->getIntrinsicID() || + Intrinsic::taskframe_resume == Called->getIntrinsicID()) { + TFEnds.insert(CB->getParent()); + IsDead = false; + } else if (Intrinsic::taskframe_use == Called->getIntrinsicID()) { + // Use the continuation block of the spawned task as the taskframe + // end. + if (const BasicBlock *Detacher = CB->getParent()->getUniquePredecessor()) { + if (const DetachInst *Detach = dyn_cast(Detacher->getTerminator())) { + TFEnds.insert(Detach->getContinue()); + IsDead = false; + } + } + // Also include the block containing the taskframe.use. If this + // taskframe is really used in a spawned task, and is not just dead + // code, then verifyTask() will check the spawned task itself. + TFEnds.insert(CB->getParent()); + } + } + } + } + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(TF->getParent()); + do { + const BasicBlock *BB = Worklist.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + // If this block is a taskframe end, stop the traversal. + if (TFEnds.contains(BB)) + continue; + + // Check that do not encounter a return or resume in the middle of the + // task. + Check(IsDead || (!isa(BB->getTerminator()) && + !isa(BB->getTerminator())), + "Unexpected return or resume in taskframe", TF, BB->getTerminator()); + + // Ignore the placeholder continuation of a taskframe.resume or + // detached.rethrow. + const BasicBlock *SuccToIgnore = nullptr; + if (const InvokeInst *II = dyn_cast(BB->getTerminator())) + if (isTapirIntrinsic(Intrinsic::taskframe_resume, II, TF) || + isDetachedRethrow(II)) + SuccToIgnore = II->getNormalDest(); + + // Add the successors of this basic block. + for (const BasicBlock *Successor : successors(BB)) + if (Successor != SuccToIgnore) + Worklist.push_back(Successor); + } while (!Worklist.empty()); +} + void Verifier::visitDetachInst(DetachInst &DI) { if (DetachesVisited.insert(&DI).second) verifyTask(&DI); @@ -6260,6 +6329,27 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "tapir.runtime.start has no associated tapir.runtime.end", &Call); break; } + case Intrinsic::taskframe_create: { + if (DT.isReachableFromEntry(Call.getParent())) + verifyTaskFrame(&Call); + break; + } + case Intrinsic::taskframe_resume: { + Check(isa(Call), "taskframe.resume is not invoked", &Call); + if (InvokeInst *I = dyn_cast(&Call)) { + Check(isa(I->getNormalDest()->getTerminator()), + "taskframe.resume normal destination is not unreachable", &Call); + } + break; + } + case Intrinsic::detached_rethrow: { + Check(isa(Call), "detached.rethrow is not invoked", &Call); + if (InvokeInst *I = dyn_cast(&Call)) { + Check(isa(I->getNormalDest()->getTerminator()), + "detached.rethrow normal destination is not unreachable", &Call); + } + break; + } }; // Verify that there aren't any unmediated control transfers between funclets. From adeb711e3a34665134b5c2812ef09e5b9004c595 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Fri, 6 Dec 2024 20:56:20 -0500 Subject: [PATCH 2/5] [Tapir] Improve logic for inserting and cleaning up taskframe intrinsics. --- clang/lib/CodeGen/CodeGenFunction.h | 2 ++ .../InstCombine/InstCombineCalls.cpp | 2 +- llvm/lib/Transforms/Tapir/LoweringUtils.cpp | 19 ++++++------ llvm/lib/Transforms/Tapir/TapirToTarget.cpp | 4 +++ llvm/lib/Transforms/Utils/InlineFunction.cpp | 30 ++++++++++++------- 5 files changed, 36 insertions(+), 21 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 86d2a38ddd1a..e3c88f93e156 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1523,6 +1523,8 @@ class CodeGenFunction : public CodeGenTypeCache { ~DetachScope() { if (TempInvokeDest && TempInvokeDest->use_empty()) delete TempInvokeDest; + if (TaskFrame && TaskFrame->use_empty()) + cast(TaskFrame)->eraseFromParent(); CGF.CurDetachScope = ParentScope; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 22cd821c44ee..4b093b27882b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2867,7 +2867,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Instruction *I = dyn_cast(U)) if (isTapirIntrinsic(Intrinsic::taskframe_use, I) || isTapirIntrinsic(Intrinsic::taskframe_end, I) || - isTaskFrameResume(I)) { + isTapirIntrinsic(Intrinsic::taskframe_resume, I)) { ++NumUsers; break; } diff --git a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp index 124b9f57ed3a..bb906d297230 100644 --- a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp +++ b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp @@ -923,6 +923,7 @@ Function *llvm::createHelperForTaskFrame( // values in old function. AddAlignmentAssumptions(&F, Args, VMap, &Header->front(), &OA.AC, &OA.DT); + SmallVector TaskEnds; // Move allocas in the newly cloned detached CFG to the entry block of the // helper. { @@ -930,9 +931,9 @@ Function *llvm::createHelperForTaskFrame( TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); // Collect the end instructions of the task. - SmallVector TaskEnds; for (BasicBlock *EndBlock : TFEndBlocks) - TaskEnds.push_back(cast(VMap[EndBlock])->getTerminator()); + TaskEnds.push_back( + cast(VMap[EndBlock])->getTerminator()->getPrevNode()); for (BasicBlock *EndBlock : TFResumeBlocks) TaskEnds.push_back(cast(VMap[EndBlock])->getTerminator()); @@ -953,14 +954,12 @@ Function *llvm::createHelperForTaskFrame( TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); SmallVector TFEndsToRemove; - for (BasicBlock *EndBlock : TFEndBlocks) { - BasicBlock *ClonedEndBlock = cast(VMap[EndBlock]); - if (Instruction *Prev = ClonedEndBlock->getTerminator()->getPrevNode()) - if (isTapirIntrinsic(Intrinsic::taskframe_end, Prev)) - TFEndsToRemove.push_back(Prev); - } - for (Instruction *ClonedTFEnd : TFEndsToRemove) - ClonedTFEnd->eraseFromParent(); + for (Instruction *TFEnd : TaskEnds) + if (isTapirIntrinsic(Intrinsic::taskframe_end, TFEnd)) + TFEndsToRemove.push_back(TFEnd); + + for (Instruction *TFEnd : TFEndsToRemove) + TFEnd->eraseFromParent(); } Helper->setMemoryEffects(computeFunctionBodyMemoryAccess(*Helper, OA.AA)); diff --git a/llvm/lib/Transforms/Tapir/TapirToTarget.cpp b/llvm/lib/Transforms/Tapir/TapirToTarget.cpp index 84d9ffc43fb3..297d99c79eb6 100644 --- a/llvm/lib/Transforms/Tapir/TapirToTarget.cpp +++ b/llvm/lib/Transforms/Tapir/TapirToTarget.cpp @@ -135,6 +135,10 @@ TapirToTargetImpl::outlineAllTasks(Function &F, for (Spindle *SubTF : TF->subtaskframes()) TFToOutline[SubTF].remapOutlineInfo(VMap, InputMap); + if (Instruction *ClonedTFCreate = + dyn_cast(VMap[TF->getTaskFrameCreate()])) + ClonedTFCreate->eraseFromParent(); + continue; } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index ed4498307acd..7456e4a33760 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2443,6 +2443,16 @@ static void HandleInlinedResumeInTask(BasicBlock *EntryBlock, BasicBlock *Ctx, } } +// Simple RAII object for managing creation of taskframe for inlined function. +struct TaskFrameScope { + CallInst *TFCreate = nullptr; + TaskFrameScope() = default; + ~TaskFrameScope() { + if (TFCreate && TFCreate->use_empty()) + TFCreate->eraseFromParent(); + } +}; + /// This function inlines the called function into the basic block of the /// caller. This returns false if it is not possible to inline this call. /// The program is still in a well defined state if this occurs though. @@ -3084,7 +3094,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. - CallInst *TFCreate = nullptr; + TaskFrameScope TFI; BasicBlock *TFEntryBlock = DetachedCtxEntryBlock; if (InlinedFunctionInfo.ContainsDetach && (InlinedFunctionInfo.ContainsDynamicAllocas || MayBeUnsyncedAtCall)) { @@ -3094,9 +3104,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, Intrinsic::getDeclaration(M, Intrinsic::taskframe_create); // Insert the llvm.taskframe.create. - TFCreate = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) - .CreateCall(TFCreateFn, {}, "tf.i"); - TFCreate->setDebugLoc(CB.getDebugLoc()); + TFI.TFCreate = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) + .CreateCall(TFCreateFn, {}, "tf.i"); + TFI.TFCreate->setDebugLoc(CB.getDebugLoc()); TFEntryBlock = &*FirstNewBlock; // If we're inlining an invoke, insert a taskframe.resume at the unwind @@ -3112,9 +3122,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } // Create an unwind edge for the taskframe. - BasicBlock *TaskFrameUnwindEdge = CreateSubTaskUnwindEdge( - Intrinsic::taskframe_resume, TFCreate, UnwindEdge, - UnreachableBlk, II); + BasicBlock *TaskFrameUnwindEdge = + CreateSubTaskUnwindEdge(Intrinsic::taskframe_resume, TFI.TFCreate, + UnwindEdge, UnreachableBlk, II); for (PHINode &PN : UnwindEdge->phis()) PN.replaceIncomingBlockWith(II->getParent(), TaskFrameUnwindEdge); @@ -3149,7 +3159,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, BasicBlock *UnwindDest = II->getUnwindDest(); Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); if (isa(FirstNonPHI)) { - HandleInlinedLandingPad(II, &*FirstNewBlock, TFCreate, + HandleInlinedLandingPad(II, &*FirstNewBlock, TFI.TFCreate, InlinedFunctionInfo); } else { HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); @@ -3419,10 +3429,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // If we inserted a taskframe.create, insert a taskframe.end at the start of // AfterCallBB. - if (TFCreate) { + if (TFI.TFCreate) { Function *TFEndFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::taskframe_end); - IRBuilder<>(&AfterCallBB->front()).CreateCall(TFEndFn, TFCreate); + IRBuilder<>(&AfterCallBB->front()).CreateCall(TFEndFn, TFI.TFCreate); } // Change the branch that used to go to AfterCallBB to branch to the first From 22332419291436a25e3b27a2de3915183e037f6a Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Fri, 6 Dec 2024 20:57:57 -0500 Subject: [PATCH 3/5] [SimplifyCFG] To handle test cases with no syncregion intrinsic, avoid moving static allocas based on the syncregion. --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f42d516a55be..a9117b87beeb 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7860,8 +7860,9 @@ static bool serializeDetachToImmediateSync(BasicBlock *BB, // Move static alloca instructions in the detached block to the // appropriate entry block. - MoveStaticAllocasInBlock(cast(SyncRegion)->getParent(), - Detached, ReattachPreds); + if (isa(SyncRegion)) + MoveStaticAllocasInBlock(cast(SyncRegion)->getParent(), + Detached, ReattachPreds); // Erase any instructions marked to be erased. for (Instruction *I : ToErase) From ebc6f7fc400d0a48e3746a58c3fe8e544128e5b0 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Fri, 6 Dec 2024 21:00:26 -0500 Subject: [PATCH 4/5] [TapirUtils] Fix logic to insert fixups for outputs of taskframes. - Fix crash when inserting taskframe fixups for a taskframe with no continuation. - Ensure that allocas for taskframe fixups are properly inserted within the parent taskframe. - Avoid inserting taskframe load guards for unassociated taskframes. --- llvm/include/llvm/Analysis/TapirTaskInfo.h | 7 + llvm/lib/Transforms/Utils/TapirUtils.cpp | 60 ++++--- .../Tapir/alloca-insert-split-taskframe.ll | 161 ++++++++++++++++++ .../Transforms/Tapir/inline-detach-unwind.ll | 13 +- ...-nounwind-detach-into-invoked-taskframe.ll | 16 +- .../Tapir/inline-taskframe-split.ll | 8 +- .../Tapir/jump-threading-tapir-vh.ll | 8 +- .../Tapir/nested-serialize-detach.ll | 11 +- .../Tapir/outline-shared-unreachable.ll | 6 +- ...-runtime-start-after-unreachable-blocks.ll | 5 +- .../Transforms/Tapir/simple-loop-unswitch.ll | 2 +- .../Tapir/simplify-taskframe-with-resume.ll | 2 +- .../Tapir/sroa-preserve-task-info.ll | 4 +- ...a-update-multiple-reattach-predecessors.ll | 12 +- .../Tapir/tapir-lowering-empty-debugloc.ll | 8 +- .../Tapir/task-in-loop-task-exit.ll | 9 +- .../Tapir/taskframe-fixup-outline.ll | 133 +++++++++++++++ .../Transforms/Tapir/tsan-task-unreachable.ll | 6 +- .../Tapir/unlink-unreachable-detach-unwind.ll | 6 +- 19 files changed, 410 insertions(+), 67 deletions(-) create mode 100644 llvm/test/Transforms/Tapir/alloca-insert-split-taskframe.ll create mode 100644 llvm/test/Transforms/Tapir/taskframe-fixup-outline.ll diff --git a/llvm/include/llvm/Analysis/TapirTaskInfo.h b/llvm/include/llvm/Analysis/TapirTaskInfo.h index 6c95c70ee001..8e266e5b54b8 100644 --- a/llvm/include/llvm/Analysis/TapirTaskInfo.h +++ b/llvm/include/llvm/Analysis/TapirTaskInfo.h @@ -29,6 +29,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" #include namespace llvm { @@ -101,6 +102,12 @@ class Spindle { ++D; return D; } + BasicBlock::iterator getTaskFrameFirstInsertionPt() { + if (Instruction *TFCreate = + dyn_cast_or_null(getTaskFrameCreate())) + return TFCreate->getNextNode()->getIterator(); + return getEntry()->getFirstInsertionPt(); + } Task *getTaskFromTaskFrame() const; diff --git a/llvm/lib/Transforms/Utils/TapirUtils.cpp b/llvm/lib/Transforms/Utils/TapirUtils.cpp index 4b1fcc3ba496..9db58d82b2dd 100644 --- a/llvm/lib/Transforms/Utils/TapirUtils.cpp +++ b/llvm/lib/Transforms/Utils/TapirUtils.cpp @@ -19,8 +19,10 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -1003,6 +1005,10 @@ void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry, // Erase instructions marked to be erased. for (Instruction *I : ToErase) I->eraseFromParent(); + if (ReplaceWithTaskFrame && TaskFrame && TaskFrame->use_empty()) + cast(TaskFrame)->eraseFromParent(); + if (isa_and_nonnull(SyncRegion) && SyncRegion->use_empty()) + cast(SyncRegion)->eraseFromParent(); // Update dominator tree. if (DT) { @@ -1765,12 +1771,12 @@ void llvm::fixupTaskFrameExternalUses(Spindle *TF, const TaskInfo &TI, return; Task *T = TF->getTaskFrameUser(); - LLVM_DEBUG(dbgs() << "fixupTaskFrameExternalUses: spindle@" - << TF->getEntry()->getName() << "\n"); LLVM_DEBUG({ - if (T) - dbgs() << " used by task@" << T->getEntry()->getName() << "\n"; - }); + dbgs() << "fixupTaskFrameExternalUses: spindle@" + << TF->getEntry()->getName() << "\n"; + if (T) + dbgs() << " used by task@" << T->getEntry()->getName() << "\n"; + }); // Get the set of basic blocks in the taskframe spindles. At the same time, // find the continuation of corresponding taskframe.resume intrinsics. @@ -1880,12 +1886,11 @@ void llvm::fixupTaskFrameExternalUses(Spindle *TF, const TaskInfo &TI, for (auto &TFInstr : ToRewrite) { LLVM_DEBUG(dbgs() << "Fixing taskframe output " << *TFInstr.first << "\n"); // Create an allocation to store the result of the instruction. - BasicBlock *ParentEntry; - if (Spindle *ParentTF = TF->getTaskFrameParent()) - ParentEntry = ParentTF->getEntry(); - else - ParentEntry = TF->getParentTask()->getEntry(); - IRBuilder<> Builder(&*ParentEntry->getFirstInsertionPt()); + Spindle *ParentS = TF->getTaskFrameParent() + ? TF->getTaskFrameParent() + : TF->getParentTask()->getEntrySpindle(); + BasicBlock::iterator ParentInsertionPt = ParentS->getTaskFrameFirstInsertionPt(); + IRBuilder<> Builder(&*ParentInsertionPt); Type *TFInstrTy = TFInstr.first->getType(); AllocaInst *AI = Builder.CreateAlloca(TFInstrTy); AI->setName(TFInstr.first->getName()); @@ -1898,13 +1903,18 @@ void llvm::fixupTaskFrameExternalUses(Spindle *TF, const TaskInfo &TI, Builder.SetInsertPoint(&*(++TFInstr.first->getIterator())); Builder.CreateStore(TFInstr.first, AI); - // Load the result of the instruction at the continuation. - Builder.SetInsertPoint(&*Continuation->getFirstInsertionPt()); - Builder.CreateCall( - Intrinsic::getDeclaration(M, Intrinsic::taskframe_load_guard, - { AI->getType() }), { AI }); - LoadInst *ContinVal = Builder.CreateLoad(TFInstrTy, AI); + LoadInst *ContinVal = nullptr; LoadInst *EHContinVal = nullptr; + // Load the result of the instruction at the continuation. + if (Continuation) { + Builder.SetInsertPoint(&*Continuation->getFirstInsertionPt()); + if (T) + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::taskframe_load_guard, + {AI->getType()}), + {AI}); + ContinVal = Builder.CreateLoad(TFInstrTy, AI); + } // For each external use, replace the use with a load from the alloca. for (Use *UseToRewrite : TFInstr.second) { @@ -1919,9 +1929,11 @@ void llvm::fixupTaskFrameExternalUses(Spindle *TF, const TaskInfo &TI, // If necessary, load the value at the taskframe.resume continuation. if (!EHContinVal) { Builder.SetInsertPoint(&*(TFResumeContin->getFirstInsertionPt())); - Builder.CreateCall( - Intrinsic::getDeclaration(M, Intrinsic::taskframe_load_guard, - { AI->getType() }), { AI }); + if (T) + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::taskframe_load_guard, + {AI->getType()}), + {AI}); EHContinVal = Builder.CreateLoad(TFInstrTy, AI); } @@ -1933,9 +1945,11 @@ void llvm::fixupTaskFrameExternalUses(Spindle *TF, const TaskInfo &TI, } // Rewrite to use the value loaded at the continuation. - if (UseToRewrite->get()->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(*UseToRewrite, ContinVal); - UseToRewrite->set(ContinVal); + if (ContinVal) { + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, ContinVal); + UseToRewrite->set(ContinVal); + } } } } diff --git a/llvm/test/Transforms/Tapir/alloca-insert-split-taskframe.ll b/llvm/test/Transforms/Tapir/alloca-insert-split-taskframe.ll new file mode 100644 index 000000000000..a2debb6481a2 --- /dev/null +++ b/llvm/test/Transforms/Tapir/alloca-insert-split-taskframe.ll @@ -0,0 +1,161 @@ +; RUN: opt < %s -passes="tapir2target" -tapir-target=opencilk -use-opencilk-runtime-bc=false -debug-abi-calls -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%"struct.parlay::sequence_internal::sequence_base, false>::storage_impl::capacitated_buffer::header" = type { i64, %union.anon.229 } +%union.anon.229 = type { [1 x i8], [7 x i8] } + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.tapir.runtime.start() #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.taskframe.create() #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare void @llvm.tapir.runtime.end(token) #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #0 + +define fastcc void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2(ptr %call.i.i.i.i31.i874.ls2) personality ptr null { +invoke.cont61.tf.i.i.ls2: + %syncreg15.i.i.i.i.i.ls2 = tail call token @llvm.syncregion.start() + %0 = tail call token @llvm.syncregion.start() + %tf.i.i.i.i.ls2 = call token @llvm.taskframe.create() + %1 = call token @llvm.tapir.runtime.start() + call void @llvm.tapir.runtime.end(token %1) + %tf.i872.ls2 = call token @llvm.taskframe.create() + %2 = getelementptr %"struct.parlay::sequence_internal::sequence_base, false>::storage_impl::capacitated_buffer::header", ptr %call.i.i.i.i31.i874.ls2, i64 0, i32 1 + %3 = call token @llvm.tapir.runtime.start() + detach within %syncreg15.i.i.i.i.i.ls2, label %det.achd.i.i.ls2, label %det.cont.i.i.ls2 unwind label %lpad17.i.i.ls2 + +vector.ph3079.ls2: ; No predecessors! + %4 = getelementptr double, ptr %2, i64 0 + br label %invoke.cont.i.i.i.i.i266.i.i.ls2 + +invoke.cont.i.i.i.i.i266.i.i.ls2: ; preds = %det.cont.i.i.ls2, %vector.ph3079.ls2 + call void @llvm.taskframe.end(token %tf.i872.ls2) + %tf.i1536.ls2 = call token @llvm.taskframe.create() + %5 = call token @llvm.tapir.runtime.start() + detach within %syncreg15.i.i.i.i.i.ls2, label %det.achd.i.i1532.ls2, label %det.cont.i.i1526.ls2 unwind label %lpad17.i.i1523.ls2 + +det.cont.i.i1526.ls2: ; preds = %det.achd.i.i1532.ls2, %invoke.cont.i.i.i.i.i266.i.i.ls2 + sync within %syncreg15.i.i.i.i.i.ls2, label %sync.continue28.i.i1530.ls2 + +sync.continue28.i.i1530.ls2: ; preds = %det.cont.i.i1526.ls2 + call void @llvm.taskframe.end(token %tf.i1536.ls2) + call void @llvm.taskframe.end(token %tf.i.i.i.i.ls2) + ret void + +lpad17.i.i1523.ls2: ; preds = %invoke.cont.i.i.i.i.i266.i.i.ls2 + %6 = landingpad { ptr, i32 } + cleanup + call void @llvm.tapir.runtime.end(token %5) + unreachable + +det.cont.i.i.ls2: ; preds = %det.achd.i.i.ls2, %invoke.cont61.tf.i.i.ls2 + sync within %syncreg15.i.i.i.i.i.ls2, label %invoke.cont.i.i.i.i.i266.i.i.ls2 + +lpad17.i.i.ls2: ; preds = %invoke.cont61.tf.i.i.ls2 + %7 = landingpad { ptr, i32 } + cleanup + call void @llvm.tapir.runtime.end(token %3) + unreachable + +det.achd.i.i1532.ls2: ; preds = %invoke.cont.i.i.i.i.i266.i.i.ls2 + reattach within %syncreg15.i.i.i.i.i.ls2, label %det.cont.i.i1526.ls2 + +det.achd.i.i.ls2: ; preds = %invoke.cont61.tf.i.i.ls2 + reattach within %syncreg15.i.i.i.i.i.ls2, label %det.cont.i.i.ls2 +} + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2(ptr %call.i.i.i.i31.i874.ls2) +; CHECK: invoke.cont61.tf.i.i.ls2: +; CHECK-NEXT: %[[FIXUP_ALLOCA:.+]] = alloca ptr +; CHECK-NEXT: call token @llvm.syncregion.start() +; CHECK-NEXT: call token @llvm.syncregion.start() +; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.otf0(ptr %call.i.i.i.i31.i874.ls2, ptr %[[FIXUP_ALLOCA]]) +; CHECK-NEXT: br label %sync.continue28.i.i1530.ls2.tfend + +; CHECK: sync.continue28.i.i1530.ls2.tfend: +; CHECK: ret void + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_det.achd.i.i.ls2.otd1 +; CHECK: (ptr %[[ARG:.+]]) +; CHECK: call void @__cilkrts_detach( +; CHECK: ret void + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.tf.otf1(ptr {{.*}}%call.i.i.i.i31.i874.ls2.otf1, +; CHECK: ptr {{.*}}%[[ARG:.+]]) + +; CHECK: invoke.cont61.tf.i.i.ls2.tf.tf.otf1: +; CHECK: %[[ADDR:.+]] = getelementptr %"struct.parlay::sequence_internal::sequence_base, false>::storage_impl::capacitated_buffer::header", ptr %call.i.i.i.i31.i874.ls2.otf1, i64 0, i32 1 +; CHECK-NEXT: store ptr %[[ADDR]], ptr %[[ARG]] +; CHECK-NEXT: call void @__cilkrts_enter_frame( +; CHECK-NEXT: %[[TAPIR_RT_START:.+]] = call token @llvm.tapir.runtime.start() +; CHECK-NEXT: call i32 @__cilk_prepare_spawn( + +; CHECK: invoke {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_det.achd.i.i.ls2.otd1( +; CHECK-NEXT: to label %det.cont.i.i.ls2.otf1 unwind label %lpad17.i.i.ls2.otf1 + +; CHECK: lpad17.i.i.ls2.otf1: +; CHECK: landingpad +; CHECK: call void @llvm.tapir.runtime.end(token %[[TAPIR_RT_START]]) +; CHECK-NEXT: unreachable + +; CHECK: det.cont.i.i.ls2.otf1: +; CHECK-NEXT: call void @__cilk_sync( + +; CHECK: ret void + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_det.achd.i.i1532.ls2.otd1( +; CHECK: (ptr %[[ARG:.+]]) +; CHECK: call void @__cilkrts_detach( +; CHECK: ret void + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont.i.i.i.i.i266.i.i.ls2.tf.otf1() +; CHECK: invoke.cont.i.i.i.i.i266.i.i.ls2.tf.otf1: +; CHECK: call void @__cilkrts_enter_frame( +; CHECK-NEXT: %[[TAPIR_RT_START:.+]] = call token @llvm.tapir.runtime.start() +; CHECK-NEXT: call i32 @__cilk_prepare_spawn( + +; CHECK: invoke {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_det.achd.i.i1532.ls2.otd1( +; CHECK-NEXT: to label %det.cont.i.i1526.ls2.otf1 unwind label %lpad17.i.i1523.ls2.otf1 + +; CHECK: lpad17.i.i1523.ls2.otf1: +; CHECK-NEXT: landingpad +; CHECK: call void @llvm.tapir.runtime.end(token %[[TAPIR_RT_START]]) +; CHECK-NEXT: unreachable + +; CHECK: det.cont.i.i1526.ls2.otf1: +; CHECK-NEXT: call void @__cilk_sync( + +; CHECK: ret void + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.otf0(ptr align 1 %call.i.i.i.i31.i874.ls2.otf0, +; CHECK: ptr {{.*}}%[[ARG:.+]]) +; CHECK: %[[FIXUP_ALLOCA:.+]] = alloca ptr + +; CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[FIXUP_ALLOCA]]) +; CHECK-NEXT: call void @__cilkrts_enter_frame( +; CHECK-NEXT: %[[TAPIR_RT_START:.+]] = call token @llvm.tapir.runtime.start() +; CHECK-NEXT: call void @__cilk_parent_epilogue( +; CHECK-NEXT: call void @llvm.tapir.runtime.end(token %[[TAPIR_RT_START]]) +; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.tf.otf1(ptr %call.i.i.i.i31.i874.ls2.otf0, ptr %[[FIXUP_ALLOCA]]) + +; CHECK: %[[FIXUP_LOAD:.+]] = load ptr, ptr %[[FIXUP_ALLOCA]] +; CHECK-NEXT: store ptr %[[FIXUP_LOAD]], ptr %[[ARG]] +; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont.i.i.i.i.i266.i.i.ls2.tf.otf1() + +; CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[FIXUP_ALLOCA]]) + +; CHECK: ret void + +; uselistorder directives +uselistorder ptr null, { 1, 2, 0 } + +attributes #0 = { nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/Transforms/Tapir/inline-detach-unwind.ll b/llvm/test/Transforms/Tapir/inline-detach-unwind.ll index 368fc091228c..0f74c52a7b9d 100644 --- a/llvm/test/Transforms/Tapir/inline-detach-unwind.ll +++ b/llvm/test/Transforms/Tapir/inline-detach-unwind.ll @@ -4,10 +4,13 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx15.0.0" +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + ; CHECK: define void @_ZNK5Graph17pbfs_walk_PennantEP7PennantIiERH3BagIiEjPj() ; CHECK: pfor.cond: -; CHECK-NEXT: detach within none, label %pfor.body.entry, label %pfor.cond unwind label %lpad59 +; CHECK-NEXT: detach within %syncreg, label %pfor.body.entry, label %pfor.cond unwind label %lpad59 ; CHECK: pfor.body.entry: ; CHECK: br label %[[INLINED_PFOR_COND:.+]] @@ -25,18 +28,19 @@ target triple = "arm64-apple-macosx15.0.0" define void @_ZNK5Graph17pbfs_walk_PennantEP7PennantIiERH3BagIiEjPj() personality ptr null { entry: + %syncreg = tail call token @llvm.syncregion.start() %0 = call token @llvm.tapir.runtime.start() br label %pfor.cond pfor.cond: ; preds = %pfor.preattach, %pfor.cond, %entry - detach within none, label %pfor.body.entry, label %pfor.cond unwind label %lpad59 + detach within %syncreg, label %pfor.body.entry, label %pfor.cond unwind label %lpad59 pfor.body.entry: ; preds = %pfor.cond invoke fastcc void @_ZL14pbfs_proc_NodePKiiRH3BagIiEjPjS0_S0_(ptr null, i32 0, ptr null, i32 0, ptr null, ptr null, ptr null) to label %pfor.preattach unwind label %lpad49 pfor.preattach: ; preds = %pfor.body.entry - reattach within none, label %pfor.cond + reattach within %syncreg, label %pfor.cond lpad49: ; preds = %pfor.body.entry %1 = landingpad { ptr, i32 } @@ -52,10 +56,11 @@ lpad59: ; preds = %pfor.cond define fastcc void @_ZL14pbfs_proc_NodePKiiRH3BagIiEjPjS0_S0_(ptr %n, i32 %fillSize, ptr %next, i32 %newdist, ptr %distances, ptr %nodes, ptr %edges) personality ptr null { entry: + %syncreg = tail call token @llvm.syncregion.start() br label %pfor.cond pfor.cond: ; preds = %pfor.cond, %entry - detach within none, label %pfor.body.entry, label %pfor.cond unwind label %lpad20 + detach within %syncreg, label %pfor.body.entry, label %pfor.cond unwind label %lpad20 pfor.body.entry: ; preds = %pfor.cond br label %for.cond diff --git a/llvm/test/Transforms/Tapir/inline-nounwind-detach-into-invoked-taskframe.ll b/llvm/test/Transforms/Tapir/inline-nounwind-detach-into-invoked-taskframe.ll index fe09ca213269..8a9ecb7e2497 100644 --- a/llvm/test/Transforms/Tapir/inline-nounwind-detach-into-invoked-taskframe.ll +++ b/llvm/test/Transforms/Tapir/inline-nounwind-detach-into-invoked-taskframe.ll @@ -22,12 +22,12 @@ for.cond86: ; preds = %for.cond86, %entry ; CHECK: [[IF_ELSE_I]]: ; CHECK-NEXT: %[[TF_I:.+]] = call token @llvm.taskframe.create() -; CHECK-NEXT: %syncreg19.i.i3 = call token @llvm.syncregion.start() -; CHECK-NEXT: detach within %syncreg19.i.i3, label %det.achd.i.i, label +; CHECK: %syncreg19.i.i5 = call token @llvm.syncregion.start() +; CHECK-NEXT: detach within %syncreg19.i.i5, label %det.achd.i.i, label ; CHECK-NOT: unwind label ; CHECK: det.achd.i.i: -; CHECK-NEXT: reattach within %syncreg19.i.i3, label +; CHECK-NEXT: reattach within %syncreg19.i.i5, label lpad90: ; preds = %for.cond86 %0 = landingpad { ptr, i32 } @@ -53,10 +53,11 @@ if.else: ; preds = %entry define linkonce_odr void @_ZN6parlay12parallel_forIZN4gbbs9vertexMapINS1_16vertexSubsetDataINS1_5emptyEEENS1_2bc37SSBetweennessCentrality_Back_Vertex_FINS_8sequenceIbSaIbELb0EEENS8_IdSaIdELb0EEEEELi0EEEvRT_T0_mEUlmE_EEvmmOSE_lb() { entry: - detach within none, label %pfor.body.entry, label %pfor.inc + %syncreg = tail call token @llvm.syncregion.start() + detach within %syncreg, label %pfor.body.entry, label %pfor.inc pfor.body.entry: ; preds = %entry - reattach within none, label %pfor.inc + reattach within %syncreg, label %pfor.inc pfor.inc: ; preds = %pfor.body.entry, %entry ret void @@ -64,16 +65,17 @@ pfor.inc: ; preds = %pfor.body.entry, %e define linkonce_odr void @_ZN6parlay12parallel_forIZN4gbbs9vertexMapINS1_16vertexSubsetDataINS1_5emptyEEENS1_2bc37SSBetweennessCentrality_Back_Vertex_FINS_8sequenceIbSaIbELb0EEENS8_IdSaIdELb0EEEEELi0EEEvRT_T0_mEUlmE0_EEvmmOSE_lb(i64 %granularity) { entry: + %syncreg = tail call token @llvm.syncregion.start() %syncreg19 = call token @llvm.syncregion.start() %cmp = icmp eq i64 %granularity, 0 br i1 %cmp, label %pfor.cond, label %if.else pfor.cond: ; preds = %pfor.body.entry, %pfor.cond, %entry - detach within none, label %pfor.body.entry, label %pfor.cond + detach within %syncreg, label %pfor.body.entry, label %pfor.cond pfor.body.entry: ; preds = %pfor.cond call void @_ZZN4gbbs9vertexMapINS_16vertexSubsetDataINS_5emptyEEENS_2bc37SSBetweennessCentrality_Back_Vertex_FIN6parlay8sequenceIbSaIbELb0EEENS7_IdSaIdELb0EEEEELi0EEEvRT_T0_mENKUlmE0_clEm() - reattach within none, label %pfor.cond + reattach within %syncreg, label %pfor.cond if.else: ; preds = %entry detach within %syncreg19, label %det.achd, label %det.cont diff --git a/llvm/test/Transforms/Tapir/inline-taskframe-split.ll b/llvm/test/Transforms/Tapir/inline-taskframe-split.ll index a40eff1cd8bd..913039ee663a 100644 --- a/llvm/test/Transforms/Tapir/inline-taskframe-split.ll +++ b/llvm/test/Transforms/Tapir/inline-taskframe-split.ll @@ -44,7 +44,7 @@ define internal fastcc void @_Z12timeMatching9edgeArrayIiEiPc(%struct.edgeArray* br label %13 13: ; preds = %16, %12 - detach within none, label %14, label %16 + detach within %9, label %14, label %16 14: ; preds = %13 br label %15 @@ -59,7 +59,7 @@ define internal fastcc void @_Z12timeMatching9edgeArrayIiEiPc(%struct.edgeArray* br label %18 18: ; preds = %17 - sync within none, label %19 + sync within %9, label %19 19: ; preds = %18 br label %20 @@ -73,7 +73,7 @@ define internal fastcc void @_Z12timeMatching9edgeArrayIiEiPc(%struct.edgeArray* br label %23 23: ; preds = %22, %26 - detach within none, label %24, label %26 + detach within %9, label %24, label %26 24: ; preds = %23 br label %25 @@ -88,7 +88,7 @@ define internal fastcc void @_Z12timeMatching9edgeArrayIiEiPc(%struct.edgeArray* br label %28 28: ; preds = %27 - sync within none, label %29 + sync within %9, label %29 29: ; preds = %28 br label %30 diff --git a/llvm/test/Transforms/Tapir/jump-threading-tapir-vh.ll b/llvm/test/Transforms/Tapir/jump-threading-tapir-vh.ll index e3f997d405c0..84539b4b604f 100644 --- a/llvm/test/Transforms/Tapir/jump-threading-tapir-vh.ll +++ b/llvm/test/Transforms/Tapir/jump-threading-tapir-vh.ll @@ -5,15 +5,19 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() + define linkonce_odr void @_ZN4gbbs8nn_chain3HACINS_10MinLinkageINS_15symmetric_graphINS_16symmetric_vertexEjEENS_20SimilarityClusteringENS_12ActualWeightEEES4_jEEDaRNS3_IT0_T1_EERT_() personality ptr null { entry: + %syncreg = call token @llvm.syncregion.start() unreachable pfor.body.entry.i.i.i: ; preds = %pfor.inc.i.i.i - reattach within none, label %pfor.inc.i.i.i + reattach within %syncreg, label %pfor.inc.i.i.i pfor.inc.i.i.i: ; preds = %pfor.inc.i.i.i, %pfor.body.entry.i.i.i - detach within none, label %pfor.body.entry.i.i.i, label %pfor.inc.i.i.i + detach within %syncreg, label %pfor.body.entry.i.i.i, label %pfor.inc.i.i.i } ; CHECK: define linkonce_odr void @_ZN4gbbs8nn_chain3HACINS_10MinLinkageINS_15symmetric_graphINS_16symmetric_vertexEjEENS_20SimilarityClusteringENS_12ActualWeightEEES4_jEEDaRNS3_IT0_T1_EERT_() diff --git a/llvm/test/Transforms/Tapir/nested-serialize-detach.ll b/llvm/test/Transforms/Tapir/nested-serialize-detach.ll index f83422740dd2..477339292ca2 100644 --- a/llvm/test/Transforms/Tapir/nested-serialize-detach.ll +++ b/llvm/test/Transforms/Tapir/nested-serialize-detach.ll @@ -16,6 +16,9 @@ declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #1 ; Function Attrs: nounwind willreturn memory(argmem: readwrite) declare token @llvm.taskframe.create() #0 +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #0 + ; CHECK: define void @_ZNK5Graph17pbfs_walk_PennantEP7PennantIiERH3BagIiEjPj() ; CHECK-NEXT: entry: ; CHECK-NOT: detach within @@ -35,14 +38,14 @@ pfor.body.entry.tf: ; preds = %entry pfor.cond.i.strpm.detachloop.entry: ; preds = %pfor.body.entry.tf %syncreg.i.strpm.detachloop = tail call token @llvm.syncregion.start() - detach within none, label %pfor.body.entry.i.strpm.outer.1, label %pfor.inc.i.strpm.outer.1 unwind label %lpad4924.loopexit.strpm + detach within %syncreg.i.strpm.detachloop, label %pfor.body.entry.i.strpm.outer.1, label %pfor.inc.i.strpm.outer.1 unwind label %lpad4924.loopexit.strpm pfor.body.entry.i.strpm.outer.1: ; preds = %pfor.cond.i.strpm.detachloop.entry - invoke void @llvm.detached.rethrow.sl_p0i32s(token none, { ptr, i32 } zeroinitializer) + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg.i.strpm.detachloop, { ptr, i32 } zeroinitializer) to label %lpad4924.unreachable unwind label %lpad4924.loopexit.strpm pfor.inc.i.strpm.outer.1: ; preds = %pfor.cond.i.strpm.detachloop.entry - sync within none, label %pfor.cond.i.strpm.detachloop.reattach.split + sync within %syncreg.i.strpm.detachloop, label %pfor.cond.i.strpm.detachloop.reattach.split pfor.cond.i.strpm.detachloop.reattach.split: ; preds = %pfor.inc.i.strpm.outer.1 reattach within %syncreg.i, label %pfor.cond.cleanup.i @@ -66,13 +69,13 @@ lpad4924.loopexit.strpm.unreachable: ; preds = %lpad4924.loopexit.s lpad4924.loopexit.split-lp: ; preds = %lpad4924.loopexit.strpm, %sync.continue.i, %pfor.body.entry.tf %lpad.loopexit.split-lp = landingpad { ptr, i32 } cleanup - call void @llvm.detached.rethrow.sl_p0i32s(token none, { ptr, i32 } zeroinitializer) unreachable lpad4924.unreachable: ; preds = %pfor.body.entry.i.strpm.outer.1 unreachable pfor.preattach: ; preds = %sync.continue.i + call void @llvm.taskframe.end(token %tf.i) reattach within %syncreg45, label %pfor.inc pfor.inc: ; preds = %pfor.preattach, %entry diff --git a/llvm/test/Transforms/Tapir/outline-shared-unreachable.ll b/llvm/test/Transforms/Tapir/outline-shared-unreachable.ll index 90f113d5ebd1..f6e71bf3e256 100644 --- a/llvm/test/Transforms/Tapir/outline-shared-unreachable.ll +++ b/llvm/test/Transforms/Tapir/outline-shared-unreachable.ll @@ -6,15 +6,19 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx13.0.0" +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() + define void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiRNSt3__13mapIiNS1_6vectorIiNS1_9allocatorIiEEEENS1_4lessIiEENS4_INS1_4pairIKiS6_EEEEEESE_RNS2_IiiS8_NS4_INS9_ISA_iEEEEEESI_PPdSK_() personality ptr null { entry: + %syncreg = call token @llvm.syncregion.start() br i1 false, label %entry.unreachable_crit_edge, label %pfor.detach entry.unreachable_crit_edge: ; preds = %entry br label %unreachable pfor.detach: ; preds = %pfor.detach, %entry - detach within none, label %pfor.body, label %pfor.detach unwind label %lpad714.loopexit + detach within %syncreg, label %pfor.body, label %pfor.detach unwind label %lpad714.loopexit pfor.body: ; preds = %pfor.detach br label %unreachable diff --git a/llvm/test/Transforms/Tapir/remove-dead-runtime-start-after-unreachable-blocks.ll b/llvm/test/Transforms/Tapir/remove-dead-runtime-start-after-unreachable-blocks.ll index 822bbae1d630..a3741270cb8d 100644 --- a/llvm/test/Transforms/Tapir/remove-dead-runtime-start-after-unreachable-blocks.ll +++ b/llvm/test/Transforms/Tapir/remove-dead-runtime-start-after-unreachable-blocks.ll @@ -11,15 +11,16 @@ target triple = "arm64-apple-macosx15.0.0" define void @_ZNK5Graph17pbfs_walk_PennantEP7PennantIiERH3BagIiEjPj() personality ptr null { entry: + %syncreg = call token @llvm.syncregion.start() %0 = tail call token @llvm.tapir.runtime.start() - detach within none, label %pfor.body.entry.tf, label %pfor.inc unwind label %lpad59.loopexit + detach within %syncreg, label %pfor.body.entry.tf, label %pfor.inc unwind label %lpad59.loopexit pfor.body.entry.tf: ; preds = %entry %call.i15.i26.1 = call ptr @_Znwm() unreachable pfor.inc: ; preds = %entry - sync within none, label %sync.continue + sync within %syncreg, label %sync.continue common.ret: ; preds = %sync.continue, %lpad59.loopexit ret void diff --git a/llvm/test/Transforms/Tapir/simple-loop-unswitch.ll b/llvm/test/Transforms/Tapir/simple-loop-unswitch.ll index a23f0b1af6fc..38c9b3e15b30 100644 --- a/llvm/test/Transforms/Tapir/simple-loop-unswitch.ll +++ b/llvm/test/Transforms/Tapir/simple-loop-unswitch.ll @@ -83,7 +83,7 @@ pfor.inc.i: ; preds = %if.end.i, %pfor.con br i1 false, label %pfor.cond.cleanup.i, label %pfor.cond.i pfor.cond.cleanup.i: ; preds = %pfor.inc.i - sync within none, label %sync.continue.i + sync within %syncreg.i, label %sync.continue.i ehcleanup18.i: ; preds = %lpad5.i invoke void @llvm.detached.rethrow.sl_p0i8i32s(token %syncreg.i, { i8*, i32 } zeroinitializer) diff --git a/llvm/test/Transforms/Tapir/simplify-taskframe-with-resume.ll b/llvm/test/Transforms/Tapir/simplify-taskframe-with-resume.ll index 484bfd97343c..e4f7d76fb2f9 100644 --- a/llvm/test/Transforms/Tapir/simplify-taskframe-with-resume.ll +++ b/llvm/test/Transforms/Tapir/simplify-taskframe-with-resume.ll @@ -58,10 +58,10 @@ pfor.cond.cleanup.i: ; preds = %pfor.cond.i.strpm.d lpad4924.loopexit.strpm.detachloop.unwind: ; preds = %pfor.body.entry.tf %lpad.strpm.detachloop.unwind = landingpad { ptr, i32 } cleanup - call void @llvm.taskframe.resume.sl_p0i32s(token %tf.i, { ptr, i32 } %lpad.strpm.detachloop.unwind) unreachable sync.continue.i: ; preds = %pfor.cond.cleanup.i + call void @llvm.taskframe.end(token %tf.i) reattach within %syncreg45, label %pfor.inc pfor.inc: ; preds = %sync.continue.i, %entry diff --git a/llvm/test/Transforms/Tapir/sroa-preserve-task-info.ll b/llvm/test/Transforms/Tapir/sroa-preserve-task-info.ll index 76ca0b82679b..d3ad5726560f 100644 --- a/llvm/test/Transforms/Tapir/sroa-preserve-task-info.ll +++ b/llvm/test/Transforms/Tapir/sroa-preserve-task-info.ll @@ -38,10 +38,10 @@ entry: ; CHECK: br label %pfor.cond pfor.cond: ; preds = %pfor.body.entry, %pfor.cond, %entry - detach within none, label %pfor.body.entry, label %pfor.cond + detach within %syncreg105, label %pfor.body.entry, label %pfor.cond pfor.body.entry: ; preds = %pfor.cond - reattach within none, label %pfor.cond + reattach within %syncreg105, label %pfor.cond } ; uselistorder directives diff --git a/llvm/test/Transforms/Tapir/ssa-update-multiple-reattach-predecessors.ll b/llvm/test/Transforms/Tapir/ssa-update-multiple-reattach-predecessors.ll index afa37e06efe4..187e74adb602 100644 --- a/llvm/test/Transforms/Tapir/ssa-update-multiple-reattach-predecessors.ll +++ b/llvm/test/Transforms/Tapir/ssa-update-multiple-reattach-predecessors.ll @@ -6,8 +6,12 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() + define linkonce_odr void @_Z10matmul_dacIfLb0ELb1EEvPT_PKS0_S3_llllll(ptr %arrayidx83) { if.end.lr.ph.lr.ph.preheader: + %syncreg = call token @llvm.syncregion.start() br label %if.end.lr.ph.lr.ph if.end.lr.ph.lr.ph: ; preds = %tailrecurse.outer.outer.backedge, %if.end.lr.ph.lr.ph.preheader @@ -24,10 +28,10 @@ land.lhs.true: ; preds = %if.end br label %det.cont22.tf det.cont22.tf: ; preds = %land.lhs.true - detach within none, label %det.achd32, label %tailrecurse.outer.outer.backedge + detach within %syncreg, label %det.achd32, label %tailrecurse.outer.outer.backedge det.achd32: ; preds = %det.cont22.tf - reattach within none, label %tailrecurse.outer.outer.backedge + reattach within %syncreg, label %tailrecurse.outer.outer.backedge if.else57: ; preds = %if.end br i1 false, label %if.then59, label %if.else75 @@ -36,10 +40,10 @@ if.then59: ; preds = %if.else57 br label %_ZL9split_diml.exit164 _ZL9split_diml.exit164: ; preds = %if.then59 - detach within none, label %det.achd66, label %tailrecurse.outer.outer.backedge + detach within %syncreg, label %det.achd66, label %tailrecurse.outer.outer.backedge det.achd66: ; preds = %_ZL9split_diml.exit164 - reattach within none, label %tailrecurse.outer.outer.backedge + reattach within %syncreg, label %tailrecurse.outer.outer.backedge tailrecurse.outer.outer.backedge: ; preds = %det.achd66, %_ZL9split_diml.exit164, %det.achd32, %det.cont22.tf %lhs.tr.ph.ph.be = getelementptr inbounds float, ptr %lhs.tr2511, i64 0 diff --git a/llvm/test/Transforms/Tapir/tapir-lowering-empty-debugloc.ll b/llvm/test/Transforms/Tapir/tapir-lowering-empty-debugloc.ll index c44ae67ff09c..ea1e322a3ea7 100644 --- a/llvm/test/Transforms/Tapir/tapir-lowering-empty-debugloc.ll +++ b/llvm/test/Transforms/Tapir/tapir-lowering-empty-debugloc.ll @@ -4,12 +4,16 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() + define ptr @_Z12generateNodePP5rangePP5eventS0_ii() personality ptr null !dbg !78 { entry: - detach within none, label %det.achd.peel, label %for.body.tf, !dbg !80 + %syncreg = call token @llvm.syncregion.start() + detach within %syncreg, label %det.achd.peel, label %for.body.tf, !dbg !80 det.achd.peel: ; preds = %entry - reattach within none, label %for.body.tf + reattach within %syncreg, label %for.body.tf for.body.tf: ; preds = %det.achd.peel, %entry ret ptr null diff --git a/llvm/test/Transforms/Tapir/task-in-loop-task-exit.ll b/llvm/test/Transforms/Tapir/task-in-loop-task-exit.ll index 2d9a9f4e8de0..20c41993fd50 100644 --- a/llvm/test/Transforms/Tapir/task-in-loop-task-exit.ll +++ b/llvm/test/Transforms/Tapir/task-in-loop-task-exit.ll @@ -61,14 +61,15 @@ entry: define internal fastcc void @"_ZN6parlay12parallel_forIZNS_8sequenceINS1_ISt6vectorIiSaIiEENS_9allocatorIS4_EELb0EEENS5_IS7_EELb0EEC1IZNS_8internal7delayed25block_delayed_filter_op_tIRKNS_16delayed_sequenceIS4_S4_ZN58TestDelayedFilterOp_TestFilterOpNonTrivialTemporaries_Test8TestBodyEvE3$_0EEZNSF_8TestBodyEvE3$_1E13filter_blocksISJ_SK_EEDaOT_OT0_EUlmE_EEmSO_NS9_18_from_function_tagEmEUlmE_EEvmmSO_lb"() #1 { entry: + %syncreg = call token @llvm.syncregion.start() br label %pfor.cond pfor.cond: ; preds = %pfor.body.entry, %pfor.cond, %entry - detach within none, label %pfor.body.entry, label %pfor.cond + detach within %syncreg, label %pfor.body.entry, label %pfor.cond pfor.body.entry: ; preds = %pfor.cond call fastcc void @"_ZZN6parlay8sequenceINS0_ISt6vectorIiSaIiEENS_9allocatorIS3_EELb0EEENS4_IS6_EELb0EEC1IZNS_8internal7delayed25block_delayed_filter_op_tIRKNS_16delayed_sequenceIS3_S3_ZN58TestDelayedFilterOp_TestFilterOpNonTrivialTemporaries_Test8TestBodyEvE3$_0EEZNSE_8TestBodyEvE3$_1E13filter_blocksISI_SJ_EEDaOT_OT0_EUlmE_EEmSN_NS8_18_from_function_tagEmENKUlmE_clEm"() - reattach within none, label %pfor.cond + reattach within %syncreg, label %pfor.cond } define internal fastcc void @"_ZZN6parlay8sequenceINS0_ISt6vectorIiSaIiEENS_9allocatorIS3_EELb0EEENS4_IS6_EELb0EEC1IZNS_8internal7delayed25block_delayed_filter_op_tIRKNS_16delayed_sequenceIS3_S3_ZN58TestDelayedFilterOp_TestFilterOpNonTrivialTemporaries_Test8TestBodyEvE3$_0EEZNSE_8TestBodyEvE3$_1E13filter_blocksISI_SJ_EEDaOT_OT0_EUlmE_EEmSN_NS8_18_from_function_tagEmENKUlmE_clEm"() personality ptr null { @@ -139,7 +140,7 @@ pfor.body.entry: ; preds = %pfor.cond ; CHECK: define internal fastcc void @"_ZN6parlay12parallel_forIZNS_8sequenceINS1_ISt6vectorIiSaIiEENS_9allocatorIS4_EELb0EEENS5_IS7_EELb0EEC1IZNS_8internal7delayed25block_delayed_filter_op_tIRKNS_16delayed_sequenceIS4_S4_ZN58TestDelayedFilterOp_TestFilterOpNonTrivialTemporaries_Test8TestBodyEvE3$_0EEZNSF_8TestBodyEvE3$_1E13filter_blocksISJ_SK_EEDaOT_OT0_EUlmE_EEmSO_NS9_18_from_function_tagEmEUlmE_EEvmmSO_lb"() #1 personality ptr null { ; CHECK: pfor.cond: -; CHECK-NEXT: detach within none, label %pfor.body.entry, label %pfor.cond.backedge unwind label +; CHECK-NEXT: detach within %syncreg, label %pfor.body.entry, label %pfor.cond.backedge unwind label ; CHECK: pfor.body.entry: ; CHECK-NEXT: [[REFTMP11IIIII:%.+]] = alloca [1 x [1 x [1 x %class.anon.645]]] @@ -163,7 +164,7 @@ pfor.body.entry: ; preds = %pfor.cond ; CHECK-NEXT: reattach within [[SYNCREG19IIIIII]], label %pfor.cond.backedge.i.i.i.i.i.i ; CHECK: "_ZZN6parlay8sequenceINS0_ISt6vectorIiSaIiEENS_9allocatorIS3_EELb0EEENS4_IS6_EELb0EEC1IZNS_8internal7delayed25block_delayed_filter_op_tIRKNS_16delayed_sequenceIS3_S3_ZN58TestDelayedFilterOp_TestFilterOpNonTrivialTemporaries_Test8TestBodyEvE3$_0EEZNSE_8TestBodyEvE3$_1E13filter_blocksISI_SJ_EEDaOT_OT0_EUlmE_EEmSN_NS8_18_from_function_tagEmENKUlmE_clEm.exit": -; CHECK: reattach within none, label %pfor.cond.backedge +; CHECK: reattach within %syncreg, label %pfor.cond.backedge ; uselistorder directives uselistorder ptr null, { 8, 9, 0, 2, 3, 10, 11, 1, 12, 13, 4, 14, 15, 5, 16, 17, 6, 18, 19, 7 } diff --git a/llvm/test/Transforms/Tapir/taskframe-fixup-outline.ll b/llvm/test/Transforms/Tapir/taskframe-fixup-outline.ll new file mode 100644 index 000000000000..8270628f8df9 --- /dev/null +++ b/llvm/test/Transforms/Tapir/taskframe-fixup-outline.ll @@ -0,0 +1,133 @@ +; RUN: opt < %s -passes="tapir2target" -tapir-target=opencilk -use-opencilk-runtime-bc=false -debug-abi-calls -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.tapir.runtime.start() #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.taskframe.create() #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare void @llvm.tapir.runtime.end(token) #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #0 + +define fastcc void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2() personality ptr null { +invoke.cont61.tf.i.i.ls2: + %syncreg15.i.i.i.i.i.ls2 = tail call token @llvm.syncregion.start() + %0 = tail call token @llvm.syncregion.start() + %tf.i.i.i.i.ls2 = call token @llvm.taskframe.create() + br i1 false, label %if.then.tf.i.i.i.i.ls2, label %for.cond.preheader.i.i.i.i.ls2 + +for.cond.preheader.i.i.i.i.ls2: ; preds = %invoke.cont61.tf.i.i.ls2 + call void @llvm.taskframe.end(token %tf.i.i.i.i.ls2) + ret void + +if.then.tf.i.i.i.i.ls2: ; preds = %invoke.cont61.tf.i.i.ls2 + %tf.i872.ls2 = call token @llvm.taskframe.create() + %cmp5.not.i.i.ls2 = icmp ugt i48 0, 0 + %1 = call token @llvm.tapir.runtime.start() + br label %det.cont.i.i.ls2 + +if.else11.i.i1520.ls2: ; preds = %det.cont.i.i.ls2 + %2 = call token @llvm.tapir.runtime.start() + br label %invoke.cont29.i.i1531.ls2 + +.noexc857.ls2: ; preds = %det.cont.i.i.ls2, %invoke.cont29.i.i1531.ls2 + call void @llvm.taskframe.end(token %tf.i1536.ls2) + unreachable + +invoke.cont29.i.i1531.ls2: ; preds = %if.else11.i.i1520.ls2 + call void @llvm.tapir.runtime.end(token %2) + br label %.noexc857.ls2 + +det.cont.i.i.ls2: ; preds = %if.then.tf.i.i.i.i.ls2 + call void @llvm.tapir.runtime.end(token %1) + call void @llvm.taskframe.end(token %tf.i872.ls2) + %tf.i1536.ls2 = call token @llvm.taskframe.create() + br i1 %cmp5.not.i.i.ls2, label %if.else11.i.i1520.ls2, label %.noexc857.ls2 +} + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2() +; CHECK: invoke.cont61.tf.i.i.ls2: +; CHECK: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.otf0() +; CHECK-NEXT: br label %[[TFEND:.+]] +; CHECK: [[TFEND]]: +; CHECK-NEXT: ret void + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_if.then.tf.i.i.i.i.ls2.tf.otf1 +; CHECK: (ptr {{.*}}%[[CMP5_NOT_ARG:.+]]) +; CHECK: if.then.tf.i.i.i.i.ls2.tf.otf1: +; CHECK-NEXT: %[[CMP5_NOT_VAL:.+]] = icmp ugt i48 0, 0 +; CHECK-NEXT: store i1 %[[CMP5_NOT_VAL]], ptr %[[CMP5_NOT_ARG]] +; CHECK-NEXT: call void @__cilkrts_enter_frame( +; CHECK-NEXT: %[[TAPIR_RT_START:.+]] = call token @llvm.tapir.runtime.start() +; CHECK-NEXT: br label %det.cont.i.i.ls2.otf1 + +; CHECK: det.cont.i.i.ls2.otf1: +; CHECK-NEXT: call void @__cilk_parent_epilogue( +; CHECK-NEXT: call void @llvm.tapir.runtime.end(token %[[TAPIR_RT_START]]) +; CHECK-NEXT: br label %det.cont.i.i.ls2.tfend.otf1 + +; CHECK: det.cont.i.i.ls2.tfend.otf1: +; CHECK-NEXT: ret void + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_det.cont.i.i.ls2.tf.otf1 +; CHECK: (i1 %[[ARG:.+]]) +; CHECK: det.cont.i.i.ls2.tf.otf1: +; CHECK-NEXT: br i1 %[[ARG]], label %if.else11.i.i1520.ls2.otf1, label %.noexc857.ls2.otf1 + +; CHECK: .noexc857.ls2.otf1: +; CHECK-NEXT: br label %.noexc857.ls2.tfend.otf1 + +; CHECK: if.else11.i.i1520.ls2.otf1: +; CHECK-NEXT: call void @__cilkrts_enter_frame( +; CHECK-NEXT: %[[TAPIR_RT_START:.+]] = call token @llvm.tapir.runtime.start() +; CHECK-NEXT: br label %invoke.cont29.i.i1531.ls2.otf1 + +; CHECK: invoke.cont29.i.i1531.ls2.otf1: +; CHECK-NEXT: call void @__cilk_parent_epilogue( +; CHECK-NEXT: call void @llvm.tapir.runtime.end(token %[[TAPIR_RT_START]]) +; CHECK-NEXT: br label %.noexc857.ls2.otf1 + +; CHECK: .noexc857.ls2.tfend.otf1: +; CHECK-NEXT: ret void + +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.otf0() +; CHECK: invoke.cont61.tf.i.i.ls2.otf0: +; CHECK-NEXT: %[[CMP5_NOT_ALLOCA:.+]] = alloca i1 +; CHECK-NEXT: br label %invoke.cont61.tf.i.i.ls2.tf.otf0 + +; CHECK: invoke.cont61.tf.i.i.ls2.tf.otf0: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr %[[CMP5_NOT_ALLOCA]]) +; CHECK-NEXT: br i1 false, label %if.then.tf.i.i.i.i.ls2.otf0, label %for.cond.preheader.i.i.i.i.ls2.otf0 + +; CHECK: for.cond.preheader.i.i.i.i.ls2.otf0: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr %[[CMP5_NOT_ALLOCA]]) +; CHECK-NEXT: br label %for.cond.preheader.i.i.i.i.ls2.tfend.otf0 + +; CHECK: if.then.tf.i.i.i.i.ls2.otf0: +; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_if.then.tf.i.i.i.i.ls2.tf.otf1(ptr %[[CMP5_NOT_ALLOCA]]) +; CHECK-NEXT: br label %det.cont.i.i.ls2.tfend.otf0 + +; CHECK: det.cont.i.i.ls2.tfend.otf0: +; CHECK-NEXT: %[[CMP5_NOT_LOAD:.+]] = load i1, ptr %[[CMP5_NOT_ALLOCA]] +; CHECK-NEXT: call fastcc void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_det.cont.i.i.ls2.tf.otf1(i1 %[[CMP5_NOT_LOAD]]) +; CHECK-NEXT: br label %.noexc857.ls2.tfend.otf0 + +; CHECK: .noexc857.ls2.tfend.otf0: +; CHECK-NEXT: unreachable + +; CHECK: for.cond.preheader.i.i.i.i.ls2.tfend.otf0 +; CHECK-NEXT: ret void + +; uselistorder directives +uselistorder ptr null, { 1, 2, 0 } + +attributes #0 = { nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/Transforms/Tapir/tsan-task-unreachable.ll b/llvm/test/Transforms/Tapir/tsan-task-unreachable.ll index fffeb813cde0..250cdef9d50f 100644 --- a/llvm/test/Transforms/Tapir/tsan-task-unreachable.ll +++ b/llvm/test/Transforms/Tapir/tsan-task-unreachable.ll @@ -18,18 +18,18 @@ entry: ret void if.then48.tf: ; No predecessors! - detach within none, label %det.achd, label %det.cont + detach within %syncreg, label %det.achd, label %det.cont det.achd: ; preds = %if.then48.tf call void @_Z21walk_bicut_pbc_helperiiP9AtomGraphRSt3mapIiS1_ISt5tupleIJiiiEESt3setIiSt4lessIiESaIiEES5_IS3_ESaISt4pairIKS3_S8_EEES6_SaISA_IKiSE_EEE4cutsRK14base_case_args() - reattach within none, label %det.cont + reattach within %syncreg, label %det.cont det.cont: ; preds = %det.achd, %if.then48.tf ret void } ; CHECK: if.then48.tf: -; CHECK-NEXT: detach within none, label %det.achd, label %det.cont unwind label %[[TSAN_CLEANUP:.+]] +; CHECK-NEXT: detach within %syncreg, label %det.achd, label %det.cont unwind label %[[TSAN_CLEANUP:.+]] ; CHECK: det.achd: ; CHECK-NEXT: invoke void @_Z21walk_bicut_pbc_helperiiP9AtomGraphRSt3mapIiS1_ISt5tupleIJiiiEESt3setIiSt4lessIiESaIiEES5_IS3_ESaISt4pairIKS3_S8_EEES6_SaISA_IKiSE_EEE4cutsRK14base_case_args() diff --git a/llvm/test/Transforms/Tapir/unlink-unreachable-detach-unwind.ll b/llvm/test/Transforms/Tapir/unlink-unreachable-detach-unwind.ll index 6d62b0a69df7..9463321692ba 100644 --- a/llvm/test/Transforms/Tapir/unlink-unreachable-detach-unwind.ll +++ b/llvm/test/Transforms/Tapir/unlink-unreachable-detach-unwind.ll @@ -33,7 +33,7 @@ pfor.inc273: ; preds = %pfor.inc273, %pfor. detach within %syncreg, label %pfor.body.entry, label %pfor.inc273 unwind label %lpad651 pfor.detach470: ; preds = %pfor.detach470, %entry - detach within none, label %pfor.body.entry472, label %pfor.detach470 unwind label %lpad651 + detach within %syncreg359, label %pfor.body.entry472, label %pfor.detach470 unwind label %lpad651 pfor.body.entry472: ; preds = %pfor.detach470 %syncreg481 = call token @llvm.syncregion.start() @@ -49,10 +49,10 @@ lpad651: ; preds = %pfor.inc907, %pfor. pfor.body.entry799: ; preds = %pfor.inc907 %syncreg843 = call token @llvm.syncregion.start() - reattach within none, label %pfor.inc907 + reattach within %syncreg461, label %pfor.inc907 pfor.inc907: ; preds = %pfor.inc907, %pfor.body.entry799 - detach within none, label %pfor.body.entry799, label %pfor.inc907 unwind label %lpad651 + detach within %syncreg461, label %pfor.body.entry799, label %pfor.inc907 unwind label %lpad651 } ; CHECK: define void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiPNSt3__16vectorIiNS1_9allocatorIiEEEES6_PiS7_PPdS9_() From e5786856c0de4d737d05e2631fb6abe6d4df7591 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Sat, 7 Dec 2024 15:32:16 -0500 Subject: [PATCH 5/5] [Verifier] Require Tapir instructions to have valid syncregions. --- llvm/lib/IR/Verifier.cpp | 6 ++++++ llvm/test/Transforms/Tapir/simplify-none-syncregion.ll | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index e64f4ce0f8ee..d357597e7b4c 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3160,6 +3160,8 @@ void Verifier::verifyTask(const DetachInst *DI) { } void Verifier::visitReattachInst(ReattachInst &RI) { + Check(isa(RI.getSyncRegion()), + "reattach has an invalid syncregion", RI); if (DT.isReachableFromEntry(RI.getParent())) { // Check that the continuation of the reattach has a detach predecessor. const BasicBlock *Continue = RI.getDetachContinue(); @@ -3178,6 +3180,8 @@ void Verifier::visitReattachInst(ReattachInst &RI) { } void Verifier::visitSyncInst(SyncInst &SI) { + Check(isa(SI.getSyncRegion()), "sync has an invalid syncregion", + SI); visitTerminator(SI); } @@ -3245,6 +3249,8 @@ void Verifier::verifyTaskFrame(const CallBase *TF) { } void Verifier::visitDetachInst(DetachInst &DI) { + Check(isa(DI.getSyncRegion()), + "detach has an invalid syncregion", DI); if (DetachesVisited.insert(&DI).second) verifyTask(&DI); diff --git a/llvm/test/Transforms/Tapir/simplify-none-syncregion.ll b/llvm/test/Transforms/Tapir/simplify-none-syncregion.ll index 86991a3a8e4f..85e842848792 100644 --- a/llvm/test/Transforms/Tapir/simplify-none-syncregion.ll +++ b/llvm/test/Transforms/Tapir/simplify-none-syncregion.ll @@ -1,6 +1,10 @@ ; Check that task-simplify handles sync regions that are not instructions, specifically when debugging. ; ; RUN: opt < %s -passes="function(task-simplify)" -S | FileCheck %s +; +; The verifier has been changed to require valid syncregions, meaning this IR is no longer valid. +; Keeping this test around in case none syncregions are allowed in the future. +; XFAIL: * target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx15.0.0"