From 00e79347fe7f4806ebc5acf5564f92cb3fe20d89 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Sun, 22 Oct 2023 19:12:01 +0000 Subject: [PATCH] [SimpleLoopUnswitch] Fix nontrivial loop-unswitching to work with Tapir, and restore relevant regression tests. --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 27 +++++++++++++- .../Transforms/Tapir/loop-unswitch-lcssa.ll | 16 +++++---- llvm/test/Transforms/Tapir/loop-unswitch.ll | 36 ++++--------------- 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 178a1c7c0eed..017ffb3c4295 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" @@ -59,6 +60,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -2852,13 +2854,36 @@ static bool collectUnswitchCandidates( return !UnswitchCandidates.empty(); } +static bool +checkTapirSyncRegionInLoop(const Loop &L, + const SmallPtrSetImpl &TaskExits, + const Instruction &I) { + for (const User *Usr : I.users()) + if (const Instruction *UsrI = dyn_cast(Usr)) { + const BasicBlock *Parent = UsrI->getParent(); + if (!L.contains(Parent) && !TaskExits.contains(Parent)) + return false; + } + return true; +} + static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) { if (!L.isSafeToClone()) return false; + SmallPtrSet TaskExits; + L.getTaskExits(TaskExits); for (auto *BB : L.blocks()) for (auto &I : *BB) { - if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) + if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) { + if (isTapirIntrinsic(Intrinsic::syncregion_start, &I)) { + if (!checkTapirSyncRegionInLoop(L, TaskExits, I)) + return false; + // All uses of this syncregion.start are inside of the loop, so it's + // safe for unswitching. + continue; + } return false; + } if (auto *CB = dyn_cast(&I)) { assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone()."); if (CB->isConvergent()) diff --git a/llvm/test/Transforms/Tapir/loop-unswitch-lcssa.ll b/llvm/test/Transforms/Tapir/loop-unswitch-lcssa.ll index c18acc79930c..4e06633a41e5 100644 --- a/llvm/test/Transforms/Tapir/loop-unswitch-lcssa.ll +++ b/llvm/test/Transforms/Tapir/loop-unswitch-lcssa.ll @@ -1,5 +1,4 @@ -; RUN: opt < %s -enable-new-pm=0 -loop-unswitch -S -o - | FileCheck %s -; XFAIL: * +; RUN: opt < %s -passes='simple-loop-unswitch' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -88,17 +87,20 @@ lpad4: ; preds = %invoke.cont7, %pfor invoke void @llvm.detached.rethrow.sl_p0i8i32s(token %syncreg, { i8*, i32 } %5) to label %unreachable unwind label %lpad10.loopexit -; CHECK: lpad4: -; CHECK-NEXT: %call2.i.i.i.i1.i.i.lcssa1 = phi i8* -; CHECK-NEXT: %.lcssa = phi -; CHECK: invoke void @llvm.detached.rethrow.sl_p0i8i32s( +; CHECK: lpad4.us: +; CHECK-NEXT: %call2.i.i.i.i1.i.i.lcssa1.us = phi ptr +; CHECK-NEXT: %.lcssa.us = phi +; CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s( ; CHECK: to label %{{.+}} unwind label %[[DRDEST:.+]] ; CHECK: [[DRDEST]]: -; CHECK-DAG: phi i8* [ %call2.i.i.i.i1.i.i.lcssa1, %lpad4 ] +; CHECK-NEXT: phi ptr +; CHECK-DAG: [ %call2.i.i.i.i1.i.i.lcssa1.us, %lpad4.us ] +; CHECK-NEXT: landingpad ; CHECK: _ZNSt6vectorIiSaIiEED2Ev.exit11: ; CHECK-NOT: %call2.i.i.i.i1.i.i.lcssa1, +; CHECK-NOT: %call2.i.i.i.i1.i.i.lcssa1.us, ; CHECK: unreachable lpad10.loopexit: ; preds = %pfor.cond diff --git a/llvm/test/Transforms/Tapir/loop-unswitch.ll b/llvm/test/Transforms/Tapir/loop-unswitch.ll index 6ddc96042b41..4b0f1849e428 100644 --- a/llvm/test/Transforms/Tapir/loop-unswitch.ll +++ b/llvm/test/Transforms/Tapir/loop-unswitch.ll @@ -1,8 +1,7 @@ ; Thanks to Sai Sameer Pusapaty and Shreyas Balaji for the original ; source code for this test case. ; -; RUN: opt < %s -enable-new-pm=0 -loop-unswitch -S -o - | FileCheck %s -; XFAIL: * +; RUN: opt < %s -passes="simple-loop-unswitch" -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -367,7 +366,7 @@ cleanup78: ; preds = %pfor.cond.cleanup72 ; CHECK: define {{.+}}@_Z11algorithm_45graphIiE( ; CHECK: pfor.cond.us: -; CHECK: detach within %syncreg, label %pfor.body.us, label %pfor.inc60.us unwind label %lpad62.loopexit.us-lcssa.us +; CHECK: detach within %syncreg, label %pfor.body.us, label %pfor.inc60.us unwind label %lpad62.loopexit.split.us ; CHECK: pfor.body.us: ; CHECK-NEXT: %[[USSYNCREG:.+]] = {{.+}}call token @llvm.syncregion @@ -388,45 +387,22 @@ cleanup78: ; preds = %pfor.cond.cleanup72 ; CHECK: sync.continue57.us: ; The sync region of this detached-rethrow should match that of the ; detach in pfor.cond.us. -; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i8i32s(token %syncreg, +; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, ; CHECK: lpad.us: ; The sync region of this detached-rethrow should match that of the ; detach in pfor.cond22.us. -; CHECK: invoke void @llvm.detached.rethrow.sl_p0i8i32s(token %[[USSYNCREG]] +; CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[USSYNCREG]] -; CHECK: lpad62.loopexit.us-lcssa.us: +; CHECK: lpad62.loopexit.split.us: ; CHECK: br label %lpad62.loopexit ; CHECK: pfor.cond: -; CHECK: detach within %syncreg, label %pfor.body, label %pfor.inc60 unwind label %lpad62.loopexit.us-lcssa +; CHECK: detach within %syncreg, label %pfor.body, label %pfor.inc60 unwind label %lpad62.loopexit.split ; CHECK: pfor.body: ; CHECK: %[[OGSYNCREG:.+]] = {{.+}}call token @llvm.syncregion -; CHECK: pfor.cond22: -; CHECK: detach within %[[OGSYNCREG]], label %pfor.body27, label %pfor.inc unwind label %lpad52.loopexit - -; CHECK: pfor.body27: -; CHECK: invoke i32 @_Z11p_intersectPKiiS0_i( -; CHECK-NEXT: to label %invoke.cont unwind label %lpad - -; CHECK: lpad: -; The sync region of this detached-rethrow should match that of the -; detach in pfor.cond22 -; CHECK: invoke void @llvm.detached.rethrow.sl_p0i8i32s(token %[[OGSYNCREG]] - -; CHECK: lpad52.loopexit: -; CHECK: br label %lpad52 - -; CHECK: lpad52: -; CHECK: sync within %[[OGSYNCREG]], label %sync.continue57 - -; CHECK: sync.continue57: -; The sync region of this detached-rethrow should match that of the -; detach in pfor.cond. -; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i8i32s(token %syncreg - ; CHECK: lpad62.loopexit: ; CHECK: br label %lpad62