From 8f3cded16222ec9c9ee60ce5278a2b834d11d746 Mon Sep 17 00:00:00 2001
From: "Satanovskiy, Leonid"
Date: Wed, 13 Sep 2023 12:17:50 +0000
Subject: [PATCH] Simplified global volatile access clobbering checker.

Misc: extended vc::getUnderlyingGlobalVariable and genx::getBitCastedValue
to support arbitrary chains of instruction- and constant-expression-typed
bitcasts.

Reduced the GVClobberingChecker/global_volatile_clobbering_checker_fixup.ll
test.
---
 IGC/VectorCompiler/lib/GenXCodeGen/GenX.h     |   2 +-
 .../lib/GenXCodeGen/GenXGVClobberChecker.cpp  | 135 ++++++++---------
 .../lib/GenXCodeGen/GenXTargetMachine.cpp     |   4 +-
 .../lib/GenXCodeGen/GenXTargetMachine.h       |   2 +-
 .../lib/GenXCodeGen/GenXUtil.cpp              |  11 +-
 IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.h |   2 +-
 .../lib/Utils/GenX/GlobalVariable.cpp         |  13 +-
 ...lobal_volatile_clobbering_checker_fixup.ll | 137 ++++--------------
 8 files changed, 108 insertions(+), 198 deletions(-)

diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenX.h b/IGC/VectorCompiler/lib/GenXCodeGen/GenX.h
index 5250cac80d3a..70edf3100d38 100644
--- a/IGC/VectorCompiler/lib/GenXCodeGen/GenX.h
+++ b/IGC/VectorCompiler/lib/GenXCodeGen/GenX.h
@@ -118,7 +118,7 @@ ModulePass *createGenXNumberingWrapperPass();
 ModulePass *createGenXLiveRangesWrapperPass();
 ModulePass *createGenXRematerializationWrapperPass();
 ModulePass *createGenXCoalescingWrapperPass();
-ModulePass *createGenXGVClobberCheckerWrapperPass();
+ModulePass *createGenXGVClobberCheckerPass();
 ModulePass *createGenXAddressCommoningWrapperPass();
 ModulePass *createGenXArgIndirectionWrapperPass();
 FunctionPass *createGenXTidyControlFlowPass();
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXGVClobberChecker.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXGVClobberChecker.cpp
index 1ad1034f50f4..3f4f91af7e44 100644
--- a/IGC/VectorCompiler/lib/GenXCodeGen/GenXGVClobberChecker.cpp
+++ b/IGC/VectorCompiler/lib/GenXCodeGen/GenXGVClobberChecker.cpp
@@ -60,7 +60,7 @@ SPDX-License-Identifier: MIT
 #include "vc/Support/GenXDiagnostic.h"
 #include "vc/Utils/GenX/GlobalVariable.h"
 
-#include <map>
+#include <unordered_map>
 
 #define DEBUG_TYPE "GENX_CLOBBER_CHECKER"
 
@@ -82,7 +82,7 @@ static cl::opt<bool> CheckGVClobberingCollectRelatedGVStoreCallSites(
 
 namespace {
 
-class GenXGVClobberChecker : public FGPassImplInterface,
+class GenXGVClobberChecker : public ModulePass,
                              public IDMixin<GenXGVClobberChecker> {
 private:
   GenXBaling *Baling = nullptr;
@@ -92,34 +92,34 @@ class GenXGVClobberChecker : public FGPassImplInterface,
                                            llvm::SetVector<Instruction *> *SIs);
 
 public:
-  explicit GenXGVClobberChecker() {}
-  static StringRef getPassName() { return "GenX GV clobber checker/fixup"; }
-  static void getAnalysisUsage(AnalysisUsage &AU) {
-    AU.addRequired<GenXGroupBaling>();
-    AU.addRequired<GenXLiveness>();
+  explicit GenXGVClobberChecker() : ModulePass(ID) {}
+  StringRef getPassName() const override {
+    return "GenX GV clobber checker/fixup";
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addUsedIfAvailable<GenXGroupBaling>();
+    AU.addUsedIfAvailable<GenXLiveness>();
     if (!CheckGVClobberingTryFixup)
       AU.setPreservesAll();
   }
-  bool runOnFunctionGroup(FunctionGroup &FG) override;
+  bool runOnModule(Module &) override;
 };
 } // namespace
 
 namespace llvm {
-void initializeGenXGVClobberCheckerWrapperPass(PassRegistry &);
-using GenXGVClobberCheckerWrapper =
-    FunctionGroupWrapperPass<GenXGVClobberChecker>;
+void initializeGenXGVClobberCheckerPass(PassRegistry &);
 } // namespace llvm
 
-INITIALIZE_PASS_BEGIN(GenXGVClobberCheckerWrapper,
-                      "GenXGVClobberCheckerWrapper",
+INITIALIZE_PASS_BEGIN(GenXGVClobberChecker, "GenXGVClobberChecker",
                       "GenX global volatile clobbering checker", false, false)
INITIALIZE_PASS_DEPENDENCY(GenXGroupBalingWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXLivenessWrapper)
-INITIALIZE_PASS_END(GenXGVClobberCheckerWrapper, "GenXGVClobberCheckerWrapper",
+INITIALIZE_PASS_END(GenXGVClobberChecker, "GenXGVClobberChecker",
                     "GenX global volatile clobbering checker", false, false)
 
-ModulePass *llvm::createGenXGVClobberCheckerWrapperPass() {
-  initializeGenXGVClobberCheckerWrapperPass(*PassRegistry::getPassRegistry());
-  return new GenXGVClobberCheckerWrapper();
+ModulePass *llvm::createGenXGVClobberCheckerPass() {
+  initializeGenXGVClobberCheckerPass(*PassRegistry::getPassRegistry());
+  return new GenXGVClobberChecker();
 }
 
 bool GenXGVClobberChecker::checkGVClobberingByInterveningStore(
@@ -130,9 +130,7 @@ bool GenXGVClobberChecker::checkGVClobberingByInterveningStore(
     if (!UI)
       continue;
 
-    const StringRef DiagPrefix =
-        "Global volatile clobbering checker: clobbering detected,"
-        " some optimizations resulted in over-optimization,";
+    const StringRef DiagPrefix = "potential clobbering detected:";
 
     if (auto *SI = genx::getInterveningGVStoreOrNull(LI, UI, SIs)) {
       vc::diagnose(LI->getContext(), DiagPrefix,
@@ -148,18 +146,26 @@ bool GenXGVClobberChecker::checkGVClobberingByInterveningStore(
                << __FUNCTION__ << ": User: "; UI->print(dbgs()); dbgs() << "\n";);
 
     if (CheckGVClobberingTryFixup) {
+      if (!Baling || !Liveness)
+        vc::diagnose(LI->getContext(), DiagPrefix,
+                     "Either Baling or Liveness analysis results are not "
+                     "available",
+                     DS_Warning, vc::WarningName::Generic, UI);
+
      if (GenXIntrinsic::isRdRegion(UI) &&
          isa<Constant>(
              UI->getOperand(GenXIntrinsic::GenXRegion::RdIndexOperandNum))) {
-        if (Baling->isBaled(UI))
-          Baling->unbale(UI);
        UI->moveAfter(LI);
-        if (Liveness->getLiveRangeOrNull(UI))
-          Liveness->removeValue(UI);
-        auto *LR = Liveness->getOrCreateLiveRange(UI);
-        LR->setCategory(Liveness->getLiveRangeOrNull(LI)->getCategory());
-        LR->setLogAlignment(
-            Liveness->getLiveRangeOrNull(LI)->getLogAlignment());
+        if (Baling && Baling->isBaled(UI))
+          Baling->unbale(UI);
+        if (Liveness) {
+          if (Liveness->getLiveRangeOrNull(UI))
+            Liveness->removeValue(UI);
+          auto *LR = Liveness->getOrCreateLiveRange(UI);
+          LR->setCategory(Liveness->getLiveRangeOrNull(LI)->getCategory());
+          LR->setLogAlignment(
+              Liveness->getLiveRangeOrNull(LI)->getLogAlignment());
+        }
        Changed |= true;
      } else {
        vc::diagnose(
@@ -176,55 +182,34 @@ bool GenXGVClobberChecker::checkGVClobberingByInterveningStore(
   return Changed;
 };
 
-bool GenXGVClobberChecker::runOnFunctionGroup(FunctionGroup &FG) {
+bool GenXGVClobberChecker::runOnModule(Module &M) {
   bool Changed = false;
-  Baling = &getAnalysis<GenXGroupBaling>();
-  Liveness = &getAnalysis<GenXLiveness>();
-
-  for (auto &GV : FG.getModule()->globals()) {
-    if (!GV.hasAttribute(genx::FunctionMD::GenXVolatile))
-      continue;
-
-    auto *GvLiveRange = Liveness->getLiveRangeOrNull(&GV);
-    if (!GvLiveRange)
-      continue;
-
-    llvm::SetVector<Instruction *> LoadsInFunctionGroup;
-    std::map<Function *, llvm::SetVector<Instruction *>>
-        GVStoreRelatedCallSitesPerFunction{};
-
-    for (const auto &User : GV.users()) {
-      auto *GVUserInst = dyn_cast<Instruction>(User);
-      if (!GVUserInst)
-        continue;
-
-      if (llvm::find(FG, GVUserInst->getFunction()) == FG.end())
-        continue;
-
-      if (isa<LoadInst>(GVUserInst))
-        LoadsInFunctionGroup.insert(GVUserInst);
-      else if (CheckGVClobberingCollectRelatedGVStoreCallSites &&
-               isa<StoreInst>(GVUserInst))
-        genx::collectRelatedCallSitesPerFunction(
-            GVUserInst, &FG, GVStoreRelatedCallSitesPerFunction);
-
-      // Global variable is used in a constexpr.
-      if (&GV != vc::getUnderlyingGlobalVariable(GVUserInst))
-        continue;
-
-      // Loads preceded by bitcasts.
-      for (const auto &User : GVUserInst->users())
-        if (auto *Load = dyn_cast<LoadInst>(User))
-          if (llvm::find(FG, Load->getFunction()) != FG.end())
-            LoadsInFunctionGroup.insert(Load);
-    }
-
-    for (const auto &LI : LoadsInFunctionGroup)
-      Changed |= checkGVClobberingByInterveningStore(
-          LI, CheckGVClobberingCollectRelatedGVStoreCallSites
-                  ? &GVStoreRelatedCallSitesPerFunction[LI->getFunction()]
-                  : nullptr);
+  Baling = getAnalysisIfAvailable<GenXGroupBaling>();
+  Liveness = getAnalysisIfAvailable<GenXLiveness>();
+
+  llvm::SetVector<Instruction *> Loads;
+  std::unordered_map<
+      Value *, std::unordered_map<Function *, llvm::SetVector<Instruction *>>>
+      ClobberingInsns{};
+
+  for (auto &F : M.functions()) {
+    for (auto &BB : F)
+      for (auto &I : BB)
+        if (genx::isGlobalVolatileLoad(&I))
+          Loads.insert(&I);
+        else if (CheckGVClobberingCollectRelatedGVStoreCallSites &&
+                 genx::isGlobalVolatileStore(&I))
+          genx::collectRelatedCallSitesPerFunction(
+              &I, nullptr,
+              ClobberingInsns[genx::getBitCastedValue(I.getOperand(1))]);
   }
 
+  for (const auto &LI : Loads)
+    Changed |= checkGVClobberingByInterveningStore(
+        LI, CheckGVClobberingCollectRelatedGVStoreCallSites
+                ? &ClobberingInsns[genx::getBitCastedValue(LI->getOperand(0))]
+                                  [LI->getFunction()]
+                : nullptr);
+
   return Changed;
 }
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.cpp
index baad18d76853..d6ddf1eb3633 100644
--- a/IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.cpp
+++ b/IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.cpp
@@ -93,7 +93,7 @@ void initializeGenXPasses(PassRegistry &registry) {
   initializeGenXCFSimplificationPass(registry);
   initializeGenXCisaBuilderWrapperPass(registry);
   initializeGenXCoalescingWrapperPass(registry);
-  initializeGenXGVClobberCheckerWrapperPass(registry);
+  initializeGenXGVClobberCheckerPass(registry);
   initializeGenXDeadVectorRemovalPass(registry);
   initializeGenXDepressurizerWrapperPass(registry);
   initializeGenXEarlySimdCFConformancePass(registry);
@@ -609,7 +609,7 @@ bool GenXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   vc::addPass(PM, createGenXLiveRangesWrapperPass());
   /// .. include:: GenXGVClobberChecker.cpp
   if (BackendConfig.checkGVClobbering())
-    vc::addPass(PM, createGenXGVClobberCheckerWrapperPass());
+    vc::addPass(PM, createGenXGVClobberCheckerPass());
   /// .. include:: GenXCoalescing.cpp
   vc::addPass(PM, createGenXCoalescingWrapperPass());
   /// .. include:: GenXAddressCommoning.cpp
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.h b/IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.h
index 46b254900b83..3c4a02f9eb66 100644
--- a/IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.h
+++ b/IGC/VectorCompiler/lib/GenXCodeGen/GenXTargetMachine.h
@@ -216,7 +216,7 @@ void initializeGenXCategoryWrapperPass(PassRegistry &);
 void initializeGenXCFSimplificationPass(PassRegistry &);
 void initializeGenXCisaBuilderWrapperPass(PassRegistry &);
 void initializeGenXCoalescingWrapperPass(PassRegistry &);
-void initializeGenXGVClobberCheckerWrapperPass(PassRegistry &);
+void initializeGenXGVClobberCheckerPass(PassRegistry &);
 void initializeGenXDeadVectorRemovalPass(PassRegistry &);
 void initializeGenXDepressurizerWrapperPass(PassRegistry &);
 void initializeGenXEarlySimdCFConformancePass(PassRegistry &);
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.cpp b/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.cpp
index 9a0eff129692..9df8178af978 100644
--- a/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.cpp
+++ b/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.cpp
@@ -1575,8 +1575,11 @@ void genx::LayoutBlocks(Function &func)
 }
 
 Value *genx::getBitCastedValue(Value *V) {
-  while (auto *BCI = dyn_cast<BitCastInst>(V))
-    V = BCI->getOperand(0);
+  while (isa<BitCastInst>(V) ||
+         (isa<ConstantExpr>(V) &&
+          cast<ConstantExpr>(V)->getOpcode() == CastInst::BitCast))
+    V = isa<BitCastInst>(V) ? cast<BitCastInst>(V)->getOperand(0)
+                            : cast<ConstantExpr>(V)->getOperand(0);
   return V;
 }
 
@@ -2397,7 +2400,7 @@ genx::getInterveningGVStoreOrNull(Instruction *LI, Instruction *UIOrPos,
 
 void genx::collectRelatedCallSitesPerFunction(
     Instruction *SI, FunctionGroup *FG,
-    std::map<Function *, llvm::SetVector<Instruction *>>
+    std::unordered_map<Function *, llvm::SetVector<Instruction *>>
        &CallSitesPerFunction) {
   using FuncsSeenT = llvm::SetVector<Function *>;
   auto collectRelatedCallSites = [&](Function *Func, FuncsSeenT *FuncsSeen,
@@ -2409,7 +2412,7 @@ void genx::collectRelatedCallSitesPerFunction(
       if (isa<CallInst>(FuncUser)) {
        auto *Call = dyn_cast<CallInst>(FuncUser);
        auto *curFunction = Call->getFunction();
-        if (llvm::find(*FG, curFunction) == FG->end())
+        if (FG && llvm::find(*FG, curFunction) == FG->end())
          continue;
        CallSitesPerFunction[curFunction].insert(Call);
        collectRelatedCallSites(curFunction, FuncsSeen,
diff --git a/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.h b/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.h
index da94da674da9..55e8fa4bc99e 100644
--- a/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.h
+++ b/IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.h
@@ -780,7 +780,7 @@ bool checkGVClobberingByInterveningStore(
 
 void collectRelatedCallSitesPerFunction(
     Instruction *SI, class FunctionGroup *FG,
-    std::map<Function *, llvm::SetVector<Instruction *>>
+    std::unordered_map<Function *, llvm::SetVector<Instruction *>>
        &InstAndRelatedCallSitesPerFunction);
 
 llvm::SetVector<Instruction *> getAncestorGVLoads(Instruction *I,
diff --git a/IGC/VectorCompiler/lib/Utils/GenX/GlobalVariable.cpp b/IGC/VectorCompiler/lib/Utils/GenX/GlobalVariable.cpp
index ae6e25aa7f3e..8262aa8d60b5 100644
--- a/IGC/VectorCompiler/lib/Utils/GenX/GlobalVariable.cpp
+++ b/IGC/VectorCompiler/lib/Utils/GenX/GlobalVariable.cpp
@@ -41,14 +41,11 @@ bool vc::isRealGlobalVariable(const GlobalVariable &GV) {
 }
 
 const GlobalVariable *vc::getUnderlyingGlobalVariable(const Value *V) {
-  while (auto *BI = dyn_cast<BitCastInst>(V))
-    V = BI->getOperand(0);
-  while (auto *CE = dyn_cast_or_null<ConstantExpr>(V)) {
-    if (CE->getOpcode() == CastInst::BitCast)
-      V = CE->getOperand(0);
-    else
-      break;
-  }
+  while (isa<BitCastInst>(V) ||
+         (isa<ConstantExpr>(V) &&
+          cast<ConstantExpr>(V)->getOpcode() == CastInst::BitCast))
+    V = isa<BitCastInst>(V) ? cast<BitCastInst>(V)->getOperand(0)
+                            : cast<ConstantExpr>(V)->getOperand(0);
   return dyn_cast_or_null<GlobalVariable>(V);
 }
 
diff --git a/IGC/VectorCompiler/test/GVClobberingChecker/global_volatile_clobbering_checker_fixup.ll b/IGC/VectorCompiler/test/GVClobberingChecker/global_volatile_clobbering_checker_fixup.ll
index f4429c6e54c8..1ccb34502cf4 100644
--- a/IGC/VectorCompiler/test/GVClobberingChecker/global_volatile_clobbering_checker_fixup.ll
+++ b/IGC/VectorCompiler/test/GVClobberingChecker/global_volatile_clobbering_checker_fixup.ll
@@ -6,8 +6,8 @@
 ;
 ;============================ end_copyright_notice =============================
 ;
-; RUN: opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXGVClobberCheckerWrapper -check-gv-clobbering=true -check-gv-clobbering-collect-store-related-call-sites=true -check-gv-clobbering-try-fixup=true -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Gen9 -S < %s | FileCheck %s
-; RUN: opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXGVClobberCheckerWrapper -check-gv-clobbering=true -check-gv-clobbering-collect-store-related-call-sites=false -check-gv-clobbering-try-fixup=true -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Gen9 -S < %s | FileCheck %s
+; RUN: opt %use_old_pass_manager% -GenXGVClobberChecker -check-gv-clobbering=true -check-gv-clobbering-collect-store-related-call-sites=true -check-gv-clobbering-try-fixup=true -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Gen9 -S < %s | FileCheck %s
+; RUN: opt %use_old_pass_manager% -GenXGVClobberChecker -check-gv-clobbering=true -check-gv-clobbering-collect-store-related-call-sites=false -check-gv-clobbering-try-fixup=true -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Gen9 -S < %s | FileCheck %s
 ; ------------------------------------------------
 ; This test checks the global volatile clobbering checker/fixup introduced late in the pipeline to catch over-optimization of global volatile accesses.
 ; This is an auxiliary utility used to help detect and fix erroneous over-optimization cases.
@@ -16,156 +16,81 @@
 
 target datalayout = "e-p:64:64-p6:32:32-i64:64-n8:16:32:64"
 target triple = "genx64-unknown-unknown"
 
-@_ZL8g_global = internal global <4 x i32> zeroinitializer, align 16 #0
+@_ZL8g_global = external global <4 x i32> #0
 
-define dllexport spir_kernel void @TestGVClobberingFixupStoreInCall(i8 addrspace(1)* nocapture readonly %_arg_buf_gpu, i8 addrspace(1)* nocapture %_arg_res_gpu) local_unnamed_addr #1 {
+define spir_kernel void @TestGVClobberingFixupStoreInCall() {
 entry:
-  %ptrtoint = ptrtoint i8 addrspace(1)* %_arg_buf_gpu to i64
-  %.splatinsert5 = bitcast i64 %ptrtoint to <1 x i64>
-  %call.i.i.i.i.i.esimd5 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
-  %gather = call <1 x i32> @llvm.genx.svm.gather.v1i32.v1i1.v1i64(<1 x i1> <i1 true>, i32 0, <1 x i64> %.splatinsert5, <1 x i32> undef)
-  %call4.i.i.i.i.esimd = tail call <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32> %call.i.i.i.i.i.esimd5, <1 x i32> %gather, i32 0, i32 1, i32 1, i16 0, i32 0, <1 x i1> <i1 true>)
-  store volatile <4 x i32> %call4.i.i.i.i.esimd, <4 x i32>* @_ZL8g_global
-  %call.i.i.i8.i.esimd6 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
+  %call.i.i.i8.i.esimd6 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global, align 16
+  tail call spir_func void @UserFunctionRewriteGV1()
+  %vecext.i.i1.regioncollapsed = tail call i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32> %call.i.i.i8.i.esimd6, i32 0, i32 1, i32 1, i16 0, i32 undef)
+  %cmp.i.i = icmp eq i32 %vecext.i.i1.regioncollapsed, 55
+  ret void
+}
 ; CHECK: %call.i.i.i8.i.esimd6 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
 ; CHECK-NEXT: %vecext.i.i1.regioncollapsed = tail call i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32> %call.i.i.i8.i.esimd6, i32 0, i32 1, i32 1, i16 0, i32 undef)
 ; COM: if -check-gv-clobbering-collect-store-related-call-sites=true is supplied,
 ; COM: store interference is precisely detected here down the call chain.
 ; COM: if -check-gv-clobbering-collect-store-related-call-sites=false is supplied or omitted,
 ; COM: store interference is speculated because of a call to a user function.
-  tail call spir_func void @UserFunctionRewriteGV1()
 ; CHECK-NEXT: tail call spir_func void @UserFunctionRewriteGV1()
 ; CHECK-NOT: %vecext.i.i1.regioncollapsed = tail call i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32> %call.i.i.i8.i.esimd6, i32 0, i32 1, i32 1, i16 0, i32 undef)
 ; CHECK-NEXT: %cmp.i.i = icmp eq i32 %vecext.i.i1.regioncollapsed, 55
+
+define spir_kernel void @TestGVClobberingFixupLocalStore() {
+entry:
+  %call.i.i.i8.i.esimd6 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global, align 16
+  %call4.i.i.i.i.i.i.esimd = tail call <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32> %call.i.i.i8.i.esimd6, <1 x i32> , i32 0, i32 1, i32 1, i16 0, i32 0, <1 x i1> <i1 true>)
+  store volatile <4 x i32> %call4.i.i.i.i.i.i.esimd, <4 x i32>* @_ZL8g_global, align 16
   %vecext.i.i1.regioncollapsed = tail call i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32> %call.i.i.i8.i.esimd6, i32 0, i32 1, i32 1, i16 0, i32 undef)
   %cmp.i.i = icmp eq i32 %vecext.i.i1.regioncollapsed, 55
-  %conv.i.i4 = select i1 %cmp.i.i, i32 1, i32 0
-  %bitcast = bitcast i32 %conv.i.i4 to <1 x i32>
-  %ptrtoint6 = ptrtoint i8 addrspace(1)* %_arg_res_gpu to i64
-  %.splatinsert13 = bitcast i64 %ptrtoint6 to <1 x i64>
-  call void @llvm.genx.svm.scatter.v1i1.v1i64.v1i32(<1 x i1> <i1 true>, i32 0, <1 x i64> %.splatinsert13, <1 x i32> %bitcast)
   ret void
 }
-
-define dllexport spir_kernel void @TestGVClobberingFixupLocalStore(i8 addrspace(1)* nocapture readonly %_arg_buf_gpu, i8 addrspace(1)* nocapture %_arg_res_gpu, i64 %impl.arg.private.base) local_unnamed_addr #1 {
-entry:
-  %ptrtoint = ptrtoint i8 addrspace(1)* %_arg_buf_gpu to i64
-  %.splatinsert5 = bitcast i64 %ptrtoint to <1 x i64>
-  %call.i.i.i.i.i.esimd5 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
-  %gather = call <1 x i32> @llvm.genx.svm.gather.v1i32.v1i1.v1i64(<1 x i1> <i1 true>, i32 0, <1 x i64> %.splatinsert5, <1 x i32> undef)
-  %call4.i.i.i.i.esimd = tail call <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32> %call.i.i.i.i.i.esimd5, <1 x i32> %gather, i32 0, i32 1, i32 1, i16 0, i32 0, <1 x i1> <i1 true>)
-  store volatile <4 x i32> %call4.i.i.i.i.esimd, <4 x i32>* @_ZL8g_global
-  %call.i.i.i8.i.esimd6 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
 ; CHECK: %call.i.i.i8.i.esimd6 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
 ; CHECK-NEXT: %vecext.i.i1.regioncollapsed = tail call i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32> %call.i.i.i8.i.esimd6, i32 0, i32 1, i32 1, i16 0, i32 undef)
 ; COM: store interference is directly detected in this function.
-  %call4.i.i.i.i.i.i.esimd = tail call <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32> %call.i.i.i8.i.esimd6, <1 x i32> , i32 0, i32 1, i32 1, i16 0, i32 0, <1 x i1> <i1 true>)
 ; CHECK-NEXT: %call4.i.i.i.i.i.i.esimd = tail call <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32> %call.i.i.i8.i.esimd6, <1 x i32> , i32 0, i32 1, i32 1, i16 0, i32 0, <1 x i1> <i1 true>)
-  store volatile <4 x i32> %call4.i.i.i.i.i.i.esimd, <4 x i32>* @_ZL8g_global
 ; CHECK-NEXT: store volatile <4 x i32> %call4.i.i.i.i.i.i.esimd, <4 x i32>* @_ZL8g_global
-  %vecext.i.i1.regioncollapsed = tail call i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32> %call.i.i.i8.i.esimd6, i32 0, i32 1, i32 1, i16 0, i32 undef)
 ; CHECK-NOT: %vecext.i.i1.regioncollapsed = tail call i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32> %call.i.i.i8.i.esimd6, i32 0, i32 1, i32 1, i16 0, i32 undef)
-  %cmp.i.i = icmp eq i32 %vecext.i.i1.regioncollapsed, 55
 ; CHECK-NEXT: %cmp.i.i = icmp eq i32 %vecext.i.i1.regioncollapsed, 55
-  %conv.i.i4 = select i1 %cmp.i.i, i32 1, i32 0
-  %bitcast = bitcast i32 %conv.i.i4 to <1 x i32>
-  %ptrtoint6 = ptrtoint i8 addrspace(1)* %_arg_res_gpu to i64
-  %.splatinsert13 = bitcast i64 %ptrtoint6 to <1 x i64>
-  call void @llvm.genx.svm.scatter.v1i1.v1i64.v1i32(<1 x i1> <i1 true>, i32 0, <1 x i64> %.splatinsert13, <1 x i32> %bitcast)
-  ret void
-}
 
-define dllexport spir_kernel void @TestGVClobberingFixupLoopLocalStore(i8 addrspace(1)* nocapture readonly %_arg_input_gpu, i8 addrspace(1)* nocapture %_arg_res_gpu, i64 %impl.arg.private.base) local_unnamed_addr #1 {
+define spir_kernel void @TestGVClobberingFixupLoopLocalStore(i32 %bitcast, i32 %p2.0.i2, <1 x i64> %.splatinsert35) {
 entry:
-  %ptrtoint = ptrtoint i8 addrspace(1)* %_arg_input_gpu to i64
-  %.splatinsert7 = bitcast i64 %ptrtoint to <1 x i64>
-  %call.i.i.i.i.i.esimd7 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
-  %gather9 = call <1 x i32> @llvm.genx.svm.gather.v1i32.v1i1.v1i64(<1 x i1> <i1 true>, i32 0, <1 x i64> %.splatinsert7, <1 x i32> undef)
-  %call4.i.i.i.i.esimd = tail call <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32> %call.i.i.i.i.i.esimd7, <1 x i32> %gather9, i32 0, i32 1, i32 1, i16 0, i32 0, <1 x i1> <i1 true>)
-  store volatile <4 x i32> %call4.i.i.i.i.esimd, <4 x i32>* @_ZL8g_global
-  %.iv32cast = bitcast i64 %ptrtoint to <2 x i32>
-  %.LoSplit = call <1 x i32> @llvm.genx.rdregioni.v1i32.v2i32.i16(<2 x i32> %.iv32cast, i32 0, i32 1, i32 2, i16 0, i32 undef)
-  %int_emu.add64.lo.aggregate. = call { <1 x i32>, <1 x i32> } @llvm.genx.addc.v1i32.v1i32(<1 x i32> %.LoSplit, <1 x i32> )
-  %int_emu.add64.lo.add. = extractvalue { <1 x i32>, <1 x i32> } %int_emu.add64.lo.aggregate., 1
-  %int_emu.add64.lo.carry. = extractvalue { <1 x i32>, <1 x i32> } %int_emu.add64.lo.aggregate., 0
-  %int_emu.add.partial_join = call <2 x i32> @llvm.genx.wrregioni.v2i32.v1i32.i16.i1(<2 x i32> undef, <1 x i32> %int_emu.add64.lo.add., i32 0, i32 1, i32 2, i16 0, i32 undef, i1 true)
-  %.HiSplit = call <1 x i32> @llvm.genx.rdregioni.v1i32.v2i32.i16(<2 x i32> %.iv32cast, i32 0, i32 1, i32 2, i16 4, i32 undef)
-  %add_hi.part = add <1 x i32> %int_emu.add64.lo.carry., %.HiSplit
-  %int_emu.add.joined = call <2 x i32> @llvm.genx.wrregioni.v2i32.v1i32.i16.i1(<2 x i32> %int_emu.add.partial_join, <1 x i32> %add_hi.part, i32 0, i32 1, i32 2, i16 4, i32 undef, i1 true)
-  %int_emu.add. = bitcast <2 x i32> %int_emu.add.joined to <1 x i64>
-  %gather = call <1 x i32> @llvm.genx.svm.gather.v1i32.v1i1.v1i64(<1 x i1> <i1 true>, i32 0, <1 x i64> %int_emu.add., <1 x i32> undef)
-  %bitcast = bitcast <1 x i32> %gather to i32
-  %call.i.i.i17.i.esimd8 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
-; CHECK: %call.i.i.i17.i.esimd8 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
-; CHECK-NEXT: %vecext.i.i3.regioncollapsed = call <1 x i32> @llvm.genx.rdregioni.v1i32.v4i32.i16(<4 x i32> %call.i.i.i17.i.esimd8, i32 0, i32 1, i32 1, i16 0, i32 undef)
+  %call.i.i.i17.i.esimd8 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global, align 16
   %cmp.i1 = icmp sgt i32 %bitcast, 0
-; CHECK-NEXT: %cmp.i1 = icmp sgt i32 %bitcast, 0
-  br i1 %cmp.i1, label %entry.while.body.i_crit_edge, label %entry.TestGVClobberingFixupLoopLocalStore.exit_crit_edge
-
-entry.TestGVClobberingFixupLoopLocalStore.exit_crit_edge: ; preds = %entry
-  br label %TestGVClobberingFixupLoopLocalStore.exit
-
-entry.while.body.i_crit_edge: ; preds = %entry
-  %ptrtoint8 = ptrtoint i8 addrspace(1)* %_arg_res_gpu to i64
-  %.splatinsert35 = bitcast i64 %ptrtoint8 to <1 x i64>
   br label %while.body.i
 
-while.body.i: ; preds = %while.body.i.while.body.i_crit_edge, %entry.while.body.i_crit_edge
-  %p2.0.i2 = phi i32 [ %dec.i, %while.body.i.while.body.i_crit_edge ], [ %bitcast, %entry.while.body.i_crit_edge ]
+while.body.i: ; preds = %while.body.i.while.body.i_crit_edge, %entry
   %dec.i = add nsw i32 %p2.0.i2, -1
-; CHECK: %dec.i = add nsw i32 %p2.0.i2, -1
   %vecext.i.i3.regioncollapsed = call <1 x i32> @llvm.genx.rdregioni.v1i32.v4i32.i16(<4 x i32> %call.i.i.i17.i.esimd8, i32 0, i32 1, i32 1, i16 0, i32 undef)
-; CHECK-NOT: %vecext.i.i3.regioncollapsed = call <1 x i32> @llvm.genx.rdregioni.v1i32.v4i32.i16(<4 x i32> %call.i.i.i17.i.esimd8, i32 0, i32 1, i32 1, i16 0, i32 undef)
   call void @llvm.genx.svm.scatter.v1i1.v1i64.v1i32(<1 x i1> <i1 true>, i32 0, <1 x i64> %.splatinsert35, <1 x i32> %vecext.i.i3.regioncollapsed)
-; CHECK-NEXT: call void @llvm.genx.svm.scatter.v1i1.v1i64.v1i32(<1 x i1> <i1 true>, i32 0, <1 x i64> %.splatinsert35, <1 x i32> %vecext.i.i3.regioncollapsed)
-  %call.i.i.i.i.i.i.i.esimd9 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
-  %call4.i.i.i.i.i.i.esimd = tail call <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32> %call.i.i.i.i.i.i.i.esimd9, <1 x i32> , i32 0, i32 1, i32 1, i16 0, i32 0, <1 x i1> <i1 true>)
-  store volatile <4 x i32> %call4.i.i.i.i.i.i.esimd, <4 x i32>* @_ZL8g_global
-  %cmp.i = icmp sgt i32 %p2.0.i2, 1
-  br i1 %cmp.i, label %while.body.i.while.body.i_crit_edge, label %while.body.i.TestGVClobberingFixupLoopLocalStore.exit_crit_edge
-
-while.body.i.TestGVClobberingFixupLoopLocalStore.exit_crit_edge: ; preds = %while.body.i
-  br label %TestGVClobberingFixupLoopLocalStore.exit
+  store volatile <4 x i32> zeroinitializer, <4 x i32>* @_ZL8g_global, align 16
+  br label %while.body.i.while.body.i_crit_edge
 
 while.body.i.while.body.i_crit_edge: ; preds = %while.body.i
   br label %while.body.i
-
-TestGVClobberingFixupLoopLocalStore.exit: ; preds = %while.body.i.while.body.i_crit_edge, %entry.TestGVClobberingFixupLoopLocalStore.exit_crit_edge
-  ret void
 }
+
+; CHECK: %call.i.i.i17.i.esimd8 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
+; CHECK-NEXT: %vecext.i.i3.regioncollapsed = call <1 x i32> @llvm.genx.rdregioni.v1i32.v4i32.i16(<4 x i32> %call.i.i.i17.i.esimd8, i32 0, i32 1, i32 1, i16 0, i32 undef)
+; CHECK-NEXT: %cmp.i1 = icmp sgt i32 %bitcast, 0
+; CHECK: %dec.i = add nsw i32 %p2.0.i2, -1
+; CHECK-NOT: %vecext.i.i3.regioncollapsed = call <1 x i32> @llvm.genx.rdregioni.v1i32.v4i32.i16(<4 x i32> %call.i.i.i17.i.esimd8, i32 0, i32 1, i32 1, i16 0, i32 undef)
+; CHECK-NEXT: call void @llvm.genx.svm.scatter.v1i1.v1i64.v1i32(<1 x i1> <i1 true>, i32 0, <1 x i64> %.splatinsert35, <1 x i32> %vecext.i.i3.regioncollapsed)
 
-define internal spir_func void @UserFunctionRewriteGV1() unnamed_addr {
+define internal spir_func void @UserFunctionRewriteGV1() {
 entry:
   tail call spir_func void @UserFunctionRewriteGV2()
   ret void
 }
 
-define internal spir_func void @UserFunctionRewriteGV2() unnamed_addr {
+define internal spir_func void @UserFunctionRewriteGV2() {
 entry:
-  %call.i.i.i.i.esimd3 = load volatile <4 x i32>, <4 x i32>* @_ZL8g_global
-  %call4.i.i.i.esimd = tail call <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32> %call.i.i.i.i.esimd3, <1 x i32> , i32 0, i32 1, i32 1, i16 0, i32 0, <1 x i1> <i1 true>)
-  store volatile <4 x i32> %call4.i.i.i.esimd, <4 x i32>* @_ZL8g_global
+  store volatile <4 x i32> zeroinitializer, <4 x i32>* @_ZL8g_global, align 16
   ret void
 }
 
 declare i32 @llvm.genx.rdregioni.i32.v4i32.i16(<4 x i32>, i32, i32, i32, i16, i32)
-declare { <1 x i32>, <1 x i32> } @llvm.genx.addc.v1i32.v1i32(<1 x i32>, <1 x i32>)
 declare void @llvm.genx.svm.scatter.v1i1.v1i64.v1i32(<1 x i1>, i32, <1 x i64>, <1 x i32>)
-declare <1 x i32> @llvm.genx.rdregioni.v1i32.v2i32.i16(<2 x i32>, i32, i32, i32, i16, i32)
 declare <1 x i32> @llvm.genx.rdregioni.v1i32.v4i32.i16(<4 x i32>, i32, i32, i32, i16, i32)
-declare <1 x i32> @llvm.genx.svm.gather.v1i32.v1i1.v1i64(<1 x i1>, i32, <1 x i64>, <1 x i32>)
-declare <2 x i32> @llvm.genx.wrregioni.v2i32.v1i32.i16.i1(<2 x i32>, <1 x i32>, i32, i32, i32, i16, i32, i1)
 declare <4 x i32> @llvm.genx.wrregioni.v4i32.v1i32.i16.v1i1(<4 x i32>, <1 x i32>, i32, i32, i32, i16, i32, <1 x i1>)
 
-attributes #0 = { "VCByteOffset"="128" "VCGlobalVariable" "VCVolatile" "genx_byte_offset"="128" "genx_volatile" }
-attributes #1 = { nounwind "CMGenxMain" "VC.Stack.Amount"="0" "oclrt"="1" }
-
-!genx.kernels = !{!1, !2, !3}
-!genx.kernel.internal = !{!11, !12, !13}
-
-!1 = !{void (i8 addrspace(1)*, i8 addrspace(1)*)* @TestGVClobberingFixupStoreInCall, !"TestGVClobberingFixupStoreInCall", !{i32 0, i32 0, i32 96}, i32 0, !{i32 72, i32 80, i32 64}, !{i32 0, i32 0}, !{!"svmptr_t", !"svmptr_t"}, i32 0}
-!2 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64)* @TestGVClobberingFixupLocalStore, !"TestGVClobberingFixupLocalStore", !{i32 0, i32 0, i32 96}, i32 0, !{i32 72, i32 80, i32 64}, !{i32 0, i32 0}, !{!"svmptr_t", !"svmptr_t"}, i32 0}
-!3 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64)* @TestGVClobberingFixupLoopLocalStore, !"TestGVClobberingFixupLoopLocalStore", !{i32 0, i32 0, i32 96}, i32 0, !{i32 72, i32 80, i32 64}, !{i32 0, i32 0}, !{!"svmptr_t", !"svmptr_t"}, i32 0}
-!11 = !{void (i8 addrspace(1)*, i8 addrspace(1)*)* @TestGVClobberingFixupStoreInCall, !{i32 0, i32 0, i32 0}, !{i32 0, i32 1, i32 2}, !{}, !{i32 255, i32 255, i32 255}}
-!12 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64)* @TestGVClobberingFixupLocalStore, !{i32 0, i32 0, i32 0}, !{i32 0, i32 1, i32 2}, !{}, !{i32 255, i32 255, i32 255}}
-!13 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64)* @TestGVClobberingFixupLoopLocalStore, !{i32 0, i32 0, i32 0}, !{i32 0, i32 1, i32 2}, !{}, !{i32 255, i32 255, i32 255}}
+attributes #0 = { "genx_volatile" }
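
Note on the bitcast peeling this patch adds to genx::getBitCastedValue and
vc::getUnderlyingGlobalVariable: both helpers now walk through mixed chains
in which each link may be either a BitCastInst or a bitcast ConstantExpr
(casts applied to a global are usually constant-folded into the latter form,
while casts of loaded values appear as instructions). Below is a minimal
standalone sketch of the idiom for illustration only; peelBitCasts and the
surrounding scaffolding are hypothetical names, not part of the patch.

  // Sketch only; assumes LLVM headers are available. Mirrors the loop added
  // in GenXUtil.cpp and GlobalVariable.cpp above.
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Peel an arbitrary chain of bitcasts, whether each link is an
  // instruction (BitCastInst) or a constant expression (bitcast
  // ConstantExpr), and return the underlying value.
  static Value *peelBitCasts(Value *V) {
    while (true) {
      if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        V = BCI->getOperand(0); // instruction-typed link
        continue;
      }
      if (auto *CE = dyn_cast<ConstantExpr>(V)) {
        if (CE->getOpcode() == Instruction::BitCast) {
          V = CE->getOperand(0); // constant-expression-typed link
          continue;
        }
      }
      return V; // neither form matches: chain fully peeled
    }
  }

With this shape, a constant-expression operand such as
bitcast (<4 x i32>* @_ZL8g_global to <16 x i8>*) and an equivalent chain of
bitcast instructions both resolve to @_ZL8g_global, which is what allows the
checker above to key ClobberingInsns by the underlying global rather than by
the cast that happens to wrap it.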