From f477776089dbb4ad95707a33fc733f6159552e36 Mon Sep 17 00:00:00 2001 From: "Chen, Kai" Date: Wed, 4 Dec 2024 23:31:57 +0000 Subject: [PATCH] Update resource loop nested lit tests Update resource loop nested lit tests. --- IGC/Compiler/CISACodeGen/EmitVISAPass.cpp | 2 + .../CISACodeGen/ResourceLoopUnroll.cpp | 34 +++- .../ResourceloopUnrollNestedLsc.ll | 168 +++++++++++----- .../ResourceloopUnrollNestedSampler.ll | 179 +++++++++++++----- 4 files changed, 283 insertions(+), 100 deletions(-) diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp index b26a4b974e4f..b0b3eb4d0c7c 100644 --- a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp +++ b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp @@ -8117,6 +8117,7 @@ void EmitPass::emitSampleInstruction(SampleIntrinsic* inst) if (predicationMap.count(inst)) { m_encoder->SetPredicate(m_currShader->GetSymbol(cast(predicationMap[inst]))); + m_encoder->Lifetime(LIFETIME_START, dst); } else { @@ -19481,6 +19482,7 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst, if (predicationMap.count(inst)) { m_encoder->SetPredicate(m_currShader->GetSymbol(cast(predicationMap[inst]))); + m_encoder->Lifetime(LIFETIME_START, destCVar); } else { diff --git a/IGC/Compiler/CISACodeGen/ResourceLoopUnroll.cpp b/IGC/Compiler/CISACodeGen/ResourceLoopUnroll.cpp index a5df3ee6d864..2f08b9ffbcbe 100644 --- a/IGC/Compiler/CISACodeGen/ResourceLoopUnroll.cpp +++ b/IGC/Compiler/CISACodeGen/ResourceLoopUnroll.cpp @@ -138,7 +138,7 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI) LLVM3DBuilder<> builder(context, platform); auto createResLoopIter = [&builder, this] - (Instruction* inst, BasicBlock* checkBB, BasicBlock* nextBB, BasicBlock* exitBB) + (Instruction* inst, BasicBlock* checkBB, BasicBlock* sendBB, BasicBlock* nextBB, BasicBlock* exitBB) { Value* resource = nullptr; Value* sampler = nullptr; @@ -225,16 +225,38 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI) } } + // Here we swap 
the last loop load and goto, such as + // From + // (P89) lsc_load.ugm.ca.ca(M1, 16) V1395:d32x3 bss(firstActiveRes)[V1385] : a32 /// $1953 + // (!P89) goto (M1, 16) ___realTimePathTracingRayGeneration__YAXXZ_093_partial_check1736 /// $1954 + // To + // (!P89) goto (M1, 16) ___realTimePathTracingRayGeneration__YAXXZ_093_partial_check1736 /// $1954 + // (P89) lsc_load.ugm.ca.ca(M1, 16) V1395:d32x3 bss(firstActiveRes)[V1385] : a32 /// $1953 + // However, because CreateCondBr generates a terminator, we put the last send into its own BB. + // Without the swap, the load executes on every iteration, each time loading only some channels. + if (sendBB) + { + builder.CreateCondBr(cond, sendBB, nextBB); + builder.SetInsertPoint(sendBB); + } + llvm::Instruction* predSendInstr = inst->clone(); SetResourceOperand(predSendInstr, resourceNew, pairTextureNew, textureNew, samplerNew); predSendInstr->setName("resLoopSubIterSend"); builder.Insert(predSendInstr); + if (sendBB) + { + builder.CreateBr(exitBB); + } + else + { + builder.CreateCondBr(cond, exitBB, nextBB); + } + // add the cmp/instruction combo to our predication map m_pCodeGenContext->getModuleMetaData()->predicationMap[predSendInstr] = cond; - builder.CreateCondBr(cond, exitBB, nextBB); - return predSendInstr; }; @@ -254,10 +276,12 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI) { // Basicblocks for loop BasicBlock* partialCheckBB = BasicBlock::Create(context, "partial_check", BB->getParent(), before); + // The blocks are created from the end, so i == 0 is the last loop iteration + BasicBlock* lastSendBB = (i == 0) ? BasicBlock::Create(context, "last_send", BB->getParent(), before) : nullptr; - auto send = createResLoopIter(CI, partialCheckBB, before, mergeBB); + auto send = createResLoopIter(CI, partialCheckBB, lastSendBB, before, mergeBB); - PN->addIncoming(send, lastSendBB ? 
lastSendBB : partialCheckBB); before = partialCheckBB; } diff --git a/IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedLsc.ll b/IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedLsc.ll index 071e550bc087..082c6a8d1bce 100644 --- a/IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedLsc.ll +++ b/IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedLsc.ll @@ -7,62 +7,138 @@ ; ;============================ end_copyright_notice ============================= ; REQUIRES: llvm-14-plus, regkeys -; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -regkey ResourceLoopUnrollNested=4 -verify -S < %s | FileCheck %s +; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -regkey ResourceLoopUnrollNested=4 -verify -S < %s | FileCheck %s --check-prefix=CHECK-LL +; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -igc-emit-visa -simd-mode 16 -inputrt -regkey ResourceLoopUnrollNested=4 -regkey DumpVISAASMToConsole -S < %s | FileCheck %s --check-prefix=CHECK-VISAASM ; ; Test checks how we emit ResourceLoop - @ThreadGroupSize_X = constant i32 64 @ThreadGroupSize_Y = constant i32 1 @ThreadGroupSize_Z = constant i32 1 define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: [[SVN:%.*]] = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17) -; CHECK-NEXT: [[NONUNIFORM:%.*]] = zext i16 [[SVN]] to i32 -; CHECK-NEXT: [[NONUNIFORMRESOURCE:%.*]] = inttoptr i32 [[NONUNIFORM]] to <4 x float> addrspace(2621440)* -; CHECK-NEXT: [[OFFSET:%.*]] = add i32 [[SRC1:%.*]], 1 -; CHECK-NEXT: br label [[PARTIAL_CHECK5:%.*]] -; CHECK: partial_check5: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]]) -; CHECK-NEXT: [[FIRSTACTIVERES6:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 
[[TMP2]], i32 0) -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES6]] -; CHECK-NEXT: [[TMP4:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES6]], i32 [[OFFSET]], i32 4, i1 false) -; CHECK-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]] -; CHECK: partial_check3: -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]]) -; CHECK-NEXT: [[FIRSTACTIVERES4:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP6]], i32 0) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES4]] -; CHECK-NEXT: [[TMP8:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES4]], i32 [[OFFSET]], i32 4, i1 false) -; CHECK-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK1:%.*]] -; CHECK: partial_check1: -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]]) -; CHECK-NEXT: [[FIRSTACTIVERES2:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP10]], i32 0) -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES2]] -; CHECK-NEXT: [[TMP12:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES2]], i32 [[OFFSET]], i32 4, i1 false) -; CHECK-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK:%.*]] -; CHECK: 
partial_check: -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]]) -; CHECK-NEXT: [[FIRSTACTIVERES:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP14]], i32 0) -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES]] -; CHECK-NEXT: [[TMP16:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES]], i32 [[OFFSET]], i32 4, i1 false) -; CHECK-NEXT: br i1 [[TMP15]], label [[UNROLL_MERGE]], label [[LATCH:%.*]] -; CHECK: latch: -; CHECK-NEXT: br label [[PARTIAL_CHECK5]] -; CHECK: unroll-merge: -; CHECK-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[PARTIAL_CHECK]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24 -; CHECK-NEXT: [[OUT:%.*]] = extractelement <3 x i32> [[TMP17]], i32 [[VAL:%.*]] -; CHECK-NEXT: store i32 [[OUT]], i32 addrspace(1)* [[DST:%.*]], align 1 -; CHECK-NEXT: ret void +; CHECK-LL-LABEL: @test1( +; CHECK-LL: [[SVN:%.*]] = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17) +; CHECK-LL-NEXT: [[NONUNIFORM:%.*]] = zext i16 [[SVN]] to i32 +; CHECK-LL-NEXT: [[NONUNIFORMRESOURCE:%.*]] = inttoptr i32 [[NONUNIFORM]] to <4 x float> addrspace(2621440)* +; CHECK-LL-NEXT: [[OFFSET:%.*]] = add i32 [[SRC1:%.*]], %nonuniform +; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5:%.*]] +; CHECK-LL: partial_check5: +; CHECK-LL-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) +; CHECK-LL-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]]) +; CHECK-LL-NEXT: [[FIRSTACTIVERES6:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> 
addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP2]], i32 0) +; CHECK-LL-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES6]] +; CHECK-LL-NEXT: [[TMP4:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES6]], i32 [[OFFSET]], i32 4, i1 false) +; CHECK-LL-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]] +; CHECK-LL: partial_check3: +; CHECK-LL-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) +; CHECK-LL-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]]) +; CHECK-LL-NEXT: [[FIRSTACTIVERES4:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP6]], i32 0) +; CHECK-LL-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES4]] +; CHECK-LL-NEXT: [[TMP8:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES4]], i32 [[OFFSET]], i32 4, i1 false) +; CHECK-LL-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK1:%.*]] +; CHECK-LL: partial_check1: +; CHECK-LL-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) +; CHECK-LL-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]]) +; CHECK-LL-NEXT: [[FIRSTACTIVERES2:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP10]], i32 0) +; CHECK-LL-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES2]] +; CHECK-LL-NEXT: [[TMP12:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES2]], i32 [[OFFSET]], i32 4, i1 false) +; 
CHECK-LL-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK:%.*]] +; CHECK-LL: partial_check: +; CHECK-LL-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) +; CHECK-LL-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]]) +; CHECK-LL-NEXT: [[FIRSTACTIVERES:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP14]], i32 0) +; CHECK-LL-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES]] +; CHECK-LL-NEXT: br i1 [[TMP15]], label [[LAST_SEND:%.*]], label [[LATCH:%.*]] +; CHECK-LL: last_send: +; CHECK-LL-NEXT: [[TMP16:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES]], i32 [[OFFSET]], i32 4, i1 false) +; CHECK-LL-NEXT: br label [[UNROLL_MERGE]] +; CHECK-LL: latch: +; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5]] +; CHECK-LL: unroll-merge: +; CHECK-LL-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[LAST_SEND]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24 +; CHECK-LL-NEXT: [[OUT:%.*]] = extractelement <3 x i32> [[TMP17]], i32 [[VAL:%.*]] +; CHECK-LL-NEXT: store i32 [[OUT]], i32 addrspace(1)* [[DST:%.*]], align 1 +; CHECK-LL-NEXT: ret void +; +; COM: check predicate load and lifetime.start +; CHECK-VISAASM: _main_0: +; CHECK-VISAASM-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> +; CHECK-VISAASM-NEXT: add (M1, 16) offset(0,0)<1> src1(0,0)<0;1,0> nonuniform(0,0)<1;1,0> +; +; CHECK-VISAASM: _test1_001_partial_check5: +; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P1 0x0:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P1 V0034(0,0)<0;1,0> V0034(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0035(0,0)<1> 
P1 +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0033(0,0)<1> V0035(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp(0,0)<1> V0038(0,0)<0;1,0> 0x2:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform_0 ShuffleTmp(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes6(0,0)<1> r[A0(0),0]<0;1,0>:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 nonuniform_0(0,0)<1;1,0> firstActiveRes6(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: lifetime.start V0039 +; CHECK-VISAASM-NEXT: (P2) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes6)[offset]:a32 +; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_007_unroll_merge +; +; CHECK-VISAASM: _test1_002_partial_check3: +; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P3 0x0:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P3 V0042(0,0)<0;1,0> V0042(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0043(0,0)<1> P3 +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0041(0,0)<1> V0043(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0045(0,0)<1> V0041(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_0(0,0)<1> V0046(0,0)<0;1,0> 0x2:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform_0 ShuffleTmp_0(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes4(0,0)<1> r[A1(0),0]<0;1,0>:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 nonuniform_0(0,0)<1;1,0> firstActiveRes4(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: lifetime.start V0039 +; CHECK-VISAASM-NEXT: (P4) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes4)[offset]:a32 +; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_007_unroll_merge +; +; CHECK-VISAASM: _test1_003_partial_check1: +; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P5 0x0:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P5 V0049(0,0)<0;1,0> V0049(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0050(0,0)<1> P5 +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0048(0,0)<1> V0050(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0052(0,0)<1> 
V0048(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_1(0,0)<1> V0053(0,0)<0;1,0> 0x2:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform_0 ShuffleTmp_1(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes2(0,0)<1> r[A2(0),0]<0;1,0>:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 nonuniform_0(0,0)<1;1,0> firstActiveRes2(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: lifetime.start V0039 +; CHECK-VISAASM-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes2)[offset]:a32 +; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_007_unroll_merge +; +; CHECK-VISAASM: _test1_004_partial_check: +; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P7 0x0:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P7 V0056(0,0)<0;1,0> V0056(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0057(0,0)<1> P7 +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0055(0,0)<1> V0057(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0059(0,0)<1> V0055(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_2(0,0)<1> V0060(0,0)<0;1,0> 0x2:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform_0 ShuffleTmp_2(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes(0,0)<1> r[A3(0),0]<0;1,0>:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 nonuniform_0(0,0)<1;1,0> firstActiveRes(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5 ; +; CHECK-VISAASM: _test1_005_last_send: +; CHECK-VISAASM-NEXT: lifetime.start V0039 +; CHECK-VISAASM-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes)[offset]:a32 +; +; CHECK-VISAASM: _test1_007_unroll_merge: +; CHECK-VISAASM-NEXT: mul (M1_NM, 1) V0061(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A4(0)<1> &V0039 V0061(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov 
(M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0> +; CHECK-VISAASM-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32 +; CHECK-VISAASM-NEXT: ret (M1, 1) + %svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17) %nonuniform = zext i16 %svn to i32 %NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)* - %offset = add i32 %src1, 1 + %offset = add i32 %src1, %nonuniform %call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false) @@ -71,8 +147,6 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) { ret void } - - declare <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)*, i32, i32, i1) #4 declare i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32) #1 @@ -84,7 +158,6 @@ declare i32 @llvm.genx.GenISA.firstbitLo(i32) attributes #4 = { argmemonly nounwind readonly } - !IGCMetadata = !{!0} !igc.functions = !{!21} @@ -112,4 +185,3 @@ attributes #4 = { argmemonly nounwind readonly } !21 = !{void (i32, i32, i32 addrspace(1)*)* @test1, !22} !22 = !{!23} !23 = !{!"function_type", i32 0} - diff --git a/IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedSampler.ll b/IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedSampler.ll index 372544f88a26..d1baf1cf095d 100644 --- a/IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedSampler.ll +++ b/IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedSampler.ll @@ -7,11 +7,11 @@ ; ;============================ end_copyright_notice ============================= ; REQUIRES: llvm-14-plus, regkeys -; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -regkey ResourceLoopUnrollNested=4 -verify -S < %s | FileCheck %s +; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -regkey ResourceLoopUnrollNested=4 -verify -S < %s | FileCheck %s --check-prefix=CHECK-LL +; RUN: igc_opt 
-platformbmg -igc-resource-loop-unroll -igc-emit-visa -simd-mode 16 -inputrt -regkey ResourceLoopUnrollNested=4 -regkey DumpVISAASMToConsole -S < %s | FileCheck %s --check-prefix=CHECK-VISAASM ; ; Test checks how we emit ResourceLoop - @ThreadGroupSize_X = constant i32 64 @ThreadGroupSize_Y = constant i32 1 @ThreadGroupSize_Z = constant i32 1 @@ -19,51 +19,137 @@ %__2D_DIM_Resource = type opaque define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: [[SVN0:%.*]] = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 7) -; CHECK-NEXT: [[SAMPLER:%.*]] = zext i16 [[SVN0]] to i32 -; CHECK-NEXT: [[NONUNIFORMSAMPLER:%.*]] = inttoptr i32 [[SAMPLER]] to <4 x float> addrspace(2752518)* -; CHECK-NEXT: [[SVN1:%.*]] = extractelement <64 x i32> %src, i32 40 -; CHECK-NEXT: [[TEXTURE:%.*]] = add i32 %svn1, 1280 -; CHECK-NEXT: [[NONUNIFORMTEXTURE:%.*]] = inttoptr i32 [[TEXTURE]] to %__2D_DIM_Resource.0 addrspace(2621450)* -; CHECK-NEXT: br label [[PARTIAL_CHECK5:%.*]] -; CHECK: partial_check5: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]]) -; CHECK-NEXT: [[FIRSTACTIVESAMPLER6:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP2]], i32 0) -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER6]] -; CHECK-NEXT: [[TMP4:%.*]] = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER6]], i32 0, i32 0, i32 0) -; 
CHECK-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]] -; CHECK: partial_check3: -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]]) -; CHECK-NEXT: [[FIRSTACTIVESAMPLER4:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP6]], i32 0) -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER4]] -; CHECK-NEXT: [[TMP8:%.*]] = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER4]], i32 0, i32 0, i32 0) -; CHECK-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK1:%.*]] -; CHECK: partial_check1: -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]]) -; CHECK-NEXT: [[FIRSTACTIVESAMPLER2:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP10]], i32 0) -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER2]] -; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 
addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER2]], i32 0, i32 0, i32 0) -; CHECK-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK:%.*]] -; CHECK: partial_check: -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]]) -; CHECK-NEXT: [[FIRSTACTIVESAMPLER:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP14]], i32 0) -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER]] -; CHECK-NEXT: [[TMP16:%.*]] = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER]], i32 0, i32 0, i32 0) -; CHECK-NEXT: br i1 [[TMP15]], label [[UNROLL_MERGE]], label [[LATCH:%.*]] -; CHECK: latch: -; CHECK-NEXT: br label [[PARTIAL_CHECK5]] -; CHECK: unroll-merge: -; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x float> [ [[TMP16]], [[PARTIAL_CHECK]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24 -; CHECK-NEXT: [[OUT:%.*]] = extractelement <4 x float> [[TMP17]], i32 0 -; CHECK-NEXT: store float [[OUT]], float addrspace(1)* [[DST:%.*]], align 4 -; CHECK-NEXT: ret void +; CHECK-LL-LABEL: @test1( +; CHECK-LL-NEXT: [[SVN0:%.*]] = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17) +; CHECK-LL-NEXT: [[SAMPLER:%.*]] = zext i16 [[SVN0]] to i32 +; CHECK-LL-NEXT: [[NONUNIFORMSAMPLER:%.*]] = inttoptr i32 [[SAMPLER]] to <4 x float> 
addrspace(2752518)* +; CHECK-LL-NEXT: [[SVN1:%.*]] = extractelement <64 x i32> %src, i32 40 +; CHECK-LL-NEXT: [[TEXTURE:%.*]] = add i32 %svn1, 1280 +; CHECK-LL-NEXT: [[NONUNIFORMTEXTURE:%.*]] = inttoptr i32 [[TEXTURE]] to %__2D_DIM_Resource.0 addrspace(2621450)* +; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5:%.*]] +; CHECK-LL: partial_check5: +; CHECK-LL-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) +; CHECK-LL-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]]) +; CHECK-LL-NEXT: [[FIRSTACTIVESAMPLER6:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP2]], i32 0) +; CHECK-LL-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER6]] +; CHECK-LL-NEXT: [[TMP4:%.*]] = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER6]], i32 0, i32 0, i32 0) +; CHECK-LL-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]] +; CHECK-LL: partial_check3: +; CHECK-LL-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) +; CHECK-LL-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]]) +; CHECK-LL-NEXT: [[FIRSTACTIVESAMPLER4:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP6]], i32 0) +; CHECK-LL-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER4]] +; CHECK-LL-NEXT: [[TMP8:%.*]] = tail call fast <4 x float> 
@llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER4]], i32 0, i32 0, i32 0) +; CHECK-LL-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK1:%.*]] +; CHECK-LL: partial_check1: +; CHECK-LL-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) +; CHECK-LL-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]]) +; CHECK-LL-NEXT: [[FIRSTACTIVESAMPLER2:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP10]], i32 0) +; CHECK-LL-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER2]] +; CHECK-LL-NEXT: [[TMP12:%.*]] = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER2]], i32 0, i32 0, i32 0) +; CHECK-LL-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK:%.*]] +; CHECK-LL: partial_check: +; CHECK-LL-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0) +; CHECK-LL-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]]) +; CHECK-LL-NEXT: [[FIRSTACTIVESAMPLER:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP14]], i32 0) +; CHECK-LL-NEXT: 
[[TMP15:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER]] +; CHECK-LL-NEXT: br i1 [[TMP15]], label [[LAST_SEND:%.*]], label [[LATCH:%.*]] +; CHECK-LL: last_send: +; CHECK-LL-NEXT: [[TMP16:%.*]] = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER]], i32 0, i32 0, i32 0) +; CHECK-LL-NEXT: br label [[UNROLL_MERGE]] +; CHECK-LL: latch: +; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5]] +; CHECK-LL: unroll-merge: +; CHECK-LL-NEXT: [[TMP17:%.*]] = phi <4 x float> [ [[TMP16]], [[LAST_SEND]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24 +; CHECK-LL-NEXT: [[OUT:%.*]] = extractelement <4 x float> [[TMP17]], i32 0 +; CHECK-LL-NEXT: store float [[OUT]], float addrspace(1)* [[DST:%.*]], align 4 +; CHECK-LL-NEXT: ret void +; +; COM: check predicate load and lifetime.start +; CHECK-VISAASM: _main_0: +; CHECK-VISAASM-NEXT: mov (M1, 16) svn0(0,0)<1> threadIdInGroupX(0,0)<1;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) sampler(0,0)<1> svn0_0(0,0)<1;1,0> +; CHECK-VISAASM-NEXT: add (M1_NM, 1) texture(0,0)<1> src(2,8)<0;1,0> 0x500:w +; +; CHECK-VISAASM: _test1_001_partial_check5: +; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P1 0x0:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P1 V0034(0,0)<0;1,0> V0034(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0035(0,0)<1> P1 +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0033(0,0)<1> V0035(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp(0,0)<1> V0038(0,0)<0;1,0> 0x2:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) 
A0(0)<1> &sampler_0 ShuffleTmp(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler6(0,0)<1> r[A0(0),0]<0;1,0>:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 sampler_0(0,0)<1;1,0> firstActiveSampler6(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) V0040(0,0)<1> 0x0:f +; CHECK-VISAASM-NEXT: lifetime.start V0039 +; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler6(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: (P2) sample_lz.RGBA (M1, 16) 0x0:uw S31 %bss V0039.0 %null.0 V0040.0 +; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_007_unroll_merge +; +; CHECK-VISAASM: _test1_002_partial_check3: +; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P3 0x0:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P3 V0043(0,0)<0;1,0> V0043(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0044(0,0)<1> P3 +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0042(0,0)<1> V0044(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0046(0,0)<1> V0042(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_0(0,0)<1> V0047(0,0)<0;1,0> 0x2:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A1(0)<1> &sampler_0 ShuffleTmp_0(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler4(0,0)<1> r[A1(0),0]<0;1,0>:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 sampler_0(0,0)<1;1,0> firstActiveSampler4(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) V0048(0,0)<1> 0x0:f +; CHECK-VISAASM-NEXT: lifetime.start V0039 +; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler4(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: (P4) sample_lz.RGBA (M1, 16) 0x0:uw S31 %bss V0039.0 %null.0 V0048.0 +; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_007_unroll_merge +; +; CHECK-VISAASM: _test1_003_partial_check1: +; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P5 0x0:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P5 V0051(0,0)<0;1,0> V0051(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0052(0,0)<1> P5 +; 
CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0050(0,0)<1> V0052(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0054(0,0)<1> V0050(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_1(0,0)<1> V0055(0,0)<0;1,0> 0x2:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A2(0)<1> &sampler_0 ShuffleTmp_1(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler2(0,0)<1> r[A2(0),0]<0;1,0>:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 sampler_0(0,0)<1;1,0> firstActiveSampler2(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) V0056(0,0)<1> 0x0:f +; CHECK-VISAASM-NEXT: lifetime.start V0039 +; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler2(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: (P6) sample_lz.RGBA (M1, 16) 0x0:uw S31 %bss V0039.0 %null.0 V0056.0 +; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_007_unroll_merge ; - %svn0 = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 7) +; CHECK-VISAASM: _test1_004_partial_check: +; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P7 0x0:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P7 V0059(0,0)<0;1,0> V0059(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0060(0,0)<1> P7 +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0058(0,0)<1> V0060(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0062(0,0)<1> V0058(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_2(0,0)<1> V0063(0,0)<0;1,0> 0x2:uw +; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &sampler_0 ShuffleTmp_2(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler(0,0)<1> r[A3(0),0]<0;1,0>:ud +; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 sampler_0(0,0)<1;1,0> firstActiveSampler(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5 +; +; CHECK-VISAASM: _test1_005_last_send: +; CHECK-VISAASM-NEXT: mov (M1, 16) V0064(0,0)<1> 0x0:f +; CHECK-VISAASM-NEXT: lifetime.start V0039 +; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: movs 
(M1_NM, 1) %bss(0) texture(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: (P8) sample_lz.RGBA (M1, 16) 0x0:uw S31 %bss V0039.0 %null.0 V0064.0 +; +; CHECK-VISAASM: _test1_007_unroll_merge: +; CHECK-VISAASM-NEXT: mov (M1, 16) out(0,0)<1> V0039(0,0)<1;1,0> +; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0> +; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0> +; CHECK-VISAASM-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32 +; CHECK-VISAASM-NEXT: ret (M1, 1) + + %svn0 = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17) %sampler = zext i16 %svn0 to i32 %NonUniformSampler = inttoptr i32 %sampler to <4 x float> addrspace(2752518)* @@ -89,7 +175,6 @@ declare i32 @llvm.genx.GenISA.firstbitLo(i32) attributes #4 = { argmemonly nounwind readonly } - !IGCMetadata = !{!0} !igc.functions = !{!21}