diff --git a/IGC/VectorCompiler/lib/Utils/GenX/TransformArgCopy.cpp b/IGC/VectorCompiler/lib/Utils/GenX/TransformArgCopy.cpp index dcac767712e9..4678e6acb191 100644 --- a/IGC/VectorCompiler/lib/Utils/GenX/TransformArgCopy.cpp +++ b/IGC/VectorCompiler/lib/Utils/GenX/TransformArgCopy.cpp @@ -157,9 +157,22 @@ static bool argToTransform(const Argument &Arg, if (!PtrTy) return false; Type *ElemTy = IGCLLVM::getNonOpaquePtrEltTy(PtrTy); - if ((ElemTy->isVectorTy() || onlyUsedBySimpleValueLoadStore(Arg)) && - (ElemTy->isIntOrIntVectorTy() || ElemTy->isFPOrFPVectorTy())) - return true; + if (ElemTy->isIntOrIntVectorTy() || ElemTy->isFPOrFPVectorTy()) { + if (ElemTy->isVectorTy()) { + for (auto *U : Arg.users()) { + auto *GEP = dyn_cast(U); + if (!GEP) + continue; + if (&Arg != GEP->getPointerOperand()) + continue; + auto *ConstIdx = dyn_cast(*GEP->idx_begin()); + if (!ConstIdx || ConstIdx->getZExtValue() != 0) + return false; + } + return true; + } + return onlyUsedBySimpleValueLoadStore(Arg); + } if (auto *StrTy = dyn_cast(ElemTy)) { const DataLayout &DL = Arg.getParent()->getParent()->getDataLayout(); if (structSafeToPassByVal(Arg) && diff --git a/IGC/VectorCompiler/test/CMABI/do_not_copy_in_out_vector.ll b/IGC/VectorCompiler/test/CMABI/do_not_copy_in_out_vector.ll new file mode 100644 index 000000000000..30d32ee5908e --- /dev/null +++ b/IGC/VectorCompiler/test/CMABI/do_not_copy_in_out_vector.ll @@ -0,0 +1,37 @@ +;=========================== begin_copyright_notice ============================ +; +; Copyright (C) 2024 Intel Corporation +; +; SPDX-License-Identifier: MIT +; +;============================ end_copyright_notice ============================= + +; RUN: %opt %use_old_pass_manager% -cmabi -march=genx64 -mcpu=Gen9 -S < %s | FileCheck %s + +declare void @llvm.genx.svm.scatter.v16i1.v16i64.v16f32(<16 x i1>, i32, <16 x i64>, <16 x float>) #0 + +; CHECK: define internal spir_func void @test(<16 x float>* +define internal spir_func void @test(<16 x float>* noalias nocapture %a) #3 { + %a_load_offset = getelementptr <16 x float>, <16 x float>* %a, i64 5 + %ptr_to_int.i.i = ptrtoint <16 x float>* %a_load_offset to i64 + %base.i.i = insertelement <16 x i64> undef, i64 %ptr_to_int.i.i, i64 0 + %shuffle.i.i = shufflevector <16 x i64> %base.i.i, <16 x i64> undef, <16 x i32> zeroinitializer + %new_offsets.i.i = add <16 x i64> %shuffle.i.i, + call void @llvm.genx.svm.scatter.v16i1.v16i64.v16f32(<16 x i1> , i32 0, <16 x i64> %new_offsets.i.i, <16 x float> zeroinitializer) + ret void +} + +define dllexport spir_kernel void @kernel() #4 { + %x = alloca [10 x <16 x float>], align 64 + %x_offset = getelementptr inbounds [10 x <16 x float>], [10 x <16 x float>]* %x, i64 0, i64 0 + call spir_func void @test(<16 x float>* noalias nocapture nonnull %x_offset) #5 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } +attributes #3 = { noinline nounwind "CMStackCall" } +attributes #4 = { nounwind "CMGenxMain" "oclrt"="1" } +attributes #5 = { noinline nounwind } +