diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index b21c9c9e4ad9..e3c0d6d14306 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -781,9 +781,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // cost of the split itself. Count that as 1, to be consistent with // TLI->getTypeLegalizationCost(). if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == - TargetLowering::TypeSplitVector) || - (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == - TargetLowering::TypeSplitVector)) { + TargetLowering::TypeSplitVector || + TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == + TargetLowering::TypeSplitVector) && + Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) { Type *SplitDst = VectorType::get(Dst->getVectorElementType(), Dst->getVectorNumElements() / 2); Type *SplitSrc = VectorType::get(Src->getVectorElementType(), diff --git a/llvm/test/Transforms/LICM/AMDGPU/bitcast.ll b/llvm/test/Transforms/LICM/AMDGPU/bitcast.ll new file mode 100644 index 000000000000..5f45465516ba --- /dev/null +++ b/llvm/test/Transforms/LICM/AMDGPU/bitcast.ll @@ -0,0 +1,21 @@ +; RUN: opt -licm -mtriple=amdgcn -S -o - %s | FileCheck %s + +; CHECK-LABEL: foo +; CHECK: ret +define void @foo(i8* %d, <1 x i32>* %s, i32 %idx) { +entry: + br label %for.body + +for.body: + %v0 = load <1 x i32>, <1 x i32>* %s + %v1 = bitcast <1 x i32> %v0 to <4 x i8> + br label %for.cond + +for.cond: + %e0 = extractelement <4 x i8> %v1, i32 %idx + store i8 %e0, i8* %d + br i1 false, label %for.exit, label %for.body + +for.exit: + ret void +} diff --git a/llvm/test/Transforms/LICM/AMDGPU/lit.local.cfg b/llvm/test/Transforms/LICM/AMDGPU/lit.local.cfg new file mode 100644 index 000000000000..2a665f06be72 --- /dev/null +++ b/llvm/test/Transforms/LICM/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True