From a4e47586b9c0566761b7fb704011da6ded823398 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 3 Jan 2025 10:23:13 -0800 Subject: [PATCH] [ExpandMemCmp] Recognize canonical form of (icmp sle/sge X, 0) in getMemCmpOneBlock. (#121540) This code recognizes special cases where the result of memcmp is compared with 0. If the compare is sle/sge, then InstCombine canonicalizes to (icmp slt X, 1) or (icmp sgt X, -1). We should recognize those patterns too. --- llvm/lib/CodeGen/ExpandMemCmp.cpp | 8 +++++++ llvm/test/CodeGen/AArch64/memcmp.ll | 10 ++------ llvm/test/CodeGen/RISCV/memcmp.ll | 36 +++++++---------------------- llvm/test/CodeGen/X86/memcmp.ll | 12 ++-------- 4 files changed, 20 insertions(+), 46 deletions(-) diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index cc75a01c6477a2..74f93e19795327 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -680,6 +680,14 @@ Value *MemCmpExpansion::getMemCmpOneBlock() { m_SpecificInt(CI->getType()->getIntegerBitWidth() - 1)))) { Pred = ICmpInst::ICMP_SLT; NeedsZExt = true; + } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(CI), + m_AllOnes()))) { + // Adjust predicate as if it compared with 0. + Pred = ICmpInst::ICMP_SGE; + } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(CI), + m_One()))) { + // Adjust predicate as if it compared with 0. + Pred = ICmpInst::ICMP_SLE; } else { // In case of a successful match this call will set `Pred` variable match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero())); diff --git a/llvm/test/CodeGen/AArch64/memcmp.ll b/llvm/test/CodeGen/AArch64/memcmp.ll index 864f38468842a1..98ea86b06d6c59 100644 --- a/llvm/test/CodeGen/AArch64/memcmp.ll +++ b/llvm/test/CodeGen/AArch64/memcmp.ll @@ -265,10 +265,7 @@ define i1 @length4_le(ptr %X, ptr %Y) nounwind { ; CHECK-NEXT: rev w8, w8 ; CHECK-NEXT: rev w9, w9 ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: csinv w8, w8, wzr, hs -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: cset w0, ls ; CHECK-NEXT: ret %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind %c = icmp slt i32 %m, 1 @@ -283,10 +280,7 @@ define i1 @length4_ge(ptr %X, ptr %Y) nounwind { ; CHECK-NEXT: rev w8, w8 ; CHECK-NEXT: rev w9, w9 ; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: csinv w8, w8, wzr, hs -; CHECK-NEXT: mvn w8, w8 -; CHECK-NEXT: lsr w0, w8, #31 +; CHECK-NEXT: cset w0, hs ; CHECK-NEXT: ret %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind %c = icmp sgt i32 %m, -1 diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll index 5adda28acb427d..f0290298e362a0 100644 --- a/llvm/test/CodeGen/RISCV/memcmp.ll +++ b/llvm/test/CodeGen/RISCV/memcmp.ll @@ -6664,10 +6664,8 @@ define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 0(a1) ; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a1, a0 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a2 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: slti a0, a0, 1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xori a0, a0, 1 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret ; ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_le_zero: @@ -6678,10 +6676,8 @@ define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slti a0, a0, 1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xori a0, a0, 1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret ; ; CHECK-UNALIGNED-RV32-ZBKB-LABEL: memcmp_le_zero: @@ -6690,10 +6686,8 @@ define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 0(a1) ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a1, a0 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a2 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: slti a0, a0, 1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xori a0, a0, 1 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret ; ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_le_zero: @@ -6704,10 +6698,8 @@ define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slti a0, a0, 1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xori a0, a0, 1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret ; ; CHECK-UNALIGNED-RV32-V-LABEL: memcmp_le_zero: @@ -6864,10 +6856,7 @@ define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 0(a1) ; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a1, a0 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: sub a0, a0, a2 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: slti a0, a0, 0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a0, a0, a1 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: xori a0, a0, 1 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret ; @@ -6879,10 +6868,7 @@ define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a1, a1, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a1, a0 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: sub a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slti a0, a0, 0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a0, a0, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: xori a0, a0, 1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret ; @@ -6892,10 +6878,7 @@ define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 0(a1) ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a1, a0 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sub a0, a0, a2 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: slti a0, a0, 0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a0, a0, a1 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xori a0, a0, 1 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret ; @@ -6907,10 +6890,7 @@ define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a1, a1, 32 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a1, a0 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sub a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slti a0, a0, 0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a0, a0, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xori a0, a0, 1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret ; diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index e744d2a06e55f6..bb089e5ddda87b 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -268,11 +268,7 @@ define i1 @length4_le(ptr %X, ptr %Y) nounwind { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: seta %al -; X64-NEXT: sbbb $0, %al -; X64-NEXT: movsbl %al, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setle %al +; X64-NEXT: setbe %al ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind %c = icmp slt i32 %m, 1 @@ -287,11 +283,7 @@ define i1 @length4_ge(ptr %X, ptr %Y) nounwind { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: seta %al -; X64-NEXT: sbbb $0, %al -; X64-NEXT: movsbl %al, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setns %al +; X64-NEXT: setae %al ; X64-NEXT: retq %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 4) nounwind %c = icmp sgt i32 %m, -1