# Patch summary (leading text before the first "diff --git" header; not part of any hunk).
#
# Files touched:
#   - llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
#   - llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
#   - llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
#
# Scheduling-model change (same shape in both .td files):
#   - Before: inside the per-SEW loop, every "WriteVSHA2MSV" entry was passed the
#     existing IsWorstCase flag unchanged, for each SEW left after removing 8 and 16
#     from SchedSEWSet.val.
#   - After: the filtered SEW list is named ZvknhSEWs, and its last element is taken
#     as LargestZvknhSEW via a !foldl that keeps returning the current element. Each
#     entry is then passed IsWorstCaseVSHA2MSV = IsWorstCase AND (sew == LargestZvknhSEW).
#   - Taking the last element as the largest relies on the list being sorted in
#     ascending order, as the comment inside the patch itself states.
#   - In the P600 file the per-entry "let ReleaseAtCycles = [SiFiveP600VSHA2MSCycles.c]"
#     wrapper is kept as a context line; only the final template argument changes.
#
# Test change (zvknhb.s):
#   - A "vsha2ms.vv v4, v8, v12" is added ahead of the first vsetvli in the file.
#   - Its expected row in the instruction-info table is "1 3 12.00", and 12.00 is
#     added to the resource-pressure column that the other vsha2* rows use.
#   - Summary expectations move accordingly: Instructions 32 -> 33, Total Cycles
#     108 -> 119, Total uOps 32 -> 33, uOps Per Cycle / IPC 0.30 -> 0.28,
#     Block RThroughput 97.0 -> 109.0, resource pressure 97.00 -> 109.00.
#
# NOTE(review): this copy of the patch appears to have had its line breaks collapsed
#   (the hunk headers declare multi-line hunks, but the text sits on a few long lines).
#   It presumably will not apply as-is; regenerate from the original commit before use.
# NOTE(review): "SchedSEWSet.val" appears without a template argument, both in the
#   removed and in the added lines. Possibly an "<mx>" argument was lost in extraction;
#   confirm against the upstream file.
# NOTE(review): LargestZvknhSEW uses !head(ZvknhSEWs) as the fold's start value.
#   Confirm the filtered list can never be empty for any mx in SchedMxList, or that
#   TableGen tolerates !head on an empty list here. TODO confirm.
#
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td index 1af89903e0068c..a86c255f0820ed 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td @@ -883,8 +883,16 @@ foreach mx = SchedMxList in { let Latency = 3, ReleaseAtCycles = [LMulLat] in { defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>; - foreach sew = !listremove(SchedSEWSet.val, [8, 16]) in - defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>; + defvar ZvknhSEWs = !listremove(SchedSEWSet.val, [8, 16]); + // Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending + // order. + defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr); + foreach sew = ZvknhSEWs in { + // The worst case for Zvknh[ab] is designated to the largest SEW and LMUL. + defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW)); + defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, + IsWorstCaseVSHA2MSV>; + } } // Zvkned let Latency = 2, ReleaseAtCycles = [LMulLat] in { diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td index c2d93d4c0a7f0a..0c695c9ef30710 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td @@ -1135,9 +1135,16 @@ foreach mx = SchedMxList in { let Latency = 3, ReleaseAtCycles = [LMulLat] in { defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>; - foreach sew = !listremove(SchedSEWSet.val, [8, 16]) in { + defvar ZvknhSEWs = !listremove(SchedSEWSet.val, [8, 16]); + // Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending + // order. 
+ defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr); + foreach sew = ZvknhSEWs in { + // The worst case for Zvknh[ab] is designated to the largest SEW and LMUL. + defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW)); let ReleaseAtCycles = [SiFiveP600VSHA2MSCycles.c] in - defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew, + IsWorstCaseVSHA2MSV>; } } // Zvkned diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s index 20ac87a724af16..adf780279c8954 100644 --- a/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s @@ -1,6 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s +# Worst case for vsha2ms should be that of LMUL=8 and SEW=64. 
+vsha2ms.vv v4, v8, v12 + # SEW is only e32 or e64 vsetvli zero, zero, e32, m1, tu, mu @@ -44,14 +47,14 @@ vsha2ch.vv v8, v16, v24 vsha2cl.vv v8, v16, v24 # CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 32 -# CHECK-NEXT: Total Cycles: 108 -# CHECK-NEXT: Total uOps: 32 +# CHECK-NEXT: Instructions: 33 +# CHECK-NEXT: Total Cycles: 119 +# CHECK-NEXT: Total uOps: 33 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.30 -# CHECK-NEXT: IPC: 0.30 -# CHECK-NEXT: Block RThroughput: 97.0 +# CHECK-NEXT: uOps Per Cycle: 0.28 +# CHECK-NEXT: IPC: 0.28 +# CHECK-NEXT: Block RThroughput: 109.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -62,6 +65,7 @@ vsha2cl.vv v8, v16, v24 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 12.00 vsha2ms.vv v4, v8, v12 # CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu # CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12 # CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12 @@ -115,10 +119,11 @@ vsha2cl.vv v8, v16, v24 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] -# CHECK-NEXT: - - - - 8.00 - - - - - - 97.00 - - - - +# CHECK-NEXT: - - - - 8.00 - - - - - - 109.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions: +# CHECK-NEXT: - - - - - - - - - - - 12.00 - - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12