Skip to content

Commit

Permalink
[RISCV] Fix the worst case for VSHA2MS in SiFive P400/P600 scheduling…
Browse files Browse the repository at this point in the history
… models (llvm#116893)

For each RVV instruction we should have a single WriteRes assignment to
the worst case scheduling class. This assignment is usually equal to
that of the largest LMUL + smallest SEW. My earlier patch, llvm#114317,
accidentally made two of these assignments on `WriteVSHA2MSV_WorstCase`.
This doesn't affect our MachineScheduler or most of our llvm-mca use cases
(assuming you populate the correct LMUL and SEW), but it's not ideal either.

This patch fixes the issue by assigning the correct numbers and
resource mapping to `WriteVSHA2MSV_WorstCase`, which are equal to those of
the largest LMUL + _largest_ SEW (Zvknh's scheduling properties are
special). I also added an MCA test to make sure we always pick up the
correct worst case numbers for P600's scheduling model.

Original issue was reported by @reidtatge
  • Loading branch information
mshockwave authored Nov 21, 2024
1 parent 040f1c7 commit 0165f88
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 11 deletions.
12 changes: 10 additions & 2 deletions llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
Original file line number Diff line number Diff line change
Expand Up @@ -883,8 +883,16 @@ foreach mx = SchedMxList in {
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
defvar ZvknhSEWs = !listremove(SchedSEWSet<mx>.val, [8, 16]);
// Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending
// order.
defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr);
foreach sew = ZvknhSEWs in {
// The worst case for Zvknh[ab] is designated to the largest SEW and LMUL.
defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW));
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew,
IsWorstCaseVSHA2MSV>;
}
}
// Zvkned
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
Original file line number Diff line number Diff line change
Expand Up @@ -1135,9 +1135,16 @@ foreach mx = SchedMxList in {
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in {
defvar ZvknhSEWs = !listremove(SchedSEWSet<mx>.val, [8, 16]);
// Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending
// order.
defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr);
foreach sew = ZvknhSEWs in {
// The worst case for Zvknh[ab] is designated to the largest SEW and LMUL.
defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW));
let ReleaseAtCycles = [SiFiveP600VSHA2MSCycles<mx, sew>.c] in
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew,
IsWorstCaseVSHA2MSV>;
}
}
// Zvkned
Expand Down
19 changes: 12 additions & 7 deletions llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p670 -iterations=1 < %s | FileCheck %s

# Worst case for vsha2ms should be that of LMUL=8 and SEW=64.
vsha2ms.vv v4, v8, v12

# SEW is only e32 or e64

vsetvli zero, zero, e32, m1, tu, mu
Expand Down Expand Up @@ -44,14 +47,14 @@ vsha2ch.vv v8, v16, v24
vsha2cl.vv v8, v16, v24

# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 32
# CHECK-NEXT: Total Cycles: 108
# CHECK-NEXT: Total uOps: 32
# CHECK-NEXT: Instructions: 33
# CHECK-NEXT: Total Cycles: 119
# CHECK-NEXT: Total uOps: 33

# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.30
# CHECK-NEXT: IPC: 0.30
# CHECK-NEXT: Block RThroughput: 97.0
# CHECK-NEXT: uOps Per Cycle: 0.28
# CHECK-NEXT: IPC: 0.28
# CHECK-NEXT: Block RThroughput: 109.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Expand All @@ -62,6 +65,7 @@ vsha2cl.vv v8, v16, v24
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 12.00 vsha2ms.vv v4, v8, v12
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, tu, mu
# CHECK-NEXT: 1 3 1.00 vsha2ms.vv v4, v8, v12
# CHECK-NEXT: 1 3 1.00 vsha2ch.vv v4, v8, v12
Expand Down Expand Up @@ -115,10 +119,11 @@ vsha2cl.vv v8, v16, v24

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14]
# CHECK-NEXT: - - - - 8.00 - - - - - - 97.00 - - - -
# CHECK-NEXT: - - - - 8.00 - - - - - - 109.00 - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8.0] [8.1] [9] [10] [11] [12] [13] [14] Instructions:
# CHECK-NEXT: - - - - - - - - - - - 12.00 - - - - vsha2ms.vv v4, v8, v12
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - vsetvli zero, zero, e32, m1, tu, mu
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ms.vv v4, v8, v12
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - vsha2ch.vv v4, v8, v12
Expand Down

0 comments on commit 0165f88

Please sign in to comment.