Skip to content

Commit

Permalink
add additional arm irate tests
Browse files Browse the repository at this point in the history
  • Loading branch information
clamchowder committed Aug 4, 2023
1 parent cfa90f2 commit 06c7f86
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 56 deletions.
130 changes: 74 additions & 56 deletions instructionrate/arm_instructionrate.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

// Test kernels with external linkage; their bodies are not in this file
// (presumably all live in arm_instructionrate.s - only some are visible there).
extern uint64_t noptest(uint64_t iterations);
extern uint64_t clktest(uint64_t iterations);

// Integer throughput kernels. For the ones whose assembly is visible
// (eortest, maddaddtest, cmptest), `iterations` is a count of test
// instructions: each pass of the unrolled loop subtracts the number of
// instructions it contains (30, 20 and 30 respectively) and the loop ends
// only when the counter is exactly zero, so the value passed must be a
// non-zero multiple of that number. Those three return 0.
// NOTE(review): the remaining kernels presumably follow the same contract -
// confirm against the assembly.
extern uint64_t addtest(uint64_t iterations);
extern uint64_t eortest(uint64_t iterations);
extern uint64_t maddaddtest(uint64_t iterations);
extern uint64_t cmptest(uint64_t iterations);
extern uint64_t addmultest(uint64_t iterations);
extern uint64_t addmul21test(uint64_t iterations);
extern uint64_t mul32test(uint64_t iterations);
Expand Down Expand Up @@ -111,6 +118,14 @@ int main(int argc, char *argv[]) {
uint64_t iterationsHigh = iterations * 5;
uint64_t time_diff_ms;
float latency, opsPerNs, clockSpeedGhz;

// Optional first argument: index of the CPU to pin this thread to, so that
// every measurement below runs on the same core.
if (argc > 1) {
    char *parseEnd = NULL;
    // strtol instead of atoi: atoi cannot report a non-numeric argument and
    // would silently select CPU 0.
    long targetCpu = strtol(argv[1], &parseEnd, 10);
    if (parseEnd == argv[1] || *parseEnd != '\0' || targetCpu < 0 || targetCpu >= CPU_SETSIZE) {
        // An index outside [0, CPU_SETSIZE) cannot be represented in a cpu_set_t.
        fprintf(stderr, "Invalid CPU index '%s' (expected 0..%d), not setting affinity\n", argv[1], CPU_SETSIZE - 1);
    } else {
        cpu_set_t cpuset;
        CPU_ZERO(&cpuset);
        CPU_SET((int)targetCpu, &cpuset);
        // A pid of 0 means the calling thread, which is what gettid() selected,
        // without requiring the gettid() wrapper (glibc 2.30+).
        if (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset) != 0) {
            // Report the failure: otherwise the results would look pinned
            // while actually coming from whichever cores the scheduler chose.
            perror("sched_setaffinity");
        }
    }
}

// figure out clock speed
gettimeofday(&startTv, &startTz);
Expand All @@ -120,62 +135,65 @@ int main(int argc, char *argv[]) {
latency = 1e6 * (float)time_diff_ms / (float)iterations;
// clk speed should be 1/latency, assuming we got one add per clk, roughly
clockSpeedGhz = 1/latency;
printf("Estimated clock speed: %.2f GHz\n", clockSpeedGhz);

printf("Adds per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, addtest));
printf("Nops per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, noptest));
printf("Indepdent movs per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, indepmovtest));
printf("Dependent movs per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, depmovtest));
printf("eor -> 0 per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, xorzerotest));
printf("mov -> 0 per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, movzerotest));
printf("sub -> 0 per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, subzerotest));


printf("Not taken jmps per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, jmptest));
printf("Jump fusion test: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, fusejmptest));
printf("1:1 mixed not taken jmps / muls per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixmuljmptest));
printf("1:2 mixed not taken jmps / muls per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixmuljmptest21));
printf("1:1 mixed not taken jmps / adds per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixaddjmptest));
printf("1:2 mixed not taken jmps / adds per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixaddjmp21test));
printf("1:1 mixed add/mul per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, addmultest));
printf("2:1 mixed add/mul per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, addmul21test));
printf("ror per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, rortest));
printf("1:1 mixed mul/ror per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixmulrortest));
printf("32-bit mul per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mul32test));
printf("64-bit mul per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mul32test));
printf("scalar fp32 add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, faddwrapper));
printf("128-bit vec int32 add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecadd128wrapper));
printf("128-bit vec int32 multiply per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecmul128wrapper));
printf("128-bit vec int32 mixed multiply and add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecaddmul128wrapper));
printf("128-bit vec fp32 add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecfadd128wrapper));
printf("128-bit vec fp32 multiply per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecfmul128wrapper));
printf("128-bit vec fp32 mixed multiply and add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecfaddfmul128wrapper));
printf("2:1 mixed scalar adds and 128-bit vec int32 add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixaddvecadd128wrapper));
printf("3:1 mixed scalar adds and 128-bit vec int32 add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mix3to1addvecadd128wrapper));
printf("1:1 mixed scalar adds and 128-bit vec int32 add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mix1to1addvecadd128wrapper));
printf("1:1 mixed scalar 32-bit multiply and 128-bit vec int32 multiply per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixmulvecmulwrapper));
printf("1:1 mixed 128-bit vec fp32 multiply and 128-bit vec int32 multiply per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecmulfmulwrapper));
printf("1:1 mixed 128-bit vec fp32 add and 128-bit vec int32 add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecaddfaddwrapper));
printf("1:2 mixed not taken jumps and 128-bit vec int32 add per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixjmpvecaddwrapper));
printf("1:1 mixed not taken jumps and 128-bit vec int32 mul per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixjmpvecmulwrapper));
printf("128-bit vec loads per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecloadwrapper));
printf("128-bit vec stores per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecstorewrapper));
printf("64-bit loads per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, loadwrapper));
printf("1:1 mixed 64-bit loads/stores per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixloadstorewrapper));
printf("2:1 mixed 64-bit loads/stores per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mix21loadstorewrapper));
printf("64-bit multiply latency: %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latmul64test));
printf("128-bit vec int32 add latency: %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecadd128wrapper));
printf("128-bit vec int32 mul latency: %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecmul128wrapper));
printf("Scalar FADD Latency: %.2f clocks\n", 1 / measureFunction(iterationsHigh, clockSpeedGhz, latfaddwrapper));
printf("128-bit vector FADD latency: %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecfadd128wrapper));
printf("128-bit vector FMUL latency: %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecfmul128wrapper));

printf("128-bit vector FMA per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecfma128wrapper));
printf("128-bit vector FMA latency: %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecfma128wrapper));
printf("Scalar FMA per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, scalarfmawrapper));
printf("Scalar FMA latency: %.2f clocks\n", 1 / measureFunction(iterationsHigh, clockSpeedGhz, latscalarfmawrapper));
printf("1:1 mixed 128-bit vector FMA/FADD per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecfaddfma128wrapper));
printf("1:1 mixed 128-bit vector FMA/FMUL per clk: %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecfmulfma128wrapper));
printf("Estimated clock speed> %.2f GHz\n", clockSpeedGhz);

printf("Adds per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, addtest));
printf("XORs per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, eortest));
printf("CMPs per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, cmptest));
printf("1:3 madd:add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, maddaddtest));
printf("Nops per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, noptest));
printf("Indepdent movs per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, indepmovtest));
printf("Dependent movs per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, depmovtest));
printf("eor -> 0 per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, xorzerotest));
printf("mov -> 0 per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, movzerotest));
printf("sub -> 0 per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, subzerotest));


printf("Not taken jmps per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, jmptest));
printf("Jump fusion test> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, fusejmptest));
printf("1:1 mixed not taken jmps / muls per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixmuljmptest));
printf("1:2 mixed not taken jmps / muls per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixmuljmptest21));
printf("1:1 mixed not taken jmps / adds per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixaddjmptest));
printf("1:2 mixed not taken jmps / adds per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixaddjmp21test));
printf("1:1 mixed add/mul per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, addmultest));
printf("2:1 mixed add/mul per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, addmul21test));
printf("ror per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, rortest));
printf("1:1 mixed mul/ror per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixmulrortest));
printf("32-bit mul per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mul32test));
printf("64-bit mul per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mul32test));
printf("scalar fp32 add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, faddwrapper));
printf("128-bit vec int32 add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecadd128wrapper));
printf("128-bit vec int32 multiply per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecmul128wrapper));
printf("128-bit vec int32 mixed multiply and add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecaddmul128wrapper));
printf("128-bit vec fp32 add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecfadd128wrapper));
printf("128-bit vec fp32 multiply per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecfmul128wrapper));
printf("128-bit vec fp32 mixed multiply and add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecfaddfmul128wrapper));
printf("2:1 mixed scalar adds and 128-bit vec int32 add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixaddvecadd128wrapper));
printf("3:1 mixed scalar adds and 128-bit vec int32 add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mix3to1addvecadd128wrapper));
printf("1:1 mixed scalar adds and 128-bit vec int32 add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mix1to1addvecadd128wrapper));
printf("1:1 mixed scalar 32-bit multiply and 128-bit vec int32 multiply per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixmulvecmulwrapper));
printf("1:1 mixed 128-bit vec fp32 multiply and 128-bit vec int32 multiply per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecmulfmulwrapper));
printf("1:1 mixed 128-bit vec fp32 add and 128-bit vec int32 add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecaddfaddwrapper));
printf("1:2 mixed not taken jumps and 128-bit vec int32 add per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixjmpvecaddwrapper));
printf("1:1 mixed not taken jumps and 128-bit vec int32 mul per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixjmpvecmulwrapper));
printf("128-bit vec loads per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecloadwrapper));
printf("128-bit vec stores per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecstorewrapper));
printf("64-bit loads per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, loadwrapper));
printf("1:1 mixed 64-bit loads/stores per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixloadstorewrapper));
printf("2:1 mixed 64-bit loads/stores per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mix21loadstorewrapper));
printf("64-bit multiply latency> %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latmul64test));
printf("128-bit vec int32 add latency> %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecadd128wrapper));
printf("128-bit vec int32 mul latency> %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecmul128wrapper));
printf("Scalar FADD Latency> %.2f clocks\n", 1 / measureFunction(iterationsHigh, clockSpeedGhz, latfaddwrapper));
printf("128-bit vector FADD latency> %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecfadd128wrapper));
printf("128-bit vector FMUL latency> %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecfmul128wrapper));

printf("128-bit vector FMA per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, vecfma128wrapper));
printf("128-bit vector FMA latency> %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, latvecfma128wrapper));
printf("Scalar FMA per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, scalarfmawrapper));
printf("Scalar FMA latency> %.2f clocks\n", 1 / measureFunction(iterationsHigh, clockSpeedGhz, latscalarfmawrapper));
printf("1:1 mixed 128-bit vector FMA/FADD per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecfaddfma128wrapper));
printf("1:1 mixed 128-bit vector FMA/FMUL per clk> %.2f\n", measureFunction(iterationsHigh, clockSpeedGhz, mixvecfmulfma128wrapper));
return 0;
}

Expand Down
153 changes: 153 additions & 0 deletions instructionrate/arm_instructionrate.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

.global clktest
.global addtest
.global eortest
.global maddaddtest
.global cmptest
.global addmultest
.global addmul21test
.global mixaddjmp21test
Expand Down Expand Up @@ -192,8 +195,158 @@ addtest_loop:
ldp x12, x13, [sp, #0x20]
ldp x14, x15, [sp, #0x10]
add sp, sp, #0x50
ret

// maddaddtest: throughput kernel mixing madd and add at a 1:3 ratio.
// In:  x0 = number of test instructions to execute. Each loop pass runs
//      20 of them (15 add + 5 madd) and subtracts 20 from x0; the loop
//      exits only when x0 is exactly zero, so x0 must be a non-zero
//      multiple of 20.
// Out: x0 = 0.
// x8-x15 are saved to the stack on entry and restored before returning.
maddaddtest:
// 0x50-byte frame; the register pairs occupy offsets 0x10 through 0x4f.
sub sp, sp, #0x50
stp x14, x15, [sp, #0x10]
stp x12, x13, [sp, #0x20]
stp x10, x11, [sp, #0x30]
stp x8, x9, [sp, #0x40]
// x15 = 1: the addend for every add and the accumulate input of every madd.
mov x15, 1
// x14 = 20: instructions per loop pass, subtracted from the counter.
mov x14, 20
// Zero the three add accumulators (x13, x12, x11).
eor x13, x13, x13
eor x12, x12, x12
eor x11, x11, x11
// x10 is only ever written by the madds below; this initial value is never read.
mov x10, 2
// x9 is cleared here but not used inside the loop.
eor x9, x9, x9
// x8 = 3: multiplicand for the madds.
mov x8, 3
maddaddtest_loop:
// Five identical groups of three adds plus one madd.
// The adds form three separate dependency chains (x13, x12, x11).
// Each madd computes x10 = x8 * x0 + x15; x10 is not one of its sources,
// so no madd depends on the result of a previous madd.
add x13, x13, x15
add x12, x12, x15
add x11, x11, x15
madd x10, x8, x0, x15
add x13, x13, x15
add x12, x12, x15
add x11, x11, x15
madd x10, x8, x0, x15
add x13, x13, x15
add x12, x12, x15
add x11, x11, x15
madd x10, x8, x0, x15
add x13, x13, x15
add x12, x12, x15
add x11, x11, x15
madd x10, x8, x0, x15
add x13, x13, x15
add x12, x12, x15
add x11, x11, x15
madd x10, x8, x0, x15
// Count down by the 20 instructions just executed; repeat until exactly zero.
sub x0, x0, x14
cbnz x0, maddaddtest_loop
// Restore the saved registers and release the frame.
ldp x8, x9, [sp, #0x40]
ldp x10, x11, [sp, #0x30]
ldp x12, x13, [sp, #0x20]
ldp x14, x15, [sp, #0x10]
add sp, sp, #0x50
ret

// eortest: throughput kernel for the eor (exclusive-or) instruction.
// In:  x0 = number of test instructions to execute. Each loop pass runs
//      30 eor instructions and subtracts 30 from x0; the loop exits only
//      when x0 is exactly zero, so x0 must be a non-zero multiple of 30.
// Out: x0 = 0.
// x8-x15 are saved to the stack on entry and restored before returning.
eortest:
// 0x50-byte frame; the register pairs occupy offsets 0x10 through 0x4f.
sub sp, sp, #0x50
stp x14, x15, [sp, #0x10]
stp x12, x13, [sp, #0x20]
stp x10, x11, [sp, #0x30]
stp x8, x9, [sp, #0x40]
// x15 = 1: the second operand of every eor in the loop.
mov x15, 1
// x14 = 30: instructions per loop pass, subtracted from the counter.
mov x14, 30
// Zero the five registers the loop operates on (x13, x12, x11, x10, x9).
eor x13, x13, x13
eor x12, x12, x12
eor x11, x11, x11
eor x10, x10, x10
eor x9, x9, x9
eortest_loop:
// Six identical groups of five eors, one per register, giving five
// separate dependency chains.
eor x13, x13, x15
eor x12, x12, x15
eor x11, x11, x15
eor x10, x10, x15
eor x9, x9, x15
eor x13, x13, x15
eor x12, x12, x15
eor x11, x11, x15
eor x10, x10, x15
eor x9, x9, x15
eor x13, x13, x15
eor x12, x12, x15
eor x11, x11, x15
eor x10, x10, x15
eor x9, x9, x15
eor x13, x13, x15
eor x12, x12, x15
eor x11, x11, x15
eor x10, x10, x15
eor x9, x9, x15
eor x13, x13, x15
eor x12, x12, x15
eor x11, x11, x15
eor x10, x10, x15
eor x9, x9, x15
eor x13, x13, x15
eor x12, x12, x15
eor x11, x11, x15
eor x10, x10, x15
eor x9, x9, x15
// Count down by the 30 instructions just executed; repeat until exactly zero.
sub x0, x0, x14
cbnz x0, eortest_loop
// Restore the saved registers and release the frame.
ldp x8, x9, [sp, #0x40]
ldp x10, x11, [sp, #0x30]
ldp x12, x13, [sp, #0x20]
ldp x14, x15, [sp, #0x10]
add sp, sp, #0x50
ret

// cmptest: throughput kernel for the cmp (compare) instruction.
// In:  x0 = number of test instructions to execute. Each loop pass runs
//      30 cmp instructions and subtracts 30 from x0; the loop exits only
//      when x0 is exactly zero, so x0 must be a non-zero multiple of 30.
// Out: x0 = 0.
// x8-x15 are saved to the stack on entry and restored before returning.
cmptest:
// 0x50-byte frame; the register pairs occupy offsets 0x10 through 0x4f.
sub sp, sp, #0x50
stp x14, x15, [sp, #0x10]
stp x12, x13, [sp, #0x20]
stp x10, x11, [sp, #0x30]
stp x8, x9, [sp, #0x40]
// x15 is set to 1 but is not read anywhere in this routine.
mov x15, 1
// x14 = 30: instructions per loop pass, subtracted from the counter.
mov x14, 30
// Zero the five registers that the loop compares (x13, x12, x11, x10, x9).
eor x13, x13, x13
eor x12, x12, x12
eor x11, x11, x11
eor x10, x10, x10
eor x9, x9, x9
cmptest_loop:
// Six identical groups of five cmps. Each compares a register with itself
// and writes only the condition flags, so no general-purpose register
// changes inside the loop body.
cmp x13, x13
cmp x12, x12
cmp x11, x11
cmp x10, x10
cmp x9, x9
cmp x13, x13
cmp x12, x12
cmp x11, x11
cmp x10, x10
cmp x9, x9
cmp x13, x13
cmp x12, x12
cmp x11, x11
cmp x10, x10
cmp x9, x9
cmp x13, x13
cmp x12, x12
cmp x11, x11
cmp x10, x10
cmp x9, x9
cmp x13, x13
cmp x12, x12
cmp x11, x11
cmp x10, x10
cmp x9, x9
cmp x13, x13
cmp x12, x12
cmp x11, x11
cmp x10, x10
cmp x9, x9
// Count down by the 30 instructions just executed; cbnz tests x0 directly
// and does not rely on the flags left by the cmps.
sub x0, x0, x14
cbnz x0, cmptest_loop
// Restore the saved registers and release the frame.
ldp x8, x9, [sp, #0x40]
ldp x10, x11, [sp, #0x30]
ldp x12, x13, [sp, #0x20]
ldp x14, x15, [sp, #0x10]
add sp, sp, #0x50
ret

addmultest:
sub sp, sp, #0x50
stp x14, x15, [sp, #0x10]
Expand Down

0 comments on commit 06c7f86

Please sign in to comment.