Skip to content

Commit

Permalink
ppc64 mt instructionrate
Browse files Browse the repository at this point in the history
  • Loading branch information
clamchowder committed Jan 18, 2024
1 parent 58b5703 commit 74708dd
Show file tree
Hide file tree
Showing 6 changed files with 271 additions and 1 deletion.
2 changes: 2 additions & 0 deletions mt_instructionrate/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ x86:
gcc -pthread -masm=intel x86_mt_instructionrate.s mt_instructionrate.c ../Common/timing.c -o x86_mt_instructionrate
aarch64:
gcc -pthread mt_instructionrate.c arm_mt_instructionrate.s ../Common/timing.c -o arm_mt_instructionrate
ppc64:
gcc -pthread -mregnames mt_instructionrate.c ppc64_mt_instructionrate.s ../Common/timing.c -o ppc64_mt_instructionrate
Binary file removed mt_instructionrate/arm_mt_instructionrate
Binary file not shown.
6 changes: 5 additions & 1 deletion mt_instructionrate/mt_instructionrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ int *coreList = NULL;
#include "x86_mt_instructionrate.c"
#endif


#ifdef __PPC64__
#include "ppc64_mt_instructionrate.c"
#endif

int main(int argc, char *argv[]) {
char parseBuffer[512];
int parseIndices[64];
Expand Down Expand Up @@ -111,7 +116,6 @@ float measureFunction(uint64_t baseIterations, uint64_t (*testFunc)(uint64_t, vo
pthread_t* testThreads = (pthread_t*)malloc(threadCount * sizeof(pthread_t));
#else
HANDLE* testThreads = (HANDLE*)malloc(threadCount * sizeof(HANDLE));
DWORD* tids = (DWORD*)malloc(threadCount * sizeof(DWORD));
#endif

do {
Expand Down
40 changes: 40 additions & 0 deletions mt_instructionrate/ppc64_mt_instructionrate.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
 * Test kernels implemented in ppc64_mt_instructionrate.s.
 * Every kernel shares one signature so it can be handed to measureFunction():
 *   iterations - operation count the kernel's loop counter is compared against
 *   data       - pointer to the array the kernel loads its initial register values from
 * The assembly never writes r3, so the value that comes back is the iterations argument.
 */
extern uint64_t vec_int32_add_test(uint64_t iterations, void *data);
extern uint64_t vec_int32_mul_test(uint64_t iterations, void *data);
extern uint64_t vec_fp32_add_test(uint64_t iterations, void *data);
extern uint64_t vec_fp32_fma_test(uint64_t iterations, void *data);
extern uint64_t vec_fp32_isqrt_test(uint64_t iterations, void *data);
/* Scalar double-precision kernels (they load their inputs with lfd). */
extern uint64_t fp64_add_test(uint64_t iterations, void *data);
extern uint64_t fp64_fma_test(uint64_t iterations, void *data);

/*
 * Runs every PPC64 instruction-rate kernel through measureFunction() and prints
 * one result line per kernel to stdout. Progress messages go to stderr.
 *
 * Three input arrays are prepared:
 *   intTestArr  - uint32_t values for the integer vector kernels
 *   fpTestArr   - float values for the single-precision vector kernels
 *   fp64TestArr - double values for the scalar double-precision kernels
 *
 * The FP arrays hold real floating-point values. Storing i * 1.2f into a
 * uint32_t array would truncate to an integer, and the kernels would then
 * reinterpret those integer bit patterns as (mostly denormal) floats.
 *
 * If any allocation fails, a message is printed to stderr and no test is run.
 *
 * NOTE(review): the vector kernels load with lvx; this relies on malloc()
 * returning 16-byte aligned memory on the target -- confirm for the platform.
 */
void RunTests() {
    uint64_t iterations = 3500000000;
    int testDataLength = 256;

    uint32_t *intTestArr = (uint32_t *)malloc(sizeof(uint32_t) * testDataLength);
    float *fpTestArr = (float *)malloc(sizeof(float) * testDataLength);
    double *fp64TestArr = (double *)malloc(sizeof(double) * testDataLength);
    if (intTestArr == NULL || fpTestArr == NULL || fp64TestArr == NULL) {
        fprintf(stderr, "Could not allocate test data\n");
        free(intTestArr);   /* free(NULL) is a no-op */
        free(fpTestArr);
        free(fp64TestArr);
        return;
    }

    for (int i = 0; i < testDataLength; i++) {
        intTestArr[i] = i;
        fpTestArr[i] = i * 1.2f;
        fp64TestArr[i] = i * 1.2;
    }

    fprintf(stderr, "Measuring INT32 adds\n");
    float int32adds = measureFunction(iterations, vec_int32_add_test, intTestArr);
    fprintf(stderr, "Measuring INT32 multiplies\n");
    float int32muls = measureFunction(iterations, vec_int32_mul_test, intTestArr);
    fprintf(stderr, "Measuring FP32 adds\n");
    float fp32adds = measureFunction(iterations, vec_fp32_add_test, (void *)fpTestArr);
    fprintf(stderr, "Measuring FP32 FMAs\n");
    float fp32fmas = measureFunction(iterations, vec_fp32_fma_test, (void *)fpTestArr);
    fprintf(stderr, "Measuring FP32 inverse square roots\n");
    float fp32isqrt = measureFunction(iterations, vec_fp32_isqrt_test, (void *)fpTestArr);
    fprintf(stderr, "Measuring FP64 adds\n");
    float fp64adds = measureFunction(iterations, fp64_add_test, (void *)fp64TestArr);
    fprintf(stderr, "Measuring FP64 FMAs\n");
    float fp64fmas = measureFunction(iterations, fp64_fma_test, (void *)fp64TestArr);

    printf("-----GOPS/s-----\n");
    printf("Altivec INT32 Add: %f\n", int32adds);
    printf("Altivec INT32 Multiply: %f\n", int32muls);
    printf("Altivec FP32 Add: %f\n", fp32adds);
    /* An FMA is counted as two floating-point operations. */
    printf("Altivec FP32 FMA: %f (%f GFLOPS)\n", fp32fmas, 2 * fp32fmas);
    printf("Altivec FP32 Inverse Square Root: %f\n", fp32isqrt);
    printf("FP64 Add: %f\n", fp64adds);
    printf("FP64 FMA: %f (%f GFLOPS)\n", fp64fmas, 2 * fp64fmas);

    free(intTestArr);
    free(fpTestArr);
    free(fp64TestArr);
    return;
}
224 changes: 224 additions & 0 deletions mt_instructionrate/ppc64_mt_instructionrate.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
.text

.global vec_int32_add_test
.global vec_int32_mul_test
.global vec_fp32_add_test
.global vec_fp32_fma_test
.global vec_fp32_isqrt_test
.global fp64_add_test
.global fp64_fma_test

/*
 * Register convention shared by every test in this file:
 *   r3 = iterations, r4 = ptr to arr
 *
 * uint64_t vec_int32_add_test(uint64_t iterations, void *data)
 *
 * Rate kernel for the vector 32-bit integer add (vadduwm).
 *   In:   r3 = iterations, r4 = data (96 bytes are read with lvx)
 *   Out:  r3 is never written, so it still holds iterations on return
 *   Uses: r9 (operation counter), cr7, v0-v5
 *
 * Each pass issues six vadduwm instructions, every one using its own register
 * as both sources and destination so the six chains are independent. The
 * counter advances by 24 per pass (6 instructions x 4 word lanes) and the
 * loop repeats while r3 > r9 (unsigned compare), so the body always runs at
 * least once.
 */
vec_int32_add_test:
    /* Function descriptor words: entry label, TOC base, 0. */
    .quad .L.vec_int32_add_test,.TOC.@tocbase,0
.L.vec_int32_add_test:
    /* lvx takes its offset from a register, so r9 doubles as the index here. */
    li      r9, 0
    lvx     v0, r4, r9
    li      r9, 16
    lvx     v1, r4, r9
    li      r9, 32
    lvx     v2, r4, r9
    li      r9, 48
    lvx     v3, r4, r9
    li      r9, 64
    lvx     v4, r4, r9
    li      r9, 80
    lvx     v5, r4, r9
    li      r9, 0                       /* r9 = operations completed so far */
vec_int32_add_test_loop:
    vadduwm v0, v0, v0
    vadduwm v1, v1, v1
    vadduwm v2, v2, v2
    vadduwm v3, v3, v3
    vadduwm v4, v4, v4
    vadduwm v5, v5, v5                  /* was v5, v4, v4: chained on v4 and ignored the loaded v5 */
    addi    r9, r9, 24
    cmpld   cr7, r3, r9
    bgt     cr7, vec_int32_add_test_loop
    blr

/*
 * vec_int32_mul_test -- vector integer multiply rate kernel.
 * r3 = iterations, r4 = ptr to arr. r3 is never written.
 * Loads v0-v7 from arr, then repeats eight vmuleuh instructions per pass,
 * each using its own register as both sources and destination.
 * r9 advances by 32 per pass; the loop repeats while r3 > r9 (unsigned).
 * NOTE(review): vmuleuh multiplies halfword elements; the symbol name says
 * int32 -- confirm this is the intended instruction for the measurement.
 */
vec_int32_mul_test:
/* Function descriptor words: entry label, TOC base, 0. */
.quad .L.vec_int32_mul_test,.TOC.@tocbase,0
.L.vec_int32_mul_test:
/* r9 is used as the lvx byte offset while loading the inputs. */
li r9, 0
lvx v0, r4, r9
li r9, 16
lvx v1, r4, r9
li r9, 32
lvx v2, r4, r9
li r9, 48
lvx v3, r4, r9
li r9, 64
lvx v4, r4, r9
li r9, 80
lvx v5, r4, r9
li r9, 96
lvx v6, r4, r9
/* NOTE(review): offset jumps from 96 to 128, skipping 112 -- possibly a typo; confirm. */
li r9, 128
lvx v7, r4, r9
/* From here on r9 is the running operation count. */
li r9, 0
vec_int32_mul_test_loop:
vmuleuh v0, v0, v0
vmuleuh v1, v1, v1
vmuleuh v2, v2, v2
vmuleuh v3, v3, v3
vmuleuh v4, v4, v4
vmuleuh v5, v5, v5
vmuleuh v6, v6, v6
vmuleuh v7, v7, v7
/* 8 instructions per pass, counted as 32 operations. */
addi r9, r9, 32
cmpld cr7, r3, r9
bgt cr7, vec_int32_mul_test_loop
blr

/*
 * vec_fp32_add_test -- vector single-precision add (vaddfp) rate kernel.
 * r3 = iterations, r4 = ptr to arr. r3 is never written.
 * Loads v0-v7 from arr, then repeats eight vaddfp instructions per pass,
 * each using its own register as both sources and destination.
 * r9 advances by 32 per pass; the loop repeats while r3 > r9 (unsigned).
 */
vec_fp32_add_test:
/* Function descriptor words: entry label, TOC base, 0. */
.quad .L.vec_fp32_add_test,.TOC.@tocbase,0
.L.vec_fp32_add_test:
/* r9 is used as the lvx byte offset while loading the inputs. */
li r9, 0
lvx v0, r4, r9
li r9, 16
lvx v1, r4, r9
li r9, 32
lvx v2, r4, r9
li r9, 48
lvx v3, r4, r9
li r9, 64
lvx v4, r4, r9
li r9, 80
lvx v5, r4, r9
li r9, 96
lvx v6, r4, r9
/* NOTE(review): offset jumps from 96 to 128, skipping 112 -- possibly a typo; confirm. */
li r9, 128
lvx v7, r4, r9
/* From here on r9 is the running operation count. */
li r9, 0
vec_fp32_add_test_loop:
vaddfp v0, v0, v0
vaddfp v1, v1, v1
vaddfp v2, v2, v2
vaddfp v3, v3, v3
vaddfp v4, v4, v4
vaddfp v5, v5, v5
vaddfp v6, v6, v6
vaddfp v7, v7, v7
/* 8 instructions per pass, counted as 32 operations. */
addi r9, r9, 32
cmpld cr7, r3, r9
bgt cr7, vec_fp32_add_test_loop
blr

/*
 * uint64_t vec_fp32_fma_test(uint64_t iterations, void *data)
 *
 * Rate kernel for the vector single-precision multiply-add (vmaddfp).
 *   In:   r3 = iterations, r4 = data (read with lvx at offsets 0..96 and 128)
 *   Out:  r3 is never written, so it still holds iterations on return
 *   Uses: r9 (operation counter), cr7, v0-v7
 *
 * Each pass issues eight vmaddfp instructions, every one using its own
 * register for all operands so the eight chains are independent. The counter
 * advances by 32 per pass and the loop repeats while r3 > r9 (unsigned
 * compare), so the body always runs at least once.
 */
vec_fp32_fma_test:
    /* Function descriptor words: entry label, TOC base, 0. */
    .quad .L.vec_fp32_fma_test,.TOC.@tocbase,0
.L.vec_fp32_fma_test:
    /* lvx takes its offset from a register, so r9 doubles as the index here. */
    li      r9, 0
    lvx     v0, r4, r9
    li      r9, 16
    lvx     v1, r4, r9
    li      r9, 32
    lvx     v2, r4, r9
    li      r9, 48
    lvx     v3, r4, r9
    li      r9, 64
    lvx     v4, r4, r9
    li      r9, 80
    lvx     v5, r4, r9
    li      r9, 96
    lvx     v6, r4, r9
    li      r9, 128                     /* NOTE(review): 112 is skipped, as in the sibling tests */
    lvx     v7, r4, r9
    li      r9, 0                       /* r9 = operations completed so far */
vec_fp32_fma_test_loop:
    vmaddfp v0, v0, v0, v0
    vmaddfp v1, v1, v1, v1
    vmaddfp v2, v2, v2, v2
    vmaddfp v3, v3, v3, v3
    vmaddfp v4, v4, v4, v4
    vmaddfp v5, v5, v5, v5
    vmaddfp v6, v6, v6, v6
    vmaddfp v7, v7, v7, v7
    addi    r9, r9, 32
    cmpld   cr7, r3, r9
    /* Must loop back into this routine. The original targeted
       vec_fp32_add_test_loop, so every pass after the first ran vaddfp. */
    bgt     cr7, vec_fp32_fma_test_loop
    blr

/*
 * vec_fp32_isqrt_test -- vector reciprocal square root estimate (vrsqrtefp) rate kernel.
 * r3 = iterations, r4 = ptr to arr. r3 is never written.
 * Loads v0-v7 from arr, then repeats eight vrsqrtefp instructions per pass,
 * each reading and writing its own register.
 * r9 advances by 32 per pass; the loop repeats while r3 > r9 (unsigned).
 */
vec_fp32_isqrt_test:
/* Function descriptor words: entry label, TOC base, 0. */
.quad .L.vec_fp32_isqrt_test,.TOC.@tocbase,0
.L.vec_fp32_isqrt_test:
/* r9 is used as the lvx byte offset while loading the inputs. */
li r9, 0
lvx v0, r4, r9
li r9, 16
lvx v1, r4, r9
li r9, 32
lvx v2, r4, r9
li r9, 48
lvx v3, r4, r9
li r9, 64
lvx v4, r4, r9
li r9, 80
lvx v5, r4, r9
li r9, 96
lvx v6, r4, r9
/* NOTE(review): offset jumps from 96 to 128, skipping 112 -- possibly a typo; confirm. */
li r9, 128
lvx v7, r4, r9
/* From here on r9 is the running operation count. */
li r9, 0
vec_fp32_isqrt_test_loop:
vrsqrtefp v0, v0
vrsqrtefp v1, v1
vrsqrtefp v2, v2
vrsqrtefp v3, v3
vrsqrtefp v4, v4
vrsqrtefp v5, v5
vrsqrtefp v6, v6
vrsqrtefp v7, v7
/* 8 instructions per pass, counted as 32 operations. */
addi r9, r9, 32
cmpld cr7, r3, r9
bgt cr7, vec_fp32_isqrt_test_loop
blr

/*
 * uint64_t fp64_add_test(uint64_t iterations, void *data)
 *
 * Rate kernel for the scalar double-precision add (fadd).
 *   In:   r3 = iterations, r4 = data (eight doubles, 64 bytes, read with lfd)
 *   Out:  r3 is never written, so it still holds iterations on return
 *   Uses: r9 (operation counter), cr7, f0-f7
 *
 * Each pass issues eight fadd instructions, every one using its own register
 * as both sources and destination so the eight chains are independent. The
 * counter advances by 8 per pass and the loop repeats while r3 > r9 (unsigned
 * compare), so the body always runs at least once.
 */
fp64_add_test:
    /* Function descriptor words: entry label, TOC base, 0. */
    .quad .L.fp64_add_test,.TOC.@tocbase,0
.L.fp64_add_test:
    lfd     f0, 0(r4)
    lfd     f1, 8(r4)
    lfd     f2, 16(r4)
    lfd     f3, 24(r4)
    lfd     f4, 32(r4)
    lfd     f5, 40(r4)
    lfd     f6, 48(r4)
    lfd     f7, 56(r4)
    /* The original never initialised r9, so the loop started counting from
       whatever the caller left there and ran an arbitrary number of passes. */
    li      r9, 0                       /* r9 = operations completed so far */
fp64_add_test_loop:
    fadd    f0, f0, f0
    fadd    f1, f1, f1
    fadd    f2, f2, f2
    fadd    f3, f3, f3
    fadd    f4, f4, f4
    fadd    f5, f5, f5
    fadd    f6, f6, f6
    fadd    f7, f7, f7
    addi    r9, r9, 8
    cmpld   cr7, r3, r9
    bgt     cr7, fp64_add_test_loop
    blr

/*
 * uint64_t fp64_fma_test(uint64_t iterations, void *data)
 *
 * Rate kernel for the scalar double-precision multiply-add (fmadd).
 *   In:   r3 = iterations, r4 = data (eight doubles, 64 bytes, read with lfd)
 *   Out:  r3 is never written, so it still holds iterations on return
 *   Uses: r9 (operation counter), cr7, f0-f7
 *
 * Each pass issues eight fmadd instructions, every one using its own register
 * for all operands so the eight chains are independent. The counter advances
 * by 8 per pass and the loop repeats while r3 > r9 (unsigned compare), so the
 * body always runs at least once.
 */
fp64_fma_test:
    /* Function descriptor words: entry label, TOC base, 0. */
    .quad .L.fp64_fma_test,.TOC.@tocbase,0
.L.fp64_fma_test:
    lfd     f0, 0(r4)
    lfd     f1, 8(r4)
    lfd     f2, 16(r4)
    lfd     f3, 24(r4)
    lfd     f4, 32(r4)
    lfd     f5, 40(r4)
    lfd     f6, 48(r4)
    lfd     f7, 56(r4)
    /* The original never initialised r9, so the loop started counting from
       whatever the caller left there and ran an arbitrary number of passes. */
    li      r9, 0                       /* r9 = operations completed so far */
fp64_fma_test_loop:
    fmadd   f0, f0, f0, f0
    fmadd   f1, f1, f1, f1
    fmadd   f2, f2, f2, f2
    fmadd   f3, f3, f3, f3
    fmadd   f4, f4, f4, f4
    fmadd   f5, f5, f5, f5
    fmadd   f6, f6, f6, f6
    fmadd   f7, f7, f7, f7
    addi    r9, r9, 8
    cmpld   cr7, r3, r9
    bgt     cr7, fp64_fma_test_loop
    blr
Binary file removed mt_instructionrate/x86_mt_instructionrate
Binary file not shown.

0 comments on commit 74708dd

Please sign in to comment.