Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
clamchowder committed Oct 11, 2024
2 parents 037b35e + fd381a0 commit 21218b0
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 7 deletions.
4 changes: 4 additions & 0 deletions InstructionRate/x86_instructionrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ extern uint64_t aesencadd128(uint64_t iterations) __attribute((sysv_abi));
extern uint64_t aesencfma128(uint64_t iterations) __attribute((sysv_abi));
extern uint64_t aesencmul128(uint64_t iterations) __attribute((sysv_abi));
extern uint64_t mix256faddintadd(uint64_t iterations) __attribute((sysv_abi));
// Implemented in x86_instructionrate.s. Passes a value back and forth between
// a general-purpose register (r10) and xmm1 using MOVQ, 20 dependent moves per
// loop pass. The loop subtracts 20 from `iterations` each pass and exits only
// when the counter reaches exactly zero, so `iterations` must be a non-zero
// multiple of 20. Returns the low 64 bits of xmm1 after the last pass.
extern uint64_t movqtoxmmtest(uint64_t iterations) __attribute((sysv_abi));

extern uint64_t fma4_256(uint64_t iterations) __attribute((sysv_abi));
extern uint64_t fma4_128(uint64_t iterations) __attribute((sysv_abi));
Expand Down Expand Up @@ -448,6 +449,9 @@ int main(int argc, char *argv[]) {
if (avx2Supported && (testName == NULL || argc > 1 && strncmp(argv[1], "fmul256", 6) == 0))
printf("256-bit FMUL per clk: %.2f\n", measureFunction(iterations, clockSpeedGhz, mul256fp));

if (testName == NULL || argc > 1 && strncmp(argv[1], "movqtoxmm", 9) == 0)
printf("MOVQ GPR <-> XMM: %.2f clocks\n", 1 / measureFunction(iterations, clockSpeedGhz, movqtoxmmtest));

// integer multiply. zhaoxin appears to handle 16-bit and 64-bit multiplies differently
// unlike Intel/AMD CPUs that behave similarly regardless of register width
if (testName == NULL || argc > 1 && strncmp(argv[1], "latmul16", 8) == 0)
Expand Down
37 changes: 36 additions & 1 deletion InstructionRate/x86_instructionrate.s
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
.global add128int
.global mul128int
.global mix256faddintadd
.global movqtoxmmtest

.global pdeptest
.global pexttest
Expand Down Expand Up @@ -5230,7 +5231,6 @@ fdivtest_loop:
sub %r9, %rdi
jnz fdivtest_loop
movq %xmm1, %rax
vzeroupper
pop %r8
pop %r9
ret
Expand Down Expand Up @@ -5365,3 +5365,38 @@ fmuldenormtest_loop:
pop %r8
pop %r9
ret

# movqtoxmmtest: dependent chain of MOVQ transfers between a general-purpose
# register and an XMM register.
#
# Input:  %rdi = iteration count. It is decremented by 20 per loop pass and the
#         loop exits only when it reaches exactly zero, so the caller must pass
#         a non-zero multiple of 20.
# Output: %rax = low 64 bits of %xmm1 after the final pass.
#
# Every move reads the register written by the move before it
# (r10 -> xmm1 -> r10 -> ...), so the moves form one serial dependency chain.
# NOTE(review): the C caller prints the reciprocal of the measured rate as
# "clocks", i.e. presumably the average latency of one GPR<->XMM move - confirm
# against measureFunction.
movqtoxmmtest:
# Save the scratch registers; they are restored in reverse order before ret.
# r8 is saved and restored but not otherwise touched in this routine.
push %r9
push %r8
push %r10
# r9 = per-pass decrement; equals the number of MOVQ instructions in the loop body.
mov $20, %r9
# r10 = seed value that is bounced between r10 and xmm1.
mov $123, %r10
movqtoxmmtest_loop:
# Ten GPR -> XMM -> GPR round trips (20 moves), fully unrolled.
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
movq %r10, %xmm1
movq %xmm1, %r10
# Count down by the number of moves just executed; loop until exactly zero.
sub %r9, %rdi
jnz movqtoxmmtest_loop
# Return the chained value so the result of the moves is consumed.
movq %xmm1, %rax
pop %r10
pop %r8
pop %r9
ret
8 changes: 8 additions & 0 deletions LoadedMemoryLatency/LoadedMemoryLatency.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ int main(int argc, char *argv[]) {
int coreCount = get_nprocs();
int latencyCore = 0;
int *customCores = NULL;
if (argc == 1) {
fprintf(stderr, "Options:\n");
fprintf(stderr, "-bwthreads [int]: Number of bandwidth test threads\n");
fprintf(stderr, "-latencyaffinity [int]: Core to run latency test thread on\n");
fprintf(stderr, "-bwcores [comma separated list]: Cores to run bandwidth load on\n");
fprintf(stderr, "-scaleiterations [int]: Iterations scaling factor\n");
fprintf(stderr, "-throttle [int]: Reduce bandwidth load per bandwidth test thread\n");
}
for (int argIdx = 1; argIdx < argc; argIdx++) {
if (*(argv[argIdx]) == '-') {
char *arg = argv[argIdx] + 1;
Expand Down
4 changes: 2 additions & 2 deletions MemoryBandwidth/MemoryBandwidth.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@

#pragma GCC diagnostic ignored "-Wattributes"

int default_test_sizes[] = { 2, 4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 512, 600, 768, 1024, 1536, 2048,
3072, 4096, 5120, 6144, 8192, 10240, 12288, 16384, 24567, 32768, 65536, 98304,
/*
 * Default list of test sizes, in strictly ascending order.
 * NOTE(review): units are not stated here - presumably KB; confirm against the
 * code that consumes this table.
 *
 * 24576 (24 * 1024) replaces the former 24567, a digit transposition that left
 * one odd-sized entry between 20480 and 32768.
 */
int default_test_sizes[] = { 2, 4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 512, 600, 768, 1024, 1536, 2048, 2560,
                             3072, 4096, 5120, 6144, 8192, 10240, 12288, 14336, 15360, 16384, 18432, 20480, 24576, 32768, 65536, 98304,
                             131072, 262144, 393216, 524288, 1048576, 1572864, 2097152, 3145728 };

typedef struct BandwidthTestThreadData {
Expand Down
4 changes: 2 additions & 2 deletions MemoryBandwidth/MemoryBandwidth_x86.s
Original file line number Diff line number Diff line change
Expand Up @@ -797,7 +797,7 @@ repstosb_copy_pass_loop:
rep stosb
dec %r8
jnz repstosb_copy_pass_loop
movss (%r12), %xmm0
movss (%r13), %xmm0
pop %rdi
pop %rsi
pop %r12
Expand All @@ -823,7 +823,7 @@ repstosd_copy_pass_loop:
rep stosl
dec %r8
jnz repstosd_copy_pass_loop
movss (%r12), %xmm0
movss (%r13), %xmm0
pop %rdi
pop %rsi
pop %r12
Expand Down
4 changes: 2 additions & 2 deletions MemoryLatency/MemoryLatency.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
#define PAGE_SIZE 4096
#define CACHELINE_SIZE 64

int default_test_sizes[] = { 2, 4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 600, 768, 1024, 1536, 2048,
3072, 4096, 5120, 6144, 8192, 10240, 12288, 16384, 24567, 32768, 65536, 98304,
/*
 * Default list of test sizes, in strictly ascending order.
 * NOTE(review): units are not stated here - presumably KB; confirm against the
 * code that consumes this table.
 *
 * 24576 (24 * 1024) replaces the former 24567, a digit transposition that left
 * one odd-sized entry between 20480 and 32768.
 */
int default_test_sizes[] = { 2, 4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 600, 768, 1024, 1536, 2048, 2304, 2560,
                             3072, 4096, 5120, 6144, 8192, 10240, 12288, 13312, 14336, 15360, 16384, 18432, 20480, 24576, 32768, 65536, 98304,
                             131072, 262144, 393216, 524288, 1048576 }; // 2097152 is left out of the default list

#ifdef __x86_64
Expand Down

0 comments on commit 21218b0

Please sign in to comment.