Skip to content

Commit

Permalink
Merge pull request #6 from dingobits/darwin
Browse files Browse the repository at this point in the history
Fix building issues on Darwin
  • Loading branch information
clamchowder authored Sep 18, 2023
2 parents 06c7f86 + 2438048 commit 28985e9
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 9 deletions.
5 changes: 5 additions & 0 deletions CoreClockChecker/BoostClockChecker_arm.s
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
.text
.global clktsctest

.global _clktsctest

.balign 4

/* x0 = iterations, return elapsed TSC in x0 */
_clktsctest:
clktsctest:
sub sp, sp, #0x40
stp x10, x11, [sp, #0x10]
Expand Down
4 changes: 3 additions & 1 deletion GpuMemLatency/instruction_rate.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ float instruction_rate_test(cl_context context,
float4_element_count, a_mem_obj, result_obj, A, result, opsPerIteration);
fprintf(stderr, "INT8 G Multiplies/sec: %f\n", int8_mul_rate);

short checkExtensionSupport(const char *extension_name);

if (checkExtensionSupport("cl_khr_fp64")) {
fp64_instruction_rate_test(context, command_queue, thread_count, local_size, chase_iterations, float4_element_count,
a_mem_obj, result_obj, A, result);
Expand Down Expand Up @@ -462,4 +464,4 @@ float fp16_instruction_rate_test(cl_context context,
fprintf(stderr, "FP16 G FMAs/sec: %f : %f FP16 GFLOPs\n", gOpsPerSec, gOpsPerSec * 2);

return gOpsPerSec;
}
}
5 changes: 4 additions & 1 deletion MemoryBandwidth/MemoryBandwidth.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
#include <sched.h>
#include <math.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <errno.h>

#ifdef NUMA
#include <sys/sysinfo.h>
#include <numa.h>
#endif

Expand All @@ -39,7 +40,9 @@ typedef struct BandwidthTestThreadData {
uint64_t start;
float* arr;
float bw; // written to by the thread
#ifdef NUMA
cpu_set_t cpuset; // if numa set, will set affinity
#endif
} BandwidthTestThreadData;

float MeasureBw(uint64_t sizeKb, uint64_t iterations, uint64_t threads, int shared, int nopBytes, int coreNode, int memNode);
Expand Down
2 changes: 2 additions & 0 deletions MemoryBandwidth/MemoryBandwidth_arm.s
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
.global _flush_icache
.global _readbankconflict

.balign 4

/* x0 = ptr to array (was rcx)
* x1 = arr length (was rdx)
* x2 = iterations (was r8)
Expand Down
10 changes: 6 additions & 4 deletions MemoryLatency/MemoryLatency.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <numaif.h>
#include <sys/sysinfo.h>
#endif

#include <errno.h>
#include <sched.h>

Expand Down Expand Up @@ -204,7 +205,7 @@ int main(int argc, char* argv[]) {
fprintf(stderr, "Usage: [-test <c/asm/tlb/mlp>] [-maxsizemb <max test size in MB>] [-iter <base iterations, default 100000000]\n");
}

#ifndef __MINGW32__
#ifdef __linux__
if (hugePages) {
size_t hugePageSize = 1 << 21;
size_t testSizeKb = singleSize ? singleSize : default_test_sizes[testSizeCount - 1];
Expand Down Expand Up @@ -595,10 +596,11 @@ float RunAsmTest(uint32_t size_kb, uint32_t iterations, uint32_t *preallocatedAr
// Run test
gettimeofday(&startTv, &startTz);
#ifdef LONGPATTERN
if (longpattern) sum = longpatternlatencytest(scaled_iterations, A);
else
#endif
if (longpattern)
sum = longpatternlatencytest(scaled_iterations, A);
else
sum = latencytest(scaled_iterations, A);
#endif
gettimeofday(&endTv, &endTz);
uint64_t time_diff_ms = 1000 * (endTv.tv_sec - startTv.tv_sec) + ((endTv.tv_usec - startTv.tv_usec) / 1000);
float latency = 1e6 * (float)time_diff_ms / (float)scaled_iterations;
Expand Down
17 changes: 17 additions & 0 deletions MemoryLatency/MemoryLatency_arm.s
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,20 @@
.global stlftest128
.global matchedstlftest

.global _latencytest
.global _longpatternlatencytest
.global _preplatencyarr
.global _stlftest
.global _stlftest32
.global _stlftest128
.global _matchedstlftest

.balign 4

/* x0 = ptr to arr
x1 = arr len
Converts the values in the array from array indexes to pointers. */
_preplatencyarr:
preplatencyarr:
sub sp, sp, #0x20
stp x14, x15, [sp, #0x10]
Expand All @@ -30,6 +41,7 @@ preplatencyarr_loop:
/* x0 = iteration count
x1 = ptr to arr
Does pointer chasing for the specified iteration count. */
_latencytest:
latencytest:
sub sp, sp, #0x20
stp x14, x15, [sp, #0x10]
Expand All @@ -49,6 +61,7 @@ latencytest_loop:
x1 = ptr to arr
do pointer chasing with longer pattern, given different patterns
within each cacheline */
_longpatternlatencytest:
longpatternlatencytest:
sub sp, sp, #0x50
stp x14, x15, [sp, #0x10]
Expand Down Expand Up @@ -86,6 +99,7 @@ longpatternlatencytest_loop_inc:

/* x0 = iteration count
x1 = ptr to arr; its first 32-bit int is the store offset, the second is the load offset */
_stlftest:
stlftest:
sub sp, sp, #0x40
stp x14, x15, [sp, #0x10]
Expand Down Expand Up @@ -114,6 +128,7 @@ stlftest_loop:
add sp, sp, #0x40
ret

_stlftest32:
stlftest32:
sub sp, sp, #0x40
stp x14, x15, [sp, #0x10]
Expand Down Expand Up @@ -144,6 +159,7 @@ stlftest32_loop:

/* x0 = iteration count
x1 = ptr to arr; its first 32-bit int is the store offset, the second is the load offset */
_stlftest128:
stlftest128:
sub sp, sp, #0x40
stp x14, x15, [sp, #0x10]
Expand Down Expand Up @@ -172,6 +188,7 @@ stlftest128_loop:
add sp, sp, #0x40
ret

_matchedstlftest:
matchedstlftest:
sub sp, sp, #0x40
stp x14, x15, [sp, #0x10]
Expand Down
Binary file removed instructionrate/arm_instructionrate
Binary file not shown.
Loading

0 comments on commit 28985e9

Please sign in to comment.