diff --git a/CoreClockChecker/BoostClockChecker_arm.s b/CoreClockChecker/BoostClockChecker_arm.s index 1fdd912..ba6dac5 100644 --- a/CoreClockChecker/BoostClockChecker_arm.s +++ b/CoreClockChecker/BoostClockChecker_arm.s @@ -1,7 +1,12 @@ .text .global clktsctest +.global _clktsctest + +.balign 4 + /* x0 = iterations, return elapsed TSC in x0 */ +_clktsctest: clktsctest: sub sp, sp, #0x40 stp x10, x11, [sp, #0x10] diff --git a/GpuMemLatency/instruction_rate.c b/GpuMemLatency/instruction_rate.c index 2c3c264..6aa6b0d 100644 --- a/GpuMemLatency/instruction_rate.c +++ b/GpuMemLatency/instruction_rate.c @@ -214,6 +214,8 @@ float instruction_rate_test(cl_context context, float4_element_count, a_mem_obj, result_obj, A, result, opsPerIteration); fprintf(stderr, "INT8 G Multiplies/sec: %f\n", int8_mul_rate); + short checkExtensionSupport(const char *extension_name); + if (checkExtensionSupport("cl_khr_fp64")) { fp64_instruction_rate_test(context, command_queue, thread_count, local_size, chase_iterations, float4_element_count, a_mem_obj, result_obj, A, result); @@ -462,4 +464,4 @@ float fp16_instruction_rate_test(cl_context context, fprintf(stderr, "FP16 G FMAs/sec: %f : %f FP16 GFLOPs\n", gOpsPerSec, gOpsPerSec * 2); return gOpsPerSec; -} +} diff --git a/MemoryBandwidth/MemoryBandwidth.c b/MemoryBandwidth/MemoryBandwidth.c index ee883a4..5a0b744 100644 --- a/MemoryBandwidth/MemoryBandwidth.c +++ b/MemoryBandwidth/MemoryBandwidth.c @@ -18,9 +18,10 @@ #include #include #include -#include #include + #ifdef NUMA +#include #include #endif @@ -39,7 +40,9 @@ typedef struct BandwidthTestThreadData { uint64_t start; float* arr; float bw; // written to by the thread + #ifdef NUMA cpu_set_t cpuset; // if numa set, will set affinity + #endif } BandwidthTestThreadData; float MeasureBw(uint64_t sizeKb, uint64_t iterations, uint64_t threads, int shared, int nopBytes, int coreNode, int memNode); diff --git a/MemoryBandwidth/MemoryBandwidth_arm.s b/MemoryBandwidth/MemoryBandwidth_arm.s index fad94bc..dcdc65c 100644 --- a/MemoryBandwidth/MemoryBandwidth_arm.s +++ b/MemoryBandwidth/MemoryBandwidth_arm.s @@ -18,6 +18,8 @@ .global _flush_icache .global _readbankconflict +.balign 4 + /* x0 = ptr to array (was rcx) * x1 = arr length (was rdx) * x2 = iterations (was r8) diff --git a/MemoryLatency/MemoryLatency.c b/MemoryLatency/MemoryLatency.c index cc5fa8f..7acb38a 100644 --- a/MemoryLatency/MemoryLatency.c +++ b/MemoryLatency/MemoryLatency.c @@ -17,6 +17,7 @@ #include #include #endif + #include #include @@ -204,7 +205,7 @@ int main(int argc, char* argv[]) { fprintf(stderr, "Usage: [-test ] [-maxsizemb ] [-iter