diff --git a/mt_instructionrate/Makefile b/mt_instructionrate/Makefile
index 6f083d0..d24b4d6 100644
--- a/mt_instructionrate/Makefile
+++ b/mt_instructionrate/Makefile
@@ -1,2 +1,4 @@
+x86:
+	gcc -pthread mt_instructionrate.c x86_mt_instructionrate.s ../Common/timing.c -o x86_mt_instructionrate
 aarch64:
 	gcc -pthread mt_instructionrate.c arm_mt_instructionrate.s ../Common/timing.c -o arm_mt_instructionrate
diff --git a/mt_instructionrate/mt_instructionrate.c b/mt_instructionrate/mt_instructionrate.c
index 73f2848..ac1f0ac 100644
--- a/mt_instructionrate/mt_instructionrate.c
+++ b/mt_instructionrate/mt_instructionrate.c
@@ -5,18 +5,22 @@
 #include
 #include
 #include
+#include
+#include
 
 #include "../Common/timing.h"
 
+#define gettid() ((pid_t)syscall(SYS_gettid)) // raw SYS_gettid syscall, result cast to pid_t
+
 struct TestThreadData {
     float timeMs; // written by thread to indicate elapsed runtime for that thread
     uint64_t iterations;
     void *testData;
     int core; // -1 = don't set affinity. otherwise set affinity to specified core
-    uint64_t (*testfunc)(uint64_t, void *);
+    uint64_t (*testfunc)(uint64_t, void *) __attribute__((ms_abi)); // test routine, invoked through the ms_abi calling convention
 };
 
-float measureFunction(uint64_t baseIterations, uint64_t (*testFunc)(uint64_t, void *), void *data);
+float measureFunction(uint64_t baseIterations, uint64_t (*testFunc)(uint64_t, void *) __attribute__((ms_abi)), void *data);
 void *TestThread(void *param);
 
 int threadCount = 1;
@@ -26,6 +30,10 @@ int *coreList = NULL;
 #include "arm_mt_instructionrate.c"
 #endif
 
+#ifdef __x86_64
+#include "x86_mt_instructionrate.c"
+#endif
+
 int main(int argc, char *argv[]) {
     char parseBuffer[512];
     int parseIndices[64];
@@ -75,7 +83,7 @@ int main(int argc, char *argv[]) {
 
 // return billion operations per second
 // test function must perform iterations ops
-float measureFunction(uint64_t baseIterations, uint64_t (*testFunc)(uint64_t, void *), void *data){
+float measureFunction(uint64_t baseIterations, uint64_t (*testFunc)(uint64_t, void *) __attribute__((ms_abi)), void *data){
     int toleranceMet = 0,
minTimeMet = 0;
     unsigned int timeMs;
     pthread_t *testThreads = (pthread_t *)malloc(threadCount * sizeof(pthread_t));
diff --git a/mt_instructionrate/x86_mt_instructionrate b/mt_instructionrate/x86_mt_instructionrate
new file mode 100755
index 0000000..24316df
Binary files /dev/null and b/mt_instructionrate/x86_mt_instructionrate differ
diff --git a/mt_instructionrate/x86_mt_instructionrate.c b/mt_instructionrate/x86_mt_instructionrate.c
new file mode 100644
index 0000000..2847538
--- /dev/null
+++ b/mt_instructionrate/x86_mt_instructionrate.c
@@ -0,0 +1,22 @@
+extern uint64_t sse_int32_add_test(uint64_t iterations, void *data) __attribute__((ms_abi)); // defined in x86_mt_instructionrate.s
+
+void RunTests() { // allocates test data, measures each x86 test via measureFunction, prints the results
+    uint64_t iterations = 5500000000;
+    int testDataLength = 256;
+    uint32_t *intTestArr = (uint32_t *)malloc(sizeof(uint32_t) * testDataLength);
+    float *fpTestArr = (float *)malloc(sizeof(float) * testDataLength); // float buffer so i * 1.2f is stored without integer truncation
+    for (int i = 0; i < testDataLength; i++) {
+        intTestArr[i] = i;
+        fpTestArr[i] = i * 1.2f;
+    }
+
+    fprintf(stderr, "Measuring INT32 adds with SSE\n");
+    float sseInt32Adds = measureFunction(iterations, sse_int32_add_test, intTestArr);
+
+    printf("-----GOPS/s-----\n");
+    printf("SSE INT32 Adds: %f\n", sseInt32Adds);
+
+    free(intTestArr);
+    free(fpTestArr); // fpTestArr is only filled and freed here; no test consumes it yet
+    return;
+}
diff --git a/mt_instructionrate/x86_mt_instructionrate.s b/mt_instructionrate/x86_mt_instructionrate.s
new file mode 100644
index 0000000..6b075b7
--- /dev/null
+++ b/mt_instructionrate/x86_mt_instructionrate.s
@@ -0,0 +1,23 @@
+.text
+
+.global sse_int32_add_test
+
+/* ms_abi entry: rcx = iteration count, rdx = data (the first 96 bytes are read). Each loop pass issues six paddd and subtracts 24 from rcx. */
+sse_int32_add_test:
+  movups (%rdx), %xmm0
+  movups 16(%rdx), %xmm1
+  movups 32(%rdx), %xmm2
+  movups 48(%rdx), %xmm3
+  movups 64(%rdx), %xmm4
+  movups 80(%rdx), %xmm5 /* 16-byte stride like the loads above, so xmm5 does not overlap xmm4's data */
+sse_int32_add_test_loop:
+  paddd %xmm0, %xmm0
+  paddd %xmm1, %xmm1
+  paddd %xmm2, %xmm2
+  paddd %xmm3, %xmm3
+  paddd %xmm4, %xmm4
+  paddd %xmm5, %xmm5
+  sub $24, %rcx /* 6 paddd x 4 int32 lanes = 24 adds per pass */
+  cmp $0, %rcx
+  jg sse_int32_add_test_loop /* signed compare: exits once rcx <= 0, so counts that are not a multiple of 24 still terminate */
+  ret /* NOTE(review): rax is never written, so the uint64_t return value declared in C is unspecified - confirm callers ignore it */