From 82d9068d394496695befdec5adf8015e8f8b2faa Mon Sep 17 00:00:00 2001 From: Azim Afroozeh Date: Wed, 11 Sep 2024 22:09:58 +0200 Subject: [PATCH] no benchmark --- CMakeLists.txt | 5 - fls_bench/fls_bench.hpp | 2262 ----------- fls_bench/google/benchmark/LICENSE | 202 - ...allback_scalar_aav_1024_uf1_rsum_bench.cpp | 116 - ...lback_scalar_aav_1024_uf1_unffor_bench.cpp | 3367 ----------------- .../fallback/scalar_aav_uf1/ffor.cmake | 2 - .../fallback/scalar_aav_uf1/pack.cmake | 2 - .../fallback/scalar_aav_uf1/rsum.cmake | 23 +- .../fallback/scalar_aav_uf1/transpose.cmake | 23 +- .../fallback/scalar_aav_uf1/unffor.cmake | 23 +- .../fallback/scalar_aav_uf1/unpack.cmake | 23 +- .../fallback/scalar_aav_uf1/unrsum.cmake | 25 +- .../fallback/scalar_aav_uf1/untranspose.cmake | 13 +- 13 files changed, 38 insertions(+), 6048 deletions(-) delete mode 100644 fls_bench/fls_bench.hpp delete mode 100644 fls_bench/google/benchmark/LICENSE delete mode 100644 primitives/fls_generated/fallback/scalar_aav_uf1/fallback_scalar_aav_1024_uf1_rsum_bench.cpp delete mode 100644 primitives/fls_generated/fallback/scalar_aav_uf1/fallback_scalar_aav_1024_uf1_unffor_bench.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a0495b..de150c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,11 +154,6 @@ endif () if (FLS_BUILD_BENCHMARKING) message("---------------------------------------------------------------------------------------------------------") message("- Benchmark:") - if (NOT DEFINED CMAKE_TOOLCHAIN_FILE) - message(FATAL_ERROR "CMAKE_TOOLCHAIN_FILE is not set") - endif (NOT DEFINED CMAKE_TOOLCHAIN_FILE) - - configure_file(${CMAKE_SOURCE_DIR}/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fls_bench.hpp) include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_SOURCE_DIR}/benchmark/include) diff --git a/fls_bench/fls_bench.hpp b/fls_bench/fls_bench.hpp deleted file mode 100644 index afb7ae2..0000000 --- a/fls_bench/fls_bench.hpp +++ /dev/null @@ -1,2262 +0,0 @@ -#ifndef FASTLANES_COMPRESSION_FLS_BENCH_FLS_BENCH_HPP -#define FASTLANES_COMPRESSION_FLS_BENCH_FLS_BENCH_HPP - -#pragma clang diagnostic ignored "-Wconversion" - - -/* - * The M1 cycle counter is from Lemire repo. https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/tree/master/2021/03/24 - * The other parts are from google benchmark repo, edited heavily. todo -> add the link - */ -#include -#include -#include -#include -#include -#include // for errno -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include // for memset -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include // for ioctl -#include -#include // for syscall -#include -#include - -#if defined(__linux__) -#include // for __NR_perf_event_open -#include // for perf event constants -#endif -/*---------------------------------------------------------------------------------------------------------------------\ - * Macros: -\---------------------------------------------------------------------------------------------------------------------*/ -// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer. -#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) -#define BENCHMARK_HAS_CXX11 -#endif - -// This _MSC_VER check should detect VS 2017 v15.3 and newer. -#if __cplusplus >= 201703L || (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L) -#define BENCHMARK_HAS_CXX17 -#endif - -#if defined(BENCHMARK_HAS_CXX11) -#include -#include -#include -#endif - -#if defined(_MSC_VER) -#include // for _ReadWriteBarrier -#endif - -#ifndef BENCHMARK_HAS_CXX11 -#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - TypeName& operator=(const TypeName&) -#else -#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&) = delete; \ - TypeName& operator=(const TypeName&) = delete -#endif - -#ifdef BENCHMARK_HAS_CXX17 -#define BENCHMARK_UNUSED FLS_BENCH_MAYBE_UNUSED -#elif defined(__GNUC__) || defined(__clang__) -#define BENCHMARK_UNUSED __attribute__((unused)) -#else -#define BENCHMARK_UNUSED -#endif - -#if defined(__GNUC__) || defined(__clang__) -#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#elif defined(_MSC_VER) && !defined(__clang__) -#define BENCHMARK_ALWAYS_INLINE __forceinline -#if _MSC_VER >= 1900 -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#else -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) -#endif -#define __func__ __FUNCTION__ -#else -#define BENCHMARK_ALWAYS_INLINE -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) -#endif - -#define BENCHMARK_INTERNAL_TOSTRING2(x) #x -#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) - -#if defined(__GNUC__) || defined(__clang__) -#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) -#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) -#else -#define BENCHMARK_BUILTIN_EXPECT(x, y) x -#define BENCHMARK_DEPRECATED_MSG(msg) -#define BENCHMARK_WARNING_MSG(msg) \ - __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING(__LINE__) ") : warning note: " msg)) -#endif - -#if defined(__GNUC__) && !defined(__clang__) -#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#endif - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - -#if defined(__GNUC__) || __has_builtin(__builtin_unreachable) -#define BENCHMARK_UNREACHABLE() __builtin_unreachable() -#elif defined(_MSC_VER) -#define BENCHMARK_UNREACHABLE() __assume(false) -#else -#define BENCHMARK_UNREACHABLE() ((void)0) -#endif - -#ifdef BENCHMARK_HAS_CXX11 -#else -#endif - -// clang-format off - -#ifndef __has_feature - #define __has_feature(x) 0 -#endif - -#if defined(__clang__) - #if defined(__ibmxl__) - #if !defined(COMPILER_IBMXL) - #define COMPILER_IBMXL - #endif - #elif !defined(COMPILER_CLANG) - #define COMPILER_CLANG - #endif -#elif defined(_MSC_VER) - #if !defined(COMPILER_MSVC) - #define COMPILER_MSVC - #endif -#elif defined(__GNUC__) - #if !defined(COMPILER_GCC) - #define COMPILER_GCC - #endif -#endif - -#if __has_feature(cxx_attributes) - #define BENCHMARK_NORETURN [[noreturn]] -#elif defined(__GNUC__) - #define BENCHMARK_NORETURN __attribute__((noreturn)) -#elif defined(COMPILER_MSVC) - #define BENCHMARK_NORETURN __declspec(noreturn) -#else - #define BENCHMARK_NORETURN -#endif - -#if defined(__CYGWIN__) - #define BENCHMARK_OS_CYGWIN 1 -#elif defined(_WIN32) - #define BENCHMARK_OS_WINDOWS 1 - #if defined(__MINGW32__) - #define BENCHMARK_OS_MINGW 1 - #endif -#elif defined(__APPLE__) - #define BENCHMARK_OS_APPLE 1 - #include "TargetConditionals.h" - #if defined(TARGET_OS_MAC) - #define BENCHMARK_OS_MACOSX 1 - #if defined(TARGET_OS_IPHONE) - #define BENCHMARK_OS_IOS 1 - #endif - #endif -#elif defined(__FreeBSD__) - #define BENCHMARK_OS_FREEBSD 1 -#elif defined(__NetBSD__) - #define BENCHMARK_OS_NETBSD 1 -#elif defined(__OpenBSD__) - #define BENCHMARK_OS_OPENBSD 1 -#elif defined(__DragonFly__) - #define BENCHMARK_OS_DRAGONFLY 1 -#elif defined(__linux__) - #define BENCHMARK_OS_LINUX 1 -#elif defined(__native_client__) - #define BENCHMARK_OS_NACL 1 -#elif defined(__EMSCRIPTEN__) - #define BENCHMARK_OS_EMSCRIPTEN 1 -#elif defined(__rtems__) - #define BENCHMARK_OS_RTEMS 1 -#elif defined(__Fuchsia__) - #define BENCHMARK_OS_FUCHSIA 1 -#elif defined (__SVR4) && defined (__sun) - #define BENCHMARK_OS_SOLARIS 1 -#elif defined(__QNX__) - #define BENCHMARK_OS_QNX 1 -#elif defined(__MVS__) - #define BENCHMARK_OS_ZOS 1 -#endif - -#if defined(__ANDROID__) && defined(__GLIBCXX__) - #define BENCHMARK_STL_ANDROID_GNUSTL 1 -#endif - -#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \ - && !defined(__EXCEPTIONS) - #define BENCHMARK_HAS_NO_EXCEPTIONS -#endif - -#if defined(COMPILER_CLANG) || defined(COMPILER_GCC) - #define FLS_BENCH_MAYBE_UNUSED __attribute__((unused)) -#else - #define BENCHMARK_MAYBE_UNUSED -#endif - -// clang-format on - -#ifdef BENCHMARK_OS_WINDOWS -#include -#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA -#include -#include -#include -#else -#include -#ifndef BENCHMARK_OS_FUCHSIA -#include -#endif -#include -#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD -#include -#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ - defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY -#define BENCHMARK_HAS_SYSCTL -#include -#endif -#endif -#if defined(BENCHMARK_OS_SOLARIS) -#include -#endif -#if defined(BENCHMARK_OS_QNX) -#include -#endif - -#if defined(__GNUC__) || defined(__clang__) -#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#elif defined(_MSC_VER) && !defined(__clang__) -#define BENCHMARK_ALWAYS_INLINE __forceinline -#if _MSC_VER >= 1900 -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#else -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) -#endif -#define __func__ __FUNCTION__ -#else -#define BENCHMARK_ALWAYS_INLINE -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) -#endif - -#if defined(BENCHMARK_OS_MACOSX) -#include -#endif -// For MSVC, we want to use '_asm rdtsc' when possible (since it works -// with even ancient MSVC compilers), and when not possible the -// __rdtsc intrinsic, declared in . Unfortunately, in some -// environments, and have conflicting -// declarations of some other intrinsics, breaking compilation. -// Therefore, we simply declare __rdtsc ourselves. See also -// http://connect.microsoft.com/VisualStudio/feedback/details/262047 -#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) -extern "C" uint64_t __rdtsc(); -#pragma intrinsic(__rdtsc) -#endif - -#if !defined(BENCHMARK_OS_WINDOWS) || defined(BENCHMARK_OS_MINGW) -#include -#include -#endif - -#ifdef BENCHMARK_OS_EMSCRIPTEN -#include -#endif - -#ifdef __aarch64__ -#define KPERF_LIST \ - /* ret, name, params */ \ - F(int, kpc_get_counting, void) \ - F(int, kpc_force_all_ctrs_set, int) \ - F(int, kpc_set_counting, uint32_t) \ - F(int, kpc_set_thread_counting, uint32_t) \ - F(int, kpc_set_config, uint32_t, void*) \ - F(int, kpc_get_config, uint32_t, void*) \ - F(int, kpc_set_period, uint32_t, void*) \ - F(int, kpc_get_period, uint32_t, void*) \ - F(uint32_t, kpc_get_counter_count, uint32_t) \ - F(uint32_t, kpc_get_config_count, uint32_t) \ - F(int, kperf_sample_get, int*) \ - F(int, kpc_get_thread_counters, int, unsigned int, void*) - -#define F(ret, name, ...) \ - typedef ret name##proc(__VA_ARGS__); \ - static name##proc* name; -KPERF_LIST -#undef F - -#define CFGWORD_EL0A32EN_MASK (0x10000) -#define CFGWORD_EL0A64EN_MASK (0x20000) -#define CFGWORD_EL1EN_MASK (0x40000) -#define CFGWORD_EL3EN_MASK (0x80000) -#define CFGWORD_ALLMODES_MASK (0xf0000) - -#define CPMU_NONE 0 -#define CPMU_CORE_CYCLE 0x02 -#define CPMU_INST_A64 0x8c -#define CPMU_INST_BRANCH 0x8d -#define CPMU_SYNC_DC_LOAD_MISS 0xbf -#define CPMU_SYNC_DC_STORE_MISS 0xc0 -#define CPMU_SYNC_DTLB_MISS 0xc1 -#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4 -#define CPMU_SYNC_BR_ANY_MISP 0xcb -#define CPMU_FED_IC_MISS_DEM 0xd3 -#define CPMU_FED_ITLB_MISS 0xd4 - -#define KPC_CLASS_FIXED (0) -#define KPC_CLASS_CONFIGURABLE (1) -#define KPC_CLASS_POWER (2) -#define KPC_CLASS_RAWPMU (3) -#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED) -#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE) -#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER) -#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU) - -#define COUNTERS_COUNT 10 -#define CONFIG_COUNT 8 -#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK) - -#endif - -#ifdef BENCHMARK_OS_WINDOWS -#include -#endif - -#ifdef BENCHMARK_OS_ZOS -#include -#endif - -#include -#ifdef BENCHMARK_STL_ANDROID_GNUSTL -#include -#endif - -#ifdef BENCHMARK_OS_WINDOWS -#include -#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA -#include -#include -#include -#else -#include -#ifndef BENCHMARK_OS_FUCHSIA -#include -#endif -#include -#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD -#include -#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ - defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY -#define BENCHMARK_HAS_SYSCTL -#include -#endif -#endif -#if defined(BENCHMARK_OS_SOLARIS) -#include -#endif -#if defined(BENCHMARK_OS_QNX) -#include -#endif - -#define SOURCE_DIR "${CMAKE_SOURCE_DIR}" -#define CMAKE_OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}" -#define CMAKE_HOST_SYSTEM_PROCESSOR "${CMAKE_HOST_SYSTEM_PROCESSOR}" -#define CMAKE_SYSTEM_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}" -#define CMAKE_HOST_SYSTEM_NAME "${CMAKE_HOST_SYSTEM_NAME}" -#define CMAKE_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}" -#define CMAKE_C_COMPILER "${CMAKE_C_COMPILER}" -#define CMAKE_CXX_COMPILER "${CMAKE_CXX_COMPILER}" -#define CMAKE_CXX_COMPILER_ID "${CMAKE_CXX_COMPILER_ID}" -#define CMAKE_CXX_COMPILER_VERSION "${CMAKE_CXX_COMPILER_VERSION}" -#define CMAKE_CROSSCOMPILING "${CMAKE_CROSSCOMPILING}" -#define CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" -#define CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" -#define CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" -#define CMAKE_TOOLCHAIN_FILE "${CMAKE_TOOLCHAIN_FILE}" - -#define TARGET_NAME "${TARGET_NAME}" -#define TARGET_COMPILE_OPTIONS "${TARGET_COMPILE_OPTIONS}" - -/*---------------------------------------------------------------------------------------------------------------------\ - * Lib: -\---------------------------------------------------------------------------------------------------------------------*/ -namespace benchmark { -/* From: https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h - * Now api has been added to be compatible with the rest of fls_bench. - */ -#if defined(__linuxm__) -namespace perf { -template -class LinuxEvents { - - int fd; - perf_event_attr attribs; - bool running; - -public: - LinuxEvents(int config) - : fd(0) { - memset(&attribs, 0, sizeof(attribs)); - attribs.type = TYPE; - attribs.size = sizeof(attribs); - attribs.config = config; - attribs.disabled = 1; - attribs.exclude_kernel = 1; - attribs.exclude_hv = 1; - - const int pid = 0; // the current process - const int cpu = -1; // all CPUs - const int group = -1; // no group - const unsigned long flags = 0; - fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags); - if (fd == -1) { report_error("perf_event_open"); } - - running = false; - } - - ~LinuxEvents() { close(fd); } - - void start() { - if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) { report_error("ioctl(PERF_EVENT_IOC_RESET)"); } - - if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) { report_error("ioctl(PERF_EVENT_IOC_ENABLE)"); } - } - - unsigned long end() { - if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) { report_error("ioctl(PERF_EVENT_IOC_DISABLE)"); } - - unsigned long result; - if (read(fd, &result, sizeof(result)) == -1) { report_error("read"); } - - return result; - } - - unsigned long now() { - if (!running) { - if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) { report_error("ioctl(PERF_EVENT_IOC_RESET)"); } - - if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) { report_error("ioctl(PERF_EVENT_IOC_ENABLE)"); } - - running = true; - return 0; - } else { - if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) { report_error("ioctl(PERF_EVENT_IOC_DISABLE)"); } - - unsigned long result; - if (read(fd, &result, sizeof(result)) == -1) { report_error("read"); } - - running = false; - return result; - } - } - -private: - void report_error(const std::string& context) { - throw std::runtime_error(context + ": " + std::string(strerror(errno))); - } -}; - -} // namespace perf - -perf::LinuxEvents cycles(PERF_COUNT_HW_CPU_CYCLES); -#endif - -// NOTE: only i386 and x86_64 have been well tested. -// PPC, sparc, alpha, and ia64 are based on -// http://peter.kuscsik.com/wordpress/?p=14 -// with modifications by m3b. See also -// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h -namespace cycleclock { - -#if defined(__aarch64__) -#if defined(__APPLE__) -static uint64_t g_counters[COUNTERS_COUNT]; -static uint64_t g_config[COUNTERS_COUNT]; -#endif -#endif - -FLS_BENCH_MAYBE_UNUSED static uint64_t get_counters() { -#if defined(__aarch64__) -#if defined(__APPLE__) - static bool WARNED = false; - if (kpc_get_thread_counters(0, COUNTERS_COUNT, g_counters)) { - if (!WARNED) { - printf("kpc_get_thread_counters failed, run as sudo?\n"); - WARNED = true; - } - return 1; - } - // g_counters[3 + 2] gives you the number of instructions 'decoded' - // whereas g_counters[1] might give you the number of instructions 'retired'. - return g_counters[0 + 2]; -#endif -#endif - return 0; -} - -FLS_BENCH_MAYBE_UNUSED static void configure_rdtsc() { -#if defined(__aarch64__) -#if defined(__APPLE__) - if (kpc_set_config(KPC_MASK, g_config)) { - printf("kpc_set_config failed\n"); - return; - } - - if (kpc_force_all_ctrs_set(1)) { - printf("kpc_force_all_ctrs_set failed\n"); - return; - } - - if (kpc_set_counting(KPC_MASK)) { - printf("kpc_set_counting failed\n"); - return; - } - - if (kpc_set_thread_counting(KPC_MASK)) { - printf("kpc_set_thread_counting failed\n"); - return; - } -#endif -#endif -} - -static void Init() { -#if defined(__aarch64__) -#if defined(__APPLE__) - void* kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY); - if (!kperf) { - printf("kperf = %p\n", kperf); - return; - } -#define F(ret, name, ...) \ - name = (name##proc*)(dlsym(kperf, #name)); \ - if (!name) { \ - printf("%s = %p\n", #name, (void*)name); \ - return; \ - } - KPERF_LIST -#undef F - - if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) { - printf("wrong fixed counters count\n"); - return; - } - - if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) { - printf("wrong fixed config count\n"); - return; - } - g_config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK; - g_config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK; - g_config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK; - g_config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK; - - configure_rdtsc(); -#endif -#endif -} -static uint64_t get_counters(); -// This should return the number of cycles since power-on. Thread-safe. -inline BENCHMARK_ALWAYS_INLINE int64_t Now() { - // #if defined(BENCHMARK_OS_MACOSX) - // // this goes at the top because we need ALL Macs, regardless of - // // architecture, to return the number of "mach time units" that - // // have passed since startup. See sysinfo.cc where - // // InitializeSystemInfo() sets the supposed cpu clock frequency of - // // macs to the number of mach time units per second, not actual - // // CPU clock frequency (which can change in the face of CPU - // // frequency scaling). Also note that when the Mac sleeps, this - // // counter pauses; it does not continue counting, nor does it - // // reset to zero. - // return mach_absolute_time(); - // #el -#if defined(BENCHMARK_OS_EMSCRIPTEN) - // this goes above x86-specific code because old versions of Emscripten - // define __x86_64__, although they have nothing to do with it. - // return static_cast(emscripten_get_now() * 1e+6); - - return std::chrono::high_resolution_clock::now().time_since_epoch().count(); -#elif defined(__i386__) - int64_t ret; - __asm__ volatile("rdtsc" : "=A"(ret)); - return ret; -#elif defined(__x86_64__) || defined(__amd64__) - uint64_t low, high; - __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); - return (high << 32) | low; -#elif defined(__powerpc__) || defined(__ppc__) - // This returns a time-base, which is not always precisely a cycle-count. -#if defined(__powerpc64__) || defined(__ppc64__) - int64_t tb; - asm volatile("mfspr %0, 268" : "=r"(tb)); - return tb; -#else - uint32_t tbl, tbu0, tbu1; - asm volatile("mftbu %0\n" - "mftb %1\n" - "mftbu %2" - : "=r"(tbu0), "=r"(tbl), "=r"(tbu1)); - tbl &= -static_cast(tbu0 == tbu1); - // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed) - return (static_cast(tbu1) << 32) | tbl; -#endif -#elif defined(__sparc__) - int64_t tick; - asm(".byte 0x83, 0x41, 0x00, 0x00"); - asm("mov %%g1, %0" : "=r"(tick)); - return tick; -#elif defined(__ia64__) - int64_t itc; - asm("mov %0 = ar.itc" : "=r"(itc)); - return itc; -#elif defined(COMPILER_MSVC) && defined(_M_IX86) - // Older MSVC compilers (like 7.x) don't seem to support the - // __rdtsc intrinsic properly, so I prefer to use _asm instead - // when I know it will work. Otherwise, I'll use __rdtsc and hope - // the code is being compiled with a non-ancient compiler. - _asm rdtsc -#elif defined(COMPILER_MSVC) && defined(_M_ARM64) - // See - // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019 - // and https://reviews.llvm.org/D53115 - int64_t virtual_timer_value; - virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT); - return virtual_timer_value; -#elif defined(COMPILER_MSVC) - return __rdtsc(); -#elif defined(BENCHMARK_OS_NACL) - // Native Client validator on x86/x86-64 allows RDTSC instructions, - // and this case is handled above. Native Client validator on ARM - // rejects MRC instructions (used in the ARM-specific sequence below), - // so we handle it here. Portable Native Client compiles to - // architecture-agnostic bytecode, which doesn't provide any - // cycle counter access mnemonics. - - // Native Client does not provide any API to access cycle counter. - // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday - // because is provides nanosecond resolution (which is noticable at - // least for PNaCl modules running on x86 Mac & Linux). - // Initialize to always return 0 if clock_gettime fails. - struct timespec ts = {0, 0}; - clock_gettime(CLOCK_MONOTONIC, &ts); - return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; -#elif defined(__aarch64__) - // System timer of ARMv8 runs at a different frequency than the CPU's. - // The frequency is fixed, typically in the range 1-50MHz. It can be - // read at CNTFRQ special register. We assume the OS has set up - // the virtual timer properly. - // int64_t virtual_timer_value; - // asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); - // return virtual_timer_value; -#if defined(__APPLE__) - return get_counters(); -#else - return cycles.now(); -#endif - -#elif defined(__ARM_ARCH) - // V6 is the earliest arch that has a standard cyclecount - // Native Client validator doesn't allow MRC instructions. -#if (__ARM_ARCH >= 6) - uint32_t pmccntr; - uint32_t pmuseren; - uint32_t pmcntenset; - // Read the user mode perf monitor counter access permissions. - asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); - if (pmuseren & 1) // Allows reading perfmon counters for user mode code. - { - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); - if (pmcntenset & 0x80000000ul) // Is it counting? - { - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); - // The counter is set up to count every 64th cycle - return static_cast(pmccntr) * 64; // Should optimize to << 6 - } - } -#endif - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#elif defined(__mips__) || defined(__m68k__) - // mips apparently only allows rdtsc for superusers, so we fall - // back to gettimeofday. It's possible clock_gettime would be better. - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#elif defined(__s390__) // Covers both s390 and s390x. - // Return the CPU clock. - uint64_t tsc; -#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL) - // z/OS XL compiler HLASM syntax. - asm(" stck %0" : "=m"(tsc) : : "cc"); -#else - asm("stck %0" : "=Q"(tsc) : : "cc"); -#endif - return tsc; -#elif defined(__riscv) // RISC-V - // Use RDCYCLE (and RDCYCLEH on riscv32) -#if __riscv_xlen == 32 - uint32_t cycles_lo, cycles_hi0, cycles_hi1; - // This asm also includes the PowerPC overflow handling strategy, as above. - // Implemented in assembly because Clang insisted on branching. - asm volatile("rdcycleh %0\n" - "rdcycle %1\n" - "rdcycleh %2\n" - "sub %0, %0, %2\n" - "seqz %0, %0\n" - "sub %0, zero, %0\n" - "and %1, %1, %0\n" - : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1)); - return (static_cast(cycles_hi1) << 32) | cycles_lo; -#else - uint64_t cycles; - asm volatile("rdcycle %0" : "=r"(cycles)); - return cycles; -#endif -#elif defined(__e2k__) || defined(__elbrus__) - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#else - // The soft failover to a generic implementation is automatic only for ARM. - // For other platforms the developer is expected to make an attempt to create - // a fast implementation and use generic version if nothing better is available. -#error You need to define CycleTimer for your OS and CPU - // return - // std::chrono::high_resolution_clock::now().time_since_epoch().count(); - -#endif -} -} // end namespace cycleclock - -namespace timer { -inline uint64_t Now() { return std::chrono::high_resolution_clock::now().time_since_epoch().count(); } -} // namespace timer - -const int kNumMillisPerSecond = 1000; -const int kNumMicrosPerMilli = 1000; -const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000; -const int kNumNanosPerMicro = 1000; -const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond; - -#ifdef BENCHMARK_OS_WINDOWS -// Window's Sleep takes milliseconds argument. -void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } -void SleepForSeconds(double seconds) { SleepForMilliseconds(static_cast(kNumMillisPerSecond * seconds)); } -#else // BENCHMARK_OS_WINDOWS -static void SleepForMicroseconds(int microseconds) { -#ifdef BENCHMARK_OS_ZOS - // z/OS does not support nanosleep. Instead call sleep() and then usleep() to - // sleep for the remaining microseconds because usleep() will fail if its - // argument is greater than 1000000. - div_t sleepTime = div(microseconds, kNumMicrosPerSecond); - int seconds = sleepTime.quot; - while (seconds != 0) { - seconds = sleep(seconds); - } - while (usleep(sleepTime.rem) == -1 && errno == EINTR) - ; -#else - struct timespec sleep_time; - sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; - sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; - while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) - ; // Ignore signals and wait for the full interval to elapse. -#endif -} - -static void SleepForMilliseconds(int milliseconds) { SleepForMicroseconds(milliseconds * kNumMicrosPerMilli); } - -FLS_BENCH_MAYBE_UNUSED static void SleepForSeconds(double seconds) { - SleepForMicroseconds(static_cast(seconds * kNumMicrosPerSecond)); -} -#endif // BENCHMARK_OS_WINDOWS - -namespace internal { -// The arraysize(arr) macro returns the # of elements in an array arr. -// The expression is a compile-time constant, and therefore can be -// used in defining new arrays, for example. If you use arraysize on -// a pointer by mistake, you will get a compile-time error. -// - -// This template function declaration is used in defining arraysize. -// Note that the function doesn't need an implementation, as we only -// use its type. -template -char (&ArraySizeHelper(T (&array)[N]))[N]; - -// That gcc wants both of these prototypes seems mysterious. VC, for -// its part, can't decide which to use (another mystery). Matching of -// template overloads: the final frontier. -#ifndef COMPILER_MSVC -template -char (&ArraySizeHelper(const T (&array)[N]))[N]; -#endif - -#define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array))) - -} // namespace internal - -// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. -const char kBigSIUnits[] = "kMGTPEZY"; -// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. -const char kBigIECUnits[] = "KMGTPEZY"; -// milli, micro, nano, pico, femto, atto, zepto, yocto. -const char kSmallSIUnits[] = "munpfazy"; - -// We require that all three arrays have the same size. -static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), "SI and IEC unit arrays must be the same size"); -static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits), - "Small SI and Big SI unit arrays must be the same size"); - -static const int64_t kUnitsSize = arraysize(kBigSIUnits); - -static void ToExponentAndMantissa( - double val, double thresh, int precision, double one_k, std::string* mantissa, int64_t* exponent) { - std::stringstream mantissa_stream; - - if (val < 0) { - mantissa_stream << "-"; - val = -val; - } - - // Adjust threshold so that it never excludes things which can't be rendered - // in 'precision' digits. - const double adjusted_threshold = std::max(thresh, 1.0 / std::pow(10.0, precision)); - const double big_threshold = adjusted_threshold * one_k; - const double small_threshold = adjusted_threshold; - // Values in ]simple_threshold,small_threshold[ will be printed as-is - const double simple_threshold = 0.01; - - if (val > big_threshold) { - // Positive powers - double scaled = val; - for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) { - scaled /= one_k; - if (scaled <= big_threshold) { - mantissa_stream << scaled; - *exponent = i + 1; - *mantissa = mantissa_stream.str(); - return; - } - } - mantissa_stream << val; - *exponent = 0; - } else if (val < small_threshold) { - // Negative powers - if (val < simple_threshold) { - double scaled = val; - for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) { - scaled *= one_k; - if (scaled >= small_threshold) { - mantissa_stream << scaled; - *exponent = -static_cast(i + 1); - *mantissa = mantissa_stream.str(); - return; - } - } - } - mantissa_stream << val; - *exponent = 0; - } else { - mantissa_stream << val; - *exponent = 0; - } - *mantissa = mantissa_stream.str(); -} - -static std::string ExponentToPrefix(int64_t exponent, bool iec) { - if (exponent == 0) { return ""; } - - const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1); - if (index >= kUnitsSize) { return ""; } - - const char* array = (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); - if (iec) { - return array[index] + std::string("i"); - } else { - return std::string(1, array[index]); - } -} - -static std::string ToBinaryStringFullySpecified(double value, double threshold, int precision, double one_k = 1024.0) { - std::string mantissa; - int64_t exponent; - ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa, &exponent); - return mantissa + ExponentToPrefix(exponent, false); -} - -FLS_BENCH_MAYBE_UNUSED static void AppendHumanReadable(int n, std::string* str) { - std::stringstream ss; - // Round down to the nearest SI prefix. - ss << ToBinaryStringFullySpecified(n, 1.0, 0); - *str += ss.str(); -} - -FLS_BENCH_MAYBE_UNUSED static std::string HumanReadableNumber(double n, double one_k = 1024.0) { - // 1.1 means that figures up to 1.1k should be shown with the next unit down; - // this softens edge effects. - // 1 means that we should show one decimal place of precision. - return ToBinaryStringFullySpecified(n, 1.1, 1, one_k); -} - -static std::string StrFormatImp(const char* msg, va_list args) { - // we might need a second shot at this, so pre-emptivly make a copy - va_list args_cp; - va_copy(args_cp, args); - - // TODO(ericwf): use std::array for first attempt to avoid one memory - // allocation guess what the size might be - std::array local_buff; - std::size_t size = local_buff.size(); - // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation - // in the android-ndk - auto ret = vsnprintf(local_buff.data(), size, msg, args_cp); - - va_end(args_cp); - - // handle empty expansion - if (ret == 0) return std::string {}; - if (static_cast(ret) < size) { return std::string(local_buff.data()); } - - // we did not provide a long enough buffer on our first attempt. - // add 1 to size to account for null-byte in size cast to prevent overflow - size = static_cast(ret) + 1; - auto buff_ptr = std::unique_ptr(new char[size]); - // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation - // in the android-ndk - ret = vsnprintf(buff_ptr.get(), size, msg, args); - return std::string(buff_ptr.get()); -} - -#if defined(__MINGW32__) -__attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2))) -#elif defined(__GNUC__) -__attribute__((format(printf, 1, 2))) -#endif - -static std::string -StrFormat(const char* format, ...) { - va_list args; - va_start(args, format); - std::string tmp = StrFormatImp(format, args); - va_end(args); - return tmp; -} - -inline std::ostream& StrCatImp(std::ostream& out) { return out; } - -template -inline std::ostream& StrCatImp(std::ostream& out, First&& f, Rest&&... rest) { - out << std::forward(f); - return StrCatImp(out, std::forward(rest)...); -} - -template -inline std::string StrCat(Args&&... args) { - std::ostringstream ss; - StrCatImp(ss, std::forward(args)...); - return ss.str(); -} - -std::vector StrSplit(const std::string& str, char delim); - -#ifdef BENCHMARK_STL_ANDROID_GNUSTL -/* - * GNU STL in Android NDK lacks support for some C++11 functions, including - * stoul, stoi, stod. We reimplement them here using C functions strtoul, - * strtol, strtod. Note that reimplemented functions are in benchmark:: - * namespace, not std:: namespace. - */ -unsigned long stoul(const std::string& str, size_t* pos = nullptr, int base = 10); -int stoi(const std::string& str, size_t* pos = nullptr, int base = 10); -double stod(const std::string& str, size_t* pos = nullptr); -#else -using std::stod; -using std::stoi; -using std::stoul; -#endif - -class CPUInfo { -public: - struct CacheInfo { - std::string type; - int level; - int size; - int num_sharing; - }; - - enum Scaling { UNKNOWN, ENABLED, DISABLED }; - - static inline std::string ToString(Scaling v) { - switch (v) { - case UNKNOWN: - return "UNKNOWN"; - case ENABLED: - return "ENABLED"; - case DISABLED: - return "DISABLED"; - default: - return "UNKNOWN"; - } - } - - // Getters - static const CPUInfo& getInstance() { - static const CPUInfo info; - return info; - } - int getNumCpus() const { return num_cpus; }; - double getCyclesPerSecond() const { return cycles_per_second; }; - const std::vector& getCaches() const { return caches; }; - const std::vector& getLoadAvg() const { return load_avg; }; - std::string getScaling() const { return ToString(scaling); }; - - int num_cpus; - Scaling scaling; - double cycles_per_second; - std::vector caches; - std::vector load_avg; - -private: - // private constructor - CPUInfo(); - - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo); -}; - -static void PrintImp(std::ostream& out) { out << std::endl; } - -template -void PrintImp(std::ostream& out, First&& f, Rest&&... rest) { - out << std::forward(f); - PrintImp(out, std::forward(rest)...); -} - -template -BENCHMARK_NORETURN void PrintErrorAndDie(ARGS&&... args) { - PrintImp(std::cerr, std::forward(args)...); - std::exit(EXIT_FAILURE); -} - -#ifdef BENCHMARK_HAS_SYSCTL - -/// ValueUnion - A type used to correctly alias the byte-for-byte output of -/// `sysctl` with the result type it's to be interpreted as. -struct ValueUnion { - union DataT { - uint32_t uint32_value; - uint64_t uint64_value; - // For correct aliasing of union members from bytes. - char bytes[8]; - }; - using DataPtr = std::unique_ptr; - - // The size of the data union member + its trailing array size. - size_t Size; - DataPtr Buff; - -public: - ValueUnion() - : Size(0) - , Buff(nullptr, &std::free) {} - - explicit ValueUnion(size_t buff_size) - : Size(sizeof(DataT) + buff_size) - , Buff(::new(std::malloc(Size)) DataT(), &std::free) {} - - ValueUnion(ValueUnion&& other) = default; - explicit operator bool() const { return bool(Buff); } - char* data() const { return Buff->bytes; } - std::string GetAsString() const { return {data()}; } - int64_t GetAsInteger() const { - if (Size == sizeof(Buff->uint32_value)) { - return static_cast(Buff->uint32_value); - } else if (Size == sizeof(Buff->uint64_value)) { - return static_cast(Buff->uint64_value); - } - BENCHMARK_UNREACHABLE(); - } - uint64_t GetAsUnsigned() const { - if (Size == sizeof(Buff->uint32_value)) { - return Buff->uint32_value; - } else if (Size == sizeof(Buff->uint64_value)) { - return Buff->uint64_value; - } - BENCHMARK_UNREACHABLE(); - } - template - std::array GetAsArray() { - const int ArrSize = sizeof(T) * N; - // CHECK_LE(ArrSize, Size); - std::array arr; - std::memcpy(arr.data(), data(), ArrSize); - return arr; - } -}; - -static ValueUnion GetSysctlImp(std::string const& name) { -#if defined BENCHMARK_OS_OPENBSD - int mib[2]; - - mib[0] = CTL_HW; - if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")) { - ValueUnion buff(sizeof(int)); - - if (Name == "hw.ncpu") { - mib[1] = HW_NCPU; - } else { - mib[1] = HW_CPUSPEED; - } - - if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) { return ValueUnion(); } - return buff; - } - return ValueUnion(); -#else - size_t cur_buff_size = 0; - if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1) { return {}; } - - ValueUnion buff(cur_buff_size); - if (sysctlbyname(name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) { return buff; } - return {}; -#endif -} - -FLS_BENCH_MAYBE_UNUSED static bool GetSysctl(std::string const& name, std::string* out) { - out->clear(); - auto buff = GetSysctlImp(name); - if (!buff) { return false; } - out->assign(buff.data()); - return true; -} - -template ::value>::type> -bool GetSysctl(std::string const& name, TP* Out) { - *Out = 0; - auto buff = GetSysctlImp(name); - if (!buff) { return false; } - *Out = static_cast(buff.GetAsUnsigned()); - return true; -} - -template -bool GetSysctl(std::string const& name, std::array* Out) { - auto buff = GetSysctlImp(name); - if (!buff) { return false; } - *Out = buff.GetAsArray(); - return true; -} -#endif - -template -bool ReadFromFile(std::string const& fname, ARG_T* arg) { - *arg = ARG_T(); - std::ifstream f(fname.c_str()); - if (!f.is_open()) { return false; } - f >> *arg; - return f.good(); -} - -static CPUInfo::Scaling CpuScaling(int num_cpus) { - // We don't have a valid CPU count, so don't even bother. - if (num_cpus <= 0) { return CPUInfo::Scaling::UNKNOWN; } -#ifdef BENCHMARK_OS_QNX - return CPUInfo::Scaling::UNKNOWN; -#endif -#ifndef BENCHMARK_OS_WINDOWS - // On Linux, the CPUfreq subsystem exposes CPU information as files on the - // local file system. If reading the exported files fails, then we may not be - // running on Linux, so we silently ignore all the read errors. - std::string res; - for (int cpu = 0; cpu < num_cpus; ++cpu) { - std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); - if (ReadFromFile(governor_file, &res) && res != "performance") { return CPUInfo::Scaling::ENABLED; } - } - return CPUInfo::Scaling::DISABLED; -#endif - return CPUInfo::Scaling::UNKNOWN; -} - -static int CountSetBitsInCPUMap(std::string val) { - auto count_bits = [](std::string Part) { - using CPUMask = std::bitset; - Part = "0x" + Part; - CPUMask mask(benchmark::stoul(Part, nullptr, 16)); - return static_cast(mask.count()); - }; - size_t pos; - int total = 0; - while ((pos = val.find(',')) != std::string::npos) { - total += count_bits(val.substr(0, pos)); - val = val.substr(pos + 1); - } - if (!val.empty()) { total += count_bits(val); } - return total; -} - -FLS_BENCH_MAYBE_UNUSED -static std::vector GetCacheSizesFromKVFS() { - std::vector res; - std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; - int idx = 0; - while (true) { - CPUInfo::CacheInfo info; - std::string f_path = StrCat(dir, "index", idx++, "/"); - std::ifstream f(StrCat(f_path, "size").c_str()); - if (!f.is_open()) { break; } - std::string suffix; - f >> info.size; - if (f.fail()) { PrintErrorAndDie("Failed while reading file '", f_path, "size'"); } - if (f.good()) { - f >> suffix; - if (f.bad()) { - PrintErrorAndDie("Invalid cache size format: failed to read size suffix"); - } else if (f && suffix != "K") { - PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix); - } else if (suffix == "K") { - info.size *= 1024; - } - } - if (!ReadFromFile(StrCat(f_path, "type"), &info.type)) { - PrintErrorAndDie("Failed to read from file ", f_path, "type"); - } - if (!ReadFromFile(StrCat(f_path, "level"), &info.level)) { - PrintErrorAndDie("Failed to read from file ", f_path, "level"); - } - std::string map_str; - if (!ReadFromFile(StrCat(f_path, "shared_cpu_map"), &map_str)) { - PrintErrorAndDie("Failed to read from file ", f_path, "shared_cpu_map"); - } - info.num_sharing = CountSetBitsInCPUMap(map_str); - res.push_back(info); - } - - return res; -} - -#ifdef BENCHMARK_OS_MACOSX -std::vector GetCacheSizesMacOSX() { - std::vector res; - std::array cache_counts {{0, 0, 0, 0}}; - GetSysctl("hw.cacheconfig", &cache_counts); - - struct { - std::string name; - std::string type; - int level; - uint64_t num_sharing; - } Cases[] = {{"hw.l1dcachesize", "Data", 1, cache_counts[1]}, - {"hw.l1icachesize", "Instruction", 1, cache_counts[1]}, - {"hw.l2cachesize", "Unified", 2, cache_counts[2]}, - {"hw.l3cachesize", "Unified", 3, cache_counts[3]}}; - for (auto& c : Cases) { - int val; - if (!GetSysctl(c.name, &val)) { continue; } - CPUInfo::CacheInfo info; - info.type = c.type; - info.level = c.level; - info.size = val; - info.num_sharing = static_cast(c.num_sharing); - res.push_back(std::move(info)); - } - return res; -} -#elif defined(BENCHMARK_OS_WINDOWS) -std::vector GetCacheSizesWindows() { - std::vector res; - DWORD buffer_size = 0; - using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; - using CInfo = CACHE_DESCRIPTOR; - - using UPtr = std::unique_ptr; - GetLogicalProcessorInformation(nullptr, &buffer_size); - UPtr buff((PInfo*)malloc(buffer_size), &std::free); - if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) - PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ", GetLastError()); - - PInfo* it = buff.get(); - PInfo* end = buff.get() + (buffer_size / sizeof(PInfo)); - - for (; it != end; ++it) { - if (it->Relationship != RelationCache) { continue; } - using BitSet = std::bitset; - BitSet B(it->ProcessorMask); - // To prevent duplicates, only consider caches where CPU 0 is specified - if (!B.test(0)) { continue; } - CInfo* Cache = &it->Cache; - CPUInfo::CacheInfo C; - C.num_sharing = static_cast(B.count()); - C.level = Cache->Level; - C.size = Cache->Size; - switch (Cache->Type) { - case CacheUnified: - C.type = "Unified"; - break; - case CacheInstruction: - C.type = "Instruction"; - break; - case CacheData: - C.type = "Data"; - break; - case CacheTrace: - C.type = "Trace"; - break; - default: - C.type = "Unknown"; - break; - } - res.push_back(C); - } - return res; -} -#elif BENCHMARK_OS_QNX -std::vector GetCacheSizesQNX() { - std::vector res; - struct cacheattr_entry* cache = SYSPAGE_ENTRY(cacheattr); - uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr); - int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize; - for (int i = 0; i < num; ++i) { - CPUInfo::CacheInfo info; - switch (cache->flags) { - case CACHE_FLAG_INSTR: - info.type = "Instruction"; - info.level = 1; - break; - case CACHE_FLAG_DATA: - info.type = "Data"; - info.level = 1; - break; - case CACHE_FLAG_UNIFIED: - info.type = "Unified"; - info.level = 2; - break; - case CACHE_FLAG_SHARED: - info.type = "Shared"; - info.level = 3; - break; - default: - continue; - break; - } - info.size = cache->line_size * cache->num_lines; - info.num_sharing = 0; - res.push_back(std::move(info)); - cache = SYSPAGE_ARRAY_ADJ_OFFSET(cacheattr, cache, elsize); - } - return res; -} -#endif - -static std::vector GetCacheSizes() { -#ifdef BENCHMARK_OS_MACOSX - return GetCacheSizesMacOSX(); -#elif defined(BENCHMARK_OS_WINDOWS) - return GetCacheSizesWindows(); -#elif defined(BENCHMARK_OS_QNX) - return GetCacheSizesQNX(); -#else - return GetCacheSizesFromKVFS(); -#endif -} - -FLS_BENCH_MAYBE_UNUSED std::string GetSystemName() { -#if defined(BENCHMARK_OS_WINDOWS) - std::string str; - const unsigned COUNT = MAX_COMPUTERNAME_LENGTH + 1; - TCHAR hostname[COUNT] = {'\0'}; - DWORD DWCOUNT = COUNT; - if (!GetComputerName(hostname, &DWCOUNT)) { return std::string(""); } -#ifndef UNICODE - str = std::string(hostname, DWCOUNT); -#else - // Using wstring_convert, Is deprecated in C++17 - using convert_type = std::codecvt_utf8; - std::wstring_convert converter; - std::wstring wStr(hostname, DWCOUNT); - str = converter.to_bytes(wStr); -#endif - return str; -#else // defined(BENCHMARK_OS_WINDOWS) -#ifndef HOST_NAME_MAX -#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined -#define HOST_NAME_MAX 64 -#elif defined(BENCHMARK_OS_NACL) -#define HOST_NAME_MAX 64 -#elif defined(BENCHMARK_OS_QNX) -#define HOST_NAME_MAX 154 -#elif defined(BENCHMARK_OS_RTEMS) -#define HOST_NAME_MAX 256 -#else -#warning "HOST_NAME_MAX not defined. using 64" -#define HOST_NAME_MAX 64 -#endif -#endif // def HOST_NAME_MAX - char hostname[HOST_NAME_MAX]; - int retVal = gethostname(hostname, HOST_NAME_MAX); - if (retVal != 0) { return std::string(""); } - return std::string(hostname); -#endif // Catch-all POSIX block. -} - -static int GetNumCPUs() { -#ifdef BENCHMARK_HAS_SYSCTL - int NumCPU = -1; - if (GetSysctl("hw.ncpu", &NumCPU)) { return NumCPU; } - fprintf(stderr, "Err: %s\n", strerror(errno)); - std::exit(EXIT_FAILURE); -#elif defined(BENCHMARK_OS_WINDOWS) - SYSTEM_INFO sysinfo; - // Use memset as opposed to = {} to avoid GCC missing initializer false - // positives. - std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); - GetSystemInfo(&sysinfo); - return sysinfo.dwNumberOfProcessors; // number of logical - // processors in the current - // group -#elif defined(BENCHMARK_OS_SOLARIS) - // Returns -1 in case of a failure. - int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); - if (NumCPU < 0) { fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", strerror(errno)); } - return NumCPU; -#elif defined(BENCHMARK_OS_QNX) - return static_cast(_syspage_ptr->num_cpu); -#else - int NumCPUs = 0; - int MaxID = -1; - std::ifstream f("/proc/cpuinfo"); - if (!f.is_open()) { - std::cerr << "failed to open /proc/cpuinfo\n"; - return -1; - } - const std::string Key = "processor"; - std::string ln; - while (std::getline(f, ln)) { - if (ln.empty()) { continue; } - size_t SplitIdx = ln.find(':'); - std::string value; -#if defined(__s390__) - // s390 has another format in /proc/cpuinfo - // it needs to be parsed differently - if (SplitIdx != std::string::npos) { value = ln.substr(Key.size() + 1, SplitIdx - Key.size() - 1); } -#else - if (SplitIdx != std::string::npos) { value = ln.substr(SplitIdx + 1); } -#endif - if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { - NumCPUs++; - if (!value.empty()) { - int CurID = benchmark::stoi(value); - MaxID = std::max(CurID, MaxID); - } - } - } - if (f.bad()) { - std::cerr << "Failure reading /proc/cpuinfo\n"; - return -1; - } - if (!f.eof()) { - std::cerr << "Failed to read to end of /proc/cpuinfo\n"; - return -1; - } - f.close(); - - if ((MaxID + 1) != NumCPUs) { - fprintf(stderr, - "CPU ID assignments in /proc/cpuinfo seem messed up." - " This is usually caused by a bad BIOS.\n"); - } - return NumCPUs; -#endif - BENCHMARK_UNREACHABLE(); -} - -static double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { - // Currently, scaling is only used on linux path here, - // suppress diagnostics about it being unused on other paths. - (void)scaling; - -#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN - long freq; - - // If the kernel is exporting the tsc frequency use that. There are issues - // where cpuinfo_max_freq cannot be relied on because the BIOS may be - // exporintg an invalid p-state (on x86) or p-states may be used to put the - // processor in a new mode (turbo mode). Essentially, those frequencies - // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as - // well. - if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) - // If CPU scaling is disabled, use the the *current* frequency. - // Note that we specifically don't want to read cpuinfo_cur_freq, - // because it is only readable by root. - || (scaling == CPUInfo::Scaling::DISABLED && - ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", &freq)) - // Otherwise, if CPU scaling may be in effect, we want to use - // the *maximum* frequency, not whatever CPU speed some random processor - // happens to be using now. - || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", &freq)) { - // The value is in kHz (as the file name suggests). For example, on a - // 2GHz warpstation, the file contains the value "2000000". - return freq * 1000.0; - } - - const double error_value = -1; - double bogo_clock = error_value; - - std::ifstream f("/proc/cpuinfo"); - if (!f.is_open()) { - std::cerr << "failed to open /proc/cpuinfo\n"; - return error_value; - } - - auto startsWithKey = [](std::string const& Value, std::string const& Key) { - if (Key.size() > Value.size()) { return false; } - auto Cmp = [&](char X, char Y) { - return std::tolower(X) == std::tolower(Y); - }; - return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp); - }; - - std::string ln; - while (std::getline(f, ln)) { - if (ln.empty()) { continue; } - size_t SplitIdx = ln.find(':'); - std::string value; - if (SplitIdx != std::string::npos) { value = ln.substr(SplitIdx + 1); } - // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only - // accept positive values. Some environments (virtual machines) report zero, - // which would cause infinite looping in WallTime_Init. - if (startsWithKey(ln, "cpu MHz")) { - if (!value.empty()) { - double cycles_per_second = benchmark::stod(value) * 1000000.0; - if (cycles_per_second > 0) { return cycles_per_second; } - } - } else if (startsWithKey(ln, "bogomips")) { - if (!value.empty()) { - bogo_clock = benchmark::stod(value) * 1000000.0; - if (bogo_clock < 0.0) { bogo_clock = error_value; } - } - } - } - if (f.bad()) { - std::cerr << "Failure reading /proc/cpuinfo\n"; - return error_value; - } - if (!f.eof()) { - std::cerr << "Failed to read to end of /proc/cpuinfo\n"; - return error_value; - } - f.close(); - // If we found the bogomips clock, but nothing better, we'll use it (but - // we're not happy about it); otherwise, fallback to the rough estimation - // below. - if (bogo_clock >= 0.0) { return bogo_clock; } - -#elif defined BENCHMARK_HAS_SYSCTL - constexpr auto* FreqStr = -#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) - "machdep.tsc_freq"; -#elif defined BENCHMARK_OS_OPENBSD - "hw.cpuspeed"; -#elif defined BENCHMARK_OS_DRAGONFLY - "hw.tsc_frequency"; -#else - "hw.cpufrequency"; -#endif - unsigned long long hz = 0; -#if defined BENCHMARK_OS_OPENBSD - if (GetSysctl(FreqStr, &hz)) { return hz * 1000000; } -#else - if (GetSysctl(FreqStr, &hz)) { return hz; } -#endif - fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", FreqStr, strerror(errno)); - -#elif defined BENCHMARK_OS_WINDOWS - // In NT, read MHz from the registry. If we fail to do so or we're in win9x - // then make a crude estimate. - DWORD data, data_size = sizeof(data); - if (IsWindowsXPOrGreater() && SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE, - "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", - "~MHz", - nullptr, - &data, - &data_size))) - return static_cast((int64_t)data * (int64_t)(1000 * 1000)); // was mhz -#elif defined(BENCHMARK_OS_SOLARIS) - kstat_ctl_t* kc = kstat_open(); - if (!kc) { - std::cerr << "failed to open /dev/kstat\n"; - return -1; - } - kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); - if (!ksp) { - std::cerr << "failed to lookup in /dev/kstat\n"; - return -1; - } - if (kstat_read(kc, ksp, NULL) < 0) { - std::cerr << "failed to read from /dev/kstat\n"; - return -1; - } - kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); - if (!knp) { - std::cerr << "failed to lookup data in /dev/kstat\n"; - return -1; - } - if (knp->data_type != KSTAT_DATA_UINT64) { - std::cerr << "current_clock_Hz is of unexpected data type: " << knp->data_type << "\n"; - return -1; - } - double clock_hz = knp->value.ui64; - kstat_close(kc); - return clock_hz; -#elif defined(BENCHMARK_OS_QNX) - return static_cast((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * (int64_t)(1000 * 1000)); -#endif - // If we've fallen through, attempt to roughly estimate the CPU clock rate. - const int estimate_time_ms = 1000; - cycleclock::Init(); - const auto start_ticks = cycleclock::Now(); - SleepForMilliseconds(estimate_time_ms); - return static_cast(cycleclock::Now() - start_ticks); -} - -static std::vector GetLoadAvg() { -#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || defined BENCHMARK_OS_MACOSX || \ - defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ - !defined(__ANDROID__) - constexpr int kMaxSamples = 3; - std::vector res(kMaxSamples, 0.0); - const int nelem = getloadavg(res.data(), kMaxSamples); - if (nelem < 1) { - res.clear(); - } else { - res.resize(nelem); - } - return res; -#else - return {}; -#endif -} - -// private constructor -CPUInfo::CPUInfo() - : num_cpus(GetNumCPUs()) - , scaling(CpuScaling(num_cpus)) - , cycles_per_second(GetCPUCyclesPerSecond(scaling)) - , caches(GetCacheSizes()) - , load_avg(GetLoadAvg()) {} - -struct SystemInfo { - - static std::string GetSystemName() { -#if defined(BENCHMARK_OS_WINDOWS) - std::string str; - const unsigned COUNT = MAX_COMPUTERNAME_LENGTH + 1; - TCHAR hostname[COUNT] = {'\0'}; - DWORD DWCOUNT = COUNT; - if (!GetComputerName(hostname, &DWCOUNT)) { return std::string(""); } -#ifndef UNICODE - str = std::string(hostname, DWCOUNT); -#else - // Using wstring_convert, Is deprecated in C++17 - using convert_type = std::codecvt_utf8; - std::wstring_convert converter; - std::wstring wStr(hostname, DWCOUNT); - str = converter.to_bytes(wStr); -#endif - return str; -#else // defined(BENCHMARK_OS_WINDOWS) -#ifndef HOST_NAME_MAX -#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined -#define HOST_NAME_MAX 64 -#elif defined(BENCHMARK_OS_NACL) -#define HOST_NAME_MAX 64 -#elif defined(BENCHMARK_OS_QNX) -#define HOST_NAME_MAX 154 -#elif defined(BENCHMARK_OS_RTEMS) -#define HOST_NAME_MAX 256 -#else -#warning "HOST_NAME_MAX not defined. using 64" -#define HOST_NAME_MAX 64 -#endif -#endif // def HOST_NAME_MAX - char hostname[HOST_NAME_MAX]; - int ret_val = gethostname(hostname, HOST_NAME_MAX); - if (ret_val != 0) { return std::string(""); } - return {hostname}; -#endif // Catch-all POSIX block. - } - - static const SystemInfo& getInstance() { - static const SystemInfo INFO; - return INFO; - } - -private: - SystemInfo() - : m_name(GetSystemName()) {} - -public: - const std::string& getName() const { return m_name; } - -private: - std::string m_name; - - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo); -}; - -static std::string LocalDateTimeString() { - // Write the local time in RFC3339 format yyyy-mm-ddTHH:MM:SS+/-HH:MM. - using Clock = std::chrono::system_clock; - std::time_t now = Clock::to_time_t(Clock::now()); - const std::size_t kTzOffsetLen = 6; - const std::size_t kTimestampLen = 19; - - std::size_t tz_len; - std::size_t timestamp_len; - long int offset_minutes; - char tz_offset_sign = '+'; - // tz_offset is set in one of three ways: - // * strftime with %z - This either returns empty or the ISO 8601 time. The - // maximum length an - // ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero). - // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to - // 19 for %02li, - // one for :, up to 19 %02li, plus trailing zero). - // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus - // trailing zero). - // - // Thus, the maximum size this needs to be is 41. - char tz_offset[41]; - // Long enough buffer to avoid format-overflow warnings - char storage[128]; - -#if defined(BENCHMARK_OS_WINDOWS) - std::tm* timeinfo_p = ::localtime(&now); -#else - std::tm timeinfo; - std::tm* timeinfo_p = &timeinfo; - ::localtime_r(&now, &timeinfo); -#endif - - tz_len = std::strftime(tz_offset, sizeof(tz_offset), "%z", timeinfo_p); - - if (tz_len < kTzOffsetLen && tz_len > 1) { - // Timezone offset was written. strftime writes offset as +HHMM or -HHMM, - // RFC3339 specifies an offset as +HH:MM or -HH:MM. To convert, we parse - // the offset as an integer, then reprint it to a string. - - offset_minutes = ::strtol(tz_offset, NULL, 10); - if (offset_minutes < 0) { - offset_minutes *= -1; - tz_offset_sign = '-'; - } - - tz_len = ::snprintf( - tz_offset, sizeof(tz_offset), "%c%02li:%02li", tz_offset_sign, offset_minutes / 100, offset_minutes % 100); - ((void)tz_len); // Prevent unused variable warning in optimized build. - } else { - // Unknown offset. RFC3339 specifies that unknown local offsets should be - // written as UTC time with -00:00 timezone. -#if defined(BENCHMARK_OS_WINDOWS) - // Potential race condition if another thread calls localtime or gmtime. - timeinfo_p = ::gmtime(&now); -#else - ::gmtime_r(&now, &timeinfo); -#endif - - strncpy(tz_offset, "-00:00", kTzOffsetLen + 1); - } - - timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", timeinfo_p); - // Prevent unused variable warning in optimized build. - ((void)kTimestampLen); - - std::strncat(storage, tz_offset, sizeof(storage) - timestamp_len - 1); - return {storage}; -} - -class CPUInfo; -struct SystemInfo; -class BenchmarkReporter { -public: - struct Context { - CPUInfo const& cpu_info; - SystemInfo const& sys_info; - // The number of chars in the longest benchmark name. - size_t name_field_width; - - Context() - : cpu_info(CPUInfo::getInstance()) - , sys_info(SystemInfo::getInstance()) {} - }; - - class Run { - public: - static const int64_t no_repetition_index = -1; - enum RunType { RT_Iteration, RT_Aggregate }; - - explicit Run(int benchmark_number, std::string& name, uint64_t iterations, double cycles_per_tuple) - : benchmark_number(benchmark_number) - , name(name) - , iterations(iterations) - , cycles_per_tuple(cycles_per_tuple) - // time_unit(kNanosecond), - // real_accumulated_time(0), - // cpu_accumulated_time(0), - // max_heapbytes_used(0), - // complexity(oNone), - // complexity_lambda(), - // complexity_n(0), - // report_big_o(false), - // report_rms(false), - // counters(), - // has_memory_result(false), - // allocs_per_iter(0.0), - // max_bytes_used(0) - {} - - std::string benchmark_name() const; - int benchmark_number; - std::string name; - uint64_t iterations; - RunType run_type; - std::string aggregate_name; - bool error_occurred; - std::string error_message; - - // TimeUnit time_unit; - double cycles_per_tuple; - - // Memory metrics. - bool has_memory_result; - double allocs_per_iter; - int64_t max_bytes_used; - }; - - // Construct a BenchmarkReporter with the output stream set to 'std::cout' - // and the error stream set to 'std::cerr' - BenchmarkReporter() - : output_stream_(&std::cout) - , error_stream_(&std::cerr) {} - - // Called once for every suite of benchmarks run. - // The parameter "context" contains information that the - // reporter may wish to use when generating its report, for example the - // platform under which the benchmarks are running. The benchmark run is - // never started if this function returns false, allowing the reporter - // to skip runs based on the context information. - virtual bool ReportContext(const Context& context) = 0; - - // Called once for each group of benchmark runs, gives information about - // cpu-time and heap memory usage during the benchmark run. If the group - // of runs contained more than two entries then 'report' contains additional - // elements representing the mean and standard deviation of those runs. - // Additionally if this group of runs was the last in a family of benchmarks - // 'reports' contains additional entries representing the asymptotic - // complexity and RMS of that benchmark family. - virtual void ReportRuns(std::vector& report) = 0; - - // Called once and only once after ever group of benchmarks is run and - // reported. - virtual void Finalize() {} - - // REQUIRES: The object referenced by 'out' is valid for the lifetime - // of the reporter. - void SetOutputStream(std::ostream* out) { - assert(out); - output_stream_ = out; - } - - // REQUIRES: The object referenced by 'err' is valid for the lifetime - // of the reporter. - void SetErrorStream(std::ostream* err) { - assert(err); - error_stream_ = err; - } - - static std::ostream& GetOutputStream() { return std::cout; } - - static std::ostream& GetErrorStream() { return std::cerr; } - - virtual ~BenchmarkReporter(); - - // Write a human readable string to 'out' representing the specified - // 'context'. - // REQUIRES: 'out' is non-null. - static void PrintBasicContext(std::ostream& out, Context const& context) { - // CHECK(out) << "cannot be null"; - auto& Out = out; - - Out << LocalDateTimeString() << "\n"; - - const CPUInfo& info = context.cpu_info; - Out << "Run on (" << info.num_cpus << " X " << (info.cycles_per_second / 1000000.0) << " MHz CPU " - << ((info.num_cpus > 1) ? "s" : "") << ")\n"; - if (info.caches.size() != 0) { - Out << "CPU Caches:\n"; - for (auto& CInfo : info.caches) { - Out << " L" << CInfo.level << " " << CInfo.type << " " << (CInfo.size / 1024) << " KiB"; - if (CInfo.num_sharing != 0) { Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")"; } - Out << "\n"; - } - } - if (!info.load_avg.empty()) { - Out << "Load Average: "; - for (auto It = info.load_avg.begin(); It != info.load_avg.end();) { - Out << StrFormat("%.2f", *It++); - if (It != info.load_avg.end()) { Out << ", "; } - } - Out << "\n"; - } - - if (CPUInfo::Scaling::ENABLED == info.scaling) { - Out << "***WARNING*** CPU scaling is enabled, the benchmark " - "real time measurements may be noisy and will incur extra " - "overhead.\n"; - } - -#ifndef NDEBUG - Out << "***WARNING*** Library was built as DEBUG. Timings may be " - "affected.\n"; -#endif - } - -private: - std::ostream* output_stream_; - std::ostream* error_stream_; -}; - -// https://github.com/emscripten-core/emscripten/wiki/Emterpreter or use node.js -// https://stackoverflow.com/questions/32573289/text-written-to-stdout-doesnt-appear-until-program-completion -static void printRun(std::ostream& out, benchmark::BenchmarkReporter::Run& run) { - out << run.benchmark_number << ","; - out << run.name << ","; - out << run.iterations << ","; - out << run.cycles_per_tuple; - out << '\n'; -} - -FLS_BENCH_MAYBE_UNUSED static std::string CsvEscape(const std::string& s) { - std::string tmp; - tmp.reserve(s.size() + 2); - for (char c : s) { - switch (c) { - case '"': - tmp += "\"\""; - break; - default: - tmp += c; - break; - } - } - return '"' + tmp + '"'; -} - -static void printHeader(std::ostream& out) { - out << "benchmark_number,"; - out << "name,"; - out << "iterations,"; - out << "cycles_per_tuple"; - out << "\n"; -} - -class CSVReporter : public BenchmarkReporter { -public: - explicit CSVReporter(std::string path) - : path(std::move(path)) - , printed_header(false) {} - bool ReportContext(const Context& context) override { - PrintBasicContext(GetErrorStream(), context); - return true; - } - static void PrintContext() { PrintBasicContext(GetErrorStream(), benchmark::BenchmarkReporter::Context()); } - static void WriteRuns(std::vector& reports, const std::string& path) { -#ifdef BENCHMARK_OS_EMSCRIPTEN - // alternatives : - // https://stackoverflow.com/questions/67174663/cannot-save-the-file-to-specific-directory-by-wasm - std::cerr << "Modern web browsers do not allow web pages to write/open a local file in your machine."; -#else - std::fstream file; - file.open(path, std::fstream::out); - printHeader(file); - - // print results for each run - for (auto& run : reports) { - printRun(file, run); - } - - if (file.fail()) { - std::cerr << "Error: " << strerror(errno) << ": " << path << "\n"; - throw std::exception(); - } - - std::cout << "benchmark result has been writen at " << path << '\n'; -#endif - } - static void WriteContext(std::string& cmake_info, const std::string& path) { -#ifdef BENCHMARK_OS_EMSCRIPTEN - // alternatives : - // https://stackoverflow.com/questions/67174663/cannot-save-the-file-to-specific-directory-by-wasm - std::cerr << "Modern web browsers do not allow web pages to write/open a local file in your machine."; -#else - std::fstream file; - file.open(path, std::fstream::out); - PrintBasicContext(file, benchmark::BenchmarkReporter::Context()); - file << cmake_info; - if (file.fail()) { - std::cerr << "Error: " << strerror(errno) << ": " << path << "\n"; - throw std::exception(); - } - - std::cout << "benchmark metadata has been writen at " << path << '\n'; -#endif - } - static void PrintRun(Run& run) { - std::ostream& out = GetOutputStream(); - printRun(out, run); - } - -public: - const std::string path; - bool printed_header; - std::set user_counter_names; -}; - -using report = benchmark::BenchmarkReporter::Run; - -class Benchmark { -private: - explicit Benchmark(std::string name) - : m_name(std::move(name)) - , m_cpu_info(CPUInfo::getInstance()) - , m_system_info(SystemInfo::getInstance()) - , m_enable_print(false) - , m_result_file {' '} - , m_metadata_file {' '} {} // -public: - friend class BenchmarkBuilder; - void Run(benchmark::BenchmarkReporter::Run run) { - if (m_enable_print) { benchmark::CSVReporter::PrintRun(run); } - - m_runs.push_back(run); - } - ~Benchmark() { - if (m_enable_save && !m_runs.empty()) { - CSVReporter::WriteRuns(m_runs, m_result_file); - CSVReporter::WriteContext(m_extra_info, m_metadata_file); - } - } // -private: - std::string m_name; - const CPUInfo& m_cpu_info; - const SystemInfo& m_system_info; - std::string m_extra_info; - bool m_enable_save; - bool m_enable_print; - std::string m_result_file; - std::string m_metadata_file; - std::string m_dir; - std::vector m_runs; -}; - -class CmakeInfo; -constexpr auto METADATA_PREFIX {".metadata"}; -constexpr auto CSV_PREFIX {".csv"}; - -class BenchmarkBuilder { -public: - explicit BenchmarkBuilder(std::string name) - : m_benchmark(std::move(name)) {} // -public: - operator Benchmark() const { return m_benchmark; } - benchmark::BenchmarkBuilder& save() { - m_benchmark.m_metadata_file = "./" + m_benchmark.m_metadata_file + METADATA_PREFIX; - m_benchmark.m_result_file = "./" + m_benchmark.m_metadata_file + CSV_PREFIX; - m_benchmark.m_enable_print = true; - return *this; - } - benchmark::BenchmarkBuilder& print() { - m_benchmark.m_enable_save = true; - return *this; - } - benchmark::BenchmarkBuilder& at(const std::string& dir) { - m_benchmark.m_metadata_file = dir + "/" + m_benchmark.m_name + METADATA_PREFIX; - m_benchmark.m_result_file = dir + "/" + m_benchmark.m_name + CSV_PREFIX; - return *this; - } - benchmark::BenchmarkBuilder& add_extra_info(const std::string& info) { - m_benchmark.m_extra_info += info; - return *this; - } - -private: - Benchmark m_benchmark; // -}; - -FLS_BENCH_MAYBE_UNUSED static BenchmarkBuilder create(const std::string& name) { - cycleclock::Init(); - return BenchmarkBuilder(name); -} - -class CmakeInfo { -public: - static const CmakeInfo& getInstance() { - static const CmakeInfo INFO; - return INFO; - } - -public: - const std::string source_dir = SOURCE_DIR; - const std::string cmake_osx_architectures = CMAKE_OSX_ARCHITECTURES; - const std::string cmake_host_system_processor = CMAKE_HOST_SYSTEM_PROCESSOR; - const std::string cmake_system_processor = CMAKE_SYSTEM_PROCESSOR; - const std::string cmake_host_system_name = CMAKE_HOST_SYSTEM_NAME; - const std::string cmake_system_name = CMAKE_SYSTEM_NAME; - const std::string cmake_c_compiler = CMAKE_C_COMPILER; - const std::string cmake_cxx_compiler = CMAKE_CXX_COMPILER; - const std::string cmake_cxx_compiler_id = CMAKE_CXX_COMPILER_ID; - const std::string cmake_cxx_compiler_version = CMAKE_CXX_COMPILER_VERSION; - const std::string cmake_crosscompiling = CMAKE_CROSSCOMPILING; - const std::string cmake_cxx_flags_debug = CMAKE_CXX_FLAGS_DEBUG; - const std::string cmake_cxx_flags_release = CMAKE_CXX_FLAGS_RELEASE; - const std::string cmake_build_type = CMAKE_BUILD_TYPE; - const std::string cmake_toolchain_file = CMAKE_TOOLCHAIN_FILE; - const std::string target_name = TARGET_NAME; - const std::string target_compile_options = TARGET_COMPILE_OPTIONS; - -public: - const std::string& getSourceDir() const { return source_dir; } - const std::string& getCmakeOsxArchitectures() const { return cmake_osx_architectures; } - const std::string& getCmakeHostSystemProcessor() const { return cmake_host_system_processor; } - const std::string& getCmakeSystemProcessor() const { return cmake_system_processor; } - const std::string& getCmakeHostSystemName() const { return cmake_host_system_name; } - const std::string& getCmakeSystemName() const { return cmake_system_name; } - const std::string& getCmakeCCompiler() const { return cmake_c_compiler; } - const std::string& getCmakeCxxCompiler() const { return cmake_cxx_compiler; } - const std::string& getCmakeCxxCompilerId() const { return cmake_cxx_compiler_id; } - const std::string& getCmakeCxxCompilerVersion() const { return cmake_cxx_compiler_version; } - const std::string& getCmakeCrosscompiling() const { return cmake_crosscompiling; } - const std::string& getCmakeCxxFlagsDebug() const { return cmake_cxx_flags_debug; } - const std::string& getCmakeCxxFlagsRelease() const { return cmake_cxx_flags_release; } - const std::string& getCmakeBuildType() const { return cmake_build_type; } - const std::string& get_cmakeToolchainFile() const { return cmake_toolchain_file; } - const std::string& getTargetName() const { return target_name; } - const std::string& getTargetCompileOptions() const { return target_compile_options; } - - static void PrintCmake() { printCmakeInfo(std::cout); } - static void AppendCmake(const std::string& path) { - std::fstream file; - file.open(path, std::fstream::app); - printCmakeInfo(file); - - if (file.fail()) { - std::cerr << "Error: " << strerror(errno) << "\n"; - throw std::exception(); - } - - std::cout << "result has been writen at " + path; - } - static void printCmakeInfo(std::ostream& out) { - const CmakeInfo& info = getInstance(); - out << info.getCmakeInfo(); - } - - // https://stackoverflow.com/a/46931770/5165633 - static std::vector split(const std::string& s, char delim) { - std::vector result; - std::stringstream ss(s); - std::string item; - - while (getline(ss, item, delim)) { - result.push_back(item); - } - - return result; - } - - static std::string getCmakeToolchainFile() { - const CmakeInfo& info = getInstance(); - std::vector v = split(info.get_cmakeToolchainFile(), '/'); - - auto tool_chain_file_str = v[v.size() - 1]; - return tool_chain_file_str.substr(0, tool_chain_file_str.size() - 6); - } - - static std::string getCmakeInfo() { - std::ostringstream out; - const CmakeInfo& info = getInstance(); - out << "cmake info: \n"; - out << " source_dir: " << info.getSourceDir() << '\n'; - out << " cmake_osx_architectures" << info.getCmakeOsxArchitectures() << '\n'; - out << " cmake_host_system_processor: " << info.getCmakeHostSystemProcessor() << '\n'; - out << " cmake_system_processor: " << info.getCmakeSystemProcessor() << '\n'; - out << " cmake_host_system_name: " << info.getCmakeHostSystemName() << '\n'; - out << " cmake_system_name: " << info.getCmakeSystemName() << '\n'; - out << " cmake_c_compiler: " << info.getCmakeCCompiler() << '\n'; - out << " cmake_cxx_compiler: " << info.getCmakeCxxCompiler() << '\n'; - out << " cmake_cxx_compiler_id: " << info.getCmakeCxxCompilerId() << '\n'; - out << " cmake_cxx_compiler_version: " << info.getCmakeCxxCompilerVersion() << '\n'; - out << " cmake_crosscompiling: " << info.getCmakeCrosscompiling() << '\n'; - out << " cmake_cxx_flags_debug: " << info.getCmakeCxxFlagsDebug() << '\n'; - out << " cmake_cxx_flags_release: " << info.getCmakeCxxFlagsRelease() << '\n'; - out << " cmake_build_type: " << info.getCmakeBuildType() << '\n'; - out << " cmake_toolchain_file: " << info.getCmakeToolchainFile() << '\n'; - out << "target info: \n"; - out << " target_name: " << info.getTargetName() << '\n'; - out << " target_compile_options: " << info.getTargetCompileOptions() << '\n'; - return out.str(); - } - -private: - CmakeInfo() = default; // - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CmakeInfo); // -}; -} // namespace benchmark -#endif diff --git a/fls_bench/google/benchmark/LICENSE b/fls_bench/google/benchmark/LICENSE deleted file mode 100644 index 7a4a3ea..0000000 --- a/fls_bench/google/benchmark/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/fallback_scalar_aav_1024_uf1_rsum_bench.cpp b/primitives/fls_generated/fallback/scalar_aav_uf1/fallback_scalar_aav_1024_uf1_rsum_bench.cpp deleted file mode 100644 index 20de1a1..0000000 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/fallback_scalar_aav_1024_uf1_rsum_bench.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// generated! -// NOLINTBEGIN -#include "fallback_scalar_aav_1024_uf1_rsum_bench.hpp" -#include "fallback_scalar_aav_1024_uf1_rsum_helper.hpp" -#include "fls_gen/rsum/rsum.hpp" -#include "fls_gen/rsum_and_untranspose/rsum_and_untranspose.hpp" -#include "fls_gen/transpose/transpose.hpp" -#include "fls_gen/untranspose/untranspose.hpp" -#include -static benchmark::BenchmarkReporter::Run bench_0(const uint8_t* transposed8, uint8_t* untransposed8) { - int benchmark_number = 0; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - std::string benchmark_name = "rsum_8"; - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::rsum::fallback::scalar::rsum(helper::delta_arr_ow8, untransposed8, helper::base_arr_ow8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench_1(const uint16_t* transposed16, uint16_t* untransposed16) { - int benchmark_number = 1; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - std::string benchmark_name = "rsum_16"; - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::rsum::fallback::scalar::rsum(helper::delta_arr_ow16, untransposed16, helper::base_arr_ow16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench_2(const uint32_t* transposed32, uint32_t* untransposed32) { - int benchmark_number = 2; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - std::string benchmark_name = "rsum_32"; - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::rsum::fallback::scalar::rsum(helper::delta_arr_ow32, untransposed32, helper::base_arr_ow32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench_3(const uint64_t* transposed64, uint64_t* untransposed64) { - int benchmark_number = 3; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - std::string benchmark_name = "rsum_64"; - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::rsum::fallback::scalar::rsum(helper::delta_arr_ow64, untransposed64, helper::base_arr_ow64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -void benchmark_all(benchmark::Benchmark& benchmark) { - const auto* transposed8 = new (std::align_val_t {64}) uint8_t[1024]; - const auto* transposed16 = new (std::align_val_t {64}) uint16_t[1024]; - const auto* transposed32 = new (std::align_val_t {64}) uint32_t[1024]; - const auto* transposed64 = new (std::align_val_t {64}) uint64_t[1024]; - auto* untransposed8 = new (std::align_val_t {64}) uint8_t[1024]; - auto* untransposed16 = new (std::align_val_t {64}) uint16_t[1024]; - auto* untransposed32 = new (std::align_val_t {64}) uint32_t[1024]; - auto* untransposed64 = new (std::align_val_t {64}) uint64_t[1024]; - benchmark.Run(bench_0(transposed8, untransposed8)); - benchmark.Run(bench_1(transposed16, untransposed16)); - benchmark.Run(bench_2(transposed32, untransposed32)); - benchmark.Run(bench_3(transposed64, untransposed64)); -} -int main() { - benchmark::Benchmark benchmark = - benchmark::create("fallback_scalar_aav_1024_uf1_rsum") - .save() - .at(std::string(SOURCE_DIR) + "/fls_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) - .print() - .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); - benchmark_all(benchmark); -} diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/fallback_scalar_aav_1024_uf1_unffor_bench.cpp b/primitives/fls_generated/fallback/scalar_aav_uf1/fallback_scalar_aav_1024_uf1_unffor_bench.cpp deleted file mode 100644 index 8bfe797..0000000 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/fallback_scalar_aav_1024_uf1_unffor_bench.cpp +++ /dev/null @@ -1,3367 +0,0 @@ -// generated! -// NOLINTBEGIN -#include "fallback_scalar_aav_1024_uf1_unffor_bench.hpp" -#include "fallback_scalar_aav_1024_uf1_unffor_helper.hpp" -#include "fls_gen/ffor/ffor.hpp" -#include "fls_gen/pack/pack.hpp" -#include "fls_gen/unffor/unffor.hpp" -#include -static benchmark::BenchmarkReporter::Run bench0_unpack_0bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 0; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench0_unpack_0bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_0_b0_w8_arr), const_cast(packed8), 0, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 0, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench1_unpack_1bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 1; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench1_unpack_1bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_1_b1_w8_arr), const_cast(packed8), 1, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 1, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench2_unpack_2bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 2; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench2_unpack_2bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_2_b2_w8_arr), const_cast(packed8), 2, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 2, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench3_unpack_3bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 3; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench3_unpack_3bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_3_b3_w8_arr), const_cast(packed8), 3, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 3, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench4_unpack_4bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 4; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench4_unpack_4bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_4_b4_w8_arr), const_cast(packed8), 4, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 4, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench5_unpack_5bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 5; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench5_unpack_5bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_5_b5_w8_arr), const_cast(packed8), 5, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 5, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench6_unpack_6bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 6; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench6_unpack_6bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_6_b6_w8_arr), const_cast(packed8), 6, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 6, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench7_unpack_7bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 7; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench7_unpack_7bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_7_b7_w8_arr), const_cast(packed8), 7, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 7, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench8_unpack_8bw_8ow_8crw_1uf(const uint8_t* packed8, uint8_t* unpacked8) { - int benchmark_number = 8; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint8_t base8[1] = {1}; - std::string benchmark_name = "bench8_unpack_8bw_8ow_8crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_8_b8_w8_arr), const_cast(packed8), 8, base8); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed8, const_cast(unpacked8), 8, base8); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench9_unpack_0bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 9; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench9_unpack_0bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_9_b0_w16_arr), const_cast(packed16), 0, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 0, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench10_unpack_1bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 10; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench10_unpack_1bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_10_b1_w16_arr), const_cast(packed16), 1, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 1, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench11_unpack_2bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 11; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench11_unpack_2bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_11_b2_w16_arr), const_cast(packed16), 2, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 2, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench12_unpack_3bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 12; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench12_unpack_3bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_12_b3_w16_arr), const_cast(packed16), 3, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 3, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench13_unpack_4bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 13; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench13_unpack_4bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_13_b4_w16_arr), const_cast(packed16), 4, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 4, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench14_unpack_5bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 14; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench14_unpack_5bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_14_b5_w16_arr), const_cast(packed16), 5, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 5, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench15_unpack_6bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 15; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench15_unpack_6bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_15_b6_w16_arr), const_cast(packed16), 6, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 6, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench16_unpack_7bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 16; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench16_unpack_7bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_16_b7_w16_arr), const_cast(packed16), 7, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 7, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench17_unpack_8bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 17; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench17_unpack_8bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_17_b8_w16_arr), const_cast(packed16), 8, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 8, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench18_unpack_9bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 18; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench18_unpack_9bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_18_b9_w16_arr), const_cast(packed16), 9, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 9, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench19_unpack_10bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 19; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench19_unpack_10bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_19_b10_w16_arr), const_cast(packed16), 10, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 10, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench20_unpack_11bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 20; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench20_unpack_11bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_20_b11_w16_arr), const_cast(packed16), 11, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 11, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench21_unpack_12bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 21; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench21_unpack_12bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_21_b12_w16_arr), const_cast(packed16), 12, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 12, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench22_unpack_13bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 22; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench22_unpack_13bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_22_b13_w16_arr), const_cast(packed16), 13, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 13, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench23_unpack_14bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 23; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench23_unpack_14bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_23_b14_w16_arr), const_cast(packed16), 14, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 14, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench24_unpack_15bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 24; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench24_unpack_15bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_24_b15_w16_arr), const_cast(packed16), 15, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 15, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench25_unpack_16bw_16ow_16crw_1uf(const uint16_t* packed16, - uint16_t* unpacked16) { - int benchmark_number = 25; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint16_t base16[1] = {1}; - std::string benchmark_name = "bench25_unpack_16bw_16ow_16crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_25_b16_w16_arr), const_cast(packed16), 16, base16); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed16, const_cast(unpacked16), 16, base16); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench26_unpack_0bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 26; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench26_unpack_0bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_26_b0_w32_arr), const_cast(packed32), 0, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 0, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench27_unpack_1bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 27; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench27_unpack_1bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_27_b1_w32_arr), const_cast(packed32), 1, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 1, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench28_unpack_2bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 28; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench28_unpack_2bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_28_b2_w32_arr), const_cast(packed32), 2, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 2, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench29_unpack_3bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 29; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench29_unpack_3bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_29_b3_w32_arr), const_cast(packed32), 3, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 3, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench30_unpack_4bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 30; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench30_unpack_4bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_30_b4_w32_arr), const_cast(packed32), 4, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 4, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench31_unpack_5bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 31; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench31_unpack_5bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_31_b5_w32_arr), const_cast(packed32), 5, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 5, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench32_unpack_6bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 32; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench32_unpack_6bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_32_b6_w32_arr), const_cast(packed32), 6, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 6, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench33_unpack_7bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 33; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench33_unpack_7bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_33_b7_w32_arr), const_cast(packed32), 7, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 7, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench34_unpack_8bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 34; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench34_unpack_8bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_34_b8_w32_arr), const_cast(packed32), 8, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 8, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench35_unpack_9bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 35; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench35_unpack_9bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_35_b9_w32_arr), const_cast(packed32), 9, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 9, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench36_unpack_10bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 36; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench36_unpack_10bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_36_b10_w32_arr), const_cast(packed32), 10, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 10, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench37_unpack_11bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 37; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench37_unpack_11bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_37_b11_w32_arr), const_cast(packed32), 11, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 11, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench38_unpack_12bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 38; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench38_unpack_12bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_38_b12_w32_arr), const_cast(packed32), 12, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 12, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench39_unpack_13bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 39; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench39_unpack_13bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_39_b13_w32_arr), const_cast(packed32), 13, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 13, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench40_unpack_14bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 40; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench40_unpack_14bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_40_b14_w32_arr), const_cast(packed32), 14, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 14, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench41_unpack_15bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 41; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench41_unpack_15bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_41_b15_w32_arr), const_cast(packed32), 15, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 15, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench42_unpack_16bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 42; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench42_unpack_16bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_42_b16_w32_arr), const_cast(packed32), 16, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 16, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench43_unpack_17bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 43; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench43_unpack_17bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_43_b17_w32_arr), const_cast(packed32), 17, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 17, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench44_unpack_18bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 44; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench44_unpack_18bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_44_b18_w32_arr), const_cast(packed32), 18, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 18, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench45_unpack_19bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 45; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench45_unpack_19bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_45_b19_w32_arr), const_cast(packed32), 19, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 19, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench46_unpack_20bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 46; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench46_unpack_20bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_46_b20_w32_arr), const_cast(packed32), 20, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 20, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench47_unpack_21bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 47; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench47_unpack_21bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_47_b21_w32_arr), const_cast(packed32), 21, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 21, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench48_unpack_22bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 48; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench48_unpack_22bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_48_b22_w32_arr), const_cast(packed32), 22, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 22, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench49_unpack_23bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 49; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench49_unpack_23bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_49_b23_w32_arr), const_cast(packed32), 23, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 23, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench50_unpack_24bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 50; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench50_unpack_24bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_50_b24_w32_arr), const_cast(packed32), 24, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 24, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench51_unpack_25bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 51; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench51_unpack_25bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_51_b25_w32_arr), const_cast(packed32), 25, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 25, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench52_unpack_26bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 52; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench52_unpack_26bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_52_b26_w32_arr), const_cast(packed32), 26, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 26, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench53_unpack_27bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 53; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench53_unpack_27bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_53_b27_w32_arr), const_cast(packed32), 27, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 27, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench54_unpack_28bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 54; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench54_unpack_28bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_54_b28_w32_arr), const_cast(packed32), 28, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 28, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench55_unpack_29bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 55; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench55_unpack_29bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_55_b29_w32_arr), const_cast(packed32), 29, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 29, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench56_unpack_30bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 56; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench56_unpack_30bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_56_b30_w32_arr), const_cast(packed32), 30, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 30, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench57_unpack_31bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 57; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench57_unpack_31bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_57_b31_w32_arr), const_cast(packed32), 31, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 31, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench58_unpack_32bw_32ow_32crw_1uf(const uint32_t* packed32, - uint32_t* unpacked32) { - int benchmark_number = 58; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint32_t base32[1] = {1}; - std::string benchmark_name = "bench58_unpack_32bw_32ow_32crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_58_b32_w32_arr), const_cast(packed32), 32, base32); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed32, const_cast(unpacked32), 32, base32); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench59_unpack_0bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 59; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench59_unpack_0bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_59_b0_w64_arr), const_cast(packed64), 0, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 0, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench60_unpack_1bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 60; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench60_unpack_1bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_60_b1_w64_arr), const_cast(packed64), 1, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 1, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench61_unpack_2bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 61; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench61_unpack_2bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_61_b2_w64_arr), const_cast(packed64), 2, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 2, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench62_unpack_3bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 62; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench62_unpack_3bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_62_b3_w64_arr), const_cast(packed64), 3, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 3, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench63_unpack_4bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 63; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench63_unpack_4bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_63_b4_w64_arr), const_cast(packed64), 4, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 4, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench64_unpack_5bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 64; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench64_unpack_5bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_64_b5_w64_arr), const_cast(packed64), 5, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 5, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench65_unpack_6bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 65; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench65_unpack_6bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_65_b6_w64_arr), const_cast(packed64), 6, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 6, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench66_unpack_7bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 66; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench66_unpack_7bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_66_b7_w64_arr), const_cast(packed64), 7, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 7, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench67_unpack_8bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 67; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench67_unpack_8bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_67_b8_w64_arr), const_cast(packed64), 8, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 8, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench68_unpack_9bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 68; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench68_unpack_9bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_68_b9_w64_arr), const_cast(packed64), 9, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 9, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench69_unpack_10bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 69; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench69_unpack_10bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_69_b10_w64_arr), const_cast(packed64), 10, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 10, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench70_unpack_11bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 70; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench70_unpack_11bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_70_b11_w64_arr), const_cast(packed64), 11, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 11, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench71_unpack_12bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 71; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench71_unpack_12bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_71_b12_w64_arr), const_cast(packed64), 12, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 12, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench72_unpack_13bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 72; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench72_unpack_13bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_72_b13_w64_arr), const_cast(packed64), 13, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 13, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench73_unpack_14bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 73; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench73_unpack_14bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_73_b14_w64_arr), const_cast(packed64), 14, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 14, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench74_unpack_15bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 74; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench74_unpack_15bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_74_b15_w64_arr), const_cast(packed64), 15, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 15, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench75_unpack_16bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 75; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench75_unpack_16bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_75_b16_w64_arr), const_cast(packed64), 16, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 16, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench76_unpack_17bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 76; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench76_unpack_17bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_76_b17_w64_arr), const_cast(packed64), 17, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 17, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench77_unpack_18bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 77; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench77_unpack_18bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_77_b18_w64_arr), const_cast(packed64), 18, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 18, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench78_unpack_19bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 78; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench78_unpack_19bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_78_b19_w64_arr), const_cast(packed64), 19, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 19, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench79_unpack_20bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 79; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench79_unpack_20bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_79_b20_w64_arr), const_cast(packed64), 20, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 20, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench80_unpack_21bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 80; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench80_unpack_21bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_80_b21_w64_arr), const_cast(packed64), 21, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 21, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench81_unpack_22bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 81; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench81_unpack_22bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_81_b22_w64_arr), const_cast(packed64), 22, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 22, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench82_unpack_23bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 82; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench82_unpack_23bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_82_b23_w64_arr), const_cast(packed64), 23, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 23, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench83_unpack_24bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 83; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench83_unpack_24bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_83_b24_w64_arr), const_cast(packed64), 24, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 24, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench84_unpack_25bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 84; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench84_unpack_25bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_84_b25_w64_arr), const_cast(packed64), 25, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 25, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench85_unpack_26bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 85; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench85_unpack_26bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_85_b26_w64_arr), const_cast(packed64), 26, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 26, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench86_unpack_27bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 86; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench86_unpack_27bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_86_b27_w64_arr), const_cast(packed64), 27, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 27, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench87_unpack_28bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 87; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench87_unpack_28bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_87_b28_w64_arr), const_cast(packed64), 28, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 28, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench88_unpack_29bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 88; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench88_unpack_29bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_88_b29_w64_arr), const_cast(packed64), 29, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 29, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench89_unpack_30bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 89; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench89_unpack_30bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_89_b30_w64_arr), const_cast(packed64), 30, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 30, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench90_unpack_31bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 90; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench90_unpack_31bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_90_b31_w64_arr), const_cast(packed64), 31, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 31, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench91_unpack_32bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 91; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench91_unpack_32bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_91_b32_w64_arr), const_cast(packed64), 32, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 32, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench92_unpack_33bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 92; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench92_unpack_33bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_92_b33_w64_arr), const_cast(packed64), 33, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 33, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench93_unpack_34bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 93; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench93_unpack_34bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_93_b34_w64_arr), const_cast(packed64), 34, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 34, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench94_unpack_35bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 94; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench94_unpack_35bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_94_b35_w64_arr), const_cast(packed64), 35, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 35, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench95_unpack_36bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 95; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench95_unpack_36bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_95_b36_w64_arr), const_cast(packed64), 36, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 36, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench96_unpack_37bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 96; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench96_unpack_37bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_96_b37_w64_arr), const_cast(packed64), 37, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 37, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench97_unpack_38bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 97; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench97_unpack_38bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_97_b38_w64_arr), const_cast(packed64), 38, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 38, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench98_unpack_39bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 98; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench98_unpack_39bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_98_b39_w64_arr), const_cast(packed64), 39, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 39, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench99_unpack_40bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 99; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench99_unpack_40bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_99_b40_w64_arr), const_cast(packed64), 40, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 40, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench100_unpack_41bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 100; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench100_unpack_41bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_100_b41_w64_arr), const_cast(packed64), 41, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 41, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench101_unpack_42bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 101; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench101_unpack_42bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_101_b42_w64_arr), const_cast(packed64), 42, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 42, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench102_unpack_43bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 102; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench102_unpack_43bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_102_b43_w64_arr), const_cast(packed64), 43, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 43, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench103_unpack_44bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 103; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench103_unpack_44bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_103_b44_w64_arr), const_cast(packed64), 44, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 44, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench104_unpack_45bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 104; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench104_unpack_45bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_104_b45_w64_arr), const_cast(packed64), 45, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 45, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench105_unpack_46bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 105; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench105_unpack_46bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_105_b46_w64_arr), const_cast(packed64), 46, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 46, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench106_unpack_47bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 106; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench106_unpack_47bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_106_b47_w64_arr), const_cast(packed64), 47, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 47, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench107_unpack_48bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 107; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench107_unpack_48bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_107_b48_w64_arr), const_cast(packed64), 48, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 48, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench108_unpack_49bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 108; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench108_unpack_49bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_108_b49_w64_arr), const_cast(packed64), 49, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 49, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench109_unpack_50bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 109; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench109_unpack_50bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_109_b50_w64_arr), const_cast(packed64), 50, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 50, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench110_unpack_51bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 110; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench110_unpack_51bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_110_b51_w64_arr), const_cast(packed64), 51, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 51, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench111_unpack_52bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 111; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench111_unpack_52bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_111_b52_w64_arr), const_cast(packed64), 52, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 52, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench112_unpack_53bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 112; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench112_unpack_53bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_112_b53_w64_arr), const_cast(packed64), 53, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 53, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench113_unpack_54bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 113; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench113_unpack_54bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_113_b54_w64_arr), const_cast(packed64), 54, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 54, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench114_unpack_55bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 114; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench114_unpack_55bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_114_b55_w64_arr), const_cast(packed64), 55, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 55, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench115_unpack_56bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 115; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench115_unpack_56bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_115_b56_w64_arr), const_cast(packed64), 56, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 56, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench116_unpack_57bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 116; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench116_unpack_57bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_116_b57_w64_arr), const_cast(packed64), 57, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 57, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench117_unpack_58bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 117; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench117_unpack_58bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_117_b58_w64_arr), const_cast(packed64), 58, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 58, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench118_unpack_59bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 118; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench118_unpack_59bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_118_b59_w64_arr), const_cast(packed64), 59, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 59, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench119_unpack_60bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 119; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench119_unpack_60bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_119_b60_w64_arr), const_cast(packed64), 60, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 60, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench120_unpack_61bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 120; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench120_unpack_61bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_120_b61_w64_arr), const_cast(packed64), 61, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 61, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench121_unpack_62bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 121; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench121_unpack_62bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_121_b62_w64_arr), const_cast(packed64), 62, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 62, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench122_unpack_63bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 122; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench122_unpack_63bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_122_b63_w64_arr), const_cast(packed64), 63, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 63, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -static benchmark::BenchmarkReporter::Run bench123_unpack_64bw_64ow_64crw_1uf(const uint64_t* packed64, - uint64_t* unpacked64) { - int benchmark_number = 123; - -#ifdef NDEBUG - uint64_t iterations = 3000000; -#else - uint64_t iterations = 1; -#endif - - [[maybe_unused]] const uint64_t base64[1] = {1}; - std::string benchmark_name = "bench123_unpack_64bw_64ow_64crw_1uf"; - - generated::ffor::fallback::scalar::ffor( - const_cast(helper::rand_arr_123_b64_w64_arr), const_cast(packed64), 64, base64); - - uint64_t cycles = benchmark::cycleclock::Now(); - for (uint64_t i = 0; i < iterations; ++i) { - generated::unffor::fallback::scalar::unffor(packed64, const_cast(unpacked64), 64, base64); - } - - cycles = benchmark::cycleclock::Now() - cycles; - - return benchmark::BenchmarkReporter::Run( - benchmark_number, benchmark_name, iterations, double(cycles) / (double(iterations) * 1024)); -} -void benchmark_all(benchmark::Benchmark& benchmark) { - const auto packed8 = new (std::align_val_t {64}) uint8_t[1024]; - const auto packed16 = new (std::align_val_t {64}) uint16_t[1024]; - const auto packed32 = new (std::align_val_t {64}) uint32_t[1024]; - const auto packed64 = new (std::align_val_t {64}) uint64_t[1024]; - auto unpacked8 = new (std::align_val_t {64}) uint8_t[1024]; - auto unpacked16 = new (std::align_val_t {64}) uint16_t[1024]; - auto unpacked32 = new (std::align_val_t {64}) uint32_t[1024]; - auto unpacked64 = new (std::align_val_t {64}) uint64_t[1024]; - benchmark.Run(bench0_unpack_0bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench1_unpack_1bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench2_unpack_2bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench3_unpack_3bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench4_unpack_4bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench5_unpack_5bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench6_unpack_6bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench7_unpack_7bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench8_unpack_8bw_8ow_8crw_1uf(packed8, unpacked8)); - benchmark.Run(bench9_unpack_0bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench10_unpack_1bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench11_unpack_2bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench12_unpack_3bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench13_unpack_4bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench14_unpack_5bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench15_unpack_6bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench16_unpack_7bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench17_unpack_8bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench18_unpack_9bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench19_unpack_10bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench20_unpack_11bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench21_unpack_12bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench22_unpack_13bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench23_unpack_14bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench24_unpack_15bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench25_unpack_16bw_16ow_16crw_1uf(packed16, unpacked16)); - benchmark.Run(bench26_unpack_0bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench27_unpack_1bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench28_unpack_2bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench29_unpack_3bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench30_unpack_4bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench31_unpack_5bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench32_unpack_6bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench33_unpack_7bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench34_unpack_8bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench35_unpack_9bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench36_unpack_10bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench37_unpack_11bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench38_unpack_12bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench39_unpack_13bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench40_unpack_14bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench41_unpack_15bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench42_unpack_16bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench43_unpack_17bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench44_unpack_18bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench45_unpack_19bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench46_unpack_20bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench47_unpack_21bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench48_unpack_22bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench49_unpack_23bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench50_unpack_24bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench51_unpack_25bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench52_unpack_26bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench53_unpack_27bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench54_unpack_28bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench55_unpack_29bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench56_unpack_30bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench57_unpack_31bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench58_unpack_32bw_32ow_32crw_1uf(packed32, unpacked32)); - benchmark.Run(bench59_unpack_0bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench60_unpack_1bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench61_unpack_2bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench62_unpack_3bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench63_unpack_4bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench64_unpack_5bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench65_unpack_6bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench66_unpack_7bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench67_unpack_8bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench68_unpack_9bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench69_unpack_10bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench70_unpack_11bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench71_unpack_12bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench72_unpack_13bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench73_unpack_14bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench74_unpack_15bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench75_unpack_16bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench76_unpack_17bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench77_unpack_18bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench78_unpack_19bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench79_unpack_20bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench80_unpack_21bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench81_unpack_22bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench82_unpack_23bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench83_unpack_24bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench84_unpack_25bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench85_unpack_26bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench86_unpack_27bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench87_unpack_28bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench88_unpack_29bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench89_unpack_30bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench90_unpack_31bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench91_unpack_32bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench92_unpack_33bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench93_unpack_34bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench94_unpack_35bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench95_unpack_36bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench96_unpack_37bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench97_unpack_38bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench98_unpack_39bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench99_unpack_40bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench100_unpack_41bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench101_unpack_42bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench102_unpack_43bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench103_unpack_44bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench104_unpack_45bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench105_unpack_46bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench106_unpack_47bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench107_unpack_48bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench108_unpack_49bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench109_unpack_50bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench110_unpack_51bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench111_unpack_52bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench112_unpack_53bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench113_unpack_54bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench114_unpack_55bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench115_unpack_56bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench116_unpack_57bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench117_unpack_58bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench118_unpack_59bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench119_unpack_60bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench120_unpack_61bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench121_unpack_62bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench122_unpack_63bw_64ow_64crw_1uf(packed64, unpacked64)); - benchmark.Run(bench123_unpack_64bw_64ow_64crw_1uf(packed64, unpacked64)); -} -int main() { - benchmark::Benchmark benchmark = - benchmark::create("fallback_scalar_aav_1024_uf1_unffor") - .save() - .at(std::string(SOURCE_DIR) + "/fls_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) - .print() - .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); - benchmark_all(benchmark); -} -// NOLINTEND diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/ffor.cmake b/primitives/fls_generated/fallback/scalar_aav_uf1/ffor.cmake index 1a5cfdd..0183140 100644 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/ffor.cmake +++ b/primitives/fls_generated/fallback/scalar_aav_uf1/ffor.cmake @@ -4,5 +4,3 @@ target_compile_definitions(fallback_scalar_aav_1024_uf1_ffor PRIVATE IS_SCALAR) LIST(APPEND FLS_GENERATED_OBJECT_FILES $) -get_target_property(TARGET_NAME fallback_scalar_aav_1024_uf1_ffor NAME) -get_target_property(TARGET_COMPILE_OPTIONS fallback_scalar_aav_1024_uf1_ffor COMPILE_OPTIONS) diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/pack.cmake b/primitives/fls_generated/fallback/scalar_aav_uf1/pack.cmake index 5caf52d..8d58770 100644 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/pack.cmake +++ b/primitives/fls_generated/fallback/scalar_aav_uf1/pack.cmake @@ -4,5 +4,3 @@ target_compile_definitions(fallback_scalar_aav_1024_uf1_pack PRIVATE IS_SCALAR) LIST(APPEND FLS_GENERATED_OBJECT_FILES $) -get_target_property(TARGET_NAME fallback_scalar_aav_1024_uf1_pack NAME) -get_target_property(TARGET_COMPILE_OPTIONS fallback_scalar_aav_1024_uf1_pack COMPILE_OPTIONS) diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/rsum.cmake b/primitives/fls_generated/fallback/scalar_aav_uf1/rsum.cmake index 32c0ec3..d4e86eb 100644 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/rsum.cmake +++ b/primitives/fls_generated/fallback/scalar_aav_uf1/rsum.cmake @@ -4,21 +4,12 @@ target_compile_definitions(fallback_scalar_aav_1024_uf1_rsum PRIVATE IS_SCALAR) LIST(APPEND FLS_GENERATED_OBJECT_FILES $) -get_target_property(TARGET_NAME fallback_scalar_aav_1024_uf1_rsum NAME) -get_target_property(TARGET_COMPILE_OPTIONS fallback_scalar_aav_1024_uf1_rsum COMPILE_OPTIONS) #------------------------------------------------------------------------------------------------------ -if (ENABLE_TESTING) - add_executable(fallback_scalar_aav_1024_uf1_rsum_test fallback_scalar_aav_1024_uf1_rsum_test.cpp) - target_link_libraries(fallback_scalar_aav_1024_uf1_rsum_test PRIVATE fallback_scalar_aav_1024_uf1_rsum) - target_link_libraries(fallback_scalar_aav_1024_uf1_rsum_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) - target_link_libraries(fallback_scalar_aav_1024_uf1_rsum_test PRIVATE gtest_main) - target_include_directories(fallback_scalar_aav_1024_uf1_rsum_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - gtest_discover_tests(fallback_scalar_aav_1024_uf1_rsum_test) +if (FLS_BUILD_TESTING) +# add_executable(fallback_scalar_aav_1024_uf1_rsum_test fallback_scalar_aav_1024_uf1_rsum_test.cpp) +# target_link_libraries(fallback_scalar_aav_1024_uf1_rsum_test PRIVATE fallback_scalar_aav_1024_uf1_rsum) +# target_link_libraries(fallback_scalar_aav_1024_uf1_rsum_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) +# target_link_libraries(fallback_scalar_aav_1024_uf1_rsum_test PRIVATE gtest_main) +# target_include_directories(fallback_scalar_aav_1024_uf1_rsum_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +# gtest_discover_tests(fallback_scalar_aav_1024_uf1_rsum_test) endif () -#------------------------------------------------------------------------------------------------------ -configure_file(${CMAKE_SOURCE_DIR}/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fallback_scalar_aav_1024_uf1_rsum_bench.hpp) -add_executable(fallback_scalar_aav_1024_uf1_rsum_bench fallback_scalar_aav_1024_uf1_rsum_bench.cpp) -target_link_libraries(fallback_scalar_aav_1024_uf1_rsum_bench PRIVATE fallback_scalar_aav_1024_uf1_rsum) -target_link_libraries(fallback_scalar_aav_1024_uf1_rsum_bench PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) -target_include_directories(fallback_scalar_aav_1024_uf1_rsum_bench PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -add_fls_benchmark(fallback_scalar_aav_1024_uf1_rsum_bench) diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/transpose.cmake b/primitives/fls_generated/fallback/scalar_aav_uf1/transpose.cmake index 3f5ed59..a267cac 100644 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/transpose.cmake +++ b/primitives/fls_generated/fallback/scalar_aav_uf1/transpose.cmake @@ -4,21 +4,12 @@ target_compile_definitions(fallback_scalar_aav_1024_uf1_transpose PRIVATE IS_SCA LIST(APPEND FLS_GENERATED_OBJECT_FILES $) -get_target_property(TARGET_NAME fallback_scalar_aav_1024_uf1_transpose NAME) -get_target_property(TARGET_COMPILE_OPTIONS fallback_scalar_aav_1024_uf1_transpose COMPILE_OPTIONS) #------------------------------------------------------------------------------------------------------ -if (ENABLE_TESTING) - add_executable(fallback_scalar_aav_1024_uf1_transpose_test fallback_scalar_aav_1024_uf1_transpose_test.cpp) - target_link_libraries(fallback_scalar_aav_1024_uf1_transpose_test PRIVATE fallback_scalar_aav_1024_uf1_transpose) - target_link_libraries(fallback_scalar_aav_1024_uf1_transpose_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) - target_link_libraries(fallback_scalar_aav_1024_uf1_transpose_test PRIVATE gtest_main) - target_include_directories(fallback_scalar_aav_1024_uf1_transpose_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - gtest_discover_tests(fallback_scalar_aav_1024_uf1_transpose_test) +if (FLS_BUILD_TESTING) +# add_executable(fallback_scalar_aav_1024_uf1_transpose_test fallback_scalar_aav_1024_uf1_transpose_test.cpp) +# target_link_libraries(fallback_scalar_aav_1024_uf1_transpose_test PRIVATE fallback_scalar_aav_1024_uf1_transpose) +# target_link_libraries(fallback_scalar_aav_1024_uf1_transpose_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) +# target_link_libraries(fallback_scalar_aav_1024_uf1_transpose_test PRIVATE gtest_main) +# target_include_directories(fallback_scalar_aav_1024_uf1_transpose_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +# gtest_discover_tests(fallback_scalar_aav_1024_uf1_transpose_test) endif () -#------------------------------------------------------------------------------------------------------ -configure_file(${CMAKE_SOURCE_DIR}/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fallback_scalar_aav_1024_uf1_transpose_bench.hpp) -add_executable(fallback_scalar_aav_1024_uf1_transpose_bench fallback_scalar_aav_1024_uf1_transpose_bench.cpp) -target_link_libraries(fallback_scalar_aav_1024_uf1_transpose_bench PRIVATE fallback_scalar_aav_1024_uf1_transpose) -target_link_libraries(fallback_scalar_aav_1024_uf1_transpose_bench PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) -target_include_directories(fallback_scalar_aav_1024_uf1_transpose_bench PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -add_fls_benchmark(fallback_scalar_aav_1024_uf1_transpose_bench) diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/unffor.cmake b/primitives/fls_generated/fallback/scalar_aav_uf1/unffor.cmake index 380f7f8..7b4fe69 100644 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/unffor.cmake +++ b/primitives/fls_generated/fallback/scalar_aav_uf1/unffor.cmake @@ -4,21 +4,12 @@ target_compile_definitions(fallback_scalar_aav_1024_uf1_unffor PRIVATE IS_SCALAR LIST(APPEND FLS_GENERATED_OBJECT_FILES $) -get_target_property(TARGET_NAME fallback_scalar_aav_1024_uf1_unffor NAME) -get_target_property(TARGET_COMPILE_OPTIONS fallback_scalar_aav_1024_uf1_unffor COMPILE_OPTIONS) #------------------------------------------------------------------------------------------------------ -if (ENABLE_TESTING) - add_executable(fallback_scalar_aav_1024_uf1_unffor_test fallback_scalar_aav_1024_uf1_unffor_test.cpp) - target_link_libraries(fallback_scalar_aav_1024_uf1_unffor_test PRIVATE fallback_scalar_aav_1024_uf1_unffor) - target_link_libraries(fallback_scalar_aav_1024_uf1_unffor_test PRIVATE fallback_scalar_aav_1024_uf1_pack fallback_scalar_aav_1024_uf1_ffor) - target_link_libraries(fallback_scalar_aav_1024_uf1_unffor_test PRIVATE gtest_main) - target_include_directories(fallback_scalar_aav_1024_uf1_unffor_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - gtest_discover_tests(fallback_scalar_aav_1024_uf1_unffor_test) +if (FLS_BUILD_TESTING) +# add_executable(fallback_scalar_aav_1024_uf1_unffor_test fallback_scalar_aav_1024_uf1_unffor_test.cpp) +# target_link_libraries(fallback_scalar_aav_1024_uf1_unffor_test PRIVATE fallback_scalar_aav_1024_uf1_unffor) +# target_link_libraries(fallback_scalar_aav_1024_uf1_unffor_test PRIVATE fallback_scalar_aav_1024_uf1_pack fallback_scalar_aav_1024_uf1_ffor) +# target_link_libraries(fallback_scalar_aav_1024_uf1_unffor_test PRIVATE gtest_main) +# target_include_directories(fallback_scalar_aav_1024_uf1_unffor_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +# gtest_discover_tests(fallback_scalar_aav_1024_uf1_unffor_test) endif () -#------------------------------------------------------------------------------------------------------ -configure_file(${CMAKE_SOURCE_DIR}/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fallback_scalar_aav_1024_uf1_unffor_bench.hpp) -add_executable(fallback_scalar_aav_1024_uf1_unffor_bench fallback_scalar_aav_1024_uf1_unffor_bench.cpp) -target_link_libraries(fallback_scalar_aav_1024_uf1_unffor_bench PRIVATE fallback_scalar_aav_1024_uf1_unffor) -target_link_libraries(fallback_scalar_aav_1024_uf1_unffor_bench PRIVATE fallback_scalar_aav_1024_uf1_pack fallback_scalar_aav_1024_uf1_ffor) -target_include_directories(fallback_scalar_aav_1024_uf1_unffor_bench PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -add_fls_benchmark(fallback_scalar_aav_1024_uf1_unffor_bench) diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/unpack.cmake b/primitives/fls_generated/fallback/scalar_aav_uf1/unpack.cmake index f348cb0..6fde375 100644 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/unpack.cmake +++ b/primitives/fls_generated/fallback/scalar_aav_uf1/unpack.cmake @@ -4,21 +4,12 @@ target_compile_definitions(fallback_scalar_aav_1024_uf1_unpack PRIVATE IS_SCALAR LIST(APPEND FLS_GENERATED_OBJECT_FILES $) -get_target_property(TARGET_NAME fallback_scalar_aav_1024_uf1_unpack NAME) -get_target_property(TARGET_COMPILE_OPTIONS fallback_scalar_aav_1024_uf1_unpack COMPILE_OPTIONS) #------------------------------------------------------------------------------------------------------ -if (ENABLE_TESTING) - add_executable(fallback_scalar_aav_1024_uf1_unpack_test fallback_scalar_aav_1024_uf1_unpack_test.cpp) - target_link_libraries(fallback_scalar_aav_1024_uf1_unpack_test PRIVATE fallback_scalar_aav_1024_uf1_unpack) - target_link_libraries(fallback_scalar_aav_1024_uf1_unpack_test PRIVATE fallback_scalar_aav_1024_uf1_pack fallback_scalar_aav_1024_uf1_ffor) - target_link_libraries(fallback_scalar_aav_1024_uf1_unpack_test PRIVATE gtest_main) - target_include_directories(fallback_scalar_aav_1024_uf1_unpack_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - gtest_discover_tests(fallback_scalar_aav_1024_uf1_unpack_test) +if (FLS_BUILD_TESTING) + # add_executable(fallback_scalar_aav_1024_uf1_unpack_test fallback_scalar_aav_1024_uf1_unpack_test.cpp) + # target_link_libraries(fallback_scalar_aav_1024_uf1_unpack_test PRIVATE fallback_scalar_aav_1024_uf1_unpack) + # target_link_libraries(fallback_scalar_aav_1024_uf1_unpack_test PRIVATE fallback_scalar_aav_1024_uf1_pack fallback_scalar_aav_1024_uf1_ffor) + # target_link_libraries(fallback_scalar_aav_1024_uf1_unpack_test PRIVATE gtest_main) + # target_include_directories(fallback_scalar_aav_1024_uf1_unpack_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + # gtest_discover_tests(fallback_scalar_aav_1024_uf1_unpack_test) endif () -#------------------------------------------------------------------------------------------------------ -configure_file(${CMAKE_SOURCE_DIR}/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fallback_scalar_aav_1024_uf1_unpack_bench.hpp) -add_executable(fallback_scalar_aav_1024_uf1_unpack_bench fallback_scalar_aav_1024_uf1_unpack_bench.cpp) -target_link_libraries(fallback_scalar_aav_1024_uf1_unpack_bench PRIVATE fallback_scalar_aav_1024_uf1_unpack) -target_link_libraries(fallback_scalar_aav_1024_uf1_unpack_bench PRIVATE fallback_scalar_aav_1024_uf1_pack fallback_scalar_aav_1024_uf1_ffor) -target_include_directories(fallback_scalar_aav_1024_uf1_unpack_bench PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -add_fls_benchmark(fallback_scalar_aav_1024_uf1_unpack_bench) diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/unrsum.cmake b/primitives/fls_generated/fallback/scalar_aav_uf1/unrsum.cmake index 4ad1cbd..8b0f319 100644 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/unrsum.cmake +++ b/primitives/fls_generated/fallback/scalar_aav_uf1/unrsum.cmake @@ -4,21 +4,12 @@ target_compile_definitions(fallback_scalar_aav_1024_uf1_unrsum PRIVATE IS_SCALAR LIST(APPEND FLS_GENERATED_OBJECT_FILES $) -get_target_property(TARGET_NAME fallback_scalar_aav_1024_uf1_unrsum NAME) -get_target_property(TARGET_COMPILE_OPTIONS fallback_scalar_aav_1024_uf1_unrsum COMPILE_OPTIONS) #------------------------------------------------------------------------------------------------------ -if (ENABLE_TESTING) - add_executable(fallback_scalar_aav_1024_uf1_unrsum_test fallback_scalar_aav_1024_uf1_unrsum_test.cpp) - target_link_libraries(fallback_scalar_aav_1024_uf1_unrsum_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum) - target_link_libraries(fallback_scalar_aav_1024_uf1_unrsum_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) - target_link_libraries(fallback_scalar_aav_1024_uf1_unrsum_test PRIVATE gtest_main) - target_include_directories(fallback_scalar_aav_1024_uf1_unrsum_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - gtest_discover_tests(fallback_scalar_aav_1024_uf1_unrsum_test) -endif () -#------------------------------------------------------------------------------------------------------ -configure_file(${CMAKE_SOURCE_DIR}/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fallback_scalar_aav_1024_uf1_unrsum_bench.hpp) -add_executable(fallback_scalar_aav_1024_uf1_unrsum_bench fallback_scalar_aav_1024_uf1_unrsum_bench.cpp) -target_link_libraries(fallback_scalar_aav_1024_uf1_unrsum_bench PRIVATE fallback_scalar_aav_1024_uf1_unrsum) -target_link_libraries(fallback_scalar_aav_1024_uf1_unrsum_bench PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) -target_include_directories(fallback_scalar_aav_1024_uf1_unrsum_bench PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -add_fls_benchmark(fallback_scalar_aav_1024_uf1_unrsum_bench) +if (FLS_BUILD_TESTING) +# add_executable(fallback_scalar_aav_1024_uf1_unrsum_test fallback_scalar_aav_1024_uf1_unrsum_test.cpp) +# target_link_libraries(fallback_scalar_aav_1024_uf1_unrsum_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum) +# target_link_libraries(fallback_scalar_aav_1024_uf1_unrsum_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) +# target_link_libraries(fallback_scalar_aav_1024_uf1_unrsum_test PRIVATE gtest_main) +# target_include_directories(fallback_scalar_aav_1024_uf1_unrsum_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +# gtest_discover_tests(fallback_scalar_aav_1024_uf1_unrsum_test) +endif () \ No newline at end of file diff --git a/primitives/fls_generated/fallback/scalar_aav_uf1/untranspose.cmake b/primitives/fls_generated/fallback/scalar_aav_uf1/untranspose.cmake index fc29ed6..dc656aa 100644 --- a/primitives/fls_generated/fallback/scalar_aav_uf1/untranspose.cmake +++ b/primitives/fls_generated/fallback/scalar_aav_uf1/untranspose.cmake @@ -4,21 +4,12 @@ target_compile_definitions(fallback_scalar_aav_1024_uf1_untranspose PRIVATE IS_S LIST(APPEND FLS_GENERATED_OBJECT_FILES $) -get_target_property(TARGET_NAME fallback_scalar_aav_1024_uf1_untranspose NAME) -get_target_property(TARGET_COMPILE_OPTIONS fallback_scalar_aav_1024_uf1_untranspose COMPILE_OPTIONS) #------------------------------------------------------------------------------------------------------ -if (ENABLE_TESTING) +if (FLS_BUILD_TESTING) add_executable(fallback_scalar_aav_1024_uf1_untranspose_test fallback_scalar_aav_1024_uf1_untranspose_test.cpp) target_link_libraries(fallback_scalar_aav_1024_uf1_untranspose_test PRIVATE fallback_scalar_aav_1024_uf1_untranspose) target_link_libraries(fallback_scalar_aav_1024_uf1_untranspose_test PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) target_link_libraries(fallback_scalar_aav_1024_uf1_untranspose_test PRIVATE gtest_main) target_include_directories(fallback_scalar_aav_1024_uf1_untranspose_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) gtest_discover_tests(fallback_scalar_aav_1024_uf1_untranspose_test) -endif () -#------------------------------------------------------------------------------------------------------ -configure_file(${CMAKE_SOURCE_DIR}/fls_bench/fls_bench.hpp ${CMAKE_CURRENT_BINARY_DIR}/fallback_scalar_aav_1024_uf1_untranspose_bench.hpp) -add_executable(fallback_scalar_aav_1024_uf1_untranspose_bench fallback_scalar_aav_1024_uf1_untranspose_bench.cpp) -target_link_libraries(fallback_scalar_aav_1024_uf1_untranspose_bench PRIVATE fallback_scalar_aav_1024_uf1_untranspose) -target_link_libraries(fallback_scalar_aav_1024_uf1_untranspose_bench PRIVATE fallback_scalar_aav_1024_uf1_unrsum fallback_scalar_aav_1024_uf1_untranspose) -target_include_directories(fallback_scalar_aav_1024_uf1_untranspose_bench PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -add_fls_benchmark(fallback_scalar_aav_1024_uf1_untranspose_bench) +endif () \ No newline at end of file