diff --git a/BUILD.bazel b/BUILD.bazel index 3ce5e116cd8..fa3fc9b54f0 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -110,6 +110,7 @@ MICROKERNEL_DEFS = [ "src/f32-pavgpool/f32-pavgpool-minmax.h", "src/f32-qs8-vcvt/f32-qs8-vcvt.h", "src/f32-qu8-vcvt/f32-qu8-vcvt.h", + "src/f32-radddextexp/f32-radddextexp.h", "src/f32-vabs/f32-vabs.h", "src/f32-vbinary/f32-vadd.h", "src/f32-vbinary/f32-vaddc.h", diff --git a/scripts/generate-tests.sh b/scripts/generate-tests.sh index ef2565fb91c..5bf4f879eb7 100755 --- a/scripts/generate-tests.sh +++ b/scripts/generate-tests.sh @@ -246,9 +246,6 @@ tools/generate-ibilinear-chw-test.py --spec test/f32-ibilinear-chw.yaml --output ### Tests for RAddExpMinusMax micro-kernels tools/generate-raddexpminusmax-test.py --spec test/f32-raddexpminusmax.yaml --output test/f32-raddexpminusmax.cc & -### Tests for RAddExtExp micro-kernels -tools/generate-raddextexp-test.py --spec test/f32-raddextexp.yaml --output test/f32-raddextexp.cc & - ### Tests for RAddStoreExpMinusMax micro-kernels tools/generate-raddstoreexpminusmax-test.py --spec test/f16-raddstoreexpminusmax.yaml --output test/f16-raddstoreexpminusmax.cc & tools/generate-raddstoreexpminusmax-test.py --spec test/f32-raddstoreexpminusmax.yaml --output test/f32-raddstoreexpminusmax.cc & diff --git a/src/f32-raddextexp/f32-raddextexp.h b/src/f32-raddextexp/f32-raddextexp.h new file mode 100644 index 00000000000..22f3dbf9db3 --- /dev/null +++ b/src/f32-raddextexp/f32-raddextexp.h @@ -0,0 +1,55 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, element_tile, datatype) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, element_tile, datatype) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u64, 64, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc2, 64, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc4, 64, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u72, 72, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u72_acc3, 72, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u80, 80, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc2, 80, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc5, 80, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u96, 96, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc2, 96, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc3, 96, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc6, 96, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128, 128, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc2, 128, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc4, 128, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144, 144, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144_acc3, 144, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160, 160, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc2, 160, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc5, 160, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192, 192, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc2, 192, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc3, 192, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_avx512f, xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc6, 192, float, struct xnn_f32_default_params, xnn_f32_raddextexp_ukernel_fn) +#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif + +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif diff --git a/test/f32-raddextexp.cc b/test/f32-raddextexp.cc index afd9bc0a021..caddb080008 100644 --- a/test/f32-raddextexp.cc +++ b/test/f32-raddextexp.cc @@ -2,10 +2,6 @@ // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. -// -// Auto-generated file. Do not edit! -// Specification: test/f32-raddextexp.yaml -// Generator: tools/generate-raddextexp-test.py #include @@ -14,890 +10,37 @@ #include "xnnpack/raddextexp.h" #include "raddextexp-microkernel-tester.h" - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U64, elements_eq_64) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(64) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64, elements_div_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64, elements_lt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 64; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64, elements_gt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 65; elements < 128; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U64_ACC2, elements_eq_64) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(64) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc2); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64_ACC2, elements_div_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64_ACC2, elements_lt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 64; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64_ACC2, elements_gt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 65; elements < 128; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc2); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U64_ACC4, elements_eq_64) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(64) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc4); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64_ACC4, elements_div_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 128; elements < 640; elements += 64) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc4); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64_ACC4, elements_lt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 64; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc4); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U64_ACC4, elements_gt_64) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 65; elements < 128; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc4); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U72, elements_eq_72) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(72) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u72); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U72, elements_div_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 144; elements < 720; elements += 72) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u72); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U72, elements_lt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 72; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u72); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U72, elements_gt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 73; elements < 144; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u72); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U72_ACC3, elements_eq_72) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(72) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u72_acc3); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U72_ACC3, elements_div_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 144; elements < 720; elements += 72) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u72_acc3); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U72_ACC3, elements_lt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 72; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u72_acc3); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U72_ACC3, elements_gt_72) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 73; elements < 144; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u72_acc3); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U80, elements_eq_80) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(80) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80, elements_div_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80, elements_lt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 80; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80, elements_gt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 81; elements < 160; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U80_ACC2, elements_eq_80) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(80) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc2); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80_ACC2, elements_div_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80_ACC2, elements_lt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 80; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80_ACC2, elements_gt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 81; elements < 160; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc2); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U80_ACC5, elements_eq_80) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(80) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc5); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80_ACC5, elements_div_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 160; elements < 800; elements += 80) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc5); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80_ACC5, elements_lt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 80; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc5); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U80_ACC5, elements_gt_80) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 81; elements < 160; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc5); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U96, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(96) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC2, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(96) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc2); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC2, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC2, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC2, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc2); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC3, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(96) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc3); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC3, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc3); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC3, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc3); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC3, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc3); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC6, elements_eq_96) { - TEST_REQUIRES_X86_AVX2; - RAddExtExpMicrokernelTester() - .elements(96) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc6); - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC6, elements_div_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 192; elements < 960; elements += 96) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc6); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC6, elements_lt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 1; elements < 96; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc6); - } - } - - TEST(F32_RADDEXTEXP__AVX2_P5_U96_ACC6, elements_gt_96) { - TEST_REQUIRES_X86_AVX2; - for (size_t elements = 97; elements < 192; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc6); - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128, elements_eq_128) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(128) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128, elements_div_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 256; elements < 1280; elements += 128) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128, elements_lt_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 128; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128, elements_gt_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 129; elements < 256; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128_ACC2, elements_eq_128) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(128) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc2); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128_ACC2, elements_div_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 256; elements < 1280; elements += 128) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128_ACC2, elements_lt_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 128; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128_ACC2, elements_gt_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 129; elements < 256; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc2); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128_ACC4, elements_eq_128) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(128) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc4); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128_ACC4, elements_div_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 256; elements < 1280; elements += 128) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc4); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128_ACC4, elements_lt_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 128; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc4); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U128_ACC4, elements_gt_128) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 129; elements < 256; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc4); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U144, elements_eq_144) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(144) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U144, elements_div_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 288; elements < 1440; elements += 144) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U144, elements_lt_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 144; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U144, elements_gt_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 145; elements < 288; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U144_ACC3, elements_eq_144) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(144) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144_acc3); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U144_ACC3, elements_div_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 288; elements < 1440; elements += 144) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144_acc3); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U144_ACC3, elements_lt_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 144; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144_acc3); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U144_ACC3, elements_gt_144) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 145; elements < 288; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144_acc3); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160, elements_eq_160) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(160) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160, elements_div_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 320; elements < 1600; elements += 160) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160, elements_lt_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 160; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160, elements_gt_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 161; elements < 320; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160_ACC2, elements_eq_160) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(160) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc2); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160_ACC2, elements_div_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 320; elements < 1600; elements += 160) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160_ACC2, elements_lt_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 160; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160_ACC2, elements_gt_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 161; elements < 320; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc2); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160_ACC5, elements_eq_160) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(160) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc5); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160_ACC5, elements_div_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 320; elements < 1600; elements += 160) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc5); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160_ACC5, elements_lt_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 160; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc5); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U160_ACC5, elements_gt_160) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 161; elements < 320; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc5); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192, elements_eq_192) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(192) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192, elements_div_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 384; elements < 1920; elements += 192) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192, elements_lt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 192; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192, elements_gt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 193; elements < 384; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC2, elements_eq_192) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(192) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc2); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC2, elements_div_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 384; elements < 1920; elements += 192) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC2, elements_lt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 192; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc2); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC2, elements_gt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 193; elements < 384; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc2); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC3, elements_eq_192) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(192) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc3); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC3, elements_div_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 384; elements < 1920; elements += 192) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc3); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC3, elements_lt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 192; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc3); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC3, elements_gt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 193; elements < 384; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc3); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - - -#if XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC6, elements_eq_192) { - TEST_REQUIRES_X86_AVX512F; - RAddExtExpMicrokernelTester() - .elements(192) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc6); - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC6, elements_div_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 384; elements < 1920; elements += 192) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc6); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC6, elements_lt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 1; elements < 192; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc6); - } - } - - TEST(F32_RADDEXTEXP__AVX512F_P5_SCALEF_U192_ACC6, elements_gt_192) { - TEST_REQUIRES_X86_AVX512F; - for (size_t elements = 193; elements < 384; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc6); - } - } -#endif // XNN_ENABLE_AVX512F && (XNN_ARCH_X86 || XNN_ARCH_X86_64) +#define XNN_TEST_RADDEXTEXP_ELEMENT_EQ(ukernel, arch_flags, element_tile, ...) \ + TEST(ukernel, element_eq) \ + { \ + RAddExtExpMicrokernelTester().elements(element_tile).Test(ukernel); \ + } +#define XNN_TEST_RADDEXTEXP_ELEMENT_DIV(ukernel, arch_flags, element_tile, ...) \ + TEST(ukernel, element_gt) \ + { \ + for (size_t element_size = element_tile * 2; element_size < element_tile * 10; element_size += element_tile) { \ + RAddExtExpMicrokernelTester().elements(element_size).Test(ukernel); \ + } \ + } +#define XNN_TEST_RADDEXTEXP_ELEMENT_LT(ukernel, arch_flags, element_tile, ...) \ + TEST(ukernel, element_lt) \ + { \ + for (size_t element_size = 1; element_size < element_tile; element_size++) { \ + RAddExtExpMicrokernelTester().elements(element_size).Test(ukernel); \ + } \ + } +#define XNN_TEST_RADDEXTEXP_ELEMENT_GT(ukernel, arch_flags, element_tile, ...) \ + TEST(ukernel, element_div) \ + { \ + for (size_t element_size = element_tile + 1; element_size < (element_tile == 1 ? 10 : element_tile * 2); \ + element_size++) { \ + RAddExtExpMicrokernelTester().elements(element_size).Test(ukernel); \ + } \ + } + +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, element_tile, datatype, params_type, init_params) XNN_TEST_RADDEXTEXP_ELEMENT_EQ(ukernel,arch_flags, element_tile, init_params); \ +XNN_TEST_RADDEXTEXP_ELEMENT_DIV(ukernel, arch_flags, element_tile, init_params); \ +XNN_TEST_RADDEXTEXP_ELEMENT_LT(ukernel, arch_flags, element_tile, init_params); \ +XNN_TEST_RADDEXTEXP_ELEMENT_GT(ukernel, arch_flags, element_tile, init_params); +#include "f32-raddextexp/f32-raddextexp.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/f32-raddextexp.yaml b/test/f32-raddextexp.yaml deleted file mode 100644 index ac18103efef..00000000000 --- a/test/f32-raddextexp.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2019 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# x86 AVX -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u64 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc2 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u64_acc4 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u72 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u72_acc3 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u80 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc2 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u80_acc5 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u96 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc2 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc3 -- name: xnn_f32_raddextexp_ukernel__avx2_p5_u96_acc6 -# x86 AVX512 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc2 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u128_acc4 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u144_acc3 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc2 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u160_acc5 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc2 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc3 -- name: xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_u192_acc6 diff --git a/tools/generate-raddextexp-test.py b/tools/generate-raddextexp-test.py deleted file mode 100755 index 404d1ba7334..00000000000 --- a/tools/generate-raddextexp-test.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python -# Copyright 2019 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import argparse -import codecs -import math -import os -import re -import sys -import yaml - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -import xngen -import xnncommon - - -parser = argparse.ArgumentParser( - description='RAddExtExp microkernel test generator') -parser.add_argument("-s", "--spec", metavar="FILE", required=True, - help="Specification (YAML) file") -parser.add_argument("-o", "--output", metavar="FILE", required=True, - help='Output (C++ source) file') -parser.set_defaults(defines=list()) - - -def split_ukernel_name(name): - match = re.fullmatch(r"xnn_(f16|f32)_raddextexp_ukernel__(.+)_u(\d+)(_acc(\d+))?", name) - if match is None: - raise ValueError("Unexpected microkernel name: " + name) - elements_tile = int(match.group(3)) - - arch, isa, assembly = xnncommon.parse_target_name(target_name=match.group(2)) - return elements_tile, arch, isa - - -RADDEXTEXP_TEST_TEMPLATE = """\ -TEST(${TEST_NAME}, elements_eq_${ELEMENTS_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - RAddExtExpMicrokernelTester() - .elements(${ELEMENTS_TILE}) - .Test(${TEST_FUNCTION}); -} - -$if ELEMENTS_TILE > 1: - TEST(${TEST_NAME}, elements_div_${ELEMENTS_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = ${ELEMENTS_TILE*2}; elements < ${ELEMENTS_TILE*10}; elements += ${ELEMENTS_TILE}) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}); - } - } - - TEST(${TEST_NAME}, elements_lt_${ELEMENTS_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = 1; elements < ${ELEMENTS_TILE}; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}); - } - } - -TEST(${TEST_NAME}, elements_gt_${ELEMENTS_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t elements = ${ELEMENTS_TILE+1}; elements < ${10 if ELEMENTS_TILE == 1 else ELEMENTS_TILE*2}; elements++) { - RAddExtExpMicrokernelTester() - .elements(elements) - .Test(${TEST_FUNCTION}); - } -} -""" - - -def generate_test_cases(ukernel, elements_tile, isa): - """Generates all tests cases for a RAddExtExp micro-kernel. - - Args: - ukernel: C name of the micro-kernel function. - elements_tile: Number of batch elements processed per one iteration of the - inner loop of the micro-kernel. - isa: instruction set required to run the micro-kernel. Generated unit test - will skip execution if the host processor doesn't support this ISA. - - Returns: - Code for the test case. - """ - _, test_name = ukernel.split("_", 1) - _, datatype, _ = ukernel.split("_", 2) - return xngen.preprocess(RADDEXTEXP_TEST_TEMPLATE, { - "TEST_FUNCTION": ukernel, - "TEST_NAME": test_name.upper().replace("UKERNEL_", ""), - "DATATYPE": datatype, - "ELEMENTS_TILE": elements_tile, - "ISA_CHECK": xnncommon.generate_isa_check_macro(isa), - }) - - -def main(args): - options = parser.parse_args(args) - - with codecs.open(options.spec, "r", encoding="utf-8") as spec_file: - spec_yaml = yaml.safe_load(spec_file) - if not isinstance(spec_yaml, list): - raise ValueError("expected a list of micro-kernels in the spec") - - tests = """\ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. -// -// Auto-generated file. Do not edit! -// Specification: {specification} -// Generator: {generator} - - -#include -#include "xnnpack/common.h" -#include "xnnpack/isa-checks.h" -#include "xnnpack/raddextexp.h" -#include "raddextexp-microkernel-tester.h" -""".format(specification=options.spec, generator=sys.argv[0]) - - for ukernel_spec in spec_yaml: - name = ukernel_spec["name"] - elements_tile, arch, isa = split_ukernel_name(name) - - test_case = generate_test_cases(name, elements_tile, isa) - tests += "\n\n" + xnncommon.postprocess_test_case(test_case, arch, isa) - - xnncommon.overwrite_if_changed(options.output, tests) - - -if __name__ == "__main__": - main(sys.argv[1:])