Skip to content

Commit

Permalink
Resolve 'illegal instruction' for AVX512 tests
Browse files Browse the repository at this point in the history
  • Loading branch information
pratham-mcw committed Nov 28, 2024
1 parent 533a84e commit cba8c4d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 14 deletions.
28 changes: 14 additions & 14 deletions src/x8-lut/x8-lut.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,23 @@ XNN_UKERNEL_WITH_PARAMS(0, xnn_x8_lut_ukernel__scalar_u8, 8, uint8_t, void, null
XNN_UKERNEL_WITH_PARAMS(0, xnn_x8_lut_ukernel__scalar_u16, 16, uint8_t, void, nullptr)

// x8 LUT microkernel entries that are only emitted when XNN_ARCH_ARM64 is set.
// Each entry passes an arch flag, the microkernel symbol, a number matching
// the symbol's _uN suffix, and uint8_t / void / nullptr to
// XNN_UKERNEL_WITH_PARAMS (defined elsewhere; presumably the number is the
// batch tile -- TODO confirm against the macro definition).
// NOTE(review): every kernel appears twice below, first tagged xnn_arch_arm64
// and then xnn_arch_arm_neon. This looks like a rendered diff without +/-
// markers (old lines followed by their replacements) rather than compilable
// source -- confirm against the real header before relying on this listing.
#if XNN_ARCH_ARM64
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm64, xnn_x8_lut_ukernel__aarch64_neon_tbx128x4_u16, 16, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm64, xnn_x8_lut_ukernel__aarch64_neon_tbx128x4_u32, 32, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm64, xnn_x8_lut_ukernel__aarch64_neon_tbx128x4_u48, 48, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm64, xnn_x8_lut_ukernel__aarch64_neon_tbx128x4_u64, 64, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_x8_lut_ukernel__aarch64_neon_tbx128x4_u16, 16, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_x8_lut_ukernel__aarch64_neon_tbx128x4_u32, 32, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_x8_lut_ukernel__aarch64_neon_tbx128x4_u48, 48, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_x8_lut_ukernel__aarch64_neon_tbx128x4_u64, 64, uint8_t, void, nullptr)
#endif // XNN_ARCH_ARM64

// x8 LUT microkernel entries that are only emitted when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. Each entry passes an arch flag, the microkernel
// symbol, a number matching the symbol's _uN suffix, and uint8_t / void /
// nullptr to XNN_UKERNEL_WITH_PARAMS (defined elsewhere).
// NOTE(review): every kernel appears twice below, first tagged xnn_arch_x86
// and then xnn_arch_x86_ssse3. This looks like a rendered diff without +/-
// markers (old lines followed by their replacements) rather than compilable
// source -- confirm against the real header.
// NOTE(review): in the second group the avx_* and avx2_* kernels carry the
// same xnn_arch_x86_ssse3 flag as the ssse3_* kernels. If this flag is what
// the tests use to decide whether a kernel may run, an SSSE3-only CPU would
// still execute AVX/AVX2 kernels -- confirm whether dedicated AVX/AVX2 flags
// should be used for those entries.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__ssse3_u16, 16, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__ssse3_u32, 32, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__avx_u16, 16, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__avx_u32, 32, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__avx_u48, 48, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__avx_u64, 64, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__avx2_u32, 32, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__avx2_u64, 64, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__avx2_u96, 96, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_x8_lut_ukernel__avx2_u128, 128, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__ssse3_u16, 16, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__ssse3_u32, 32, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__avx_u16, 16, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__avx_u32, 32, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__avx_u48, 48, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__avx_u64, 64, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__avx2_u32, 32, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__avx2_u64, 64, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__avx2_u96, 96, uint8_t, void, nullptr)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_ssse3, xnn_x8_lut_ukernel__avx2_u128, 128, uint8_t, void, nullptr)
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ENABLE_AVX512VBMI && (XNN_ARCH_X86 || XNN_ARCH_X86_64)
Expand Down
5 changes: 5 additions & 0 deletions test/x8-lut.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,18 +99,21 @@ class LUTMicrokernelTester {
// Expands to a test case TEST(ukernel, batch_eq) that first invokes
// TEST_REQUIRES_ARCH_FLAGS(arch_flags) and then runs LUTMicrokernelTester on
// `ukernel` with a batch size of exactly `batch_tile`.
//   arch_flags - forwarded unchanged to TEST_REQUIRES_ARCH_FLAGS (defined
//                elsewhere; presumably skips the test when the CPU lacks the
//                required features -- TODO confirm).
//   ukernel    - microkernel under test; also used as the test suite name.
//   batch_tile - batch size handed to the tester.
//   ...        - extra arguments are accepted and ignored.
#define XNN_TEST_LUT_BATCH_EQ(arch_flags, ukernel, batch_tile, ...) \
TEST(ukernel, batch_eq) \
{ \
TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
LUTMicrokernelTester().batch_size(batch_tile).Test(ukernel); \
}

// Expands to a test case TEST(ukernel, batch_div) that first invokes
// TEST_REQUIRES_ARCH_FLAGS(arch_flags) and then runs LUTMicrokernelTester on
// `ukernel` with a batch size of exactly `batch_tile`.
//   arch_flags - forwarded unchanged to TEST_REQUIRES_ARCH_FLAGS.
//   ukernel    - microkernel under test; also used as the test suite name.
//   batch_tile - batch size handed to the tester.
//   ...        - extra arguments are accepted and ignored.
// NOTE(review): the body is identical to the batch_eq test (a single run at
// batch_tile); no other multiple of the tile is exercised here. Confirm
// whether batch_div was meant to iterate over several multiples.
#define XNN_TEST_LUT_BATCH_DIV(arch_flags, ukernel, batch_tile, ...) \
TEST(ukernel, batch_div) \
{ \
TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
LUTMicrokernelTester().batch_size(batch_tile).Test(ukernel); \
}

#define XNN_TEST_LUT_BATCH_LT(arch_flags, ukernel, batch_tile, ...) \
TEST(ukernel, batch_lt) \
{ \
TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
for (size_t batch= 1; batch < batch_tile; batch++) { \
LUTMicrokernelTester().batch_size(batch).Test(ukernel); \
} \
Expand All @@ -119,6 +122,7 @@ class LUTMicrokernelTester {
#define XNN_TEST_LUT_BATCH_GT(arch_flags, ukernel, batch_tile, ...) \
TEST(ukernel, batch_gt) \
{ \
TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
for (size_t batch = 2 * batch_tile; batch < 10 * batch_tile; batch += batch_tile) { \
LUTMicrokernelTester().batch_size(batch).Test(ukernel); \
} \
Expand All @@ -127,6 +131,7 @@ class LUTMicrokernelTester {
#define XNN_TEST_LUT_BATCH_IP(arch_flags, ukernel, batch_tile, ...) \
TEST(ukernel, batch_ip) \
{ \
TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
for (size_t batch = 2 * batch_tile; batch < 10 * batch_tile; batch += batch_tile) { \
LUTMicrokernelTester().batch_size(batch).inplace(true).Test(ukernel); \
} \
Expand Down

0 comments on commit cba8c4d

Please sign in to comment.