// NOTE(review): the original SOURCE is a unified diff whose angle-bracket tokens were
// stripped by extraction (every `#include <...>` header name, template parameter list
// and template argument list is missing). The reconstruction below restores them and
// fixes one real defect: the NEON `if constexpr` branch must also require T == float,
// because it uses float32x4 intrinsics while BiasAndClamp is now templated on T.

// -------------------------------------------------------------------------------------
// compute/cker/include/cker/ctdef.h  (new file introduced by the patch)
// -------------------------------------------------------------------------------------
#pragma once

#include <type_traits>

// Compile time definitions and related trait-like utilities

namespace nnfw
{
// This enum specifies all compile-time features that should be testable using the
// `is_defined` and `is_defined_v` templates below.
enum class Define
{
  PLATFORM_X86,
  PLATFORM_AARCH64,
  USE_NEON
};

// A compile-time feature-detection structure which defaults to false for all defines.
// It is specialized below for each Define value whose preprocessor condition holds.
//
// This struct should be used the following way:
//
//   if constexpr (nnfw::is_defined<nnfw::Define::USE_NEON>::value) { ... }
template <Define FEATURE> struct is_defined : std::false_type
{
};

// A helper variable template to be used the following way:
//
//   if constexpr (nnfw::is_defined_v<nnfw::Define::USE_NEON>) { ... }
//
// (The patch had `inline constexpr const bool`; the redundant `const` is dropped.)
template <Define FEATURE> inline constexpr bool is_defined_v = is_defined<FEATURE>::value;

/* ********************************************************************************** */

#if defined(CKER_X86_PLATFORM)
template <> struct is_defined<Define::PLATFORM_X86> : std::true_type
{
};
#endif

#if defined(__aarch64__) || defined(_M_ARM64)
template <> struct is_defined<Define::PLATFORM_AARCH64> : std::true_type
{
};
#endif

#if defined(__ARM_NEON__) || defined(__ARM_NEON)
template <> struct is_defined<Define::USE_NEON> : std::true_type
{
};
#endif

} // namespace nnfw

// -------------------------------------------------------------------------------------
// compute/cker/include/cker/operation/Common.h — BiasAndClamp, templated by the patch.
// (The real header also does: #include "cker/neon/neon_check.h", "cker/Utils.h",
//  and now "cker/ctdef.h".)
// -------------------------------------------------------------------------------------
#include <cassert>

namespace nnfw
{
namespace cker
{

// Adds `bias_data` (bias_size elements, applied cyclically) to `array_data`
// (array_size elements; array_size must be a multiple of bias_size) and clamps
// every result into [clamp_min, clamp_max], in place.
//
// Note: see b/132215220: in May 2019 we thought it would be OK to replace this
// with the Eigen one-liner. This turned out to severely regress performance:
// +4ms (i.e. 8%) on MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
template <typename T>
inline void BiasAndClamp(T clamp_min, T clamp_max, int bias_size, const T *bias_data,
                         int array_size, T *array_data)
{
  assert((array_size % bias_size) == 0);

  // BUGFIX vs. the patch: the vectorized branch uses float32 NEON intrinsics
  // (vdupq_n_f32 & friends), so it is only valid for T == float. Without the
  // std::is_same_v guard, instantiating e.g. BiasAndClamp<double> on a NEON
  // platform would fail to compile. Non-float types take the scalar path.
  if constexpr (is_defined_v<Define::USE_NEON> && std::is_same_v<T, float>)
  {
    T *array_ptr = array_data;
    T *array_end_ptr = array_ptr + array_size;
    const auto clamp_min_vec = vdupq_n_f32(clamp_min);
    const auto clamp_max_vec = vdupq_n_f32(clamp_max);
    for (; array_ptr != array_end_ptr; array_ptr += bias_size)
    {
      int i = 0;
      // Main loop: 16 floats (four q-registers) per iteration.
      for (; i <= bias_size - 16; i += 16)
      {
        auto b0 = vld1q_f32(bias_data + i);
        auto b1 = vld1q_f32(bias_data + i + 4);
        auto b2 = vld1q_f32(bias_data + i + 8);
        auto b3 = vld1q_f32(bias_data + i + 12);
        auto a0 = vld1q_f32(array_ptr + i);
        auto a1 = vld1q_f32(array_ptr + i + 4);
        auto a2 = vld1q_f32(array_ptr + i + 8);
        auto a3 = vld1q_f32(array_ptr + i + 12);
        auto x0 = vaddq_f32(a0, b0);
        auto x1 = vaddq_f32(a1, b1);
        auto x2 = vaddq_f32(a2, b2);
        auto x3 = vaddq_f32(a3, b3);
        x0 = vmaxq_f32(clamp_min_vec, x0);
        x1 = vmaxq_f32(clamp_min_vec, x1);
        x2 = vmaxq_f32(clamp_min_vec, x2);
        x3 = vmaxq_f32(clamp_min_vec, x3);
        x0 = vminq_f32(clamp_max_vec, x0);
        x1 = vminq_f32(clamp_max_vec, x1);
        x2 = vminq_f32(clamp_max_vec, x2);
        x3 = vminq_f32(clamp_max_vec, x3);
        vst1q_f32(array_ptr + i, x0);
        vst1q_f32(array_ptr + i + 4, x1);
        vst1q_f32(array_ptr + i + 8, x2);
        vst1q_f32(array_ptr + i + 12, x3);
      }
      // 4-wide vector tail.
      for (; i <= bias_size - 4; i += 4)
      {
        auto b = vld1q_f32(bias_data + i);
        auto a = vld1q_f32(array_ptr + i);
        auto x = vaddq_f32(a, b);
        x = vmaxq_f32(clamp_min_vec, x);
        x = vminq_f32(clamp_max_vec, x);
        vst1q_f32(array_ptr + i, x);
      }
      // Scalar tail for the remaining (bias_size % 4) elements.
      for (; i < bias_size; i++)
      {
        array_ptr[i] =
          ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], clamp_min, clamp_max);
      }
    }
  }
  else
  {
    // Portable scalar fallback (non-NEON builds, or non-float T).
    for (int array_offset = 0; array_offset < array_size; array_offset += bias_size)
    {
      for (int i = 0; i < bias_size; i++)
      {
        array_data[array_offset + i] = ActivationFunctionWithMinMax(
          array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
      }
    }
  }
}

} // namespace cker
} // namespace nnfw