diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake index 5912c5c02ede7..426189481575b 100644 --- a/cmake/cpu_extension.cmake +++ b/cmake/cpu_extension.cmake @@ -16,10 +16,16 @@ include_directories("${CMAKE_SOURCE_DIR}/csrc") # # Check the compile flags # -list(APPEND CXX_COMPILE_FLAGS - "-fopenmp" - "-mf16c" - "-DVLLM_CPU_EXTENSION") +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le") + list(APPEND CXX_COMPILE_FLAGS + "-fopenmp" + "-DVLLM_CPU_EXTENSION") +else() + list(APPEND CXX_COMPILE_FLAGS + "-fopenmp" + "-mf16c" + "-DVLLM_CPU_EXTENSION") +endif() execute_process(COMMAND cat /proc/cpuinfo RESULT_VARIABLE CPUINFO_RET diff --git a/csrc/cpu/attention.cpp b/csrc/cpu/attention.cpp index e73eca1b345fd..e6c03dcb034fd 100644 --- a/csrc/cpu/attention.cpp +++ b/csrc/cpu/attention.cpp @@ -24,12 +24,20 @@ struct KernelVecType { template <> struct KernelVecType { +#ifdef __powerpc64__ + // Power architecture-specific vector types + using q_load_vec_type = vec_op::FP32Vec8; + using k_load_vec_type = vec_op::FP32Vec16; + using v_load_vec_type = vec_op::FP32Vec16; +#else + // Fallback for other architectures, including x86 using q_load_vec_type = vec_op::FP16Vec8; - using q_vec_type = vec_op::FP32Vec16; using k_load_vec_type = vec_op::FP16Vec16; + using v_load_vec_type = vec_op::FP16Vec16; +#endif + using q_vec_type = vec_op::FP32Vec16; using k_vec_type = vec_op::FP32Vec16; using qk_acc_vec_type = vec_op::FP32Vec16; - using v_load_vec_type = vec_op::FP16Vec16; }; #ifdef __AVX512BF16__ diff --git a/csrc/cpu/quant.cpp b/csrc/cpu/quant.cpp index f42fa2361a2db..d9aed657a3113 100644 --- a/csrc/cpu/quant.cpp +++ b/csrc/cpu/quant.cpp @@ -25,7 +25,13 @@ struct KernelVecType { template <> struct KernelVecType { +#ifdef __powerpc64__ + // Power architecture-specific vector type + using load_vec_type = vec_op::FP32Vec16; +#else + // Fallback for other architectures using load_vec_type = vec_op::FP16Vec16; +#endif using azp_adj_load_vec_type = vec_op::INT32Vec16; using cvt_vec_type = vec_op::FP32Vec16; };