
Commit defdc21

Merge branch 'vllm-project:main' into add-doc-linter

rafvasq authored Nov 19, 2024
2 parents 0ad8b0d + fd9f124

Showing 5 changed files with 30 additions and 9 deletions.
14 changes: 10 additions & 4 deletions cmake/cpu_extension.cmake
@@ -16,10 +16,16 @@ include_directories("${CMAKE_SOURCE_DIR}/csrc")
 #
 # Check the compile flags
 #
-list(APPEND CXX_COMPILE_FLAGS
-    "-fopenmp"
-    "-mf16c"
-    "-DVLLM_CPU_EXTENSION")
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
+    list(APPEND CXX_COMPILE_FLAGS
+        "-fopenmp"
+        "-DVLLM_CPU_EXTENSION")
+else()
+    list(APPEND CXX_COMPILE_FLAGS
+        "-fopenmp"
+        "-mf16c"
+        "-DVLLM_CPU_EXTENSION")
+endif()
 
 execute_process(COMMAND cat /proc/cpuinfo
                 RESULT_VARIABLE CPUINFO_RET
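
A note on the gating above: -mf16c is an x86-only GCC/Clang flag that enables
the F16C half-precision conversion instructions, so passing it to a ppc64le
compiler is an error. A minimal, hypothetical sketch of what the flag unlocks
on x86 (illustrative only, not part of this commit):

    // f16c_demo.cpp -- build with: g++ -O2 -mf16c -c f16c_demo.cpp
    #include <immintrin.h>
    #include <cstdint>

    // Convert 8 packed IEEE fp16 values to fp32 via the F16C vcvtph2ps
    // instruction, which is only available when compiled with -mf16c.
    void fp16_to_fp32(const std::uint16_t* src, float* dst) {
      __m128i halves = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
      __m256 floats  = _mm256_cvtph_ps(halves);
      _mm256_storeu_ps(dst, floats);
    }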
12 changes: 10 additions & 2 deletions csrc/cpu/attention.cpp
@@ -24,12 +24,20 @@ struct KernelVecType<float> {
 
 template <>
 struct KernelVecType<c10::Half> {
+#ifdef __powerpc64__
+  // Power architecture-specific vector types
+  using q_load_vec_type = vec_op::FP32Vec8;
+  using k_load_vec_type = vec_op::FP32Vec16;
+  using v_load_vec_type = vec_op::FP32Vec16;
+#else
+  // Fallback for other architectures, including x86
   using q_load_vec_type = vec_op::FP16Vec8;
-  using q_vec_type = vec_op::FP32Vec16;
   using k_load_vec_type = vec_op::FP16Vec16;
+  using v_load_vec_type = vec_op::FP16Vec16;
+#endif
+  using q_vec_type = vec_op::FP32Vec16;
   using k_vec_type = vec_op::FP32Vec16;
   using qk_acc_vec_type = vec_op::FP32Vec16;
-  using v_load_vec_type = vec_op::FP16Vec16;
 };
 
 #ifdef __AVX512BF16__
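
What the specialization above is doing: KernelVecType is a traits struct the
CPU attention kernels use to pick SIMD wrapper types per scalar type at
compile time. On POWER, fp16 data is widened to fp32 at load time (there are
no fp16 vector lanes to load into), while x86 loads fp16 natively and converts
to fp32 for the math. A self-contained sketch of the same pattern, with
stand-in types rather than the real vec_op/c10 ones:

    // trait_sketch.cpp -- hypothetical illustration, not vLLM code.
    #include <cstdio>

    struct FP16Vec8 { static constexpr const char* name = "8 x fp16"; };
    struct FP32Vec8 { static constexpr const char* name = "8 x fp32"; };

    struct HalfTag {};  // stand-in for c10::Half

    template <typename scalar_t>
    struct KernelVecTypeSketch;

    template <>
    struct KernelVecTypeSketch<HalfTag> {
    #ifdef __powerpc64__
      // POWER: widen to fp32 on load.
      using q_load_vec_type = FP32Vec8;
    #else
      // x86 and others: load fp16 directly, convert later.
      using q_load_vec_type = FP16Vec8;
    #endif
    };

    int main() {
      std::printf("q is loaded as %s\n",
                  KernelVecTypeSketch<HalfTag>::q_load_vec_type::name);
      return 0;
    }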
6 changes: 6 additions & 0 deletions csrc/cpu/quant.cpp
@@ -25,7 +25,13 @@ struct KernelVecType<c10::BFloat16> {
 
 template <>
 struct KernelVecType<c10::Half> {
+#ifdef __powerpc64__
+  // Power architecture-specific vector type
+  using load_vec_type = vec_op::FP32Vec16;
+#else
+  // Fallback for other architectures
   using load_vec_type = vec_op::FP16Vec16;
+#endif
   using azp_adj_load_vec_type = vec_op::INT32Vec16;
   using cvt_vec_type = vec_op::FP32Vec16;
 };
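
quant.cpp applies the same trick to the int8 quantization path: only the load
type is architecture-specific, while cvt_vec_type stays FP32Vec16, so the
scale/zero-point arithmetic is always done in fp32. A hypothetical scalar
rendering of one asymmetric-quantization step (the shipped kernel is the
vectorized version above; this helper and its rounding mode are assumptions):

    // quant_sketch.cpp -- hypothetical illustration, not vLLM code.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // The value arrives as fp32 on every architecture: x86 converts after a
    // native fp16 load, POWER widens to fp32 during the load itself.
    std::int8_t quantize_one(float x, float scale, std::int32_t azp) {
      float q = std::nearbyint(x / scale) + static_cast<float>(azp);
      return static_cast<std::int8_t>(std::clamp(q, -128.0f, 127.0f));
    }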
2 changes: 1 addition & 1 deletion vllm/model_executor/model_loader/loader.py
@@ -105,7 +105,7 @@ def _initialize_model(vllm_config: VllmConfig, prefix: str = "") -> nn.Module:
         msg = ("vLLM model class should accept `vllm_config` and `prefix` as "
                "input arguments. Possibly you have an old-style model class"
                " registered from out of tree and it is used for new vLLM version. "
-               "Check https://docs.vllm.ai/en/latest/design/class_hierarchy.html "
+               "Check https://docs.vllm.ai/en/latest/design/arch_overview.html "
                "for the design and update the model class accordingly.")
         logger.warning(msg)
         logger.warning(
5 changes: 3 additions & 2 deletions vllm/model_executor/models/pixtral.py
@@ -331,6 +331,7 @@ class VisionEncoderArgs:
     num_attention_heads: int
     rope_theta: float  # for rope-2D
     image_token_id: int
+    adapter_bias: bool = True
 
 
 def _reshape_for_broadcast(freqs_cis: torch.Tensor,
@@ -595,10 +596,10 @@ def __init__(self, args: VisionEncoderArgs, dim: int):
         self.w_in = nn.Linear(
             args.hidden_size,
             dim,
-            bias=True,
+            bias=args.adapter_bias,
         )
         self.gelu = nn.GELU()
-        self.w_out = nn.Linear(dim, dim, bias=True)
+        self.w_out = nn.Linear(dim, dim, bias=args.adapter_bias)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.w_out(self.gelu(self.w_in(x)))
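
The pixtral.py change threads a single adapter_bias switch through both
adapter projections, presumably so checkpoints whose vision-language adapter
was trained without biases can be represented. For illustration, the same
module rendered in libtorch C++ (hypothetical; the real implementation is the
Python code above):

    // adapter_sketch.cpp -- hypothetical illustration, not vLLM code.
    #include <torch/torch.h>

    struct AdapterSketch : torch::nn::Module {
      torch::nn::Linear w_in{nullptr}, w_out{nullptr};

      AdapterSketch(int64_t hidden_size, int64_t dim, bool adapter_bias = true) {
        // One flag controls the bias of both linear layers, mirroring
        // VisionEncoderArgs.adapter_bias in the diff above.
        w_in = register_module("w_in",
            torch::nn::Linear(
                torch::nn::LinearOptions(hidden_size, dim).bias(adapter_bias)));
        w_out = register_module("w_out",
            torch::nn::Linear(
                torch::nn::LinearOptions(dim, dim).bias(adapter_bias)));
      }

      torch::Tensor forward(torch::Tensor x) {
        return w_out(torch::gelu(w_in(x)));
      }
    };

    // e.g. AdapterSketch adapter(1024, 2048, /*adapter_bias=*/false);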
