From 11fd7ea639cf3c4fae29322d8e5c839ff6f8a1ca Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Tue, 19 Nov 2024 18:33:06 +0100
Subject: [PATCH 1/3] [Pixtral-Large] Pixtral actually has no bias in vision-lang adapter (#10449)

---
 vllm/model_executor/models/pixtral.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py
index f7f46770057e2..d14b89d6b3f85 100644
--- a/vllm/model_executor/models/pixtral.py
+++ b/vllm/model_executor/models/pixtral.py
@@ -331,6 +331,7 @@ class VisionEncoderArgs:
     num_attention_heads: int
     rope_theta: float  # for rope-2D
     image_token_id: int
+    adapter_bias: bool = True


 def _reshape_for_broadcast(freqs_cis: torch.Tensor,
@@ -595,10 +596,10 @@ def __init__(self, args: VisionEncoderArgs, dim: int):
         self.w_in = nn.Linear(
             args.hidden_size,
             dim,
-            bias=True,
+            bias=args.adapter_bias,
         )
         self.gelu = nn.GELU()
-        self.w_out = nn.Linear(dim, dim, bias=True)
+        self.w_out = nn.Linear(dim, dim, bias=args.adapter_bias)

     def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.w_out(self.gelu(self.w_in(x)))
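The patch above threads a single adapter_bias flag from VisionEncoderArgs into both linear layers of Pixtral's vision-language adapter, so Pixtral-Large checkpoints (which ship without adapter bias) can turn it off while existing checkpoints keep the default of True. Below is a minimal standalone sketch of that wiring; the adapter class name and the reduced set of dataclass fields are assumptions for illustration, not copied from the vLLM source.

# Hedged sketch of the adapter touched by the patch above; the class name
# VisionLanguageAdapter and the two-field VisionEncoderArgs are assumptions.
from dataclasses import dataclass

import torch
import torch.nn as nn


@dataclass
class VisionEncoderArgs:
    hidden_size: int
    adapter_bias: bool = True  # Pixtral-Large checkpoints have no adapter bias


class VisionLanguageAdapter(nn.Module):

    def __init__(self, args: VisionEncoderArgs, dim: int):
        super().__init__()
        # Both projections honor the same flag, as in the diff.
        self.w_in = nn.Linear(args.hidden_size, dim, bias=args.adapter_bias)
        self.gelu = nn.GELU()
        self.w_out = nn.Linear(dim, dim, bias=args.adapter_bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.w_out(self.gelu(self.w_in(x)))


# Usage: a Pixtral-Large style config disables the bias.
args = VisionEncoderArgs(hidden_size=1024, adapter_bias=False)
adapter = VisionLanguageAdapter(args, dim=4096)
print(adapter(torch.randn(2, 1024)).shape)  # torch.Size([2, 4096])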
From 1ea291a4173a82c537ab42487e23375be4926d30 Mon Sep 17 00:00:00 2001
From: Manjul Mohan <49657164+mikejuliet13@users.noreply.github.com>
Date: Tue, 19 Nov 2024 23:04:57 +0530
Subject: [PATCH 2/3] Fix: Build error seen on Power Architecture (#10421)

Signed-off-by: Manjul Mohan
Signed-off-by: B-201
Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: youkaichao
Signed-off-by: ismael-dm
Signed-off-by: Andrew Nesbitt
Signed-off-by: mgoin
Signed-off-by: yan ma
Signed-off-by: Angus Wang
Signed-off-by: Lucas Wilkinson
Signed-off-by: rickyx
Signed-off-by: Jee Jee Li
Signed-off-by: Mengqing Cao
Signed-off-by: Travis Johnson
Co-authored-by: Manjul Mohan manjul.mohan@ibm.com
Co-authored-by: B-201
Co-authored-by: Isotr0py <2037008807@qq.com>
Co-authored-by: youkaichao
Co-authored-by: ismael-dm
Co-authored-by: Andrew Nesbitt
Co-authored-by: Michael Goin
Co-authored-by: Yan Ma
Co-authored-by: Angus Wang
Co-authored-by: Lucas Wilkinson
Co-authored-by: Ricky Xu
Co-authored-by: Kevin H. Luu
Co-authored-by: Jee Jee Li
Co-authored-by: Mengqing Cao
Co-authored-by: Travis Johnson
Co-authored-by: Russell Bryant
---
 cmake/cpu_extension.cmake | 14 ++++++++++----
 csrc/cpu/attention.cpp    | 12 ++++++++++--
 csrc/cpu/quant.cpp        |  6 ++++++
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake
index 5912c5c02ede7..426189481575b 100644
--- a/cmake/cpu_extension.cmake
+++ b/cmake/cpu_extension.cmake
@@ -16,10 +16,16 @@ include_directories("${CMAKE_SOURCE_DIR}/csrc")
 #
 # Check the compile flags
 #
-list(APPEND CXX_COMPILE_FLAGS
-    "-fopenmp"
-    "-mf16c"
-    "-DVLLM_CPU_EXTENSION")
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
+    list(APPEND CXX_COMPILE_FLAGS
+        "-fopenmp"
+        "-DVLLM_CPU_EXTENSION")
+else()
+    list(APPEND CXX_COMPILE_FLAGS
+        "-fopenmp"
+        "-mf16c"
+        "-DVLLM_CPU_EXTENSION")
+endif()

 execute_process(COMMAND cat /proc/cpuinfo
                 RESULT_VARIABLE CPUINFO_RET
diff --git a/csrc/cpu/attention.cpp b/csrc/cpu/attention.cpp
index e73eca1b345fd..e6c03dcb034fd 100644
--- a/csrc/cpu/attention.cpp
+++ b/csrc/cpu/attention.cpp
@@ -24,12 +24,20 @@ struct KernelVecType {

 template <>
 struct KernelVecType<c10::Half> {
+#ifdef __powerpc64__
+  // Power architecture-specific vector types
+  using q_load_vec_type = vec_op::FP32Vec8;
+  using k_load_vec_type = vec_op::FP32Vec16;
+  using v_load_vec_type = vec_op::FP32Vec16;
+#else
+  // Fallback for other architectures, including x86
   using q_load_vec_type = vec_op::FP16Vec8;
-  using q_vec_type = vec_op::FP32Vec16;
   using k_load_vec_type = vec_op::FP16Vec16;
+  using v_load_vec_type = vec_op::FP16Vec16;
+#endif
+  using q_vec_type = vec_op::FP32Vec16;
   using k_vec_type = vec_op::FP32Vec16;
   using qk_acc_vec_type = vec_op::FP32Vec16;
-  using v_load_vec_type = vec_op::FP16Vec16;
 };

 #ifdef __AVX512BF16__
diff --git a/csrc/cpu/quant.cpp b/csrc/cpu/quant.cpp
index f42fa2361a2db..d9aed657a3113 100644
--- a/csrc/cpu/quant.cpp
+++ b/csrc/cpu/quant.cpp
@@ -25,7 +25,13 @@ struct KernelVecType {

 template <>
 struct KernelVecType<c10::Half> {
+#ifdef __powerpc64__
+  // Power architecture-specific vector type
+  using load_vec_type = vec_op::FP32Vec16;
+#else
+  // Fallback for other architectures
   using load_vec_type = vec_op::FP16Vec16;
+#endif
   using azp_adj_load_vec_type = vec_op::INT32Vec16;
   using cvt_vec_type = vec_op::FP32Vec16;
 };

From fd9f124971c58376ca294091951dfcc96cc03474 Mon Sep 17 00:00:00 2001
From: Russell Bryant
Date: Tue, 19 Nov 2024 12:48:30 -0500
Subject: [PATCH 3/3] [Doc] fix link for page that was renamed (#10455)

Signed-off-by: Russell Bryant
---
 vllm/model_executor/model_loader/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index b41c23704b7ff..936c2fe415375 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -105,7 +105,7 @@ def _initialize_model(vllm_config: VllmConfig, prefix: str = "") -> nn.Module:
     msg = ("vLLM model class should accept `vllm_config` and `prefix` as "
            "input arguments. Possibly you have an old-style model class"
            " registered from out of tree and it is used for new vLLM version. "
-           "Check https://docs.vllm.ai/en/latest/design/class_hierarchy.html "
+           "Check https://docs.vllm.ai/en/latest/design/arch_overview.html "
            "for the design and update the model class accordingly.")
     logger.warning(msg)
     logger.warning(