diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index f5591f1098534..455f02a2062f1 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -481,7 +481,6 @@ steps:
 
 - label: LoRA TP Test (Distributed)
   num_gpus: 4
-  soft_fail: true
   source_file_dependencies:
   - vllm/lora
   - tests/lora
diff --git a/vllm/lora/fully_sharded_layers.py b/vllm/lora/fully_sharded_layers.py
index 5f2d32defe030..e25e453201f01 100644
--- a/vllm/lora/fully_sharded_layers.py
+++ b/vllm/lora/fully_sharded_layers.py
@@ -77,13 +77,6 @@ def apply(self, x: torch.Tensor,
                                        add_input=True)
         # now have column partitioned output
 
-        if self.bias_stacked is not None:
-            self.bias_stacked = self.bias_stacked.view(
-                -1, self.bias_stacked.shape[-1])
-            self.bias_stacked = self.bias_stacked[
-                self.punica_wrapper.token_lora_indices]
-            output += self.bias_stacked
-
         output = output.view(*out_orig_shape)
         return output
 
@@ -222,7 +215,7 @@ def apply(self, x: torch.Tensor,
         self.punica_wrapper.add_expand(output,
                                        buffer,
                                        self.lora_b_stacked,
-                                       self.bias_all,
+                                       self.bias_stacked,
                                        add_input=True)
         # now have column partitioned output
         output = output.view(*out_orig_shape)