Commit 9307bfa: fix format
Signed-off-by: Hanzhi Zhou <[email protected]>
hanzhi713 committed Nov 7, 2024
Parent: d89f510 · Commit: 9307bfa
Showing 1 changed file with 1 addition and 1 deletion.
vllm/distributed/device_communicators/custom_all_reduce.py

@@ -278,7 +278,7 @@ def custom_all_reduce(self, input: torch.Tensor) -> Optional[torch.Tensor]:
                 return torch.empty_like(input)
         else:
             # Note: outside of cuda graph context, custom allreduce incurs a
-            # cost of cudaMemcpy, which should be small (<=1% of overall
+            # cost of cudaMemcpy, which should be small (<=1% of overall
             # latency) compared to the performance gain of using custom kernels
             return self.all_reduce(input, registered=False)
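The removed and added comment lines render identically here; consistent with the commit message "fix format", the change is presumably whitespace-only. For broader context, below is a hedged sketch of how `custom_all_reduce` might dispatch around the branch shown in the hunk. Only the final `else` branch and the `torch.empty_like` line come from the diff itself; the `_IS_CAPTURING` flag, the `torch.cuda.is_current_stream_capturing()` check, and the `all_reduce` stub are assumptions about code outside this hunk.

```python
# Sketch only: reconstructs the dispatch around the hunk above. Everything
# outside the final else-branch is assumed, not taken from this commit.
from typing import Optional

import torch


class CustomAllreduce:
    # Assumed flag: set while capturing or warming up a cuda graph.
    _IS_CAPTURING = False

    def all_reduce(self, input: torch.Tensor, registered: bool) -> torch.Tensor:
        # Placeholder for the custom CUDA kernel path. Per the comment in the
        # diff, the unregistered path pays an extra cudaMemcpy to stage the
        # tensor into a registered buffer.
        raise NotImplementedError

    def custom_all_reduce(self, input: torch.Tensor) -> Optional[torch.Tensor]:
        if self._IS_CAPTURING:
            if torch.cuda.is_current_stream_capturing():
                # Inside cuda graph capture: buffers are pre-registered.
                return self.all_reduce(input, registered=True)
            # Warmup pass: mimic the allocation pattern without running the
            # kernel, since custom allreduce is out-of-place.
            return torch.empty_like(input)
        else:
            # Note: outside of cuda graph context, custom allreduce incurs a
            # cost of cudaMemcpy, which should be small (<=1% of overall
            # latency) compared to the performance gain of using custom kernels
            return self.all_reduce(input, registered=False)
```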
