diff --git a/docs/source/getting_started/debugging.rst b/docs/source/getting_started/debugging.rst index d6c83014dc69f..7f36d65a227f0 100644 --- a/docs/source/getting_started/debugging.rst +++ b/docs/source/getting_started/debugging.rst @@ -86,6 +86,11 @@ If GPU/CPU communication cannot be established, you can use the following Python from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator pynccl = PyNcclCommunicator(group=gloo_group, device=local_rank) + # pynccl is enabled by default for 0.6.5+, + # but for 0.6.4 and below, we need to enable it manually. + # keep the code for backward compatibility when because people + # prefer to read the latest documentation. + pynccl.disabled = False s = torch.cuda.Stream() with torch.cuda.stream(s):