
Commit

update
Chao1Han committed Dec 2, 2024
1 parent 0560c8b commit 31d2bab
Showing 3 changed files with 8 additions and 6 deletions.
1 change: 1 addition & 0 deletions caffe2/CMakeLists.txt
@@ -1104,6 +1104,7 @@ if(USE_XPU)
     message(WARNING "Failed to include ATen XPU implementation target")
   else()
     target_link_libraries(torch_xpu PRIVATE torch_xpu_ops)
+    # USE_C10D_XCCL from third_party torch-xpu-ops repository for xccl registration.
     if(USE_C10D_XCCL)
       target_compile_definitions(torch_xpu PUBLIC USE_C10D_XCCL)
     endif()
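
Note: a quick way to confirm at runtime that an xccl backend name actually got registered is to inspect the backend list. A minimal sketch, assuming a build with USE_XPU and USE_C10D_XCCL where torch-xpu-ops has registered the "xccl" name (not guaranteed in every build):

# Minimal sketch: check whether an "xccl" backend name was registered.
# "xccl" registration itself happens in torch-xpu-ops, so this is an
# assumption about the build, not something this commit guarantees.
import torch.distributed as dist

print(dist.is_available())                   # distributed support compiled in?
print("xccl" in dist.Backend.backend_list)   # True only if xccl was registered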
2 changes: 0 additions & 2 deletions torch/csrc/distributed/c10d/ProcessGroup.hpp
@@ -641,7 +641,6 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
   // TODO: HACK for backend name to get sequence number for that backend.
   if (backendType == ProcessGroup::BackendType::GLOO ||
       backendType == ProcessGroup::BackendType::NCCL ||
-      backendType == ProcessGroup::BackendType::XCCL ||
       backendType == ProcessGroup::BackendType::UCC) {
     getDefaultBackend()->setSequenceNumberForGroup();
   } else {
@@ -663,7 +662,6 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
   // TODO: HACK for backend name to get sequence number for that backend.
   if (backendType == ProcessGroup::BackendType::GLOO ||
       backendType == ProcessGroup::BackendType::NCCL ||
-      backendType == ProcessGroup::BackendType::XCCL ||
       backendType == ProcessGroup::BackendType::UCC) {
     return getDefaultBackend()->getSequenceNumberForGroup();
   } else {
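
Note: with XCCL dropped from both conditions, only the in-tree GLOO/NCCL/UCC backend types take the sequence-number fast path; a backend registered out of tree (such as xccl from torch-xpu-ops) maps to the CUSTOM backend type and falls into the else branch instead. A minimal sketch of inspecting these types from Python, assuming ProcessGroup.BackendType is exposed as in current PyTorch:

# Minimal sketch: out-of-tree backends registered via register_backend
# get BackendType.CUSTOM, so they do not hit the GLOO/NCCL/UCC fast path.
from torch.distributed import ProcessGroup

print(ProcessGroup.BackendType.GLOO)    # in-tree backend type
print(ProcessGroup.BackendType.CUSTOM)  # what out-of-tree backends map to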
11 changes: 7 additions & 4 deletions torch/distributed/distributed_c10d.py
@@ -341,7 +341,7 @@ def register_backend(
         Backend.backend_list.append(name.lower())
         if devices is not None:
             for device in devices:
-                if device != "cpu" and device != "cuda" and device != "xpu":
+                if device != "cpu" and device != "cuda":
                     Backend.default_device_backend_map[device] = name.lower()
         Backend.backend_type_map[name.lower()] = ProcessGroup.BackendType.CUSTOM
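
Note: with the extra "xpu" case removed, any device other than cpu or cuda named here is mapped to the third-party backend being registered. A minimal sketch of how an out-of-tree backend such as xccl would use this path; the creator function below is a hypothetical placeholder, not the real torch-xpu-ops implementation:

# Minimal sketch of out-of-tree backend registration.
# `_create_xccl_process_group` is hypothetical; a real creator would
# return a process-group backend for the given store/rank/size.
import torch.distributed as dist

def _create_xccl_process_group(store, rank, world_size, timeout):
    raise NotImplementedError("illustrative placeholder")

dist.Backend.register_backend(
    "xccl",
    _create_xccl_process_group,
    devices=["xpu"],  # "xpu" is neither cpu nor cuda, so it lands in
                      # default_device_backend_map via the code above
)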

@@ -1495,7 +1495,7 @@ def init_process_group(
     Args:
         backend (str or Backend, optional): The backend to use. Depending on
-            build-time configurations, valid values include ``mpi``, ``gloo``, ``xccl``,
+            build-time configurations, valid values include ``mpi``, ``gloo``,
             ``nccl``, and ``ucc``. If the backend is not provided, then both a ``gloo``
             and ``nccl`` backend will be created, see notes below for how multiple
             backends are managed. This field can be given as a lowercase string
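
Note: a minimal usage sketch of the documented backend argument; this is a single-process run with env:// style initialization, and the per-device-type form the surrounding docs describe (e.g. "cpu:gloo,cuda:nccl") is passed the same way:

# Minimal sketch: single-process group using a documented backend.
import os
import torch.distributed as dist

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")

dist.init_process_group(backend="gloo", rank=0, world_size=1)
print(dist.get_backend())  # "gloo"
dist.destroy_process_group()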
@@ -1775,9 +1775,10 @@ def _new_process_group_helper(
                 "created, please use a different group name"
             )

-        if device_id is not None and device_id.index is None:
+        if device_id is not None and (device_id.index is None or device_id.type != "cuda"):
             raise ValueError(
-                "init_process_group device_id parameter must be a device with an index"
+                "init_process_group device_id parameter must be a cuda device with an "
+                "id, e.g. cuda:0, not just cuda or cpu"
             )

     # Note: _new_process_group_helper is only called from init_process_group, which always provides a timeout value
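
Note: the reinstated check means device_id must name a specific CUDA device. A minimal sketch of what now passes and what raises, assuming a CUDA build (the init calls are commented out because they also need a rendezvous setup):

# Minimal sketch of the reinstated device_id validation.
import torch

ok = torch.device("cuda:0")          # cuda type with an index -> accepted
bad_no_index = torch.device("cuda")  # no index -> ValueError
bad_cpu = torch.device("cpu")        # not cuda -> ValueError

# import torch.distributed as dist
# dist.init_process_group("nccl", rank=0, world_size=1, device_id=ok)
# dist.init_process_group("nccl", rank=0, world_size=1, device_id=bad_no_index)  # raises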
@@ -2730,6 +2731,7 @@ def all_reduce(tensor, op=ReduceOp.SUM, group=None, async_op=False):
     else:
         work.wait()

+
 @_exception_logger
 @deprecated(
     "`torch.distributed.all_reduce_coalesced` will be deprecated. If you must "
@@ -4095,6 +4097,7 @@ def reduce_scatter_tensor(output, input, op=ReduceOp.SUM, group=None, async_op=False):
     else:
         work.wait()

+
 @deprecated(
     "`torch.distributed._reduce_scatter_base` is a private function and will be deprecated. "
     "Please use `torch.distributed.reduce_scatter_tensor` instead.",
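
Note: both hunks above only add a blank line, but each ends at the async tail of a collective, where work.wait() blocks on completion. A minimal sketch of that pattern with all_reduce on a single-process gloo group:

# Minimal sketch of the async collective pattern ending in work.wait().
import os
import torch
import torch.distributed as dist

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29501")
dist.init_process_group("gloo", rank=0, world_size=1)

t = torch.ones(4)
work = dist.all_reduce(t, op=dist.ReduceOp.SUM, async_op=True)
work.wait()  # block until the collective completes
print(t)     # unchanged with world_size=1
dist.destroy_process_group()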
