From 31d2bab16bef4a0e2c470a758f74b3544a9abc45 Mon Sep 17 00:00:00 2001 From: hanchao Date: Mon, 2 Dec 2024 07:03:37 +0000 Subject: [PATCH] Remove XCCL-specific handling from c10d core; keep XCCL registration in torch-xpu-ops --- caffe2/CMakeLists.txt | 1 + torch/csrc/distributed/c10d/ProcessGroup.hpp | 2 -- torch/distributed/distributed_c10d.py | 11 +++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index a47fcbd0d7c1a4..97cbf86b63b024 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1104,6 +1104,7 @@ if(USE_XPU) message(WARNING "Failed to include ATen XPU implementation target") else() target_link_libraries(torch_xpu PRIVATE torch_xpu_ops) + # USE_C10D_XCCL from third_party torch-xpu-ops repository for xccl registration. if(USE_C10D_XCCL) target_compile_definitions(torch_xpu PUBLIC USE_C10D_XCCL) endif() diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp index 1259813d5d7a5f..b3d9ce3b91d674 100644 --- a/torch/csrc/distributed/c10d/ProcessGroup.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp @@ -641,7 +641,6 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder { // TODO: HACK for backend name to get sequence number for that backend. if (backendType == ProcessGroup::BackendType::GLOO || backendType == ProcessGroup::BackendType::NCCL || - backendType == ProcessGroup::BackendType::XCCL || backendType == ProcessGroup::BackendType::UCC) { getDefaultBackend()->setSequenceNumberForGroup(); } else { @@ -663,7 +662,6 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder { // TODO: HACK for backend name to get sequence number for that backend. 
if (backendType == ProcessGroup::BackendType::GLOO || backendType == ProcessGroup::BackendType::NCCL || - backendType == ProcessGroup::BackendType::XCCL || backendType == ProcessGroup::BackendType::UCC) { return getDefaultBackend()->getSequenceNumberForGroup(); } else { diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 18d1f418180f83..d0cae808fcaa43 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -341,7 +341,7 @@ def register_backend( Backend.backend_list.append(name.lower()) if devices is not None: for device in devices: - if device != "cpu" and device != "cuda" and device != "xpu": + if device != "cpu" and device != "cuda": Backend.default_device_backend_map[device] = name.lower() Backend.backend_type_map[name.lower()] = ProcessGroup.BackendType.CUSTOM @@ -1495,7 +1495,7 @@ def init_process_group( Args: backend (str or Backend, optional): The backend to use. Depending on - build-time configurations, valid values include ``mpi``, ``gloo``, ``xccl``, + build-time configurations, valid values include ``mpi``, ``gloo``, ``nccl``, and ``ucc``. If the backend is not provided, then both a ``gloo`` and ``nccl`` backend will be created, see notes below for how multiple backends are managed. This field can be given as a lowercase string @@ -1775,9 +1775,10 @@ def _new_process_group_helper( "created, please use a different group name" ) - if device_id is not None and device_id.index is None: + if device_id is not None and (device_id.index is None or device_id.type != "cuda"): raise ValueError( - "init_process_group device_id parameter must be a device with an index" + "init_process_group device_id parameter must be a cuda device with an " + "id, e.g. 
cuda:0, not just cuda or cpu" ) # Note: _new_process_group_helper is only called from init_process_group, which always provides a timeout value @@ -2730,6 +2731,7 @@ def all_reduce(tensor, op=ReduceOp.SUM, group=None, async_op=False): else: work.wait() + @_exception_logger @deprecated( "`torch.distributed.all_reduce_coalesced` will be deprecated. If you must " @@ -4095,6 +4097,7 @@ def reduce_scatter_tensor(output, input, op=ReduceOp.SUM, group=None, async_op=F else: work.wait() + @deprecated( "`torch.distributed._reduce_scatter_base` is a private function and will be deprecated. " "Please use `torch.distributed.reduce_scatter_tensor` instead.",