diff --git a/build_variables.bzl b/build_variables.bzl index 55a3f0023b571f..b903a55b17439b 100644 --- a/build_variables.bzl +++ b/build_variables.bzl @@ -542,7 +542,6 @@ libtorch_distributed_extra_sources = [ "torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.cpp", "torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.cpp", "torch/csrc/distributed/c10d/HashStore.cpp", - "torch/csrc/distributed/c10d/ProcessGroupXCCL.cpp", "torch/csrc/distributed/rpc/agent_utils.cpp", "torch/csrc/distributed/rpc/message.cpp", "torch/csrc/distributed/rpc/profiler/remote_profiler_manager.cpp", @@ -787,6 +786,7 @@ libtorch_python_cuda_sources = libtorch_python_cuda_core_sources + [ libtorch_python_xpu_sources = [ "torch/csrc/xpu/xccl.cpp", + "torch/csrc/distributed/c10d/ProcessGroupXCCL.cpp", "torch/csrc/xpu/Event.cpp", "torch/csrc/xpu/Module.cpp", "torch/csrc/xpu/Stream.cpp", diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 28e7d0c96ba877..01d280cb3fc7c4 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1015,13 +1015,12 @@ endif() if(USE_XPU) if(USE_XCCL) - list(APPEND Caffe2_XPU_SRCS - ${TORCH_SRC_DIR}/csrc/xpu/xccl.cpp) + list(APPEND Caffe2_XPU_SRCS + ${TORCH_SRC_DIR}/csrc/xpu/xccl.cpp) endif() add_library(torch_xpu ${Caffe2_XPU_SRCS}) torch_compile_options(torch_xpu) # see cmake/public/utils.cmake target_compile_definitions(torch_xpu PRIVATE USE_XPU) - # ATen XPU implementation set(TORCH_XPU_OPS_DIR ${TORCH_ROOT}/third_party/torch-xpu-ops) set(TORCH_XPU_OPS_REPO_URL https://github.com/intel/torch-xpu-ops.git) diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 0b601cf2a6a329..229ff112ab3187 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -155,6 +155,7 @@ function(caffe2_print_configuration_summary) message(STATUS " USE_ITT : ${USE_ITT}") message(STATUS " USE_XCCL : ${USE_XCCL}") if(${USE_XCCL}) + message(STATUS " USE_C10D_XCCL : ${USE_C10D_XCCL}") message(STATUS " XCCL include path : ${XCCL_INCLUDE_DIR}") message(STATUS " XCCL library : ${XCCL_LIBRARY}") endif() diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index 8ab7d7aeb095b6..f50ae4e02c3386 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -165,6 +165,9 @@ if(USE_XPU) append_filelist("libtorch_python_xpu_sources" TORCH_PYTHON_SRCS) list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_XPU) + # if(USE_XCCL) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::xpurt) + # endif() endif() if(USE_CUDNN OR USE_ROCM) @@ -419,6 +422,8 @@ endif() target_compile_definitions(torch_python PRIVATE "-DTHP_BUILD_MAIN_LIB") target_link_libraries(torch_python PRIVATE ${TORCH_LIB} ${TORCH_PYTHON_LINK_LIBRARIES}) +target_link_libraries(torch_python PRIVATE torch::xpurt) +target_link_libraries(torch_python PRIVATE c10_xpu) target_compile_definitions(torch_python PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS}) diff --git a/torch/csrc/distributed/c10d/ProcessGroupXCCL.hpp b/torch/csrc/distributed/c10d/ProcessGroupXCCL.hpp index d14d677205ecbb..01a5966b811069 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupXCCL.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroupXCCL.hpp @@ -11,8 +11,6 @@ #include #include -#include -#include #include #include #include diff --git a/torch/csrc/xpu/xccl.h b/torch/csrc/xpu/xccl.h index 31fc594e71cc0b..c7a67975bb286c 100644 --- a/torch/csrc/xpu/xccl.h +++ b/torch/csrc/xpu/xccl.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace torch::xpu::xccl {