Skip to content

Commit

Permalink
[Windows][PT2.6] Split larger Other Kernels lib (#1096)
Browse files Browse the repository at this point in the history
The torch_xpu_ops_sycl_kernels library grows to around 1.83 GB on Windows,
so this change splits it into several smaller libraries to reduce the size of each one.

New libs introduced in this PR:

torch_xpu_ops_sycl_tensor_kernels
torch_xpu_ops_sycl_norm_loss_kernels
torch_xpu_ops_sycl_poly_kernels
torch_xpu_ops_sycl_dist_kernels

---------

Co-authored-by: Feng Yuan <[email protected]>
  • Loading branch information
ratnampa and fengyuan14 authored Nov 22, 2024
1 parent 0d189dd commit bfdbaf4
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 9 deletions.
2 changes: 1 addition & 1 deletion cmake/BuildFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "-options '${SYCL_OFFLINE_COMPILER_CG_OPTIONS}'")

if(WIN32)
set(AOT_TARGETS "ats-m150,lnl-m,mtl-u,mtl-h")
set(AOT_TARGETS "ats-m150,mtl-u,mtl-h,xe2-lpg,xe2-hpg")
else()
set(AOT_TARGETS "pvc,xe-lpg,ats-m150")
endif()
Expand Down
83 changes: 75 additions & 8 deletions src/BuildOnWindows.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@
set(TORCH_XPU_OPS_LIBRARIES)
set(SYCL_LINK_LIBRARIES_KEYWORD PRIVATE)

# Work around the cyclic dependency:
# libtorch_xpu.so links to libtorch_xpu_ops.a.
# libtorch_xpu_ops_aten.so is loaded explicitly by torch/__init__.py:_load_dll_libraries (this breaks the cycle).
# libtorch_xpu_ops_aten.so links to libtorch_xpu_ops_sycl_unary_binary_kernels.so and libtorch_xpu_ops_sycl_kernels.so.
# libtorch_xpu_ops_sycl_unary_binary_kernels.so and libtorch_xpu_ops_sycl_kernels.so link to libtorch_xpu.so.
add_library(
torch_xpu_ops
STATIC
Expand All @@ -21,7 +16,6 @@ add_library(
${ATen_XPU_NATIVE_CPP_SRCS}
${ATen_XPU_GEN_SRCS})
install(TARGETS torch_xpu_ops_aten DESTINATION "${TORCH_INSTALL_LIB_DIR}")
# target_compile_definitions(torch_xpu_ops_aten PRIVATE CAFFE2_BUILD_MAIN_LIB)
target_compile_definitions(torch_xpu_ops_aten PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
target_link_libraries(torch_xpu_ops_aten PUBLIC torch_xpu)
target_link_libraries(torch_xpu_ops_aten PUBLIC torch_cpu)
Expand All @@ -48,8 +42,11 @@ else()
set(ATen_XPU_SYCL_REDUCE_SRCS)
set(ATen_XPU_SYCL_ACTIVATION_SRCS)
set(ATen_XPU_SYCL_FOREACH_SRCS)
# Source lists for the Tensor, Norm/Loss, Polynomial and Distribution kernel
# libs. set() with only a variable name leaves it without a value in this
# scope, so each list is empty before sources are appended to it.
set(ATen_XPU_SYCL_TENSOR_SRCS)
set(ATen_XPU_SYCL_NORM_LOSS_SRCS)
set(ATen_XPU_SYCL_POLY_SRCS)
set(ATen_XPU_SYCL_DISTRIBUTION_SRCS)
# Receives every source that matches none of the other categories.
set(ATen_XPU_SYCL_OTHERS_SRCS)

foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
string(REGEX MATCH "Binary" IS_BINARY ${sycl_src})
string(REGEX MATCH "Unary" IS_UNARY ${sycl_src})
Expand All @@ -63,6 +60,13 @@ else()
string(REGEX MATCH "Activation" IS_ACTIVATION ${sycl_src})
string(REGEX MATCH "Foreach" IS_FOREACH ${sycl_src})
string(REGEX MATCH "Reduce" IS_REDUCE ${sycl_src})
# Category probes for the Tensor, Norm/Loss, Polynomial and Distribution kernel
# libs. Each pattern is unanchored, so it matches anywhere inside ${sycl_src};
# the output variable holds the matched text, or is empty when there is no match.
string(REGEX MATCH "Tensor" IS_TENSOR ${sycl_src})
string(REGEX MATCH "Norm" IS_NORM ${sycl_src})
string(REGEX MATCH "Loss" IS_LOSS ${sycl_src})
string(REGEX MATCH "Polynomial" IS_POLY ${sycl_src})
# Resize kernels are grouped into the Norm and Loss lib to resolve symbols.
string(REGEX MATCH "Resize" IS_RESIZE ${sycl_src})
string(REGEX MATCH "Distribution" IS_DISTRIBUTION ${sycl_src})

if(NOT IS_FOREACH STREQUAL "")
list(APPEND ATen_XPU_SYCL_FOREACH_SRCS ${sycl_src})
Expand All @@ -74,11 +78,18 @@ else()
list(APPEND ATen_XPU_SYCL_REDUCE_SRCS ${sycl_src})
elseif(NOT IS_ACTIVATION STREQUAL "")
list(APPEND ATen_XPU_SYCL_ACTIVATION_SRCS ${sycl_src})
# Branches are tested in order, so a source matching several patterns is added
# only to the first matching list (e.g. "Tensor" wins over "Distribution",
# "Norm", "Loss", "Resize" and "Polynomial").
elseif(NOT IS_TENSOR STREQUAL "")
list(APPEND ATen_XPU_SYCL_TENSOR_SRCS ${sycl_src})
elseif(NOT IS_DISTRIBUTION STREQUAL "")
list(APPEND ATen_XPU_SYCL_DISTRIBUTION_SRCS ${sycl_src})
# Norm, Loss and Resize sources all share the Norm/Loss list.
elseif(NOT IS_NORM STREQUAL "" OR NOT IS_LOSS STREQUAL "" OR NOT IS_RESIZE STREQUAL "")
list(APPEND ATen_XPU_SYCL_NORM_LOSS_SRCS ${sycl_src})
elseif(NOT IS_POLY STREQUAL "")
list(APPEND ATen_XPU_SYCL_POLY_SRCS ${sycl_src})
else()
list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src})
endif()
endforeach()

# Binary kernel lib
set(sycl_binary_lib torch_xpu_ops_sycl_binary_kernels)
sycl_add_library(
Expand Down Expand Up @@ -148,7 +159,63 @@ else()

# Decouple with PyTorch cmake definition.
install(TARGETS ${sycl_foreach_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")

# Tensor kernel lib: a shared SYCL library built from the sources collected in
# ATen_XPU_SYCL_TENSOR_SRCS.
set(sycl_tensor_lib torch_xpu_ops_sycl_tensor_kernels)
sycl_add_library(
  "${sycl_tensor_lib}" SHARED
  SYCL_SOURCES ${ATen_XPU_SYCL_TENSOR_SRCS})
target_compile_definitions(
  "${sycl_tensor_lib}"
  PRIVATE TORCH_XPU_BUILD_MAIN_LIB)

# The kernel lib links against torch_xpu, and torch_xpu_ops_aten links against
# the kernel lib.
target_link_libraries("${sycl_tensor_lib}" PUBLIC torch_xpu)
target_link_libraries(torch_xpu_ops_aten PUBLIC "${sycl_tensor_lib}")

# Record the new target in the list of torch-xpu-ops libraries.
list(APPEND TORCH_XPU_OPS_LIBRARIES "${sycl_tensor_lib}")

# Installed from here to stay decoupled from PyTorch's CMake definitions.
install(
  TARGETS "${sycl_tensor_lib}"
  DESTINATION "${TORCH_INSTALL_LIB_DIR}")

# Norm and Loss kernel lib: a shared SYCL library built from the sources
# collected in ATen_XPU_SYCL_NORM_LOSS_SRCS (Norm, Loss and Resize kernels).
set(sycl_norm_loss_lib torch_xpu_ops_sycl_norm_loss_kernels)
sycl_add_library(
  "${sycl_norm_loss_lib}" SHARED
  SYCL_SOURCES ${ATen_XPU_SYCL_NORM_LOSS_SRCS})
target_compile_definitions(
  "${sycl_norm_loss_lib}"
  PRIVATE TORCH_XPU_BUILD_MAIN_LIB)

# The kernel lib links against torch_xpu, and torch_xpu_ops_aten links against
# the kernel lib.
target_link_libraries("${sycl_norm_loss_lib}" PUBLIC torch_xpu)
target_link_libraries(torch_xpu_ops_aten PUBLIC "${sycl_norm_loss_lib}")

# Record the new target in the list of torch-xpu-ops libraries.
list(APPEND TORCH_XPU_OPS_LIBRARIES "${sycl_norm_loss_lib}")

# Installed from here to stay decoupled from PyTorch's CMake definitions.
install(
  TARGETS "${sycl_norm_loss_lib}"
  DESTINATION "${TORCH_INSTALL_LIB_DIR}")

# Polynomial kernel lib: a shared SYCL library built from the sources collected
# in ATen_XPU_SYCL_POLY_SRCS.
set(sycl_poly_lib torch_xpu_ops_sycl_poly_kernels)
sycl_add_library(
  "${sycl_poly_lib}" SHARED
  SYCL_SOURCES ${ATen_XPU_SYCL_POLY_SRCS})
target_compile_definitions(
  "${sycl_poly_lib}"
  PRIVATE TORCH_XPU_BUILD_MAIN_LIB)

# The kernel lib links against torch_xpu, and torch_xpu_ops_aten links against
# the kernel lib.
target_link_libraries("${sycl_poly_lib}" PUBLIC torch_xpu)
target_link_libraries(torch_xpu_ops_aten PUBLIC "${sycl_poly_lib}")

# Record the new target in the list of torch-xpu-ops libraries.
list(APPEND TORCH_XPU_OPS_LIBRARIES "${sycl_poly_lib}")

# Installed from here to stay decoupled from PyTorch's CMake definitions.
install(
  TARGETS "${sycl_poly_lib}"
  DESTINATION "${TORCH_INSTALL_LIB_DIR}")

# Distribution kernel lib: a shared SYCL library built from the sources
# collected in ATen_XPU_SYCL_DISTRIBUTION_SRCS.
set(sycl_dist_lib torch_xpu_ops_sycl_dist_kernels)
sycl_add_library(
  "${sycl_dist_lib}" SHARED
  SYCL_SOURCES ${ATen_XPU_SYCL_DISTRIBUTION_SRCS})
target_compile_definitions(
  "${sycl_dist_lib}"
  PRIVATE TORCH_XPU_BUILD_MAIN_LIB)

# The kernel lib links against torch_xpu, and torch_xpu_ops_aten links against
# the kernel lib.
target_link_libraries("${sycl_dist_lib}" PUBLIC torch_xpu)
target_link_libraries(torch_xpu_ops_aten PUBLIC "${sycl_dist_lib}")

# Record the new target in the list of torch-xpu-ops libraries.
list(APPEND TORCH_XPU_OPS_LIBRARIES "${sycl_dist_lib}")

# Installed from here to stay decoupled from PyTorch's CMake definitions.
install(
  TARGETS "${sycl_dist_lib}"
  DESTINATION "${TORCH_INSTALL_LIB_DIR}")

# Other kernel lib
set(sycl_lib torch_xpu_ops_sycl_kernels)
sycl_add_library(
Expand Down

0 comments on commit bfdbaf4

Please sign in to comment.