Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARSE] Add support for cuSPARSE backend #527

Merged
merged 44 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
fac276d
[SPARSE] Add support for cuSPARSE backend
Rbiessy Sep 6, 2024
3d02b34
Remove previous compile time example
Rbiessy Sep 10, 2024
1122221
Update compile time example description
Rbiessy Sep 10, 2024
f42eab5
Remove unused mkl_helper file
Rbiessy Sep 10, 2024
8584899
Update README with cuSPARSE
Rbiessy Sep 10, 2024
7aa5177
Fix typos and rewording
Rbiessy Sep 11, 2024
70cdbe0
Rework test accuracy for spsv
Rbiessy Sep 12, 2024
8173331
Remove get_mem for USM
Rbiessy Sep 12, 2024
9c16425
Map statuses CUSPARSE_STATUS_NOT_INITIALIZED and CUSPARSE_STATUS_MATR…
Rbiessy Sep 13, 2024
36599a9
Reword comment
Rbiessy Sep 13, 2024
3b29e32
Reword comment
Rbiessy Sep 20, 2024
84565a9
Remove redundant namespace
Rbiessy Sep 20, 2024
44dc73d
Remove redundant namespace in MKL backends
Rbiessy Sep 20, 2024
050f22a
Add comments on the assumption made for buffers
Rbiessy Sep 23, 2024
77d19e8
Throw unimplemented for some cases with csr_alg3
Rbiessy Sep 23, 2024
d23b24d
Introduce sorted_by_rows property
Rbiessy Sep 23, 2024
4db2187
Avoid placeholder accessor
Rbiessy Sep 24, 2024
f73fd0b
Remove description from algorithms tables
Rbiessy Sep 26, 2024
18156e5
Use COO sorted_by_rows in example
Rbiessy Sep 26, 2024
5addc14
Document missing feature
Rbiessy Sep 30, 2024
6946d64
Remove workaround for alpha and beta spmv
Rbiessy Sep 30, 2024
8e77ead
Reword comment on empty accessors
Rbiessy Sep 30, 2024
325f794
Fix function name in exceptions
Rbiessy Sep 30, 2024
9bc77d1
Throw unimplemented for spsv using no_optimize_alg
Rbiessy Sep 30, 2024
710b80b
Fix documentation typo
Rbiessy Sep 30, 2024
00e5ced
an -> a
Rbiessy Sep 30, 2024
342380e
Revert throwing unsupported for spsv + no_optimize_alg
Rbiessy Sep 30, 2024
96b38fd
Fix documentation enums
Rbiessy Oct 1, 2024
c5ba2c4
Do not retrieve global handle for set_*_data functions using buffer API
Rbiessy Sep 13, 2024
1f5c80c
Remove host_task for set_*_data functions using USM API
Rbiessy Sep 13, 2024
58f08c9
Cache CUstream and cusparseHandle_t in operation descriptor
Rbiessy Oct 22, 2024
c0eae1e
Ensure descriptor is kept alive long enough when buffers are used
Rbiessy Oct 24, 2024
2149e39
USM tests using reset_data wait before updating device values
Rbiessy Oct 24, 2024
bad6bfb
Force inputs to be copied on device before the optimize step
Rbiessy Jul 29, 2024
6318d53
Disable specific case of spmm with csr_alg3 failing
Rbiessy Oct 24, 2024
956ae49
Disable enqueue_native_command extension for out-of-order queues
Rbiessy Oct 24, 2024
43428ca
Test in-order queues
Rbiessy Oct 25, 2024
790d018
clang-format-19.1
Rbiessy Oct 25, 2024
ae5e387
Merge branch 'develop' into romain/cusparse
Rbiessy Oct 25, 2024
97eef5c
Move more functions to the detail namespace
Rbiessy Oct 25, 2024
2888232
Fix CT example return value and expected result
Rbiessy Oct 25, 2024
b4f553c
Update example README output
Rbiessy Oct 25, 2024
a2177f7
clang-format
Rbiessy Oct 25, 2024
eef836a
Rename variables that are not placeholder anymore
Rbiessy Oct 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF
option(ENABLE_ROCFFT_BACKEND "Enable the rocFFT backend for the DFT interface" OFF)
option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interface. Cannot be used with other DFT backends." OFF)

# sparse
option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)

set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")

Expand Down Expand Up @@ -102,7 +105,8 @@ if(ENABLE_MKLGPU_BACKEND
list(APPEND DOMAINS_LIST "dft")
endif()
if(ENABLE_MKLCPU_BACKEND
OR ENABLE_MKLGPU_BACKEND)
OR ENABLE_MKLGPU_BACKEND
OR ENABLE_CUSPARSE_BACKEND)
list(APPEND DOMAINS_LIST "sparse_blas")
endif()

Expand All @@ -129,7 +133,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
endif()
else()
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_CUSPARSE_BACKEND
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND)
set(CMAKE_CXX_COMPILER "clang++")
elseif(ENABLE_MKLGPU_BACKEND)
Expand Down
4 changes: 2 additions & 2 deletions cmake/FindCompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ if(is_dpcpp)
# Check if the Nvidia target is supported. PortFFT uses this for choosing default configuration.
check_cxx_compiler_flag("-fsycl -fsycl-targets=nvptx64-nvidia-cuda" dpcpp_supports_nvptx64)

if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND)
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND)
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
-fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
Expand All @@ -51,7 +51,7 @@ if(is_dpcpp)
-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend
--offload-arch=${HIP_TARGETS})
endif()
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_ROCBLAS_BACKEND
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
Expand Down
8 changes: 6 additions & 2 deletions docs/building_the_project_with_dpcpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ The most important supported build options are:
* - ENABLE_CURAND_BACKEND
- True, False
- False
* - ENABLE_CUSPARSE_BACKEND
- True, False
- False
* - ENABLE_NETLIB_BACKEND
- True, False
- False
Expand Down Expand Up @@ -183,8 +186,8 @@ Building for CUDA
^^^^^^^^^^^^^^^^^

The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND``,
``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``, and
``ENABLE_CUSOLVER_BACKEND``.
``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``,
``ENABLE_CUSOLVER_BACKEND``, and ``ENABLE_CUSPARSE_BACKEND``.

No additional parameters are required for using CUDA libraries. In most cases,
the CUDA libraries should be found automatically by CMake.
Expand Down Expand Up @@ -356,6 +359,7 @@ disabled using the Ninja build system:
-DENABLE_CUBLAS_BACKEND=True \
-DENABLE_CUSOLVER_BACKEND=True \
-DENABLE_CURAND_BACKEND=True \
-DENABLE_CUSPARSE_BACKEND=True \
-DBUILD_FUNCTIONAL_TESTS=False

``$ONEMKL_DIR`` points at the oneMKL source directory. The x86 CPU (``MKLCPU``)
Expand Down
136 changes: 136 additions & 0 deletions docs/domains/sparse_linear_algebra.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,139 @@ Currently known limitations:
``oneapi::mkl::unimplemented`` exception.
- Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent
synchronizations and copies to the host.


cuSPARSE backend
----------------

Currently known limitations:

- Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
throw a ``oneapi::mkl::unimplemented`` exception.
- The COO format requires the indices to be sorted by row. See the `cuSPARSE
documentation
<https://docs.nvidia.com/cuda/cusparse/index.html#coordinate-coo>`_.


Operation algorithms mapping
----------------------------

The following tables describe how a oneMKL SYCL Interface algorithm maps to the
backend's algorithms. Refer to the backend's documentation for a more detailed
explanation of the algorithms.

Backends with no equivalent algorithms will fall back to the backend's default
behavior.
gajanan-choudhary marked this conversation as resolved.
Show resolved Hide resolved


spmm
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - Value
- Description
- Backend equivalent
* - ``default_optimize_alg``
- Default algorithm.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_ALG_DEFAULT``
* - ``no_optimize_alg``
- Default algorithm but may skip some optimizations. Useful only if an
operation with the same configuration is run once.
gajanan-choudhary marked this conversation as resolved.
Show resolved Hide resolved
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_ALG_DEFAULT``
* - ``coo_alg1``
- Should provide best performance for COO format, small ``nnz`` and
column-major layout.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG1``
gajanan-choudhary marked this conversation as resolved.
Show resolved Hide resolved
* - ``coo_alg2``
- Should provide best performance for COO format and column-major layout.
Produces deterministic results.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG2``
* - ``coo_alg3``
- Should provide best performance for COO format and large ``nnz``.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG3``
* - ``coo_alg4``
- Should provide best performance for COO format and row-major layout.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG4``
* - ``csr_alg1``
- Should provide best performance for CSR format and column-major layout.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG1``
* - ``csr_alg2``
- Should provide best performance for CSR format and row-major layout.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG2``
* - ``csr_alg3``
- Deterministic algorithm for CSR format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG3``


spmv
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - Value
- Description
- Backend equivalent
* - ``default_alg``
- Default algorithm.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_ALG_DEFAULT``
* - ``no_optimize_alg``
- Default algorithm but may skip some optimizations. Useful only if an
operation with the same configuration is run once.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_ALG_DEFAULT``
* - ``coo_alg1``
- Default algorithm for COO format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_COO_ALG1``
* - ``coo_alg2``
- Deterministic algorithm for COO format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_COO_ALG2``
* - ``csr_alg1``
- Default algorithm for CSR format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_CSR_ALG1``
* - ``csr_alg2``
- Deterministic algorithm for CSR format.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPMV_CSR_ALG2``
* - ``csr_alg3``
- LRB variant of the algorithm for CSR format.
- | MKL: none
| cuSPARSE: none


spsv
^^^^

.. list-table::
:header-rows: 1
:widths: 10 30 45

* - Value
- Description
- Backend equivalent
* - ``default_optimize_alg``
gajanan-choudhary marked this conversation as resolved.
Show resolved Hide resolved
- Default algorithm.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPSV_ALG_DEFAULT``
* - ``no_optimize_alg``
- Default algorithm but may skip some optimizations. Useful only if an
operation with the same configuration is run once.
- | MKL: none
| cuSPARSE: ``CUSPARSE_SPSV_ALG_DEFAULT``
21 changes: 9 additions & 12 deletions examples/sparse_blas/compile_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,24 @@
#===============================================================================

#Build object from all sources
set(SPARSE_BLAS_BACKENDS "")

if(ENABLE_MKLCPU_BACKEND)
list(APPEND SPARSE_BLAS_BACKENDS "mklcpu")
set(SPARSE_CT_SOURCES "")
if(ENABLE_MKLCPU_BACKEND AND ENABLE_CUSPARSE_BACKEND)
gajanan-choudhary marked this conversation as resolved.
Show resolved Hide resolved
list(APPEND SPARSE_CT_SOURCES "sparse_blas_spmv_usm_mklcpu_cusparse")
endif()

include(WarningsUtils)

foreach(backend ${SPARSE_BLAS_BACKENDS})
set(EXAMPLE_NAME example_sparse_blas_spmv_usm_${backend})
add_executable(${EXAMPLE_NAME} sparse_blas_spmv_usm_${backend}.cpp)
target_include_directories(${EXAMPLE_NAME}
foreach(sparse_ct_source ${SPARSE_CT_SOURCES})
add_executable(${sparse_ct_source} ${sparse_ct_source}.cpp)
target_include_directories(${sparse_ct_source}
PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
PUBLIC ${PROJECT_SOURCE_DIR}/include
PUBLIC ${CMAKE_BINARY_DIR}/bin
)

add_dependencies(${EXAMPLE_NAME} onemkl_sparse_blas_${backend})
target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_${backend})
target_link_libraries(${sparse_ct_source} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_mklcpu onemkl_sparse_blas_cusparse)

# Register example as ctest
add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_spmv_usm_${backend} COMMAND ${EXAMPLE_NAME})
endforeach(backend)
add_test(NAME sparse_blas/EXAMPLE/CT/${sparse_ct_source} COMMAND ${sparse_ct_source})
endforeach(sparse_ct_source)

Loading