Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARSE] Add support for rocSPARSE backend #544

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interf

# sparse
option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)
option(ENABLE_ROCSPARSE_BACKEND "Enable the rocSPARSE backend for the SPARSE_BLAS interface" OFF)

set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")
Expand Down Expand Up @@ -106,7 +107,8 @@ if(ENABLE_MKLGPU_BACKEND
endif()
if(ENABLE_MKLCPU_BACKEND
OR ENABLE_MKLGPU_BACKEND
OR ENABLE_CUSPARSE_BACKEND)
OR ENABLE_CUSPARSE_BACKEND
OR ENABLE_ROCSPARSE_BACKEND)
list(APPEND DOMAINS_LIST "sparse_blas")
endif()

Expand Down Expand Up @@ -134,7 +136,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
endif()
else()
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_CUSPARSE_BACKEND
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND)
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
set(CMAKE_CXX_COMPILER "clang++")
elseif(ENABLE_MKLGPU_BACKEND)
if(UNIX)
Expand Down
19 changes: 15 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
</thead>
<tbody>
<tr>
<td rowspan=13 align="center">oneMKL interface</td>
<td rowspan=13 align="center">oneMKL selector</td>
<td rowspan=14 align="center">oneMKL interface</td>
<td rowspan=14 align="center">oneMKL selector</td>
<td align="center"><a href="https://software.intel.com/en-us/oneapi/onemkl">Intel(R) oneAPI Math Kernel Library (oneMKL)</a></td>
<td align="center">x86 CPU, Intel GPU</td>
</tr>
Expand Down Expand Up @@ -61,7 +61,11 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org).
<td align="center">AMD GPU</td>
</tr>
<tr>
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocFFT">AMD rocFFT</a></td>
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocFFT">AMD rocFFT</a></td>
<td align="center">AMD GPU</td>
</tr>
<tr>
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocSPARSE">AMD rocSPARSE</a></td>
Copy link
Contributor

@gajanan-choudhary gajanan-choudhary Nov 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also add rocSPARSE version info in the table later in this README file in the table in the Product and Version Information section

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching that, done in dc22051

<td align="center">AMD GPU</td>
</tr>
<tr>
Expand Down Expand Up @@ -333,7 +337,7 @@ Supported compilers include:
<td align="center">Dynamic, Static</td>
</tr>
<tr>
<td rowspan=3 align="center">SPARSE_BLAS</td>
<td rowspan=4 align="center">SPARSE_BLAS</td>
<td align="center">x86 CPU</td>
<td align="center">Intel(R) oneMKL</td>
<td align="center">Intel DPC++</td>
Expand All @@ -351,6 +355,12 @@ Supported compilers include:
<td align="center">Open DPC++</td>
<td align="center">Dynamic, Static</td>
</tr>
<tr>
<td align="center">AMD GPU</td>
<td align="center">AMD rocSPARSE</td>
<td align="center">Open DPC++</td>
<td align="center">Dynamic, Static</td>
</tr>
</tbody>
</table>

Expand Down Expand Up @@ -537,6 +547,7 @@ Product | Supported Version | License
[AMD rocRAND](https://github.com/ROCm/rocRAND) | 5.1.0 | [AMD License](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt)
[AMD rocSOLVER](https://github.com/ROCm/rocSOLVER) | 5.0.0 | [AMD License](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md)
[AMD rocFFT](https://github.com/ROCm/rocFFT) | rocm-5.4.3 | [AMD License](https://github.com/ROCm/rocFFT/blob/rocm-5.4.3/LICENSE.md)
[AMD rocSPARSE](https://github.com/ROCm/rocSPARSE) | 3.1.2 | [AMD License](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md)
[NETLIB LAPACK](https://www.netlib.org/) | [5d4180c](https://github.com/Reference-LAPACK/lapack/commit/5d4180cf8288ae6ad9a771d18793d15bd0c5643c) | [BSD like license](http://www.netlib.org/lapack/LICENSE.txt)
[portBLAS](https://github.com/codeplaysoftware/portBLAS) | 0.1 | [Apache License v2.0](https://github.com/codeplaysoftware/portBLAS/blob/main/LICENSE)
[portFFT](https://github.com/codeplaysoftware/portFFT) | 0.1 | [Apache License v2.0](https://github.com/codeplaysoftware/portFFT/blob/main/LICENSE)
Expand Down
6 changes: 3 additions & 3 deletions cmake/FindCompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ if(is_dpcpp)
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
-fsycl-targets=nvptx64-nvidia-cuda)
elseif(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND
OR ENABLE_ROCSOLVER_BACKEND)
OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
-fsycl-targets=amdgcn-amd-amdhsa -fsycl-unnamed-lambda
-Xsycl-target-backend --offload-arch=${HIP_TARGETS})
Expand All @@ -52,7 +52,7 @@ if(is_dpcpp)
--offload-arch=${HIP_TARGETS})
endif()
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
INTERFACE_LINK_OPTIONS "${UNIX_INTERFACE_LINK_OPTIONS}"
Expand All @@ -69,7 +69,7 @@ if(is_dpcpp)
INTERFACE_LINK_LIBRARIES ${SYCL_LIBRARY})
endif()

if(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
if(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
# Allow find_package(HIP) to find the correct path to libclang_rt.builtins.a
# HIP's CMake uses the command `${HIP_CXX_COMPILER} -print-libgcc-file-name --rtlib=compiler-rt` to find this path.
# This can print a non-existing file if the compiler used is icpx.
Expand Down
20 changes: 12 additions & 8 deletions docs/building_the_project_with_dpcpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ The most important supported build options are:
* - ENABLE_ROCRAND_BACKEND
- True, False
- False
* - ENABLE_ROCSPARSE_BACKEND
- True, False
- False
* - ENABLE_MKLCPU_THREAD_TBB
- True, False
- True
Expand Down Expand Up @@ -198,14 +201,14 @@ Building for ROCm
^^^^^^^^^^^^^^^^^

The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND``,
``ENABLE_ROCFFT_BACKEND``, ``ENABLE_ROCSOLVER_BACKEND`` and
``ENABLE_ROCRAND_BACKEND``.
``ENABLE_ROCFFT_BACKEND``, ``ENABLE_ROCSOLVER_BACKEND``,
``ENABLE_ROCRAND_BACKEND``, and ``ENABLE_ROCSPARSE_BACKEND``.

For *RocBLAS*, *RocSOLVER* and *RocRAND*, the target device architecture must be
set. This can be set with using the ``HIP_TARGETS`` parameter. For example, to
enable a build for MI200 series GPUs, ``-DHIP_TARGETS=gfx90a`` should be set.
Currently, DPC++ can only build for a single HIP target at a time. This may
change in future versions.
For *RocBLAS*, *RocSOLVER*, *RocRAND*, and *RocSPARSE*, the target device
architecture must be set. This can be set using the ``HIP_TARGETS``
parameter. For example, to enable a build for MI200 series GPUs,
``-DHIP_TARGETS=gfx90a`` should be set. Currently, DPC++ can only build for a
single HIP target at a time. This may change in future versions.

A few often-used architectures are listed below:

Expand Down Expand Up @@ -394,7 +397,8 @@ disabled:
-DENABLE_MKLGPU_BACKEND=False \
-DENABLE_ROCFFT_BACKEND=True \
-DENABLE_ROCBLAS_BACKEND=True \
-DENABLE_ROCSOLVER_BACKEND=True \
-DENABLE_ROCSOLVER_BACKEND=True \
-DENABLE_ROCSPARSE_BACKEND=True \
-DHIP_TARGETS=gfx90a \
-DBUILD_FUNCTIONAL_TESTS=False

Expand Down
46 changes: 46 additions & 0 deletions docs/domains/sparse_linear_algebra.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,31 @@ Currently known limitations:
``cusparseSpMV_preprocess``. Feel free to create an issue if this is needed.


rocSPARSE backend
-----------------

Currently known limitations:

- Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
throw a ``oneapi::mkl::unimplemented`` exception.
- The COO format requires the indices to be sorted by row then by column. See
the `rocSPARSE COO documentation
<https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#coo-storage-format>`_.
Sparse operations using matrices with the COO format without the property
``matrix_property::sorted`` will throw a ``oneapi::mkl::unimplemented``
exception.
- The CSR format requires the column indices to be sorted within each row. See
the `rocSPARSE CSR documentation
<https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format>`_.
Sparse operations using matrices with the CSR format without the property
``matrix_property::sorted`` will throw a ``oneapi::mkl::unimplemented``
exception.
- The same sparse matrix handle cannot be reused for multiple operations
``spmm``, ``spmv``, or ``spsv``. Doing so will throw a
``oneapi::mkl::unimplemented`` exception. See `#332
<https://github.com/ROCm/rocSPARSE/issues/332>`_.
Comment on lines +90 to +93
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, this is quite severe, but seems to be a legitimate issue on rocSPARSE side right now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If and when they fix this issue, though, will it be easy for us to make changes (with a version check of course) that correctly performs the operations rather than throwing an unimplemented exception?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's easy to fix on oneMath side. The issue is also referenced in this comment: https://github.com/oneapi-src/oneMKL/pull/544/files#diff-3b8c1c2c71abd54f8f90f43415c2f17b2a7fdb81c2b882c210f3cba56b4679adR63
One would just need to remove the used member, its 2 usages below as well as the mark_used method.



Operation algorithms mapping
----------------------------

Expand All @@ -89,33 +114,43 @@ spmm
* - ``spmm_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
- rocSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPMM_ALG_DEFAULT``
- ``rocsparse_spmm_alg_default``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPMM_ALG_DEFAULT``
- ``rocsparse_spmm_alg_default``
* - ``coo_alg1``
- none
- ``CUSPARSE_SPMM_COO_ALG1``
- ``rocsparse_spmm_alg_coo_segmented``
* - ``coo_alg2``
- none
- ``CUSPARSE_SPMM_COO_ALG2``
- ``rocsparse_spmm_alg_coo_atomic``
* - ``coo_alg3``
- none
- ``CUSPARSE_SPMM_COO_ALG3``
- ``rocsparse_spmm_alg_coo_segmented_atomic``
* - ``coo_alg4``
- none
- ``CUSPARSE_SPMM_COO_ALG4``
- ``rocsparse_spmm_alg_default``
* - ``csr_alg1``
- none
- ``CUSPARSE_SPMM_CSR_ALG1``
- ``rocsparse_spmm_alg_csr``
* - ``csr_alg2``
- none
- ``CUSPARSE_SPMM_CSR_ALG2``
- ``rocsparse_spmm_alg_csr_row_split``
* - ``csr_alg3``
- none
- ``CUSPARSE_SPMM_CSR_ALG3``
- ``rocsparse_spmm_alg_csr_merge``


spmv
Expand All @@ -128,27 +163,35 @@ spmv
* - ``spmv_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
- rocSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``
- ``rocsparse_spmv_alg_default``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``
- ``rocsparse_spmv_alg_default``
* - ``coo_alg1``
- none
- ``CUSPARSE_SPMV_COO_ALG1``
- ``rocsparse_spmv_alg_coo``
* - ``coo_alg2``
- none
- ``CUSPARSE_SPMV_COO_ALG2``
- ``rocsparse_spmv_alg_coo_atomic``
* - ``csr_alg1``
- none
- ``CUSPARSE_SPMV_CSR_ALG1``
- ``rocsparse_spmv_alg_csr_adaptive``
* - ``csr_alg2``
- none
- ``CUSPARSE_SPMV_CSR_ALG2``
- ``rocsparse_spmv_alg_csr_stream``
* - ``csr_alg3``
- none
- ``CUSPARSE_SPMV_ALG_DEFAULT``
- ``rocsparse_spmv_alg_csr_lrb``


spsv
Expand All @@ -161,9 +204,12 @@ spsv
* - ``spsv_alg`` value
- MKLCPU/MKLGPU
- cuSPARSE
- rocSPARSE
* - ``default_alg``
- none
- ``CUSPARSE_SPSV_ALG_DEFAULT``
- ``rocsparse_spsv_alg_default``
* - ``no_optimize_alg``
- none
- ``CUSPARSE_SPSV_ALG_DEFAULT``
- ``rocsparse_spsv_alg_default``
3 changes: 3 additions & 0 deletions examples/sparse_blas/run_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ endif()
if(ENABLE_CUSPARSE_BACKEND)
list(APPEND DEVICE_FILTERS "cuda:gpu")
endif()
if(ENABLE_ROCSPARSE_BACKEND)
list(APPEND DEVICE_FILTERS "hip:gpu")
endif()

message(STATUS "ONEAPI_DEVICE_SELECTOR will be set to the following value(s): [${DEVICE_FILTERS}] for run-time dispatching examples")

Expand Down
2 changes: 2 additions & 0 deletions include/oneapi/mkl/detail/backends.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ enum class backend {
rocfft,
portfft,
cusparse,
rocsparse,
unsupported
};

Expand All @@ -63,6 +64,7 @@ static backendmap backend_map = { { backend::mklcpu, "mklcpu" },
{ backend::rocfft, "rocfft" },
{ backend::portfft, "portfft" },
{ backend::cusparse, "cusparse" },
{ backend::rocsparse, "rocsparse" },
{ backend::unsupported, "unsupported" } };
// clang-format on

Expand Down
6 changes: 6 additions & 0 deletions include/oneapi/mkl/detail/backends_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,12 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
{
#ifdef ONEMKL_ENABLE_CUSPARSE_BACKEND
LIB_NAME("sparse_blas_cusparse")
#endif
} },
{ device::amdgpu,
{
#ifdef ONEMKL_ENABLE_ROCSPARSE_BACKEND
LIB_NAME("sparse_blas_rocsparse")
#endif
} } } },
};
Expand Down
3 changes: 3 additions & 0 deletions include/oneapi/mkl/sparse_blas.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
#ifdef ONEMKL_ENABLE_CUSPARSE_BACKEND
#include "sparse_blas/detail/cusparse/sparse_blas_ct.hpp"
#endif
#ifdef ONEMKL_ENABLE_ROCSPARSE_BACKEND
#include "sparse_blas/detail/rocsparse/sparse_blas_ct.hpp"
#endif

#include "sparse_blas/detail/sparse_blas_rt.hpp"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/***************************************************************************
* Copyright (C) Codeplay Software Limited
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* For your convenience, a copy of the License has been included in this
* repository.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**************************************************************************/

#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMKL_SPARSE_BLAS_ROCSPARSE_HPP_
#define _ONEMKL_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMKL_SPARSE_BLAS_ROCSPARSE_HPP_

#include "oneapi/mkl/detail/export.hpp"
#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp"
#include "oneapi/mkl/sparse_blas/types.hpp"

// Namespace for the rocSPARSE backend. The shared backend header is included
// *inside* the namespace on purpose, so that everything it declares is placed
// in oneapi::mkl::sparse::rocsparse rather than at global scope.
// NOTE(review): presumably the .hxx contains the backend-agnostic function
// declarations shared by every sparse BLAS backend - confirm against that file.
namespace oneapi::mkl::sparse::rocsparse {

#include "oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx"

} // namespace oneapi::mkl::sparse::rocsparse

#endif // _ONEMKL_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMKL_SPARSE_BLAS_ROCSPARSE_HPP_
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/***************************************************************************
* Copyright (C) Codeplay Software Limited
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* For your convenience, a copy of the License has been included in this
* repository.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**************************************************************************/

#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_ROCSPARSE_SPARSE_BLAS_CT_HPP_
#define _ONEMKL_SPARSE_BLAS_DETAIL_ROCSPARSE_SPARSE_BLAS_CT_HPP_

#include "oneapi/mkl/detail/backends.hpp"
#include "oneapi/mkl/detail/backend_selector.hpp"

#include "onemkl_sparse_blas_rocsparse.hpp"

namespace oneapi {
namespace mkl {
namespace sparse {

// BACKEND is defined only for the duration of the include below and is
// undefined immediately afterwards, so the macro does not leak into code that
// includes this header. The order define -> include -> undef must be kept.
// NOTE(review): presumably sparse_blas_ct.hxx uses BACKEND to select the
// rocsparse namespace/backend when generating the compile-time dispatch
// overloads - confirm against that file.
#define BACKEND rocsparse
#include "oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx"
#undef BACKEND

} //namespace sparse
} //namespace mkl
} //namespace oneapi

#endif // _ONEMKL_SPARSE_BLAS_DETAIL_ROCSPARSE_SPARSE_BLAS_CT_HPP_
Loading