Skip to content

Commit

Permalink
[SPARSE] Update oneMKL backends to match new sparse API (#500)
Browse files Browse the repository at this point in the history
  • Loading branch information
Rbiessy authored Sep 6, 2024
1 parent 1ce98a6 commit c9d0b47
Show file tree
Hide file tree
Showing 66 changed files with 6,453 additions and 2,944 deletions.
40 changes: 40 additions & 0 deletions docs/domains/sparse_linear_algebra.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
.. _onemkl_sparse_linear_algebra:

Sparse Linear Algebra
=====================

See the latest specification for the sparse domain `here
<https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/domains/spblas/spblas>`_.

This page documents implementation specific or backend specific details of the
sparse domain.

OneMKL Intel CPU and GPU backends
---------------------------------

Currently known limitations:

- All operations' algorithms except ``no_optimize_alg`` map to the default
algorithm.
- The required external workspace size is always 0 bytes.
- ``oneapi::mkl::sparse::set_csr_data`` and
``oneapi::mkl::sparse::set_coo_data`` functions cannot be used on a handle
that has already been used for an operation or its optimize function. Doing so
will throw an ``oneapi::mkl::unimplemented`` exception.
- Using ``spsv`` with the ``oneapi::mkl::sparse::spsv_alg::no_optimize_alg`` and
a sparse matrix that does not have the
``oneapi::mkl::sparse::matrix_property::sorted`` property will throw an
``oneapi::mkl::unimplemented`` exception.
- Using ``spmm`` on Intel GPU with a sparse matrix that is
``oneapi::mkl::transpose::conjtrans`` and has the
``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw an
``oneapi::mkl::unimplemented`` exception.
- Using ``spmv`` with a sparse matrix that is
``oneapi::mkl::transpose::conjtrans`` with a ``type_view``
``matrix_descr::symmetric`` or ``matrix_descr::hermitian`` will throw an
``oneapi::mkl::unimplemented`` exception.
- Using ``spsv`` on Intel GPU with a sparse matrix that is
``oneapi::mkl::transpose::conjtrans`` will throw an
``oneapi::mkl::unimplemented`` exception.
- Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent
synchronizations and copies to the host.
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ Contents

onemkl-datatypes.rst
domains/dense_linear_algebra.rst
domains/sparse_linear_algebra.rst
create_new_backend.rst
6 changes: 3 additions & 3 deletions examples/sparse_blas/compile_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ endif()
include(WarningsUtils)

foreach(backend ${SPARSE_BLAS_BACKENDS})
set(EXAMPLE_NAME example_sparse_blas_gemv_usm_${backend})
add_executable(${EXAMPLE_NAME} sparse_blas_gemv_usm_${backend}.cpp)
set(EXAMPLE_NAME example_sparse_blas_spmv_usm_${backend})
add_executable(${EXAMPLE_NAME} sparse_blas_spmv_usm_${backend}.cpp)
target_include_directories(${EXAMPLE_NAME}
PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
PUBLIC ${PROJECT_SOURCE_DIR}/include
Expand All @@ -39,6 +39,6 @@ foreach(backend ${SPARSE_BLAS_BACKENDS})
target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_${backend})

# Register example as ctest
add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_gemv_usm_${backend} COMMAND ${EXAMPLE_NAME})
add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_spmv_usm_${backend} COMMAND ${EXAMPLE_NAME})
endforeach(backend)

Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
/*
*
* Content:
* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv
* This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv
* using unified shared memory to perform general sparse matrix-vector
* multiplication on an Intel CPU SYCL device.
*
Expand All @@ -32,7 +32,7 @@
*
*
* This example demonstrates only single precision (float) data type for
* gemv matrix data
* spmv matrix data
*
*
*******************************************************************************/
Expand Down Expand Up @@ -77,7 +77,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) {
}
catch (sycl::exception const &e) {
std::cout << "Caught asynchronous SYCL "
"exception during sparse::gemv:\n"
"exception during sparse::spmv:\n"
<< e.what() << std::endl;
}
}
Expand Down Expand Up @@ -128,7 +128,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) {
//

oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
std::cout << "\n\t\tsparse::gemv parameters:\n";
oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg;
oneapi::mkl::sparse::matrix_view A_view;

std::cout << "\n\t\tsparse::spmv parameters:\n";
std::cout << "\t\t\ttransA = "
<< (transA == oneapi::mkl::transpose::nontrans
? "nontrans"
Expand All @@ -137,23 +140,49 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) {
std::cout << "\t\t\tnrows = " << nrows << std::endl;
std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl;

// create and initialize handle for a Sparse Matrix in CSR format
oneapi::mkl::sparse::matrix_handle_t handle = nullptr;

oneapi::mkl::sparse::init_matrix_handle(cpu_selector, &handle);

auto ev_set = oneapi::mkl::sparse::set_csr_data(cpu_selector, handle, nrows, nrows, nnz,
oneapi::mkl::index_base::zero, ia, ja, a);

auto ev_opt = oneapi::mkl::sparse::optimize_gemv(cpu_selector, transA, handle, { ev_set });

auto ev_gemv =
oneapi::mkl::sparse::gemv(cpu_selector, transA, alpha, handle, x, beta, y, { ev_opt });

auto ev_release =
oneapi::mkl::sparse::release_matrix_handle(cpu_selector, &handle, { ev_gemv });

ev_release.wait_and_throw();
// Create and initialize handle for a Sparse Matrix in CSR format
oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
oneapi::mkl::sparse::init_csr_matrix(cpu_selector, &A_handle, nrows, nrows, nnz,
oneapi::mkl::index_base::zero, ia, ja, a);

// Create and initialize dense vector handles
oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
oneapi::mkl::sparse::init_dense_vector(cpu_selector, &x_handle, sizevec, x);
oneapi::mkl::sparse::init_dense_vector(cpu_selector, &y_handle, sizevec, y);

// Create operation descriptor
oneapi::mkl::sparse::spmv_descr_t descr = nullptr;
oneapi::mkl::sparse::init_spmv_descr(cpu_selector, &descr);

// Allocate external workspace
std::size_t workspace_size = 0;
oneapi::mkl::sparse::spmv_buffer_size(cpu_selector, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, workspace_size);
void *workspace = sycl::malloc_device(workspace_size, cpu_queue);

// Optimize spmv
auto ev_opt =
oneapi::mkl::sparse::spmv_optimize(cpu_selector, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, workspace);

// Run spmv
auto ev_spmv = oneapi::mkl::sparse::spmv(cpu_selector, transA, &alpha, A_view, A_handle,
x_handle, &beta, y_handle, alg, descr, { ev_opt });

// Release handles and descriptor
std::vector<sycl::event> release_events;
release_events.push_back(
oneapi::mkl::sparse::release_dense_vector(cpu_selector, x_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_dense_vector(cpu_selector, y_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_sparse_matrix(cpu_selector, A_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_spmv_descr(cpu_selector, descr, { ev_spmv }));
for (auto event : release_events) {
event.wait_and_throw();
}

//
// Post Processing
Expand Down Expand Up @@ -181,7 +210,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) {
good &= check_result(res[row], z[row], nrows, row);
}

std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished"
std::cout << "\n\t\t sparse::spmv example " << (good ? "passed" : "failed") << "\n\tFinished"
<< std::endl;

free_vec(fp_ptr_vec, cpu_queue);
Expand Down Expand Up @@ -211,7 +240,7 @@ void print_example_banner() {
std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl;
std::cout << "# " << std::endl;
std::cout << "# Using apis:" << std::endl;
std::cout << "# sparse::gemv" << std::endl;
std::cout << "# sparse::spmv" << std::endl;
std::cout << "# " << std::endl;
std::cout << "# Using single precision (float) data type" << std::endl;
std::cout << "# " << std::endl;
Expand All @@ -232,22 +261,22 @@ int main(int /*argc*/, char ** /*argv*/) {
// TODO: Add cuSPARSE compile-time dispatcher in this example once it is supported.
sycl::device cpu_dev(sycl::cpu_selector_v);

std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl;
std::cout << "Running Sparse BLAS SPMV USM example on CPU device." << std::endl;
std::cout << "Device name is: " << cpu_dev.get_info<sycl::info::device::name>()
<< std::endl;
std::cout << "Running with single precision real data type:" << std::endl;

run_sparse_matrix_vector_multiply_example<float, std::int32_t>(cpu_dev);
std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl;
std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl;
}
catch (sycl::exception const &e) {
std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl;
std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl;
std::cerr << "\t" << e.what() << std::endl;
std::cerr << "\tSYCL error code: " << e.code().value() << std::endl;
return 1;
}
catch (std::exception const &e) {
std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl;
std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl;
std::cerr << "\t" << e.what() << std::endl;
return 1;
}
Expand Down
2 changes: 1 addition & 1 deletion examples/sparse_blas/run_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
include(WarningsUtils)

# Build object from all example sources
set(SPARSE_BLAS_RT_SOURCES "sparse_blas_gemv_usm")
set(SPARSE_BLAS_RT_SOURCES "sparse_blas_spmv_usm")
# Set up for the right backend for run-time dispatching examples
# If users build more than one backend (i.e. mklcpu and mklgpu, or mklcpu and CUDA), they may need to
# overwrite ONEAPI_DEVICE_SELECTOR in their environment to run on the desired backend
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
/*
*
* Content:
* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv
* This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv
* using unified shared memory to perform general sparse matrix-vector
* multiplication on a SYCL device (HOST, CPU, GPU) that is selected
* during runtime.
Expand All @@ -33,7 +33,7 @@
*
*
* This example demonstrates only single precision (float) data type for
* gemv matrix data
* spmv matrix data
*
*
*******************************************************************************/
Expand Down Expand Up @@ -78,7 +78,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
}
catch (sycl::exception const &e) {
std::cout << "Caught asynchronous SYCL "
"exception during sparse::gemv:\n"
"exception during sparse::spmv:\n"
<< e.what() << std::endl;
}
}
Expand All @@ -93,6 +93,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
std::size_t sizeja = static_cast<std::size_t>(27 * nrows);
std::size_t sizeia = static_cast<std::size_t>(nrows + 1);
std::size_t sizevec = static_cast<std::size_t>(nrows);
auto sizevec_i64 = static_cast<std::int64_t>(sizevec);

ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), main_queue);
ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), main_queue);
Expand Down Expand Up @@ -128,7 +129,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
//

oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
std::cout << "\n\t\tsparse::gemv parameters:\n";
oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg;
oneapi::mkl::sparse::matrix_view A_view;

std::cout << "\n\t\tsparse::spmv parameters:\n";
std::cout << "\t\t\ttransA = "
<< (transA == oneapi::mkl::transpose::nontrans
? "nontrans"
Expand All @@ -137,22 +141,49 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
std::cout << "\t\t\tnrows = " << nrows << std::endl;
std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl;

// create and initialize handle for a Sparse Matrix in CSR format
oneapi::mkl::sparse::matrix_handle_t handle = nullptr;

oneapi::mkl::sparse::init_matrix_handle(main_queue, &handle);

auto ev_set = oneapi::mkl::sparse::set_csr_data(main_queue, handle, nrows, nrows, nnz,
oneapi::mkl::index_base::zero, ia, ja, a);

auto ev_opt = oneapi::mkl::sparse::optimize_gemv(main_queue, transA, handle, { ev_set });

auto ev_gemv =
oneapi::mkl::sparse::gemv(main_queue, transA, alpha, handle, x, beta, y, { ev_opt });

auto ev_release = oneapi::mkl::sparse::release_matrix_handle(main_queue, &handle, { ev_gemv });

ev_release.wait_and_throw();
// Create and initialize handle for a Sparse Matrix in CSR format
oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
oneapi::mkl::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz,
oneapi::mkl::index_base::zero, ia, ja, a);

// Create and initialize dense vector handles
oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, sizevec_i64, x);
oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, sizevec_i64, y);

// Create operation descriptor
oneapi::mkl::sparse::spmv_descr_t descr = nullptr;
oneapi::mkl::sparse::init_spmv_descr(main_queue, &descr);

// Allocate external workspace
std::size_t workspace_size = 0;
oneapi::mkl::sparse::spmv_buffer_size(main_queue, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, workspace_size);
void *workspace = sycl::malloc_device(workspace_size, main_queue);

// Optimize spmv
auto ev_opt =
oneapi::mkl::sparse::spmv_optimize(main_queue, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, workspace);

// Run spmv
auto ev_spmv = oneapi::mkl::sparse::spmv(main_queue, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, { ev_opt });

// Release handles and descriptor
std::vector<sycl::event> release_events;
release_events.push_back(
oneapi::mkl::sparse::release_dense_vector(main_queue, x_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_dense_vector(main_queue, y_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_sparse_matrix(main_queue, A_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_spmv_descr(main_queue, descr, { ev_spmv }));
for (auto event : release_events) {
event.wait_and_throw();
}

//
// Post Processing
Expand Down Expand Up @@ -180,7 +211,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
good &= check_result(res[row], z[row], nrows, row);
}

std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished"
std::cout << "\n\t\t sparse::spmv example " << (good ? "passed" : "failed") << "\n\tFinished"
<< std::endl;

free_vec(fp_ptr_vec, main_queue);
Expand Down Expand Up @@ -210,7 +241,7 @@ void print_example_banner() {
std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl;
std::cout << "# " << std::endl;
std::cout << "# Using apis:" << std::endl;
std::cout << "# sparse::gemv" << std::endl;
std::cout << "# sparse::spmv" << std::endl;
std::cout << "# " << std::endl;
std::cout << "# Using single precision (float) data type" << std::endl;
std::cout << "# " << std::endl;
Expand All @@ -234,28 +265,28 @@ int main(int /*argc*/, char ** /*argv*/) {
sycl::device dev = sycl::device();

if (dev.is_gpu()) {
std::cout << "Running Sparse BLAS GEMV USM example on GPU device." << std::endl;
std::cout << "Running Sparse BLAS SPMV USM example on GPU device." << std::endl;
std::cout << "Device name is: " << dev.get_info<sycl::info::device::name>()
<< std::endl;
}
else {
std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl;
std::cout << "Running Sparse BLAS SPMV USM example on CPU device." << std::endl;
std::cout << "Device name is: " << dev.get_info<sycl::info::device::name>()
<< std::endl;
}
std::cout << "Running with single precision real data type:" << std::endl;

run_sparse_matrix_vector_multiply_example<float, std::int32_t>(dev);
std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl;
std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl;
}
catch (sycl::exception const &e) {
std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl;
std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl;
std::cerr << "\t" << e.what() << std::endl;
std::cerr << "\tSYCL error code: " << e.code().value() << std::endl;
return 1;
}
catch (std::exception const &e) {
std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl;
std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl;
std::cerr << "\t" << e.what() << std::endl;
return 1;
}
Expand Down
Loading

0 comments on commit c9d0b47

Please sign in to comment.