From 42b820795506428c1e9a7025cd1ff5e41ac0f0ab Mon Sep 17 00:00:00 2001 From: mvandelle Date: Wed, 20 Nov 2024 17:05:29 -0500 Subject: [PATCH 1/3] Updated SparseLinAlg.hpp and Test_SparseLinAlg.cpp --- .../lightning_qubit/utils/SparseLinAlg.hpp | 89 +++++++++++++++---- .../utils/tests/Test_SparseLinAlg.cpp | 13 +++ 2 files changed, 87 insertions(+), 15 deletions(-) diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/SparseLinAlg.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/SparseLinAlg.hpp index df8b616303..2e49071ecf 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/SparseLinAlg.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/SparseLinAlg.hpp @@ -20,10 +20,43 @@ #pragma once #include #include +#include namespace Pennylane::LightningQubit::Util { + +/** + * @brief Worker function to compute a segment of the matrix-vector multiplication for a sparse matrix. + * + * @tparam fp_precision data float point precision. + * @tparam index_type integer type used as indices of the sparse matrix. + * @param vector_ptr pointer to the vector. + * @param row_map_ptr Pointer to the row_map array. Elements of this array + * return the number of non-zero terms in all rows before it. + * @param entries_ptr pointer to the column indices of the non-zero elements. + * @param values_ptr non-zero elements. + * @param result Reference to the output vector where results are stored. + * @param start Index of the first row to process. + * @param end Index of the last row (exclusive) to process. + */ +template +void sparse_worker(const std::complex *vector_ptr, + const index_type *row_map_ptr, + const index_type *entries_ptr, + const std::complex *values_ptr, + std::vector> &result, + index_type start, index_type end) { + for (index_type i = start; i < end; i++) { + std::complex temp = 0.0; + // Loop through all non-zero elements in row `i` + for (index_type j = row_map_ptr[i]; j < row_map_ptr[i + 1]; j++) { + temp += values_ptr[j] * vector_ptr[entries_ptr[j]]; + } + result[i] = temp; // Store the computed value in the result vector + } +} + /** - * @brief Apply a sparse matrix to a vector. + * @brief Applies a sparse matrix to a vector using multi-threading. * * @tparam fp_precision data float point precision. * @tparam index_type integer type used as indices of the sparse matrix. @@ -40,21 +73,47 @@ namespace Pennylane::LightningQubit::Util { template std::vector> apply_Sparse_Matrix(const std::complex *vector_ptr, - const index_type vector_size, const index_type *row_map_ptr, - [[maybe_unused]] const index_type row_map_size, - const index_type *entries_ptr, - const std::complex *values_ptr, - [[maybe_unused]] const index_type numNNZ) { - std::vector> result; - result.resize(vector_size); - std::size_t count = 0; - for (index_type i = 0; i < vector_size; i++) { - result[i] = 0.0; - for (index_type j = 0; j < row_map_ptr[i + 1] - row_map_ptr[i]; j++) { - result[i] += values_ptr[count] * vector_ptr[entries_ptr[count]]; - count++; + const index_type vector_size, + const index_type *row_map_ptr, + [[maybe_unused]] const index_type row_map_size, + const index_type *entries_ptr, + const std::complex *values_ptr, + [[maybe_unused]] const index_type numNNZ, + index_type num_threads = 0) { + // Output vector initialized to zero + std::vector> result(vector_size, std::complex(0.0)); + + // Determine the number of threads to use + if (num_threads <= 0) { + const int max_threads = std::thread::hardware_concurrency(); + num_threads = std::min(vector_size, static_cast(max_threads)); + } + + // Divide the rows approximately evenly among the threads + index_type chunk_size = (vector_size + num_threads - 1) / num_threads; + std::vector threads; + + // Create and launch threads + for (index_type t = 0; t < num_threads; ++t) { + index_type start = t * chunk_size; + index_type end = std::min(start + chunk_size, vector_size); + + // Only launch threads if there are rows to process + if (start < vector_size) { + threads.emplace_back(sparse_worker, + vector_ptr, row_map_ptr, entries_ptr, values_ptr, + std::ref(result), start, end); } } + + // Wait for all threads to complete + for (auto &th : threads) { + if (th.joinable()) { + th.join(); + } + } + return result; }; -} // namespace Pennylane::LightningQubit::Util + +} // namespace Pennylane::LightningQubit::Util \ No newline at end of file diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp index 56c41a0df5..22008e4fd4 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp @@ -81,4 +81,17 @@ TEMPLATE_TEST_CASE("apply_Sparse_Matrix", "[Sparse]", float, double) { REQUIRE(result_refs[vec] == approx(result).margin(1e-6)); }; } + + SECTION("Testing with different number of threads") { + for (size_t num_threads : { 1, 2, 4, 8, 16, 32}) { + for (size_t vec = 0; vec < vectors.size(); vec++) { + auto result = apply_Sparse_Matrix( + vectors[vec].data(), vectors[vec].size(), row_map.data(), + row_map.size(), entries.data(), values.data(), values.size(), + num_threads); + + REQUIRE(result_refs[vec] == approx(result).margin(1e-6)); + } + } + } } \ No newline at end of file From e8af71ebf0b3b3219c7d051c547634208dfe2fa9 Mon Sep 17 00:00:00 2001 From: mvandelle Date: Fri, 22 Nov 2024 15:12:31 -0500 Subject: [PATCH 2/3] Format SparseLinAlg and Test --- .../lightning_qubit/utils/SparseLinAlg.hpp | 37 +++++++++---------- .../utils/tests/Test_SparseLinAlg.cpp | 6 +-- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/SparseLinAlg.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/SparseLinAlg.hpp index 2e49071ecf..8ae59f1f6a 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/SparseLinAlg.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/SparseLinAlg.hpp @@ -19,13 +19,14 @@ #pragma once #include -#include #include +#include namespace Pennylane::LightningQubit::Util { /** - * @brief Worker function to compute a segment of the matrix-vector multiplication for a sparse matrix. + * @brief Worker function to compute a segment of the matrix-vector + * multiplication for a sparse matrix. * * @tparam fp_precision data float point precision. * @tparam index_type integer type used as indices of the sparse matrix. @@ -40,8 +41,7 @@ namespace Pennylane::LightningQubit::Util { */ template void sparse_worker(const std::complex *vector_ptr, - const index_type *row_map_ptr, - const index_type *entries_ptr, + const index_type *row_map_ptr, const index_type *entries_ptr, const std::complex *values_ptr, std::vector> &result, index_type start, index_type end) { @@ -71,25 +71,24 @@ void sparse_worker(const std::complex *vector_ptr, * @return result result of the matrix vector multiplication. */ template -std::vector> -apply_Sparse_Matrix(const std::complex *vector_ptr, - const index_type vector_size, - const index_type *row_map_ptr, - [[maybe_unused]] const index_type row_map_size, - const index_type *entries_ptr, - const std::complex *values_ptr, - [[maybe_unused]] const index_type numNNZ, - index_type num_threads = 0) { +std::vector> apply_Sparse_Matrix( + const std::complex *vector_ptr, const index_type vector_size, + const index_type *row_map_ptr, + [[maybe_unused]] const index_type row_map_size, + const index_type *entries_ptr, const std::complex *values_ptr, + [[maybe_unused]] const index_type numNNZ, index_type num_threads = 0) { // Output vector initialized to zero - std::vector> result(vector_size, std::complex(0.0)); + std::vector> result( + vector_size, std::complex(0.0)); // Determine the number of threads to use if (num_threads <= 0) { const int max_threads = std::thread::hardware_concurrency(); - num_threads = std::min(vector_size, static_cast(max_threads)); + num_threads = + std::min(vector_size, static_cast(max_threads)); } - - // Divide the rows approximately evenly among the threads + + // Divide the rows approximately evenly among the threads index_type chunk_size = (vector_size + num_threads - 1) / num_threads; std::vector threads; @@ -101,8 +100,8 @@ apply_Sparse_Matrix(const std::complex *vector_ptr, // Only launch threads if there are rows to process if (start < vector_size) { threads.emplace_back(sparse_worker, - vector_ptr, row_map_ptr, entries_ptr, values_ptr, - std::ref(result), start, end); + vector_ptr, row_map_ptr, entries_ptr, + values_ptr, std::ref(result), start, end); } } diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp index 22008e4fd4..76eeb5f458 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp @@ -83,12 +83,12 @@ TEMPLATE_TEST_CASE("apply_Sparse_Matrix", "[Sparse]", float, double) { } SECTION("Testing with different number of threads") { - for (size_t num_threads : { 1, 2, 4, 8, 16, 32}) { + for (size_t num_threads : {1, 2, 4, 8, 16, 32}) { for (size_t vec = 0; vec < vectors.size(); vec++) { auto result = apply_Sparse_Matrix( vectors[vec].data(), vectors[vec].size(), row_map.data(), - row_map.size(), entries.data(), values.data(), values.size(), - num_threads); + row_map.size(), entries.data(), values.data(), + values.size(), num_threads); REQUIRE(result_refs[vec] == approx(result).margin(1e-6)); } From 8e27978d1a8f99dea8df3db2497515064832e5f0 Mon Sep 17 00:00:00 2001 From: ringo-but-quantum Date: Fri, 22 Nov 2024 20:13:58 +0000 Subject: [PATCH 3/3] Auto update version from '0.40.0-dev13' to '0.40.0-dev15' --- pennylane_lightning/core/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index fec481f46d..e3474d59e8 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.40.0-dev13" +__version__ = "0.40.0-dev15"