From c0949e8f66db5939c93abdd0d150e8ad0809d1da Mon Sep 17 00:00:00 2001 From: Dylan Copeland Date: Thu, 30 May 2024 09:45:01 -0700 Subject: [PATCH] Get R factor from QR factorization (#288) Added a QR function that returns the R factor. Changed QR API to use unique_ptr instead of raw pointers. Added a QR unit test that works in parallel. --- .github/workflows/run_tests/action.yml | 2 + CMakeLists.txt | 1 + lib/hyperreduction/S_OPT.cpp | 9 +- lib/linalg/Matrix.cpp | 138 +++++++++++++++++++--- lib/linalg/Matrix.h | 23 +++- lib/linalg/scalapack_f_wrapper.f90 | 2 +- lib/linalg/svd/RandomizedSVD.cpp | 3 +- unit_tests/test_HDFDatabase.cpp | 8 +- unit_tests/test_QR.cpp | 155 +++++++++++++++++++++++++ unit_tests/test_StaticSVD.cpp | 2 +- 10 files changed, 312 insertions(+), 31 deletions(-) create mode 100644 unit_tests/test_QR.cpp diff --git a/.github/workflows/run_tests/action.yml b/.github/workflows/run_tests/action.yml index 994f45af3..9132805e9 100644 --- a/.github/workflows/run_tests/action.yml +++ b/.github/workflows/run_tests/action.yml @@ -26,6 +26,8 @@ runs: mpirun -n 3 --oversubscribe tests/test_IncrementalSVDBrand ./tests/test_HDFDatabase mpirun -n 3 --oversubscribe tests/test_HDFDatabase + ./tests/test_QR + mpirun -n 3 --oversubscribe tests/test_QR ./tests/test_NNLS mpirun -n 3 --oversubscribe tests/test_NNLS shell: bash diff --git a/CMakeLists.txt b/CMakeLists.txt index af3d38f9d..f4cb815cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -274,6 +274,7 @@ if(GTEST_FOUND) DMD GNAT QDEIM + QR S_OPT StaticSVD RandomizedSVD diff --git a/lib/hyperreduction/S_OPT.cpp b/lib/hyperreduction/S_OPT.cpp index 81c5b24de..6460bbeae 100644 --- a/lib/hyperreduction/S_OPT.cpp +++ b/lib/hyperreduction/S_OPT.cpp @@ -110,11 +110,12 @@ S_OPT(const Matrix* f_basis, } const Matrix* Vo = NULL; - + std::unique_ptr f_basis_truncated_Q; // Use the QR factorization of the input matrix, if requested if (qr_factorize) { - Vo = f_basis_truncated->qr_factorize(); + f_basis_truncated_Q = f_basis_truncated->qr_factorize(); + Vo = f_basis_truncated_Q.get(); } else { @@ -520,10 +521,6 @@ S_OPT(const Matrix* f_basis, MPI_Type_free(&MaxRowType); MPI_Op_free(&RowInfoOp); - if (qr_factorize) - { - delete Vo; - } if (num_basis_vectors < f_basis->numColumns()) { delete f_basis_truncated; diff --git a/lib/linalg/Matrix.cpp b/lib/linalg/Matrix.cpp index 2313ae515..21dcd55e2 100644 --- a/lib/linalg/Matrix.cpp +++ b/lib/linalg/Matrix.cpp @@ -1121,11 +1121,27 @@ Matrix::calculateNumDistributedRows() { } } -Matrix* -Matrix::qr_factorize() const +std::unique_ptr Matrix::qr_factorize() const +{ + std::vector QR; + qr_factorize(false, QR); + return std::unique_ptr(QR[0]); +} + +void Matrix::qr_factorize(std::vector> & QR) const +{ + std::vector QRraw; // Raw pointers + qr_factorize(true, QRraw); + QR.clear(); + for (int i=0; i<2; ++i) + QR.push_back(std::unique_ptr(QRraw[i])); +} + +void Matrix::qr_factorize(bool computeR, + std::vector & QR) const { - int myid; - MPI_Comm_rank(MPI_COMM_WORLD, &myid); + const int myid = d_rank; + const int ncols = numColumns(); std::vector row_offset(d_num_procs + 1); row_offset[d_num_procs] = numDistributedRows(); @@ -1151,11 +1167,11 @@ Matrix::qr_factorize() const int blocksize = row_offset[d_num_procs] / d_num_procs; if (row_offset[d_num_procs] % d_num_procs != 0) blocksize += 1; - initialize_matrix(&slpk, numColumns(), numDistributedRows(), - ncol_blocks, nrow_blocks, numColumns(), blocksize); + initialize_matrix(&slpk, ncols, numDistributedRows(), + ncol_blocks, nrow_blocks, ncols, blocksize); for (int rank = 0; rank < d_num_procs; ++rank) { scatter_block(&slpk, 1, row_offset[rank] + 1, - getData(), numColumns(), + getData(), ncols, row_offset[rank + 1] - row_offset[rank], rank); } @@ -1163,9 +1179,97 @@ Matrix::qr_factorize() const qr_init(&QRmgr, &slpk); lqfactorize(&QRmgr); + Matrix *R_matrix = nullptr; + if (computeR) + { + // Obtain L from the distributed overwritten matrix QRmgr.A + Matrix* L_dist_matrix = new Matrix(row_offset[myid + 1] - row_offset[myid], + ncols, distributed()); + + for (int rank = 0; rank < d_num_procs; ++rank) { + gather_block(&L_dist_matrix->item(0, 0), QRmgr.A, 1, + row_offset[rank] + 1, ncols, + row_offset[rank + 1] - row_offset[rank], rank); + } + + R_matrix = new Matrix(ncols, ncols, false); + if (myid == 0) + { + const int numLocalRowsR = std::min(ncols, row_offset[myid + 1]); + for (int i=0; i allNumRowsR(d_num_procs); + MPI_Gather(&numLocalRowsR, 1, MPI_INT, allNumRowsR.data(), 1, MPI_INT, + 0, MPI_COMM_WORLD); + + std::vector localRowsR; + + if (myid > 0) + { + localRowsR.resize(numLocalRowsR * ncols); + for (int i=0; i recvRowsR; + if (myid == 0) + recvRowsR.resize((ncols - numLocalRowsR) * ncols); + + if (recvRowsR.size() == 0) + recvRowsR.resize(1); // Just to have a valid pointer from data() call. + + std::vector disp(d_num_procs); + disp[0] = 0; + allNumRowsR[0] = 0; + for (int i=1; igetData(), ncols * ncols, MPI_DOUBLE, 0, + MPI_COMM_WORLD); + } + // Manipulate QRmgr.A to get elementary household reflectors. - for (int i = row_offset[myid]; i < numColumns(); i++) { - for (int j = 0; j < i - row_offset[myid] && j < row_offset[myid + 1]; j++) { + for (int i = row_offset[myid]; i < ncols; i++) { + for (int j = 0; j < i - row_offset[myid] && j < row_offset[myid + 1]; j++) { QRmgr.A->mdata[j * QRmgr.A->mm + i] = 0; } if (i < row_offset[myid + 1]) { @@ -1175,14 +1279,14 @@ Matrix::qr_factorize() const // Obtain Q lqcompute(&QRmgr); - Matrix* qr_factorized_matrix = new Matrix(row_offset[myid + 1] - + Matrix *qr_factorized_matrix = new Matrix(row_offset[myid + 1] - row_offset[myid], - numColumns(), distributed()); + ncols, distributed()); + for (int rank = 0; rank < d_num_procs; ++rank) { - gather_block(&qr_factorized_matrix->item(0, 0), QRmgr.A, - 1, row_offset[rank] + 1, - numColumns(), row_offset[rank + 1] - row_offset[rank], - rank); + gather_block(&qr_factorized_matrix->item(0, 0), QRmgr.A, 1, + row_offset[rank] + 1, ncols, + row_offset[rank + 1] - row_offset[rank], rank); } free_matrix_data(&slpk); @@ -1191,7 +1295,9 @@ Matrix::qr_factorize() const free(QRmgr.tau); free(QRmgr.ipiv); - return qr_factorized_matrix; + QR.resize(2); + QR[0] = qr_factorized_matrix; + QR[1] = R_matrix; } void diff --git a/lib/linalg/Matrix.h b/lib/linalg/Matrix.h index dc9445895..68414b4dc 100644 --- a/lib/linalg/Matrix.h +++ b/lib/linalg/Matrix.h @@ -18,6 +18,7 @@ #include "Vector.h" #include #include +#include #include namespace CAROM { @@ -917,9 +918,16 @@ class Matrix * * @return The Q of the QR factorization of this. */ - Matrix* + std::unique_ptr qr_factorize() const; + /** + * @brief Computes and returns the Q and R factors of the QR factorization. + * + * @return The Q and R of the QR factorization of this. + */ + void qr_factorize(std::vector> & QR) const; + /** * @brief Compute the leading numColumns() column pivots from a * QR decomposition with column pivots (QRCP) of the transpose @@ -1240,6 +1248,19 @@ class Matrix qrcp_pivots_transpose_distributed_elemental_unbalanced (int* row_pivot, int* row_pivot_owner, int pivots_requested) const; + /** + * @brief Computes the QR factorization of this. The R factor is computed + * only if requested. + * + * @param[in] computeR If true, the R factor is computed and returned. + * @param[out] QR Vector of raw pointers, with QR[0] pointing to Q and + * QR[1] pointing to R if computeR is true, nullptr otherwise. + * + * @return The Q and R of the QR factorization of this. If computeR is + * false, the R pointer will be nullptr. + */ + void qr_factorize(bool computeR, std::vector & QR) const; + /** * @brief The storage for the Matrix's values on this processor. */ diff --git a/lib/linalg/scalapack_f_wrapper.f90 b/lib/linalg/scalapack_f_wrapper.f90 index bfed8c5a7..100787186 100644 --- a/lib/linalg/scalapack_f_wrapper.f90 +++ b/lib/linalg/scalapack_f_wrapper.f90 @@ -202,7 +202,7 @@ subroutine check_init() ! initialized with all of the information required to proceed with a ! computation. ! - m, n: Number of rows and columns of the global matrix. For the ROM -! application, the data dimension and the number of shapshot vectors +! application, the data dimension and the number of snapshot vectors, ! respectively. ! - nprow, npcol: Number of rows and columns in the processor grid; ! MPI_Comm_size must give a number _greater than or equal to_ nprow*npcol. diff --git a/lib/linalg/svd/RandomizedSVD.cpp b/lib/linalg/svd/RandomizedSVD.cpp index c5ea51f13..5232e5ed2 100644 --- a/lib/linalg/svd/RandomizedSVD.cpp +++ b/lib/linalg/svd/RandomizedSVD.cpp @@ -104,7 +104,7 @@ RandomizedSVD::computeSVD() int rand_proj_rows = rand_proj->numRows(); delete rand_mat; - Matrix* Q = rand_proj->qr_factorize(); + std::unique_ptr Q = rand_proj->qr_factorize(); // Project d_samples onto Q Matrix* svd_input_mat = Q->transposeMult(snapshot_matrix); @@ -197,7 +197,6 @@ RandomizedSVD::computeSVD() } d_basis_is_current = true; - delete Q; release_context(&svd_input); if (d_debug_algorithm) { diff --git a/unit_tests/test_HDFDatabase.cpp b/unit_tests/test_HDFDatabase.cpp index 68b43beb7..68fc5222b 100644 --- a/unit_tests/test_HDFDatabase.cpp +++ b/unit_tests/test_HDFDatabase.cpp @@ -820,7 +820,7 @@ TEST(BasisGeneratorIO, Scaling_test) auto stop = steady_clock::now(); auto duration = duration_cast(stop-start); if (rank == 0) - printf("Base writeSnapshot- duration: %dms\n", duration); + printf("Base writeSnapshot- duration: %dms\n", duration.count()); MPI_Barrier(MPI_COMM_WORLD); start = steady_clock::now(); @@ -829,7 +829,7 @@ TEST(BasisGeneratorIO, Scaling_test) stop = steady_clock::now(); duration = duration_cast(stop-start); if (rank == 0) - printf("MPIO writeSnapshot- duration: %dms\n", duration); + printf("MPIO writeSnapshot- duration: %dms\n", duration.count()); MPI_Barrier(MPI_COMM_WORLD); start = steady_clock::now(); @@ -838,7 +838,7 @@ TEST(BasisGeneratorIO, Scaling_test) stop = steady_clock::now(); duration = duration_cast(stop-start); if (rank == 0) - printf("Base endSamples- duration: %dms\n", duration); + printf("Base endSamples- duration: %dms\n", duration.count()); MPI_Barrier(MPI_COMM_WORLD); start = steady_clock::now(); @@ -847,7 +847,7 @@ TEST(BasisGeneratorIO, Scaling_test) stop = steady_clock::now(); duration = duration_cast(stop-start); if (rank == 0) - printf("MPIO endSamples- duration: %dms\n", duration); + printf("MPIO endSamples- duration: %dms\n", duration.count()); } int main(int argc, char* argv[]) diff --git a/unit_tests/test_QR.cpp b/unit_tests/test_QR.cpp new file mode 100644 index 000000000..ec52b5d67 --- /dev/null +++ b/unit_tests/test_QR.cpp @@ -0,0 +1,155 @@ +/****************************************************************************** + * + * Copyright (c) 2013-2024, Lawrence Livermore National Security, LLC + * and other libROM project developers. See the top-level COPYRIGHT + * file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + *****************************************************************************/ + +#ifdef CAROM_HAS_GTEST + +#include +#include "linalg/scalapack_wrapper.h" +#include "linalg/BasisGenerator.h" +#include +#include +#include +#include // for memcpy +#include +#include +#include "mpi.h" +#include "utils/mpi_utils.h" + +/** + * Simple smoke test to make sure Google Test is properly linked + */ +TEST(GoogleTestFramework, GoogleTestFrameworkFound) { + SUCCEED(); +} + +TEST(QRfactorizeTest, Test_QR) +{ + // Get the rank of this process, and the number of processors. + int mpi_init, rank, num_procs; + MPI_Initialized(&mpi_init); + if (mpi_init == 0) { + MPI_Init(nullptr, nullptr); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + + constexpr int num_total_rows = 5; + constexpr int num_columns = 3; + int loc_num_rows = CAROM::split_dimension(num_total_rows, MPI_COMM_WORLD); + std::vector row_offset(num_procs + 1); + const int total_rows = CAROM::get_global_offsets(loc_num_rows, row_offset, + MPI_COMM_WORLD); + EXPECT_EQ(total_rows, num_total_rows); + + double* q_data = new double[15] { + 3.08158946098238906153E-01, -9.49897947980619661301E-02, -4.50691774108525788911E-01, + -1.43697905723455976457E-01, 9.53289043424090820622E-01, 8.77767692937209131898E-02, + -2.23655845793717528158E-02, -2.10628953513210204207E-01, 8.42235962392685943989E-01, + -7.29903965154318323805E-01, -1.90917141788945754488E-01, -2.77280930877637610266E-01, + -5.92561353877168350834E-01, -3.74570084880578441089E-02, 5.40928141934190823137E-02 + }; + + double* r_data = new double[9] { + -1.78651649346571794741E-01, 5.44387957786310106023E-01, -8.19588518467042281834E-01, + 0.0, -3.13100149275943651084E-01, -9.50441422536040881122E-04, + 0.0, 0.0, 5.72951792961765460355E-01 + }; + + CAROM::Matrix exactQ(loc_num_rows, num_columns, true); + CAROM::Matrix exactR(r_data, num_columns, num_columns, false); + + for (int i = 0; i < loc_num_rows; i++) { + for (int j = 0; j < num_columns; j++) { + exactQ(i, j) = q_data[((i + row_offset[rank]) * num_columns) + j]; + } + } + + EXPECT_EQ(exactQ.numRows(), loc_num_rows); + EXPECT_EQ(exactQ.numColumns(), num_columns); + + // Verify that the columns of Q are orthonormal. + { + CAROM::Matrix *id = exactQ.transposeMult(exactQ); + + EXPECT_EQ(id->numRows(), num_columns); + EXPECT_EQ(id->numColumns(), num_columns); + + double maxError = 0.0; + for (int i = 0; i < num_columns; i++) { + for (int j = 0; j < num_columns; j++) { + const double delta_ij = i == j ? 1.0 : 0.0; + const double error = std::abs((*id)(i,j) - delta_ij); + maxError = std::max(maxError, error); + } + } + + EXPECT_NEAR(maxError, 0.0, 1.0e-15); + + delete id; + } + + CAROM::Matrix *A = exactQ.mult(exactR); // Compute A = QR + + // Factorize A + std::vector> QRfactors; + A->qr_factorize(QRfactors); + std::unique_ptr & Q = QRfactors[0]; + std::unique_ptr & R = QRfactors[1]; + + delete A; + + EXPECT_EQ(Q->numRows(), loc_num_rows); + EXPECT_EQ(Q->numColumns(), num_columns); + + // Verify that Q == -exactQ and R == -exactR + + double maxError = 0.0; + for (int i = 0; i < loc_num_rows; i++) { + for (int j = 0; j < num_columns; j++) { + const double error = std::abs(exactQ(i, j) + (*Q)(i, j)); + maxError = std::max(maxError, error); + } + } + + EXPECT_NEAR(maxError, 0.0, 1.0e-15); + + EXPECT_EQ(R->numRows(), num_columns); + EXPECT_EQ(R->numColumns(), num_columns); + + for (int i = 0; i < num_columns; i++) { + for (int j = 0; j < num_columns; j++) { + const double error = std::abs(exactR(i, j) + (*R)(i, j)); + maxError = std::max(maxError, error); + } + } + + EXPECT_NEAR(maxError, 0.0, 1.0e-15); +} + +int main(int argc, char* argv[]) +{ + ::testing::InitGoogleTest(&argc, argv); + MPI_Init(&argc, &argv); + int result = RUN_ALL_TESTS(); + MPI_Finalize(); + return result; +} + +#else // #ifndef CAROM_HAS_GTEST + +int main() +{ + std::cout << "libROM was compiled without Google Test support, so unit " + << "tests have been disabled. To enable unit tests, compile " + << "libROM with Google Test support." << std::endl; +} + +#endif // #endif CAROM_HAS_GTEST diff --git a/unit_tests/test_StaticSVD.cpp b/unit_tests/test_StaticSVD.cpp index c1420344b..63a599198 100644 --- a/unit_tests/test_StaticSVD.cpp +++ b/unit_tests/test_StaticSVD.cpp @@ -422,4 +422,4 @@ int main() << "libROM with Google Test support." << std::endl; } -#endif // #endif CAROM_HAS_GTEST \ No newline at end of file +#endif // #endif CAROM_HAS_GTEST