Skip to content

Commit

Permalink
Get R factor from QR factorization (#288)
Browse files Browse the repository at this point in the history
Added a QR function that returns the R factor. Changed QR API to use unique_ptr instead of raw pointers. Added a QR unit test that works in parallel.
  • Loading branch information
dylan-copeland authored May 30, 2024
1 parent 0302faa commit c0949e8
Show file tree
Hide file tree
Showing 10 changed files with 312 additions and 31 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/run_tests/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ runs:
mpirun -n 3 --oversubscribe tests/test_IncrementalSVDBrand
./tests/test_HDFDatabase
mpirun -n 3 --oversubscribe tests/test_HDFDatabase
./tests/test_QR
mpirun -n 3 --oversubscribe tests/test_QR
./tests/test_NNLS
mpirun -n 3 --oversubscribe tests/test_NNLS
shell: bash
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ if(GTEST_FOUND)
DMD
GNAT
QDEIM
QR
S_OPT
StaticSVD
RandomizedSVD
Expand Down
9 changes: 3 additions & 6 deletions lib/hyperreduction/S_OPT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ S_OPT(const Matrix* f_basis,
}

const Matrix* Vo = NULL;

std::unique_ptr<Matrix> f_basis_truncated_Q;
// Use the QR factorization of the input matrix, if requested
if (qr_factorize)
{
Vo = f_basis_truncated->qr_factorize();
f_basis_truncated_Q = f_basis_truncated->qr_factorize();
Vo = f_basis_truncated_Q.get();
}
else
{
Expand Down Expand Up @@ -520,10 +521,6 @@ S_OPT(const Matrix* f_basis,
MPI_Type_free(&MaxRowType);
MPI_Op_free(&RowInfoOp);

if (qr_factorize)
{
delete Vo;
}
if (num_basis_vectors < f_basis->numColumns())
{
delete f_basis_truncated;
Expand Down
138 changes: 122 additions & 16 deletions lib/linalg/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1121,11 +1121,27 @@ Matrix::calculateNumDistributedRows() {
}
}

Matrix*
Matrix::qr_factorize() const
std::unique_ptr<Matrix> Matrix::qr_factorize() const
{
std::vector<Matrix*> QR;
qr_factorize(false, QR);
return std::unique_ptr<Matrix>(QR[0]);
}

void Matrix::qr_factorize(std::vector<std::unique_ptr<Matrix>> & QR) const
{
std::vector<Matrix*> QRraw; // Raw pointers
qr_factorize(true, QRraw);
QR.clear();
for (int i=0; i<2; ++i)
QR.push_back(std::unique_ptr<Matrix>(QRraw[i]));
}

void Matrix::qr_factorize(bool computeR,
std::vector<Matrix*> & QR) const
{
int myid;
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
const int myid = d_rank;
const int ncols = numColumns();

std::vector<int> row_offset(d_num_procs + 1);
row_offset[d_num_procs] = numDistributedRows();
Expand All @@ -1151,21 +1167,109 @@ Matrix::qr_factorize() const

int blocksize = row_offset[d_num_procs] / d_num_procs;
if (row_offset[d_num_procs] % d_num_procs != 0) blocksize += 1;
initialize_matrix(&slpk, numColumns(), numDistributedRows(),
ncol_blocks, nrow_blocks, numColumns(), blocksize);
initialize_matrix(&slpk, ncols, numDistributedRows(),
ncol_blocks, nrow_blocks, ncols, blocksize);
for (int rank = 0; rank < d_num_procs; ++rank) {
scatter_block(&slpk, 1, row_offset[rank] + 1,
getData(), numColumns(),
getData(), ncols,
row_offset[rank + 1] - row_offset[rank], rank);
}

QRManager QRmgr;
qr_init(&QRmgr, &slpk);
lqfactorize(&QRmgr);

Matrix *R_matrix = nullptr;
if (computeR)
{
// Obtain L from the distributed overwritten matrix QRmgr.A
Matrix* L_dist_matrix = new Matrix(row_offset[myid + 1] - row_offset[myid],
ncols, distributed());

for (int rank = 0; rank < d_num_procs; ++rank) {
gather_block(&L_dist_matrix->item(0, 0), QRmgr.A, 1,
row_offset[rank] + 1, ncols,
row_offset[rank + 1] - row_offset[rank], rank);
}

R_matrix = new Matrix(ncols, ncols, false);
if (myid == 0)
{
const int numLocalRowsR = std::min(ncols, row_offset[myid + 1]);
for (int i=0; i<numLocalRowsR; ++i)
{
for (int j=i; j<ncols; ++j)
(*R_matrix)(i,j) = (*L_dist_matrix)(i,j);

for (int j=0; j<i; ++j)
(*R_matrix)(i,j) = 0.0;
}
}

// Gather any rows of R to root from other ranks.
const int maxLocalRowsR = std::min(ncols, row_offset[myid + 1]);
const int numLocalRowsR = std::max(0, maxLocalRowsR - row_offset[myid]);

std::vector<int> allNumRowsR(d_num_procs);
MPI_Gather(&numLocalRowsR, 1, MPI_INT, allNumRowsR.data(), 1, MPI_INT,
0, MPI_COMM_WORLD);

std::vector<double> localRowsR;

if (myid > 0)
{
localRowsR.resize(numLocalRowsR * ncols);
for (int i=0; i<numLocalRowsR; ++i)
{
const int row = row_offset[myid] + i;
for (int j=row; j<ncols; ++j)
localRowsR[(i * ncols) + j] = (*L_dist_matrix)(i,j);

for (int j=0; j<row; ++j)
localRowsR[(i * ncols) + j] = 0.0;
}
}
else
localRowsR.resize(1); // Just to have a valid pointer from data() call.

delete L_dist_matrix;

std::vector<double> recvRowsR;
if (myid == 0)
recvRowsR.resize((ncols - numLocalRowsR) * ncols);

if (recvRowsR.size() == 0)
recvRowsR.resize(1); // Just to have a valid pointer from data() call.

std::vector<int> disp(d_num_procs);
disp[0] = 0;
allNumRowsR[0] = 0;
for (int i=1; i<d_num_procs; ++i)
{
allNumRowsR[i] *= ncols;
disp[i] = disp[i - 1] + allNumRowsR[i-1];
}

MPI_Gatherv(localRowsR.data(), myid == 0 ? 0 : numLocalRowsR * ncols,
MPI_DOUBLE, recvRowsR.data(),
allNumRowsR.data(), disp.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);

if (myid == 0)
{
for (int i=numLocalRowsR; i<ncols; ++i)
{
for (int j=0; j<ncols; ++j)
(*R_matrix)(i,j) = recvRowsR[(i - numLocalRowsR) * ncols + j];
}
}

MPI_Bcast(R_matrix->getData(), ncols * ncols, MPI_DOUBLE, 0,
MPI_COMM_WORLD);
}

// Manipulate QRmgr.A to get elementary household reflectors.
for (int i = row_offset[myid]; i < numColumns(); i++) {
for (int j = 0; j < i - row_offset[myid] && j < row_offset[myid + 1]; j++) {
for (int i = row_offset[myid]; i < ncols; i++) {
for (int j = 0; j < i - row_offset[myid] && j < row_offset[myid + 1]; j++) {
QRmgr.A->mdata[j * QRmgr.A->mm + i] = 0;
}
if (i < row_offset[myid + 1]) {
Expand All @@ -1175,14 +1279,14 @@ Matrix::qr_factorize() const

// Obtain Q
lqcompute(&QRmgr);
Matrix* qr_factorized_matrix = new Matrix(row_offset[myid + 1] -
Matrix *qr_factorized_matrix = new Matrix(row_offset[myid + 1] -
row_offset[myid],
numColumns(), distributed());
ncols, distributed());

for (int rank = 0; rank < d_num_procs; ++rank) {
gather_block(&qr_factorized_matrix->item(0, 0), QRmgr.A,
1, row_offset[rank] + 1,
numColumns(), row_offset[rank + 1] - row_offset[rank],
rank);
gather_block(&qr_factorized_matrix->item(0, 0), QRmgr.A, 1,
row_offset[rank] + 1, ncols,
row_offset[rank + 1] - row_offset[rank], rank);
}

free_matrix_data(&slpk);
Expand All @@ -1191,7 +1295,9 @@ Matrix::qr_factorize() const
free(QRmgr.tau);
free(QRmgr.ipiv);

return qr_factorized_matrix;
QR.resize(2);
QR[0] = qr_factorized_matrix;
QR[1] = R_matrix;
}

void
Expand Down
23 changes: 22 additions & 1 deletion lib/linalg/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "Vector.h"
#include <vector>
#include <complex>
#include <memory>
#include <string>

namespace CAROM {
Expand Down Expand Up @@ -917,9 +918,16 @@ class Matrix
*
* @return The Q of the QR factorization of this.
*/
Matrix*
std::unique_ptr<Matrix>
qr_factorize() const;

/**
* @brief Computes and returns the Q and R factors of the QR factorization.
*
* @return The Q and R of the QR factorization of this.
*/
void qr_factorize(std::vector<std::unique_ptr<Matrix>> & QR) const;

/**
* @brief Compute the leading numColumns() column pivots from a
* QR decomposition with column pivots (QRCP) of the transpose
Expand Down Expand Up @@ -1240,6 +1248,19 @@ class Matrix
qrcp_pivots_transpose_distributed_elemental_unbalanced
(int* row_pivot, int* row_pivot_owner, int pivots_requested) const;

/**
* @brief Computes the QR factorization of this. The R factor is computed
* only if requested.
*
* @param[in] computeR If true, the R factor is computed and returned.
* @param[out] QR Vector of raw pointers, with QR[0] pointing to Q and
* QR[1] pointing to R if computeR is true, nullptr otherwise.
*
* @return The Q and R of the QR factorization of this. If computeR is
* false, the R pointer will be nullptr.
*/
void qr_factorize(bool computeR, std::vector<Matrix*> & QR) const;

/**
* @brief The storage for the Matrix's values on this processor.
*/
Expand Down
2 changes: 1 addition & 1 deletion lib/linalg/scalapack_f_wrapper.f90
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ subroutine check_init()
! initialized with all of the information required to proceed with a
! computation.
! - m, n: Number of rows and columns of the global matrix. For the ROM
! application, the data dimension and the number of shapshot vectors
! application, the data dimension and the number of snapshot vectors,
! respectively.
! - nprow, npcol: Number of rows and columns in the processor grid;
! MPI_Comm_size must give a number _greater than or equal to_ nprow*npcol.
Expand Down
3 changes: 1 addition & 2 deletions lib/linalg/svd/RandomizedSVD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ RandomizedSVD::computeSVD()
int rand_proj_rows = rand_proj->numRows();
delete rand_mat;

Matrix* Q = rand_proj->qr_factorize();
std::unique_ptr<Matrix> Q = rand_proj->qr_factorize();

// Project d_samples onto Q
Matrix* svd_input_mat = Q->transposeMult(snapshot_matrix);
Expand Down Expand Up @@ -197,7 +197,6 @@ RandomizedSVD::computeSVD()
}

d_basis_is_current = true;
delete Q;
release_context(&svd_input);

if (d_debug_algorithm) {
Expand Down
8 changes: 4 additions & 4 deletions unit_tests/test_HDFDatabase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ TEST(BasisGeneratorIO, Scaling_test)
auto stop = steady_clock::now();
auto duration = duration_cast<milliseconds>(stop-start);
if (rank == 0)
printf("Base writeSnapshot- duration: %dms\n", duration);
printf("Base writeSnapshot- duration: %dms\n", duration.count());

MPI_Barrier(MPI_COMM_WORLD);
start = steady_clock::now();
Expand All @@ -829,7 +829,7 @@ TEST(BasisGeneratorIO, Scaling_test)
stop = steady_clock::now();
duration = duration_cast<milliseconds>(stop-start);
if (rank == 0)
printf("MPIO writeSnapshot- duration: %dms\n", duration);
printf("MPIO writeSnapshot- duration: %dms\n", duration.count());

MPI_Barrier(MPI_COMM_WORLD);
start = steady_clock::now();
Expand All @@ -838,7 +838,7 @@ TEST(BasisGeneratorIO, Scaling_test)
stop = steady_clock::now();
duration = duration_cast<milliseconds>(stop-start);
if (rank == 0)
printf("Base endSamples- duration: %dms\n", duration);
printf("Base endSamples- duration: %dms\n", duration.count());

MPI_Barrier(MPI_COMM_WORLD);
start = steady_clock::now();
Expand All @@ -847,7 +847,7 @@ TEST(BasisGeneratorIO, Scaling_test)
stop = steady_clock::now();
duration = duration_cast<milliseconds>(stop-start);
if (rank == 0)
printf("MPIO endSamples- duration: %dms\n", duration);
printf("MPIO endSamples- duration: %dms\n", duration.count());
}

int main(int argc, char* argv[])
Expand Down
Loading

0 comments on commit c0949e8

Please sign in to comment.