Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get R factor from QR factorization #288

Merged
merged 5 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/run_tests/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ runs:
mpirun -n 3 --oversubscribe tests/test_IncrementalSVDBrand
./tests/test_HDFDatabase
mpirun -n 3 --oversubscribe tests/test_HDFDatabase
./tests/test_QR
mpirun -n 3 --oversubscribe tests/test_QR
./tests/test_NNLS
mpirun -n 3 --oversubscribe tests/test_NNLS
shell: bash
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ if(GTEST_FOUND)
DMD
GNAT
QDEIM
QR
S_OPT
StaticSVD
RandomizedSVD
Expand Down
9 changes: 3 additions & 6 deletions lib/hyperreduction/S_OPT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ S_OPT(const Matrix* f_basis,
}

const Matrix* Vo = NULL;

std::unique_ptr<Matrix> f_basis_truncated_Q;
// Use the QR factorization of the input matrix, if requested
if (qr_factorize)
{
Vo = f_basis_truncated->qr_factorize();
f_basis_truncated_Q = f_basis_truncated->qr_factorize();
Vo = f_basis_truncated_Q.get();
}
else
{
Expand Down Expand Up @@ -520,10 +521,6 @@ S_OPT(const Matrix* f_basis,
MPI_Type_free(&MaxRowType);
MPI_Op_free(&RowInfoOp);

if (qr_factorize)
{
delete Vo;
}
if (num_basis_vectors < f_basis->numColumns())
{
delete f_basis_truncated;
Expand Down
138 changes: 122 additions & 16 deletions lib/linalg/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1121,11 +1121,27 @@ Matrix::calculateNumDistributedRows() {
}
}

Matrix*
Matrix::qr_factorize() const
std::unique_ptr<Matrix> Matrix::qr_factorize() const
{
std::vector<Matrix*> QR;
qr_factorize(false, QR);
return std::unique_ptr<Matrix>(QR[0]);
}

void Matrix::qr_factorize(std::vector<std::unique_ptr<Matrix>> & QR) const
{
std::vector<Matrix*> QRraw; // Raw pointers
qr_factorize(true, QRraw);
QR.clear();
for (int i=0; i<2; ++i)
QR.push_back(std::unique_ptr<Matrix>(QRraw[i]));
}

void Matrix::qr_factorize(bool computeR,
std::vector<Matrix*> & QR) const
{
int myid;
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
const int myid = d_rank;
const int ncols = numColumns();

std::vector<int> row_offset(d_num_procs + 1);
row_offset[d_num_procs] = numDistributedRows();
Expand All @@ -1151,21 +1167,109 @@ Matrix::qr_factorize() const

int blocksize = row_offset[d_num_procs] / d_num_procs;
if (row_offset[d_num_procs] % d_num_procs != 0) blocksize += 1;
initialize_matrix(&slpk, numColumns(), numDistributedRows(),
ncol_blocks, nrow_blocks, numColumns(), blocksize);
initialize_matrix(&slpk, ncols, numDistributedRows(),
ncol_blocks, nrow_blocks, ncols, blocksize);
for (int rank = 0; rank < d_num_procs; ++rank) {
scatter_block(&slpk, 1, row_offset[rank] + 1,
getData(), numColumns(),
getData(), ncols,
row_offset[rank + 1] - row_offset[rank], rank);
}

QRManager QRmgr;
qr_init(&QRmgr, &slpk);
lqfactorize(&QRmgr);

Matrix *R_matrix = nullptr;
if (computeR)
{
// Obtain L from the distributed overwritten matrix QRmgr.A
Matrix* L_dist_matrix = new Matrix(row_offset[myid + 1] - row_offset[myid],
ncols, distributed());

for (int rank = 0; rank < d_num_procs; ++rank) {
gather_block(&L_dist_matrix->item(0, 0), QRmgr.A, 1,
row_offset[rank] + 1, ncols,
row_offset[rank + 1] - row_offset[rank], rank);
}

R_matrix = new Matrix(ncols, ncols, false);
if (myid == 0)
{
const int numLocalRowsR = std::min(ncols, row_offset[myid + 1]);
for (int i=0; i<numLocalRowsR; ++i)
{
for (int j=i; j<ncols; ++j)
(*R_matrix)(i,j) = (*L_dist_matrix)(i,j);

for (int j=0; j<i; ++j)
(*R_matrix)(i,j) = 0.0;
}
}

// Gather any rows of R to root from other ranks.
const int maxLocalRowsR = std::min(ncols, row_offset[myid + 1]);
const int numLocalRowsR = std::max(0, maxLocalRowsR - row_offset[myid]);

std::vector<int> allNumRowsR(d_num_procs);
MPI_Gather(&numLocalRowsR, 1, MPI_INT, allNumRowsR.data(), 1, MPI_INT,
0, MPI_COMM_WORLD);

std::vector<double> localRowsR;

if (myid > 0)
{
localRowsR.resize(numLocalRowsR * ncols);
for (int i=0; i<numLocalRowsR; ++i)
{
const int row = row_offset[myid] + i;
for (int j=row; j<ncols; ++j)
localRowsR[(i * ncols) + j] = (*L_dist_matrix)(i,j);

for (int j=0; j<row; ++j)
localRowsR[(i * ncols) + j] = 0.0;
}
}
else
localRowsR.resize(1); // Just to have a valid pointer from data() call.

delete L_dist_matrix;

std::vector<double> recvRowsR;
if (myid == 0)
recvRowsR.resize((ncols - numLocalRowsR) * ncols);

if (recvRowsR.size() == 0)
recvRowsR.resize(1); // Just to have a valid pointer from data() call.

std::vector<int> disp(d_num_procs);
disp[0] = 0;
allNumRowsR[0] = 0;
for (int i=1; i<d_num_procs; ++i)
{
allNumRowsR[i] *= ncols;
disp[i] = disp[i - 1] + allNumRowsR[i-1];
}

MPI_Gatherv(localRowsR.data(), myid == 0 ? 0 : numLocalRowsR * ncols,
MPI_DOUBLE, recvRowsR.data(),
allNumRowsR.data(), disp.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);

if (myid == 0)
{
for (int i=numLocalRowsR; i<ncols; ++i)
{
for (int j=0; j<ncols; ++j)
(*R_matrix)(i,j) = recvRowsR[(i - numLocalRowsR) * ncols + j];
}
}

MPI_Bcast(R_matrix->getData(), ncols * ncols, MPI_DOUBLE, 0,
MPI_COMM_WORLD);
}

// Manipulate QRmgr.A to get elementary household reflectors.
for (int i = row_offset[myid]; i < numColumns(); i++) {
for (int j = 0; j < i - row_offset[myid] && j < row_offset[myid + 1]; j++) {
for (int i = row_offset[myid]; i < ncols; i++) {
for (int j = 0; j < i - row_offset[myid] && j < row_offset[myid + 1]; j++) {
QRmgr.A->mdata[j * QRmgr.A->mm + i] = 0;
}
if (i < row_offset[myid + 1]) {
Expand All @@ -1175,14 +1279,14 @@ Matrix::qr_factorize() const

// Obtain Q
lqcompute(&QRmgr);
Matrix* qr_factorized_matrix = new Matrix(row_offset[myid + 1] -
Matrix *qr_factorized_matrix = new Matrix(row_offset[myid + 1] -
row_offset[myid],
numColumns(), distributed());
ncols, distributed());

for (int rank = 0; rank < d_num_procs; ++rank) {
gather_block(&qr_factorized_matrix->item(0, 0), QRmgr.A,
1, row_offset[rank] + 1,
numColumns(), row_offset[rank + 1] - row_offset[rank],
rank);
gather_block(&qr_factorized_matrix->item(0, 0), QRmgr.A, 1,
row_offset[rank] + 1, ncols,
row_offset[rank + 1] - row_offset[rank], rank);
}

free_matrix_data(&slpk);
Expand All @@ -1191,7 +1295,9 @@ Matrix::qr_factorize() const
free(QRmgr.tau);
free(QRmgr.ipiv);

return qr_factorized_matrix;
QR.resize(2);
QR[0] = qr_factorized_matrix;
QR[1] = R_matrix;
}

void
Expand Down
23 changes: 22 additions & 1 deletion lib/linalg/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "Vector.h"
#include <vector>
#include <complex>
#include <memory>
#include <string>

namespace CAROM {
Expand Down Expand Up @@ -917,9 +918,16 @@ class Matrix
*
* @return The Q of the QR factorization of this.
*/
Matrix*
std::unique_ptr<Matrix>
qr_factorize() const;

/**
* @brief Computes and returns the Q and R factors of the QR factorization.
*
* @return The Q and R of the QR factorization of this.
*/
void qr_factorize(std::vector<std::unique_ptr<Matrix>> & QR) const;

/**
* @brief Compute the leading numColumns() column pivots from a
* QR decomposition with column pivots (QRCP) of the transpose
Expand Down Expand Up @@ -1240,6 +1248,19 @@ class Matrix
qrcp_pivots_transpose_distributed_elemental_unbalanced
(int* row_pivot, int* row_pivot_owner, int pivots_requested) const;

/**
* @brief Computes the QR factorization of this. The R factor is computed
* only if requested.
*
* @param[in] computeR If true, the R factor is computed and returned.
* @param[out] QR Vector of raw pointers, with QR[0] pointing to Q and
* QR[1] pointing to R if computeR is true, nullptr otherwise.
*
* @return The Q and R of the QR factorization of this. If computeR is
* false, the R pointer will be nullptr.
*/
void qr_factorize(bool computeR, std::vector<Matrix*> & QR) const;

/**
* @brief The storage for the Matrix's values on this processor.
*/
Expand Down
2 changes: 1 addition & 1 deletion lib/linalg/scalapack_f_wrapper.f90
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ subroutine check_init()
! initialized with all of the information required to proceed with a
! computation.
! - m, n: Number of rows and columns of the global matrix. For the ROM
! application, the data dimension and the number of shapshot vectors
! application, the data dimension and the number of snapshot vectors,
! respectively.
! - nprow, npcol: Number of rows and columns in the processor grid;
! MPI_Comm_size must give a number _greater than or equal to_ nprow*npcol.
Expand Down
3 changes: 1 addition & 2 deletions lib/linalg/svd/RandomizedSVD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ RandomizedSVD::computeSVD()
int rand_proj_rows = rand_proj->numRows();
delete rand_mat;

Matrix* Q = rand_proj->qr_factorize();
std::unique_ptr<Matrix> Q = rand_proj->qr_factorize();

// Project d_samples onto Q
Matrix* svd_input_mat = Q->transposeMult(snapshot_matrix);
Expand Down Expand Up @@ -197,7 +197,6 @@ RandomizedSVD::computeSVD()
}

d_basis_is_current = true;
delete Q;
release_context(&svd_input);

if (d_debug_algorithm) {
Expand Down
8 changes: 4 additions & 4 deletions unit_tests/test_HDFDatabase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ TEST(BasisGeneratorIO, Scaling_test)
auto stop = steady_clock::now();
auto duration = duration_cast<milliseconds>(stop-start);
if (rank == 0)
printf("Base writeSnapshot- duration: %dms\n", duration);
printf("Base writeSnapshot- duration: %dms\n", duration.count());

MPI_Barrier(MPI_COMM_WORLD);
start = steady_clock::now();
Expand All @@ -829,7 +829,7 @@ TEST(BasisGeneratorIO, Scaling_test)
stop = steady_clock::now();
duration = duration_cast<milliseconds>(stop-start);
if (rank == 0)
printf("MPIO writeSnapshot- duration: %dms\n", duration);
printf("MPIO writeSnapshot- duration: %dms\n", duration.count());

MPI_Barrier(MPI_COMM_WORLD);
start = steady_clock::now();
Expand All @@ -838,7 +838,7 @@ TEST(BasisGeneratorIO, Scaling_test)
stop = steady_clock::now();
duration = duration_cast<milliseconds>(stop-start);
if (rank == 0)
printf("Base endSamples- duration: %dms\n", duration);
printf("Base endSamples- duration: %dms\n", duration.count());

MPI_Barrier(MPI_COMM_WORLD);
start = steady_clock::now();
Expand All @@ -847,7 +847,7 @@ TEST(BasisGeneratorIO, Scaling_test)
stop = steady_clock::now();
duration = duration_cast<milliseconds>(stop-start);
if (rank == 0)
printf("MPIO endSamples- duration: %dms\n", duration);
printf("MPIO endSamples- duration: %dms\n", duration.count());
}

int main(int argc, char* argv[])
Expand Down
Loading
Loading