diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0148a7e1..f3d0c8a7 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -14,6 +14,10 @@ target_link_libraries(klu_klu.exe PRIVATE ReSolve) add_executable(klu_klu_standalone.exe r_KLU_KLU_standalone.cpp) target_link_libraries(klu_klu_standalone.exe PRIVATE ReSolve) +# Build example with a configurable system solver +add_executable(system.exe r_SysSolver.cpp) +target_link_libraries(system.exe PRIVATE ReSolve) + # Create CUDA examples if(RESOLVE_USE_CUDA) @@ -40,6 +44,11 @@ if(RESOLVE_USE_CUDA) #rand solver add_executable(gmres_cusparse_rand.exe r_randGMRES_CUDA.cpp) target_link_libraries(gmres_cusparse_rand.exe PRIVATE ReSolve) + + # Build example with a configurable system solver + add_executable(system_cuda.exe r_SysSolverCuda.cpp) + target_link_libraries(system_cuda.exe PRIVATE ReSolve) + endif(RESOLVE_USE_CUDA) # Create HIP examples @@ -60,10 +69,17 @@ if(RESOLVE_USE_HIP) # Rand GMRES test with rocsparse add_executable(gmres_rocsparse_rand.exe r_randGMRES.cpp) target_link_libraries(gmres_rocsparse_rand.exe PRIVATE ReSolve) + # Build example with a configurable system solver + add_executable(system_hip.exe r_SysSolverHip.cpp) + target_link_libraries(system_hip.exe PRIVATE ReSolve) + + # Build example with a configurable system solver + add_executable(system_hip_fgmres.exe r_SysSolverHipRefine.cpp) + target_link_libraries(system_hip_fgmres.exe PRIVATE ReSolve) endif(RESOLVE_USE_HIP) # Install all examples in bin directory -set(installable_executables klu_klu.exe klu_klu_standalone.exe) +set(installable_executables klu_klu.exe klu_klu_standalone.exe system.exe) if(RESOLVE_USE_CUDA) set(installable_executables ${installable_executables} klu_glu.exe klu_rf.exe klu_rf_fgmres.exe klu_glu_values_update.exe gmres_cusparse_rand.exe) diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index 9f271254..a59441d8 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -115,9 +115,6 @@ int main(int argc, char *argv[]) } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; if (i < 1) { KLU->setup(A); @@ -133,25 +130,23 @@ int main(int argc, char *argv[]) GLU->setup(A, L, U, P, Q); status = GLU->solve(vec_rhs, vec_x); std::cout<<"GLU solve status: "<solve(vec_rhs, vec_x); - // std::cout<<"KLU solve status: "<refactorize(); std::cout<<"Using CUSOLVER GLU"<refactorize(); std::cout<<"CUSOLVER GLU refactorization status: "<solve(vec_rhs, vec_x); std::cout<<"CUSOLVER GLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - + // Estimate solution error + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + real_type bnorm = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/bnorm << "\n"; } // for (int i = 0; i < numSystems; ++i) diff --git a/examples/r_KLU_GLU_matrix_values_update.cpp b/examples/r_KLU_GLU_matrix_values_update.cpp index ded685ac..3dde51eb 100644 --- a/examples/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/r_KLU_GLU_matrix_values_update.cpp @@ -125,9 +125,6 @@ int main(int argc, char *argv[]) } std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; if (i < 1){ KLU->setup(A); diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index 901e36a5..bf4b9179 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -116,9 +116,6 @@ int main(int argc, char *argv[]) } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; if (i < 2){ KLU->setup(A); diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index 3dfaf716..3821f36d 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -87,7 +87,6 @@ int main(int argc, char *argv[]) vec_rhs->setDataUpdated(ReSolve::memory::HOST); std::cout << "COO to CSR completed. Expanded NNZ: " << A->getNnzExpanded() << std::endl; //Now call direct solver - KLU->setupParameters(1, 0.1, false); int status; KLU->setup(A); status = KLU->analyze(); diff --git a/examples/r_KLU_rf.cpp b/examples/r_KLU_rf.cpp index b61029c5..2e99c1e7 100644 --- a/examples/r_KLU_rf.cpp +++ b/examples/r_KLU_rf.cpp @@ -115,9 +115,6 @@ int main(int argc, char *argv[] ) } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; if (i < 2){ KLU->setup(A); diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index 584fcd10..95857596 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -119,9 +119,6 @@ int main(int argc, char *argv[]) } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; real_type norm_b; if (i < 2){ diff --git a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp index c4ab285b..04586e53 100644 --- a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -121,9 +121,6 @@ int main(int argc, char *argv[]) } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; real_type norm_b; if (i < 2){ diff --git a/examples/r_KLU_rocSolverRf_FGMRES.cpp b/examples/r_KLU_rocSolverRf_FGMRES.cpp index 32d1865f..4650b661 100644 --- a/examples/r_KLU_rocSolverRf_FGMRES.cpp +++ b/examples/r_KLU_rocSolverRf_FGMRES.cpp @@ -118,10 +118,6 @@ int main(int argc, char *argv[]) vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; real_type norm_b; if (i < 2){ @@ -175,17 +171,17 @@ int main(int argc, char *argv[]) << rnrm/norm_b << "\n"; vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - if(!std::isnan(rnrm) && !std::isinf(rnrm)) { - FGMRES->solve(vec_rhs, vec_x); - - std::cout << "FGMRES: init nrm: " - << std::scientific << std::setprecision(16) - << FGMRES->getInitResidualNorm()/norm_b - << " final nrm: " - << FGMRES->getFinalResidualNorm()/norm_b - << " iter: " << FGMRES->getNumIter() << "\n"; - } - } + if(!std::isnan(rnrm) && !std::isinf(rnrm)) { + FGMRES->solve(vec_rhs, vec_x); + + std::cout << "FGMRES: init nrm: " + << std::scientific << std::setprecision(16) + << FGMRES->getInitResidualNorm()/norm_b + << " final nrm: " + << FGMRES->getFinalResidualNorm()/norm_b + << " iter: " << FGMRES->getNumIter() << "\n"; + } + } } // for (int i = 0; i < numSystems; ++i) diff --git a/examples/r_KLU_rocsolverrf.cpp b/examples/r_KLU_rocsolverrf.cpp index 5651ed56..73e39131 100644 --- a/examples/r_KLU_rocsolverrf.cpp +++ b/examples/r_KLU_rocsolverrf.cpp @@ -115,11 +115,8 @@ int main(int argc, char *argv[] ) } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; - if (i < 2){ + if (i < 2) { KLU->setup(A); status = KLU->analyze(); std::cout<<"KLU analysis status: "<getQOrdering(); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); Rf->setup(A, L, U, P, Q, vec_rhs); - Rf->refactorize(); } } else { std::cout<<"Using rocsolver rf"<solve(vec_rhs, vec_x); std::cout<<"rocsolver rf solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + real_type bnorm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); matrix_handler->setValuesChanged(true, "hip"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "hip")) << "\n"; + << sqrt(vector_handler->dot(vec_r, vec_r, "hip"))/bnorm << "\n"; } // for (int i = 0; i < numSystems; ++i) diff --git a/examples/r_KLU_rocsolverrf_redo_factorization.cpp b/examples/r_KLU_rocsolverrf_redo_factorization.cpp index 234a413d..232c8e69 100644 --- a/examples/r_KLU_rocsolverrf_redo_factorization.cpp +++ b/examples/r_KLU_rocsolverrf_redo_factorization.cpp @@ -118,9 +118,6 @@ int main(int argc, char *argv[] ) } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } int status; if (i < 2){ KLU->setup(A); diff --git a/examples/r_SysSolver.cpp b/examples/r_SysSolver.cpp new file mode 100644 index 00000000..88ca7324 --- /dev/null +++ b/examples/r_SysSolver.cpp @@ -0,0 +1,160 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace ReSolve::constants; + +int main(int argc, char *argv[]) +{ + // Use the same data types as those you specified in ReSolve build. + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; + using vector_type = ReSolve::vector::Vector; + + (void) argc; // TODO: Check if the number of input parameters is correct. 
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; + + index_type numSystems = atoi(argv[3]); + std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<getNumRows(), + A_coo->getNumColumns(), + A_coo->getNnz(), + A_coo->symmetric(), + A_coo->expanded()); + + rhs = ReSolve::io::readRhsFromFile(rhs_file); + x = new real_type[A->getNumRows()]; + vec_rhs = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); + vec_r = new vector_type(A->getNumRows()); + } else { + ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); + ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); + } + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() + << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; + mat_file.close(); + rhs_file.close(); + + //Now convert to CSR. + if (i < 2) { + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + } else { + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + } + std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setMatrix(A); + // if (i == 0) { + // solver->setupParameters(1, 0.1, false); + // } + int status; + if (i < 2){ + // solver->setup(A); + status = solver->analyze(); + std::cout<<"solver analysis status: "<factorize(); + std::cout<<"solver factorization status: "<solve(vec_rhs, vec_x); + std::cout<<"solver solve status: "<refactorize(); + std::cout<<"solver re-factorization status: "<solve(vec_rhs, vec_x); + std::cout<<"solver solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + + matrix_handler->setValuesChanged(true, "cpu"); + + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; + } + + //now DELETE + delete A; + delete solver; + delete [] x; + delete [] rhs; + delete vec_r; + delete vec_x; + delete matrix_handler; + delete vector_handler; + + return 0; +} diff --git a/examples/r_SysSolverCuda.cpp b/examples/r_SysSolverCuda.cpp new file mode 100644 index 00000000..43a2a31d --- /dev/null +++ b/examples/r_SysSolverCuda.cpp @@ -0,0 +1,154 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace ReSolve::constants; + +int main(int argc, char *argv[]) +{ + // Use the same data types as those you specified in ReSolve build. + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; + using vector_type = ReSolve::vector::Vector; + + (void) argc; // TODO: Check if the number of input parameters is correct. 
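For reference, the argument parsing below implies an invocation of the following shape; a hedged usage sketch with placeholder paths that are not part of the repository:

    // Hypothetical invocation of the CUDA example built above as system_cuda.exe:
    //   ./system_cuda.exe /data/mat_ /data/rhs_ 2 00 00 01 01
    // argv[1] and argv[2] are file-name stems for the matrix and rhs, argv[3] is the
    // number of systems, and each following <matrix id, rhs id> pair is appended to
    // the stems together with the ".mtx" extension when matrixFileNameFull and
    // rhsFileNameFull are assembled below.
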
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; + + index_type numSystems = atoi(argv[3]); + std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + real_type* rhs = nullptr; + real_type* x = nullptr; + + vector_type* vec_rhs; + vector_type* vec_x; + + ReSolve::SystemSolver* solver = new ReSolve::SystemSolver(workspace_CUDA); + + for (int i = 0; i < numSystems; ++i) + { + index_type j = 4 + i * 2; + fileId = argv[j]; + rhsId = argv[j + 1]; + + matrixFileNameFull = ""; + rhsFileNameFull = ""; + + // Read matrix first + matrixFileNameFull = matrixFileName + fileId + ".mtx"; + rhsFileNameFull = rhsFileName + rhsId + ".mtx"; + std::cout << std::endl << std::endl << std::endl; + std::cout << "========================================================================================================================"<getNumRows(), + A_coo->getNumColumns(), + A_coo->getNnz(), + A_coo->symmetric(), + A_coo->expanded()); + + rhs = ReSolve::io::readRhsFromFile(rhs_file); + x = new real_type[A->getNumRows()]; + vec_rhs = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::DEVICE); + } else { + ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); + ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); + } + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() + << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? "<< A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; + mat_file.close(); + rhs_file.close(); + + //Now convert to CSR. + if (i < 1) { + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + } else { + matrix_handler->coo2csr(A_coo, A, "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + } + std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<setMatrix(A); + int status = -1; + if (i < 1) { + status = solver->analyze(); + std::cout << "KLU analysis status: " << status << std::endl; + status = solver->factorize(); + std::cout << "KLU factorization status: " << status << std::endl; + status = solver->refactorizationSetup(); + std::cout << "GLU refactorization setup status: " << status << std::endl; + status = solver->solve(vec_rhs, vec_x); + std::cout << "GLU solve status: " << status << std::endl; + } else { + std::cout << "Using CUSOLVER GLU" << std::endl; + status = solver->refactorize(); + std::cout << "CUSOLVER GLU refactorization status: " << status << std::endl; + status = solver->solve(vec_rhs, vec_x); + std::cout << "CUSOLVER GLU solve status: " << status << std::endl; + } + + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << solver->getResidualNorm(vec_rhs, vec_x) << "\n"; + + } // for (int i = 0; i < numSystems; ++i) + + //now DELETE + delete A; + delete solver; + delete [] x; + delete [] rhs; + delete vec_rhs; + delete vec_x; + delete workspace_CUDA; + delete matrix_handler; + + return 0; +} diff --git a/examples/r_SysSolverHip.cpp b/examples/r_SysSolverHip.cpp new file mode 100644 index 00000000..dc067a59 --- /dev/null +++ b/examples/r_SysSolverHip.cpp @@ -0,0 +1,156 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace ReSolve::constants; + +int main(int argc, char *argv[] ) +{ + // Use the same data types as those you specified in ReSolve build. + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; + using vector_type = ReSolve::vector::Vector; + + (void) argc; // TODO: Check if the number of input parameters is correct. + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; + + index_type numSystems = atoi(argv[3]); + std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + real_type* rhs = nullptr; + real_type* x = nullptr; + + vector_type* vec_rhs = nullptr; + vector_type* vec_x = nullptr; + + ReSolve::SystemSolver* solver = new ReSolve::SystemSolver(workspace_HIP); + + for (int i = 0; i < numSystems; ++i) + { + index_type j = 4 + i * 2; + fileId = argv[j]; + rhsId = argv[j + 1]; + + matrixFileNameFull = ""; + rhsFileNameFull = ""; + + // Read matrix first + matrixFileNameFull = matrixFileName + fileId + ".mtx"; + rhsFileNameFull = rhsFileName + rhsId + ".mtx"; + std::cout << std::endl << std::endl << std::endl; + std::cout << "========================================================================================================================"<getNumRows(), + A_coo->getNumColumns(), + A_coo->getNnz(), + A_coo->symmetric(), + A_coo->expanded()); + + rhs = ReSolve::io::readRhsFromFile(rhs_file); + x = new real_type[A->getNumRows()]; + vec_rhs = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); + } else { + ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); + ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); + } + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() + << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? "<< A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; + mat_file.close(); + rhs_file.close(); + + //Now convert to CSR. 
+ if (i < 2) { + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + } else { + matrix_handler->coo2csr(A_coo, A, "hip"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + } + std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setMatrix(A); + int status = -1; + if (i < 2) { + status = solver->analyze(); + std::cout << "KLU analysis status: " << status << std::endl; + status = solver->factorize(); + std::cout << "KLU factorization status: " << status << std::endl; + status = solver->solve(vec_rhs, vec_x); + std::cout << "KLU solve status: " << status << std::endl; + if (i == 1) { + status = solver->refactorizationSetup(); + std::cout << "rocsolver rf refactorization setup status: " << status << std::endl; + } + } else { + std::cout << "Using rocsolver rf" << std::endl; + status = solver->refactorize(); + std::cout << "rocsolver rf refactorization status: " << status << std::endl; + status = solver->solve(vec_rhs, vec_x); + std::cout << "rocsolver rf solve status: " << status << std::endl; + } + + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << solver->getResidualNorm(vec_rhs, vec_x) << "\n"; + + } // for (int i = 0; i < numSystems; ++i) + + //now DELETE + delete A; + delete A_coo; + delete [] x; + delete [] rhs; + delete vec_rhs; + delete vec_x; + delete workspace_HIP; + delete matrix_handler; + + return 0; +} diff --git a/examples/r_SysSolverHipRefine.cpp b/examples/r_SysSolverHipRefine.cpp new file mode 100644 index 00000000..be46e182 --- /dev/null +++ b/examples/r_SysSolverHipRefine.cpp @@ -0,0 +1,175 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace ReSolve::constants; + +int main(int argc, char *argv[]) +{ + // Use the same data types as those you specified in ReSolve build. + using index_type = ReSolve::index_type; + using real_type = ReSolve::real_type; + using vector_type = ReSolve::vector::Vector; + + (void) argc; // TODO: Check if the number of input parameters is correct. 
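Before the per-system loop below, it may help to see the end-to-end call sequence this example exercises against the new SystemSolver interface; a condensed sketch, assuming the five-argument HIP constructor declared in SystemSolver.hpp, with file I/O and status checks omitted (not part of the patch):

    // Condensed sketch of the workflow in this example; A, vec_rhs, vec_x are
    // assumed to be set up as in the loop below.
    ReSolve::SystemSolver solver(workspace_HIP, "klu", "rocsolverrf", "rocsolverrf", "fgmres");
    solver.setMatrix(A);             // first system
    solver.analyze();                // KLU symbolic factorization
    solver.factorize();              // KLU numeric factorization
    solver.solve(vec_rhs, vec_x);
    solver.refactorizationSetup();   // extract L, U, P, Q and set up rocsolverRf
    // ... for later systems with the same sparsity pattern:
    solver.refactorize();
    solver.solve(vec_rhs, vec_x);
    solver.refine(vec_rhs, vec_x);   // FGMRES iterative refinement
    real_type relative_residual = solver.getResidualNorm(vec_rhs, vec_x);
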
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; + + index_type numSystems = atoi(argv[3]); + std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + real_type* rhs = nullptr; + real_type* x = nullptr; + + vector_type* vec_rhs = nullptr; + vector_type* vec_x = nullptr; + + ReSolve::SystemSolver* solver = new ReSolve::SystemSolver(workspace_HIP, "fgmres"); + + for (int i = 0; i < numSystems; ++i) + { + index_type j = 4 + i * 2; + fileId = argv[j]; + rhsId = argv[j + 1]; + + matrixFileNameFull = ""; + rhsFileNameFull = ""; + + // Read matrix first + matrixFileNameFull = matrixFileName + fileId + ".mtx"; + rhsFileNameFull = rhsFileName + rhsId + ".mtx"; + std::cout << std::endl << std::endl << std::endl; + std::cout << "========================================================================================================================"<getNumRows(), + A_coo->getNumColumns(), + A_coo->getNnz(), + A_coo->symmetric(), + A_coo->expanded()); + + rhs = ReSolve::io::readRhsFromFile(rhs_file); + x = new real_type[A->getNumRows()]; + vec_rhs = new vector_type(A->getNumRows()); + vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::DEVICE); + } else { + ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); + ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); + } + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() + << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? "<< A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; + mat_file.close(); + rhs_file.close(); + + //Now convert to CSR. + if (i < 2) { + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + } else { + matrix_handler->coo2csr(A_coo, A, "hip"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + } + std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<setMatrix(A); + int status; + if (i < 2) { + status = solver->analyze(); + std::cout << "KLU analysis status: " << status << std::endl; + status = solver->factorize(); + std::cout << "KLU factorization status: " << status << std::endl; + + status = solver->solve(vec_rhs, vec_x); + std::cout << "KLU solve status: " << status << std::endl; + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << solver->getResidualNorm(vec_rhs, vec_x) << "\n"; + if (i == 1) { + solver->refactorizationSetup(); + std::cout << "rocsolver rf refactorization setup status: " << status << std::endl; + } + } else { + std::cout << "Using ROCSOLVER RF" << std::endl; + status = solver->refactorize(); + std::cout << "ROCSOLVER RF refactorization status: " << status << std::endl; + status = solver->solve(vec_rhs, vec_x); + std::cout << "ROCSOLVER RF solve status: " << status << std::endl; + real_type rnrm = solver->getResidualNorm(vec_rhs, vec_x); + std::cout << "\t 2-Norm of the residual (before IR): " + << std::scientific << std::setprecision(16) + << rnrm << "\n"; + + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + if (!std::isnan(rnrm) && !std::isinf(rnrm)) { + status = solver->refine(vec_rhs, vec_x); + std::cout << "FGMRES solve status: " << status << std::endl; + + std::cout << "FGMRES: init nrm: " + << std::scientific << std::setprecision(16) + << rnrm + << " final nrm: " + << solver->getResidualNorm(vec_rhs, vec_x) + << " iter: " << solver->getIterativeSolver().getNumIter() + << "\n"; + } + } + + } // for (int i = 0; i < numSystems; ++i) + + delete A; + delete A_coo; + delete solver; + delete [] x; + delete [] rhs; + delete vec_rhs; + delete vec_x; + delete workspace_HIP; + delete matrix_handler; + + return 0; +} diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index 501b0609..078bb4d6 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -12,6 +12,7 @@ add_subdirectory(utilities) set(ReSolve_SRC LinSolver.cpp LinSolverDirectKLU.cpp + SystemSolver.cpp ) # Temporary until there is CPU-only option for FGMRES diff --git a/resolve/Common.hpp b/resolve/Common.hpp index 974cb8b8..67730bd6 100644 --- a/resolve/Common.hpp +++ b/resolve/Common.hpp @@ -1,4 +1,5 @@ #pragma once +#include namespace ReSolve { @@ -15,6 +16,7 @@ namespace ReSolve { constexpr real_type ZERO = 0.0; constexpr real_type ONE = 1.0; constexpr real_type MINUSONE = -1.0; + constexpr real_type DEFAULT_TOL = 100*std::numeric_limits::epsilon(); } namespace colors diff --git a/resolve/LinSolver.cpp b/resolve/LinSolver.cpp index 5682ec40..a1eab72c 100644 --- a/resolve/LinSolver.cpp +++ b/resolve/LinSolver.cpp @@ -1,9 +1,13 @@ #include +#include + #include "LinSolver.hpp" namespace ReSolve { + using out = io::Logger; + LinSolver::LinSolver() { } @@ -19,6 +23,10 @@ namespace ReSolve return 1.0; } + // + // Direct solver methods implementations + // + LinSolverDirect::LinSolverDirect() { L_ = nullptr; @@ -43,30 +51,28 @@ namespace ReSolve index_type* /* Q */, vector_type* /* rhs */) { + if (A == nullptr) { + return 1; + } this->A_ = A; return 0; } int LinSolverDirect::analyze() { - return 0; + return 1; } //the same as symbolic factorization int LinSolverDirect::factorize() { factors_extracted_ = false; - return 0; + return 1; } int LinSolverDirect::refactorize() { factors_extracted_ = false; - return 0; - } - - int LinSolverDirect::solve(vector_type* /* rhs */, vector_type* /* x */) - { - return 0; + return 1; } matrix::Sparse* 
LinSolverDirect::getLFactor() @@ -87,7 +93,32 @@ namespace ReSolve index_type* LinSolverDirect::getQOrdering() { return nullptr; - } + } + + void LinSolverDirect::setPivotThreshold(real_type tol) + { + pivot_threshold_tol_ = tol; + } + + void LinSolverDirect::setOrdering(int ordering) + { + ordering_ = ordering; + } + + void LinSolverDirect::setHaltIfSingular(bool is_halt) + { + halt_if_singular_ = is_halt; + } + + real_type LinSolverDirect::getMatrixConditionNumber() + { + out::error() << "Solver does not implement returning system matrix condition number.\n"; + return -1.0; + } + + // + // Iterative solver methods implementations + // LinSolverIterative::LinSolverIterative() { @@ -99,13 +130,77 @@ namespace ReSolve int LinSolverIterative::setup(matrix::Sparse* A) { + if (A == nullptr) { + return 1; + } this->A_ = A; return 0; } - int LinSolverIterative::solve(vector_type* /* rhs */, vector_type* /* init_guess */) + real_type LinSolverIterative::getFinalResidualNorm() const { - return 0; + return final_residual_norm_; + } + + real_type LinSolverIterative::getInitResidualNorm() const + { + return initial_residual_norm_; + } + + index_type LinSolverIterative::getNumIter() const + { + return total_iters_; + } + + + real_type LinSolverIterative::getTol() + { + return tol_; + } + + index_type LinSolverIterative::getMaxit() + { + return maxit_; + } + + index_type LinSolverIterative::getRestart() + { + return restart_; + } + + index_type LinSolverIterative::getConvCond() + { + return conv_cond_; + } + + bool LinSolverIterative::getFlexible() + { + return flexible_; + } + + void LinSolverIterative::setTol(real_type new_tol) + { + this->tol_ = new_tol; + } + + void LinSolverIterative::setMaxit(index_type new_maxit) + { + this->maxit_ = new_maxit; + } + + void LinSolverIterative::setRestart(index_type new_restart) + { + this->restart_ = new_restart; + } + + void LinSolverIterative::setConvCond(index_type new_conv_cond) + { + this->conv_cond_ = new_conv_cond; + } + + void LinSolverIterative::setFlexible(bool new_flex) + { + this->flexible_ = new_flex; } } diff --git a/resolve/LinSolver.hpp b/resolve/LinSolver.hpp index a34aeba0..f58e8847 100644 --- a/resolve/LinSolver.hpp +++ b/resolve/LinSolver.hpp @@ -47,23 +47,29 @@ namespace ReSolve public: LinSolverDirect(); virtual ~LinSolverDirect(); - //return 0 if successful! 
- virtual int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs); - + virtual int setup(matrix::Sparse* A = nullptr, + matrix::Sparse* L = nullptr, + matrix::Sparse* U = nullptr, + index_type* P = nullptr, + index_type* Q = nullptr, + vector_type* rhs = nullptr); + virtual int analyze(); //the same as symbolic factorization virtual int factorize(); virtual int refactorize(); - virtual int solve(vector_type* rhs, vector_type* x); + virtual int solve(vector_type* rhs, vector_type* x) = 0; + virtual int solve(vector_type* x) = 0; virtual matrix::Sparse* getLFactor(); virtual matrix::Sparse* getUFactor(); virtual index_type* getPOrdering(); virtual index_type* getQOrdering(); + + virtual void setPivotThreshold(real_type tol); + virtual void setOrdering(int ordering); + virtual void setHaltIfSingular(bool is_halt); + + virtual real_type getMatrixConditionNumber(); protected: matrix::Sparse* L_; @@ -71,15 +77,49 @@ namespace ReSolve index_type* P_; index_type* Q_; bool factors_extracted_; + + int ordering_{1}; // 0 = AMD, 1 = COLAMD, 2 = user provided P, Q + real_type pivot_threshold_tol_{0.1}; + bool halt_if_singular_{false}; }; class LinSolverIterative : public LinSolver { public: LinSolverIterative(); - ~LinSolverIterative(); + virtual ~LinSolverIterative(); virtual int setup(matrix::Sparse* A); + virtual int resetMatrix(matrix::Sparse* A) = 0; + virtual int setupPreconditioner(std::string type, LinSolverDirect* LU_solver) = 0; + + virtual int solve(vector_type* rhs, vector_type* init_guess) = 0; + + virtual real_type getFinalResidualNorm() const; + virtual real_type getInitResidualNorm() const; + virtual index_type getNumIter() const; + + + real_type getTol(); + index_type getMaxit(); + index_type getRestart(); + index_type getConvCond(); + bool getFlexible(); + + void setTol(real_type new_tol); + void setMaxit(index_type new_maxit); + void setRestart(index_type new_restart); + void setConvCond(index_type new_conv_cond); + void setFlexible(bool new_flexible); + + protected: + real_type initial_residual_norm_; + real_type final_residual_norm_; + index_type total_iters_; - virtual int solve(vector_type* rhs, vector_type* init_guess); + real_type tol_{1e-14}; + index_type maxit_{100}; + index_type restart_{10}; + index_type conv_cond_{0}; + bool flexible_{true}; // if can be run as "normal" GMRES if needed, set flexible_ to false. Default is true of course. 
}; } diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index 65af5812..062f0e69 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -4,11 +4,13 @@ #include #include #include +#include #include "LinSolverDirectCuSolverGLU.hpp" namespace ReSolve { using vector_type = vector::Vector; + using out = io::Logger; LinSolverDirectCuSolverGLU::LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace) { @@ -174,7 +176,6 @@ namespace ReSolve int LinSolverDirectCuSolverGLU::solve(vector_type* rhs, vector_type* x) { - status_cusolver_ = cusolverSpDgluSolve(handle_cusolversp_, A_->getNumRows(), /* A is original matrix */ @@ -192,4 +193,10 @@ namespace ReSolve return status_cusolver_; } + int LinSolverDirectCuSolverGLU::solve(vector_type* ) + { + out::error() << "Function solve(Vector* x) not implemented in CuSolverGLU!\n" + << "Consider using solve(Vector* rhs, Vector* x) instead.\n"; + return 1; + } } diff --git a/resolve/LinSolverDirectCuSolverGLU.hpp b/resolve/LinSolverDirectCuSolverGLU.hpp index c6f5416c..274e34fc 100644 --- a/resolve/LinSolverDirectCuSolverGLU.hpp +++ b/resolve/LinSolverDirectCuSolverGLU.hpp @@ -30,6 +30,7 @@ namespace ReSolve int refactorize(); int solve(vector_type* rhs, vector_type* x); + int solve(vector_type* x); int setup(matrix::Sparse* A, matrix::Sparse* L, diff --git a/resolve/LinSolverDirectKLU.cpp b/resolve/LinSolverDirectKLU.cpp index c30d92d4..f8d0670c 100644 --- a/resolve/LinSolverDirectKLU.cpp +++ b/resolve/LinSolverDirectKLU.cpp @@ -1,10 +1,13 @@ #include // includes memcpy #include #include +#include #include "LinSolverDirectKLU.hpp" namespace ReSolve { + using out = io::Logger; + LinSolverDirectKLU::LinSolverDirectKLU() { Symbolic_ = nullptr; @@ -13,7 +16,20 @@ namespace ReSolve L_ = nullptr; U_ = nullptr; - klu_defaults(&Common_) ; + // Populate KLU data structure holding solver parameters + klu_defaults(&Common_); + Common_.btf = 0; + Common_.scale = -1; + Common_.ordering = ordering_; + Common_.tol = pivot_threshold_tol_; + Common_.halt_if_singular = halt_if_singular_; + + out::summary() << "KLU solver set with parameters:\n" + << "\tbtf = " << Common_.btf << "\n" + << "\tscale = " << Common_.scale << "\n" + << "\tordering = " << Common_.ordering << "\n" + << "\tpivot threshold = " << Common_.tol << "\n" + << "\thalt if singular = " << Common_.halt_if_singular << "\n"; } LinSolverDirectKLU::~LinSolverDirectKLU() @@ -33,15 +49,6 @@ namespace ReSolve return 0; } - void LinSolverDirectKLU::setupParameters(int ordering, double KLU_threshold, bool halt_if_singular) - { - Common_.btf = 0; - Common_.ordering = ordering; - Common_.tol = KLU_threshold; - Common_.scale = -1; - Common_.halt_if_singular = halt_if_singular; - } - int LinSolverDirectKLU::analyze() { // in case we called this function AGAIN @@ -135,6 +142,13 @@ namespace ReSolve return 0; } + int LinSolverDirectKLU::solve(vector_type* ) + { + out::error() << "Function solve(Vector* x) not implemented in LinSolverDirectKLU!\n" + << "Consider using solve(Vector* rhs, Vector* x) instead.\n"; + return 1; + } + matrix::Sparse* LinSolverDirectKLU::getLFactor() { if (!factors_extracted_) { @@ -145,6 +159,7 @@ namespace ReSolve U_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzU); L_->allocateMatrixData(memory::HOST); U_->allocateMatrixData(memory::HOST); + int ok = klu_extract(Numeric_, Symbolic_, L_->getColData(memory::HOST), @@ -230,4 +245,28 @@ namespace ReSolve return nullptr; } } + + void 
LinSolverDirectKLU::setPivotThreshold(real_type tol) + { + pivot_threshold_tol_ = tol; + Common_.tol = tol; + } + + void LinSolverDirectKLU::setOrdering(int ordering) + { + ordering_ = ordering; + Common_.ordering = ordering; + } + + void LinSolverDirectKLU::setHaltIfSingular(bool isHalt) + { + halt_if_singular_ = isHalt; + Common_.halt_if_singular = isHalt; + } + + real_type LinSolverDirectKLU::getMatrixConditionNumber() + { + klu_rcond(Symbolic_, Numeric_, &Common_); + return Common_.rcond; + } } diff --git a/resolve/LinSolverDirectKLU.hpp b/resolve/LinSolverDirectKLU.hpp index b4edadb1..03cf2dc3 100644 --- a/resolve/LinSolverDirectKLU.hpp +++ b/resolve/LinSolverDirectKLU.hpp @@ -30,23 +30,28 @@ namespace ReSolve matrix::Sparse* U = nullptr, index_type* P = nullptr, index_type* Q = nullptr, - vector_type* rhs = nullptr); + vector_type* rhs = nullptr) override; - void setupParameters(int ordering, double KLU_threshold, bool halt_if_singular); - - int analyze(); //the same as symbolic factorization - int factorize(); - int refactorize(); - int solve(vector_type* rhs, vector_type* x); + int analyze() override; //the same as symbolic factorization + int factorize() override; + int refactorize() override; + int solve(vector_type* rhs, vector_type* x) override; + int solve(vector_type* x) override; - matrix::Sparse* getLFactor(); - matrix::Sparse* getUFactor(); - index_type* getPOrdering(); - index_type* getQOrdering(); + matrix::Sparse* getLFactor() override; + matrix::Sparse* getUFactor() override; + index_type* getPOrdering() override; + index_type* getQOrdering() override; + + virtual void setPivotThreshold(real_type tol) override; + virtual void setOrdering(int ordering) override; + virtual void setHaltIfSingular(bool isHalt) override; + + virtual real_type getMatrixConditionNumber() override; private: klu_common Common_; //settings - klu_symbolic* Symbolic_; - klu_numeric* Numeric_; + klu_symbolic* Symbolic_{nullptr}; + klu_numeric* Numeric_{nullptr}; }; } diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index 2f49c57c..2a7ac3f8 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -9,7 +9,7 @@ namespace ReSolve { workspace_ = workspace; infoM_ = nullptr; - solve_mode_ = 0; //solve mode - slow mode is default + solve_mode_ = 1; //solve mode - fast mode is default } LinSolverDirectRocSolverRf::~LinSolverDirectRocSolverRf() @@ -74,7 +74,6 @@ namespace ReSolve mem_.deviceSynchronize(); error_sum += status_rocblas_; - // tri solve setup if (solve_mode_ == 1) { // fast mode @@ -207,13 +206,11 @@ namespace ReSolve d_Q_, infoM_); - mem_.deviceSynchronize(); error_sum += status_rocblas_; if (solve_mode_ == 1) { //split M, fill L and U with correct values - printf("solve mode 1, splitting the factors again \n"); status_rocblas_ = rocsolver_dcsrrf_splitlu(workspace_->getRocblasHandle(), A_->getNumRows(), M_->getNnzExpanded(), @@ -274,6 +271,7 @@ namespace ReSolve L_buffer_); error_sum += status_rocsparse_; + //mem_.deviceSynchronize(); rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), @@ -290,6 +288,7 @@ namespace ReSolve U_buffer_); error_sum += status_rocsparse_; + //mem_.deviceSynchronize(); permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index c0dcf48d..3585ea78 100644 --- 
a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -15,11 +15,12 @@ namespace ReSolve memspace_ = memspace; this->matrix_handler_ = nullptr; this->vector_handler_ = nullptr; - tol_ = 1e-14; //default - maxit_= 100; //default - restart_ = 10; - conv_cond_ = 0;//default - flexible_ = 1; + // Defaults: + // tol_ = 1e-14; + // maxit_= 100; + // restart_ = 10; + // conv_cond_ = 0; + // flexible_ = true; d_V_ = nullptr; d_Z_ = nullptr; @@ -35,11 +36,12 @@ namespace ReSolve this->vector_handler_ = vector_handler; this->GS_ = gs; - tol_ = 1e-14; //default - maxit_= 100; //default - restart_ = 10; - conv_cond_ = 0;//default - flexible_ = 1; + // Defaults: + // tol_ = 1e-14; + // maxit_= 100; + // restart_ = 10; + // conv_cond_ = 0; + // flexible_ = true; d_V_ = nullptr; d_Z_ = nullptr; @@ -63,7 +65,7 @@ namespace ReSolve maxit_= maxit; restart_ = restart; conv_cond_ = conv_cond; - flexible_ = 1; + flexible_ = true; d_V_ = nullptr; d_Z_ = nullptr; @@ -163,7 +165,7 @@ namespace ReSolve outer_flag = 0; final_residual_norm_ = rnorm; initial_residual_norm_ = rnorm; - fgmres_iters_ = 0; + total_iters_ = 0; break; } @@ -279,7 +281,7 @@ namespace ReSolve if(!outer_flag) { final_residual_norm_ = rnorm; - fgmres_iters_ = it; + total_iters_ = it; } } // outer while return 0; @@ -297,56 +299,6 @@ namespace ReSolve } - real_type LinSolverIterativeFGMRES::getTol() - { - return tol_; - } - - index_type LinSolverIterativeFGMRES::getMaxit() - { - return maxit_; - } - - index_type LinSolverIterativeFGMRES::getRestart() - { - return restart_; - } - - index_type LinSolverIterativeFGMRES::getConvCond() - { - return conv_cond_; - } - - bool LinSolverIterativeFGMRES::getFlexible() - { - return flexible_; - } - - void LinSolverIterativeFGMRES::setTol(real_type new_tol) - { - this->tol_ = new_tol; - } - - void LinSolverIterativeFGMRES::setMaxit(index_type new_maxit) - { - this->maxit_ = new_maxit; - } - - void LinSolverIterativeFGMRES::setRestart(index_type new_restart) - { - this->restart_ = new_restart; - } - - void LinSolverIterativeFGMRES::setConvCond(index_type new_conv_cond) - { - this->conv_cond_ = new_conv_cond; - } - - void LinSolverIterativeFGMRES::setFlexible(bool new_flex) - { - this->flexible_ = new_flex; - } - int LinSolverIterativeFGMRES::resetMatrix(matrix::Sparse* new_matrix) { A_ = new_matrix; @@ -362,18 +314,4 @@ namespace ReSolve // x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); } - real_type LinSolverIterativeFGMRES::getFinalResidualNorm() - { - return final_residual_norm_; - } - - real_type LinSolverIterativeFGMRES::getInitResidualNorm() - { - return initial_residual_norm_; - } - - index_type LinSolverIterativeFGMRES::getNumIter() - { - return fgmres_iters_; - } }//namespace diff --git a/resolve/LinSolverIterativeFGMRES.hpp b/resolve/LinSolverIterativeFGMRES.hpp index c9c140f9..d018e6ec 100644 --- a/resolve/LinSolverIterativeFGMRES.hpp +++ b/resolve/LinSolverIterativeFGMRES.hpp @@ -33,33 +33,13 @@ namespace ReSolve int resetMatrix(matrix::Sparse* new_A); int setupPreconditioner(std::string name, LinSolverDirect* LU_solver); - real_type getTol(); - index_type getMaxit(); - index_type getRestart(); - index_type getConvCond(); - bool getFlexible(); - - void setTol(real_type new_tol); - void setMaxit(index_type new_maxit); - void setRestart(index_type new_restart); - void setConvCond(index_type new_conv_cond); - void setFlexible(bool new_flexible); - - real_type getFinalResidualNorm(); - real_type getInitResidualNorm(); - index_type getNumIter(); 
private: //remember matrix handler and vector handler are inherited. std::string memspace_; - real_type tol_; - index_type maxit_; - index_type restart_; std::string orth_option_; - index_type conv_cond_; - bool flexible_{true}; // if can be run as "normal" GMRES if needed, set flexible_ to false. Default is true of course. vector_type* d_V_{nullptr}; vector_type* d_Z_{nullptr}; @@ -73,9 +53,6 @@ namespace ReSolve void precV(vector_type* rhs, vector_type* x); //multiply the vector by preconditioner LinSolverDirect* LU_solver_; index_type n_;// for simplicity - real_type final_residual_norm_; - real_type initial_residual_norm_; - index_type fgmres_iters_; MemoryHandler mem_; ///< Device memory manager object }; diff --git a/resolve/LinSolverIterativeRandFGMRES.cpp b/resolve/LinSolverIterativeRandFGMRES.cpp index 40cd08ac..24d3790d 100644 --- a/resolve/LinSolverIterativeRandFGMRES.cpp +++ b/resolve/LinSolverIterativeRandFGMRES.cpp @@ -223,7 +223,7 @@ namespace ReSolve outer_flag = 0; final_residual_norm_ = rnorm; initial_residual_norm_ = rnorm; - fgmres_iters_ = 0; + total_iters_ = 0; break; } @@ -397,7 +397,7 @@ namespace ReSolve << rnorm << "\n"; final_residual_norm_ = rnorm; - fgmres_iters_ = it; + total_iters_ = it; } } // outer while return 0; @@ -485,18 +485,4 @@ namespace ReSolve // x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); } - real_type LinSolverIterativeRandFGMRES::getFinalResidualNorm() - { - return final_residual_norm_; - } - - real_type LinSolverIterativeRandFGMRES::getInitResidualNorm() - { - return initial_residual_norm_; - } - - index_type LinSolverIterativeRandFGMRES::getNumIter() - { - return fgmres_iters_; - } } // namespace ReSolve diff --git a/resolve/LinSolverIterativeRandFGMRES.hpp b/resolve/LinSolverIterativeRandFGMRES.hpp index f779f257..d695021a 100644 --- a/resolve/LinSolverIterativeRandFGMRES.hpp +++ b/resolve/LinSolverIterativeRandFGMRES.hpp @@ -57,10 +57,6 @@ namespace ReSolve void setFlexible(bool new_flexible); void getRandSketchingMethod(std::string rand_method); - real_type getFinalResidualNorm(); - real_type getInitResidualNorm(); - index_type getNumIter(); - private: //remember matrix handler and vector handler are inherited. @@ -89,9 +85,6 @@ namespace ReSolve void precV(vector_type* rhs, vector_type* x); //multiply the vector by preconditioner LinSolverDirect* LU_solver_; index_type n_;// for simplicity - real_type final_residual_norm_; - real_type initial_residual_norm_; - index_type fgmres_iters_; real_type one_over_k_{1.0}; index_type k_rand_{0}; // size of sketch space, we need to know it so we can allocate S! 
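Since the residual-norm and iteration-count accessors now live in LinSolverIterative, both FGMRES variants can be tuned and queried through the base class alone; a brief sketch under that assumption (headers and solver construction elided):

    // Sketch: works for LinSolverIterativeFGMRES and LinSolverIterativeRandFGMRES alike.
    void runIterativeSolve(ReSolve::LinSolverIterative& solver,
                           ReSolve::vector::Vector* rhs,
                           ReSolve::vector::Vector* x)
    {
      solver.setTol(1e-12);     // base-class default is 1e-14
      solver.setRestart(30);    // base-class default is 10
      solver.setMaxit(200);     // base-class default is 100
      solver.solve(rhs, x);
      std::cout << "iters: " << solver.getNumIter()
                << ", final residual norm: " << solver.getFinalResidualNorm() << "\n";
    }
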
diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index 0e979d2c..59d160bb 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -1,31 +1,450 @@ -namespace +#include +#include + +#include + +#ifdef RESOLVE_USE_CUDA +#include +#include +#include +#include +#include +#endif + +#ifdef RESOLVE_USE_HIP +#include +#include +#include +#include +#endif + +// Handlers +#include +#include + +// Utilities +#include + +#include "SystemSolver.hpp" + + +namespace ReSolve { - SystemSolver::SystemSolver(){ - //set defaults: - factorizationMethod = "klu"; - refactorizationMethod = "glu"; - solveMethod = "glu"; - IRMethod = "none"; - - this->setup(); + // Create a shortcut name for Logger static class + using out = io::Logger; + + SystemSolver::SystemSolver() + { + //set defaults: + memspace_ = "cpu"; + factorizationMethod_ = "klu"; + refactorizationMethod_ = "klu"; + solveMethod_ = "klu"; + irMethod_ = "none"; + + initialize(); + } + +#ifdef RESOLVE_USE_CUDA + SystemSolver::SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, + std::string factor, + std::string refactor, + std::string solve, + std::string ir) + : workspaceCuda_(workspaceCuda), + factorizationMethod_(factor), + refactorizationMethod_(refactor), + solveMethod_(solve), + irMethod_(ir) + { + // Instantiate handlers + matrixHandler_ = new MatrixHandler(workspaceCuda_); + vectorHandler_ = new VectorHandler(workspaceCuda_); + + memspace_ = "cuda"; + + initialize(); + } +#endif + +#ifdef RESOLVE_USE_HIP + SystemSolver::SystemSolver(LinAlgWorkspaceHIP* workspaceHip, + std::string factor, + std::string refactor, + std::string solve, + std::string ir) + : workspaceHip_(workspaceHip), + factorizationMethod_(factor), + refactorizationMethod_(refactor), + solveMethod_(solve), + irMethod_(ir) + { + // Instantiate handlers + matrixHandler_ = new MatrixHandler(workspaceHip_); + vectorHandler_ = new VectorHandler(workspaceHip_); + + memspace_ = "hip"; + + initialize(); + } +#endif + + SystemSolver::~SystemSolver() + { + delete resVector_; + delete factorizationSolver_; + delete refactorizationSolver_; + if (irMethod_ != "none") { + delete iterativeSolver_; + delete gs_; + } + + delete matrixHandler_; + delete vectorHandler_; + } + + int SystemSolver::setMatrix(matrix::Sparse* A) + { + A_ = A; + resVector_ = new vector_type(A->getNumRows()); + if (memspace_ == "cpu") { + resVector_->allocate(memory::HOST); + } else { + resVector_->allocate(memory::DEVICE); + } + return 0; + } + + /** + * @brief Sets up the system solver + * + * This method instantiates components of the system solver based on + * user inputs. 
+ */ + int SystemSolver::initialize() + { + // First delete old objects + if (factorizationSolver_) + delete factorizationSolver_; + if (refactorizationSolver_) + delete refactorizationSolver_; + + // Create factorization solver + if (factorizationMethod_ == "klu") { + factorizationSolver_ = new ReSolve::LinSolverDirectKLU(); + } else { + out::error() << "Unrecognized factorization " << factorizationMethod_ << "\n"; + return 1; + } + + // Create refactorization solver + if (refactorizationMethod_ == "klu") { + // do nothing for now +#ifdef RESOLVE_USE_CUDA + } else if (refactorizationMethod_ == "glu") { + refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverGLU(workspaceCuda_); + } else if (refactorizationMethod_ == "cusolverrf") { + refactorizationSolver_ = new ReSolve::LinSolverDirectCuSolverRf(); +#endif +#ifdef RESOLVE_USE_HIP + } else if (refactorizationMethod_ == "rocsolverrf") { + refactorizationSolver_ = new ReSolve::LinSolverDirectRocSolverRf(workspaceHip_); +#endif + } else { + out::error() << "Refactorization method " << refactorizationMethod_ + << " not recognized ...\n"; + return 1; + } + +#if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) + if (irMethod_ == "fgmres") { + // GramSchmidt::GSVariant variant; + if (gsMethod_ == "cgs2") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::cgs2); + } else if (gsMethod_ == "mgs") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::mgs); + } else if (gsMethod_ == "mgs_two_synch") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::mgs_two_synch); + } else if (gsMethod_ == "mgs_pm") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::mgs_pm); + } else if (gsMethod_ == "cgs1") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::cgs1); + } else { + out::warning() << "Gram-Schmidt variant " << gsMethod_ << " not recognized.\n"; + out::warning() << "Using default cgs2 Gram-Schmidt variant.\n"; + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::cgs2); + gsMethod_ = "cgs2"; + } + + iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, + vectorHandler_, + gs_, + memspace_); + } +#endif + + return 0; + } + + int SystemSolver::analyze() + { + if (A_ == nullptr) { + out::error() << "System matrix not set!\n"; + return 1; + } + + if (factorizationMethod_ == "klu") { + factorizationSolver_->setup(A_); + return factorizationSolver_->analyze(); + } + return 1; } - SystemSolver::~SystemSoler() + + int SystemSolver::factorize() { - //delete the matrix and all the solvers and all their workspace - + if (factorizationMethod_ == "klu") { + return factorizationSolver_->factorize(); + } + return 1; + } + + int SystemSolver::refactorize() + { + if (refactorizationMethod_ == "klu") { + return factorizationSolver_->refactorize(); + } + +#ifdef RESOLVE_USE_CUDA + if (refactorizationMethod_ == "glu") { + return refactorizationSolver_->refactorize(); + } +#endif + +#ifdef RESOLVE_USE_HIP + if (refactorizationMethod_ == "rocsolverrf") { + return refactorizationSolver_->refactorize(); + } +#endif + + return 1; } - SystemSolver::setup(){ - if (factorizationMethod == "klu"){ - + int SystemSolver::refactorizationSetup() + { + int status = 0; + // Get factors and permutation vectors + L_ = factorizationSolver_->getLFactor(); + U_ = factorizationSolver_->getUFactor(); + P_ = factorizationSolver_->getPOrdering(); + Q_ = factorizationSolver_->getQOrdering(); + + if (L_ == nullptr) { + out::warning() << "Factorization failed, cannot extract factors ...\n"; + status = 1; } + +#ifdef RESOLVE_USE_CUDA + if (refactorizationMethod_ == 
"glu") { + isSolveOnDevice_ = true; + status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_); + } +#endif + +#ifdef RESOLVE_USE_HIP + if (refactorizationMethod_ == "rocsolverrf") { + std::cout << "Refactorization setup using rocsolverRf ...\n"; + isSolveOnDevice_ = true; + auto* Rf = dynamic_cast(refactorizationSolver_); + Rf->setSolveMode(1); + status += refactorizationSolver_->setup(A_, L_, U_, P_, Q_, resVector_); + } +#endif + +#if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) + if (irMethod_ == "fgmres") { + std::cout << "Setting up FGMRES ...\n"; + gs_->setup(A_->getNumRows(), iterativeSolver_->getRestart()); + status += iterativeSolver_->setup(A_); + } +#endif + + return status; } - SystemSolver::analyze() + // TODO: First argument in solve function is rhs (const vector) and second is the solution (overwritten) + int SystemSolver::solve(vector_type* rhs, vector_type* x) { - if (factorizationMethod == "klu"){ - //call klu_analyze + int status = 1; + if (solveMethod_ == "klu") { + status = factorizationSolver_->solve(rhs, x); } - + +#ifdef RESOLVE_USE_CUDA + if (solveMethod_ == "glu") { + if (isSolveOnDevice_) { + // std::cout << "Solving with GLU ...\n"; + status = refactorizationSolver_->solve(rhs, x); + } else { + // std::cout << "Solving with KLU ...\n"; + status = factorizationSolver_->solve(rhs, x); + } + } +#endif + +#ifdef RESOLVE_USE_HIP + if (solveMethod_ == "rocsolverrf") { + if (isSolveOnDevice_) { + // std::cout << "Solving with RocSolver ...\n"; + status = refactorizationSolver_->solve(rhs, x); + } else { + // std::cout << "Solving with KLU ...\n"; + status = factorizationSolver_->solve(rhs, x); + } + } +#endif + + return status; + } + + int SystemSolver::precondition() + { + // Not implemented yet + return 1; + } + + int SystemSolver::preconditionerSetup() + { + // Not implemented yet + return 1; + } + + int SystemSolver::refine(vector_type* rhs, vector_type* x) + { + int status = 0; +#if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) + status += iterativeSolver_->resetMatrix(A_); + status += iterativeSolver_->setupPreconditioner("LU", refactorizationSolver_); + status += iterativeSolver_->solve(rhs, x); +#endif + return status; + } + + LinSolverDirect& SystemSolver::getFactorizationSolver() + { + return *factorizationSolver_; + } + + LinSolverDirect& SystemSolver::getRefactorizationSolver() + { + return *refactorizationSolver_; + } + + LinSolverIterative& SystemSolver::getIterativeSolver() + { + return *iterativeSolver_; + } + + void SystemSolver::setFactorizationMethod(std::string method) + { + factorizationMethod_ = method; + // initialize(); + } + + void SystemSolver::setRefactorizationMethod(std::string method) + { + refactorizationMethod_ = method; + // initialize(); + } + + void SystemSolver::setSolveMethod(std::string method) + { + solveMethod_ = method; + // initialize(); + } + + void SystemSolver::setRefinementMethod(std::string method, std::string gsMethod) + { + if (iterativeSolver_ != nullptr) + delete iterativeSolver_; + + if(gs_ != nullptr) + delete gs_; + + if(method == "none") + return; + + gsMethod_ = gsMethod; + +#if defined(RESOLVE_USE_HIP) || defined(RESOLVE_USE_CUDA) + if (method == "fgmres") { + if (gsMethod == "cgs2") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::cgs2); + } else if (gsMethod == "mgs") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::mgs); + } else if (gsMethod == "mgs_two_synch") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::mgs_two_synch); + } else if (gsMethod == "mgs_pm") { + gs_ = 
new GramSchmidt(vectorHandler_, GramSchmidt::mgs_pm); + } else if (gsMethod == "cgs1") { + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::cgs1); + } else { + out::warning() << "Gram-Schmidt variant " << gsMethod_ << " not recognized.\n"; + out::warning() << "Using default cgs2 Gram-Schmidt variant.\n"; + gs_ = new GramSchmidt(vectorHandler_, GramSchmidt::cgs2); + gsMethod_ = "cgs2"; + } + + iterativeSolver_ = new LinSolverIterativeFGMRES(matrixHandler_, + vectorHandler_, + gs_, + memspace_); + irMethod_ = method; + } else { + out::error() << "Iterative refinement method " << method << " not recognized.\n"; + } +#endif + } + + real_type SystemSolver::getResidualNorm(vector_type* rhs, vector_type* x) + { + using namespace ReSolve::constants; + assert(rhs->getSize() == resVector_->getSize()); + real_type norm_b = 0.0; + + if (memspace_ == "cpu") { + resVector_->update(rhs, memory::HOST, memory::HOST); + matrixHandler_->setValuesChanged(true, "cpu"); +#ifdef RESOLVE_USE_CUDA + } else if (memspace_ == "cuda") { + if (isSolveOnDevice_) { + resVector_->update(rhs, memory::DEVICE, memory::DEVICE); + } else { + resVector_->update(rhs, memory::HOST, memory::DEVICE); + } + matrixHandler_->setValuesChanged(true, "cuda"); +#endif +#ifdef RESOLVE_USE_HIP + } else if (memspace_ == "hip") { + if (isSolveOnDevice_) { + resVector_->update(rhs, memory::DEVICE, memory::DEVICE); + } else { + resVector_->update(rhs, memory::HOST, memory::DEVICE); + } + matrixHandler_->setValuesChanged(true, "hip"); +#endif + } else { + out::error() << "Unrecognized device " << memspace_ << "\n"; + return -1.0; + } + norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memspace_)); + matrixHandler_->matvec(A_, x, resVector_, &ONE, &MINUSONE, "csr", memspace_); + real_type resnorm = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memspace_)); + return resnorm/norm_b; } -} + + const std::string SystemSolver::getFactorizationMethod() const + { + return factorizationMethod_; + } + +} // namespace ReSolve diff --git a/resolve/SystemSolver.hpp b/resolve/SystemSolver.hpp index 4e842b06..de6b3d44 100644 --- a/resolve/SystemSolver.hpp +++ b/resolve/SystemSolver.hpp @@ -1,35 +1,109 @@ //this is to solve the system, can call different linear solvers if necessary -namespace +namespace ReSolve { - class SystemSolver + class LinSolverDirectKLU; + class LinSolverDirect; + class LinSolverIterative; + class GramSchmidt; + class LinAlgWorkspaceCUDA; + class LinAlgWorkspaceHIP; + class MatrixHandler; + class VectorHandler; + + namespace vector { - SystemSolver(); - SystemSolver(std::string factorizationMethod, std::string refactorizationMethod, std::string solveMethod, std::string IRMethod); + class Vector; + } - ~SystemSolver(); + namespace matrix + { + class Sparse; + } + class SystemSolver + { public: - analyze(); // symbolic part - factorize(); // numeric part - refactorize(); - solve(double* x, double* rhs); // for triangular solve - refine(double, double* rhs); // for iterative refinement + using vector_type = vector::Vector; + using matrix_type = matrix::Sparse; + + SystemSolver(); + SystemSolver(LinAlgWorkspaceCUDA* workspaceCuda, + std::string factor = "klu", + std::string refactor = "glu", + std::string solve = "glu", + std::string ir = "none"); + SystemSolver(LinAlgWorkspaceHIP* workspaceHip, + std::string factor = "klu", + std::string refactor = "rocsolverrf", + std::string solve = "rocsolverrf", + std::string ir = "none"); + + ~SystemSolver(); + + int initialize(); + int setMatrix(matrix::Sparse* A); + int 
analyze(); // symbolic part + int factorize(); // numeric part + int refactorize(); + int refactorizationSetup(); + int precondition(); + int preconditionerSetup(); + int solve(vector_type* rhs, vector_type* x); // for triangular solve + int refine(vector_type* rhs, vector_type* x); // for iterative refinement + + // we update the matrix once it changed + int updateMatrix(std::string format, int* ia, int* ja, double* a); + + LinSolverDirect& getFactorizationSolver(); + LinSolverDirect& getRefactorizationSolver(); + LinSolverIterative& getIterativeSolver(); - // we update the matrix once it changed - updateMatrix(std::string format, int * ia, int *ja, double *a); + real_type getResidualNorm(vector_type* rhs, vector_type* x); + + // Get solver parameters + const std::string getFactorizationMethod() const; + const std::string getRefactorizationMethod() const; + const std::string getSolveMethod() const; + const std::string getRefinementMethod() const; + const std::string getOrthogonalizationMethod() const; + + // Set solver parameters + void setFactorizationMethod(std::string method); + void setRefactorizationMethod(std::string method); + void setSolveMethod(std::string method); + void setRefinementMethod(std::string method, std::string gs = "cgs2"); private: - - Sparse A; - std::string factorizationMethod; - std::string refactorizationMethod; - std::string solveMethod; - std::string IRMethod; + LinSolverDirect* factorizationSolver_{nullptr}; + LinSolverDirect* refactorizationSolver_{nullptr}; + LinSolverIterative* iterativeSolver_{nullptr}; + GramSchmidt* gs_{nullptr}; + + LinAlgWorkspaceCUDA* workspaceCuda_{nullptr}; + LinAlgWorkspaceHIP* workspaceHip_{nullptr}; + + MatrixHandler* matrixHandler_{nullptr}; + VectorHandler* vectorHandler_{nullptr}; - setup(); - //internal function to setup the different solvers. IT IS RUN ONCE THROUGH CONSTRUCTOR. + bool isSolveOnDevice_{false}; + + matrix_type* L_{nullptr}; + matrix_type* U_{nullptr}; + + index_type* P_{nullptr}; + index_type* Q_{nullptr}; + + vector_type* resVector_{nullptr}; + + matrix::Sparse* A_{nullptr}; - // add factorizationSolver, iterativeSolver, triangularSolver + // Configuration parameters + std::string factorizationMethod_; + std::string refactorizationMethod_; + std::string solveMethod_; + std::string irMethod_; + std::string gsMethod_; + std::string memspace_; }; -} +} // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index e0ac7bb4..81460c40 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -103,8 +103,8 @@ namespace ReSolve { error_sum += status; mem_.deviceSynchronize(); if (status) - out::error() << "Matvec status: " << status - << "Last error code: " << mem_.getLastDeviceError() << std::endl; + out::error() << "Matvec status: " << status << ". " + << "Last error code: " << mem_.getLastDeviceError() << ".\n"; vec_result->setDataUpdated(memory::DEVICE); cusparseDestroyDnVec(vecx); diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index ff10e973..e78fb30c 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -91,8 +91,8 @@ namespace ReSolve { error_sum += status; mem_.deviceSynchronize(); if (status) - out::error() << "Matvec status: " << status - << "Last error code: " << mem_.getLastDeviceError() << std::endl; + out::error() << "Matvec status: " << status << ". 
" + << "Last error code: " << mem_.getLastDeviceError() << ".\n"; vec_result->setDataUpdated(memory::DEVICE); return error_sum; diff --git a/resolve/vector/Vector.cpp b/resolve/vector/Vector.cpp index 3b4f9e72..741ed58b 100644 --- a/resolve/vector/Vector.cpp +++ b/resolve/vector/Vector.cpp @@ -37,7 +37,7 @@ namespace ReSolve { namespace vector { } - index_type Vector::getSize() + index_type Vector::getSize() const { return n_; } @@ -84,6 +84,12 @@ namespace ReSolve { namespace vector { } } + int Vector::update(Vector* v, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) + { + real_type* data = v->getData(memspaceIn); + return update(data, memspaceIn, memspaceOut); + } + int Vector::update(real_type* data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { int control=-1; diff --git a/resolve/vector/Vector.hpp b/resolve/vector/Vector.hpp index 5f86ef7f..f2403e4d 100644 --- a/resolve/vector/Vector.hpp +++ b/resolve/vector/Vector.hpp @@ -12,10 +12,11 @@ namespace ReSolve { namespace vector { ~Vector(); int update(real_type* data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + int update(Vector* v, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); real_type* getData(memory::MemorySpace memspace); real_type* getData(index_type i, memory::MemorySpace memspace); // get pointer to i-th vector in multivector - index_type getSize(); + index_type getSize() const; index_type getCurrentSize(); index_type getNumVectors(); @@ -29,8 +30,8 @@ namespace ReSolve { namespace vector { int copyData(memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); int setCurrentSize(index_type new_n_current); real_type* getVectorData(index_type i, memory::MemorySpace memspace); // get ith vector data out of multivector - int deepCopyVectorData(real_type* dest, index_type i, memory::MemorySpace memspace); - int deepCopyVectorData(real_type* dest, memory::MemorySpace memspace); //copy FULL multivector + int deepCopyVectorData(real_type* dest, index_type i, memory::MemorySpace memspace); + int deepCopyVectorData(real_type* dest, memory::MemorySpace memspace); //copy FULL multivector private: index_type n_; ///< size diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index e64dff17..de01653e 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -70,6 +70,6 @@ namespace ReSolve void LinAlgWorkspaceHIP::initializeHandles() { rocsparse_create_handle(&handle_rocsparse_); - rocblas_create_handle(&handle_rocblas_); - } - } // namespace ReSolve + rocblas_create_handle(&handle_rocblas_); + } +} // namespace ReSolve diff --git a/tests/functionality/CMakeLists.txt b/tests/functionality/CMakeLists.txt index 7d2aaa1b..5fae41cd 100644 --- a/tests/functionality/CMakeLists.txt +++ b/tests/functionality/CMakeLists.txt @@ -24,9 +24,14 @@ if(RESOLVE_USE_CUDA) add_executable(klu_glu_test.exe testKLU_GLU.cpp) target_link_libraries(klu_glu_test.exe PRIVATE ReSolve) + # System solver test with GLU + add_executable(sys_cuda_test.exe testSysCuda.cpp) + target_link_libraries(sys_cuda_test.exe PRIVATE ReSolve) + # Build randSolver test add_executable(rand_gmres_cuda_test.exe testRandGMRES_Cuda.cpp) target_link_libraries(rand_gmres_cuda_test.exe PRIVATE ReSolve) + endif(RESOLVE_USE_CUDA) @@ -40,9 +45,14 @@ if(RESOLVE_USE_HIP) add_executable(rocsolver_rf_fgmres_test.exe testKLU_RocSolver_FGMRES.cpp) target_link_libraries(rocsolver_rf_fgmres_test.exe PRIVATE ReSolve) + # System 
solver test with rocm rf and iterative refinement + add_executable(sys_hip_refine.exe testSysHipRefine.cpp) + target_link_libraries(sys_hip_refine.exe PRIVATE ReSolve) + # Build randSolver test add_executable(rand_gmres_hip_test.exe testRandGMRES_Rocm.cpp) target_link_libraries(rand_gmres_hip_test.exe PRIVATE ReSolve) + endif(RESOLVE_USE_HIP) # Install tests @@ -76,11 +86,13 @@ if(RESOLVE_USE_CUDA) add_test(NAME klu_rf_test COMMAND $ "${test_data_dir}") add_test(NAME klu_rf_fgmres_test COMMAND $ "${test_data_dir}") add_test(NAME klu_glu_test COMMAND $ "${test_data_dir}") + add_test(NAME sys_cuda_test COMMAND $ "${test_data_dir}") add_test(NAME rand_gmres_cuda_test COMMAND $) endif(RESOLVE_USE_CUDA) if(RESOLVE_USE_HIP) add_test(NAME rocsolver_rf_test COMMAND $ "${test_data_dir}") add_test(NAME rocsolver_rf_fgmres_test COMMAND $ "${test_data_dir}") + add_test(NAME sys_hip_refine_test COMMAND $ "${test_data_dir}") add_test(NAME rand_gmres_hip_test COMMAND $) endif(RESOLVE_USE_HIP) diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index 083c11d1..ad8410cd 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -31,7 +31,6 @@ int main(int argc, char *argv[]) ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - KLU->setupParameters(1, 0.1, false); // Input to this code is location of `data` directory where matrix files are stored const std::string data_path = (argc == 2) ? argv[1] : "./"; @@ -203,12 +202,13 @@ int main(int argc, char *argv[]) std::cout<<"\t ||x-x_true||_2/||x_true||_2 : "< 1e-16 ) || (normRmatrix2/normB2 > 1e-16)) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { std::cout<<"Test 1 (KLU with KLU refactorization) PASSED"<setupParameters(1, 0.1, false); ReSolve::LinSolverDirectCuSolverGLU* GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); // Input to this code is location of `data` directory where matrix files are stored @@ -145,8 +144,11 @@ int main(int argc, char *argv[]) real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); //compute the residual using exact solution + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + vec_x->update(vec_x->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + //status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); //evaluate the residual ON THE CPU using COMPUTED solution @@ -226,12 +228,13 @@ int main(int argc, char *argv[]) std::cout<<"\t ||x-x_true||_2/||x_true||_2 : "< 1e-16 ) || (normRmatrix2/normB2 > 1e-16)) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { std::cout<<"Test 3 (KLU with cuSolverGLU refactorization) PASSED"<setupParameters(1, 0.1, false); ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; // Input to this code is location of `data` directory where matrix files are stored @@ -223,23 +222,17 @@ int main(int argc, char *argv[]) std::cout<<"\t ||x-x_true||_2/||x_true||_2 : "< 1e-16 ) || (normRmatrix2/normB2 > 
1e-16)) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { std::cout<<"Test 2 (KLU with cuSolverRf refactorization) PASSED"<setupParameters(1, 0.1, false); ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::mgs_pm); @@ -253,10 +252,14 @@ int main(int argc, char *argv[]) std::cout<<"\t IR starting res. norm : "<getInitResidualNorm()<<" "<getFinalResidualNorm()<<" (tol 1e-14)"< 1e-12 ) || (normRmatrix2/normB2 > 1e-15)) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { + std::cout<<"Test 4 (KLU with cuSolverRf refactorization + IR) PASSED"<setupParameters(1, 0.1, false); ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); // Input to this code is location of `data` directory where matrix files are stored @@ -229,7 +228,11 @@ int main(int argc, char *argv[]) - if ((error_sum == 0) && (normRmatrix1/normB1 < 1e-16 ) && (normRmatrix2/normB2 < 1e-16)) { + if ((normRmatrix1/normB1 > 1e-16 ) || (normRmatrix2/normB2 > 1e-16)) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { std::cout<<"Test 3 (KLU with rocSolverRf refactorization) PASSED"<setupParameters(1, 0.1, false); ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); @@ -205,7 +204,8 @@ int main(int argc, char *argv[]) error_sum += status; vec_x->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = Rf->solve(vec_x); + // TODO: Investigate why results are different when using Rf->solve(vec_x) !! + status = Rf->solve(vec_rhs, vec_x); error_sum += status; FGMRES->resetMatrix(A); @@ -248,7 +248,13 @@ int main(int argc, char *argv[]) std::cout<<"\t IR iterations : "<getNumIter()<<" (max 200, restart 100)"<getInitResidualNorm()<<" "<getFinalResidualNorm()<<" (tol 1e-14)"< 1e-12 ) || (normRmatrix2/normB2 > 1e-9)) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { std::cout<<"Test 4 (KLU with rocsolverrf refactorization + IR) PASSED"<getFinalResidualNorm()/norm_b <<" \n" << "\t Number of iterations : " << FGMRES->getNumIter() << "\n"; - if ((error_sum == 0) && (final_norm_first/norm_b < 1e-11) && (FGMRES->getFinalResidualNorm()/norm_b < 1e-11 )) { + if ((final_norm_first/norm_b > 1e-11) || (FGMRES->getFinalResidualNorm()/norm_b > 1e-11 )) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { std::cout<<"Test 5 (randomized GMRES) PASSED"<getFinalResidualNorm()/norm_b <<" \n" << "\t Number of iterations : " << FGMRES->getNumIter() << "\n"; - if ((error_sum == 0) && (final_norm_first/norm_b < 1e-11) && (FGMRES->getFinalResidualNorm()/norm_b < 1e-11 )) { + if ((final_norm_first/norm_b > 1e-11) || (FGMRES->getFinalResidualNorm()/norm_b > 1e-11 )) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { std::cout<<"Test 5 (randomized GMRES) PASSED"< +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//author: KS +//functionality test to check whether cuSolverGLU works correctly. + +using namespace ReSolve::constants; + +int main(int argc, char *argv[]) +{ + // Use ReSolve data types. 
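+  // This test drives cuSolverGLU through the new SystemSolver interface:
+  // KLU analyze/factorize on the host, GLU refactorization setup and
+  // device solves, then a refactorize + solve on a second matrix with the
+  // same sparsity pattern.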
+ using real_type = ReSolve::real_type; + using vector_type = ReSolve::vector::Vector; + + //we want error sum to be 0 at the end + //that means PASS. + //otheriwse it is a FAIL. + int error_sum = 0; + int status = 0; + + ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); + workspace_CUDA->initializeHandles(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + + ReSolve::SystemSolver* solver = new ReSolve::SystemSolver(workspace_CUDA); + + // Input to this code is location of `data` directory where matrix files are stored + const std::string data_path = (argc == 2) ? argv[1] : "./"; + + + std::string matrixFileName1 = data_path + "data/matrix_ACTIVSg200_AC_10.mtx"; + std::string matrixFileName2 = data_path + "data/matrix_ACTIVSg200_AC_11.mtx"; + + std::string rhsFileName1 = data_path + "data/rhs_ACTIVSg200_AC_10.mtx.ones"; + std::string rhsFileName2 = data_path + "data/rhs_ACTIVSg200_AC_11.mtx.ones"; + + // Read first matrix + std::ifstream mat1(matrixFileName1); + if(!mat1.is_open()) + { + std::cout << "Failed to open file " << matrixFileName1 << "\n"; + return -1; + } + ReSolve::matrix::Coo* A_coo = ReSolve::io::readMatrixFromFile(mat1); + ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(A_coo->getNumRows(), + A_coo->getNumColumns(), + A_coo->getNnz(), + A_coo->symmetric(), + A_coo->expanded()); + mat1.close(); + + // Read first rhs vector + std::ifstream rhs1_file(rhsFileName1); + if(!rhs1_file.is_open()) + { + std::cout << "Failed to open file " << rhsFileName1 << "\n"; + return -1; + } + real_type* rhs = ReSolve::io::readRhsFromFile(rhs1_file); + real_type* x = new real_type[A->getNumRows()]; + vector_type* vec_rhs = new vector_type(A->getNumRows()); + vector_type* vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::DEVICE); + vector_type* vec_r = new vector_type(A->getNumRows()); + rhs1_file.close(); + + // Convert first matrix to CSR format + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + + status = solver->setMatrix(A); + error_sum += status; + + // Solve the first system using KLU + status = solver->analyze(); + error_sum += status; + + status = solver->factorize(); + error_sum += status; + + // but DO NOT SOLVE with KLU! 
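+  // Instead, hand the KLU factors (L, U, P, Q) over to cuSolverGLU so this
+  // and all subsequent solves run on the device.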
+ + status = solver->refactorizationSetup(); + error_sum += status; + std::cout << "GLU setup status: " << status << std::endl; + + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + status = solver->solve(vec_rhs, vec_x); + error_sum += status; + std::cout << "GLU solve status: " << status << std::endl; + + vector_type* vec_test; + vector_type* vec_diff; + vec_test = new vector_type(A->getNumRows()); + vec_diff = new vector_type(A->getNumRows()); + real_type* x_data = new real_type[A->getNumRows()]; + for (int i=0; igetNumRows(); ++i){ + x_data[i] = 1.0; + } + + vec_test->setData(x_data, ReSolve::memory::HOST); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + + // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); + matrix_handler->setValuesChanged(true, "cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + error_sum += status; + + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + + + //for testing only - control + + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + + //compute x-x_true + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + //evaluate its norm + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + + //compute the residual using exact solution + vec_x->update(vec_x->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::HOST); + + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + error_sum += status; + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + //evaluate the residual ON THE CPU using COMPUTED solution + + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + error_sum += status; + + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + + std::cout<<"Results (first matrix): "<coo2csr(A_coo, A, "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + + status = solver->refactorize(); + error_sum += status; + + std::cout<<"CUSOLVER GLU refactorization status: "<solve(vec_rhs, vec_x); + error_sum += status; + + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + matrix_handler->setValuesChanged(true, "cuda"); + + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + error_sum += status; + + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + + //for testing only - control + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + //compute x-x_true + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + //evaluate its norm + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + + //compute the residual using exact solution + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + error_sum += status; + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + + std::cout<<"Results (second matrix): 
"< 1e-16 ) || (normRmatrix2/normB2 > 1e-16)) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { + std::cout<<"Test 3 (KLU with cuSolverGLU refactorization) PASSED"< +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//author: KS +//functionality test to check whether cuSolverRf/FGMRES works correctly. + +using namespace ReSolve::constants; + +int main(int argc, char *argv[]) +{ + // Use ReSolve data types. + using real_type = ReSolve::real_type; + using vector_type = ReSolve::vector::Vector; + + //we want error sum to be 0 at the end + //that means PASS. + //otheriwse it is a FAIL. + int error_sum = 0; + int status = 0; + + ReSolve::LinAlgWorkspaceHIP workspace_HIP; + workspace_HIP.initializeHandles(); + ReSolve::MatrixHandler matrix_handler(&workspace_HIP); + ReSolve::VectorHandler vector_handler(&workspace_HIP); + + ReSolve::SystemSolver solver(&workspace_HIP); + solver.setRefinementMethod("fgmres", "cgs2"); + solver.getIterativeSolver().setRestart(100); + solver.getIterativeSolver().setMaxit(200); + + // Input to this code is location of `data` directory where matrix files are stored + const std::string data_path = (argc == 2) ? argv[1] : "./"; + + + std::string matrixFileName1 = data_path + "data/matrix_ACTIVSg2000_AC_00.mtx"; + std::string matrixFileName2 = data_path + "data/matrix_ACTIVSg2000_AC_02.mtx"; + + std::string rhsFileName1 = data_path + "data/rhs_ACTIVSg2000_AC_00.mtx.ones"; + std::string rhsFileName2 = data_path + "data/rhs_ACTIVSg2000_AC_02.mtx.ones"; + + + // Read first matrix + std::ifstream mat1(matrixFileName1); + if(!mat1.is_open()) + { + std::cout << "Failed to open file " << matrixFileName1 << "\n"; + return -1; + } + ReSolve::matrix::Coo* A_coo = ReSolve::io::readMatrixFromFile(mat1); + ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(A_coo->getNumRows(), + A_coo->getNumColumns(), + A_coo->getNnz(), + A_coo->symmetric(), + A_coo->expanded()); + mat1.close(); + + // Read first rhs vector + std::ifstream rhs1_file(rhsFileName1); + if(!rhs1_file.is_open()) + { + std::cout << "Failed to open file " << rhsFileName1 << "\n"; + return -1; + } + real_type* rhs = ReSolve::io::readRhsFromFile(rhs1_file); + real_type* x = new real_type[A->getNumRows()]; + vector_type* vec_rhs = new vector_type(A->getNumRows()); + vector_type* vec_x = new vector_type(A->getNumRows()); + vector_type* vec_r = new vector_type(A->getNumRows()); + rhs1_file.close(); + + // Convert first matrix to CSR format + matrix_handler.coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + + status = solver.setMatrix(A); + error_sum += status; + + // Solve the first system using KLU + status = solver.analyze(); + error_sum += status; + + status = solver.factorize(); + error_sum += status; + + status = solver.solve(vec_rhs, vec_x); + error_sum += status; + + vector_type* vec_test; + vector_type* vec_diff; + + vec_test = new vector_type(A->getNumRows()); + vec_diff = new vector_type(A->getNumRows()); + real_type* x_data = new real_type[A->getNumRows()]; + + for (int i=0; igetNumRows(); ++i){ + x_data[i] = 1.0; + } + + vec_test->setData(x_data, ReSolve::memory::HOST); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + + // real_type normXmatrix1 = sqrt(vector_handler.dot(vec_test, vec_test, 
ReSolve::memory::DEVICE)); + matrix_handler.setValuesChanged(true, "hip"); + //evaluate the residual ||b-Ax|| + status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","hip"); + error_sum += status; + + real_type normRmatrix1 = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + + + //for testing only - control + + real_type normXtrue = sqrt(vector_handler.dot(vec_x, vec_x, "hip")); + real_type normB1 = sqrt(vector_handler.dot(vec_rhs, vec_rhs, "hip")); + + //compute x-x_true + vector_handler.axpy(&MINUSONE, vec_x, vec_diff, "hip"); + //evaluate its norm + real_type normDiffMatrix1 = sqrt(vector_handler.dot(vec_diff, vec_diff, "hip")); + + //compute the residual using exact solution + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + status = matrix_handler.matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "hip"); + error_sum += status; + real_type exactSol_normRmatrix1 = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + //evaluate the residual ON THE CPU using COMPUTED solution + + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + + status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + error_sum += status; + + real_type normRmatrix1CPU = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + + std::cout<<"Results (first matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + + status = solver.refactorize(); + error_sum += status; + + vec_x->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + status = solver.solve(vec_rhs, vec_x); + error_sum += status; + + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + status = solver.refine(vec_rhs, vec_x); + error_sum += status; + + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + matrix_handler.setValuesChanged(true, "hip"); + + //evaluate final residual + status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); + error_sum += status; + + real_type normRmatrix2 = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + + + //for testing only - control + real_type normB2 = sqrt(vector_handler.dot(vec_rhs, vec_rhs, "hip")); + //compute x-x_true + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vector_handler.axpy(&MINUSONE, vec_x, vec_diff, "hip"); + //evaluate its norm + real_type normDiffMatrix2 = sqrt(vector_handler.dot(vec_diff, vec_diff, "hip")); + + //compute the residual using exact solution + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + status = matrix_handler.matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); + error_sum += status; + real_type exactSol_normRmatrix2 = sqrt(vector_handler.dot(vec_r, vec_r, "hip")); + std::cout<<"Results (second matrix): "< 1e-12 ) || (normRmatrix2/normB2 > 1e-9)) { + std::cout << "Result inaccurate!\n"; + error_sum++; + } + if (error_sum == 0) { + std::cout<<"Test 4 (KLU with rocsolverrf refactorization + IR) PASSED"<
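
Taken together, testSysHipRefine.cpp exercises the call sequence the new SystemSolver class is designed around. Below is a condensed sketch of that sequence on the HIP backend; it mirrors the test above, assumes the matrix A and the vectors vec_rhs and vec_x have already been read and allocated as in the test, and omits headers, file I/O, and the per-call status checks that the test accumulates into error_sum.

// Condensed SystemSolver usage on the HIP backend (sketch only; A, vec_rhs
// and vec_x are assumed to be set up as in testSysHipRefine.cpp).
ReSolve::LinAlgWorkspaceHIP workspace;
workspace.initializeHandles();

// HIP constructor defaults: KLU factorization, rocsolverRf refactorization,
// no iterative refinement.
ReSolve::SystemSolver solver(&workspace);

// Opt into FGMRES iterative refinement with the CGS2 Gram-Schmidt variant.
solver.setRefinementMethod("fgmres", "cgs2");
solver.getIterativeSolver().setRestart(100);
solver.getIterativeSolver().setMaxit(200);

solver.setMatrix(A);            // register the system matrix
solver.analyze();               // symbolic factorization
solver.factorize();             // numeric factorization
solver.solve(vec_rhs, vec_x);   // direct solve of the first system

// After the matrix values change (same sparsity pattern):
solver.refactorize();
solver.solve(vec_rhs, vec_x);
solver.refine(vec_rhs, vec_x);  // FGMRES iterative refinement

// Relative residual ||b - A*x|| / ||b|| computed by the solver itself.
ReSolve::real_type rel_res = solver.getResidualNorm(vec_rhs, vec_x);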