diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index e777833a..bee712bd 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -35,9 +35,9 @@ int main(int argc, char *argv[]) ReSolve::matrix::Coo* A_coo; ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); + ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); real_type* rhs; real_type* x; diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index daeae43a..a1aa1fad 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -31,9 +31,9 @@ int main(int argc, char *argv[]) ReSolve::matrix::Coo* A_coo; ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); + ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); real_type* rhs; real_type* x; diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index bd7dca42..fa63f2d5 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -66,12 +66,12 @@ namespace ReSolve { if (d_V_ != nullptr) { // cudaFree(d_V_); - delete [] d_V_; + delete d_V_; } if (d_Z_ != nullptr) { // cudaFree(d_Z_); - delete [] d_Z_; + delete d_Z_; } } diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index c605ce5a..50dd3082 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -15,6 +15,15 @@ namespace ReSolve { // Create a shortcut name for Logger static class using out = io::Logger; + /** + * @brief Default constructor + * + * @post Instantiates CPU and CUDA matrix handlers, but does not + * create a workspace. + * + * @todo There is little utility for the default constructor. Rethink its purpose. + * Consider making it private method. + */ MatrixHandler::MatrixHandler() { this->new_matrix_ = true; @@ -22,15 +31,43 @@ namespace ReSolve { cudaImpl_ = new MatrixHandlerCuda(); } + /** + * @brief Destructor + * + */ MatrixHandler::~MatrixHandler() { + if (isCpuEnabled_) delete cpuImpl_; + if (isCudaEnabled_) delete cudaImpl_; } + /** + * @brief Constructor taking pointer to the workspace as its parameter. + * + * @note The CPU implementation currently does not require a workspace. + * The workspace pointer parameter is provided for forward compatibility. + */ MatrixHandler::MatrixHandler(LinAlgWorkspace* new_workspace) { - workspace_ = new_workspace; cpuImpl_ = new MatrixHandlerCpu(new_workspace); + isCpuEnabled_ = true; + isCudaEnabled_ = false; + } + + /** + * @brief Constructor taking pointer to the CUDA workspace as its parameter. + * + * @post A CPU implementation instance is created because it is cheap and + * it does not require a workspace. + * + * @post A CUDA implementation instance is created with supplied workspace. + */ + MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA* new_workspace) + { + cpuImpl_ = new MatrixHandlerCpu(); cudaImpl_ = new MatrixHandlerCuda(new_workspace); + isCpuEnabled_ = true; + isCudaEnabled_ = true; } void MatrixHandler::setValuesChanged(bool isValuesChanged, std::string memspace) diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 7ba01a08..fed6b593 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -1,8 +1,3 @@ -// this class encapsulates various matrix manipulation operations, commonly required by linear solvers: -// this includes -// (1) Matrix format conversion: coo2csr, csr2csc -// (2) Matrix vector product (SpMV) -// (3) Matrix 1-norm #pragma once #include #include @@ -22,12 +17,25 @@ namespace ReSolve class Csr; } class LinAlgWorkspace; + class LinAlgWorkspaceCUDA; class MatrixHandlerImpl; } namespace ReSolve { + /** + * @brief this class encapsulates various matrix manipulation operations, + * commonly required by linear solvers. + * + * This includes: + * - Matrix format conversion: coo2csr, csr2csc + * - Matrix vector product (SpMV) + * - Matrix 1-norm + * + * @author Kasia Swirydowicz + * @author Slaven Peles + */ class MatrixHandler { using vector_type = vector::Vector; @@ -35,6 +43,7 @@ namespace ReSolve { public: MatrixHandler(); MatrixHandler(LinAlgWorkspace* workspace); + MatrixHandler(LinAlgWorkspaceCUDA* workspace); ~MatrixHandler(); int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) @@ -52,12 +61,14 @@ namespace ReSolve { void setValuesChanged(bool toWhat, std::string memspace); private: - LinAlgWorkspace* workspace_{nullptr}; bool new_matrix_{true}; ///< if the structure changed, you need a new handler. MemoryHandler mem_; ///< Device memory manager object MatrixHandlerImpl* cpuImpl_{nullptr}; MatrixHandlerImpl* cudaImpl_{nullptr}; + + bool isCpuEnabled_{false}; + bool isCudaEnabled_{false}; }; } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index deb09180..15252d85 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -14,7 +14,6 @@ namespace ReSolve { MatrixHandlerCuda::MatrixHandlerCuda() { - // new_matrix_ = true; values_changed_ = true; } @@ -22,186 +21,28 @@ namespace ReSolve { { } - MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspace* new_workspace) + MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspaceCUDA* new_workspace) { workspace_ = new_workspace; } - // bool MatrixHandlerCuda::getValuesChanged() - // { - // return this->values_changed_; - // } + MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspace* new_workspace) + { + workspace_ = (LinAlgWorkspaceCUDA*) new_workspace; + } void MatrixHandlerCuda::setValuesChanged(bool values_changed) { values_changed_ = values_changed; } -// int MatrixHandlerCuda::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) -// { -// //this happens on the CPU not on the GPU -// //but will return whatever memspace requested. - -// //count nnzs first - -// index_type nnz_unpacked = 0; -// index_type nnz = A_coo->getNnz(); -// index_type n = A_coo->getNumRows(); -// bool symmetric = A_coo->symmetric(); -// bool expanded = A_coo->expanded(); - -// index_type* nnz_counts = new index_type[n]; -// std::fill_n(nnz_counts, n, 0); -// index_type* coo_rows = A_coo->getRowData("cpu"); -// index_type* coo_cols = A_coo->getColData("cpu"); -// real_type* coo_vals = A_coo->getValues("cpu"); - -// index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal -// std::fill_n(diag_control, n, 0); -// index_type nnz_unpacked_no_duplicates = 0; -// index_type nnz_no_duplicates = nnz; - - -// //maybe check if they exist? -// for (index_type i = 0; i < nnz; ++i) -// { -// nnz_counts[coo_rows[i]]++; -// nnz_unpacked++; -// nnz_unpacked_no_duplicates++; -// if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) -// { -// nnz_counts[coo_cols[i]]++; -// nnz_unpacked++; -// nnz_unpacked_no_duplicates++; -// } -// if (coo_rows[i] == coo_cols[i]){ -// if (diag_control[coo_rows[i]] > 0) { -// //duplicate -// nnz_unpacked_no_duplicates--; -// nnz_no_duplicates--; -// } -// diag_control[coo_rows[i]]++; -// } -// } -// A_csr->setExpanded(true); -// A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); -// index_type* csr_ia = new index_type[n+1]; -// std::fill_n(csr_ia, n + 1, 0); -// index_type* csr_ja = new index_type[nnz_unpacked]; -// real_type* csr_a = new real_type[nnz_unpacked]; -// index_type* nnz_shifts = new index_type[n]; -// std::fill_n(nnz_shifts, n , 0); - -// IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - -// csr_ia[0] = 0; - -// for (index_type i = 1; i < n + 1; ++i){ -// csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); -// } - -// int r, start; - - -// for (index_type i = 0; i < nnz; ++i){ -// //which row -// r = coo_rows[i]; -// start = csr_ia[r]; - -// if ((start + nnz_shifts[r]) > nnz_unpacked) { -// out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; -// } -// if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates -// bool already_there = false; -// for (index_type j = start; j < start + nnz_shifts[r]; ++j) -// { -// index_type c = tmp[j].getIdx(); -// if (c == r) { -// real_type val = tmp[j].getValue(); -// val += coo_vals[i]; -// tmp[j].setValue(val); -// already_there = true; -// out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; -// } -// } -// if (!already_there){ // first time this duplicates appears - -// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - -// nnz_shifts[r]++; -// } -// } else {//not diagonal -// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); -// nnz_shifts[r]++; - -// if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) -// { -// r = coo_cols[i]; -// start = csr_ia[r]; - -// if ((start + nnz_shifts[r]) > nnz_unpacked) -// out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; -// tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); -// nnz_shifts[r]++; -// } -// } -// } -// //now sort whatever is inside rows - -// for (int i = 0; i < n; ++i) -// { - -// //now sorting (and adding 1) -// int colStart = csr_ia[i]; -// int colEnd = csr_ia[i + 1]; -// int length = colEnd - colStart; -// std::sort(&tmp[colStart],&tmp[colStart] + length); -// } - -// for (index_type i = 0; i < nnz_unpacked; ++i) -// { -// csr_ja[i] = tmp[i].getIdx(); -// csr_a[i] = tmp[i].getValue(); -// } -// #if 0 -// for (int i = 0; isetNnz(nnz_no_duplicates); -// if (memspace == "cpu"){ -// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cpu"); -// } else { -// if (memspace == "cuda"){ -// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda"); -// } else { -// //display error -// } -// } -// delete [] nnz_counts; -// delete [] tmp; -// delete [] nnz_shifts; -// delete [] csr_ia; -// delete [] csr_ja; -// delete [] csr_a; -// delete [] diag_control; - -// return 0; -// } int MatrixHandlerCuda::matvec(matrix::Sparse* Ageneric, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrixFormat) + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + std::string matrixFormat) { using namespace constants; int error_sum = 0; @@ -209,11 +50,8 @@ namespace ReSolve { matrix::Csr* A = dynamic_cast(Ageneric); //result = alpha *A*x + beta * result cusparseStatus_t status; - // std::cout << "Matvec on NVIDIA GPU ...\n"; - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; cusparseDnVecDescr_t vecx = workspaceCUDA->getVecX(); - //printf("is vec_x NULL? %d\n", vec_x->getData("cuda") == nullptr); - //printf("is vec_result NULL? %d\n", vec_result->getData("cuda") == nullptr); cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData("cuda"), CUDA_R_64F); @@ -224,7 +62,7 @@ namespace ReSolve { void* buffer_spmv = workspaceCUDA->getSpmvBuffer(); cusparseHandle_t handle_cusparse = workspaceCUDA->getCusparseHandle(); - if (values_changed_) { + if (values_changed_) { status = cusparseCreateCsr(&matA, A->getNumRows(), A->getNumColumns(), diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index f2250008..c77eb795 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -38,6 +38,7 @@ namespace ReSolve { public: MatrixHandlerCuda(); MatrixHandlerCuda(LinAlgWorkspace* workspace); + MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); virtual ~MatrixHandlerCuda(); int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); //memspace decides on what is returned (cpu or cuda pointer) @@ -54,7 +55,7 @@ namespace ReSolve { void setValuesChanged(bool isValuesChanged); private: - LinAlgWorkspace* workspace_{nullptr}; + LinAlgWorkspaceCUDA* workspace_{nullptr}; // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. bool values_changed_{true}; ///< needed for matvec diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index 70ee83b0..66790c83 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -9,7 +10,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether KLU works correctly. using namespace ReSolve::constants; @@ -26,9 +27,9 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); + ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); diff --git a/tests/functionality/testKLU_GLU.cpp b/tests/functionality/testKLU_GLU.cpp index 1c66fda1..1384f405 100644 --- a/tests/functionality/testKLU_GLU.cpp +++ b/tests/functionality/testKLU_GLU.cpp @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; + ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); diff --git a/tests/functionality/testKLU_Rf.cpp b/tests/functionality/testKLU_Rf.cpp index 656aa724..b7e725a7 100644 --- a/tests/functionality/testKLU_Rf.cpp +++ b/tests/functionality/testKLU_Rf.cpp @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; + ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); diff --git a/tests/functionality/testKLU_Rf_FGMRES.cpp b/tests/functionality/testKLU_Rf_FGMRES.cpp index 14bb0f33..3e4b9d61 100644 --- a/tests/functionality/testKLU_Rf_FGMRES.cpp +++ b/tests/functionality/testKLU_Rf_FGMRES.cpp @@ -31,7 +31,7 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; + ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); @@ -258,5 +258,18 @@ int main(int argc, char *argv[]) } else { std::cout<<"Test 4 (KLU with cuSolverRf refactorization + IR) FAILED, error sum: "<setValuesChanged(true, memspace_); + handler->matvec(A, &x, &y, &alpha, &beta, "csr", memspace_); status *= verifyAnswer(y, 4.0, memspace_); - delete workspace; + delete handler; delete A; return status.report(__func__); @@ -71,6 +70,21 @@ class MatrixHandlerTests : TestBase private: std::string memspace_{"cpu"}; + ReSolve::MatrixHandler* createMatrixHandler() + { + if (memspace_ == "cpu") { + LinAlgWorkspace* workpsace = new LinAlgWorkspace(); + return new MatrixHandler(workpsace); + } else if (memspace_ == "cuda") { + LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + return new MatrixHandler(workspace); + } else { + std::cout << "Invalid memory space " << memspace_ << "\n"; + } + return nullptr; + } + bool verifyAnswer(vector::Vector& x, real_type answer, std::string memspace) { bool status = true;