From 949680f3b320f8cb4541d30148f7b3c7f670f7c2 Mon Sep 17 00:00:00 2001 From: pelesh Date: Tue, 31 Oct 2023 20:18:27 -0400 Subject: [PATCH] Use enums instead of strings to denote memory space (#42) * Use enums for memory space ID in matrix classes. * Use enums for vector class memory space IDs. --- examples/r_KLU_GLU.cpp | 12 +- examples/r_KLU_GLU_matrix_values_update.cpp | 14 +- examples/r_KLU_KLU.cpp | 8 +- examples/r_KLU_KLU_standalone.cpp | 6 +- examples/r_KLU_rf.cpp | 8 +- examples/r_KLU_rf_FGMRES.cpp | 18 +- .../r_KLU_rf_FGMRES_reuse_factorization.cpp | 24 +- resolve/GramSchmidt.cpp | 72 +++--- resolve/LinSolverDirectCuSolverGLU.cpp | 44 ++-- resolve/LinSolverDirectCuSolverRf.cpp | 32 +-- resolve/LinSolverDirectKLU.cpp | 52 ++-- resolve/LinSolverIterativeFGMRES.cpp | 18 +- resolve/MemoryUtils.hpp | 10 + resolve/matrix/Coo.cpp | 159 ++++++------ resolve/matrix/Coo.hpp | 14 +- resolve/matrix/Csc.cpp | 159 ++++++------ resolve/matrix/Csc.hpp | 14 +- resolve/matrix/Csr.cpp | 161 ++++++------ resolve/matrix/Csr.hpp | 14 +- resolve/matrix/MatrixHandler.cpp | 12 +- resolve/matrix/MatrixHandlerCpu.cpp | 24 +- resolve/matrix/MatrixHandlerCuda.cpp | 38 +-- resolve/matrix/MatrixHandlerHip.cpp | 36 +-- resolve/matrix/Sparse.cpp | 109 ++++---- resolve/matrix/Sparse.hpp | 24 +- resolve/matrix/io.cpp | 10 +- resolve/vector/Vector.cpp | 239 +++++++++--------- resolve/vector/Vector.hpp | 28 +- resolve/vector/VectorHandlerCpu.cpp | 10 +- resolve/vector/VectorHandlerCuda.cpp | 36 +-- resolve/vector/VectorHandlerHip.cpp | 36 +-- tests/functionality/testKLU.cpp | 24 +- tests/functionality/testKLU_GLU.cpp | 28 +- tests/functionality/testKLU_Rf.cpp | 22 +- tests/functionality/testKLU_Rf_FGMRES.cpp | 28 +- tests/unit/matrix/MatrixHandlerTests.hpp | 35 +-- tests/unit/matrix/MatrixIoTests.hpp | 8 +- tests/unit/vector/GramSchmidtTests.hpp | 32 ++- tests/unit/vector/VectorHandlerTests.hpp | 107 +++++--- 39 files changed, 898 insertions(+), 827 deletions(-) diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index e7b19f4e..9f271254 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -93,8 +93,8 @@ int main(int argc, char *argv[]) x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate("cpu");//for KLU - vec_x->allocate("cuda"); + vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); @@ -107,11 +107,11 @@ int main(int argc, char *argv[]) //Now convert to CSR. if (i < 1) { matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); } else { matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<solve(vec_rhs, vec_x); std::cout<<"CUSOLVER GLU solve status: "<update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "cuda"); diff --git a/examples/r_KLU_GLU_matrix_values_update.cpp b/examples/r_KLU_GLU_matrix_values_update.cpp index ee99f0a0..ded685ac 100644 --- a/examples/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/r_KLU_GLU_matrix_values_update.cpp @@ -96,8 +96,8 @@ int main(int argc, char *argv[]) x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate("cpu");//for KLU - vec_x->allocate("cuda"); + vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); } else { if (i==1) { @@ -106,7 +106,7 @@ int main(int argc, char *argv[]) ReSolve::io::readAndUpdateMatrix(mat_file, A_exp_coo); } std::cout<<"Updating values of A_coo!"<updateValues(A_exp_coo->getValues("cpu"), "cpu", "cpu"); + A_coo->updateValues(A_exp_coo->getValues(ReSolve::memory::HOST), ReSolve::memory::HOST, ReSolve::memory::HOST); //ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } @@ -117,11 +117,11 @@ int main(int argc, char *argv[]) //Now convert to CSR. if (i < 1) { matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); } else { matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<solve(vec_rhs, vec_x); std::cout<<"CUSOLVER GLU solve status: "<update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "cuda"); diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index b9328e8a..901e36a5 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -108,11 +108,11 @@ int main(int argc, char *argv[]) //Now convert to CSR. if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); } else { matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<solve(vec_rhs, vec_x); std::cout<<"KLU solve status: "<update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); matrix_handler->setValuesChanged(true, "cpu"); diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index 0b8f6114..3dfaf716 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -83,8 +83,8 @@ int main(int argc, char *argv[]) //Now convert to CSR. matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); std::cout << "COO to CSR completed. Expanded NNZ: " << A->getNnzExpanded() << std::endl; //Now call direct solver KLU->setupParameters(1, 0.1, false); @@ -96,7 +96,7 @@ int main(int argc, char *argv[]) std::cout << "KLU factorization status: " << status << std::endl; status = KLU->solve(vec_rhs, vec_x); std::cout << "KLU solve status: " << status << std::endl; - vec_r->update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); matrix_handler->setValuesChanged(true, "cpu"); diff --git a/examples/r_KLU_rf.cpp b/examples/r_KLU_rf.cpp index 7369af18..d9310773 100644 --- a/examples/r_KLU_rf.cpp +++ b/examples/r_KLU_rf.cpp @@ -107,11 +107,11 @@ int main(int argc, char *argv[] ) //Now convert to CSR. if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); } else { matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<solve(vec_rhs, vec_x); //std::cout<<"KLU solve status: "<update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "cuda"); diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index 07839cbb..6df5419a 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -96,8 +96,8 @@ int main(int argc, char *argv[]) x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate("cpu");//for KLU - vec_x->allocate("cuda"); + vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); } else { @@ -111,11 +111,11 @@ int main(int argc, char *argv[]) //Now convert to CSR. if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); } else { matrix_handler->coo2csr(A_coo,A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<solve(vec_rhs, vec_x); std::cout<<"KLU solve status: "<update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); matrix_handler->setValuesChanged(true, "cuda"); @@ -162,8 +162,8 @@ int main(int argc, char *argv[]) status = Rf->solve(vec_rhs, vec_x); std::cout<<"CUSOLVER RF solve status: "<update(rhs, "cpu", "cuda"); - norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); //matrix_handler->setValuesChanged(true, "cuda"); @@ -176,7 +176,7 @@ int main(int argc, char *argv[]) << std::scientific << std::setprecision(16) << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); FGMRES->solve(vec_rhs, vec_x); std::cout << "FGMRES: init nrm: " diff --git a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp index 56ab43fe..5ead8186 100644 --- a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -98,8 +98,8 @@ int main(int argc, char *argv[]) x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate("cpu");//for KLU - vec_x->allocate("cuda"); + vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); } else { @@ -113,11 +113,11 @@ int main(int argc, char *argv[]) //Now convert to CSR. if (i < 2) { matrix_handler->coo2csr(A_coo,A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); } else { matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<solve(vec_rhs, vec_x); std::cout<<"KLU solve status: "<update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); matrix_handler->setValuesChanged(true, "cuda"); @@ -171,20 +171,20 @@ int main(int argc, char *argv[]) status = Rf->refactorize(); std::cout << "CUSOLVER RF, using REAL refactorization, refactorization status: " << status << std::endl; - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = Rf->solve(vec_rhs, vec_x); FGMRES->setupPreconditioner("CuSolverRf", Rf); } - //if (i%2!=0) vec_x->setToZero("cuda"); + //if (i%2!=0) vec_x->setToZero(ReSolve::memory::DEVICE); real_type norm_x = vector_handler->dot(vec_x, vec_x, "cuda"); std::cout << "Norm of x (before solve): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; std::cout<<"CUSOLVER RF solve status: "<update(rhs, "cpu", "cuda"); - vec_r->update(rhs, "cpu", "cuda"); - norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); matrix_handler->setValuesChanged(true, "cuda"); @@ -199,7 +199,7 @@ int main(int argc, char *argv[]) << std::scientific << std::setprecision(16) << norm_b << "\n"; - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); FGMRES->solve(vec_rhs, vec_x); std::cout << "FGMRES: init nrm: " diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index b6a27b04..fb86fc8d 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -36,10 +36,10 @@ namespace ReSolve delete h_L_; delete h_rv_; - vec_rv_->setData(nullptr, "cuda"); - vec_rv_->setData(nullptr, "cpu"); - vec_Hcolumn_->setData(nullptr, "cuda"); - vec_Hcolumn_->setData(nullptr, "cpu"); + vec_rv_->setData(nullptr, memory::DEVICE); + vec_rv_->setData(nullptr, memory::HOST); + vec_Hcolumn_->setData(nullptr, memory::DEVICE); + vec_Hcolumn_->setData(nullptr, memory::HOST); delete [] vec_rv_; delete [] vec_Hcolumn_;; @@ -47,18 +47,18 @@ namespace ReSolve if(variant_ == cgs2) { delete h_aux_; - vec_Hcolumn_->setData(nullptr, "cuda"); - // vec_Hcolumn_->setData(nullptr, "cpu"); + vec_Hcolumn_->setData(nullptr, memory::DEVICE); + // vec_Hcolumn_->setData(nullptr, memory::HOST); delete [] vec_Hcolumn_; } if(variant_ == mgs_pm) { delete h_aux_; } - vec_v_->setData(nullptr, "cuda"); - vec_v_->setData(nullptr, "cpu"); - vec_w_->setData(nullptr, "cuda"); - vec_w_->setData(nullptr, "cpu"); + vec_v_->setData(nullptr, memory::DEVICE); + vec_v_->setData(nullptr, memory::HOST); + vec_w_->setData(nullptr, memory::DEVICE); + vec_w_->setData(nullptr, memory::HOST); delete [] vec_w_; delete [] vec_v_; @@ -103,15 +103,15 @@ namespace ReSolve h_rv_ = new real_type[num_vecs_ + 1]; vec_rv_ = new vector_type(num_vecs_ + 1, 2); - vec_rv_->allocate("cuda"); + vec_rv_->allocate(memory::DEVICE); vec_Hcolumn_ = new vector_type(num_vecs_ + 1); - vec_Hcolumn_->allocate("cuda"); + vec_Hcolumn_->allocate(memory::DEVICE); } if(variant_ == cgs2) { h_aux_ = new real_type[num_vecs_ + 1]; vec_Hcolumn_ = new vector_type(num_vecs_ + 1); - vec_Hcolumn_->allocate("cuda"); + vec_Hcolumn_->allocate(memory::DEVICE); } if(variant_ == mgs_pm) { @@ -135,10 +135,10 @@ namespace ReSolve switch (variant_){ case mgs: - vec_w_->setData(V->getVectorData(i + 1, "cuda"), "cuda"); + vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); for(int j = 0; j <= i; ++j) { t = 0.0; - vec_v_->setData( V->getVectorData(j, "cuda"), "cuda"); + vec_v_->setData( V->getVectorData(j, memory::DEVICE), memory::DEVICE); t = vector_handler_->dot(vec_v_, vec_w_, "cuda"); H[ idxmap(i, j, num_vecs_ + 1) ] = t; t *= -1.0; @@ -159,26 +159,26 @@ namespace ReSolve break; case cgs2: - vec_v_->setData(V->getVectorData(i + 1, "cuda"), "cuda"); - vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_,"cuda"); + vec_v_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); + vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, "cuda"); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, "cuda" ); // copy H_col to aux, we will need it later - vec_Hcolumn_->setDataUpdated("cuda"); + vec_Hcolumn_->setDataUpdated(memory::DEVICE); vec_Hcolumn_->setCurrentSize(i + 1); - vec_Hcolumn_->deepCopyVectorData(h_aux_, 0, "cpu"); + vec_Hcolumn_->deepCopyVectorData(h_aux_, 0, memory::HOST); //Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_,"cuda"); + vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, "cuda"); // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, "cuda" ); // copy H_col to H - vec_Hcolumn_->setDataUpdated("cuda"); - vec_Hcolumn_->deepCopyVectorData(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, "cpu"); + vec_Hcolumn_->setDataUpdated(memory::DEVICE); + vec_Hcolumn_->deepCopyVectorData(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); // add both pieces together (unstable otherwise, careful here!!) t = 0.0; @@ -201,16 +201,16 @@ namespace ReSolve break; case mgs_two_synch: // V[1:i]^T[V[i] w] - vec_v_->setData(V->getVectorData(i, "cuda"), "cuda"); - vec_w_->setData(V->getVectorData(i + 1, "cuda"), "cuda"); + vec_v_->setData(V->getVectorData(i, memory::DEVICE), memory::DEVICE); + vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); vec_rv_->setCurrentSize(i + 1); vector_handler_->massDot2Vec(n, V, i, vec_v_, vec_rv_, "cuda"); - vec_rv_->setDataUpdated("cuda"); - vec_rv_->copyData("cuda", "cpu"); + vec_rv_->setDataUpdated(memory::DEVICE); + vec_rv_->copyData(memory::DEVICE, memory::HOST); - vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, "cpu"); - h_rv_ = vec_rv_->getVectorData(1, "cpu"); + vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + h_rv_ = vec_rv_->getVectorData(1, memory::HOST); for(int j=0; j<=i; ++j) { H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0; @@ -225,7 +225,7 @@ namespace ReSolve H[ idxmap(i, j, num_vecs_ + 1) ] -= s; } // for j vec_Hcolumn_->setCurrentSize(i + 1); - vec_Hcolumn_->update(&H[ idxmap(i, 0, num_vecs_ + 1)], "cpu", "cuda"); + vec_Hcolumn_->update(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memory::DEVICE); vector_handler_->massAxpy(n, vec_Hcolumn_, i, V, vec_w_, "cuda"); // normalize (second synch) @@ -243,16 +243,16 @@ namespace ReSolve return 0; break; case mgs_pm: - vec_v_->setData(V->getVectorData(i, "cuda"), "cuda"); - vec_w_->setData(V->getVectorData(i + 1, "cuda"), "cuda"); + vec_v_->setData(V->getVectorData(i, memory::DEVICE), memory::DEVICE); + vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); vec_rv_->setCurrentSize(i + 1); vector_handler_->massDot2Vec(n, V, i, vec_v_, vec_rv_, "cuda"); - vec_rv_->setDataUpdated("cuda"); - vec_rv_->copyData("cuda", "cpu"); + vec_rv_->setDataUpdated(memory::DEVICE); + vec_rv_->copyData(memory::DEVICE, memory::HOST); - vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, "cpu"); - h_rv_ = vec_rv_->getVectorData(1, "cpu"); + vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + h_rv_ = vec_rv_->getVectorData(1, memory::HOST); for(int j = 0; j <= i; ++j) { H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0; @@ -295,7 +295,7 @@ namespace ReSolve } vec_Hcolumn_->setCurrentSize(i + 1); - vec_Hcolumn_->update(&H[ idxmap(i, 0, num_vecs_ + 1)], "cpu", "cuda"); + vec_Hcolumn_->update(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memory::DEVICE); vector_handler_->massAxpy(n, vec_Hcolumn_, i, V, vec_w_, "cuda"); // normalize (second synch) diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index 75039ff4..0350efea 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -50,14 +50,14 @@ namespace ReSolve n, nnz, descr_A_, - A_->getRowData("cpu"), //kRowPtr_, - A_->getColData("cpu"), //jCol_, + A_->getRowData(memory::HOST), //kRowPtr_, + A_->getColData(memory::HOST), //jCol_, P, /* base-0 */ Q, /* base-0 */ M_->getNnz(), /* nnzM */ descr_M_, - M_->getRowData("cpu"), - M_->getColData("cpu"), + M_->getRowData(memory::HOST), + M_->getColData(memory::HOST), info_M_); error_sum += status_cusolver_; //NOW the buffer @@ -77,9 +77,9 @@ namespace ReSolve /* A is original matrix */ nnz, descr_A_, - A_->getValues("cuda"), //da_, - A_->getRowData("cuda"), //kRowPtr_, - A_->getColData("cuda"), //jCol_, + A_->getValues( memory::DEVICE), //da_, + A_->getRowData(memory::DEVICE), //kRowPtr_, + A_->getColData(memory::DEVICE), //jCol_, info_M_); error_sum += status_cusolver_; @@ -93,15 +93,15 @@ namespace ReSolve { // L and U need to be in CSC format index_type n = L->getNumRows(); - index_type* Lp = L->getColData("cpu"); - index_type* Li = L->getRowData("cpu"); - index_type* Up = U->getColData("cpu"); - index_type* Ui = U->getRowData("cpu"); + index_type* Lp = L->getColData(memory::HOST); + index_type* Li = L->getRowData(memory::HOST); + index_type* Up = U->getColData(memory::HOST); + index_type* Ui = U->getRowData(memory::HOST); index_type nnzM = ( L->getNnz() + U->getNnz() - n ); M_ = new matrix::Csr(n, n, nnzM); - M_->allocateMatrixData("cpu"); - index_type* mia = M_->getRowData("cpu"); - index_type* mja = M_->getColData("cpu"); + M_->allocateMatrixData(memory::HOST); + index_type* mia = M_->getRowData(memory::HOST); + index_type* mja = M_->getColData(memory::HOST); index_type row; for(index_type i = 0; i < n; ++i) { // go through EACH COLUMN OF L first @@ -153,9 +153,9 @@ namespace ReSolve /* A is original matrix */ A_->getNnzExpanded(), descr_A_, - A_->getValues("cuda"), //da_, - A_->getRowData("cuda"), //kRowPtr_, - A_->getColData("cuda"), //jCol_, + A_->getValues( memory::DEVICE), //da_, + A_->getRowData(memory::DEVICE), //kRowPtr_, + A_->getColData(memory::DEVICE), //jCol_, info_M_); error_sum += status_cusolver_; @@ -173,11 +173,11 @@ namespace ReSolve /* A is original matrix */ A_->getNnz(), descr_A_, - A_->getValues("cuda"), //da_, - A_->getRowData("cuda"), //kRowPtr_, - A_->getColData("cuda"), //jCol_, - rhs->getData("cuda"),/* right hand side */ - x->getData("cuda"),/* left hand side */ + A_->getValues( memory::DEVICE), //da_, + A_->getRowData(memory::DEVICE), //kRowPtr_, + A_->getColData(memory::DEVICE), //jCol_, + rhs->getData(memory::DEVICE),/* right hand side */ + x->getData(memory::DEVICE),/* left hand side */ &ite_refine_succ_, &r_nrminf_, info_M_, diff --git a/resolve/LinSolverDirectCuSolverRf.cpp b/resolve/LinSolverDirectCuSolverRf.cpp index d51218cc..37a3ffda 100644 --- a/resolve/LinSolverDirectCuSolverRf.cpp +++ b/resolve/LinSolverDirectCuSolverRf.cpp @@ -35,17 +35,17 @@ namespace ReSolve error_sum += status_cusolverrf_; status_cusolverrf_ = cusolverRfSetupDevice(n, A_->getNnzExpanded(), - A_->getRowData("cuda"), //dia_, - A_->getColData("cuda"), //dja_, - A_->getValues("cuda"), //da_, + A_->getRowData(memory::DEVICE), //dia_, + A_->getColData(memory::DEVICE), //dja_, + A_->getValues( memory::DEVICE), //da_, L->getNnz(), - L->getRowData("cuda"), - L->getColData("cuda"), - L->getValues("cuda"), + L->getRowData(memory::DEVICE), + L->getColData(memory::DEVICE), + L->getValues( memory::DEVICE), U->getNnz(), - U->getRowData("cuda"), - U->getColData("cuda"), - U->getValues("cuda"), + U->getRowData(memory::DEVICE), + U->getColData(memory::DEVICE), + U->getValues( memory::DEVICE), d_P_, d_Q_, handle_cusolverrf_); @@ -76,9 +76,9 @@ namespace ReSolve int error_sum = 0; status_cusolverrf_ = cusolverRfResetValues(A_->getNumRows(), A_->getNnzExpanded(), - A_->getRowData("cuda"), //dia_, - A_->getColData("cuda"), //dja_, - A_->getValues("cuda"), //da_, + A_->getRowData(memory::DEVICE), //dia_, + A_->getColData(memory::DEVICE), //dja_, + A_->getValues( memory::DEVICE), //da_, d_P_, d_Q_, handle_cusolverrf_); @@ -100,22 +100,22 @@ namespace ReSolve 1, d_T_, A_->getNumRows(), - rhs->getData("cuda"), + rhs->getData(memory::DEVICE), A_->getNumRows()); return status_cusolverrf_; } int LinSolverDirectCuSolverRf::solve(vector_type* rhs, vector_type* x) { - x->update(rhs->getData("cuda"), "cuda", "cuda"); - x->setDataUpdated("cuda"); + x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); + x->setDataUpdated(memory::DEVICE); status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, d_P_, d_Q_, 1, d_T_, A_->getNumRows(), - x->getData("cuda"), + x->getData(memory::DEVICE), A_->getNumRows()); return status_cusolverrf_; } diff --git a/resolve/LinSolverDirectKLU.cpp b/resolve/LinSolverDirectKLU.cpp index 43b612b3..6af27d10 100644 --- a/resolve/LinSolverDirectKLU.cpp +++ b/resolve/LinSolverDirectKLU.cpp @@ -35,7 +35,7 @@ namespace ReSolve int LinSolverDirectKLU::analyze() { - Symbolic_ = klu_analyze(A_->getNumRows(), A_->getRowData("cpu"), A_->getColData("cpu"), &Common_) ; + Symbolic_ = klu_analyze(A_->getNumRows(), A_->getRowData(memory::HOST), A_->getColData(memory::HOST), &Common_) ; if (Symbolic_ == nullptr){ printf("Symbolic_ factorization crashed withCommon_.status = %d \n", Common_.status); @@ -46,7 +46,7 @@ namespace ReSolve int LinSolverDirectKLU::factorize() { - Numeric_ = klu_factor(A_->getRowData("cpu"), A_->getColData("cpu"),A_->getValues("cpu"), Symbolic_, &Common_); + Numeric_ = klu_factor(A_->getRowData(memory::HOST), A_->getColData(memory::HOST), A_->getValues(memory::HOST), Symbolic_, &Common_); if (Numeric_ == nullptr){ return 1; @@ -56,7 +56,7 @@ namespace ReSolve int LinSolverDirectKLU::refactorize() { - int kluStatus = klu_refactor (A_->getRowData("cpu"), A_->getColData("cpu"), A_->getValues("cpu"), Symbolic_, Numeric_, &Common_); + int kluStatus = klu_refactor (A_->getRowData(memory::HOST), A_->getColData(memory::HOST), A_->getValues(memory::HOST), Symbolic_, Numeric_, &Common_); if (!kluStatus){ //display error @@ -71,10 +71,10 @@ namespace ReSolve // std::memcpy(x, rhs, A->getNumRows() * sizeof(real_type)); - x->update(rhs->getData("cpu"), "cpu", "cpu"); - x->setDataUpdated("cpu"); + x->update(rhs->getData(memory::HOST), memory::HOST, memory::HOST); + x->setDataUpdated(memory::HOST); - int kluStatus = klu_solve(Symbolic_, Numeric_, A_->getNumRows(), 1, x->getData("cpu"), &Common_); + int kluStatus = klu_solve(Symbolic_, Numeric_, A_->getNumRows(), 1, x->getData(memory::HOST), &Common_); if (!kluStatus){ return 1; @@ -90,16 +90,16 @@ namespace ReSolve L_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzL); U_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzU); - L_->allocateMatrixData("cpu"); - U_->allocateMatrixData("cpu"); + L_->allocateMatrixData(memory::HOST); + U_->allocateMatrixData(memory::HOST); int ok = klu_extract(Numeric_, Symbolic_, - L_->getColData("cpu"), - L_->getRowData("cpu"), - L_->getValues("cpu"), - U_->getColData("cpu"), - U_->getRowData("cpu"), - U_->getValues("cpu"), + L_->getColData(memory::HOST), + L_->getRowData(memory::HOST), + L_->getValues( memory::HOST), + U_->getColData(memory::HOST), + U_->getRowData(memory::HOST), + U_->getValues( memory::HOST), nullptr, nullptr, nullptr, @@ -109,8 +109,8 @@ namespace ReSolve nullptr, &Common_); - L_->setUpdated("cpu"); - U_->setUpdated("cpu"); + L_->setUpdated(memory::HOST); + U_->setUpdated(memory::HOST); (void) ok; // TODO: Check status in ok before setting `factors_extracted_` factors_extracted_ = true; } @@ -125,16 +125,16 @@ namespace ReSolve L_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzL); U_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzU); - L_->allocateMatrixData("cpu"); - U_->allocateMatrixData("cpu"); + L_->allocateMatrixData(memory::HOST); + U_->allocateMatrixData(memory::HOST); int ok = klu_extract(Numeric_, Symbolic_, - L_->getColData("cpu"), - L_->getRowData("cpu"), - L_->getValues("cpu"), - U_->getColData("cpu"), - U_->getRowData("cpu"), - U_->getValues("cpu"), + L_->getColData(memory::HOST), + L_->getRowData(memory::HOST), + L_->getValues( memory::HOST), + U_->getColData(memory::HOST), + U_->getRowData(memory::HOST), + U_->getValues( memory::HOST), nullptr, nullptr, nullptr, @@ -144,8 +144,8 @@ namespace ReSolve nullptr, &Common_); - L_->setUpdated("cpu"); - U_->setUpdated("cpu"); + L_->setUpdated(memory::HOST); + U_->setUpdated(memory::HOST); (void) ok; // TODO: Check status in ok before setting `factors_extracted_` factors_extracted_ = true; diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index fa63f2d5..0bf1720f 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -82,9 +82,9 @@ namespace ReSolve n_ = A_->getNumRows(); d_V_ = new vector_type(n_, restart_ + 1); - d_V_->allocate("cuda"); + d_V_->allocate(memory::DEVICE); d_Z_ = new vector_type(n_, restart_ + 1); - d_Z_->allocate("cuda"); + d_Z_->allocate(memory::DEVICE); h_H_ = new real_type[restart_ * (restart_ + 1)]; h_c_ = new real_type[restart_]; // needed for givens h_s_ = new real_type[restart_]; // same @@ -114,7 +114,7 @@ namespace ReSolve vector_type* vec_z = new vector_type(n_); //V[0] = b-A*x_0 - rhs->deepCopyVectorData(d_V_->getData("cuda"), 0, "cuda"); + rhs->deepCopyVectorData(d_V_->getData(memory::DEVICE), 0, memory::DEVICE); matrix_handler_->matvec(A_, x, d_V_, &MINUSONE, &ONE, "csr", "cuda"); rnorm = 0.0; bnorm = vector_handler_->dot(rhs, rhs, "cuda"); @@ -166,14 +166,14 @@ namespace ReSolve // Z_i = (LU)^{-1}*V_i - vec_v->setData( d_V_->getVectorData(i, "cuda"), "cuda"); - vec_z->setData( d_Z_->getVectorData(i, "cuda"), "cuda"); + vec_v->setData( d_V_->getVectorData(i, memory::DEVICE), memory::DEVICE); + vec_z->setData( d_Z_->getVectorData(i, memory::DEVICE), memory::DEVICE); this->precV(vec_v, vec_z); mem_.deviceSynchronize(); // V_{i+1}=A*Z_i - vec_v->setData( d_V_->getVectorData(i + 1, "cuda"), "cuda"); + vec_v->setData( d_V_->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); matrix_handler_->matvec(A_, vec_z, vec_v, &ONE, &ZERO,"csr", "cuda"); @@ -228,7 +228,7 @@ namespace ReSolve // get solution for(j = 0; j <= i; j++) { - vec_z->setData( d_Z_->getVectorData(j, "cuda"), "cuda"); + vec_z->setData( d_Z_->getVectorData(j, memory::DEVICE), memory::DEVICE); vector_handler_->axpy(&h_rs_[j], vec_z, x, "cuda"); } @@ -239,7 +239,7 @@ namespace ReSolve outer_flag = 0; } - rhs->deepCopyVectorData(d_V_->getData("cuda"), 0, "cuda"); + rhs->deepCopyVectorData(d_V_->getData(memory::DEVICE), 0, memory::DEVICE); matrix_handler_->matvec(A_, x, d_V_, &MINUSONE, &ONE,"csr", "cuda"); rnorm = vector_handler_->dot(d_V_, d_V_, "cuda"); // rnorm = ||V_1|| @@ -317,7 +317,7 @@ namespace ReSolve void LinSolverIterativeFGMRES::precV(vector_type* rhs, vector_type* x) { LU_solver_->solve(rhs, x); - // x->update(rhs->getData("cuda"), "cuda", "cuda"); + // x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); } real_type LinSolverIterativeFGMRES::getFinalResidualNorm() diff --git a/resolve/MemoryUtils.hpp b/resolve/MemoryUtils.hpp index 5e2da403..d87c621f 100644 --- a/resolve/MemoryUtils.hpp +++ b/resolve/MemoryUtils.hpp @@ -2,6 +2,16 @@ #include + +namespace ReSolve +{ + namespace memory + { + enum MemorySpace{HOST = 0, DEVICE}; + enum MemoryDirection{HOST_TO_HOST = 0, HOST_TO_DEVICE, DEVICE_TO_HOST, DEVICE_TO_DEVICE}; + } +} + namespace ReSolve { /** diff --git a/resolve/matrix/Coo.cpp b/resolve/matrix/Coo.cpp index eeff7b86..326eba59 100644 --- a/resolve/matrix/Coo.cpp +++ b/resolve/matrix/Coo.cpp @@ -27,52 +27,49 @@ namespace ReSolve { } - index_type* matrix::Coo::getRowData(std::string memspace) + index_type* matrix::Coo::getRowData(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_row_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_row_data_; + case DEVICE: return this->d_row_data_; - } else { + default: return nullptr; - } } } - index_type* matrix::Coo::getColData(std::string memspace) + index_type* matrix::Coo::getColData(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_col_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_col_data_; + case DEVICE: return this->d_col_data_; - } else { + default: return nullptr; - } } } - real_type* matrix::Coo::getValues(std::string memspace) + real_type* matrix::Coo::getValues(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_val_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_val_data_; + case DEVICE: return this->d_val_data_; - } else { + default: return nullptr; - } } } - index_type matrix::Coo::updateData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspaceIn, std::string memspaceOut) + index_type matrix::Coo::updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { //four cases (for now) @@ -80,12 +77,12 @@ namespace ReSolve if (is_expanded_) {nnz_current = nnz_expanded_;} setNotUpdated(); int control=-1; - if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;} - if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)){ control = 0;} + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)){ control = 2;} + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - if (memspaceOut == "cpu") { + if (memspaceOut == memory::HOST) { //check if cpu data allocated if (h_row_data_ == nullptr) { this->h_row_data_ = new index_type[nnz_current]; @@ -98,7 +95,7 @@ namespace ReSolve } } - if ((memspaceOut == "cuda") || (memspaceOut == "hip")) { + if (memspaceOut == memory::DEVICE) { //check if cuda data allocated if (d_row_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); @@ -150,7 +147,7 @@ namespace ReSolve return 0; } - index_type matrix::Coo::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, std::string memspaceIn, std::string memspaceOut) + index_type matrix::Coo::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { this->destroyMatrixData(memspaceOut); this->nnz_ = new_nnz; @@ -158,13 +155,13 @@ namespace ReSolve return i; } - index_type matrix::Coo::allocateMatrixData(std::string memspace) + index_type matrix::Coo::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; if (is_expanded_) {nnz_current = nnz_expanded_;} destroyMatrixData(memspace);//just in case - if (memspace == "cpu") { + if (memspace == memory::HOST) { this->h_row_data_ = new index_type[nnz_current]; std::fill(h_row_data_, h_row_data_ + nnz_current, 0); this->h_col_data_ = new index_type[nnz_current]; @@ -176,7 +173,7 @@ namespace ReSolve return 0; } - if ((memspace == "cuda") || (memspace == "hip")) { + if (memspace == memory::DEVICE) { mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); @@ -187,55 +184,57 @@ namespace ReSolve return -1; } - int matrix::Coo::copyData(std::string memspaceOut) + int matrix::Coo::copyData(memory::MemorySpace memspaceOut) { + using namespace ReSolve::memory; index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - - if (memspaceOut == "cpu") { - //check if we need to copy or not - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if (h_row_data_ == nullptr) { - h_row_data_ = new index_type[nnz_current]; - } - if (h_col_data_ == nullptr) { - h_col_data_ = new index_type[nnz_current]; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_current]; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_current); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - } - return 0; + if (is_expanded_) { + nnz_current = nnz_expanded_; } - if ((memspaceOut == "cuda") || (memspaceOut == "hip")) { - if ((d_data_updated_ == false) && (h_data_updated_ == true)) { - if (d_row_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); - } - if (d_col_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + switch (memspaceOut) { + case HOST: + if ((d_data_updated_ == true) && (h_data_updated_ == false)) { + if (h_row_data_ == nullptr) { + h_row_data_ = new index_type[nnz_current]; + } + if (h_col_data_ == nullptr) { + h_col_data_ = new index_type[nnz_current]; + } + if (h_val_data_ == nullptr) { + h_val_data_ = new real_type[nnz_current]; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_current); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + return 0; + case DEVICE: + if ((d_data_updated_ == false) && (h_data_updated_ == true)) { + if (d_row_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); + } + if (d_col_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + } + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_current); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_current); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - } - return 0; - } - return -1; + return 0; + default: + return -1; + } // switch } void matrix::Coo::print() diff --git a/resolve/matrix/Coo.hpp b/resolve/matrix/Coo.hpp index 3ec045c3..bc67ceef 100644 --- a/resolve/matrix/Coo.hpp +++ b/resolve/matrix/Coo.hpp @@ -15,18 +15,18 @@ namespace ReSolve { namespace matrix { bool expanded); ~Coo(); - virtual index_type* getRowData(std::string memspace); - virtual index_type* getColData(std::string memspace); - virtual real_type* getValues(std::string memspace); + virtual index_type* getRowData(memory::MemorySpace memspace); + virtual index_type* getColData(memory::MemorySpace memspace); + virtual real_type* getValues( memory::MemorySpace memspace); - virtual index_type updateData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspaceIn, std::string memspaceOut); - virtual index_type updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, std::string memspaceIn, std::string memspaceOut); + virtual index_type updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + virtual index_type updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual index_type allocateMatrixData(std::string memspace); + virtual index_type allocateMatrixData(memory::MemorySpace memspace); virtual void print(); - virtual int copyData(std::string memspaceOut); + virtual int copyData(memory::MemorySpace memspaceOut); }; }} // namespace ReSolve::matrix diff --git a/resolve/matrix/Csc.cpp b/resolve/matrix/Csc.cpp index f6358df3..e6fed07c 100644 --- a/resolve/matrix/Csc.cpp +++ b/resolve/matrix/Csc.cpp @@ -24,64 +24,61 @@ namespace ReSolve { } - index_type* matrix::Csc::getRowData(std::string memspace) + index_type* matrix::Csc::getRowData(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_row_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_row_data_; + case DEVICE: return this->d_row_data_; - } else { + default: return nullptr; - } } } - index_type* matrix::Csc::getColData(std::string memspace) + index_type* matrix::Csc::getColData(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_col_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_col_data_; + case DEVICE: return this->d_col_data_; - } else { + default: return nullptr; - } } } - real_type* matrix::Csc::getValues(std::string memspace) + real_type* matrix::Csc::getValues(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_val_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_val_data_; + case DEVICE: return this->d_val_data_; - } else { + default: return nullptr; - } } } - int matrix::Csc::updateData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspaceIn, std::string memspaceOut) + int matrix::Csc::updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { index_type nnz_current = nnz_; if (is_expanded_) {nnz_current = nnz_expanded_;} //four cases (for now) int control=-1; setNotUpdated(); - if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;} - if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { control = 2;} + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - if (memspaceOut == "cpu") { + if (memspaceOut == memory::HOST) { //check if cpu data allocated if (h_col_data_ == nullptr) { this->h_col_data_ = new index_type[n_ + 1]; @@ -94,7 +91,7 @@ namespace ReSolve } } - if ((memspaceOut == "cuda") || (memspaceOut == "hip")) { + if (memspaceOut == memory::DEVICE) { //check if cuda data allocated if (d_col_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); @@ -147,7 +144,7 @@ namespace ReSolve } - int matrix::Csc::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, std::string memspaceIn, std::string memspaceOut) + int matrix::Csc::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { this->destroyMatrixData(memspaceOut); this->nnz_ = new_nnz; @@ -155,13 +152,13 @@ namespace ReSolve return i; } - int matrix::Csc::allocateMatrixData(std::string memspace) + int matrix::Csc::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; if (is_expanded_) {nnz_current = nnz_expanded_;} destroyMatrixData(memspace);//just in case - if (memspace == "cpu") { + if (memspace == memory::HOST) { this->h_col_data_ = new index_type[n_ + 1]; std::fill(h_col_data_, h_col_data_ + n_ + 1, 0); this->h_row_data_ = new index_type[nnz_current]; @@ -173,7 +170,7 @@ namespace ReSolve return 0; } - if ((memspace == "cuda") || (memspace == "hip")) { + if (memspace == memory::DEVICE) { mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); @@ -184,54 +181,56 @@ namespace ReSolve return -1; } - int matrix::Csc::copyData(std::string memspaceOut) + int matrix::Csc::copyData(memory::MemorySpace memspaceOut) { + using namespace ReSolve::memory; index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - - if (memspaceOut == "cpu") { - //check if we need to copy or not - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if (h_col_data_ == nullptr) { - h_col_data_ = new index_type[n_ + 1]; - } - if (h_row_data_ == nullptr) { - h_row_data_ = new index_type[nnz_current]; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_current]; - } - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - } - return 0; + if (is_expanded_) { + nnz_current = nnz_expanded_; } - if ((memspaceOut == "cuda") || (memspaceOut == "hip")) { - if ((d_data_updated_ == false) && (h_data_updated_ == true)) { - if (d_col_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); + switch(memspaceOut) { + case HOST: + if ((d_data_updated_ == true) && (h_data_updated_ == false)) { + if (h_col_data_ == nullptr) { + h_col_data_ = new index_type[n_ + 1]; + } + if (h_row_data_ == nullptr) { + h_row_data_ = new index_type[nnz_current]; + } + if (h_val_data_ == nullptr) { + h_val_data_ = new real_type[nnz_current]; + } + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; } - if (d_row_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); + return 0; + case DEVICE: + if ((d_data_updated_ == false) && (h_data_updated_ == true)) { + if (d_col_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); + } + if (d_row_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); + } + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + } + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - } - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, n_ + 1); - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - } - return 0; - } - return -1; + return 0; + default: + return -1; + } // switch } } diff --git a/resolve/matrix/Csc.hpp b/resolve/matrix/Csc.hpp index f0598314..8a5dc551 100644 --- a/resolve/matrix/Csc.hpp +++ b/resolve/matrix/Csc.hpp @@ -15,18 +15,18 @@ namespace ReSolve { namespace matrix { bool expanded); ~Csc(); - virtual index_type* getRowData(std::string memspace); - virtual index_type* getColData(std::string memspace); - virtual real_type* getValues(std::string memspace); + virtual index_type* getRowData(memory::MemorySpace memspace); + virtual index_type* getColData(memory::MemorySpace memspace); + virtual real_type* getValues( memory::MemorySpace memspace); - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspaceIn, std::string memspaceOut); - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, std::string memspaceIn, std::string memspaceOut); + virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int allocateMatrixData(std::string memspace); + virtual int allocateMatrixData(memory::MemorySpace memspace); virtual void print() {return;} - virtual int copyData(std::string memspaceOut); + virtual int copyData(memory::MemorySpace memspaceOut); }; diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index 04e8dff1..0c08b641 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -24,64 +24,61 @@ namespace ReSolve { } - index_type* matrix::Csr::getRowData(std::string memspace) + index_type* matrix::Csr::getRowData(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_row_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_row_data_; + case DEVICE: return this->d_row_data_; - } else { + default: return nullptr; - } } } - index_type* matrix::Csr::getColData(std::string memspace) + index_type* matrix::Csr::getColData(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_col_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_col_data_; + case DEVICE: return this->d_col_data_; - } else { + default: return nullptr; - } } } - real_type* matrix::Csr::getValues(std::string memspace) + real_type* matrix::Csr::getValues(memory::MemorySpace memspace) { - if (memspace == "cpu") { - copyData("cpu"); - return this->h_val_data_; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { - copyData(memspace); + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_val_data_; + case DEVICE: return this->d_val_data_; - } else { + default: return nullptr; - } } } - int matrix::Csr::updateData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspaceIn, std::string memspaceOut) + int matrix::Csr::updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { //four cases (for now) index_type nnz_current = nnz_; if (is_expanded_) {nnz_current = nnz_expanded_;} setNotUpdated(); int control = -1; - if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;} - if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { control = 2;} + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - if (memspaceOut == "cpu") { + if (memspaceOut == memory::HOST) { //check if cpu data allocated if (h_row_data_ == nullptr) { this->h_row_data_ = new index_type[n_ + 1]; @@ -94,7 +91,7 @@ namespace ReSolve } } - if ((memspaceOut == "cuda") || (memspaceOut == "hip")) { + if (memspaceOut == memory::DEVICE) { //check if cuda data allocated if (d_row_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); @@ -148,7 +145,7 @@ namespace ReSolve return 0; } - int matrix::Csr::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, std::string memspaceIn, std::string memspaceOut) + int matrix::Csr::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { this->destroyMatrixData(memspaceOut); this->nnz_ = new_nnz; @@ -156,13 +153,13 @@ namespace ReSolve return i; } - int matrix::Csr::allocateMatrixData(std::string memspace) + int matrix::Csr::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; if (is_expanded_) {nnz_current = nnz_expanded_;} destroyMatrixData(memspace);//just in case - if (memspace == "cpu") { + if (memspace == memory::HOST) { this->h_row_data_ = new index_type[n_ + 1]; std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); this->h_col_data_ = new index_type[nnz_current]; @@ -174,7 +171,7 @@ namespace ReSolve return 0; } - if ((memspace == "cuda") || (memspace == "hip")) { + if (memspace == memory::DEVICE) { mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); @@ -185,54 +182,58 @@ namespace ReSolve return -1; } - int matrix::Csr::copyData(std::string memspaceOut) + int matrix::Csr::copyData(memory::MemorySpace memspaceOut) { - index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} + using namespace ReSolve::memory; - if (memspaceOut == "cpu") { - //check if we need to copy or not - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if (h_row_data_ == nullptr) { - h_row_data_ = new index_type[n_ + 1]; - } - if (h_col_data_ == nullptr) { - h_col_data_ = new index_type[nnz_current]; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_current]; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - } - return 0; + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; } - if ((memspaceOut == "cuda") || (memspaceOut == "hip")) { - if ((d_data_updated_ == false) && (h_data_updated_ == true)) { - if (d_row_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - } - if (d_col_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + switch (memspaceOut) { + case HOST: + //check if we need to copy or not + if ((d_data_updated_ == true) && (h_data_updated_ == false)) { + if (h_row_data_ == nullptr) { + h_row_data_ = new index_type[n_ + 1]; + } + if (h_col_data_ == nullptr) { + h_col_data_ = new index_type[nnz_current]; + } + if (h_val_data_ == nullptr) { + h_val_data_ = new real_type[nnz_current]; + } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + return 0; + case DEVICE: + if ((d_data_updated_ == false) && (h_data_updated_ == true)) { + if (d_row_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + } + if (d_col_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + } + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - } - return 0; - } - return -1; + return 0; + default: + return -1; + } // switch } } // namespace ReSolve diff --git a/resolve/matrix/Csr.hpp b/resolve/matrix/Csr.hpp index 43c317de..a5d8f682 100644 --- a/resolve/matrix/Csr.hpp +++ b/resolve/matrix/Csr.hpp @@ -18,18 +18,18 @@ namespace ReSolve { namespace matrix { ~Csr(); - virtual index_type* getRowData(std::string memspace); - virtual index_type* getColData(std::string memspace); - virtual real_type* getValues(std::string memspace); + virtual index_type* getRowData(memory::MemorySpace memspace); + virtual index_type* getColData(memory::MemorySpace memspace); + virtual real_type* getValues( memory::MemorySpace memspace); - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspaceIn, std::string memspaceOut); - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, std::string memspaceIn, std::string memspaceOut); + virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int allocateMatrixData(std::string memspace); + virtual int allocateMatrixData(memory::MemorySpace memspace); virtual void print() {return;} - virtual int copyData(std::string memspaceOut); + virtual int copyData(memory::MemorySpace memspaceOut); }; }} // namespace ReSolve::matrix diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 133a09f9..0a7124da 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -124,9 +124,9 @@ namespace ReSolve { index_type* nnz_counts = new index_type[n]; std::fill_n(nnz_counts, n, 0); - index_type* coo_rows = A_coo->getRowData("cpu"); - index_type* coo_cols = A_coo->getColData("cpu"); - real_type* coo_vals = A_coo->getValues("cpu"); + index_type* coo_rows = A_coo->getRowData(memory::HOST); + index_type* coo_cols = A_coo->getColData(memory::HOST); + real_type* coo_vals = A_coo->getValues( memory::HOST); index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal std::fill_n(diag_control, n, 0); @@ -249,12 +249,12 @@ namespace ReSolve { #endif A_csr->setNnz(nnz_no_duplicates); if (memspace == "cpu"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cpu"); + A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::HOST); } else { if (memspace == "cuda"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda"); + A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::DEVICE); } else if (memspace == "hip"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda"); + A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::DEVICE); } else { //display error } diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 2c434dcb..d4799ffd 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -45,12 +45,12 @@ namespace ReSolve { // int error_sum = 0; if (matrixFormat == "csr") { matrix::Csr* A = (matrix::Csr*) Ageneric; - index_type* ia = A->getRowData("cpu"); - index_type* ja = A->getColData("cpu"); - real_type* a = A->getValues("cpu"); + index_type* ia = A->getRowData(memory::HOST); + index_type* ja = A->getColData(memory::HOST); + real_type* a = A->getValues( memory::HOST); - real_type* x_data = vec_x->getData("cpu"); - real_type* result_data = vec_result->getData("cpu"); + real_type* x_data = vec_x->getData(memory::HOST); + real_type* result_data = vec_result->getData(memory::HOST); real_type sum; real_type y; real_type t; @@ -70,7 +70,7 @@ namespace ReSolve { sum *= (*alpha); result_data[i] = result_data[i]*(*beta) + sum; } - vec_result->setDataUpdated("cpu"); + vec_result->setDataUpdated(memory::HOST); return 0; } else { out::error() << "MatVec not implemented (yet) for " @@ -100,13 +100,13 @@ namespace ReSolve { index_type nnz = A_csc->getNnz(); index_type n = A_csc->getNumColumns(); - index_type* rowIdxCsc = A_csc->getRowData("cpu"); - index_type* colPtrCsc = A_csc->getColData("cpu"); - real_type* valuesCsc = A_csc->getValues("cpu"); + index_type* rowIdxCsc = A_csc->getRowData(memory::HOST); + index_type* colPtrCsc = A_csc->getColData(memory::HOST); + real_type* valuesCsc = A_csc->getValues( memory::HOST); - index_type* rowPtrCsr = A_csr->getRowData("cpu"); - index_type* colIdxCsr = A_csr->getColData("cpu"); - real_type* valuesCsr = A_csr->getValues("cpu"); + index_type* rowPtrCsr = A_csr->getRowData(memory::HOST); + index_type* colIdxCsr = A_csr->getColData(memory::HOST); + real_type* valuesCsr = A_csr->getValues( memory::HOST); // Set all CSR row pointers to zero for (index_type i = 0; i <= n; ++i) { diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 3405ba8d..e0ac7bb4 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -42,11 +42,11 @@ namespace ReSolve { cusparseStatus_t status; LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; cusparseDnVecDescr_t vecx = workspaceCUDA->getVecX(); - cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData("cuda"), CUDA_R_64F); + cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData(memory::DEVICE), CUDA_R_64F); cusparseDnVecDescr_t vecAx = workspaceCUDA->getVecY(); - cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData("cuda"), CUDA_R_64F); + cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData(memory::DEVICE), CUDA_R_64F); cusparseSpMatDescr_t matA = workspaceCUDA->getSpmvMatrixDescriptor(); @@ -57,9 +57,9 @@ namespace ReSolve { A->getNumRows(), A->getNumColumns(), A->getNnzExpanded(), - A->getRowData("cuda"), - A->getColData("cuda"), - A->getValues("cuda"), + A->getRowData(memory::DEVICE), + A->getColData(memory::DEVICE), + A->getValues( memory::DEVICE), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, @@ -105,7 +105,7 @@ namespace ReSolve { if (status) out::error() << "Matvec status: " << status << "Last error code: " << mem_.getLastDeviceError() << std::endl; - vec_result->setDataUpdated("cuda"); + vec_result->setDataUpdated(memory::DEVICE); cusparseDestroyDnVec(vecx); cusparseDestroyDnVec(vecAx); @@ -127,7 +127,7 @@ namespace ReSolve { index_type error_sum = 0; LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - A_csr->allocateMatrixData("cuda"); + A_csr->allocateMatrixData(memory::DEVICE); index_type n = A_csc->getNumRows(); index_type m = A_csc->getNumRows(); index_type nnz = A_csc->getNnz(); @@ -137,12 +137,12 @@ namespace ReSolve { n, m, nnz, - A_csc->getValues("cuda"), - A_csc->getColData("cuda"), - A_csc->getRowData("cuda"), - A_csr->getValues("cuda"), - A_csr->getRowData("cuda"), - A_csr->getColData("cuda"), + A_csc->getValues( memory::DEVICE), + A_csc->getColData(memory::DEVICE), + A_csc->getRowData(memory::DEVICE), + A_csr->getValues( memory::DEVICE), + A_csr->getRowData(memory::DEVICE), + A_csr->getColData(memory::DEVICE), CUDA_R_64F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, @@ -154,12 +154,12 @@ namespace ReSolve { n, m, nnz, - A_csc->getValues("cuda"), - A_csc->getColData("cuda"), - A_csc->getRowData("cuda"), - A_csr->getValues("cuda"), - A_csr->getRowData("cuda"), - A_csr->getColData("cuda"), + A_csc->getValues( memory::DEVICE), + A_csc->getColData(memory::DEVICE), + A_csc->getRowData(memory::DEVICE), + A_csr->getValues( memory::DEVICE), + A_csr->getRowData(memory::DEVICE), + A_csr->getColData(memory::DEVICE), CUDA_R_64F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 370849fa..b4f8e483 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -62,9 +62,9 @@ namespace ReSolve { A->getNumColumns(), A->getNnzExpanded(), descrA, - A->getValues("cuda"), - A->getRowData("cuda"), - A->getColData("cuda"), // cuda is used as "device" + A->getValues( memory::DEVICE), + A->getRowData(memory::DEVICE), + A->getColData(memory::DEVICE), // cuda is used as "device" infoA); error_sum += status; mem_.deviceSynchronize(); @@ -79,20 +79,20 @@ namespace ReSolve { A->getNnzExpanded(), alpha, descrA, - A->getValues("cuda"), - A->getRowData("cuda"), - A->getColData("cuda"), + A->getValues( memory::DEVICE), + A->getRowData(memory::DEVICE), + A->getColData(memory::DEVICE), infoA, - vec_x->getData("cuda"), + vec_x->getData(memory::DEVICE), beta, - vec_result->getData("cuda")); + vec_result->getData(memory::DEVICE)); error_sum += status; mem_.deviceSynchronize(); if (status) out::error() << "Matvec status: " << status << "Last error code: " << mem_.getLastDeviceError() << std::endl; - vec_result->setDataUpdated("cuda"); + vec_result->setDataUpdated(memory::DEVICE); return error_sum; } else { @@ -114,7 +114,7 @@ namespace ReSolve { rocsparse_status status; - A_csr->allocateMatrixData("cuda"); + A_csr->allocateMatrixData(memory::DEVICE); index_type n = A_csc->getNumRows(); index_type m = A_csc->getNumRows(); index_type nnz = A_csc->getNnz(); @@ -125,8 +125,8 @@ namespace ReSolve { n, m, nnz, - A_csc->getColData("cuda"), - A_csc->getRowData("cuda"), + A_csc->getColData(memory::DEVICE), + A_csc->getRowData(memory::DEVICE), rocsparse_action_numeric, &bufferSize); @@ -137,12 +137,12 @@ namespace ReSolve { n, m, nnz, - A_csc->getValues("cuda"), - A_csc->getColData("cuda"), - A_csc->getRowData("cuda"), - A_csr->getValues("cuda"), - A_csr->getRowData("cuda"), - A_csr->getColData("cuda"), + A_csc->getValues( memory::DEVICE), + A_csc->getColData(memory::DEVICE), + A_csc->getRowData(memory::DEVICE), + A_csr->getValues( memory::DEVICE), + A_csr->getRowData(memory::DEVICE), + A_csr->getColData(memory::DEVICE), rocsparse_action_numeric, rocsparse_index_base_zero, d_work); diff --git a/resolve/matrix/Sparse.cpp b/resolve/matrix/Sparse.cpp index 4a16ec98..faa86e11 100644 --- a/resolve/matrix/Sparse.cpp +++ b/resolve/matrix/Sparse.cpp @@ -73,8 +73,8 @@ namespace ReSolve { namespace matrix { Sparse::~Sparse() { - this->destroyMatrixData("cpu"); - this->destroyMatrixData("cuda"); + this->destroyMatrixData(memory::HOST); + this->destroyMatrixData(memory::DEVICE); } void Sparse::setNotUpdated() @@ -133,58 +133,59 @@ namespace ReSolve { namespace matrix { this->nnz_ = nnz_new; } - int Sparse::setUpdated(std::string what) + int Sparse::setUpdated(memory::MemorySpace memspace) { - if (what == "cpu") - { - h_data_updated_ = true; - d_data_updated_ = false; - } else { - if (what == "cuda"){ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + h_data_updated_ = true; + d_data_updated_ = false; + break; + case DEVICE: d_data_updated_ = true; h_data_updated_ = false; - } else { - return -1; - } + break; } return 0; } - int Sparse::setMatrixData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspace) + int Sparse::setMatrixData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspace) { + using namespace ReSolve::memory; setNotUpdated(); - if (memspace == "cpu"){ - this->h_row_data_ = row_data; - this->h_col_data_ = col_data; - this->h_val_data_ = val_data; - h_data_updated_ = true; - } else { - if (memspace == "cuda"){ + switch (memspace) { + case HOST: + this->h_row_data_ = row_data; + this->h_col_data_ = col_data; + this->h_val_data_ = val_data; + h_data_updated_ = true; + break; + case DEVICE: this->d_row_data_ = row_data; this->d_col_data_ = col_data; this->d_val_data_ = val_data; d_data_updated_ = true; - } else { - return -1; - } + break; } return 0; } - int Sparse::destroyMatrixData(std::string memspace) - { - if (memspace == "cpu"){ - if (owns_cpu_data_) { - delete [] h_row_data_; - delete [] h_col_data_; - } - if (owns_cpu_vals_) { - delete [] h_val_data_; - } - } else { - if (memspace == "cuda"){ + int Sparse::destroyMatrixData(memory::MemorySpace memspace) + { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + if (owns_cpu_data_) { + delete [] h_row_data_; + delete [] h_col_data_; + } + if (owns_cpu_vals_) { + delete [] h_val_data_; + } + return 0; + case DEVICE: if (owns_gpu_data_) { mem_.deleteOnDevice(d_row_data_); mem_.deleteOnDevice(d_col_data_); @@ -192,14 +193,13 @@ namespace ReSolve { namespace matrix { if (owns_gpu_vals_) { mem_.deleteOnDevice(d_val_data_); } - } else { + return 0; + default: return -1; - } } - return 0; } - int Sparse::updateValues(real_type* new_vals, std::string memspaceIn, std::string memspaceOut) + int Sparse::updateValues(real_type* new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { index_type nnz_current = nnz_; @@ -207,19 +207,19 @@ namespace ReSolve { namespace matrix { //four cases (for now) setNotUpdated(); int control=-1; - if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;} - if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 1;} - if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 2;} - if ((memspaceIn == "cuda") && (memspaceOut == "cuda")){ control = 3;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)){ control = 1;} + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { control = 2;} + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)){ control = 3;} - if (memspaceOut == "cpu") { + if (memspaceOut == memory::HOST) { //check if cpu data allocated if (h_val_data_ == nullptr) { this->h_val_data_ = new real_type[nnz_current]; } } - if (memspaceOut == "cuda") { + if (memspaceOut == memory::DEVICE) { //check if cuda data allocated if (d_val_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); @@ -253,21 +253,22 @@ namespace ReSolve { namespace matrix { return 0; } - int Sparse::setNewValues(real_type* new_vals, std::string memspace) + int Sparse::setNewValues(real_type* new_vals, memory::MemorySpace memspace) { - + using namespace ReSolve::memory; setNotUpdated(); - if (memspace == "cpu"){ - this->h_val_data_ = new_vals; - h_data_updated_ = true; - } else { - if (memspace == "cuda"){ + switch (memspace) { + case HOST: + this->h_val_data_ = new_vals; + h_data_updated_ = true; + break; + case DEVICE: this->d_val_data_ = new_vals; d_data_updated_ = true; - } else { + break; + default: return -1; - } } return 0; } diff --git a/resolve/matrix/Sparse.hpp b/resolve/matrix/Sparse.hpp index 1196c38e..96121acb 100644 --- a/resolve/matrix/Sparse.hpp +++ b/resolve/matrix/Sparse.hpp @@ -31,31 +31,31 @@ namespace ReSolve { namespace matrix { void setExpanded(bool expanded); void setNnzExpanded(index_type nnz_expanded_new); void setNnz(index_type nnz_new); // for resetting when removing duplicates - index_type setUpdated(std::string what); + index_type setUpdated(memory::MemorySpace what); - virtual index_type* getRowData(std::string memspace) = 0; - virtual index_type* getColData(std::string memspace) = 0; - virtual real_type* getValues(std::string memspace) = 0; + virtual index_type* getRowData(memory::MemorySpace memspace) = 0; + virtual index_type* getColData(memory::MemorySpace memspace) = 0; + virtual real_type* getValues( memory::MemorySpace memspace) = 0; - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspaceIn, std::string memspaceOut) = 0; - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, std::string memspaceIn, std::string memspaceOut) = 0; + virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; + virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; - virtual int allocateMatrixData(std::string memspace) = 0; - int setMatrixData(index_type* row_data, index_type* col_data, real_type* val_data, std::string memspace); + virtual int allocateMatrixData(memory::MemorySpace memspace) = 0; + int setMatrixData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspace); - int destroyMatrixData(std::string memspace); + int destroyMatrixData(memory::MemorySpace memspace); virtual void print() = 0; - virtual int copyData(std::string memspaceOut) = 0; + virtual int copyData(memory::MemorySpace memspaceOut) = 0; //update Values just updates values; it allocates if necessary. //values have the same dimensions between different formats - virtual int updateValues(real_type* new_vals, std::string memspaceIn, std::string memspaceOut); + virtual int updateValues(real_type* new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); //set new values just sets the pointer, use caution. - virtual int setNewValues(real_type* new_vals, std::string memspace); + virtual int setNewValues(real_type* new_vals, memory::MemorySpace memspace); protected: //size diff --git a/resolve/matrix/io.cpp b/resolve/matrix/io.cpp index 36fb5f1b..0d96a5e1 100644 --- a/resolve/matrix/io.cpp +++ b/resolve/matrix/io.cpp @@ -53,7 +53,7 @@ namespace ReSolve { namespace io { coo_vals[i] = c; i++; } - A->setMatrixData(coo_rows, coo_cols, coo_vals, "cpu"); + A->setMatrixData(coo_rows, coo_cols, coo_vals, memory::HOST); return A; } @@ -116,9 +116,9 @@ namespace ReSolve { namespace io { } A->setNnz(nnz); //create coo arrays - index_type* coo_rows = A->getRowData("cpu"); - index_type* coo_cols = A->getColData("cpu"); - real_type* coo_vals = A->getValues("cpu"); + index_type* coo_rows = A->getRowData(memory::HOST); + index_type* coo_cols = A->getColData(memory::HOST); + real_type* coo_vals = A->getValues( memory::HOST); i = 0; index_type a, b; real_type c; @@ -171,7 +171,7 @@ namespace ReSolve { namespace io { int writeVectorToFile(vector_type* vec_x, std::ostream& file_out) { - real_type* x_data = vec_x->getData("cpu"); + real_type* x_data = vec_x->getData(memory::HOST); // std::ofstream file_out (filename, std::ofstream::out); file_out << "%%MatrixMarket matrix array real general \n"; file_out << "% ID: XXX \n"; diff --git a/resolve/vector/Vector.cpp b/resolve/vector/Vector.cpp index df3c475d..0a62bd02 100644 --- a/resolve/vector/Vector.cpp +++ b/resolve/vector/Vector.cpp @@ -52,52 +52,51 @@ namespace ReSolve { namespace vector { return k_; } - void Vector::setData(real_type* data, std::string memspace) + void Vector::setData(real_type* data, memory::MemorySpace memspace) { - - if (memspace == "cpu") { - h_data_ = data; - cpu_updated_ = true; - gpu_updated_ = false; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + h_data_ = data; + cpu_updated_ = true; + gpu_updated_ = false; + break; + case DEVICE: d_data_ = data; gpu_updated_ = true; cpu_updated_ = false; - } else { - //error - } + break; } } - void Vector::setDataUpdated(std::string memspace) + void Vector::setDataUpdated(memory::MemorySpace memspace) { - if (memspace == "cpu") { - cpu_updated_ = true; - gpu_updated_ = false; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + cpu_updated_ = true; + gpu_updated_ = false; + break; + case DEVICE: gpu_updated_ = true; cpu_updated_ = false; - } else { - //error - } + break; } } - int Vector::update(real_type* data, std::string memspaceIn, std::string memspaceOut) + int Vector::update(real_type* data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { int control=-1; - if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;} - if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)){ control = 1;} + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { control = 2;} + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)){ control = 3;} - if ((memspaceOut == "cpu") && (h_data_ == nullptr)){ + if ((memspaceOut == memory::HOST) && (h_data_ == nullptr)) { //allocate first h_data_ = new real_type[n_ * k_]; } - if (((memspaceOut == "cuda") || (memspaceOut == "hip")) && (d_data_ == nullptr)){ + if ((memspaceOut == memory::DEVICE) && (d_data_ == nullptr)) { //allocate first mem_.allocateArrayOnDevice(&d_data_, n_ * k_); } @@ -133,26 +132,26 @@ namespace ReSolve { namespace vector { return 0; } - real_type* Vector::getData(std::string memspace) + real_type* Vector::getData(memory::MemorySpace memspace) { return this->getData(0, memspace); } - real_type* Vector::getData(index_type i, std::string memspace) + real_type* Vector::getData(index_type i, memory::MemorySpace memspace) { - if ((memspace == "cpu") && (cpu_updated_ == false) && (gpu_updated_ == true )) { - copyData(memspace, "cpu"); + if ((memspace == memory::HOST) && (cpu_updated_ == false) && (gpu_updated_ == true )) { + copyData(memspace, memory::HOST); owns_cpu_data_ = true; } - if (((memspace == "cuda") || (memspace == "hip")) && (gpu_updated_ == false) && (cpu_updated_ == true )) { - copyData("cpu", memspace); + if ((memspace == memory::DEVICE) && (gpu_updated_ == false) && (cpu_updated_ == true )) { + copyData(memory::HOST, memspace); owns_gpu_data_ = true; } - if (memspace == "cpu") { + if (memspace == memory::HOST) { return &h_data_[i * n_current_]; } else { - if ((memspace == "cuda") || (memspace == "hip")){ + if (memspace == memory::DEVICE){ return &d_data_[i * n_current_]; } else { return nullptr; @@ -161,17 +160,17 @@ namespace ReSolve { namespace vector { } - int Vector::copyData(std::string memspaceIn, std::string memspaceOut) + int Vector::copyData(memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) { int control=-1; - if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 0;} - if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 1;} + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)){ control = 0;} + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { control = 1;} - if ((memspaceOut == "cpu") && (h_data_ == nullptr)){ + if ((memspaceOut == memory::HOST) && (h_data_ == nullptr)) { //allocate first h_data_ = new real_type[n_ * k_]; } - if (((memspaceOut == "cuda") || (memspaceOut == "hip")) && (d_data_ == nullptr)){ + if ((memspaceOut == memory::DEVICE) && (d_data_ == nullptr)) { //allocate first mem_.allocateArrayOnDevice(&d_data_, n_ * k_); } @@ -193,110 +192,118 @@ namespace ReSolve { namespace vector { return 0; } - void Vector::allocate(std::string memspace) + void Vector::allocate(memory::MemorySpace memspace) { - if (memspace == "cpu") { - delete [] h_data_; - h_data_ = new real_type[n_ * k_]; - owns_cpu_data_ = true; - } else { - if ((memspace == "cuda") || (memspace == "hip")) { + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + delete [] h_data_; + h_data_ = new real_type[n_ * k_]; + owns_cpu_data_ = true; + break; + case DEVICE: mem_.deleteOnDevice(d_data_); mem_.allocateArrayOnDevice(&d_data_, n_ * k_); owns_gpu_data_ = true; - } else { - std::cout<<"wrong memspace " <k_ < i){ return nullptr; @@ -315,38 +322,38 @@ namespace ReSolve { namespace vector { } } - int Vector::deepCopyVectorData(real_type* dest, index_type i, std::string memspaceOut) + int Vector::deepCopyVectorData(real_type* dest, index_type i, memory::MemorySpace memspaceOut) { + using namespace ReSolve::memory; if (i > this->k_) { return -1; } else { real_type* data = this->getData(i, memspaceOut); - if (memspaceOut == "cpu") { - mem_.copyArrayHostToHost(dest, data, n_current_); - } else { - if ((memspaceOut == "cuda") || (memspaceOut == "hip")) { + switch (memspaceOut) { + case HOST: + mem_.copyArrayHostToHost(dest, data, n_current_); + break; + case DEVICE: mem_.copyArrayDeviceToDevice(dest, data, n_current_); - } else { - //error - } + break; } return 0; } } - int Vector::deepCopyVectorData(real_type* dest, std::string memspaceOut) + int Vector::deepCopyVectorData(real_type* dest, memory::MemorySpace memspaceOut) { + using namespace ReSolve::memory; real_type* data = this->getData(memspaceOut); - if (memspaceOut == "cpu") { - mem_.copyArrayHostToHost(dest, data, n_current_ * k_); - } else { - if ((memspaceOut == "cuda") || (memspaceOut == "hip")) { + switch (memspaceOut) { + case HOST: + mem_.copyArrayHostToHost(dest, data, n_current_ * k_); + break; + case DEVICE: mem_.copyArrayDeviceToDevice(dest, data, n_current_ * k_); - } else { - //error - } + break; } return 0; - } + }} // namespace ReSolve::vector diff --git a/resolve/vector/Vector.hpp b/resolve/vector/Vector.hpp index 9d1bd452..5f86ef7f 100644 --- a/resolve/vector/Vector.hpp +++ b/resolve/vector/Vector.hpp @@ -11,26 +11,26 @@ namespace ReSolve { namespace vector { Vector(index_type n, index_type k); ~Vector(); - int update(real_type* data, std::string memspaceIn, std::string memspaceOut); - real_type* getData(std::string memspace); - real_type* getData(index_type i, std::string memspace); // get pointer to i-th vector in multivector + int update(real_type* data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + real_type* getData(memory::MemorySpace memspace); + real_type* getData(index_type i, memory::MemorySpace memspace); // get pointer to i-th vector in multivector index_type getSize(); index_type getCurrentSize(); index_type getNumVectors(); - void setDataUpdated(std::string memspace); - void setData(real_type* data, std::string memspace); - void allocate(std::string memspace); - void setToZero(std::string memspace); - void setToZero(index_type i, std::string memspace); // set i-th ivector to 0 - void setToConst(real_type C, std::string memspace); - void setToConst(index_type i, real_type C, std::string memspace); // set i-th vector to C - needed for unit tests, Gram Schmidt tests - int copyData(std::string memspaceIn, std::string memspaceOut); + void setDataUpdated(memory::MemorySpace memspace); + void setData(real_type* data, memory::MemorySpace memspace); + void allocate(memory::MemorySpace memspace); + void setToZero(memory::MemorySpace memspace); + void setToZero(index_type i, memory::MemorySpace memspace); // set i-th ivector to 0 + void setToConst(real_type C, memory::MemorySpace memspace); + void setToConst(index_type i, real_type C, memory::MemorySpace memspace); // set i-th vector to C - needed for unit tests, Gram Schmidt tests + int copyData(memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); int setCurrentSize(index_type new_n_current); - real_type* getVectorData(index_type i, std::string memspace); // get ith vector data out of multivector - int deepCopyVectorData(real_type* dest, index_type i, std::string memspace); - int deepCopyVectorData(real_type* dest, std::string memspace); //copy FULL multivector + real_type* getVectorData(index_type i, memory::MemorySpace memspace); // get ith vector data out of multivector + int deepCopyVectorData(real_type* dest, index_type i, memory::MemorySpace memspace); + int deepCopyVectorData(real_type* dest, memory::MemorySpace memspace); //copy FULL multivector private: index_type n_; ///< size diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index f5cc463d..a8317a89 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -47,8 +47,8 @@ namespace ReSolve { real_type VectorHandlerCpu::dot(vector::Vector* x, vector::Vector* y) { - real_type* x_data = x->getData("cpu"); - real_type* y_data = y->getData("cpu"); + real_type* x_data = x->getData(memory::HOST); + real_type* y_data = y->getData(memory::HOST); real_type sum = 0.0; real_type c = 0.0; // real_type t, y; @@ -72,7 +72,7 @@ namespace ReSolve { */ void VectorHandlerCpu::scal(const real_type* alpha, vector::Vector* x) { - real_type* x_data = x->getData("cpu"); + real_type* x_data = x->getData(memory::HOST); for (int i = 0; i < x->getSize(); ++i){ x_data[i] *= (*alpha); @@ -91,8 +91,8 @@ namespace ReSolve { void VectorHandlerCpu::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) { //AXPY: y = alpha * x + y - real_type* x_data = x->getData("cpu"); - real_type* y_data = y->getData("cpu"); + real_type* x_data = x->getData(memory::HOST); + real_type* y_data = y->getData(memory::HOST); for (int i = 0; i < x->getSize(); ++i) { y_data[i] = (*alpha) * x_data[i] + y_data[i]; } diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 3c887e85..5871fd5a 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -50,7 +50,7 @@ namespace ReSolve { LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); double nrm = 0.0; - cublasStatus_t st= cublasDdot (handle_cublas, x->getSize(), x->getData("cuda"), 1, y->getData("cuda"), 1, &nrm); + cublasStatus_t st= cublasDdot (handle_cublas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); if (st!=0) {printf("dot product crashed with code %d \n", st);} return nrm; } @@ -67,7 +67,7 @@ namespace ReSolve { { LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData("cuda"), 1); + cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); if (st!=0) { ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; } @@ -90,9 +90,9 @@ namespace ReSolve { cublasDaxpy(handle_cublas, x->getSize(), alpha, - x->getData("cuda"), + x->getData(memory::DEVICE), 1, - y->getData("cuda"), + y->getData(memory::DEVICE), 1); } @@ -131,12 +131,12 @@ namespace ReSolve { n, k, alpha, - V->getData("cuda"), + V->getData(memory::DEVICE), n, - y->getData("cuda"), + y->getData(memory::DEVICE), 1, beta, - x->getData("cuda"), + x->getData(memory::DEVICE), 1); } else { @@ -145,12 +145,12 @@ namespace ReSolve { n, k, alpha, - V->getData("cuda"), + V->getData(memory::DEVICE), n, - y->getData("cuda"), + y->getData(memory::DEVICE), 1, beta, - x->getData("cuda"), + x->getData(memory::DEVICE), 1); } } @@ -171,7 +171,7 @@ namespace ReSolve { { using namespace constants; if (k < 200) { - mass_axpy(size, k, x->getData("cuda"), y->getData("cuda"),alpha->getData("cuda")); + mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE),alpha->getData(memory::DEVICE)); } else { LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); @@ -182,12 +182,12 @@ namespace ReSolve { 1, // n k + 1, // k &MINUSONE, // alpha - x->getData("cuda"), // A + x->getData(memory::DEVICE), // A size, // lda - alpha->getData("cuda"), // B + alpha->getData(memory::DEVICE), // B k + 1, // ldb &ONE, - y->getData("cuda"), // c + y->getData(memory::DEVICE), // c size); // ldc } } @@ -212,7 +212,7 @@ namespace ReSolve { using namespace constants; if (k < 200) { - mass_inner_product_two_vectors(size, k, x->getData("cuda") , x->getData(1, "cuda"), V->getData("cuda"), res->getData("cuda")); + mass_inner_product_two_vectors(size, k, x->getData(memory::DEVICE) , x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), res->getData(memory::DEVICE)); } else { LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); @@ -223,12 +223,12 @@ namespace ReSolve { 2, //n size, //k &ONE, //alpha - V->getData("cuda"), //A + V->getData(memory::DEVICE), //A size, //lda - x->getData("cuda"), //B + x->getData(memory::DEVICE), //B size, //ldb &ZERO, - res->getData("cuda"), //c + res->getData(memory::DEVICE), //c k + 1); //ldc } } diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 9f2927c7..1e1195fc 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -50,7 +50,7 @@ namespace ReSolve { LinAlgWorkspaceHIP* workspaceHIP = workspace_; rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); double nrm = 0.0; - rocblas_status st= rocblas_ddot (handle_rocblas, x->getSize(), x->getData("hip"), 1, y->getData("hip"), 1, &nrm); + rocblas_status st= rocblas_ddot (handle_rocblas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); if (st!=0) {printf("dot product crashed with code %d \n", st);} return nrm; } @@ -67,7 +67,7 @@ namespace ReSolve { { LinAlgWorkspaceHIP* workspaceHIP = workspace_; rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); - rocblas_status st = rocblas_dscal(handle_rocblas, x->getSize(), alpha, x->getData("hip"), 1); + rocblas_status st = rocblas_dscal(handle_rocblas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); if (st!=0) { ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; } @@ -90,9 +90,9 @@ namespace ReSolve { rocblas_daxpy(handle_rocblas, x->getSize(), alpha, - x->getData("hip"), + x->getData(memory::DEVICE), 1, - y->getData("hip"), + y->getData(memory::DEVICE), 1); } @@ -131,12 +131,12 @@ namespace ReSolve { n, k, alpha, - V->getData("hip"), + V->getData(memory::DEVICE), n, - y->getData("hip"), + y->getData(memory::DEVICE), 1, beta, - x->getData("hip"), + x->getData(memory::DEVICE), 1); } else { @@ -145,12 +145,12 @@ namespace ReSolve { n, k, alpha, - V->getData("hip"), + V->getData(memory::DEVICE), n, - y->getData("hip"), + y->getData(memory::DEVICE), 1, beta, - x->getData("hip"), + x->getData(memory::DEVICE), 1); } } @@ -171,7 +171,7 @@ namespace ReSolve { { using namespace constants; if (k < 200) { - mass_axpy(size, k, x->getData("hip"), y->getData("hip"),alpha->getData("hip")); + mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE),alpha->getData(memory::DEVICE)); } else { LinAlgWorkspaceHIP* workspaceHIP = workspace_; rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); @@ -182,12 +182,12 @@ namespace ReSolve { 1, // n k, // k &MINUSONE, // alpha - x->getData("hip"), // A + x->getData(memory::DEVICE), // A size, // lda - alpha->getData("hip"), // B + alpha->getData(memory::DEVICE), // B k, // ldb &ONE, - y->getData("hip"), // c + y->getData(memory::DEVICE), // c size); // ldc } } @@ -212,7 +212,7 @@ namespace ReSolve { using namespace constants; if (k < 200) { - mass_inner_product_two_vectors(size, k, x->getData("hip") , x->getData(1, "hip"), V->getData("hip"), res->getData("hip")); + mass_inner_product_two_vectors(size, k, x->getData(memory::DEVICE) , x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), res->getData(memory::DEVICE)); } else { LinAlgWorkspaceHIP* workspaceHIP = workspace_; rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); @@ -223,12 +223,12 @@ namespace ReSolve { 2, //n size, //k &ONE, //alpha - V->getData("hip"), //A + V->getData(memory::DEVICE), //A size, //lda - x->getData("hip"), //B + x->getData(memory::DEVICE), //B size, //ldb &ZERO, - res->getData("hip"), //c + res->getData(memory::DEVICE), //c k + 1); //ldc } } diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index b067f417..083c11d1 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -74,8 +74,8 @@ int main(int argc, char *argv[]) // Convert first matrix to CSR format matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); // Solve the first system using KLU status = KLU->setup(A); @@ -100,11 +100,11 @@ int main(int argc, char *argv[]) x_data[i] = 1.0; } - vec_test->setData(x_data, "cpu"); - vec_r->update(rhs, "cpu", "cpu"); - vec_diff->update(x_data, "cpu", "cpu"); + vec_test->setData(x_data, ReSolve::memory::HOST); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::HOST); - // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cpu")); + // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, ReSolve::memory::HOST)); matrix_handler->setValuesChanged(true, "cpu"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cpu"); error_sum += status; @@ -123,13 +123,13 @@ int main(int argc, char *argv[]) real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cpu")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cpu"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); //evaluate the residual ON THE CPU using COMPUTED solution - vec_r->update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); error_sum += status; @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) rhs2_file.close(); matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); // and solve it too status = KLU->refactorize(); @@ -174,7 +174,7 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); error_sum += status; - vec_r->update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); matrix_handler->setValuesChanged(true, "cpu"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); @@ -185,13 +185,13 @@ int main(int argc, char *argv[]) //for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cpu")); //compute x-x_true - vec_diff->update(x_data, "cpu", "cpu"); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::HOST); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cpu"); //evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cpu")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); diff --git a/tests/functionality/testKLU_GLU.cpp b/tests/functionality/testKLU_GLU.cpp index ddaf3b31..702141ec 100644 --- a/tests/functionality/testKLU_GLU.cpp +++ b/tests/functionality/testKLU_GLU.cpp @@ -75,15 +75,15 @@ int main(int argc, char *argv[]) real_type* x = new real_type[A->getNumRows()]; vector_type* vec_rhs = new vector_type(A->getNumRows()); vector_type* vec_x = new vector_type(A->getNumRows()); - vec_x->allocate("cpu");//for KLU - vec_x->allocate("cuda"); + vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::DEVICE); vector_type* vec_r = new vector_type(A->getNumRows()); rhs1_file.close(); // Convert first matrix to CSR format matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); // Solve the first system using KLU status = KLU->setup(A); @@ -106,7 +106,7 @@ int main(int argc, char *argv[]) status = GLU->setup(A, L, U, P, Q); error_sum += status; std::cout<<"GLU setup status: "<update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = GLU->solve(vec_rhs, vec_x); error_sum += status; std::cout<<"GLU solve status: "<setData(x_data, "cpu"); - vec_r->update(rhs, "cpu", "cuda"); - vec_diff->update(x_data, "cpu", "cuda"); + vec_test->setData(x_data, ReSolve::memory::HOST); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); matrix_handler->setValuesChanged(true, "cuda"); @@ -145,13 +145,13 @@ int main(int argc, char *argv[]) real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); //compute the residual using exact solution - vec_x->update(vec_x->getData("cuda"), "cuda", "cpu"); + vec_x->update(vec_x->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::HOST); status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); //evaluate the residual ON THE CPU using COMPUTED solution - vec_r->update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); error_sum += status; @@ -188,7 +188,7 @@ int main(int argc, char *argv[]) rhs2_file.close(); matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = GLU->refactorize(); error_sum += status; @@ -197,7 +197,7 @@ int main(int argc, char *argv[]) status = GLU->solve(vec_rhs, vec_x); error_sum += status; - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "cuda"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); @@ -208,13 +208,13 @@ int main(int argc, char *argv[]) //for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); //compute x-x_true - vec_diff->update(x_data, "cpu", "cuda"); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); //evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); diff --git a/tests/functionality/testKLU_Rf.cpp b/tests/functionality/testKLU_Rf.cpp index 124f07de..a136017e 100644 --- a/tests/functionality/testKLU_Rf.cpp +++ b/tests/functionality/testKLU_Rf.cpp @@ -80,8 +80,8 @@ int main(int argc, char *argv[]) // Convert first matrix to CSR format matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); // Solve the first system using KLU status = KLU->setup(A); @@ -106,9 +106,9 @@ int main(int argc, char *argv[]) x_data[i] = 1.0; } - vec_test->setData(x_data, "cpu"); - vec_r->update(rhs, "cpu", "cuda"); - vec_diff->update(x_data, "cpu", "cuda"); + vec_test->setData(x_data, ReSolve::memory::HOST); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); matrix_handler->setValuesChanged(true, "cuda"); @@ -129,13 +129,13 @@ int main(int argc, char *argv[]) real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); //evaluate the residual ON THE CPU using COMPUTED solution - vec_r->update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); error_sum += status; @@ -186,7 +186,7 @@ int main(int argc, char *argv[]) rhs2_file.close(); matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = Rf->refactorize(); error_sum += status; @@ -194,7 +194,7 @@ int main(int argc, char *argv[]) status = Rf->solve(vec_rhs, vec_x); error_sum += status; - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "cuda"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); @@ -205,13 +205,13 @@ int main(int argc, char *argv[]) //for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); //compute x-x_true - vec_diff->update(x_data, "cpu", "cuda"); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); //evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); diff --git a/tests/functionality/testKLU_Rf_FGMRES.cpp b/tests/functionality/testKLU_Rf_FGMRES.cpp index 6601a3ee..6a81dac1 100644 --- a/tests/functionality/testKLU_Rf_FGMRES.cpp +++ b/tests/functionality/testKLU_Rf_FGMRES.cpp @@ -85,8 +85,8 @@ int main(int argc, char *argv[]) // Convert first matrix to CSR format matrix_handler->coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, "cpu", "cpu"); - vec_rhs->setDataUpdated("cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); // Solve the first system using KLU status = KLU->setup(A); @@ -112,11 +112,11 @@ int main(int argc, char *argv[]) x_data[i] = 1.0; } - vec_test->setData(x_data, "cpu"); - vec_r->update(rhs, "cpu", "cuda"); - vec_diff->update(x_data, "cpu", "cuda"); + vec_test->setData(x_data, ReSolve::memory::HOST); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); + // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, ReSolve::memory::DEVICE)); matrix_handler->setValuesChanged(true, "cuda"); //evaluate the residual ||b-Ax|| status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); @@ -136,13 +136,13 @@ int main(int argc, char *argv[]) real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); //evaluate the residual ON THE CPU using COMPUTED solution - vec_r->update(rhs, "cpu", "cpu"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); error_sum += status; @@ -202,13 +202,13 @@ int main(int argc, char *argv[]) rhs2_file.close(); matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); Rf->setNumericalProperties(1e-12, 1e-1); status = Rf->refactorize(); error_sum += status; - vec_x->update(rhs, "cpu", "cuda"); + vec_x->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = Rf->solve(vec_x); error_sum += status; @@ -216,11 +216,11 @@ int main(int argc, char *argv[]) status = FGMRES->setupPreconditioner("CuSolverRf", Rf); error_sum += status; - vec_rhs->update(rhs, "cpu", "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = FGMRES->solve(vec_rhs, vec_x); error_sum += status; - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "cuda"); //evaluate final residual @@ -233,13 +233,13 @@ int main(int argc, char *argv[]) //for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); //compute x-x_true - vec_diff->update(x_data, "cpu", "cuda"); + vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); //evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 0bcfe544..63d2f49b 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -42,18 +42,23 @@ class MatrixHandlerTests : TestBase TestOutcome matVec(index_type N) { TestStatus status; + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; ReSolve::MatrixHandler* handler = createMatrixHandler(); matrix::Csr* A = createCsrMatrix(N, memspace_); vector::Vector x(N); vector::Vector y(N); - x.allocate(memspace_); - if (x.getData(memspace_) == NULL) printf("oups we have an issue \n"); - y.allocate(memspace_); + x.allocate(ms); + if (x.getData(ms) == NULL) printf("oups we have an issue \n"); + y.allocate(ms); - x.setToConst(1.0, memspace_); - y.setToConst(1.0, memspace_); + x.setToConst(1.0, ms); + y.setToConst(1.0, ms); real_type alpha = 2.0/30.0; real_type beta = 2.0; @@ -98,14 +103,14 @@ class MatrixHandlerTests : TestBase { bool status = true; if (memspace != "cpu") { - x.copyData(memspace, "cpu"); + x.copyData(memory::DEVICE, memory::HOST); } for (index_type i = 0; i < x.getSize(); ++i) { - // std::cout << x.getData("cpu")[i] << "\n"; - if (!isEqual(x.getData("cpu")[i], answer)) { + // std::cout << x.getData(memory::HOST)[i] << "\n"; + if (!isEqual(x.getData(memory::HOST)[i], answer)) { status = false; - std::cout << "Solution vector element x[" << i << "] = " << x.getData("cpu")[i] + std::cout << "Solution vector element x[" << i << "] = " << x.getData(memory::HOST)[i] << ", expected: " << answer << "\n"; break; } @@ -135,11 +140,11 @@ class MatrixHandlerTests : TestBase // Allocate NxN CSR matrix with NNZ nonzeros matrix::Csr* A = new matrix::Csr(N, N, NNZ); - A->allocateMatrixData("cpu"); + A->allocateMatrixData(memory::HOST); - index_type* rowptr = A->getRowData("cpu"); - index_type* colidx = A->getColData("cpu"); - real_type* val = A->getValues("cpu"); + index_type* rowptr = A->getRowData(memory::HOST); + index_type* colidx = A->getColData(memory::HOST); + real_type* val = A->getValues( memory::HOST); // Populate CSR matrix using same row pattern as for NNZ calculation rowptr[0] = 0; @@ -157,10 +162,10 @@ class MatrixHandlerTests : TestBase val[j] = row_sample[static_cast(j - rowptr[i])]; } } - A->setUpdated("cpu"); + A->setUpdated(memory::HOST); if ((memspace == "cuda") || (memspace == "hip")) { - A->copyData(memspace); + A->copyData(memory::DEVICE); } return A; diff --git a/tests/unit/matrix/MatrixIoTests.hpp b/tests/unit/matrix/MatrixIoTests.hpp index ad14f0a7..1ce23ae2 100644 --- a/tests/unit/matrix/MatrixIoTests.hpp +++ b/tests/unit/matrix/MatrixIoTests.hpp @@ -78,7 +78,7 @@ class MatrixIoTests : TestBase // Create a 5x5 COO matrix with 10 nonzeros ReSolve::matrix::Coo A(5, 5, 10); - A.allocateMatrixData("cpu"); + A.allocateMatrixData(memory::HOST); // Read string into istream and status it to `readMatrixFromFile` function. std::istringstream file2(symmetric_coo_matrix_file_); @@ -176,9 +176,9 @@ class MatrixIoTests : TestBase const std::vector& val_data) { for (size_t i = 0; i < val_data.size(); ++i) { - if ((answer.getRowData("cpu")[i] != row_data[i]) || - (answer.getColData("cpu")[i] != col_data[i]) || - (!isEqual(answer.getValues("cpu")[i], val_data[i]))) + if ((answer.getRowData(memory::HOST)[i] != row_data[i]) || + (answer.getColData(memory::HOST)[i] != col_data[i]) || + (!isEqual(answer.getValues(memory::HOST)[i], val_data[i]))) { std::cout << "Incorrect matrix value at storage element " << i << ".\n"; return false; diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 9981ea48..4837b57b 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -66,15 +66,21 @@ namespace ReSolve { break; } + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* V = new vector::Vector(N, 3); // we will be using a space of 3 vectors real_type* H = new real_type[6]; //in this case, Hessenberg matrix is 3 x 2 real_type* aux_data; // needed for setup - V->allocate(memspace_); - if (memspace_ != "cpu") { - V->allocate("cpu"); + V->allocate(ms); + if (ms != memory::HOST) { + V->allocate(memory::HOST); } @@ -82,7 +88,7 @@ namespace ReSolve { GS->setup(N, 3); //fill 2nd and 3rd vector with values - aux_data = V->getVectorData(1, "cpu"); + aux_data = V->getVectorData(1, memory::HOST); for (int i = 0; i < N; ++i) { if ( i % 2 == 0) { aux_data[i] = constants::ONE; @@ -90,7 +96,7 @@ namespace ReSolve { aux_data[i] = var1; } } - aux_data = V->getVectorData(2, "cpu"); + aux_data = V->getVectorData(2, memory::HOST); for (int i = 0; i < N; ++i) { if ( i % 3 > 0) { aux_data[i] = constants::ZERO; @@ -98,11 +104,11 @@ namespace ReSolve { aux_data[i] = var2; } } - V->setDataUpdated("cpu"); - V->copyData("cpu", memspace_); + V->setDataUpdated(memory::HOST); + V->copyData(memory::HOST, ms); //set the first vector to all 1s, normalize - V->setToConst(0, 1.0, memspace_); + V->setToConst(0, 1.0, ms); real_type nrm = handler->dot(V, V, memspace_); nrm = sqrt(nrm); nrm = 1.0 / nrm; @@ -144,6 +150,12 @@ namespace ReSolve { // x is a multivector containing K vectors bool verifyAnswer(vector::Vector* x, index_type K, ReSolve::VectorHandler* handler, std::string memspace) { + ReSolve::memory::MemorySpace ms; + if (memspace == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + vector::Vector* a = new vector::Vector(x->getSize()); vector::Vector* b = new vector::Vector(x->getSize()); @@ -152,8 +164,8 @@ namespace ReSolve { for (index_type i = 0; i < K; ++i) { for (index_type j = 0; j < K; ++j) { - a->update(x->getVectorData(i, memspace), memspace, "cpu"); - b->update(x->getVectorData(j, memspace), memspace, "cpu"); + a->update(x->getVectorData(i, ms), ms, memory::HOST); + b->update(x->getVectorData(j, ms), ms, memory::HOST); ip = handler->dot(a, b, "cpu"); if ( (i != j) && (abs(ip) > 1e-14)) { diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 60020ec5..856bb84d 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -39,16 +39,22 @@ namespace ReSolve { { TestStatus status; + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); vector::Vector* y = new vector::Vector(N); - x->allocate(memspace_); - y->allocate(memspace_); + x->allocate(ms); + y->allocate(ms); - x->setToConst(3.0, memspace_); - y->setToConst(1.0, memspace_); + x->setToConst(3.0, ms); + y->setToConst(1.0, ms); real_type alpha = 0.5; //the result is a vector with y[i] = 2.5; @@ -66,16 +72,22 @@ namespace ReSolve { { TestStatus status; + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); vector::Vector* y = new vector::Vector(N); - x->allocate(memspace_); - y->allocate(memspace_); + x->allocate(ms); + y->allocate(ms); - x->setToConst(0.25, memspace_); - y->setToConst(4.0, memspace_); + x->setToConst(0.25, ms); + y->setToConst(4.0, ms); real_type ans; //the result is N ans = handler->dot(x, y, memspace_); @@ -98,13 +110,19 @@ namespace ReSolve { { TestStatus status; + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); - x->allocate(memspace_); + x->allocate(ms); - x->setToConst(1.25, memspace_); + x->setToConst(1.25, ms); real_type alpha = 3.5; @@ -122,17 +140,23 @@ namespace ReSolve { { TestStatus status; + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N, K); vector::Vector* y = new vector::Vector(N); vector::Vector* alpha = new vector::Vector(K);; - x->allocate(memspace_); - y->allocate(memspace_); - alpha->allocate(memspace_); + x->allocate(ms); + y->allocate(ms); + alpha->allocate(ms); - y->setToConst(2.0, memspace_); - alpha->setToConst(-1.0, memspace_); + y->setToConst(2.0, ms); + alpha->setToConst(-1.0, ms); for (int ii = 0; ii < K; ++ii) { real_type c; if (ii % 2 == 0) { @@ -140,7 +164,7 @@ namespace ReSolve { } else { c = 0.5; } - x->setToConst(ii, c, memspace_); + x->setToConst(ii, c, ms); } index_type r = K % 2; @@ -161,17 +185,23 @@ namespace ReSolve { { TestStatus status; + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N, K); vector::Vector* y = new vector::Vector(N, 2); vector::Vector* res = new vector::Vector(K, 2); - x->allocate(memspace_); - y->allocate(memspace_); - res->allocate(memspace_); + x->allocate(ms); + y->allocate(ms); + res->allocate(ms); - x->setToConst(1.0, memspace_); - y->setToConst(-1.0, memspace_); + x->setToConst(1.0, ms); + y->setToConst(-1.0, ms); handler->massDot2Vec(N, x, K, y, res, memspace_); status *= verifyAnswer(res, (-1.0) * (real_type) N, memspace_); @@ -186,6 +216,13 @@ namespace ReSolve { TestOutcome gemv(index_type N, index_type K) { TestStatus status; + + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* V = new vector::Vector(N, K); // for the test with NO TRANSPOSE @@ -195,17 +232,17 @@ namespace ReSolve { vector::Vector* yT = new vector::Vector(N); vector::Vector* xT = new vector::Vector(K); - V->allocate(memspace_); - yN->allocate(memspace_); - xN->allocate(memspace_); - yT->allocate(memspace_); - xT->allocate(memspace_); - - V->setToConst(1.0, memspace_); - yN->setToConst(-1.0, memspace_); - xN->setToConst(.5, memspace_); - yT->setToConst(-1.0, memspace_); - xT->setToConst(.5, memspace_); + V->allocate(ms); + yN->allocate(ms); + xN->allocate(ms); + yT->allocate(ms); + xT->allocate(ms); + + V->setToConst(1.0, ms); + yN->setToConst(-1.0, ms); + xN->setToConst(.5, ms); + yT->setToConst(-1.0, ms); + xT->setToConst(.5, ms); real_type alpha = -1.0; real_type beta = 1.0; @@ -248,15 +285,15 @@ namespace ReSolve { { bool status = true; if (memspace != "cpu") { - x->copyData(memspace, "cpu"); + x->copyData(memory::DEVICE, memory::HOST); } for (index_type i = 0; i < x->getSize(); ++i) { // std::cout << x->getData("cpu")[i] << "\n"; - if (!isEqual(x->getData("cpu")[i], answer)) { + if (!isEqual(x->getData(memory::HOST)[i], answer)) { std::cout << std::setprecision(16); status = false; - std::cout << "Solution vector element x[" << i << "] = " << x->getData("cpu")[i] + std::cout << "Solution vector element x[" << i << "] = " << x->getData(memory::HOST)[i] << ", expected: " << answer << "\n"; break; }