diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index e57eea10..6adbd923 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -391,7 +391,9 @@ namespace ReSolve } if (refactorizationMethod_ == "cusolverrf") { matrix::Csc* L_csc = dynamic_cast(L_); - matrix::Csc* U_csc = dynamic_cast(U_); + matrix::Csc* U_csc = dynamic_cast(U_); + L_csc->syncData(memory::DEVICE); + U_csc->syncData(memory::DEVICE); matrix::Csr* L_csr = new matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); matrix::Csr* U_csr = new matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); matrixHandler_->csc2csr(L_csc, L_csr, memory::DEVICE); diff --git a/resolve/matrix/Coo.cpp b/resolve/matrix/Coo.cpp index fe6dfc19..cb3316a9 100644 --- a/resolve/matrix/Coo.cpp +++ b/resolve/matrix/Coo.cpp @@ -1,6 +1,7 @@ #include // <-- includes memcpy #include -#include +#include +#include #include #include "Coo.hpp" @@ -132,7 +133,7 @@ namespace ReSolve index_type* matrix::Coo::getRowData(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_row_data_; @@ -146,7 +147,7 @@ namespace ReSolve index_type* matrix::Coo::getColData(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_col_data_; @@ -160,7 +161,7 @@ namespace ReSolve real_type* matrix::Coo::getValues(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_val_data_; @@ -302,44 +303,56 @@ namespace ReSolve switch (memspace) { case HOST: - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if ((h_row_data_ == nullptr) != (h_col_data_ == nullptr)) { - out::error() << "In Coo::syncData one of host row or column data is null!\n"; - } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - h_row_data_ = new index_type[nnz_]; - h_col_data_ = new index_type[nnz_]; - owns_cpu_data_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_vals_ = true; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; + if (h_data_updated_) { + out::misc() << "In Coo::syncData trying to sync host, but host already up to date!\n"; + return 0; + } + if (!d_data_updated_) { + out::error() << "In Coo::syncData trying to sync host with device, but device is out of date!\n"; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) != (h_col_data_ == nullptr)) { + out::error() << "In Coo::syncData one of host row or column data is null!\n"; + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { + h_row_data_ = new index_type[nnz_]; + h_col_data_ = new index_type[nnz_]; + owns_cpu_data_ = true; + } + if (h_val_data_ == nullptr) { + h_val_data_ = new real_type[nnz_]; + owns_cpu_vals_ = true; } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; return 0; case DEVICE: - if ((d_data_updated_ == false) && (h_data_updated_ == true)) { - if ((d_row_data_ == nullptr) != (d_col_data_ == nullptr)) { - out::error() << "In Coo::syncData one of device row or column data is null!\n"; - } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_data_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_vals_ = true; - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; + if (d_data_updated_) { + out::misc() << "In Coo::syncData trying to sync device, but device already up to date!\n"; + return 0; + } + if (!h_data_updated_) { + out::error() << "In Coo::syncData trying to sync device with host, but host is out of date!\n"; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) != (d_col_data_ == nullptr)) { + out::error() << "In Coo::syncData one of device row or column data is null!\n"; } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { + mem_.allocateArrayOnDevice(&d_row_data_, nnz_); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + owns_gpu_data_ = true; + } + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_vals_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; return 0; default: return 1; diff --git a/resolve/matrix/Csc.cpp b/resolve/matrix/Csc.cpp index 03601be2..899f37a3 100644 --- a/resolve/matrix/Csc.cpp +++ b/resolve/matrix/Csc.cpp @@ -1,5 +1,6 @@ #include // <-- includes memcpy #include +#include #include #include "Csc.hpp" @@ -33,7 +34,7 @@ namespace ReSolve index_type* matrix::Csc::getRowData(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_row_data_; @@ -47,7 +48,7 @@ namespace ReSolve index_type* matrix::Csc::getColData(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_col_data_; @@ -61,7 +62,7 @@ namespace ReSolve real_type* matrix::Csc::getValues(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_val_data_; @@ -209,44 +210,56 @@ namespace ReSolve switch(memspace) { case HOST: - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if ((h_row_data_ == nullptr) != (h_col_data_ == nullptr)) { - out::error() << "In Csc::syncData one of host row or column data is null!\n"; - } - if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) { - h_col_data_ = new index_type[m_ + 1]; - h_row_data_ = new index_type[nnz_]; - owns_cpu_data_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_vals_ = true; - } - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, m_ + 1); - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; + if (h_data_updated_) { + out::misc() << "In Csc::syncData trying to sync host, but host already up to date!\n"; + return 0; + } + if (!d_data_updated_) { + out::error() << "In Csc::syncData trying to sync host with device, but device is out of date!\n"; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) != (h_col_data_ == nullptr)) { + out::error() << "In Csc::syncData one of host row or column data is null!\n"; + } + if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) { + h_col_data_ = new index_type[m_ + 1]; + h_row_data_ = new index_type[nnz_]; + owns_cpu_data_ = true; + } + if (h_val_data_ == nullptr) { + h_val_data_ = new real_type[nnz_]; + owns_cpu_vals_ = true; } + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, m_ + 1); + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; return 0; case DEVICE: - if ((d_data_updated_ == false) && (h_data_updated_ == true)) { - if ((d_row_data_ == nullptr) != (d_col_data_ == nullptr)) { - out::error() << "In Csc::syncData one of device row or column data is null!\n"; - } - if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); - mem_.allocateArrayOnDevice(&d_row_data_, nnz_); - owns_gpu_data_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_vals_ = true; - } - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, m_ + 1); - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; + if (d_data_updated_) { + out::misc() << "In Csc::syncData trying to sync device, but device already up to date!\n"; + return 0; + } + if (!h_data_updated_) { + out::error() << "In Csc::syncData trying to sync device with host, but host is out of date!\n"; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) != (d_col_data_ == nullptr)) { + out::error() << "In Csc::syncData one of device row or column data is null!\n"; } + if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) { + mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); + mem_.allocateArrayOnDevice(&d_row_data_, nnz_); + owns_gpu_data_ = true; + } + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_vals_ = true; + } + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, m_ + 1); + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; return 0; default: return 1; diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index 63ce1f8e..1e4dacb1 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -151,7 +151,7 @@ namespace ReSolve index_type* matrix::Csr::getRowData(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_row_data_; @@ -165,7 +165,7 @@ namespace ReSolve index_type* matrix::Csr::getColData(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_col_data_; @@ -179,7 +179,7 @@ namespace ReSolve real_type* matrix::Csr::getValues(memory::MemorySpace memspace) { using namespace ReSolve::memory; - syncData(memspace); + switch (memspace) { case HOST: return this->h_val_data_; @@ -319,47 +319,59 @@ namespace ReSolve switch (memspace) { case HOST: //check if we need to copy or not - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if ((h_row_data_ == nullptr) != (h_col_data_ == nullptr)) { - out::error() << "In Csr::syncData one of host row or column data is null!\n"; - } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { - h_row_data_ = new index_type[n_ + 1]; - h_col_data_ = new index_type[nnz_]; - owns_cpu_data_ = true; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_]; - owns_cpu_vals_ = true; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); - h_data_updated_ = true; + if (h_data_updated_) { + out::misc() << "In Csr::syncData trying to sync host, but host already up to date!\n"; + return 0; + } + if (!d_data_updated_) { + out::error() << "In Csr::syncData trying to sync host with device, but device is out of date!\n"; + assert(d_data_updated_); + } + if ((h_row_data_ == nullptr) != (h_col_data_ == nullptr)) { + out::error() << "In Csr::syncData one of host row or column data is null!\n"; + } + if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) { + h_row_data_ = new index_type[n_ + 1]; + h_col_data_ = new index_type[nnz_]; + owns_cpu_data_ = true; + } + if (h_val_data_ == nullptr) { + h_val_data_ = new real_type[nnz_]; + owns_cpu_vals_ = true; } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); + h_data_updated_ = true; return 0; case DEVICE: - if ((d_data_updated_ == false) && (h_data_updated_ == true)) { - if ((d_row_data_ == nullptr) != (d_col_data_ == nullptr)) { - out::error() << "In Csr::syncData one of device row or column data is null!\n"; - } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_data_ = true; - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_vals_ = true; - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); - d_data_updated_ = true; + if (d_data_updated_) { + out::misc() << "In Csr::syncData trying to sync device, but device already up to date!\n"; + return 0; + } + if (!h_data_updated_) { + out::error() << "In Csr::syncData trying to sync device with host, but host is out of date!\n"; + assert(h_data_updated_); + } + if ((d_row_data_ == nullptr) != (d_col_data_ == nullptr)) { + out::error() << "In Csr::syncData one of device row or column data is null!\n"; + } + if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_); + owns_gpu_data_ = true; } + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_); + owns_gpu_vals_ = true; + } + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); + d_data_updated_ = true; return 0; default: - return -1; + return 1; } // switch } diff --git a/tests/functionality/testKLU_GLU.cpp b/tests/functionality/testKLU_GLU.cpp index 53f0870b..9cf993d7 100644 --- a/tests/functionality/testKLU_GLU.cpp +++ b/tests/functionality/testKLU_GLU.cpp @@ -57,6 +57,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1); + A->syncData(ReSolve::memory::DEVICE); mat1.close(); // Read first rhs vector @@ -165,6 +166,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::io::updateMatrixFromFile(mat2, A); + A->syncData(ReSolve::memory::DEVICE); mat2.close(); // Load the second rhs vector diff --git a/tests/functionality/testKLU_Rf.cpp b/tests/functionality/testKLU_Rf.cpp index 60cd4afa..f9975460 100644 --- a/tests/functionality/testKLU_Rf.cpp +++ b/tests/functionality/testKLU_Rf.cpp @@ -57,6 +57,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1); + A->syncData(ReSolve::memory::DEVICE); mat1.close(); // Read first rhs vector @@ -145,6 +146,8 @@ int main(int argc, char *argv[]) ReSolve::matrix::Csc* L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); + L_csc->syncData(ReSolve::memory::DEVICE); + U_csc->syncData(ReSolve::memory::DEVICE); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); error_sum += matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); @@ -164,6 +167,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::io::updateMatrixFromFile(mat2, A); + A->syncData(ReSolve::memory::DEVICE); mat2.close(); // Load the second rhs vector diff --git a/tests/functionality/testKLU_Rf_FGMRES.cpp b/tests/functionality/testKLU_Rf_FGMRES.cpp index 46542d42..8587d910 100644 --- a/tests/functionality/testKLU_Rf_FGMRES.cpp +++ b/tests/functionality/testKLU_Rf_FGMRES.cpp @@ -77,6 +77,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1); + A->syncData(ReSolve::memory::DEVICE); mat1.close(); // Read first rhs vector @@ -171,6 +172,8 @@ int main(int argc, char *argv[]) ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + L_csc->syncData(ReSolve::memory::DEVICE); + U_csc->syncData(ReSolve::memory::DEVICE); error_sum += matrix_handler->csc2csr(L_csc,L, ReSolve::memory::DEVICE); error_sum += matrix_handler->csc2csr(U_csc,U, ReSolve::memory::DEVICE); @@ -198,6 +201,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::io::updateMatrixFromFile(mat2, A); + A->syncData(ReSolve::memory::DEVICE); mat2.close(); // Load the second rhs vector diff --git a/tests/functionality/testKLU_RocSolver.cpp b/tests/functionality/testKLU_RocSolver.cpp index 97759667..0ceba109 100644 --- a/tests/functionality/testKLU_RocSolver.cpp +++ b/tests/functionality/testKLU_RocSolver.cpp @@ -61,6 +61,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1); + A->syncData(ReSolve::memory::DEVICE); mat1.close(); // Read first rhs vector @@ -173,6 +174,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::io::updateMatrixFromFile(mat2, A); + A->syncData(ReSolve::memory::DEVICE); mat2.close(); // Load the second rhs vector diff --git a/tests/functionality/testKLU_RocSolver_FGMRES.cpp b/tests/functionality/testKLU_RocSolver_FGMRES.cpp index 4d27a84b..c388140d 100644 --- a/tests/functionality/testKLU_RocSolver_FGMRES.cpp +++ b/tests/functionality/testKLU_RocSolver_FGMRES.cpp @@ -77,6 +77,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1); + A->syncData(ReSolve::memory::DEVICE); mat1.close(); // Read first rhs vector @@ -193,6 +194,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::io::updateMatrixFromFile(mat2, A); + A->syncData(ReSolve::memory::DEVICE); mat2.close(); // Load the second rhs vector diff --git a/tests/functionality/testSysGLU.cpp b/tests/functionality/testSysGLU.cpp index cf2aba15..c903229f 100644 --- a/tests/functionality/testSysGLU.cpp +++ b/tests/functionality/testSysGLU.cpp @@ -66,6 +66,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1); + A->syncData(ReSolve::memory::DEVICE); mat1.close(); // Read first rhs vector @@ -202,6 +203,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::io::updateMatrixFromFile(mat2, A); + A->syncData(ReSolve::memory::DEVICE); mat2.close(); // Load the second rhs vector diff --git a/tests/functionality/testSysRefactor.cpp b/tests/functionality/testSysRefactor.cpp index 74b039ce..1ae563d7 100644 --- a/tests/functionality/testSysRefactor.cpp +++ b/tests/functionality/testSysRefactor.cpp @@ -92,6 +92,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1, true); + A->syncData(ReSolve::memory::DEVICE); mat1.close(); // Read first rhs vector @@ -224,6 +225,7 @@ int main(int argc, char *argv[]) return -1; } ReSolve::io::updateMatrixFromFile(mat2, A); + A->syncData(ReSolve::memory::DEVICE); mat2.close(); // Load the second rhs vector