From 0b9eb34b0a18702e6a6f38162f376e98a429b213 Mon Sep 17 00:00:00 2001 From: Kasia Swirydowicz Date: Wed, 31 Jul 2024 12:52:28 -0500 Subject: [PATCH] timings for GLU, adding guards for timings for cusolverrf --- examples/r_KLU_GLU.cpp | 42 +++++++++++++++++++++++++++++++++++- examples/r_KLU_rf_FGMRES.cpp | 25 ++++++++++++--------- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index a050e324..376e7bea 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -52,8 +53,20 @@ int main(int argc, char *argv[]) ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; ReSolve::LinSolverDirectCuSolverGLU* GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); + struct timeval t1; + struct timeval t2; + + double time_io = 0.0; + double time_convert = 0.0; + double time_factorize = 0.0; + double time_solve = 0.0; for (int i = 0; i < numSystems; ++i) { + time_io = 0.0; + time_convert = 0.0; + time_factorize = 0.0; + time_solve = 0.0; + index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -82,6 +95,8 @@ int main(int argc, char *argv[]) std::cout << "Failed to open file " << rhsFileNameFull << "\n"; return -1; } + // Time system I/O + gettimeofday(&t1, 0); if (i == 0) { A_coo = ReSolve::io::readMatrixFromFile(mat_file); A = new ReSolve::matrix::Csr(A_coo->getNumRows(), @@ -101,11 +116,15 @@ int main(int argc, char *argv[]) ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } + gettimeofday(&t2, 0); + time_io += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<updateFromCoo(A_coo, ReSolve::memory::HOST); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); @@ -114,28 +133,44 @@ int main(int argc, char *argv[]) A->updateFromCoo(A_coo, ReSolve::memory::DEVICE); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } + gettimeofday(&t2, 0); + time_convert += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setup(A); status = KLU->analyze(); std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<getLFactor(); matrix_type* U = KLU->getUFactor(); if (L == nullptr) {printf("ERROR");} index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); GLU->setup(A, L, U, P, Q); + time_factorize += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; + std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); + gettimeofday(&t2, 0); + time_solve += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; std::cout<<"GLU solve status: "<refactorize(); + gettimeofday(&t2, 0); + time_factorize += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; std::cout<<"CUSOLVER GLU refactorization status: "<solve(vec_rhs, vec_x); + gettimeofday(&t2, 0); + time_solve += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; std::cout<<"CUSOLVER GLU solve status: "<dot(vec_r, vec_r, ReSolve::memory::DEVICE))/bnorm << "\n"; + // Print timing summary + std::cout << std::defaultfloat << std::setprecision(4) + << "I/O time: " << time_io << ", conversion time: " << time_convert + << ", factorization time: " << time_factorize << ", solve time: " << time_solve + << "\nTOTAL: " << time_factorize + time_solve << "\n"; } // for (int i = 0; i < numSystems; ++i) //now DELETE diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index 99f57520..d8fe1f72 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -67,6 +68,7 @@ int main(int argc, char *argv[]) double time_convert = 0.0; double time_factorize = 0.0; double time_solve = 0.0; + double time_ir = 0.0; index_type j = 4 + i * 2; fileId = argv[j]; @@ -222,7 +224,7 @@ int main(int argc, char *argv[]) FGMRES->resetMatrix(A); FGMRES->setupPreconditioner("LU", Rf); gettimeofday(&t2, 0); - time_solve += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; + time_ir += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE); @@ -241,24 +243,27 @@ int main(int argc, char *argv[]) matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - if(!std::isnan(norm_r) && !std::isinf(norm_r)) { + if(!std::isnan(norm_r) && !std::isinf(norm_r) && !std::isnan(norm_x) && !isinf(norm_x)) { gettimeofday(&t1, 0); FGMRES->solve(vec_rhs, vec_x); gettimeofday(&t2, 0); - time_solve += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; - + time_ir += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; std::cout << "FGMRES: init nrm: " - << std::scientific << std::setprecision(16) - << FGMRES->getInitResidualNorm()/norm_b - << " final nrm: " - << FGMRES->getFinalResidualNorm()/norm_b - << " iter: " << FGMRES->getNumIter() << "\n"; + << std::scientific << std::setprecision(16) + << FGMRES->getInitResidualNorm()/norm_b + << " final nrm: " + << FGMRES->getFinalResidualNorm()/norm_b + << " iter: " << FGMRES->getNumIter() << "\n"; + } else { + std::cout << "This is a bad system, IR is not performed (inf or nan in x or r) " << std::endl; } } + // Print timing summary std::cout << std::defaultfloat << std::setprecision(4) << "I/O time: " << time_io << ", conversion time: " << time_convert << ", factorization time: " << time_factorize << ", solve time: " << time_solve - << "\nTOTAL: " << time_factorize + time_solve << "\n"; + << ", IR time: " << time_ir + << "\nTOTAL: " << time_factorize + time_solve + time_ir << "\n"; } // for (int i = 0; i < numSystems; ++i) delete A;