Skip to content

Commit

Permalink
timings for GLU, adding guards for timings for cusolverrf
Browse files Browse the repository at this point in the history
  • Loading branch information
Kasia Swirydowicz authored and Kasia Swirydowicz committed Jul 31, 2024
1 parent 578be22 commit 0b9eb34
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 11 deletions.
42 changes: 41 additions & 1 deletion examples/r_KLU_GLU.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <string>
#include <iostream>
#include <iomanip>
#include <sys/time.h>

#include <resolve/matrix/Coo.hpp>
#include <resolve/matrix/Csr.hpp>
Expand Down Expand Up @@ -52,8 +53,20 @@ int main(int argc, char *argv[])
ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU;
ReSolve::LinSolverDirectCuSolverGLU* GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA);

struct timeval t1;
struct timeval t2;

double time_io = 0.0;
double time_convert = 0.0;
double time_factorize = 0.0;
double time_solve = 0.0;
for (int i = 0; i < numSystems; ++i)
{
time_io = 0.0;
time_convert = 0.0;
time_factorize = 0.0;
time_solve = 0.0;

index_type j = 4 + i * 2;
fileId = argv[j];
rhsId = argv[j + 1];
Expand Down Expand Up @@ -82,6 +95,8 @@ int main(int argc, char *argv[])
std::cout << "Failed to open file " << rhsFileNameFull << "\n";
return -1;
}
// Time system I/O
gettimeofday(&t1, 0);
if (i == 0) {
A_coo = ReSolve::io::readMatrixFromFile(mat_file);
A = new ReSolve::matrix::Csr(A_coo->getNumRows(),
Expand All @@ -101,11 +116,15 @@ int main(int argc, char *argv[])
ReSolve::io::readAndUpdateMatrix(mat_file, A_coo);
ReSolve::io::readAndUpdateRhs(rhs_file, &rhs);
}
gettimeofday(&t2, 0);
time_io += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;
std::cout<<"Finished reading the matrix and rhs, size: "<<A->getNumRows()<<" x "<<A->getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<<A->symmetric()<< ", Expanded? "<<A->expanded()<<std::endl;
mat_file.close();
rhs_file.close();

//Now convert to CSR.
// Time matrix conversion
gettimeofday(&t1, 0);
if (i < 1) {
A->updateFromCoo(A_coo, ReSolve::memory::HOST);
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST);
Expand All @@ -114,28 +133,44 @@ int main(int argc, char *argv[])
A->updateFromCoo(A_coo, ReSolve::memory::DEVICE);
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
}
gettimeofday(&t2, 0);
time_convert += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;
std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<<std::endl;
//Now call direct solver
int status;
if (i < 1) {
// Time factorization (CPU part)
gettimeofday(&t1, 0);
KLU->setup(A);
status = KLU->analyze();
std::cout<<"KLU analysis status: "<<status<<std::endl;
status = KLU->factorize();
std::cout<<"KLU factorization status: "<<status<<std::endl;
gettimeofday(&t2, 0);
matrix_type* L = KLU->getLFactor();
matrix_type* U = KLU->getUFactor();
if (L == nullptr) {printf("ERROR");}
index_type* P = KLU->getPOrdering();
index_type* Q = KLU->getQOrdering();
GLU->setup(A, L, U, P, Q);
time_factorize += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;
std::cout<<"KLU factorization status: "<<status<<std::endl;
// Time solve (CPU part)
gettimeofday(&t1, 0);
status = GLU->solve(vec_rhs, vec_x);
gettimeofday(&t2, 0);
time_solve += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;
std::cout<<"GLU solve status: "<<status<<std::endl;
} else {
std::cout<<"Using CUSOLVER GLU"<<std::endl;
gettimeofday(&t1, 0);
status = GLU->refactorize();
gettimeofday(&t2, 0);
time_factorize += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;
std::cout<<"CUSOLVER GLU refactorization status: "<<status<<std::endl;
gettimeofday(&t1, 0);
status = GLU->solve(vec_rhs, vec_x);
gettimeofday(&t2, 0);
time_solve += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;
std::cout<<"CUSOLVER GLU solve status: "<<status<<std::endl;
}

Expand All @@ -158,6 +193,11 @@ int main(int argc, char *argv[])
<< std::scientific << std::setprecision(16)
<< sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::DEVICE))/bnorm << "\n";

// Print timing summary
std::cout << std::defaultfloat << std::setprecision(4)
<< "I/O time: " << time_io << ", conversion time: " << time_convert
<< ", factorization time: " << time_factorize << ", solve time: " << time_solve
<< "\nTOTAL: " << time_factorize + time_solve << "\n";
} // for (int i = 0; i < numSystems; ++i)

//now DELETE
Expand Down
25 changes: 15 additions & 10 deletions examples/r_KLU_rf_FGMRES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <iostream>
#include <iomanip>
#include <sys/time.h>
#include <sys/time.h>

#include <resolve/matrix/Coo.hpp>
#include <resolve/matrix/Csr.hpp>
Expand Down Expand Up @@ -67,6 +68,7 @@ int main(int argc, char *argv[])
double time_convert = 0.0;
double time_factorize = 0.0;
double time_solve = 0.0;
double time_ir = 0.0;

index_type j = 4 + i * 2;
fileId = argv[j];
Expand Down Expand Up @@ -222,7 +224,7 @@ int main(int argc, char *argv[])
FGMRES->resetMatrix(A);
FGMRES->setupPreconditioner("LU", Rf);
gettimeofday(&t2, 0);
time_solve += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;
time_ir += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;

matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", ReSolve::memory::DEVICE);

Expand All @@ -241,24 +243,27 @@ int main(int argc, char *argv[])
matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE);
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);

if(!std::isnan(norm_r) && !std::isinf(norm_r)) {
if(!std::isnan(norm_r) && !std::isinf(norm_r) && !std::isnan(norm_x) && !isinf(norm_x)) {
gettimeofday(&t1, 0);
FGMRES->solve(vec_rhs, vec_x);
gettimeofday(&t2, 0);
time_solve += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;

time_ir += (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0;
std::cout << "FGMRES: init nrm: "
<< std::scientific << std::setprecision(16)
<< FGMRES->getInitResidualNorm()/norm_b
<< " final nrm: "
<< FGMRES->getFinalResidualNorm()/norm_b
<< " iter: " << FGMRES->getNumIter() << "\n";
<< std::scientific << std::setprecision(16)
<< FGMRES->getInitResidualNorm()/norm_b
<< " final nrm: "
<< FGMRES->getFinalResidualNorm()/norm_b
<< " iter: " << FGMRES->getNumIter() << "\n";
} else {
std::cout << "This is a bad system, IR is not performed (inf or nan in x or r) " << std::endl;
}
}
// Print timing summary
std::cout << std::defaultfloat << std::setprecision(4)
<< "I/O time: " << time_io << ", conversion time: " << time_convert
<< ", factorization time: " << time_factorize << ", solve time: " << time_solve
<< "\nTOTAL: " << time_factorize + time_solve << "\n";
<< ", IR time: " << time_ir
<< "\nTOTAL: " << time_factorize + time_solve + time_ir << "\n";
} // for (int i = 0; i < numSystems; ++i)

delete A;
Expand Down

0 comments on commit 0b9eb34

Please sign in to comment.