Enable coexisting GPU and CPU instances of iterative solvers (#147)
* Refactor CUDA and HIP vector kernel(s).

* No need for std::move when initializing logger.

* Clean up sketching functions.

* Separate HIP and CUDA sketching kernels from handler kernels.

* Add sketching handler using PIMPL idiom (a minimal sketch of the pattern follows this list).

* Standardize naming for sketching methods.

* Create separate CPU random sketching implementations.

* Enable CPU and GPU instances of random FGMRES when GPU backend is built.

* Simplify iterative solver test.

* Protect HIP and CUDA sketching kernels with namespaces.

* Document random sketching functions.

* Fix bug in GMRES solver on CPU.
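
For readers unfamiliar with the idiom named above, a minimal sketch of a PIMPL-style sketching handler follows. Only the handler's public calls (setup, Theta, reset) and its (method, device-type) constructor appear in this commit's diff; the backend interface and the factory logic below are illustrative assumptions, not the exact Re::Solve code.

```cpp
#include <stdexcept>

using index_type = int;
struct Vector; // stand-in for ReSolve::vector::Vector; only pointers are used,
               // so a forward declaration suffices

enum class SketchingMethod { CountSketch, FWHT };
enum class DeviceType { Host, Cuda, Hip };

// Backend-agnostic interface; CPU, CUDA, and HIP kernels live behind it.
struct SketchingImpl
{
  virtual ~SketchingImpl() = default;
  virtual int setup(index_type n, index_type k) = 0;    // input and sketch sizes
  virtual int Theta(Vector* input, Vector* output) = 0; // apply the sketching operator
  virtual int reset() = 0;                              // draw a fresh random sketch
};

// One concrete backend, stubbed out for brevity.
struct CpuCountSketch : SketchingImpl
{
  int setup(index_type, index_type) override { return 0; } // allocate labels/signs here
  int Theta(Vector*, Vector*) override { return 0; }       // CPU kernel would go here
  int reset() override { return 0; }
};

// The handler the solver talks to. Backend headers never leak through it,
// which is what lets CPU and GPU solver instances coexist in one build.
class SketchingHandler
{
public:
  SketchingHandler(SketchingMethod method, DeviceType device)
  {
    // Simplified factory: the real code would also dispatch to CUDA/HIP impls.
    if (method == SketchingMethod::CountSketch && device == DeviceType::Host) {
      pimpl_ = new CpuCountSketch();
    } else {
      throw std::runtime_error("backend not available in this sketch");
    }
  }
  ~SketchingHandler() { delete pimpl_; }
  SketchingHandler(const SketchingHandler&) = delete;
  SketchingHandler& operator=(const SketchingHandler&) = delete;

  int setup(index_type n, index_type k) { return pimpl_->setup(n, k); }
  int Theta(Vector* in, Vector* out)    { return pimpl_->Theta(in, out); }
  int reset()                           { return pimpl_->reset(); }

private:
  SketchingImpl* pimpl_ = nullptr;
};
```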

---------

Co-authored-by: kswirydo <[email protected]>
pelesh and kswirydo authored Mar 15, 2024
1 parent b129e0f commit 40529e0
Showing 62 changed files with 2,620 additions and 1,330 deletions.
24 changes: 15 additions & 9 deletions resolve/LinSolverIterativeFGMRES.cpp
@@ -1,3 +1,9 @@
/**
* @file LinSolverIterativeFGMRES.cpp
* @author Kasia Swirydowicz ([email protected])
* @brief Implementation of LinSolverIterativeFGMRES class
*
*/
#include <iostream>
#include <cassert>
#include <cmath>
@@ -52,16 +58,14 @@ namespace ReSolve

LinSolverIterativeFGMRES::~LinSolverIterativeFGMRES()
{
if (d_V_ != nullptr) {
// cudaFree(d_V_);
if (is_solver_set_) {
delete [] h_H_ ;
delete [] h_c_ ;
delete [] h_s_ ;
delete [] h_rs_;
delete d_V_;
delete d_Z_;
}

if (d_Z_ != nullptr) {
// cudaFree(d_Z_);
delete d_Z_;
}

}

int LinSolverIterativeFGMRES::setup(matrix::Sparse* A)
@@ -83,6 +87,8 @@ namespace ReSolve
h_s_ = new real_type[restart_]; // same
h_rs_ = new real_type[restart_ + 1]; // for residual norm history

is_solver_set_ = true;

return 0;
}
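
The is_solver_set_ flag set here pairs with the destructor change above: the host arrays and the V and Z multivectors are freed only when setup() actually allocated them. A minimal standalone illustration of the guarded-free pattern (not Re::Solve code):

```cpp
// Illustration only: free workspace in the destructor only if setup() ran.
struct Workspace
{
  double* h_H = nullptr;
  bool is_set = false;

  void setup(int restart)
  {
    h_H = new double[restart * (restart + 1)];
    is_set = true; // from here on, the destructor has something to free
  }

  ~Workspace()
  {
    if (is_set) { // destroying a never-set-up object is a safe no-op
      delete [] h_H;
    }
  }
};
```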

@@ -241,7 +247,7 @@ namespace ReSolve
vector_handler_->axpy(&h_rs_[j], vec_z, x, memspace_);
}
} else {
mem_.setZeroArrayOnDevice(d_Z_->getData(memspace_), d_Z_->getSize());
d_Z_->setToZero(memspace_);
vec_z->setData( d_Z_->getVectorData(0, memspace_), memspace_);
for(j = 0; j <= i; j++) {
vec_v->setData( d_V_->getVectorData(j, memspace_), memspace_);
7 changes: 7 additions & 0 deletions resolve/LinSolverIterativeFGMRES.hpp
@@ -1,3 +1,9 @@
/**
* @file LinSolverIterativeFGMRES.hpp
* @author Kasia Swirydowicz ([email protected])
* @brief Declaration of LinSolverIterativeFGMRES class
*
*/
#pragma once
#include "Common.hpp"
#include <resolve/matrix/Sparse.hpp>
@@ -57,6 +63,7 @@ namespace ReSolve
GramSchmidt* GS_{nullptr};
LinSolverDirect* LU_solver_{nullptr};
index_type n_{0};
bool is_solver_set_{false};

MemoryHandler mem_; ///< Device memory manager object
};
125 changes: 71 additions & 54 deletions resolve/LinSolverIterativeRandFGMRES.cpp
@@ -1,3 +1,9 @@
/**
* @file LinSolverIterativeRandFGMRES.cpp
* @author Kasia Swirydowicz ([email protected])
* @brief Implementation of LinSolverIterativeRandFGMRES class.
*
*/
#include <iostream>
#include <cassert>
#include <cmath>
@@ -9,9 +15,8 @@
#include <resolve/vector/Vector.hpp>
#include <resolve/GramSchmidt.hpp>
#include <resolve/matrix/MatrixHandler.hpp>
#include <resolve/random/SketchingHandler.hpp>
#include "LinSolverIterativeRandFGMRES.hpp"
#include <resolve/random/RandSketchingCountSketch.hpp>
#include <resolve/random/RandSketchingFWHT.hpp>

namespace ReSolve
{
@@ -24,7 +29,7 @@
{
this->matrix_handler_ = matrix_handler;
this->vector_handler_ = vector_handler;
this->rand_method_ = rand_method;
this->sketching_method_ = rand_method;
this->GS_ = gs;

setMemorySpace();
@@ -50,7 +55,7 @@
{
this->matrix_handler_ = matrix_handler;
this->vector_handler_ = vector_handler;
this->rand_method_ = rand_method;
this->sketching_method_ = rand_method;
this->GS_ = gs;

setMemorySpace();
@@ -63,21 +68,25 @@

d_V_ = nullptr;
d_Z_ = nullptr;

}

LinSolverIterativeRandFGMRES::~LinSolverIterativeRandFGMRES()
{
if (d_V_ != nullptr) {
// cudaFree(d_V_);
if (is_solver_set_) {
delete [] h_H_ ;
delete [] h_c_ ;
delete [] h_s_ ;
delete [] h_rs_;
if (memspace_ == memory::DEVICE) {
mem_.deleteOnDevice(d_aux_);
} else {
delete [] d_aux_;
}
delete d_V_;
delete d_Z_;
delete d_S_;
delete sketching_handler_;
}

if (d_Z_ != nullptr) {
// cudaFree(d_Z_);
delete d_Z_;
}

}

int LinSolverIterativeRandFGMRES::setup(matrix::Sparse* A)
@@ -90,54 +99,57 @@
if (flexible_) {
d_Z_ = new vector_type(n_, restart_ + 1);
} else {
// otherwise Z is just a one vector, not multivector and we dont keep it
// otherwise Z is just one vector, not a multivector, and we don't keep it
d_Z_ = new vector_type(n_);
}
d_Z_->allocate(memspace_);
h_H_ = new real_type[restart_ * (restart_ + 1)];
h_c_ = new real_type[restart_]; // needed for givens
h_s_ = new real_type[restart_]; // same
h_rs_ = new real_type[restart_ + 1]; // for residual norm history
if (memspace_ == memory::DEVICE) {
mem_.allocateArrayOnDevice(&d_aux_, restart_ + 1);
} else {
d_aux_ = new real_type[restart_ + 1];
}
// rand method
// Set randomized method
k_rand_ = n_;
switch(rand_method_) {
switch (sketching_method_) {
case cs:
if (ceil(restart_ * log(n_)) < k_rand_) {
k_rand_ = static_cast<index_type>(std::ceil(restart_ * std::log(static_cast<real_type>(n_))));
}
rand_manager_ = new RandSketchingCountSketch(memspace_);
//set k and n
sketching_handler_ = new SketchingHandler(sketching_method_, device_type_);
// set k and n
break;
case fwht:
if (ceil(2.0 * restart_ * log(n_) / log(restart_)) < k_rand_) {
k_rand_ = static_cast<index_type>(std::ceil(2.0 * restart_ * std::log(n_) / std::log(restart_)));
}
rand_manager_ = new RandSketchingFWHT(memspace_);
sketching_handler_ = new SketchingHandler(sketching_method_, device_type_);
break;
default:
io::Logger::warning() << "Wrong sketching method, setting to default (CountSketch)\n";
rand_method_ = cs;
sketching_method_ = cs;
if (ceil(restart_ * log(n_)) < k_rand_) {
k_rand_ = static_cast<index_type>(std::ceil(restart_ * std::log(n_)));
}
rand_manager_ = new RandSketchingCountSketch(memspace_);
sketching_handler_ = new SketchingHandler(cs, device_type_);
break;
}

rand_manager_->setup(n_, k_rand_);
sketching_handler_->setup(n_, k_rand_);

one_over_k_ = 1.0 / sqrt((real_type) k_rand_);

d_S_ = new vector_type(k_rand_, restart_ + 1);
d_S_->allocate(memspace_);
if (rand_method_ == cs) {
if (sketching_method_ == cs) {
d_S_->setToZero(memspace_);
}

is_solver_set_ = true;

return 0;
}
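
As a standalone illustration of the sketch-size logic in the switch statement above (the formulas are copied from it; the restart and n values are arbitrary):

```cpp
#include <cmath>
#include <iostream>

int main()
{
  const int restart = 200;
  const int n       = 1000000;

  // Count sketch: k = ceil(restart * ln(n)); the solver caps this at n.
  const int k_cs = static_cast<int>(
      std::ceil(restart * std::log(static_cast<double>(n))));

  // FWHT: k = ceil(2 * restart * ln(n) / ln(restart)), likewise capped at n.
  const int k_fwht = static_cast<int>(
      std::ceil(2.0 * restart * std::log(static_cast<double>(n)) /
                std::log(static_cast<double>(restart))));

  // Both sketch sizes come out orders of magnitude below n, so Gram-Schmidt
  // orthogonalizes k-dimensional sketched vectors instead of full-length ones.
  std::cout << "count sketch k = " << k_cs << ", FWHT k = " << k_fwht << "\n";
  return 0;
}
```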

@@ -174,20 +186,17 @@
vec_v->setData( d_V_->getVectorData(0, memspace_), memspace_);
vec_s->setData( d_S_->getVectorData(0, memspace_), memspace_);

rand_manager_->Theta(vec_v, vec_s);
sketching_handler_->Theta(vec_v, vec_s);

if (rand_method_ == fwht){
// cublasDscal(cublas_handle, k_rand, &oneOverK, d_S, 1);
if (sketching_method_ == fwht) {
vector_handler_->scal(&one_over_k_, vec_s, memspace_);
}
mem_.deviceSynchronize();

rnorm = 0.0;
bnorm = vector_handler_->dot(rhs, rhs, memspace_);
rnorm = vector_handler_->dot(vec_s, vec_s, memspace_);
// double rnorm_true = vector_handler_->dot(vec_v, vec_v, memspace_);
//rnorm = ||V_1||
rnorm = sqrt(rnorm);
rnorm = sqrt(rnorm); // rnorm = ||V_1||
bnorm = sqrt(bnorm);
io::Logger::misc() << "it 0: norm of residual "
<< std::scientific << std::setprecision(16)
@@ -234,7 +243,6 @@

// Z_i = (LU)^{-1}*V_i
vec_v->setData( d_V_->getVectorData(i, memspace_), memspace_);
//for (int ii=0; ii<10; ++ii) printf("V_0 [%d] = %16.16f \n ", ii, vec_v->getData(ReSolve::memory::HOST)[ii]);
if (flexible_) {
vec_z->setData( d_Z_->getVectorData(i, memspace_), memspace_);
} else {
@@ -245,34 +253,28 @@
mem_.deviceSynchronize();

// V_{i+1}=A*Z_i

vec_v->setData( d_V_->getVectorData(i + 1, memspace_), memspace_);

matrix_handler_->matvec(A_, vec_z, vec_v, &ONE, &ZERO, "csr", memspace_);

// orthogonalize V[i+1], form a column of h_H_
// this is where it differs from the standard (non-sketched) solver's Gram-Schmidt
vec_s->setData( d_S_->getVectorData(i + 1, memspace_), memspace_);
rand_manager_->Theta(vec_v, vec_s);
if (rand_method_ == fwht){
// cublasDscal(cublas_handle, k_rand, &oneOverK, d_S, 1);
sketching_handler_->Theta(vec_v, vec_s);
if (sketching_method_ == fwht) {
vector_handler_->scal(&one_over_k_, vec_s, memspace_);
}
mem_.deviceSynchronize();
GS_->orthogonalize(k_rand_, d_S_, h_H_, i); //, memspace_);
// now post-process
//checkCudaErrors(cudaMemcpy(d_Hcolumn, &h_H[i * (restart + 1)], sizeof(double) * (i + 1), cudaMemcpyHostToDevice));
if (memspace_ == memory::DEVICE) {
mem_.copyArrayHostToDevice(d_aux_, &h_H_[i * (restart_ + 1)], i + 2);
} else {
for (index_type ii = 0; ii < i + 2; ++ii) {
d_aux_[ii] = h_H_[i * (restart_ + 1) + ii];
}
mem_.copyArrayHostToHost(d_aux_, &h_H_[i * (restart_ + 1)], i + 2);
}
vec_z->setData(d_aux_, memspace_);
vec_z->setCurrentSize(i + 1);
//V(:, i+1) =w-V(:, 1:i)*d_H_col = V(:, i+1)-d_H_col * V(:,1:i);
//checkCudaErrors( cublasDgemv(cublas_handle, CUBLAS_OP_N, n, i + 1, &minusone, d_V, n, d_Hcolumn, 1,&one , &d_V[n * (i + 1)], 1));
// V(:, i+1) = w - V(:, 1:i)*d_H_col = V(:, i+1) - d_H_col*V(:,1:i);

vector_handler_->gemv('N', n_, i + 1, &MINUSONE, &ONE, d_V_, vec_z, vec_v, memspace_ );

@@ -289,7 +291,7 @@
h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k];
h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k];
}
} // if i!=0
} // if (i != 0)
double Hii = h_H_[i * (restart_ + 1) + i];
double Hii1 = h_H_[(i) * (restart_ + 1) + i + 1];
double gam = sqrt(Hii * Hii + Hii1 * Hii1);
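
For context (standard GMRES machinery, not something this commit introduces): gam is the norm used to build the next Givens rotation, which zeroes the subdiagonal entry of the new Hessenberg column so the small least-squares problem stays triangular:

```latex
% c_i = H_{i,i} / gam,  s_i = H_{i+1,i} / gam,  gam = sqrt(H_{i,i}^2 + H_{i+1,i}^2)
\begin{pmatrix} c_i & s_i \\ -s_i & c_i \end{pmatrix}
\begin{pmatrix} H_{i,i} \\ H_{i+1,i} \end{pmatrix}
=
\begin{pmatrix} \sqrt{H_{i,i}^2 + H_{i+1,i}^2} \\ 0 \end{pmatrix}
```

Applying the same rotations to h_rs_ is what keeps the current (sketched) residual norm available as |h_rs_[i+1]| without forming the residual explicitly.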
@@ -371,9 +373,9 @@
matrix_handler_->matvec(A_, x, d_V_, &MINUSONE, &ONE,"csr", memspace_);
if (outer_flag) {

rand_manager_->reset();
sketching_handler_->reset();

if (rand_method_ == cs) {
if (sketching_method_ == cs) {
if (memspace_ == memory::DEVICE) {
mem_.setZeroArrayOnDevice(d_S_->getData(memspace_), d_S_->getSize() * d_S_->getNumVectors());
} else {
Expand All @@ -382,9 +384,8 @@ namespace ReSolve
}
vec_v->setData( d_V_->getVectorData(0, memspace_), memspace_);
vec_s->setData( d_S_->getVectorData(0, memspace_), memspace_);
rand_manager_->Theta(vec_v, vec_s);
if (rand_method_ == fwht){
// cublasDscal(cublas_handle, k_rand, &oneOverK, d_S, 1);
sketching_handler_->Theta(vec_v, vec_s);
if (sketching_method_ == fwht) {
vector_handler_->scal(&one_over_k_, vec_s, memspace_);
}
mem_.deviceSynchronize();
@@ -433,16 +434,22 @@
return 0;
}

/**
* @brief Set sketching method based on input string.
*
* @param[in] method - string describing sketching method
* @return 0 if successful, 1 otherwise.
*/
int LinSolverIterativeRandFGMRES::setSketchingMethod(std::string method)
{
if (method == "count") {
rand_method_ = LinSolverIterativeRandFGMRES::cs;
sketching_method_ = LinSolverIterativeRandFGMRES::cs;
} else if (method == "fwht") {
rand_method_ = LinSolverIterativeRandFGMRES::fwht;
sketching_method_ = LinSolverIterativeRandFGMRES::fwht;
} else {
out::warning() << "Sketching method " << method << " not recognized!\n"
               << "Using default.\n";
rand_method_ = LinSolverIterativeRandFGMRES::cs;
sketching_method_ = LinSolverIterativeRandFGMRES::cs;
return 1;
}
return 0;
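
A short usage sketch: the accepted strings ("count", "fwht") and the 0/1 return convention come from the function above, while the surrounding solver object and matrix A are assumed to exist already.

```cpp
// 'solver' is an already-constructed LinSolverIterativeRandFGMRES and 'A' an
// assembled sparse matrix; both are assumptions of this sketch.
if (solver.setSketchingMethod("fwht") != 0) {
  std::cout << "Unrecognized method; solver fell back to count sketch.\n";
}
solver.setup(A); // the sketching handler is instantiated here, sized from n and restart
```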
@@ -458,6 +465,11 @@
}


/**
* @brief Set memory space and device type based on how MatrixHandler
* and VectorHandler are configured.
*
*/
void LinSolverIterativeRandFGMRES::setMemorySpace()
{
bool is_matrix_handler_cuda = matrix_handler_->getIsCudaEnabled();
@@ -470,10 +482,15 @@
out::error() << "Matrix and vector handler backends are incompatible!\n";
}

if (is_matrix_handler_cuda || is_matrix_handler_hip) {
if (is_matrix_handler_cuda) {
memspace_ = memory::DEVICE;
device_type_ = memory::CUDADEVICE;
} else if (is_matrix_handler_hip) {
memspace_ = memory::DEVICE;
device_type_ = memory::HIPDEVICE;
} else {
memspace_ = memory::HOST;
device_type_ = memory::NONE;
}
}
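
This per-instance dispatch is what the commit title refers to: each solver derives its memory space and device type from the handlers it was constructed with rather than from a process-wide setting, so host and device solvers can coexist. A hypothetical sketch (the handler-wiring helpers and constructor argument order below are assumptions, not the exact API):

```cpp
// Hypothetical: wire up one host-backed and one device-backed handler pair.
// makeHostHandlers()/makeDeviceHandlers() stand in for application setup code.
auto [mat_h, vec_h] = makeHostHandlers();
auto [mat_d, vec_d] = makeDeviceHandlers();

// Each instance infers memspace_ and device_type_ from its own handlers via
// setMemorySpace() above, so the two solvers can run in the same process.
ReSolve::LinSolverIterativeFGMRES     cpu_solver(mat_h, vec_h, gs_host);
ReSolve::LinSolverIterativeRandFGMRES gpu_solver(mat_d, vec_d,
    ReSolve::LinSolverIterativeRandFGMRES::fwht, gs_dev);
```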
