diff --git a/resolve/vector/CMakeLists.txt b/resolve/vector/CMakeLists.txt index 8fa9cd59..f418bf55 100644 --- a/resolve/vector/CMakeLists.txt +++ b/resolve/vector/CMakeLists.txt @@ -10,6 +10,8 @@ set(Vector_SRC Vector.cpp VectorHandler.cpp + VectorHandlerCpu.cpp + VectorHandlerCuda.cpp ) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 71bfce9c..9648058e 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #include "VectorHandler.hpp" namespace ReSolve { @@ -14,6 +17,8 @@ namespace ReSolve { */ VectorHandler::VectorHandler() { + cpuImpl_ = new VectorHandlerCpu(); + isCpuEnabled_ = true; } /** @@ -21,9 +26,24 @@ namespace ReSolve { * * @param new_workspace - workspace to be set */ - VectorHandler:: VectorHandler(LinAlgWorkspace* new_workspace) + VectorHandler::VectorHandler(LinAlgWorkspace* new_workspace) { - workspace_ = new_workspace; + cpuImpl_ = new VectorHandlerCpu(new_workspace); + isCpuEnabled_ = true; + } + + /** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ + VectorHandler::VectorHandler(LinAlgWorkspaceCUDA* new_workspace) + { + cudaImpl_ = new VectorHandlerCuda(new_workspace); + cpuImpl_ = new VectorHandlerCpu(); + + isCudaEnabled_ = true; + isCpuEnabled_ = true; } /** @@ -46,28 +66,11 @@ namespace ReSolve { real_type VectorHandler::dot(vector::Vector* x, vector::Vector* y, std::string memspace) { - if (memspace == "cuda" ){ - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - double nrm = 0.0; - cublasStatus_t st= cublasDdot (handle_cublas, x->getSize(), x->getData("cuda"), 1, y->getData("cuda"), 1, &nrm); - if (st!=0) {printf("dot product crashed with code %d \n", st);} - return nrm; + if (memspace == "cuda" ) { + return cudaImpl_->dot(x, y); } else { if (memspace == "cpu") { - real_type* x_data = x->getData("cpu"); - real_type* y_data = y->getData("cpu"); - real_type sum = 0.0; - real_type c = 0.0; - real_type t, y; - for (int i = 0; i < x->getSize(); ++i){ - y = (x_data[i] * y_data[i]) - c; - t = sum + y; - c = (t - sum) - y; - sum = t; - // sum += (x_data[i] * y_data[i]); - } - return sum; + return cpuImpl_->dot(x, y); } else { out::error() << "Not implemented (yet)" << std::endl; return NAN; @@ -85,20 +88,11 @@ namespace ReSolve { */ void VectorHandler::scal(const real_type* alpha, vector::Vector* x, std::string memspace) { - if (memspace == "cuda" ) { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData("cuda"), 1); - if (st!=0) { - ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; - } + if (memspace == "cuda" ) { + cudaImpl_->scal(alpha, x); } else { if (memspace == "cpu") { - real_type* x_data = x->getData("cpu"); - - for (int i = 0; i < x->getSize(); ++i){ - x_data[i] *= (*alpha); - } + cpuImpl_->scal(alpha, x); } else { out::error() << "Not implemented (yet)" << std::endl; } @@ -114,26 +108,14 @@ namespace ReSolve { * @param[in] memspace String containg memspace (cpu or cuda) * */ - void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace ) + void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace) { //AXPY: y = alpha 
* x + y - if (memspace == "cuda" ) { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDaxpy(handle_cublas, - x->getSize(), - alpha, - x->getData("cuda"), - 1, - y->getData("cuda"), - 1); + if (memspace == "cuda" ) { + cudaImpl_->axpy(alpha, x, y); } else { if (memspace == "cpu") { - real_type* x_data = x->getData("cpu"); - real_type* y_data = y->getData("cpu"); - for (int i = 0; i < x->getSize(); ++i){ - y_data[i] = (*alpha) * x_data[i] + y_data[i]; - } + cpuImpl_->axpy(alpha, x, y); } else { out::error() <<"Not implemented (yet)" << std::endl; } @@ -160,38 +142,9 @@ namespace ReSolve { void VectorHandler::gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, std::string memspace) { if (memspace == "cuda") { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - if (transpose == "T") { - - cublasDgemv(handle_cublas, - CUBLAS_OP_T, - n, - k, - alpha, - V->getData("cuda"), - n, - y->getData("cuda"), - 1, - beta, - x->getData("cuda"), - 1); - - } else { - cublasDgemv(handle_cublas, - CUBLAS_OP_N, - n, - k, - alpha, - V->getData("cuda"), - n, - y->getData("cuda"), - 1, - beta, - x->getData("cuda"), - 1); - } - + cudaImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + } else if (memspace == "cpu") { + cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); } else { out::error() << "Not implemented (yet)" << std::endl; } @@ -213,26 +166,9 @@ namespace ReSolve { { using namespace constants; if (memspace == "cuda") { - if (k < 200) { - mass_axpy(size, k, x->getData("cuda"), y->getData("cuda"),alpha->getData("cuda")); - } else { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDgemm(handle_cublas, - CUBLAS_OP_N, - CUBLAS_OP_N, - size, // m - 1, // n - k + 1, // k - &MINUSONE, // alpha - x->getData("cuda"), // A - size, // lda - alpha->getData("cuda"), // B - k + 1, // ldb - &ONE, - y->getData("cuda"), // c - size); // ldc - } + cudaImpl_->massAxpy(size, alpha, k, x, y); + } else if (memspace == "cpu") { + cpuImpl_->massAxpy(size, alpha, k, x, y); } else { out::error() << "Not implemented (yet)" << std::endl; } @@ -254,29 +190,10 @@ namespace ReSolve { */ void VectorHandler::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, std::string memspace) { - using namespace constants; - if (memspace == "cuda") { - if (k < 200) { - mass_inner_product_two_vectors(size, k, x->getData("cuda") , x->getData(1, "cuda"), V->getData("cuda"), res->getData("cuda")); - } else { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDgemm(handle_cublas, - CUBLAS_OP_T, - CUBLAS_OP_N, - k + 1, //m - 2, //n - size, //k - &ONE, //alpha - V->getData("cuda"), //A - size, //lda - x->getData("cuda"), //B - size, //ldb - &ZERO, - res->getData("cuda"), //c - k + 1); //ldc - } + cudaImpl_->massDot2Vec(size, V, k, x, res); + } else if (memspace == "cpu") { + cpuImpl_->massDot2Vec(size, V, k, x, res); } else { out::error() << "Not implemented (yet)" << std::endl; } diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index bf2d37d7..53ec0bca 100644 --- 
a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -7,7 +7,9 @@ namespace ReSolve { class Vector; } + class VectorHandlerImpl; class LinAlgWorkspace; + class LinAlgWorkspaceCUDA; } @@ -16,13 +18,14 @@ namespace ReSolve { //namespace vector { public: VectorHandler(); VectorHandler(LinAlgWorkspace* new_workspace); + VectorHandler(LinAlgWorkspaceCUDA* new_workspace); ~VectorHandler(); //y = alpha x + y - void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace ); + void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace); //dot: x \cdot y - real_type dot(vector::Vector* x, vector::Vector* y, std::string memspace ); + real_type dot(vector::Vector* x, vector::Vector* y, std::string memspace); //scal = alpha * x void scal(const real_type* alpha, vector::Vector* x, std::string memspace); @@ -40,9 +43,21 @@ namespace ReSolve { //namespace vector { * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - void gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, std::string memspace); + void gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x, + std::string memspace); private: - LinAlgWorkspace* workspace_; + VectorHandlerImpl* cpuImpl_{nullptr}; + VectorHandlerImpl* cudaImpl_{nullptr}; + + bool isCpuEnabled_{false}; + bool isCudaEnabled_{false}; }; } //} // namespace ReSolve::vector diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp new file mode 100644 index 00000000..593ba5b0 --- /dev/null +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -0,0 +1,159 @@ +#include + +#include +#include +#include +#include +#include +#include "VectorHandlerCpu.hpp" + +namespace ReSolve { + using out = io::Logger; + + /** + * @brief empty constructor that does absolutely nothing + */ + VectorHandlerCpu::VectorHandlerCpu() + { + } + + /** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ + VectorHandlerCpu:: VectorHandlerCpu(LinAlgWorkspace* new_workspace) + { + workspace_ = new_workspace; + } + + /** + * @brief destructor + */ + VectorHandlerCpu::~VectorHandlerCpu() + { + //delete the workspace TODO + } + + /** + * @brief dot product of two vectors i.e, a = x^Ty + * + * @param[in] x The first vector + * @param[in] y The second vector + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @return dot product (real number) of _x_ and _y_ + */ + + real_type VectorHandlerCpu::dot(vector::Vector* x, vector::Vector* y) + { + real_type* x_data = x->getData("cpu"); + real_type* y_data = y->getData("cpu"); + real_type sum = 0.0; + real_type c = 0.0; + // real_type t, y; + for (int i = 0; i < x->getSize(); ++i) { + real_type y = (x_data[i] * y_data[i]) - c; + real_type t = sum + y; + c = (t - sum) - y; + sum = t; + // sum += (x_data[i] * y_data[i]); + } + return sum; + } + + /** + * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant + * + * @param[in] alpha The constant + * @param[in,out] x The vector + * @param memspace string containg memspace (cpu or cuda) + * + */ + void VectorHandlerCpu::scal(const real_type* alpha, vector::Vector* x) + { + real_type* x_data = x->getData("cpu"); + + for (int i = 0; i < x->getSize(); ++i){ + 
x_data[i] *= (*alpha); + } + } + + /** + * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * + * @param[in] alpha The constant + * @param[in] x The first vector + * @param[in,out] y The second vector (result is return in y) + * @param[in] memspace String containg memspace (cpu or cuda) + * + */ + void VectorHandlerCpu::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + { + //AXPY: y = alpha * x + y + real_type* x_data = x->getData("cpu"); + real_type* y_data = y->getData("cpu"); + for (int i = 0; i < x->getSize(); ++i) { + y_data[i] = (*alpha) * x_data[i] + y_data[i]; + } + } + + /** + * @brief gemv computes matrix-vector product where both matrix and vectors are dense. + * i.e., x = beta*x + alpha*V*y + * + * @param[in] Transpose - yes (T) or no (N) + * @param[in] n Number of rows in (non-transposed) matrix + * @param[in] k Number of columns in (non-transposed) + * @param[in] alpha Constant real number + * @param[in] beta Constant real number + * @param[in] V Multivector containing the matrix, organized columnwise + * @param[in] y Vector, k x 1 if N and n x 1 if T + * @param[in,out] x Vector, n x 1 if N and k x 1 if T + * @param[in] memspace cpu or cuda (for now) + * + * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * + */ + void VectorHandlerCpu::gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x) + { + out::error() << "Not implemented (yet)" << std::endl; + } + + /** + * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * + * @param[in] size number of elements in y + * @param[in] alpha vector size k x 1 + * @param[in] x (multi)vector size size x k + * @param[in,out] y vector size size x 1 (this is where the result is stored) + * @param[in] memspace string containg memspace (cpu or cuda) + * + * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() + * + */ + void VectorHandlerCpu::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + { + out::error() << "Not implemented (yet)" << std::endl; + } + + /** + * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) + * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) + * + * @param[in] size Number of elements in a single vector in V + * @param[in] V Multivector; k vectors size n x 1 each + * @param[in] k Number of vectors in V + * @param[in] x Multivector; 2 vectors size n x 1 each + * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * + */ + void VectorHandlerCpu::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) + { + out::error() << "Not implemented (yet)" << std::endl; + } + +} // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp new file mode 100644 index 00000000..a5f09162 --- /dev/null +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -0,0 +1,57 @@ +#pragma once +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + class LinAlgWorkspace; + class VectorHandlerImpl; +} + + +namespace ReSolve { //namespace vector { + class VectorHandlerCpu : public VectorHandlerImpl + { + public: + VectorHandlerCpu(); + 
VectorHandlerCpu(LinAlgWorkspace* workspace); + virtual ~VectorHandlerCpu(); + + //y = alpha x + y + virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + + //dot: x \cdot y + virtual real_type dot(vector::Vector* x, vector::Vector* y); + + //scal = alpha * x + virtual void scal(const real_type* alpha, vector::Vector* x); + + //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + + //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + //Size = n + virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. + */ + virtual void gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); + private: + LinAlgWorkspace* workspace_; + }; + +} //} // namespace ReSolve::vector diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp new file mode 100644 index 00000000..69044f37 --- /dev/null +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -0,0 +1,236 @@ +#include + +#include +#include +#include +#include +#include +#include "VectorHandlerCuda.hpp" + +namespace ReSolve { + using out = io::Logger; + + /** + * @brief empty constructor that does absolutely nothing + */ + VectorHandlerCuda::VectorHandlerCuda() + { + } + + /** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ + VectorHandlerCuda:: VectorHandlerCuda(LinAlgWorkspaceCUDA* new_workspace) + { + workspace_ = new_workspace; + } + + /** + * @brief destructor + */ + VectorHandlerCuda::~VectorHandlerCuda() + { + //delete the workspace TODO + } + + /** + * @brief dot product of two vectors i.e, a = x^Ty + * + * @param[in] x The first vector + * @param[in] y The second vector + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @return dot product (real number) of _x_ and _y_ + */ + + real_type VectorHandlerCuda::dot(vector::Vector* x, vector::Vector* y) + { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + double nrm = 0.0; + cublasStatus_t st= cublasDdot (handle_cublas, x->getSize(), x->getData("cuda"), 1, y->getData("cuda"), 1, &nrm); + if (st!=0) {printf("dot product crashed with code %d \n", st);} + return nrm; + } + + /** + * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant + * + * @param[in] alpha The constant + * @param[in,out] x The vector + * @param memspace string containg memspace (cpu or cuda) + * + */ + void VectorHandlerCuda::scal(const real_type* alpha, vector::Vector* x) + { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData("cuda"), 1); + if (st!=0) { + ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; + } + } + + /** + * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * + * @param[in] alpha The constant + * @param[in] x The 
first vector + * @param[in,out] y The second vector (result is return in y) + * @param[in] memspace String containg memspace (cpu or cuda) + * + */ + void VectorHandlerCuda::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + { + //AXPY: y = alpha * x + y + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDaxpy(handle_cublas, + x->getSize(), + alpha, + x->getData("cuda"), + 1, + y->getData("cuda"), + 1); + } + + /** + * @brief gemv computes matrix-vector product where both matrix and vectors are dense. + * i.e., x = beta*x + alpha*V*y + * + * @param[in] Transpose - yes (T) or no (N) + * @param[in] n Number of rows in (non-transposed) matrix + * @param[in] k Number of columns in (non-transposed) + * @param[in] alpha Constant real number + * @param[in] beta Constant real number + * @param[in] V Multivector containing the matrix, organized columnwise + * @param[in] y Vector, k x 1 if N and n x 1 if T + * @param[in,out] x Vector, n x 1 if N and k x 1 if T + * @param[in] memspace cpu or cuda (for now) + * + * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * + */ + void VectorHandlerCuda::gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) + { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + if (transpose == "T") { + + cublasDgemv(handle_cublas, + CUBLAS_OP_T, + n, + k, + alpha, + V->getData("cuda"), + n, + y->getData("cuda"), + 1, + beta, + x->getData("cuda"), + 1); + + } else { + cublasDgemv(handle_cublas, + CUBLAS_OP_N, + n, + k, + alpha, + V->getData("cuda"), + n, + y->getData("cuda"), + 1, + beta, + x->getData("cuda"), + 1); + } + } + + /** + * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * + * @param[in] size number of elements in y + * @param[in] alpha vector size k x 1 + * @param[in] x (multi)vector size size x k + * @param[in,out] y vector size size x 1 (this is where the result is stored) + * @param[in] memspace string containg memspace (cpu or cuda) + * + * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() + * + */ + void VectorHandlerCuda::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + { + using namespace constants; + if (k < 200) { + mass_axpy(size, k, x->getData("cuda"), y->getData("cuda"),alpha->getData("cuda")); + } else { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDgemm(handle_cublas, + CUBLAS_OP_N, + CUBLAS_OP_N, + size, // m + 1, // n + k + 1, // k + &MINUSONE, // alpha + x->getData("cuda"), // A + size, // lda + alpha->getData("cuda"), // B + k + 1, // ldb + &ONE, + y->getData("cuda"), // c + size); // ldc + } + } + + /** + * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector + * (a dense multivector consisting of k vectors size n) and x is k x 2 dense + * multivector (a multivector consisiting of two vectors size n each) + * + * @param[in] size Number of elements in a single vector in V + * @param[in] V Multivector; k vectors size n x 1 each + * @param[in] k Number of vectors in V + * @param[in] x Multivector; 2 vectors size n x 1 each + * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @pre _size_ > 0, 
_k_ > 0, size = x->getSize(), _res_ needs to be allocated + * + */ + void VectorHandlerCuda::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) + { + using namespace constants; + + if (k < 200) { + mass_inner_product_two_vectors(size, k, x->getData("cuda") , x->getData(1, "cuda"), V->getData("cuda"), res->getData("cuda")); + } else { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDgemm(handle_cublas, + CUBLAS_OP_T, + CUBLAS_OP_N, + k + 1, //m + 2, //n + size, //k + &ONE, //alpha + V->getData("cuda"), //A + size, //lda + x->getData("cuda"), //B + size, //ldb + &ZERO, + res->getData("cuda"), //c + k + 1); //ldc + } + } + +} // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp new file mode 100644 index 00000000..0ee2752d --- /dev/null +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -0,0 +1,57 @@ +#pragma once +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + class LinAlgWorkspaceCUDA; + class VectorHandlerImpl; +} + + +namespace ReSolve { //namespace vector { + class VectorHandlerCuda : public VectorHandlerImpl + { + public: + VectorHandlerCuda(); + VectorHandlerCuda(LinAlgWorkspaceCUDA* workspace); + virtual ~VectorHandlerCuda(); + + //y = alpha x + y + virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + + //dot: x \cdot y + virtual real_type dot(vector::Vector* x, vector::Vector* y); + + //scal = alpha * x + virtual void scal(const real_type* alpha, vector::Vector* x); + + //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + + //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + //Size = n + virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
+ */ + virtual void gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); + private: + LinAlgWorkspaceCUDA* workspace_; + }; + +} //} // namespace ReSolve::vector diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp new file mode 100644 index 00000000..229a7461 --- /dev/null +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -0,0 +1,57 @@ +#pragma once +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + class VectorHandlerCpu; + class VectorHandlerCuda; +} + + +namespace ReSolve +{ + class VectorHandlerImpl + { + public: + VectorHandlerImpl() + {} + virtual ~VectorHandlerImpl() + {} + + //y = alpha x + y + virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y ) = 0; + + //dot: x \cdot y + virtual real_type dot(vector::Vector* x, vector::Vector* y ) = 0; + + //scal = alpha * x + virtual void scal(const real_type* alpha, vector::Vector* x) = 0; + + //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; + + //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + //Size = n + virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) = 0; + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. + */ + virtual void gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) = 0; + }; + +} //} // namespace ReSolve::vector diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 2d801c49..eddd800e 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -66,8 +66,7 @@ namespace ReSolve { break; } - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler* handler = new ReSolve::VectorHandler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* V = new vector::Vector(N, 3); // we will be using a space of 3 vectors real_type* H = new real_type[6]; //in this case, Hessenberg matrix is 3 x 2 @@ -114,7 +113,7 @@ namespace ReSolve { status *= verifyAnswer(V, 3, handler, memspace_); - delete workspace; + delete handler; delete [] H; delete V; delete GS; @@ -125,6 +124,21 @@ namespace ReSolve { private: std::string memspace_{"cuda"}; + ReSolve::VectorHandler* createVectorHandler() + { + if (memspace_ == "cpu") { // TODO: Fix memory leak here + LinAlgWorkspace* workspace = new LinAlgWorkspace(); + return new VectorHandler(workspace); + } else if (memspace_ == "cuda") { + LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + return new VectorHandler(workspace); + } else { + std::cout << "Invalid memory space " << memspace_ << "\n"; + } + return nullptr; + } + + // x is a multivector containing K vectors bool verifyAnswer(vector::Vector* x, index_type K, ReSolve::VectorHandler* handler, std::string memspace) { diff --git 
a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index ad0d3cfb..cc84f25b 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -38,8 +38,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); vector::Vector* y = new vector::Vector(N); @@ -52,10 +51,10 @@ namespace ReSolve { real_type alpha = 0.5; //the result is a vector with y[i] = 2.5; - handler.axpy(&alpha, x, y, memspace_); + handler->axpy(&alpha, x, y, memspace_); status *= verifyAnswer(y, 2.5, memspace_); - delete workspace; + delete handler; delete x; delete y; @@ -66,8 +65,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); vector::Vector* y = new vector::Vector(N); @@ -79,7 +77,7 @@ namespace ReSolve { y->setToConst(4.0, memspace_); real_type ans; //the result is N - ans = handler.dot(x, y, memspace_); + ans = handler->dot(x, y, memspace_); bool st = true;; if (ans != (real_type) N) { @@ -88,7 +86,7 @@ namespace ReSolve { } status *= st; - delete workspace; + delete handler; delete x; delete y; @@ -99,8 +97,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); @@ -111,10 +108,10 @@ namespace ReSolve { real_type alpha = 3.5; //the answer is x[i] = 4.375; - handler.scal(&alpha, x, memspace_); + handler->scal(&alpha, x, memspace_); status *= verifyAnswer(x, 4.375, memspace_); - delete workspace; + delete handler; delete x; return status.report(__func__); @@ -124,8 +121,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N, K); vector::Vector* y = new vector::Vector(N); @@ -148,11 +144,11 @@ namespace ReSolve { index_type r = K % 2; real_type res = (real_type) ((floor((real_type) K / 2.0) + r) * 1.0 + floor((real_type) K / 2.0) * (-0.5)); - handler.massAxpy(N, alpha, K, x, y, memspace_); + handler->massAxpy(N, alpha, K, x, y, memspace_); status *= verifyAnswer(y, 2.0 - res, memspace_); - delete workspace; + delete handler; delete x; delete y; delete alpha; @@ -164,8 +160,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N, K); vector::Vector* y = new vector::Vector(N, 2); @@ -176,11 +171,11 @@ namespace ReSolve { x->setToConst(1.0, memspace_); y->setToConst(-1.0, memspace_); - handler.massDot2Vec(N, x, K, y, res, memspace_); + handler->massDot2Vec(N, x, K, y, res, memspace_); status *= verifyAnswer(res, (-1.0) * (real_type) N, memspace_); - delete workspace; + delete handler; delete x; delete y; delete res; @@ -190,8 +185,8 @@ namespace ReSolve { TestOutcome gemv(index_type N, index_type K) { TestStatus 
status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + // ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* V = new vector::Vector(N, K); // for the test with NO TRANSPOSE vector::Vector* yN = new vector::Vector(K); @@ -214,9 +209,9 @@ namespace ReSolve { real_type alpha = -1.0; real_type beta = 1.0; - handler.gemv("N", N, K, &alpha, &beta, V, yN, xN, memspace_); + handler->gemv("N", N, K, &alpha, &beta, V, yN, xN, memspace_); status *= verifyAnswer(xN, (real_type) (K) + 0.5, memspace_); - handler.gemv("T", N, K, &alpha, &beta, V, yT, xT, memspace_); + handler->gemv("T", N, K, &alpha, &beta, V, yT, xT, memspace_); status *= verifyAnswer(xT, (real_type) (N) + 0.5, memspace_); return status.report(__func__); @@ -225,7 +220,22 @@ namespace ReSolve { private: std::string memspace_{"cpu"}; - // we can verify through norm but that would defeat the purpose of testing vector handler... + ReSolve::VectorHandler* createVectorHandler() + { + if (memspace_ == "cpu") { + LinAlgWorkspace* workspace = new LinAlgWorkspace(); + return new VectorHandler(workspace); + } else if (memspace_ == "cuda") { + LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + return new VectorHandler(workspace); + } else { + std::cout << "Invalid memory space " << memspace_ << "\n"; + } + return nullptr; + } + + // we can verify through norm but that would defeat the purpose of testing vector handler ... bool verifyAnswer(vector::Vector* x, real_type answer, std::string memspace) { bool status = true;