diff --git a/resolve/vector/Vector.cpp b/resolve/vector/Vector.cpp index 741ed58b..88a8f8be 100644 --- a/resolve/vector/Vector.cpp +++ b/resolve/vector/Vector.cpp @@ -295,7 +295,7 @@ namespace ReSolve { namespace vector { h_data_ = new real_type[n_ * k_]; owns_cpu_data_ = true; } - for (int i = j * n_current_; i < (j + 1 ) * n_current_ * k_; ++i){ + for (int i = j * n_current_; i < (j + 1 ) * n_current_ ; ++i) { h_data_[i] = C; } break; diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 15b41606..b47a1341 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -131,16 +131,45 @@ namespace ReSolve { * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandlerCpu::gemv(char /* transpose */, - index_type /* n */, - index_type /* k */, - const real_type* /* alpha */, - const real_type* /* beta */, - vector::Vector* /* V */, - vector::Vector* /* y */, - vector::Vector* /* x */) + void VectorHandlerCpu::gemv(char transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) { - out::error() << "Not implemented (yet)" << std::endl; + // x = beta*x + alpha*V*y OR x = beta*x + alpha*V^Ty + real_type* V_data = V->getData(memory::HOST); + real_type* y_data = y->getData(memory::HOST); + real_type* x_data = x->getData(memory::HOST); + index_type i, j; + real_type sum; + switch (transpose) { + case 'T': + for (i = 0; i < k; ++i) { + sum = 0; + for (j = 0; j < n; ++j) { + sum += V_data[i * n + j] * y_data[j]; + } + x_data[i] = (*beta) * x_data[i] + (*alpha) * sum; + } + break; + default: + for (i = 0; i < n; ++i) { + sum = 0.0; + for (j = 0; j < k; ++j) { + sum += V_data[n * j + i] * y_data[j]; + } + x_data[i] = (*beta) * x_data[i] + (*alpha) * sum; + } + break; + if (transpose != 'N') { + out::warning() << "Unrecognized transpose option " << transpose + << " in gemv. Using non-transposed multivector.\n"; + } + } // switch } /** @@ -154,9 +183,26 @@ namespace ReSolve { * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerCpu::massAxpy(index_type /* size */, vector::Vector* /* alpha */, index_type /* k */, vector::Vector* /* x */, vector::Vector* /* y */) + void VectorHandlerCpu::massAxpy(index_type size, + vector::Vector* alpha, + index_type k, + vector::Vector* x, + vector::Vector* y) { - out::error() << "Not implemented (yet)" << std::endl; + + real_type* alpha_data = alpha->getData(memory::HOST); + real_type* y_data = y->getData(memory::HOST); + real_type* x_data = x->getData(memory::HOST); + index_type i, j; + real_type sum; + + for (i = 0; i < size; ++i) { + sum = 0.0; + for (j = 0; j < k; ++j) { + sum += x_data[j * size + i] * alpha_data[j]; + } + y_data[i] = y_data[i] - sum; + } } /** @@ -172,9 +218,25 @@ namespace ReSolve { * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerCpu::massDot2Vec(index_type /* size */, vector::Vector* /* V */, index_type /* k */, vector::Vector* /* x */, vector::Vector* /* res */) + void VectorHandlerCpu::massDot2Vec(index_type size, + vector::Vector* V, + index_type k, + vector::Vector* x, + vector::Vector* res) { - out::error() << "Not implemented (yet)" << std::endl; + real_type* res_data = res->getData(memory::HOST); + real_type* x_data = x->getData(memory::HOST); + real_type* V_data = V->getData(memory::HOST); + index_type i, j; + + for (i = 0; i < k; ++i) { + res_data[i] = 0.0; + res_data[i + k] = 0.0; + for (j = 0; j < size; ++j) { + res_data[i] += V_data[i * size + j] * x_data[j]; + res_data[i + k] += V_data[i * size + j] * x_data[j + size]; + } + } } } // namespace ReSolve diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 82b8b9bf..b1447f6d 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -185,12 +185,14 @@ namespace ReSolve { vector::Vector* x = new vector::Vector(N, K); vector::Vector* y = new vector::Vector(N); vector::Vector* alpha = new vector::Vector(K);; + x->allocate(ms); y->allocate(ms); alpha->allocate(ms); - y->setToConst(2.0, ms); alpha->setToConst(-1.0, ms); + y->setToConst(2.0, ms); + for (int ii = 0; ii < K; ++ii) { real_type c; if (ii % 2 == 0) { diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index daed994f..877249e2 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -16,6 +16,9 @@ int main(int, char**) result += test.axpy(50); result += test.scal(50); result += test.infNorm(50); + result += test.gemv(5000, 10); + result += test.massAxpy(100, 10); + result += test.massDot(100, 10); std::cout << "\n"; }