Separate CPU and CUDA methods for csc2csr conversion.
pelesh committed Oct 17, 2023
1 parent e40c43d commit 6ad87aa
Showing 9 changed files with 146 additions and 183 deletions.
2 changes: 1 addition & 1 deletion examples/r_KLU_KLU.cpp
@@ -132,7 +132,7 @@ int main(int argc, char *argv[])
status = KLU->solve(vec_rhs, vec_x);
std::cout<<"KLU solve status: "<<status<<std::endl;
}
// vec_r->update(rhs, "cpu", "cuda");
vec_r->update(rhs, "cpu", "cpu");

matrix_handler->setValuesChanged(true);

1 change: 0 additions & 1 deletion resolve/matrix/CMakeLists.txt
@@ -26,7 +26,6 @@ set(Matrix_HEADER_INSTALL
Csr.hpp
Csc.hpp
MatrixHandler.hpp
MatrixHandlerImpl.hpp
)

# Build shared library ReSolve::matrix
62 changes: 7 additions & 55 deletions resolve/matrix/MatrixHandler.cpp
@@ -66,12 +66,6 @@ namespace ReSolve {
cudaImpl_ = new MatrixHandlerCuda(new_workspace);
}

bool MatrixHandler::getValuesChanged()
{
values_changed_ = cpuImpl_->isValuesChanged();
return this->values_changed_;
}

void MatrixHandler::setValuesChanged(bool toWhat)
{
this->values_changed_ = toWhat;
@@ -269,58 +263,16 @@ namespace ReSolve {

int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace)
{
//it ONLY WORKS WITH CUDA
index_type error_sum = 0;
if (memspace == "cuda") {
LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_;

A_csr->allocateMatrixData("cuda");
index_type n = A_csc->getNumRows();
index_type m = A_csc->getNumRows();
index_type nnz = A_csc->getNnz();
size_t bufferSize;
void* d_work;
cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(),
n,
m,
nnz,
A_csc->getValues("cuda"),
A_csc->getColData("cuda"),
A_csc->getRowData("cuda"),
A_csr->getValues("cuda"),
A_csr->getRowData("cuda"),
A_csr->getColData("cuda"),
CUDA_R_64F,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO,
CUSPARSE_CSR2CSC_ALG1,
&bufferSize);
error_sum += status;
mem_.allocateBufferOnDevice(&d_work, bufferSize);
status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(),
n,
m,
nnz,
A_csc->getValues("cuda"),
A_csc->getColData("cuda"),
A_csc->getRowData("cuda"),
A_csr->getValues("cuda"),
A_csr->getRowData("cuda"),
A_csr->getColData("cuda"),
CUDA_R_64F,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO,
CUSPARSE_CSR2CSC_ALG1,
d_work);
error_sum += status;
return error_sum;
mem_.deleteOnDevice(d_work);
} else {
out::error() << "Not implemented (yet)" << std::endl;
return cudaImpl_->csc2csr(A_csc, A_csr);
} else if (memspace == "cpu") {
out::warning() << "Using untested csc2csr on CPU ..." << std::endl;
return cpuImpl_->csc2csr(A_csc, A_csr);
} else {
out::error() << "csc2csr not implemented for " << memspace << " device." << std::endl;
return -1;
}


}
}

} // namespace ReSolve
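
For context, a minimal standalone sketch of the dispatch pattern the handler now follows: one implementation object per backend, selected by the memory-space string, with an error path for unsupported backends. The class and method names below are illustrative stand-ins, not ReSolve's actual API.

#include <iostream>
#include <memory>
#include <string>

struct ConversionImpl {
  virtual ~ConversionImpl() = default;
  virtual int csc2csr() = 0;  // operands omitted for brevity
};

struct CpuConversionImpl : ConversionImpl {
  int csc2csr() override { std::cout << "CPU conversion\n"; return 0; }
};

struct CudaConversionImpl : ConversionImpl {
  int csc2csr() override { std::cout << "CUDA conversion\n"; return 0; }
};

class Handler {
public:
  Handler()
    : cpuImpl_(std::make_unique<CpuConversionImpl>()),
      cudaImpl_(std::make_unique<CudaConversionImpl>())
  {}

  // Forward to the backend selected by the memory-space string.
  int csc2csr(const std::string& memspace)
  {
    if (memspace == "cuda") {
      return cudaImpl_->csc2csr();
    } else if (memspace == "cpu") {
      return cpuImpl_->csc2csr();
    } else {
      std::cerr << "csc2csr not implemented for " << memspace << " device.\n";
      return -1;
    }
  }

private:
  std::unique_ptr<ConversionImpl> cpuImpl_;
  std::unique_ptr<ConversionImpl> cudaImpl_;
};

int main()
{
  Handler h;
  h.csc2csr("cpu");               // prints "CPU conversion"
  h.csc2csr("cuda");              // prints "CUDA conversion"
  int status = h.csc2csr("hip");  // hits the error branch, returns -1
  std::cout << "status for unsupported backend: " << status << "\n";
  return 0;
}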
3 changes: 1 addition & 2 deletions resolve/matrix/MatrixHandler.hpp
@@ -7,7 +7,6 @@
#include <resolve/Common.hpp>
#include <resolve/MemoryUtils.hpp>

#include <resolve/matrix/MatrixHandlerImpl.hpp>

namespace ReSolve
{
@@ -23,6 +22,7 @@ namespace ReSolve
class Csr;
}
class LinAlgWorkspace;
class MatrixHandlerImpl;
}


@@ -71,7 +71,6 @@ namespace ReSolve {
std::string matrix_type,
std::string memspace);
int Matrix1Norm(matrix::Sparse *A, real_type* norm);
bool getValuesChanged();
void setValuesChanged(bool toWhat);

private:
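
A short aside on the header change above: the #include of MatrixHandlerImpl.hpp can be replaced by a forward declaration because MatrixHandler only stores pointers to the implementation objects; the full class definition is needed only in the .cpp file that calls through them. A condensed, compilable sketch under that assumption (member names mirror the diff, bodies are illustrative, not ReSolve's):

class MatrixHandlerImpl;  // forward declaration (replaces the #include in the header)

class MatrixHandler {
public:
  MatrixHandler() = default;
private:
  MatrixHandlerImpl* cpuImpl_{nullptr};   // OK: pointers to an incomplete type
  MatrixHandlerImpl* cudaImpl_{nullptr};  // compile without the full definition
};

// Only code that dereferences the pointers needs the complete type, which is
// why the include moves out of MatrixHandler.hpp and into MatrixHandler.cpp.
class MatrixHandlerImpl {
public:
  virtual ~MatrixHandlerImpl() = default;
};

int main()
{
  MatrixHandler handler;  // compiles with only the forward declaration above
  (void)handler;
  return 0;
}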
130 changes: 77 additions & 53 deletions resolve/matrix/MatrixHandlerCpu.cpp
@@ -1,4 +1,5 @@
#include <algorithm>
#include <cassert>

#include <resolve/utilities/logger/Logger.hpp>
#include <resolve/vector/Vector.hpp>
@@ -279,60 +280,83 @@ namespace ReSolve {
return -1;
}

// int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace)
// {
// //it ONLY WORKS WITH CUDA
// index_type error_sum = 0;
// if (memspace == "cuda") {
// LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_;

// A_csr->allocateMatrixData("cuda");
// index_type n = A_csc->getNumRows();
// index_type m = A_csc->getNumRows();
// index_type nnz = A_csc->getNnz();
// size_t bufferSize;
// void* d_work;
// cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(),
// n,
// m,
// nnz,
// A_csc->getValues("cuda"),
// A_csc->getColData("cuda"),
// A_csc->getRowData("cuda"),
// A_csr->getValues("cuda"),
// A_csr->getRowData("cuda"),
// A_csr->getColData("cuda"),
// CUDA_R_64F,
// CUSPARSE_ACTION_NUMERIC,
// CUSPARSE_INDEX_BASE_ZERO,
// CUSPARSE_CSR2CSC_ALG1,
// &bufferSize);
// error_sum += status;
// mem_.allocateBufferOnDevice(&d_work, bufferSize);
// status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(),
// n,
// m,
// nnz,
// A_csc->getValues("cuda"),
// A_csc->getColData("cuda"),
// A_csc->getRowData("cuda"),
// A_csr->getValues("cuda"),
// A_csr->getRowData("cuda"),
// A_csr->getColData("cuda"),
// CUDA_R_64F,
// CUSPARSE_ACTION_NUMERIC,
// CUSPARSE_INDEX_BASE_ZERO,
// CUSPARSE_CSR2CSC_ALG1,
// d_work);
// error_sum += status;
// return error_sum;
// mem_.deleteOnDevice(d_work);
// } else {
// out::error() << "Not implemented (yet)" << std::endl;
// return -1;
// }
/**
* @authors Slaven Peles <[email protected]>, Daniel Reynolds (SMU), and
* David Gardner and Carol Woodward (LLNL)
*/
int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr)
{
int error_sum = 0;
assert(A_csc->getNnz() == A_csr->getNnz());
assert(A_csc->getNumRows() == A_csr->getNumColumns());
assert(A_csr->getNumRows() == A_csc->getNumColumns());

index_type nnz = A_csc->getNnz();
index_type n = A_csc->getNumColumns();

// }
index_type* rowIdxCsc = A_csc->getRowData("cpu");
index_type* colPtrCsc = A_csc->getColData("cpu");
real_type* valuesCsc = A_csc->getValues("cpu");

index_type* rowPtrCsr = A_csr->getRowData("cpu");
index_type* colIdxCsr = A_csr->getColData("cpu");
real_type* valuesCsr = A_csr->getValues("cpu");

// Set all CSR row pointers to zero
for (index_type i = 0; i <= n; ++i) {
rowPtrCsr[i] = 0;
}

// Set all CSR values and column indices to zero
for (index_type i = 0; i < nnz; ++i) {
colIdxCsr[i] = 0;
valuesCsr[i] = 0.0;
}

// Compute number of entries per row
for (index_type i = 0; i < nnz; ++i) {
rowPtrCsr[rowIdxCsc[i]]++;
}

// Compute cumulative sum of nnz per row
for (index_type row = 0, rowsum = 0; row < n; ++row)
{
// Store value in row pointer to temp
index_type temp = rowPtrCsr[row];

// Copy cumulative sum to the row pointer
rowPtrCsr[row] = rowsum;

// Update row sum
rowsum += temp;
}
rowPtrCsr[n] = nnz;

for (index_type col = 0; col < n; ++col)
{
// Compute positions of column indices and values in CSR matrix and store them there
// Overwrites CSR row pointers in the process
for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col+1]; jj++)
{
index_type row = rowIdxCsc[jj];
index_type dest = rowPtrCsr[row];

colIdxCsr[dest] = col;
valuesCsr[dest] = valuesCsc[jj];

rowPtrCsr[row]++;
}
}

// Restore CSR row pointer values
for (index_type row = 0, last = 0; row <= n; row++)
{
index_type temp = rowPtrCsr[row];
rowPtrCsr[row] = last;
last = temp;
}

return 0;
}

} // namespace ReSolve
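
The new CPU conversion above is a counting-sort style transpose: count entries per row, prefix-sum the counts into row pointers, scatter column indices and values, then shift the row pointers back. Below is a self-contained sketch of the same steps on a toy 3x3 matrix; the test data and main() are made up for illustration and only the algorithm mirrors the handler code.

#include <cstdio>
#include <vector>

using index_type = int;
using real_type  = double;

int main()
{
  // 3x3 matrix in CSC form:
  //   [ 1 0 2 ]
  //   [ 0 3 0 ]
  //   [ 4 0 5 ]
  const index_type n   = 3;  // rows == cols in this toy case
  const index_type nnz = 5;
  std::vector<index_type> colPtrCsc = {0, 2, 3, 5};
  std::vector<index_type> rowIdxCsc = {0, 2, 1, 0, 2};
  std::vector<real_type>  valuesCsc = {1.0, 4.0, 3.0, 2.0, 5.0};

  std::vector<index_type> rowPtrCsr(n + 1, 0);
  std::vector<index_type> colIdxCsr(nnz, 0);
  std::vector<real_type>  valuesCsr(nnz, 0.0);

  // Count entries per row.
  for (index_type i = 0; i < nnz; ++i)
    rowPtrCsr[rowIdxCsc[i]]++;

  // Exclusive prefix sum turns the counts into row pointers.
  for (index_type row = 0, rowsum = 0; row < n; ++row) {
    index_type temp = rowPtrCsr[row];
    rowPtrCsr[row] = rowsum;
    rowsum += temp;
  }
  rowPtrCsr[n] = nnz;

  // Scatter column indices and values; row pointers advance as slots fill.
  for (index_type col = 0; col < n; ++col) {
    for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col + 1]; ++jj) {
      index_type row  = rowIdxCsc[jj];
      index_type dest = rowPtrCsr[row]++;
      colIdxCsr[dest] = col;
      valuesCsr[dest] = valuesCsc[jj];
    }
  }

  // Shift the row pointers back to their pre-scatter values.
  for (index_type row = 0, last = 0; row <= n; ++row) {
    index_type temp = rowPtrCsr[row];
    rowPtrCsr[row] = last;
    last = temp;
  }

  // Expected CSR: rowPtr = {0, 2, 3, 5}, colIdx = {0, 2, 1, 0, 2},
  // values = {1, 2, 3, 4, 5}.
  for (index_type row = 0; row < n; ++row)
    for (index_type jj = rowPtrCsr[row]; jj < rowPtrCsr[row + 1]; ++jj)
      std::printf("A(%d,%d) = %g\n", row, colIdxCsr[jj], valuesCsr[jj]);
  return 0;
}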
3 changes: 1 addition & 2 deletions resolve/matrix/MatrixHandlerCpu.hpp
@@ -40,7 +40,7 @@ namespace ReSolve {
MatrixHandlerCpu(LinAlgWorkspace* workspace);
virtual ~MatrixHandlerCpu();

// int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer)
int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); // CPU-side CSC to CSR conversion; operates on host pointers
// int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace);

/// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped
@@ -51,7 +51,6 @@ namespace ReSolve {
const real_type* beta,
std::string matrix_type);
virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm);
// bool getValuesChanged();
// void setValuesChanged(bool toWhat);

private: