Separate CPU and CUDA methods for csc2csr conversion.
pelesh committed Oct 17, 2023
1 parent e40c43d commit 6ad87aa
Showing 9 changed files with 146 additions and 183 deletions.
2 changes: 1 addition & 1 deletion examples/r_KLU_KLU.cpp
@@ -132,7 +132,7 @@ int main(int argc, char *argv[])
status = KLU->solve(vec_rhs, vec_x);
std::cout<<"KLU solve status: "<<status<<std::endl;
}
// vec_r->update(rhs, "cpu", "cuda");
vec_r->update(rhs, "cpu", "cpu");

matrix_handler->setValuesChanged(true);

1 change: 0 additions & 1 deletion resolve/matrix/CMakeLists.txt
@@ -26,7 +26,6 @@ set(Matrix_HEADER_INSTALL
Csr.hpp
Csc.hpp
MatrixHandler.hpp
MatrixHandlerImpl.hpp
)

# Build shared library ReSolve::matrix
62 changes: 7 additions & 55 deletions resolve/matrix/MatrixHandler.cpp
@@ -66,12 +66,6 @@ namespace ReSolve {
cudaImpl_ = new MatrixHandlerCuda(new_workspace);
}

bool MatrixHandler::getValuesChanged()
{
values_changed_ = cpuImpl_->isValuesChanged();
return this->values_changed_;
}

void MatrixHandler::setValuesChanged(bool toWhat)
{
this->values_changed_ = toWhat;
@@ -269,58 +263,16 @@ namespace ReSolve {

int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace)
{
//it ONLY WORKS WITH CUDA
index_type error_sum = 0;
if (memspace == "cuda") {
LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_;

A_csr->allocateMatrixData("cuda");
index_type n = A_csc->getNumRows();
index_type m = A_csc->getNumRows();
index_type nnz = A_csc->getNnz();
size_t bufferSize;
void* d_work;
cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(),
n,
m,
nnz,
A_csc->getValues("cuda"),
A_csc->getColData("cuda"),
A_csc->getRowData("cuda"),
A_csr->getValues("cuda"),
A_csr->getRowData("cuda"),
A_csr->getColData("cuda"),
CUDA_R_64F,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO,
CUSPARSE_CSR2CSC_ALG1,
&bufferSize);
error_sum += status;
mem_.allocateBufferOnDevice(&d_work, bufferSize);
status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(),
n,
m,
nnz,
A_csc->getValues("cuda"),
A_csc->getColData("cuda"),
A_csc->getRowData("cuda"),
A_csr->getValues("cuda"),
A_csr->getRowData("cuda"),
A_csr->getColData("cuda"),
CUDA_R_64F,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO,
CUSPARSE_CSR2CSC_ALG1,
d_work);
error_sum += status;
return error_sum;
mem_.deleteOnDevice(d_work);
} else {
out::error() << "Not implemented (yet)" << std::endl;
return cudaImpl_->csc2csr(A_csc, A_csr);
} else if (memspace == "cpu") {
out::warning() << "Using untested csc2csr on CPU ..." << std::endl;
return cpuImpl_->csc2csr(A_csc, A_csr);
} else {
out::error() << "csc2csr not implemented for " << memspace << " device." << std::endl;
return -1;
}


}
}

} // namespace ReSolve
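
For context, a minimal standalone sketch of the dispatch pattern the handler now follows: one implementation object per backend, selected by the memory-space string, with an error path for unsupported backends. The class and method names below are illustrative stand-ins, not ReSolve's actual API.

#include <iostream>
#include <memory>
#include <string>

struct ConversionImpl {
  virtual ~ConversionImpl() = default;
  virtual int csc2csr() = 0;  // operands omitted for brevity
};

struct CpuConversionImpl : ConversionImpl {
  int csc2csr() override { std::cout << "CPU conversion\n"; return 0; }
};

struct CudaConversionImpl : ConversionImpl {
  int csc2csr() override { std::cout << "CUDA conversion\n"; return 0; }
};

class Handler {
public:
  Handler()
    : cpuImpl_(std::make_unique<CpuConversionImpl>()),
      cudaImpl_(std::make_unique<CudaConversionImpl>())
  {}

  // Forward to the backend selected by the memory-space string.
  int csc2csr(const std::string& memspace)
  {
    if (memspace == "cuda") {
      return cudaImpl_->csc2csr();
    } else if (memspace == "cpu") {
      return cpuImpl_->csc2csr();
    } else {
      std::cerr << "csc2csr not implemented for " << memspace << " device.\n";
      return -1;
    }
  }

private:
  std::unique_ptr<ConversionImpl> cpuImpl_;
  std::unique_ptr<ConversionImpl> cudaImpl_;
};

int main()
{
  Handler h;
  h.csc2csr("cpu");               // prints "CPU conversion"
  h.csc2csr("cuda");              // prints "CUDA conversion"
  int status = h.csc2csr("hip");  // hits the error branch, returns -1
  std::cout << "status for unsupported backend: " << status << "\n";
  return 0;
}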
3 changes: 1 addition & 2 deletions resolve/matrix/MatrixHandler.hpp
@@ -7,7 +7,6 @@
#include <resolve/Common.hpp>
#include <resolve/MemoryUtils.hpp>

#include <resolve/matrix/MatrixHandlerImpl.hpp>

namespace ReSolve
{
@@ -23,6 +22,7 @@ namespace ReSolve
class Csr;
}
class LinAlgWorkspace;
class MatrixHandlerImpl;
}


@@ -71,7 +71,6 @@ namespace ReSolve {
std::string matrix_type,
std::string memspace);
int Matrix1Norm(matrix::Sparse *A, real_type* norm);
bool getValuesChanged();
void setValuesChanged(bool toWhat);

private:
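
A short aside on the header change above: the #include of MatrixHandlerImpl.hpp can be replaced by a forward declaration because MatrixHandler only stores pointers to the implementation objects; the full class definition is needed only in the .cpp file that calls through them. A condensed, compilable sketch under that assumption (member names mirror the diff, bodies are illustrative, not ReSolve's):

class MatrixHandlerImpl;  // forward declaration (replaces the #include in the header)

class MatrixHandler {
public:
  MatrixHandler() = default;
private:
  MatrixHandlerImpl* cpuImpl_{nullptr};   // OK: pointers to an incomplete type
  MatrixHandlerImpl* cudaImpl_{nullptr};  // compile without the full definition
};

// Only code that dereferences the pointers needs the complete type, which is
// why the include moves out of MatrixHandler.hpp and into MatrixHandler.cpp.
class MatrixHandlerImpl {
public:
  virtual ~MatrixHandlerImpl() = default;
};

int main()
{
  MatrixHandler handler;  // compiles with only the forward declaration above
  (void)handler;
  return 0;
}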
130 changes: 77 additions & 53 deletions resolve/matrix/MatrixHandlerCpu.cpp
@@ -1,4 +1,5 @@
#include <algorithm>
#include <cassert>

#include <resolve/utilities/logger/Logger.hpp>
#include <resolve/vector/Vector.hpp>
@@ -279,60 +280,83 @@ namespace ReSolve {
return -1;
}

// int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace)
// {
// //it ONLY WORKS WITH CUDA
// index_type error_sum = 0;
// if (memspace == "cuda") {
// LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_;

// A_csr->allocateMatrixData("cuda");
// index_type n = A_csc->getNumRows();
// index_type m = A_csc->getNumRows();
// index_type nnz = A_csc->getNnz();
// size_t bufferSize;
// void* d_work;
// cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(),
// n,
// m,
// nnz,
// A_csc->getValues("cuda"),
// A_csc->getColData("cuda"),
// A_csc->getRowData("cuda"),
// A_csr->getValues("cuda"),
// A_csr->getRowData("cuda"),
// A_csr->getColData("cuda"),
// CUDA_R_64F,
// CUSPARSE_ACTION_NUMERIC,
// CUSPARSE_INDEX_BASE_ZERO,
// CUSPARSE_CSR2CSC_ALG1,
// &bufferSize);
// error_sum += status;
// mem_.allocateBufferOnDevice(&d_work, bufferSize);
// status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(),
// n,
// m,
// nnz,
// A_csc->getValues("cuda"),
// A_csc->getColData("cuda"),
// A_csc->getRowData("cuda"),
// A_csr->getValues("cuda"),
// A_csr->getRowData("cuda"),
// A_csr->getColData("cuda"),
// CUDA_R_64F,
// CUSPARSE_ACTION_NUMERIC,
// CUSPARSE_INDEX_BASE_ZERO,
// CUSPARSE_CSR2CSC_ALG1,
// d_work);
// error_sum += status;
// return error_sum;
// mem_.deleteOnDevice(d_work);
// } else {
// out::error() << "Not implemented (yet)" << std::endl;
// return -1;
// }
/**
* @authors Slaven Peles <[email protected]>, Daniel Reynolds (SMU), and
* David Gardner and Carol Woodward (LLNL)
*/
int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr)
{
int error_sum = 0;
assert(A_csc->getNnz() == A_csr->getNnz());
assert(A_csc->getNumRows() == A_csr->getNumColumns());
assert(A_csr->getNumRows() == A_csc->getNumColumns());

index_type nnz = A_csc->getNnz();
index_type n = A_csc->getNumColumns();

// }
index_type* rowIdxCsc = A_csc->getRowData("cpu");
index_type* colPtrCsc = A_csc->getColData("cpu");
real_type* valuesCsc = A_csc->getValues("cpu");

index_type* rowPtrCsr = A_csr->getRowData("cpu");
index_type* colIdxCsr = A_csr->getColData("cpu");
real_type* valuesCsr = A_csr->getValues("cpu");

// Set all CSR row pointers to zero
for (index_type i = 0; i <= n; ++i) {
rowPtrCsr[i] = 0;
}

// Set all CSR values and column indices to zero
for (index_type i = 0; i < nnz; ++i) {
colIdxCsr[i] = 0;
valuesCsr[i] = 0.0;
}

// Compute number of entries per row
for (index_type i = 0; i < nnz; ++i) {
rowPtrCsr[rowIdxCsc[i]]++;
}

// Compute cumulative sum of nnz per row
for (index_type row = 0, rowsum = 0; row < n; ++row)
{
// Store value in row pointer to temp
index_type temp = rowPtrCsr[row];

// Copy cumulative sum to the row pointer
rowPtrCsr[row] = rowsum;

// Update row sum
rowsum += temp;
}
rowPtrCsr[n] = nnz;

for (index_type col = 0; col < n; ++col)
{
// Compute positions of column indices and values in CSR matrix and store them there
// Overwrites CSR row pointers in the process
for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col+1]; jj++)
{
index_type row = rowIdxCsc[jj];
index_type dest = rowPtrCsr[row];

colIdxCsr[dest] = col;
valuesCsr[dest] = valuesCsc[jj];

rowPtrCsr[row]++;
}
}

// Restore CSR row pointer values
for (index_type row = 0, last = 0; row <= n; row++)
{
index_type temp = rowPtrCsr[row];
rowPtrCsr[row] = last;
last = temp;
}

return 0;
}

} // namespace ReSolve
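
The new CPU conversion above is a counting-sort style transpose: count entries per row, prefix-sum the counts into row pointers, scatter column indices and values, then shift the row pointers back. Below is a self-contained sketch of the same steps on a toy 3x3 matrix; the test data and main() are made up for illustration and only the algorithm mirrors the handler code.

#include <cstdio>
#include <vector>

using index_type = int;
using real_type  = double;

int main()
{
  // 3x3 matrix in CSC form:
  //   [ 1 0 2 ]
  //   [ 0 3 0 ]
  //   [ 4 0 5 ]
  const index_type n   = 3;  // rows == cols in this toy case
  const index_type nnz = 5;
  std::vector<index_type> colPtrCsc = {0, 2, 3, 5};
  std::vector<index_type> rowIdxCsc = {0, 2, 1, 0, 2};
  std::vector<real_type>  valuesCsc = {1.0, 4.0, 3.0, 2.0, 5.0};

  std::vector<index_type> rowPtrCsr(n + 1, 0);
  std::vector<index_type> colIdxCsr(nnz, 0);
  std::vector<real_type>  valuesCsr(nnz, 0.0);

  // Count entries per row.
  for (index_type i = 0; i < nnz; ++i)
    rowPtrCsr[rowIdxCsc[i]]++;

  // Exclusive prefix sum turns the counts into row pointers.
  for (index_type row = 0, rowsum = 0; row < n; ++row) {
    index_type temp = rowPtrCsr[row];
    rowPtrCsr[row] = rowsum;
    rowsum += temp;
  }
  rowPtrCsr[n] = nnz;

  // Scatter column indices and values; row pointers advance as slots fill.
  for (index_type col = 0; col < n; ++col) {
    for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col + 1]; ++jj) {
      index_type row  = rowIdxCsc[jj];
      index_type dest = rowPtrCsr[row]++;
      colIdxCsr[dest] = col;
      valuesCsr[dest] = valuesCsc[jj];
    }
  }

  // Shift the row pointers back to their pre-scatter values.
  for (index_type row = 0, last = 0; row <= n; ++row) {
    index_type temp = rowPtrCsr[row];
    rowPtrCsr[row] = last;
    last = temp;
  }

  // Expected CSR: rowPtr = {0, 2, 3, 5}, colIdx = {0, 2, 1, 0, 2},
  // values = {1, 2, 3, 4, 5}.
  for (index_type row = 0; row < n; ++row)
    for (index_type jj = rowPtrCsr[row]; jj < rowPtrCsr[row + 1]; ++jj)
      std::printf("A(%d,%d) = %g\n", row, colIdxCsr[jj], valuesCsr[jj]);
  return 0;
}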
3 changes: 1 addition & 2 deletions resolve/matrix/MatrixHandlerCpu.hpp
@@ -40,7 +40,7 @@ namespace ReSolve {
MatrixHandlerCpu(LinAlgWorkspace* workspace);
virtual ~MatrixHandlerCpu();

// int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer)
int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); // CPU-side CSC to CSR conversion; operates on host pointers
// int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace);

/// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped
@@ -51,7 +51,6 @@ namespace ReSolve {
const real_type* beta,
std::string matrix_type);
virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm);
// bool getValuesChanged();
// void setValuesChanged(bool toWhat);

private: