Skip to content

Commit

Permalink
Create backends for GPU memory management
Browse files Browse the repository at this point in the history
  • Loading branch information
pelesh committed Oct 14, 2023
1 parent 3672542 commit 93f5ac1
Show file tree
Hide file tree
Showing 41 changed files with 837 additions and 444 deletions.
56 changes: 37 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
endif()

option(RESOLVE_TEST_WITH_BSUB "Use `jsrun` instead of `mpirun` commands when running tests" OFF)
option(RESOLVE_USE_KLU "Use KLU, AMD and COLAMD libraries from SuiteSparse" ON)
option(RESOLVE_USE_KLU "Use KLU, AMD and COLAMD libraries from SuiteSparse" ON)
option(RESOLVE_USE_GPU "Use GPU device for computations" ON)
option(RESOLVE_USE_CUDA "Use CUDA language and SDK" ON)
set(RESOLVE_CTEST_OUTPUT_DIR ${PROJECT_BINARY_DIR} CACHE PATH "Directory where CTest outputs are saved")

set(CMAKE_MACOSX_RPATH 1)
Expand All @@ -47,12 +49,6 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
# TODO: Set up clang-format
#include(./cmake/clang-format)

# Configure CUDA
include(CheckLanguage)
enable_language(CUDA)
check_language(CUDA)


if (RESOLVE_USE_KLU)
include(FindKLU)
if(NOT KLU_LIBRARY)
Expand All @@ -63,22 +59,42 @@ else()
message(STATUS "Not using SuiteSparse KLU")
endif()

if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()
include(CheckLanguage)

# Configure CUDA
if(RESOLVE_USE_CUDA)
enable_language(CUDA)
check_language(CUDA)

if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 60 CACHE STRING "Selects CUDA architectures")
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 60 CACHE STRING "Selects CUDA architectures")
endif()

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")

include(ReSolveFindCudaLibraries)
else()
message(STATUS "Not using CUDA")
endif()

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
# The binary dir is already a global include directory
configure_file(
${CMAKE_SOURCE_DIR}/resolve/resolve_defs.hpp.in
${CMAKE_BINARY_DIR}/resolve/resolve_defs.hpp)

# include build directory for Fortran name mangling header
include_directories(${CMAKE_BINARY_DIR})

# Link in required cuda dependencies
#find_package(CUDAToolkit REQUIRED)
install(
FILES ${CMAKE_BINARY_DIR}/resolve/resolve_defs.hpp
DESTINATION include/resolve
)

include(ReSolveFindCudaLibraries)

include_directories(${CMAKE_SOURCE_DIR})

Expand Down Expand Up @@ -114,8 +130,10 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfigVersion.cmake"
DESTINATION share/resolve/cmake)

# Add examples
add_subdirectory(examples)
# Add examples (for now only CUDA examples are available)
if(RESOLVE_USE_CUDA)
add_subdirectory(examples)
endif(RESOLVE_USE_CUDA)

# Add tests
add_subdirectory(tests)
52 changes: 35 additions & 17 deletions resolve/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,8 @@ set(ReSolve_SRC
)


set(ReSolve_SRC_CUDA
cudaKernels.cu
memoryUtils.cu
)

set(ReSolve_HEADER_INSTALL
Common.hpp
cudaKernels.h
cusolver_defs.hpp
LinAlgWorkspace.hpp
LinSolver.hpp
Expand All @@ -37,18 +31,18 @@ set(ReSolve_HEADER_INSTALL
RefactorizationSolver.hpp
SystemSolver.hpp
GramSchmidt.hpp
memoryUtils.hpp
MemoryUtils.hpp
)

set_source_files_properties(${ReSolve_SRC_CUDA} PROPERTIES LANGUAGE CUDA)
# If GPU support is not enabled, add dummy device backend
if(NOT RESOLVE_USE_GPU)
add_subdirectory(cpu)
endif()

# First create CUDA backend (this should really be CUDA _API_ backend, separate backend will be needed for CUDA SDK)
add_library(resolve_backend_cuda SHARED ${ReSolve_SRC_CUDA})
target_link_libraries(resolve_backend_cuda PUBLIC resolve_cuda)
target_include_directories(resolve_backend_cuda INTERFACE
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>
)
if(RESOLVE_USE_CUDA)
add_subdirectory(cuda)
endif()

# Next build vector and matrix objects that may use this backend.
add_subdirectory(vector)
Expand All @@ -57,9 +51,33 @@ add_subdirectory(matrix)

# Build shared library ReSolve
add_library(resolve_tpl INTERFACE)
target_link_libraries(resolve_tpl INTERFACE resolve_cuda KLU)

install(TARGETS resolve_matrix resolve_vector resolve_backend_cuda resolve_logger resolve_tpl EXPORT ReSolveTargets)
if(RESOLVE_USE_KLU)
target_link_libraries(resolve_tpl INTERFACE KLU)
endif(RESOLVE_USE_KLU)

if(RESOLVE_USE_CUDA)
target_link_libraries(resolve_tpl INTERFACE resolve_cuda)
endif(RESOLVE_USE_CUDA)


set(ReSolve_Targets_List
resolve_matrix
resolve_vector
resolve_logger
resolve_tpl
)

if(RESOLVE_USE_GPU)
if(RESOLVE_USE_CUDA)
set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cuda)
endif()
else(RESOLVE_USE_GPU)
set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cpu)
endif(RESOLVE_USE_GPU)


install(TARGETS ${ReSolve_Targets_List} EXPORT ReSolveTargets)

add_library(ReSolve SHARED ${ReSolve_SRC})

Expand All @@ -69,7 +87,7 @@ target_include_directories(ReSolve INTERFACE
)

# TODO: Make this PRIVATE dependency (requires refactoring ReSolve code)
target_link_libraries(ReSolve PUBLIC resolve_matrix resolve_vector resolve_backend_cuda resolve_logger resolve_tpl)
target_link_libraries(ReSolve PUBLIC ${ReSolve_Targets_List})

install(TARGETS ReSolve
EXPORT ReSolveTargets
Expand Down
5 changes: 2 additions & 3 deletions resolve/LinAlgWorkspace.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include <resolve/memoryUtils.hpp>
#include "LinAlgWorkspace.hpp"

namespace ReSolve
Expand All @@ -24,8 +23,8 @@ namespace ReSolve

LinAlgWorkspaceCUDA::~LinAlgWorkspaceCUDA()
{
if (buffer_spmv_ != nullptr) deleteOnDevice(buffer_spmv_);
if (buffer_1norm_ != nullptr) deleteOnDevice(buffer_1norm_);
if (buffer_spmv_ != nullptr) mem_.deleteOnDevice(buffer_spmv_);
if (buffer_1norm_ != nullptr) mem_.deleteOnDevice(buffer_1norm_);
cusparseDestroy(handle_cusparse_);
cusolverSpDestroy(handle_cusolversp_);
cublasDestroy(handle_cublas_);
Expand Down
5 changes: 4 additions & 1 deletion resolve/LinAlgWorkspace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
#include "cusparse.h"
#include "cusolverSp.h"

#include <resolve/MemoryUtils.hpp>

namespace ReSolve
{
class LinAlgWorkspace
{
public:
LinAlgWorkspace();
~LinAlgWorkspace();
private:
protected:
MemoryHandler mem_;
};


Expand Down
6 changes: 3 additions & 3 deletions resolve/LinSolverDirectCuSolverGLU.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <cstring> // includes memcpy
#include <vector>
#include <resolve/memoryUtils.hpp>

#include <resolve/vector/Vector.hpp>
#include <resolve/matrix/Csr.hpp>
#include "LinSolverDirectCuSolverGLU.hpp"
Expand All @@ -14,7 +14,7 @@ namespace ReSolve

LinSolverDirectCuSolverGLU::~LinSolverDirectCuSolverGLU()
{
deleteOnDevice(glu_buffer_);
mem_.deleteOnDevice(glu_buffer_);
cusparseDestroyMatDescr(descr_M_);
cusparseDestroyMatDescr(descr_A_);
cusolverSpDestroyGluInfo(info_M_);
Expand Down Expand Up @@ -64,7 +64,7 @@ namespace ReSolve
status_cusolver_ = cusolverSpDgluBufferSize(handle_cusolversp_, info_M_, &buffer_size);
error_sum += status_cusolver_;

allocateBufferOnDevice(&glu_buffer_, buffer_size);
mem_.allocateBufferOnDevice(&glu_buffer_, buffer_size);

status_cusolver_ = cusolverSpDgluAnalysis(handle_cusolversp_, info_M_, glu_buffer_);
error_sum += status_cusolver_;
Expand Down
3 changes: 3 additions & 0 deletions resolve/LinSolverDirectCuSolverGLU.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <resolve/LinAlgWorkspace.hpp>
#include "LinSolver.hpp"
#include "cusolver_defs.hpp"
#include <resolve/MemoryUtils.hpp>

namespace ReSolve
{
Expand Down Expand Up @@ -45,5 +46,7 @@ namespace ReSolve
void* glu_buffer_;
double r_nrminf_;
int ite_refine_succ_;

MemoryHandler mem_; ///< Device memory manager object
};
}
21 changes: 10 additions & 11 deletions resolve/LinSolverDirectCuSolverRf.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include <resolve/memoryUtils.hpp>
#include <resolve/vector/Vector.hpp>
#include <resolve/matrix/Csr.hpp>
#include "LinSolverDirectCuSolverRf.hpp"
Expand All @@ -13,9 +12,9 @@ namespace ReSolve
LinSolverDirectCuSolverRf::~LinSolverDirectCuSolverRf()
{
cusolverRfDestroy(handle_cusolverrf_);
deleteOnDevice(d_P_);
deleteOnDevice(d_Q_);
deleteOnDevice(d_T_);
mem_.deleteOnDevice(d_P_);
mem_.deleteOnDevice(d_Q_);
mem_.deleteOnDevice(d_T_);
}

int LinSolverDirectCuSolverRf::setup(matrix::Sparse* A, matrix::Sparse* L, matrix::Sparse* U, index_type* P, index_type* Q)
Expand All @@ -24,12 +23,12 @@ namespace ReSolve
int error_sum = 0;
this->A_ = (matrix::Csr*) A;
index_type n = A_->getNumRows();
allocateArrayOnDevice(&d_P_, n);
allocateArrayOnDevice(&d_Q_, n);
allocateArrayOnDevice(&d_T_, n);
mem_.allocateArrayOnDevice(&d_P_, n);
mem_.allocateArrayOnDevice(&d_Q_, n);
mem_.allocateArrayOnDevice(&d_T_, n);

copyArrayHostToDevice(d_P_, P, n);
copyArrayHostToDevice(d_Q_, Q, n);
mem_.copyArrayHostToDevice(d_P_, P, n);
mem_.copyArrayHostToDevice(d_Q_, Q, n);


status_cusolverrf_ = cusolverRfSetResetValuesFastMode(handle_cusolverrf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON);
Expand All @@ -52,7 +51,7 @@ namespace ReSolve
handle_cusolverrf_);
error_sum += status_cusolverrf_;

deviceSynchronize();
mem_.deviceSynchronize();
status_cusolverrf_ = cusolverRfAnalyze(handle_cusolverrf_);
error_sum += status_cusolverrf_;

Expand Down Expand Up @@ -85,7 +84,7 @@ namespace ReSolve
handle_cusolverrf_);
error_sum += status_cusolverrf_;

deviceSynchronize();
mem_.deviceSynchronize();
status_cusolverrf_ = cusolverRfRefactor(handle_cusolverrf_);
error_sum += status_cusolverrf_;

Expand Down
3 changes: 3 additions & 0 deletions resolve/LinSolverDirectCuSolverRf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "Common.hpp"
#include "LinSolver.hpp"
#include "cusolverRf.h"
#include <resolve/MemoryUtils.hpp>

namespace ReSolve
{
Expand Down Expand Up @@ -40,5 +41,7 @@ namespace ReSolve
index_type* d_P_;
index_type* d_Q_;
real_type* d_T_;

MemoryHandler mem_; ///< Device memory manager object
};
}
3 changes: 1 addition & 2 deletions resolve/LinSolverIterativeFGMRES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <cmath>

#include <resolve/utilities/logger/Logger.hpp>
#include <resolve/memoryUtils.hpp>
#include <resolve/matrix/MatrixHandler.hpp>
#include "LinSolverIterativeFGMRES.hpp"

Expand Down Expand Up @@ -170,7 +169,7 @@ namespace ReSolve
vec_v->setData( d_V_->getVectorData(i, "cuda"), "cuda");
vec_z->setData( d_Z_->getVectorData(i, "cuda"), "cuda");
this->precV(vec_v, vec_z);
deviceSynchronize();
mem_.deviceSynchronize();

// V_{i+1}=A*Z_i

Expand Down
2 changes: 2 additions & 0 deletions resolve/LinSolverIterativeFGMRES.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,5 +70,7 @@ namespace ReSolve
real_type final_residual_norm_;
real_type initial_residual_norm_;
index_type fgmres_iters_;

MemoryHandler mem_; ///< Device memory manager object
};
}
Loading

0 comments on commit 93f5ac1

Please sign in to comment.