diff --git a/.gitlab/pnnl/incline.gitlab-ci.yml b/.gitlab/pnnl/incline.gitlab-ci.yml index 67614de8f..54521755b 100644 --- a/.gitlab/pnnl/incline.gitlab-ci.yml +++ b/.gitlab/pnnl/incline.gitlab-ci.yml @@ -45,4 +45,4 @@ failure: - .report-status rules: - when: on_failure - \ No newline at end of file + diff --git a/CMakeLists.txt b/CMakeLists.txt index 10c66f1fe..559b41b52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ project(ReSolve VERSION "0.1.0") set(CMAKE_CXX_STANDARD 11) -set(PACKAGE_NAME "ReSolve") +set(PACKAGE_NAME "ReSolve") set(PACKAGE_TARNAME "resolve") # Prohibit in-source build @@ -21,23 +21,34 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") message(FATAL_ERROR "In-source build prohibited.") endif() -option(RESOLVE_TEST_WITH_BSUB "Use `jsrun` instead of `mpirun` commands when running tests" OFF) -option(RESOLVE_USE_KLU "Use KLU, AMD and COLAMD libraries from SuiteSparse" ON) +option(RESOLVE_TEST_WITH_BSUB + "Use `jsrun` instead of `mpirun` commands when running tests" OFF +) +option(RESOLVE_USE_KLU "Use KLU, AMD and COLAMD libraries from SuiteSparse" ON) option(RESOLVE_USE_CUDA "Use CUDA language and SDK" OFF) -option(RESOLVE_USE_HIP "Use HIP language and ROCm library" OFF) +option(RESOLVE_USE_HIP "Use HIP language and ROCm library" OFF) -option(RESOLVE_USE_GPU "Use GPU device for computations" OFF) +option(RESOLVE_USE_GPU "Use GPU device for computations" OFF) mark_as_advanced(FORCE RESOLVE_USE_GPU) option(RESOLVE_USE_DOXYGEN "Use Doxygen to generate Re::Solve documentation" ON) -set(RESOLVE_CTEST_OUTPUT_DIR ${PROJECT_BINARY_DIR} CACHE PATH "Directory where CTest outputs are saved") +set(RESOLVE_CTEST_OUTPUT_DIR + ${PROJECT_BINARY_DIR} + CACHE PATH "Directory where CTest outputs are saved" +) if(RESOLVE_USE_CUDA) - set(RESOLVE_USE_GPU ON CACHE BOOL "Using CUDA GPU!" FORCE) + set(RESOLVE_USE_GPU + ON + CACHE BOOL "Using CUDA GPU!" FORCE + ) endif() if(RESOLVE_USE_HIP) - set(RESOLVE_USE_GPU ON CACHE BOOL "Using HIP GPU!" 
FORCE) + set(RESOLVE_USE_GPU + ON + CACHE BOOL "Using HIP GPU!" FORCE + ) endif() # MacOS specific things @@ -51,53 +62,56 @@ set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib) # Add CMake sources from `cmake` dir list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -# Including clang-format cmake files to do automatic checking of formating -# TODO: Set up clang-format -#include(./cmake/clang-format) +# Including clang-format cmake files to do automatic checking of formating TODO: +# Set up clang-format include(./cmake/clang-format) -if (RESOLVE_USE_DOXYGEN) +if(RESOLVE_USE_DOXYGEN) find_package(Doxygen) endif() -if ( DOXYGEN_FOUND ) - set( DOXYGEN_OUTPUT_DIRECTORY ./sphinx/_build/doxygen ) - set( DOXYGEN_COLLABORATION_GRAPH YES ) - set( DOXYGEN_EXTRACT_ALL YES ) - set( DOXYGEN_CLASS_DIAGRAMS YES ) - set( DOXYGEN_HIDE_UNDOC_RELATIONS NO ) - set( DOXYGEN_HAVE_DOT YES ) - set( DOXYGEN_CLASS_GRAPH YES ) - set( DOXYGEN_CALL_GRAPH YES ) - set( DOXYGEN_CALLER_GRAPH YES ) - set( DOXYGEN_COLLABORATION_GRAPH YES ) - set( DOXYGEN_BUILTIN_STL_SUPPORT YES ) - set( DOXYGEN_EXTRACT_PRIVATE YES ) - set( DOXYGEN_EXTRACT_PACKAGE YES ) - set( DOXYGEN_EXTRACT_STATIC YES ) - set( DOXYGEN_EXTRACT_LOCALMETHODS YES ) - set( DOXYGEN_UML_LOOK YES ) - set( DOXYGEN_UML_LIMIT_NUM_FIELDS 50 ) - set( DOXYGEN_TEMPLATE_RELATIONS YES ) - set( DOXYGEN_DOT_GRAPH_MAX_NODES 100 ) - set( DOXYGEN_MAX_DOT_GRAPH_DEPTH 0 ) - set( DOXYGEN_DOT_TRANSPARENT YES ) +if(DOXYGEN_FOUND) + set(DOXYGEN_OUTPUT_DIRECTORY ./sphinx/_build/doxygen) + set(DOXYGEN_COLLABORATION_GRAPH YES) + set(DOXYGEN_EXTRACT_ALL YES) + set(DOXYGEN_CLASS_DIAGRAMS YES) + set(DOXYGEN_HIDE_UNDOC_RELATIONS NO) + set(DOXYGEN_HAVE_DOT YES) + set(DOXYGEN_CLASS_GRAPH YES) + set(DOXYGEN_CALL_GRAPH YES) + set(DOXYGEN_CALLER_GRAPH YES) + set(DOXYGEN_COLLABORATION_GRAPH YES) + set(DOXYGEN_BUILTIN_STL_SUPPORT YES) + set(DOXYGEN_EXTRACT_PRIVATE YES) + set(DOXYGEN_EXTRACT_PACKAGE YES) + set(DOXYGEN_EXTRACT_STATIC YES) + 
set(DOXYGEN_EXTRACT_LOCALMETHODS YES) + set(DOXYGEN_UML_LOOK YES) + set(DOXYGEN_UML_LIMIT_NUM_FIELDS 50) + set(DOXYGEN_TEMPLATE_RELATIONS YES) + set(DOXYGEN_DOT_GRAPH_MAX_NODES 100) + set(DOXYGEN_MAX_DOT_GRAPH_DEPTH 0) + set(DOXYGEN_DOT_TRANSPARENT YES) set(DOXYGEN_DISABLE_INDEX NO) set(DOXYGEN_FULL_SIDEBAR NO) set(DOXYGEN_GENERATE_TREEVIEW YES) - set(DOXYGEN_HTML_EXTRA_STYLESHEET "./docs/doxygen/doxygen-awesome-css/doxygen-awesome.css") + set(DOXYGEN_HTML_EXTRA_STYLESHEET + "./docs/doxygen/doxygen-awesome-css/doxygen-awesome.css" + ) set(DOXYGEN_HTML_COLORSTYLE LIGHT) - doxygen_add_docs( doxygen ${RPP_PROJECT_SOURCE_DIR} ) + doxygen_add_docs(doxygen ${RPP_PROJECT_SOURCE_DIR}) else() - message( "Doxygen need to be installed to generate the doxygen documentation" ) + message("Doxygen need to be installed to generate the doxygen documentation") endif() - -if (RESOLVE_USE_KLU) +if(RESOLVE_USE_KLU) include(FindKLU) if(NOT KLU_LIBRARY) message(STATUS "Cannot find KLU, disabling SuiteSparse module ...") - set(RESOLVE_USE_KLU OFF CACHE BOOL "Build without SuiteSparse AMD module." FORCE) + set(RESOLVE_USE_KLU + OFF + CACHE BOOL "Build without SuiteSparse AMD module." 
FORCE + ) endif() else() message(STATUS "Not using SuiteSparse KLU") @@ -116,7 +130,10 @@ if(RESOLVE_USE_CUDA) endif() if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES 60 CACHE STRING "Selects CUDA architectures") + set(CMAKE_CUDA_ARCHITECTURES + 60 + CACHE STRING "Selects CUDA architectures" + ) endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") @@ -134,16 +151,14 @@ else() message(STATUS "Not using HIP") endif(RESOLVE_USE_HIP) - # The binary dir is already a global include directory configure_file( ${CMAKE_SOURCE_DIR}/resolve/resolve_defs.hpp.in - ${CMAKE_BINARY_DIR}/resolve/resolve_defs.hpp) -install( - FILES ${CMAKE_BINARY_DIR}/resolve/resolve_defs.hpp - DESTINATION include/resolve - ) - + ${CMAKE_BINARY_DIR}/resolve/resolve_defs.hpp +) +install(FILES ${CMAKE_BINARY_DIR}/resolve/resolve_defs.hpp + DESTINATION include/resolve +) # Enable testing enable_testing() @@ -155,31 +170,38 @@ add_subdirectory(resolve) include(CMakePackageConfigHelpers) # Creates a version file for the package -write_basic_package_version_file(ReSolveConfigVersion.cmake - VERSION ${CMAKE_PROJECT_VERSION} - COMPATIBILITY AnyNewerVersion) +write_basic_package_version_file( + ReSolveConfigVersion.cmake + VERSION ${CMAKE_PROJECT_VERSION} + COMPATIBILITY AnyNewerVersion +) # Generate install rules for targets -install(EXPORT ReSolveTargets - FILE ReSolveTargets.cmake - NAMESPACE ReSolve:: - DESTINATION share/resolve/cmake +install( + EXPORT ReSolveTargets + FILE ReSolveTargets.cmake + NAMESPACE ReSolve:: + DESTINATION share/resolve/cmake ) - # Creates a config file -configure_package_config_file(./cmake/ReSolveConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfig.cmake - INSTALL_DESTINATION share/resolve/cmake) +configure_package_config_file( + ./cmake/ReSolveConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfig.cmake + INSTALL_DESTINATION share/resolve/cmake +) # Generates install rules for cmake config files install(FILES 
"${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfig.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfigVersion.cmake" - DESTINATION share/resolve/cmake) - + "${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfigVersion.cmake" + DESTINATION share/resolve/cmake +) + # Add usage examples add_subdirectory(examples) # Add tests -set(RESOLVE_CTEST_OUTPUT_DIR ${PROJECT_BINARY_DIR} CACHE PATH "Directory where CTest outputs are saved") +set(RESOLVE_CTEST_OUTPUT_DIR + ${PROJECT_BINARY_DIR} + CACHE PATH "Directory where CTest outputs are saved" +) add_subdirectory(tests) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b836299b1..6449b0a1c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -185,5 +185,3 @@ class Matrix // No, class is outside ReSolve namespace { // matrix code }; - - diff --git a/LICENSE b/LICENSE index 4514082c8..03430d780 100644 --- a/LICENSE +++ b/LICENSE @@ -54,4 +54,4 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -****************************************************************************** \ No newline at end of file +****************************************************************************** diff --git a/README.md b/README.md index 93acdc6f2..e7ac245cb 100644 --- a/README.md +++ b/README.md @@ -111,4 +111,3 @@ contributions to ReSolve must be made under the smae licensing terms. **Please Note** If you are using ReSolve with any third party libraries linked in (e.g., KLU), be sure to review the respective license of the package as that license may have more restrictive terms than the ReSolve license. 
- diff --git a/buildsystem/ascent-env.sh b/buildsystem/ascent-env.sh index ded2f1d74..8984a5b52 100644 --- a/buildsystem/ascent-env.sh +++ b/buildsystem/ascent-env.sh @@ -2,4 +2,3 @@ module load gcc/10.2 # Load spack deps source ./buildsystem/spack/ascent/modules/dependencies.sh - diff --git a/buildsystem/incline-env.sh b/buildsystem/incline-env.sh index 348139ff9..3b4b9cbb2 100644 --- a/buildsystem/incline-env.sh +++ b/buildsystem/incline-env.sh @@ -12,4 +12,3 @@ unset GPU_DEVICE_ORDINAL # Load spack generated modules source ./buildsystem/spack/incline/modules/dependencies.sh - diff --git a/buildsystem/init-mirror.sh b/buildsystem/init-mirror.sh index 3a175d02a..cf8d563d9 100755 --- a/buildsystem/init-mirror.sh +++ b/buildsystem/init-mirror.sh @@ -15,4 +15,3 @@ res=$? chmod -R ugo+wrx $SPACK_MIRROR & exit $res - diff --git a/buildsystem/spack/ascent/env.sh b/buildsystem/spack/ascent/env.sh index 6b6d97c5e..42c0851b8 100644 --- a/buildsystem/spack/ascent/env.sh +++ b/buildsystem/spack/ascent/env.sh @@ -21,4 +21,3 @@ export SPACK_PYTHON=$(which python) export tempdir=$SPACK_CACHE export TMP=$SPACK_CACHE export TMPDIR=$SPACK_CACHE - diff --git a/buildsystem/spack/ascent/spack.yaml b/buildsystem/spack/ascent/spack.yaml index 912e0e968..38ffabcf3 100644 --- a/buildsystem/spack/ascent/spack.yaml +++ b/buildsystem/spack/ascent/spack.yaml @@ -65,4 +65,3 @@ spack: modules: [gcc/10.2] operating_system: rhel8 target: ppc64le - diff --git a/buildsystem/spack/deception/spack.yaml b/buildsystem/spack/deception/spack.yaml index bd885b575..cb675f399 100644 --- a/buildsystem/spack/deception/spack.yaml +++ b/buildsystem/spack/deception/spack.yaml @@ -57,4 +57,4 @@ spack: fc: /share/apps/gcc/9.1.0/bin/gfortran operating_system: centos7 target: x86_64 - modules: [gcc/9.1.0] \ No newline at end of file + modules: [gcc/9.1.0] diff --git a/buildsystem/spack/incline/env.sh b/buildsystem/spack/incline/env.sh index 035ef6cc3..1447691d8 100644 --- a/buildsystem/spack/incline/env.sh +++ 
b/buildsystem/spack/incline/env.sh @@ -24,4 +24,3 @@ export SPACK_PYTHON=$(which python3) export tempdir=$SPACK_CACHE export TMP=$SPACK_CACHE export TMPDIR=$SPACK_CACHE - diff --git a/cmake/FindKLU.cmake b/cmake/FindKLU.cmake index 5e9af6fb3..cae61904c 100644 --- a/cmake/FindKLU.cmake +++ b/cmake/FindKLU.cmake @@ -1,4 +1,3 @@ - #[[ Looks for `klu` library and header directory. @@ -12,29 +11,29 @@ Users may set the following variables: ]] - -find_library(KLU_LIBRARY - NAMES - klu - PATHS - ${KLU_DIR} $ENV{KLU_DIR} ${HIOP_KLU_DIR} - ENV LD_LIBRARY_PATH ENV DYLD_LIBRARY_PATH - PATH_SUFFIXES - lib64 lib) +find_library( + KLU_LIBRARY + NAMES klu + PATHS ${KLU_DIR} + $ENV{KLU_DIR} + ${HIOP_KLU_DIR} + ENV + LD_LIBRARY_PATH + ENV + DYLD_LIBRARY_PATH + PATH_SUFFIXES lib64 lib +) if(KLU_LIBRARY) get_filename_component(KLU_LIBRARY_DIR ${KLU_LIBRARY} DIRECTORY) endif() -find_path(KLU_INCLUDE_DIR - NAMES - klu.h - PATHS - ${KLU_DIR} $ENV{KLU_DIR} ${HIOP_KLU_DIR} ${KLU_LIBRARY_DIR}/.. - PATH_SUFFIXES - include - include/suitesparse - include/klu) +find_path( + KLU_INCLUDE_DIR + NAMES klu.h + PATHS ${KLU_DIR} $ENV{KLU_DIR} ${HIOP_KLU_DIR} ${KLU_LIBRARY_DIR}/.. + PATH_SUFFIXES include include/suitesparse include/klu +) if(KLU_LIBRARY) message(STATUS "Found klu include: ${KLU_INCLUDE_DIR}") @@ -51,4 +50,3 @@ endif() set(KLU_INCLUDE_DIR CACHE PATH "Path to klu.h") set(KLU_LIBRARY CACHE PATH "Path to klu library") - diff --git a/cmake/ReSolveFindCudaLibraries.cmake b/cmake/ReSolveFindCudaLibraries.cmake index 4a789d168..06dd74e23 100644 --- a/cmake/ReSolveFindCudaLibraries.cmake +++ b/cmake/ReSolveFindCudaLibraries.cmake @@ -1,15 +1,13 @@ -# Exports target `resolve_cuda` which finds all cuda libraries needed by resolve. - +# Exports target `resolve_cuda` which finds all cuda libraries needed by +# resolve. 
add_library(resolve_cuda INTERFACE) find_package(CUDAToolkit REQUIRED) -target_link_libraries(resolve_cuda INTERFACE - CUDA::cusolver - CUDA::cublas - CUDA::cusparse - CUDA::cudart - ) +target_link_libraries( + resolve_cuda INTERFACE CUDA::cusolver CUDA::cublas CUDA::cusparse + CUDA::cudart +) install(TARGETS resolve_cuda EXPORT ReSolveTargets) diff --git a/cmake/ReSolveFindHipLibraries.cmake b/cmake/ReSolveFindHipLibraries.cmake index b23d80219..6808f9434 100644 --- a/cmake/ReSolveFindHipLibraries.cmake +++ b/cmake/ReSolveFindHipLibraries.cmake @@ -1,6 +1,5 @@ # Exports target `resolve_hip` which finds all hip libraries needed by resolve. - add_library(resolve_hip INTERFACE) find_package(hip REQUIRED) @@ -8,18 +7,15 @@ find_package(rocblas REQUIRED) find_package(rocsparse REQUIRED) find_package(rocsolver REQUIRED) -target_link_libraries(resolve_hip INTERFACE - hip::host - hip::device - roc::rocblas - roc::rocsparse - roc::rocsolver +target_link_libraries( + resolve_hip INTERFACE hip::host hip::device roc::rocblas roc::rocsparse + roc::rocsolver ) get_target_property(hip_includes hip::device INTERFACE_INCLUDE_DIRECTORIES) -target_include_directories(resolve_hip INTERFACE - $) +target_include_directories( + resolve_hip INTERFACE $ +) install(TARGETS resolve_hip EXPORT ReSolveTargets) - diff --git a/docs/doxygen/Doxyfile.in b/docs/doxygen/Doxyfile.in index 470dcf901..3c2aad74e 100644 --- a/docs/doxygen/Doxyfile.in +++ b/docs/doxygen/Doxyfile.in @@ -2845,4 +2845,4 @@ GENERATE_TREEVIEW = YES # required! 
DISABLE_INDEX = NO FULL_SIDEBAR = NO HTML_EXTRA_STYLESHEET = doxygen/doxygen-awesome.css -HTML_COLORSTYLE = LIGHT # required with Doxygen >= 1.9.5 \ No newline at end of file +HTML_COLORSTYLE = LIGHT # required with Doxygen >= 1.9.5 diff --git a/docs/doxygen/doxygen-awesome.css b/docs/doxygen/doxygen-awesome.css index 1d7e53f3d..08238977a 100644 --- a/docs/doxygen/doxygen-awesome.css +++ b/docs/doxygen/doxygen-awesome.css @@ -2527,4 +2527,4 @@ h2:hover a.anchorlink, h1:hover a.anchorlink, h3:hover a.anchorlink, h4:hover a. height: 3px; border-radius: var(--border-radius-small) var(--border-radius-small) 0 0; background-color: var(--primary-color); -} \ No newline at end of file +} diff --git a/docs/sphinx/_build/_static/theme_overrides.css b/docs/sphinx/_build/_static/theme_overrides.css index c9feb6f0a..d68b9dfcc 100644 --- a/docs/sphinx/_build/_static/theme_overrides.css +++ b/docs/sphinx/_build/_static/theme_overrides.css @@ -19,4 +19,4 @@ /* Sidebar header (and topbar for mobile) */ .wy-side-nav-search, .wy-nav-top { background: #343131; - } \ No newline at end of file + } diff --git a/docs/sphinx/coding_guide/index.md b/docs/sphinx/coding_guide/index.md index b836299b1..6449b0a1c 100644 --- a/docs/sphinx/coding_guide/index.md +++ b/docs/sphinx/coding_guide/index.md @@ -185,5 +185,3 @@ class Matrix // No, class is outside ReSolve namespace { // matrix code }; - - diff --git a/docs/sphinx/licenses.md b/docs/sphinx/licenses.md index 4514082c8..03430d780 100644 --- a/docs/sphinx/licenses.md +++ b/docs/sphinx/licenses.md @@ -54,4 +54,4 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-****************************************************************************** \ No newline at end of file +****************************************************************************** diff --git a/docs/sphinx/licenses.rst b/docs/sphinx/licenses.rst index 699a9530e..9f3a39849 100644 --- a/docs/sphinx/licenses.rst +++ b/docs/sphinx/licenses.rst @@ -58,5 +58,3 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5fb26a284..9b5058169 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -16,7 +16,7 @@ target_link_libraries(klu_klu_standalone.exe PRIVATE ReSolve) # Create CUDA examples if(RESOLVE_USE_CUDA) - + # Build example with KLU factorization and GLU refactorization add_executable(klu_glu.exe r_KLU_GLU.cpp) target_link_libraries(klu_glu.exe PRIVATE ReSolve) @@ -25,15 +25,20 @@ if(RESOLVE_USE_CUDA) add_executable(klu_rf.exe r_KLU_rf.cpp) target_link_libraries(klu_rf.exe PRIVATE ReSolve) - # Build example with KLU factorization, Rf refactorization, and FGMRES iterative refinement + # Build example with KLU factorization, Rf refactorization, and FGMRES + # iterative refinement add_executable(klu_rf_fgmres.exe r_KLU_rf_FGMRES.cpp) target_link_libraries(klu_rf_fgmres.exe PRIVATE ReSolve) - # Build example where matrix is factorized once, refactorized once and then the preconditioner is REUSED - add_executable(klu_rf_fgmres_reuse_refactorization.exe r_KLU_rf_FGMRES_reuse_factorization.cpp) + # Build example where matrix is factorized once, refactorized once and then + # the preconditioner is REUSED + add_executable( + klu_rf_fgmres_reuse_refactorization.exe + r_KLU_rf_FGMRES_reuse_factorization.cpp + ) target_link_libraries(klu_rf_fgmres_reuse_refactorization.exe 
PRIVATE ReSolve) - # Build example where matrix data is updated + # Build example where matrix data is updated add_executable(klu_glu_values_update.exe r_KLU_GLU_matrix_values_update.cpp) target_link_libraries(klu_glu_values_update.exe PRIVATE ReSolve) @@ -45,12 +50,15 @@ if(RESOLVE_USE_HIP) add_executable(klu_rocsolverrf.exe r_KLU_rocsolverrf.cpp) target_link_libraries(klu_rocsolverrf.exe PRIVATE ReSolve) - # Build example with KLU factorization, rocsolver Rf refactorization, and FGMRES iterative refinement + # Build example with KLU factorization, rocsolver Rf refactorization, and + # FGMRES iterative refinement add_executable(klu_rocsolverrf_fgmres.exe r_KLU_rocSolverRf_FGMRES.cpp) target_link_libraries(klu_rocsolverrf_fgmres.exe PRIVATE ReSolve) - + # Example in which factorization is redone if solution is bad - add_executable(klu_rocsolverrf_check_redo.exe r_KLU_rocsolverrf_redo_factorization.cpp) + add_executable( + klu_rocsolverrf_check_redo.exe r_KLU_rocsolverrf_redo_factorization.cpp + ) target_link_libraries(klu_rocsolverrf_check_redo.exe PRIVATE ReSolve) endif(RESOLVE_USE_HIP) @@ -59,15 +67,19 @@ endif(RESOLVE_USE_HIP) set(installable_executables klu_klu.exe klu_klu_standalone.exe) if(RESOLVE_USE_CUDA) - set(installable_executables ${installable_executables} klu_glu.exe klu_rf.exe klu_rf_fgmres.exe klu_glu_values_update.exe) + set(installable_executables ${installable_executables} klu_glu.exe klu_rf.exe + klu_rf_fgmres.exe klu_glu_values_update.exe + ) endif(RESOLVE_USE_CUDA) if(RESOLVE_USE_HIP) - set(installable_executables ${installable_executables} klu_rocsolverrf.exe klu_rocsolverrf_fgmres.exe klu_rocsolverrf_check_redo.exe) + set(installable_executables + ${installable_executables} klu_rocsolverrf.exe klu_rocsolverrf_fgmres.exe + klu_rocsolverrf_check_redo.exe + ) endif(RESOLVE_USE_HIP) -install(TARGETS ${installable_executables} - RUNTIME DESTINATION bin) +install(TARGETS ${installable_executables} RUNTIME DESTINATION bin) # Path where the consumer 
test code will be installed set(CONSUMER_PATH ${CMAKE_INSTALL_PREFIX}/share/examples) @@ -75,8 +87,8 @@ set(CONSUMER_PATH ${CMAKE_INSTALL_PREFIX}/share/examples) # Make the resolve consumer test script exectuable install(PROGRAMS test.sh DESTINATION ${CONSUMER_PATH}) -# Select consumer app -# TODO - have an outer loop that adds a unique consumer test for each backend supproted +# Select consumer app TODO - have an outer loop that adds a unique consumer test +# for each backend supproted if(RESOLVE_USE_CUDA) set(RESOLVE_CONSUMER_APP "testKLU_Rf_FGMRES.cpp") elseif(RESOLVE_USE_HIP) @@ -87,9 +99,15 @@ endif() # Install directory with example on how to consume ReSolve install(DIRECTORY resolve_consumer DESTINATION share/examples) -install(FILES ${PROJECT_SOURCE_DIR}/tests/functionality/${RESOLVE_CONSUMER_APP} DESTINATION share/examples/resolve_consumer RENAME consumer.cpp) - -# Shell script argumets: -# 1. Path to where resolve is installed. -# 2. Path to data directory -add_custom_target(test_install COMMAND ${CONSUMER_PATH}/test.sh ${CMAKE_INSTALL_PREFIX} ${PROJECT_SOURCE_DIR}/tests/functionality/) +install( + FILES ${PROJECT_SOURCE_DIR}/tests/functionality/${RESOLVE_CONSUMER_APP} + DESTINATION share/examples/resolve_consumer + RENAME consumer.cpp +) + +# Shell script argumets: 1. Path to where resolve is installed. 2. 
Path to data +# directory +add_custom_target( + test_install COMMAND ${CONSUMER_PATH}/test.sh ${CMAKE_INSTALL_PREFIX} + ${PROJECT_SOURCE_DIR}/tests/functionality/ +) diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index 9f2712543..852cdeb85 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -1,15 +1,15 @@ -#include -#include #include +#include +#include +#include +#include #include #include -#include -#include #include +#include +#include #include -#include -#include #include using namespace ReSolve::constants; @@ -18,41 +18,40 @@ int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; using matrix_type = ReSolve::matrix::Sparse; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. 
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type *rhs = nullptr; + real_type *x = nullptr; - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverGLU* GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverGLU *GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); - for (int i = 0; i < numSystems; ++i) - { + for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -64,57 +63,54 @@ int main(int argc, char *argv[]) matrixFileNameFull = matrixFileName + fileId + ".mtx"; rhsFileNameFull = rhsFileName + rhsId + ".mtx"; std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new 
vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. - if (i < 1) { - matrix_handler->coo2csr(A_coo, A, "cpu"); + // Now convert to CSR. + if (i < 1) { + matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { + } else { matrix_handler->coo2csr(A_coo, A, "cuda"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<getNnzExpanded() << std::endl; + // Now call direct solver if (i == 0) { KLU->setupParameters(1, 0.1, false); } @@ -122,45 +118,44 @@ int main(int argc, char *argv[]) if (i < 1) { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<getLFactor(); - matrix_type* U = KLU->getUFactor(); - if (L == nullptr) {printf("ERROR");} - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); - GLU->setup(A, L, U, P, Q); + std::cout << "KLU factorization status: " << status << std::endl; + matrix_type *L = KLU->getLFactor(); + matrix_type *U = KLU->getUFactor(); + if (L == nullptr) { + printf("ERROR"); + } + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); + GLU->setup(A, L, U, P, Q); status = GLU->solve(vec_rhs, vec_x); - std::cout<<"GLU solve status: "<solve(vec_rhs, vec_x); - // std::cout<<"KLU solve status: "<refactorize(); - std::cout<<"Using CUSOLVER GLU"<refactorize(); + std::cout << "Using CUSOLVER GLU" << std::endl; status = GLU->refactorize(); - std::cout<<"CUSOLVER GLU refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"CUSOLVER GLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); - std::cout << "\t 2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; + std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; } // for (int i = 0; i < numSystems; ++i) - //now DELETE + // now DELETE delete A; delete KLU; delete GLU; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; 
delete workspace_CUDA; diff --git a/examples/r_KLU_GLU_matrix_values_update.cpp b/examples/r_KLU_GLU_matrix_values_update.cpp index ded685ac3..27e1e710a 100644 --- a/examples/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/r_KLU_GLU_matrix_values_update.cpp @@ -1,18 +1,18 @@ -#include -#include #include +#include +#include -#include -#include +#include +#include #include #include #include +#include +#include #include -#include -#include #include -// this updates the matrix values to simulate what CFD/optimization software does. +// this updates the matrix values to simulate what CFD/optimization software does. using namespace ReSolve::constants; @@ -20,42 +20,41 @@ int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; using matrix_type = ReSolve::matrix::Sparse; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. 
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type *rhs = nullptr; + real_type *x = nullptr; - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverGLU* GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverGLU *GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); - for (int i = 0; i < numSystems; ++i) - { + for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -67,116 +66,110 @@ int main(int argc, char *argv[]) matrixFileNameFull = matrixFileName + fileId + ".mtx"; rhsFileNameFull = rhsFileName + rhsId + ".mtx"; std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new 
vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); } else { - if (i==1) { + if (i == 1) { A_exp_coo = ReSolve::io::readMatrixFromFile(mat_file); } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_exp_coo); } - std::cout<<"Updating values of A_coo!"<updateValues(A_exp_coo->getValues(ReSolve::memory::HOST), ReSolve::memory::HOST, ReSolve::memory::HOST); - //ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); + // ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<coo2csr(A_coo, A, "cpu"); - vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { - matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - } - std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<setupParameters(1, 0.1, false); - } - int status; - if (i < 1){ - KLU->setup(A); - status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<getLFactor(); - matrix_type* U = KLU->getUFactor(); - if (L == nullptr) {printf("ERROR");} - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); - GLU->setup(A, L, U, P, Q); - status = GLU->solve(vec_rhs, vec_x); - std::cout<<"GLU solve status: "<solve(vec_rhs, vec_x); - // std::cout<<"KLU solve status: "<refactorize(); - std::cout<<"Using CUSOLVER GLU"<refactorize(); - std::cout<<"CUSOLVER GLU refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"CUSOLVER GLU solve status: "<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; + mat_file.close(); + rhs_file.close(); + + // Now convert to CSR. + if (i < 1) { + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + } else { + matrix_handler->coo2csr(A_coo, A, "cuda"); + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + } + std::cout << "COO to CSR completed. 
Expanded NNZ: " << A->getNnzExpanded() << std::endl; + // Now call direct solver + if (i == 0) { + KLU->setupParameters(1, 0.1, false); + } + int status; + if (i < 1) { + KLU->setup(A); + status = KLU->analyze(); + std::cout << "KLU analysis status: " << status << std::endl; + status = KLU->factorize(); + std::cout << "KLU factorization status: " << status << std::endl; + matrix_type *L = KLU->getLFactor(); + matrix_type *U = KLU->getUFactor(); + if (L == nullptr) { + printf("ERROR"); } - vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - - - matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - - std::cout << "\t 2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; - - + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); + GLU->setup(A, L, U, P, Q); + status = GLU->solve(vec_rhs, vec_x); + std::cout << "GLU solve status: " << status << std::endl; + // status = KLU->solve(vec_rhs, vec_x); + // std::cout<<"KLU solve status: "<refactorize(); + std::cout << "Using CUSOLVER GLU" << std::endl; + status = GLU->refactorize(); + std::cout << "CUSOLVER GLU refactorization status: " << status << std::endl; + status = GLU->solve(vec_rhs, vec_x); + std::cout << "CUSOLVER GLU solve status: " << status << std::endl; } + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + + matrix_handler->setValuesChanged(true, "cuda"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); - //now DELETE - delete A; - delete KLU; - delete GLU; - delete [] x; - delete [] rhs; - delete vec_r; - delete vec_x; - delete workspace_CUDA; - delete matrix_handler; - delete vector_handler; - - return 0; + std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; } + + // now DELETE + 
delete A; + delete KLU; + delete GLU; + delete[] x; + delete[] rhs; + delete vec_r; + delete vec_x; + delete workspace_CUDA; + delete matrix_handler; + delete vector_handler; + + return 0; +} diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index 901e36a53..f7e471285 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -1,16 +1,16 @@ -#include -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include #include using namespace ReSolve::constants; @@ -19,38 +19,37 @@ int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. 
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); vec_r = new vector_type(A->getNumRows()); - } - else { + } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() - << " x " << A->getNumColumns() - << ", nnz: " << A->getNnz() - << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. - if (i < 2) { + // Now convert to CSR. + if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { + } else { matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); } - std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<getNnzExpanded() << std::endl; + // Now call direct solver if (i == 0) { KLU->setupParameters(1, 0.1, false); } int status; - if (i < 2){ + if (i < 2) { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<refactorize(); - std::cout<<"KLU re-factorization status: "<refactorize(); + std::cout << "KLU re-factorization status: " << status << std::endl; status = KLU->solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); matrix_handler->setValuesChanged(true, "cpu"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); - std::cout << "\t 2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; + std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; } - //now DELETE + // now DELETE delete A; delete KLU; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete matrix_handler; diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index 3dfaf7165..a1d9b2c6d 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -1,16 +1,16 @@ -#include -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include #include using namespace ReSolve::constants; @@ -18,101 +18,92 @@ using namespace ReSolve::constants; int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. 
- using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; - std::cout<<"Family mtx file name: "<< matrixFileName << std::endl; - std::cout<<"Family rhs file name: "<< rhsFileName << std::endl; + std::cout << "Family mtx file name: " << matrixFileName << std::endl; + std::cout << "Family rhs file name: " << rhsFileName << std::endl; std::string fileId; std::string rhsId; - ReSolve::matrix::Coo* A_coo; - ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspaceCpu* workspace = new ReSolve::LinAlgWorkspaceCpu(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); - real_type* rhs = nullptr; - real_type* x = nullptr; - - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; - - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::matrix::Coo *A_coo; + ReSolve::matrix::Csr *A; + ReSolve::LinAlgWorkspaceCpu *workspace = new ReSolve::LinAlgWorkspaceCpu(); + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace); + real_type *rhs = nullptr; + real_type *x = nullptr; + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; // Read matrix first - std::cout << "========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A 
= new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); vec_r = new vector_type(A->getNumRows()); - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. + // Now convert to CSR. matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); std::cout << "COO to CSR completed. Expanded NNZ: " << A->getNnzExpanded() << std::endl; - //Now call direct solver + // Now call direct solver KLU->setupParameters(1, 0.1, false); int status; KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); std::cout << "KLU factorization status: " << status << std::endl; status = KLU->solve(vec_rhs, vec_x); - std::cout << "KLU solve status: " << status << std::endl; + std::cout << "KLU solve status: " << status << std::endl; vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); matrix_handler->setValuesChanged(true, "cpu"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); - - std::cout << "\t 2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; - + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; - //now 
DELETE + // now DELETE delete A; delete KLU; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete matrix_handler; diff --git a/examples/r_KLU_rf.cpp b/examples/r_KLU_rf.cpp index b61029c52..701746a3a 100644 --- a/examples/r_KLU_rf.cpp +++ b/examples/r_KLU_rf.cpp @@ -1,59 +1,58 @@ -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[] ) +int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. 
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type *rhs = nullptr; + real_type *x = nullptr; - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverRf *Rf = new ReSolve::LinSolverDirectCuSolverRf; - for (int i = 0; i < numSystems; ++i) - { + for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -65,112 +64,108 @@ int main(int argc, char *argv[] ) matrixFileNameFull = matrixFileName + fileId + ".mtx"; rhsFileNameFull = rhsFileName + rhsId + ".mtx"; std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new 
vector_type(A->getNumRows()); vec_r = new vector_type(A->getNumRows()); - } - else { + } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. - if (i < 2) { + // Now convert to CSR. + if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { + } else { matrix_handler->coo2csr(A_coo, A, "cuda"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<getNnzExpanded() << std::endl; + // Now call direct solver if (i == 0) { KLU->setupParameters(1, 0.1, false); } int status; - if (i < 2){ + if (i < 2) { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<getLFactor(); - ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); - ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - matrix_handler->csc2csr(L_csc,L, "cuda"); - matrix_handler->csc2csr(U_csc,U, "cuda"); - if (L == nullptr) {printf("ERROR");} - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); - Rf->setup(A, L, U, P, Q); + ReSolve::matrix::Csc *L_csc = (ReSolve::matrix::Csc *)KLU->getLFactor(); + 
ReSolve::matrix::Csc *U_csc = (ReSolve::matrix::Csc *)KLU->getUFactor(); + ReSolve::matrix::Csr *L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + ReSolve::matrix::Csr *U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + matrix_handler->csc2csr(L_csc, L, "cuda"); + matrix_handler->csc2csr(U_csc, U, "cuda"); + if (L == nullptr) { + printf("ERROR"); + } + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); + Rf->setup(A, L, U, P, Q); delete L; delete U; } } else { - //status = KLU->refactorize(); - std::cout<<"Using CUSOLVER RF"<refactorize(); + std::cout << "Using CUSOLVER RF" << std::endl; status = Rf->refactorize(); - std::cout<<"CUSOLVER RF refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"CUSOLVER RF solve status: "<solve(vec_rhs, vec_x); - //std::cout<<"KLU solve status: "<solve(vec_rhs, vec_x); + // std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); - std::cout << "\t 2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; + std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; } // for (int i = 0; i < numSystems; ++i) - //now DELETE + // now DELETE delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_CUDA; diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index 584fcd102..2ba7dbeca 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -1,17 +1,17 @@ -#include -#include #include +#include +#include 
+#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include -#include #include using namespace ReSolve::constants; @@ -20,42 +20,41 @@ int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; - - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; - - ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; - ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); - - for (int i = 0; i < numSystems; ++i) - { + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type *rhs = nullptr; + real_type *x = nullptr; + + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; + + ReSolve::GramSchmidt *GS = new 
ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverRf *Rf = new ReSolve::LinSolverDirectCuSolverRf; + ReSolve::LinSolverIterativeFGMRES *FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); + + for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -67,124 +66,117 @@ int main(int argc, char *argv[]) matrixFileNameFull = matrixFileName + fileId + ".mtx"; rhsFileNameFull = rhsFileName + rhsId + ".mtx"; std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); - } - else { + } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. - if (i < 2) { + // Now convert to CSR. 
+ if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { - matrix_handler->coo2csr(A_coo,A, "cuda"); + } else { + matrix_handler->coo2csr(A_coo, A, "cuda"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<getNnzExpanded() << std::endl; + // Now call direct solver if (i == 0) { KLU->setupParameters(1, 0.1, false); } int status; real_type norm_b; - if (i < 2){ + if (i < 2) { KLU->setup(A); matrix_handler->setValuesChanged(true, "cuda"); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - printf("\t 2-Norm of the residual : %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + printf("\t 2-Norm of the residual : %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) / norm_b); if (i == 1) { - ReSolve::matrix::Csc* L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); - ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); - ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - matrix_handler->csc2csr(L_csc,L, "cuda"); - matrix_handler->csc2csr(U_csc,U, "cuda"); - if (L == nullptr) {printf("ERROR");} - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + 
ReSolve::matrix::Csc *L_csc = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U_csc = (ReSolve::matrix::Csc *)KLU->getUFactor(); + ReSolve::matrix::Csr *L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + ReSolve::matrix::Csr *U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + matrix_handler->csc2csr(L_csc, L, "cuda"); + matrix_handler->csc2csr(U_csc, U, "cuda"); + if (L == nullptr) { + printf("ERROR"); + } + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); Rf->setup(A, L, U, P, Q); - std::cout<<"about to set FGMRES" <setup(A->getNumRows(), FGMRES->getRestart()); - FGMRES->setup(A); + std::cout << "about to set FGMRES" << std::endl; + GS->setup(A->getNumRows(), FGMRES->getRestart()); + FGMRES->setup(A); } } else { - //status = KLU->refactorize(); - std::cout<<"Using CUSOLVER RF"<refactorize(); + std::cout << "Using CUSOLVER RF" << std::endl; status = Rf->refactorize(); - std::cout<<"CUSOLVER RF refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"CUSOLVER RF solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); - //matrix_handler->setValuesChanged(true, "cuda"); + // matrix_handler->setValuesChanged(true, "cuda"); FGMRES->resetMatrix(A); FGMRES->setupPreconditioner("LU", Rf); - - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - std::cout << "\t 2-Norm of the residual (before IR): " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + + std::cout << "\t 2-Norm of the residual (before IR): " << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) / norm_b << "\n"; vec_rhs->update(rhs, ReSolve::memory::HOST, 
ReSolve::memory::DEVICE); FGMRES->solve(vec_rhs, vec_x); - std::cout << "FGMRES: init nrm: " - << std::scientific << std::setprecision(16) - << FGMRES->getInitResidualNorm()/norm_b - << " final nrm: " - << FGMRES->getFinalResidualNorm()/norm_b - << " iter: " << FGMRES->getNumIter() << "\n"; + std::cout << "FGMRES: init nrm: " << std::scientific << std::setprecision(16) << FGMRES->getInitResidualNorm() / norm_b + << " final nrm: " << FGMRES->getFinalResidualNorm() / norm_b << " iter: " << FGMRES->getNumIter() << "\n"; } } // for (int i = 0; i < numSystems; ++i) @@ -192,8 +184,8 @@ int main(int argc, char *argv[]) delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_CUDA; diff --git a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp index c4ab285b4..c44bcb1ce 100644 --- a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -1,18 +1,18 @@ -#include -#include #include +#include +#include #include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include -#include #include using namespace ReSolve::constants; @@ -21,44 +21,43 @@ int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. 
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - real_type* rhs = nullptr; - real_type* x = nullptr; - - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; - - ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); - - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; - ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); - - for (int i = 0; i < numSystems; ++i) - { + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + real_type *rhs = nullptr; + real_type *x = nullptr; + + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; + + ReSolve::GramSchmidt *GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); + + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectCuSolverRf *Rf = new ReSolve::LinSolverDirectCuSolverRf; + ReSolve::LinSolverIterativeFGMRES *FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); + + for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -70,118 +69,109 @@ int main(int argc, char *argv[]) matrixFileNameFull = matrixFileName + fileId + ".mtx"; rhsFileNameFull = rhsFileName + rhsId + ".mtx"; std::cout << std::endl << std::endl << std::endl; - std::cout << 
"========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); - } - else { + } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. - if (i < 2) { - matrix_handler->coo2csr(A_coo,A, "cpu"); + // Now convert to CSR. + if (i < 2) { + matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { + } else { matrix_handler->coo2csr(A_coo, A, "cuda"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<getNnzExpanded() << std::endl; + // Now call direct solver if (i == 0) { KLU->setupParameters(1, 0.1, false); } int status; real_type norm_b; - if (i < 2){ + if (i < 2) { KLU->setup(A); matrix_handler->setValuesChanged(true, "cuda"); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); matrix_handler->setValuesChanged(true, "cuda"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - std::cout << "\t 2-Norm of the residual : " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + std::cout << "\t 2-Norm of the residual : " << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) / norm_b << "\n"; if (i == 1) { - ReSolve::matrix::Csc* L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); - ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); - ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + ReSolve::matrix::Csc *L_csc = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U_csc = (ReSolve::matrix::Csc *)KLU->getUFactor(); + ReSolve::matrix::Csr *L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + ReSolve::matrix::Csr *U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - matrix_handler->csc2csr(L_csc,L, "cuda"); - matrix_handler->csc2csr(U_csc,U, "cuda"); + matrix_handler->csc2csr(L_csc, 
L, "cuda"); + matrix_handler->csc2csr(U_csc, U, "cuda"); if (L == nullptr) { std::cout << "ERROR\n"; } - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); Rf->setup(A, L, U, P, Q); - std::cout<<"about to set FGMRES" <setRestart(1000); + std::cout << "about to set FGMRES" << std::endl; + FGMRES->setRestart(1000); FGMRES->setMaxit(2000); - GS->setup(A->getNumRows(), FGMRES->getRestart()); + GS->setup(A->getNumRows(), FGMRES->getRestart()); FGMRES->setup(A); } } else { - //status = KLU->refactorize(); - std::cout<<"Using CUSOLVER RF"<refactorize(); + std::cout << "Using CUSOLVER RF" << std::endl; + if ((i % 2 == 0)) { status = Rf->refactorize(); - std::cout << "CUSOLVER RF, using REAL refactorization, refactorization status: " - << status << std::endl; + std::cout << "CUSOLVER RF, using REAL refactorization, refactorization status: " << status << std::endl; vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = Rf->solve(vec_rhs, vec_x); FGMRES->setupPreconditioner("LU", Rf); } - //if (i%2!=0) vec_x->setToZero(ReSolve::memory::DEVICE); - real_type norm_x = vector_handler->dot(vec_x, vec_x, "cuda"); - std::cout << "Norm of x (before solve): " - << std::scientific << std::setprecision(16) - << sqrt(norm_x) << "\n"; - std::cout<<"CUSOLVER RF solve status: "<setToZero(ReSolve::memory::DEVICE); + real_type norm_x = vector_handler->dot(vec_x, vec_x, "cuda"); + std::cout << "Norm of x (before solve): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; + std::cout << "CUSOLVER RF solve status: " << status << std::endl; + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); @@ -189,39 +179,28 @@ int main(int argc, char *argv[]) matrix_handler->setValuesChanged(true, "cuda"); FGMRES->resetMatrix(A); 
- - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - std::cout << "\t 2-Norm of the residual (before IR): " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; - std::cout << "\t 2-Norm of the RIGHT HAND SIDE: " - << std::scientific << std::setprecision(16) - << norm_b << "\n"; + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + + std::cout << "\t 2-Norm of the residual (before IR): " << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) / norm_b << "\n"; + std::cout << "\t 2-Norm of the RIGHT HAND SIDE: " << std::scientific << std::setprecision(16) << norm_b << "\n"; vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); FGMRES->solve(vec_rhs, vec_x); - std::cout << "FGMRES: init nrm: " - << std::scientific << std::setprecision(16) - << FGMRES->getInitResidualNorm()/norm_b - << " final nrm: " - << FGMRES->getFinalResidualNorm()/norm_b - << " iter: " << FGMRES->getNumIter() << "\n"; + std::cout << "FGMRES: init nrm: " << std::scientific << std::setprecision(16) << FGMRES->getInitResidualNorm() / norm_b + << " final nrm: " << FGMRES->getFinalResidualNorm() / norm_b << " iter: " << FGMRES->getNumIter() << "\n"; norm_x = vector_handler->dot(vec_x, vec_x, "cuda"); - std::cout << "Norm of x (after IR): " - << std::scientific << std::setprecision(16) - << sqrt(norm_x) << "\n"; + std::cout << "Norm of x (after IR): " << std::scientific << std::setprecision(16) << sqrt(norm_x) << "\n"; } - - } delete A; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_CUDA; diff --git a/examples/r_KLU_rocSolverRf_FGMRES.cpp b/examples/r_KLU_rocSolverRf_FGMRES.cpp index 32d1865f4..5f59208ac 100644 --- a/examples/r_KLU_rocSolverRf_FGMRES.cpp +++ b/examples/r_KLU_rocSolverRf_FGMRES.cpp @@ -1,17 +1,17 @@ -#include -#include #include +#include 
+#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include -#include #include using namespace ReSolve::constants; @@ -20,42 +20,41 @@ int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); - real_type* rhs = nullptr; - real_type* x = nullptr; - - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; - - ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); - ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS, "hip"); - - for (int i = 0; i < numSystems; ++i) - { + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_HIP); + real_type *rhs = nullptr; + real_type *x = nullptr; + + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; + + 
ReSolve::GramSchmidt *GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectRocSolverRf *Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); + ReSolve::LinSolverIterativeFGMRES *FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS, "hip"); + + for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -67,125 +66,117 @@ int main(int argc, char *argv[]) matrixFileNameFull = matrixFileName + fileId + ".mtx"; rhsFileNameFull = rhsFileName + rhsId + ".mtx"; std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); - } - else { + } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. 
- if (i < 2) { + // Now convert to CSR. + if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { - matrix_handler->coo2csr(A_coo,A, "hip"); + } else { + matrix_handler->coo2csr(A_coo, A, "hip"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<getNnzExpanded() << std::endl; + // Now call direct solver if (i == 0) { KLU->setupParameters(1, 0.1, false); } int status; real_type norm_b; - if (i < 2){ + if (i < 2) { KLU->setup(A); matrix_handler->setValuesChanged(true, "hip"); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, "hip"); norm_b = sqrt(norm_b); matrix_handler->setValuesChanged(true, "hip"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); - printf("\t 2-Norm of the residual : %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "hip"))/norm_b); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); + printf("\t 2-Norm of the residual : %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "hip")) / norm_b); if (i == 1) { - ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); - ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); - if (L == nullptr) {printf("ERROR");} - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + ReSolve::matrix::Csc *L = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U = (ReSolve::matrix::Csc *)KLU->getUFactor(); + if (L == nullptr) { + printf("ERROR"); + } + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); Rf->setSolveMode(1); Rf->setup(A, L, U, P, 
Q, vec_rhs); Rf->refactorize(); - std::cout<<"about to set FGMRES" <setup(A->getNumRows(), FGMRES->getRestart()); - FGMRES->setup(A); + std::cout << "about to set FGMRES" << std::endl; + GS->setup(A->getNumRows(), FGMRES->getRestart()); + FGMRES->setup(A); } } else { - //status = KLU->refactorize(); - std::cout<<"Using ROCSOLVER RF"<refactorize(); + std::cout << "Using ROCSOLVER RF" << std::endl; status = Rf->refactorize(); - std::cout<<"ROCSOLVER RF refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"ROCSOLVER RF solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); norm_b = vector_handler->dot(vec_r, vec_r, "hip"); norm_b = sqrt(norm_b); - //matrix_handler->setValuesChanged(true, "hip"); + // matrix_handler->setValuesChanged(true, "hip"); FGMRES->resetMatrix(A); FGMRES->setupPreconditioner("LU", Rf); - - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); + + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); real_type rnrm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - std::cout << "\t 2-Norm of the residual (before IR): " - << std::scientific << std::setprecision(16) - << rnrm/norm_b << "\n"; + std::cout << "\t 2-Norm of the residual (before IR): " << std::scientific << std::setprecision(16) << rnrm / norm_b << "\n"; vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - if(!std::isnan(rnrm) && !std::isinf(rnrm)) { - FGMRES->solve(vec_rhs, vec_x); - - std::cout << "FGMRES: init nrm: " - << std::scientific << std::setprecision(16) - << FGMRES->getInitResidualNorm()/norm_b - << " final nrm: " - << FGMRES->getFinalResidualNorm()/norm_b - << " iter: " << FGMRES->getNumIter() << "\n"; - } - } + if (!std::isnan(rnrm) && !std::isinf(rnrm)) { + FGMRES->solve(vec_rhs, vec_x); + + std::cout << "FGMRES: init nrm: " << std::scientific << std::setprecision(16) << FGMRES->getInitResidualNorm() / norm_b + << " final nrm: " << FGMRES->getFinalResidualNorm() / norm_b << " iter: " << 
FGMRES->getNumIter() << "\n"; + } + } } // for (int i = 0; i < numSystems; ++i) @@ -193,8 +184,8 @@ int main(int argc, char *argv[]) delete A_coo; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_HIP; diff --git a/examples/r_KLU_rocsolverrf.cpp b/examples/r_KLU_rocsolverrf.cpp index 5651ed564..c2b5ef63e 100644 --- a/examples/r_KLU_rocsolverrf.cpp +++ b/examples/r_KLU_rocsolverrf.cpp @@ -1,59 +1,58 @@ -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[] ) +int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. 
+ std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); - real_type* rhs = nullptr; - real_type* x = nullptr; + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_HIP); + real_type *rhs = nullptr; + real_type *x = nullptr; - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectRocSolverRf *Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); - for (int i = 0; i < numSystems; ++i) - { + for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -65,103 +64,97 @@ int main(int argc, char *argv[] ) matrixFileNameFull = matrixFileName + fileId + ".mtx"; rhsFileNameFull = rhsFileName + rhsId + ".mtx"; std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new 
vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); vec_r = new vector_type(A->getNumRows()); - } - else { + } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. - if (i < 2) { + // Now convert to CSR. + if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { + } else { matrix_handler->coo2csr(A_coo, A, "hip"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<getNnzExpanded() << std::endl; + // Now call direct solver if (i == 0) { KLU->setupParameters(1, 0.1, false); } int status; - if (i < 2){ + if (i < 2) { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<getLFactor(); - ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + ReSolve::matrix::Csc *L = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U = (ReSolve::matrix::Csc *)KLU->getUFactor(); + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - Rf->setup(A, L, U, P, Q, vec_rhs); + Rf->setup(A, L, U, P, Q, vec_rhs); Rf->refactorize(); } } else { - std::cout<<"Using rocsolver rf"<refactorize(); - std::cout<<"rocsolver rf refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"rocsolver rf solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "hip"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); - std::cout << "\t 2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, "hip")) << "\n"; + std::cout << "\t 2-Norm of the residual: " << std::scientific << std::setprecision(16) << sqrt(vector_handler->dot(vec_r, vec_r, "hip")) << "\n"; } // for (int i = 0; i < numSystems; ++i) - //now DELETE + // now DELETE delete A; delete A_coo; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_HIP; diff --git a/examples/r_KLU_rocsolverrf_redo_factorization.cpp 
b/examples/r_KLU_rocsolverrf_redo_factorization.cpp index 234a413d3..b29fecbf6 100644 --- a/examples/r_KLU_rocsolverrf_redo_factorization.cpp +++ b/examples/r_KLU_rocsolverrf_redo_factorization.cpp @@ -1,62 +1,61 @@ -#include -#include #include +#include +#include +#include +#include #include -#include #include -#include -#include +#include #include +#include +#include #include -#include -#include #include using namespace ReSolve::constants; -int main(int argc, char *argv[] ) +int main(int argc, char *argv[]) { // Use the same data types as those you specified in ReSolve build. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - (void) argc; // TODO: Check if the number of input parameters is correct. - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; + (void)argc; // TODO: Check if the number of input parameters is correct. + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); - real_type* rhs = nullptr; - real_type* x = nullptr; - - vector_type* vec_rhs; - vector_type* vec_x; - vector_type* vec_r; - - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; - ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); - + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_HIP); + real_type *rhs = nullptr; + real_type *x = nullptr; + + vector_type *vec_rhs; + vector_type *vec_x; + vector_type *vec_r; + + 
ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectRocSolverRf *Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); + real_type res_nrm; real_type b_nrm; - for (int i = 0; i < numSystems; ++i) - { + for (int i = 0; i < numSystems; ++i) { index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -68,139 +67,129 @@ int main(int argc, char *argv[] ) matrixFileNameFull = matrixFileName + fileId + ".mtx"; rhsFileNameFull = rhsFileName + rhsId + ".mtx"; std::cout << std::endl << std::endl << std::endl; - std::cout << "========================================================================================================================"<getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + A = new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); rhs = ReSolve::io::readRhsFromFile(rhs_file); x = new real_type[A->getNumRows()]; vec_rhs = new vector_type(A->getNumRows()); vec_x = new vector_type(A->getNumRows()); vec_r = new vector_type(A->getNumRows()); - } - else { + } else { ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - //Now convert to CSR. - if (i < 2) { + // Now convert to CSR. 
+ if (i < 2) { matrix_handler->coo2csr(A_coo, A, "cpu"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - } else { + } else { matrix_handler->coo2csr(A_coo, A, "hip"); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<getNnzExpanded() << std::endl; + // Now call direct solver if (i == 0) { KLU->setupParameters(1, 0.1, false); } int status; - if (i < 2){ + if (i < 2) { KLU->setup(A); status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<getLFactor(); - ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + ReSolve::matrix::Csc *L = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U = (ReSolve::matrix::Csc *)KLU->getUFactor(); + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - Rf->setup(A, L, U, P, Q, vec_rhs); + Rf->setup(A, L, U, P, Q, vec_rhs); Rf->refactorize(); } } else { - std::cout<<"Using rocsolver rf"<refactorize(); - std::cout<<"rocsolver rf refactorization status: "<solve(vec_rhs, vec_x); - std::cout<<"rocsolver rf solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "hip"); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); b_nrm = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); - std::cout << "\t 2-Norm of the residual: " - << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; + std::cout << "\t 2-Norm of the residual: " << 
std::scientific << std::setprecision(16) << res_nrm / b_nrm << "\n"; if (!isnan(res_nrm)) { - if (res_nrm/b_nrm > 1e-7 ) { - std::cout << "\n \t !!! ALERT !!! Residual norm is too large; redoing KLU symbolic and numeric factorization. !!! ALERT !!! \n \n"; - - KLU->setup(A); - status = KLU->analyze(); - std::cout<<"KLU analysis status: "<factorize(); - std::cout<<"KLU factorization status: "<solve(vec_rhs, vec_x); - std::cout<<"KLU solve status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - - matrix_handler->setValuesChanged(true, "hip"); - - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "hip"); - res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - - std::cout<<"\t New residual norm: " - << std::scientific << std::setprecision(16) - << res_nrm/b_nrm << "\n"; - - - ReSolve::matrix::Csc* L = (ReSolve::matrix::Csc*) KLU->getLFactor(); - ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); - - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); - - Rf->setup(A, L, U, P, Q, vec_rhs); - } - } + if (res_nrm / b_nrm > 1e-7) { + std::cout << "\n \t !!! ALERT !!! Residual norm is too large; redoing KLU symbolic and numeric factorization. !!! ALERT !!! 
\n \n"; + + KLU->setup(A); + status = KLU->analyze(); + std::cout << "KLU analysis status: " << status << std::endl; + status = KLU->factorize(); + std::cout << "KLU factorization status: " << status << std::endl; + status = KLU->solve(vec_rhs, vec_x); + std::cout << "KLU solve status: " << status << std::endl; + + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + matrix_handler->setValuesChanged(true, "hip"); + + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); + res_nrm = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); + + std::cout << "\t New residual norm: " << std::scientific << std::setprecision(16) << res_nrm / b_nrm << "\n"; + + ReSolve::matrix::Csc *L = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U = (ReSolve::matrix::Csc *)KLU->getUFactor(); + + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); + + Rf->setup(A, L, U, P, Q, vec_rhs); + } + } } // for (int i = 0; i < numSystems; ++i) - //now DELETE + // now DELETE delete A; delete A_coo; delete KLU; delete Rf; - delete [] x; - delete [] rhs; + delete[] x; + delete[] rhs; delete vec_r; delete vec_x; delete workspace_HIP; diff --git a/examples/resolve_consumer/CMakeLists.txt b/examples/resolve_consumer/CMakeLists.txt index e03a4e6f0..76f56ecb5 100644 --- a/examples/resolve_consumer/CMakeLists.txt +++ b/examples/resolve_consumer/CMakeLists.txt @@ -1,7 +1,7 @@ -# Example of how to consume Resolve as package via cmake -# Below is a standard example of a CMakeLists.txt file that ultizies Resolve -# See the Readme on how to build and install ReSolve -#---------------------------------------------------------------------------------- +# Example of how to consume Resolve as package via cmake Below is a standard +# example of a CMakeLists.txt file that ultizies Resolve See the Readme on how +# to build and install ReSolve +# 
---------------------------------------------------------------------------------- cmake_minimum_required(VERSION 3.20) project(resolve_consumer LANGUAGES CXX) @@ -13,9 +13,12 @@ find_package(ReSolve REQUIRED) add_executable(consume.exe consumer.cpp) target_link_libraries(consume.exe PRIVATE ReSolve::ReSolve) -#------------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------------ # Testing of exported Resolve Configurations enable_testing() -# RESOLVE_DATA is set in test.sh and is the file path the matrix data files used in the testKLU_Rf_FGMRES -add_test(NAME resolve_consumer COMMAND $ "${RESOLVE_DATA}") \ No newline at end of file +# RESOLVE_DATA is set in test.sh and is the file path the matrix data files used +# in the testKLU_Rf_FGMRES +add_test(NAME resolve_consumer COMMAND $ + "${RESOLVE_DATA}" +) diff --git a/examples/resolve_consumer/README.md b/examples/resolve_consumer/README.md index 8131e94f1..d9021d01a 100644 --- a/examples/resolve_consumer/README.md +++ b/examples/resolve_consumer/README.md @@ -18,4 +18,4 @@ CI is ran per every merge request that makes sure ReSolve can be consumed as a p If you follow the [developer guidelines](CONTRIBUTING.md) for building resolve and run make test you will see ReSolve consumed and linked with an example test in Test #1 (resolve_Consume). -This ReSolve Consume test is executed via a cmake test that exectutes test.sh. This shell script then goes through the cmake build process to ensure that ReSolve can be built from scratch and linked to another cmake project. \ No newline at end of file +This ReSolve Consume test is executed via a cmake test that exectutes test.sh. This shell script then goes through the cmake build process to ensure that ReSolve can be built from scratch and linked to another cmake project. 
diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index b98c82344..8ebbd83eb 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -9,26 +9,17 @@ add_subdirectory(utilities) # C++ files -set(ReSolve_SRC - LinSolver.cpp - LinSolverDirectKLU.cpp -) +set(ReSolve_SRC LinSolver.cpp LinSolverDirectKLU.cpp) # Temporary until there is CPU-only option for FGMRES -set(ReSolve_GPU_SRC - GramSchmidt.cpp - LinSolverIterativeFGMRES.cpp -) +set(ReSolve_GPU_SRC GramSchmidt.cpp LinSolverIterativeFGMRES.cpp) # C++ code that links to CUDA SDK libraries -set(ReSolve_CUDASDK_SRC - LinSolverDirectCuSolverGLU.cpp - LinSolverDirectCuSolverRf.cpp +set(ReSolve_CUDASDK_SRC LinSolverDirectCuSolverGLU.cpp + LinSolverDirectCuSolverRf.cpp ) # HIP files -set(ReSolve_ROCM_SRC - LinSolverDirectRocSolverRf.cpp -) +set(ReSolve_ROCM_SRC LinSolverDirectRocSolverRf.cpp) # Header files to be installed set(ReSolve_HEADER_INSTALL Common.hpp @@ -56,15 +47,11 @@ add_subdirectory(matrix) add_library(resolve_tpl INTERFACE) if(RESOLVE_USE_KLU) - target_link_libraries(resolve_tpl INTERFACE KLU) + target_link_libraries(resolve_tpl INTERFACE KLU) endif(RESOLVE_USE_KLU) -set(ReSolve_Targets_List - resolve_matrix - resolve_vector - resolve_logger - resolve_tpl - resolve_workspace +set(ReSolve_Targets_List resolve_matrix resolve_vector resolve_logger + resolve_tpl resolve_workspace ) # Temporary until there is CPU-only option for FGMRES @@ -100,20 +87,21 @@ install(TARGETS ${ReSolve_Targets_List} EXPORT ReSolveTargets) # Create ReSolve library add_library(ReSolve SHARED ${ReSolve_SRC}) -target_include_directories(ReSolve INTERFACE - $ - $ +target_include_directories( + ReSolve INTERFACE $ + $ ) # TODO: Make this PRIVATE dependency (requires refactoring ReSolve code) target_link_libraries(ReSolve PUBLIC ${ReSolve_Targets_List}) # Install targets -install(TARGETS ReSolve - EXPORT ReSolveTargets - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) +install( + TARGETS ReSolve + EXPORT 
ReSolveTargets + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib +) # install include headers install(FILES ${ReSolve_HEADER_INSTALL} DESTINATION include/resolve) - diff --git a/resolve/Common.hpp b/resolve/Common.hpp index 974cb8b8e..81c0f4309 100644 --- a/resolve/Common.hpp +++ b/resolve/Common.hpp @@ -1,32 +1,31 @@ #pragma once -namespace ReSolve { +namespace ReSolve +{ +constexpr double EPSILON = 1.0e-18; +constexpr double EPSMAC = 1.0e-16; - constexpr double EPSILON = 1.0e-18; - constexpr double EPSMAC = 1.0e-16; - - - using real_type = double; - using index_type = int; - - namespace constants - { - constexpr real_type ZERO = 0.0; - constexpr real_type ONE = 1.0; - constexpr real_type MINUSONE = -1.0; - } +using real_type = double; +using index_type = int; - namespace colors - { - // must be const pointer and const dest for - // const string declarations to pass -Wwrite-strings - static const char * const RED = "\033[1;31m"; - static const char * const GREEN = "\033[1;32m"; - static const char * const YELLOW = "\033[33;1m"; - static const char * const BLUE = "\033[34;1m"; - static const char * const ORANGE = "\u001b[38;5;208m"; - static const char * const CLEAR = "\033[0m"; - } +namespace constants +{ +constexpr real_type ZERO = 0.0; +constexpr real_type ONE = 1.0; +constexpr real_type MINUSONE = -1.0; +} // namespace constants + +namespace colors +{ +// must be const pointer and const dest for +// const string declarations to pass -Wwrite-strings +static const char *const RED = "\033[1;31m"; +static const char *const GREEN = "\033[1;32m"; +static const char *const YELLOW = "\033[33;1m"; +static const char *const BLUE = "\033[34;1m"; +static const char *const ORANGE = "\u001b[38;5;208m"; +static const char *const CLEAR = "\033[0m"; +} // namespace colors } // namespace ReSolve diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index 7a6572b3b..bafaff792 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -1,315 +1,305 @@ 
-#include #include #include +#include +#include "GramSchmidt.hpp" #include #include -#include "GramSchmidt.hpp" namespace ReSolve { - using out = io::Logger; +using out = io::Logger; - int idxmap(index_type i, index_type j, index_type col_lenght) { - return i * (col_lenght) + j; - } +int idxmap(index_type i, index_type j, index_type col_lenght) { return i * (col_lenght) + j; } + +GramSchmidt::GramSchmidt() +{ + variant_ = mgs; // variant is enum now + h_L_ = nullptr; + this->setup_complete_ = false; +} + +GramSchmidt::GramSchmidt(VectorHandler *vh, GSVariant variant) +{ + this->setVariant(variant); + this->vector_handler_ = vh; + h_L_ = nullptr; + this->setup_complete_ = false; +} + +GramSchmidt::~GramSchmidt() +{ + if (setup_complete_) { + if (variant_ == mgs_two_synch || variant_ == mgs_pm) { + delete h_L_; + delete h_rv_; + + delete vec_rv_; + delete vec_Hcolumn_; + ; + } + + if (variant_ == cgs2) { + delete h_aux_; + delete vec_Hcolumn_; + } + if (variant_ == mgs_pm) { + delete h_aux_; + } - GramSchmidt::GramSchmidt() - { - variant_ = mgs; //variant is enum now - h_L_ = nullptr; - this->setup_complete_ = false; + delete vec_w_; + delete vec_v_; } +} - GramSchmidt::GramSchmidt(VectorHandler* vh, GSVariant variant) - { - this->setVariant(variant); - this->vector_handler_ = vh; - h_L_ = nullptr; - this->setup_complete_ = false; +int GramSchmidt::setVariant(GSVariant variant) +{ + if ((variant != mgs) && (variant != cgs2) && (variant != mgs_two_synch) && (variant != mgs_pm) && (variant != cgs1)) { + this->variant_ = mgs; + return 2; } + variant_ = variant; + return 0; +} - GramSchmidt::~GramSchmidt() - { - if (setup_complete_) { - if(variant_ == mgs_two_synch || variant_ == mgs_pm) { - delete h_L_; - delete h_rv_; +GramSchmidt::GSVariant GramSchmidt::getVariant() { return variant_; } - delete vec_rv_; - delete vec_Hcolumn_;; - } +real_type *GramSchmidt::getL() { return h_L_; } - if(variant_ == cgs2) { - delete h_aux_; - delete vec_Hcolumn_; - } - if(variant_ == 
mgs_pm) { - delete h_aux_; - } +bool GramSchmidt::isSetupComplete() { return setup_complete_; } - delete vec_w_; - delete vec_v_; +int GramSchmidt::setup(index_type n, index_type restart) +{ + if (setup_complete_) { + return 1; // display some nasty comment too + } else { + vec_w_ = new vector_type(n); + vec_v_ = new vector_type(n); + this->num_vecs_ = restart; + if (variant_ == mgs_two_synch || variant_ == mgs_pm) { + h_L_ = new real_type[num_vecs_ * (num_vecs_ + 1)]; + h_rv_ = new real_type[num_vecs_ + 1]; + + vec_rv_ = new vector_type(num_vecs_ + 1, 2); + vec_rv_->allocate(memory::DEVICE); + + vec_Hcolumn_ = new vector_type(num_vecs_ + 1); + vec_Hcolumn_->allocate(memory::DEVICE); + } + if (variant_ == cgs2) { + h_aux_ = new real_type[num_vecs_ + 1]; + vec_Hcolumn_ = new vector_type(num_vecs_ + 1); + vec_Hcolumn_->allocate(memory::DEVICE); } - } - int GramSchmidt::setVariant(GSVariant variant) - { - if ((variant != mgs) && (variant != cgs2) && (variant != mgs_two_synch) && (variant != mgs_pm) && (variant != cgs1)) { - this->variant_ = mgs; - return 2; + if (variant_ == mgs_pm) { + h_aux_ = new real_type[num_vecs_ + 1]; } - variant_ = variant; - return 0; } - GramSchmidt::GSVariant GramSchmidt::getVariant() - { - return variant_; - } + return 0; +} - real_type* GramSchmidt::getL() - { - return h_L_; - } +// this always happen on the GPU +int GramSchmidt::orthogonalize(index_type n, vector::Vector *V, real_type *H, index_type i, std::string memspace) +{ + using namespace constants; - bool GramSchmidt::isSetupComplete() - { - return setup_complete_; - } + if ((memspace == "cuda") || (memspace == "hip")) { // or hip - int GramSchmidt::setup(index_type n, index_type restart) - { - if (setup_complete_) { - return 1; // display some nasty comment too - } else { - vec_w_ = new vector_type(n); - vec_v_ = new vector_type(n); - this->num_vecs_ = restart; - if(variant_ == mgs_two_synch || variant_ == mgs_pm) { - h_L_ = new real_type[num_vecs_ * (num_vecs_ + 1)]; - h_rv_ = 
new real_type[num_vecs_ + 1]; - - vec_rv_ = new vector_type(num_vecs_ + 1, 2); - vec_rv_->allocate(memory::DEVICE); - - vec_Hcolumn_ = new vector_type(num_vecs_ + 1); - vec_Hcolumn_->allocate(memory::DEVICE); + double t; + double s; + + switch (variant_) { + case mgs: + + vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); + for (int j = 0; j <= i; ++j) { + t = 0.0; + vec_v_->setData(V->getVectorData(j, memory::DEVICE), memory::DEVICE); + t = vector_handler_->dot(vec_v_, vec_w_, memspace); + H[idxmap(i, j, num_vecs_ + 1)] = t; + t *= -1.0; + vector_handler_->axpy(&t, vec_v_, vec_w_, memspace); } - if(variant_ == cgs2) { - h_aux_ = new real_type[num_vecs_ + 1]; - vec_Hcolumn_ = new vector_type(num_vecs_ + 1); - vec_Hcolumn_->allocate(memory::DEVICE); + t = 0.0; + t = vector_handler_->dot(vec_w_, vec_w_, memspace); + // set the last entry in Hessenberg matrix + t = sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (fabs(t) > EPSILON) { + t = 1.0 / t; + vector_handler_->scal(&t, vec_w_, memspace); + } else { + assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); + return -1; } + break; + case cgs2: + + vec_v_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); + vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace); + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace); - if(variant_ == mgs_pm) { - h_aux_ = new real_type[num_vecs_ + 1]; + // copy H_col to aux, we will need it later + vec_Hcolumn_->setDataUpdated(memory::DEVICE); + vec_Hcolumn_->setCurrentSize(i + 1); + vec_Hcolumn_->deepCopyVectorData(h_aux_, 0, memory::HOST); + + // Hcol = V(:,1:i)^T*V(:,i+1); + vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace); + + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace); + + // copy H_col to H + 
vec_Hcolumn_->setDataUpdated(memory::DEVICE); + vec_Hcolumn_->deepCopyVectorData(&H[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + + // add both pieces together (unstable otherwise, careful here!!) + t = 0.0; + for (int j = 0; j <= i; ++j) { + H[idxmap(i, j, num_vecs_ + 1)] += h_aux_[j]; } - } - return 0; - } + t = vector_handler_->dot(vec_v_, vec_v_, memspace); + // set the last entry in Hessenberg matrix + t = sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (fabs(t) > EPSILON) { + t = 1.0 / t; + vector_handler_->scal(&t, vec_v_, memspace); + } else { + assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); + return -1; + } + return 0; + break; + case mgs_two_synch: + // V[1:i]^T[V[i] w] + vec_v_->setData(V->getVectorData(i, memory::DEVICE), memory::DEVICE); + vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); + vec_rv_->setCurrentSize(i + 1); + + vector_handler_->massDot2Vec(n, V, i, vec_v_, vec_rv_, memspace); + vec_rv_->setDataUpdated(memory::DEVICE); + vec_rv_->copyData(memory::DEVICE, memory::HOST); + + vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + h_rv_ = vec_rv_->getVectorData(1, memory::HOST); + + for (int j = 0; j <= i; ++j) { + H[idxmap(i, j, num_vecs_ + 1)] = 0.0; + } + // triangular solve + for (int j = 0; j <= i; ++j) { + H[idxmap(i, j, num_vecs_ + 1)] = h_rv_[j]; + s = 0.0; + for (int k = 0; k < j; ++k) { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * H[idxmap(i, k, num_vecs_ + 1)]; + } // for k + H[idxmap(i, j, num_vecs_ + 1)] -= s; + } // for j + vec_Hcolumn_->setCurrentSize(i + 1); + vec_Hcolumn_->update(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memory::DEVICE); + vector_handler_->massAxpy(n, vec_Hcolumn_, i, V, vec_w_, memspace); + + // normalize (second synch) + t = vector_handler_->dot(vec_w_, vec_w_, memspace); + // set the last entry in Hessenberg matrix + t = sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (fabs(t) > EPSILON) { + t = 1.0 / t; + 
vector_handler_->scal(&t, vec_w_, memspace); + } else { + assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); + return -1; + } + return 0; + break; + case mgs_pm: + vec_v_->setData(V->getVectorData(i, memory::DEVICE), memory::DEVICE); + vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); + vec_rv_->setCurrentSize(i + 1); + + vector_handler_->massDot2Vec(n, V, i, vec_v_, vec_rv_, memspace); + vec_rv_->setDataUpdated(memory::DEVICE); + vec_rv_->copyData(memory::DEVICE, memory::HOST); + + vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); + h_rv_ = vec_rv_->getVectorData(1, memory::HOST); + + for (int j = 0; j <= i; ++j) { + H[idxmap(i, j, num_vecs_ + 1)] = 0.0; + } + + // triangular solve + for (int j = 0; j <= i; ++j) { + H[idxmap(i, j, num_vecs_ + 1)] = h_rv_[j]; + s = 0.0; + for (int k = 0; k < j; ++k) { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * H[idxmap(i, k, num_vecs_ + 1)]; + } // for k + H[idxmap(i, j, num_vecs_ + 1)] -= s; + } // for j + + // now compute h_rv = L^T h_H + double h; + for (int j = 0; j <= i; ++j) { + // go through COLUMN OF L + h_rv_[j] = 0.0; + for (int k = j + 1; k <= i; ++k) { + h = h_L_[idxmap(k, j, num_vecs_ + 1)]; + h_rv_[j] += H[idxmap(i, k, num_vecs_ + 1)] * h; + } + } + + // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv + for (int j = 0; j <= i; ++j) { + h_aux_[j] = h_rv_[j]; + s = 0.0; + for (int k = 0; k < j; ++k) { + s += h_L_[idxmap(j, k, num_vecs_ + 1)] * h_aux_[k]; + } // for k + h_aux_[j] -= s; + } // for j + + // and now subtract that from h_H + for (int j = 0; j <= i; ++j) { + H[idxmap(i, j, num_vecs_ + 1)] -= h_aux_[j]; + } - //this always happen on the GPU - int GramSchmidt::orthogonalize(index_type n, vector::Vector* V, real_type* H, index_type i, std::string memspace) - { - using namespace constants; - - if ((memspace == "cuda") || (memspace == "hip")) { // or hip - - double t; - double s; - - switch (variant_){ - case mgs: - - 
vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); - for(int j = 0; j <= i; ++j) { - t = 0.0; - vec_v_->setData( V->getVectorData(j, memory::DEVICE), memory::DEVICE); - t = vector_handler_->dot(vec_v_, vec_w_, memspace); - H[ idxmap(i, j, num_vecs_ + 1) ] = t; - t *= -1.0; - vector_handler_->axpy(&t, vec_v_, vec_w_, memspace); - } - t = 0.0; - t = vector_handler_->dot(vec_w_, vec_w_, memspace); - //set the last entry in Hessenberg matrix - t = sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(fabs(t) > EPSILON) { - t = 1.0/t; - vector_handler_->scal(&t, vec_w_, memspace); - } else { - assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); - return -1; - } - break; - case cgs2: - - vec_v_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); - vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace); - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace ); - - // copy H_col to aux, we will need it later - vec_Hcolumn_->setDataUpdated(memory::DEVICE); - vec_Hcolumn_->setCurrentSize(i + 1); - vec_Hcolumn_->deepCopyVectorData(h_aux_, 0, memory::HOST); - - //Hcol = V(:,1:i)^T*V(:,i+1); - vector_handler_->gemv("T", n, i + 1, &ONE, &ZERO, V, vec_v_, vec_Hcolumn_, memspace); - - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - vector_handler_->gemv("N", n, i + 1, &ONE, &MINUSONE, V, vec_Hcolumn_, vec_v_, memspace ); - - // copy H_col to H - vec_Hcolumn_->setDataUpdated(memory::DEVICE); - vec_Hcolumn_->deepCopyVectorData(&H[ idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - - // add both pieces together (unstable otherwise, careful here!!) 
- t = 0.0; - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1)] += h_aux_[j]; - } - - t = vector_handler_->dot(vec_v_, vec_v_, memspace); - //set the last entry in Hessenberg matrix - t = sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(fabs(t) > EPSILON) { - t = 1.0/t; - vector_handler_->scal(&t, vec_v_, memspace); - } else { - assert(0 && "Gram-Schmidt failed, vector with ZERO norm\n"); - return -1; - } - return 0; - break; - case mgs_two_synch: - // V[1:i]^T[V[i] w] - vec_v_->setData(V->getVectorData(i, memory::DEVICE), memory::DEVICE); - vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); - vec_rv_->setCurrentSize(i + 1); - - vector_handler_->massDot2Vec(n, V, i, vec_v_, vec_rv_, memspace); - vec_rv_->setDataUpdated(memory::DEVICE); - vec_rv_->copyData(memory::DEVICE, memory::HOST); - - vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - h_rv_ = vec_rv_->getVectorData(1, memory::HOST); - - for(int j=0; j<=i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0; - } - // triangular solve - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = h_rv_[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * H[ idxmap(i, k, num_vecs_ + 1) ]; - } // for k - H[ idxmap(i, j, num_vecs_ + 1) ] -= s; - } // for j - vec_Hcolumn_->setCurrentSize(i + 1); - vec_Hcolumn_->update(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memory::DEVICE); - vector_handler_->massAxpy(n, vec_Hcolumn_, i, V, vec_w_, memspace); - - // normalize (second synch) - t = vector_handler_->dot(vec_w_, vec_w_, memspace); - //set the last entry in Hessenberg matrix - t = sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1)] = t; - if(fabs(t) > EPSILON) { - t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace); - } else { - assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); - return -1; - } - return 0; - break; - case mgs_pm: - vec_v_->setData(V->getVectorData(i, 
memory::DEVICE), memory::DEVICE); - vec_w_->setData(V->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); - vec_rv_->setCurrentSize(i + 1); - - vector_handler_->massDot2Vec(n, V, i, vec_v_, vec_rv_, memspace); - vec_rv_->setDataUpdated(memory::DEVICE); - vec_rv_->copyData(memory::DEVICE, memory::HOST); - - vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST); - h_rv_ = vec_rv_->getVectorData(1, memory::HOST); - - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0; - } - - //triangular solve - for(int j = 0; j <= i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] = h_rv_[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * H[ idxmap(i, k, num_vecs_ + 1) ]; - } // for k - H[ idxmap(i, j, num_vecs_ + 1) ] -= s; - } // for j - - // now compute h_rv = L^T h_H - double h; - for(int j = 0; j <= i; ++j) { - // go through COLUMN OF L - h_rv_[j] = 0.0; - for(int k = j + 1; k <= i; ++k) { - h = h_L_[ idxmap(k, j, num_vecs_ + 1)]; - h_rv_[j] += H[ idxmap(i, k, num_vecs_ + 1) ] * h; - } - } - - // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv - for(int j = 0; j <= i; ++j) { - h_aux_[j] = h_rv_[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[ idxmap(j, k, num_vecs_ + 1) ] * h_aux_[k]; - } // for k - h_aux_[j] -= s; - } // for j - - // and now subtract that from h_H - for(int j=0; j<=i; ++j) { - H[ idxmap(i, j, num_vecs_ + 1) ] -= h_aux_[j]; - } - - vec_Hcolumn_->setCurrentSize(i + 1); - vec_Hcolumn_->update(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memory::DEVICE); - - vector_handler_->massAxpy(n, vec_Hcolumn_, i, V, vec_w_, memspace); - // normalize (second synch) - t = vector_handler_->dot(vec_w_, vec_w_, memspace); - //set the last entry in Hessenberg matrix - t = sqrt(t); - H[ idxmap(i, i + 1, num_vecs_ + 1) ] = t; - if(fabs(t) > EPSILON) { - t = 1.0 / t; - vector_handler_->scal(&t, vec_w_, memspace); - } else { - assert(0 && "Iterative refinement failed, Krylov 
vector with ZERO norm\n"); - return -1; - } - return 0; - break; - - default: - assert(0 && "Iterative refinement failed, wrong orthogonalization.\n"); - return -1; - break; - - }//switch - } else { - out::error() << "Not implemented (yet)" << std::endl; + vec_Hcolumn_->setCurrentSize(i + 1); + vec_Hcolumn_->update(&H[idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memory::DEVICE); + + vector_handler_->massAxpy(n, vec_Hcolumn_, i, V, vec_w_, memspace); + // normalize (second synch) + t = vector_handler_->dot(vec_w_, vec_w_, memspace); + // set the last entry in Hessenberg matrix + t = sqrt(t); + H[idxmap(i, i + 1, num_vecs_ + 1)] = t; + if (fabs(t) > EPSILON) { + t = 1.0 / t; + vector_handler_->scal(&t, vec_w_, memspace); + } else { + assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n"); + return -1; + } + return 0; + break; + + default: + assert(0 && "Iterative refinement failed, wrong orthogonalization.\n"); return -1; - } - return 0; - }//orthogonalize -} + break; + + } // switch + } else { + out::error() << "Not implemented (yet)" << std::endl; + return -1; + } + return 0; +} // orthogonalize +} // namespace ReSolve diff --git a/resolve/GramSchmidt.hpp b/resolve/GramSchmidt.hpp index 7d7b93bef..1dfcdb988 100644 --- a/resolve/GramSchmidt.hpp +++ b/resolve/GramSchmidt.hpp @@ -1,47 +1,43 @@ #pragma once #include "Common.hpp" -#include -#include #include -namespace ReSolve +#include +#include +namespace ReSolve { - class GramSchmidt - { - using vector_type = vector::Vector; - public: - enum GSVariant { mgs = 0, - cgs2 = 1, - mgs_two_synch = 2, - mgs_pm = 3, - cgs1 = 4 }; - - GramSchmidt(); - GramSchmidt(VectorHandler* vh, GSVariant variant); - ~GramSchmidt(); - int setVariant(GramSchmidt::GSVariant variant); - GSVariant getVariant(); - real_type* getL(); //only for low synch, returns null ptr otherwise - - int setup(index_type n, index_type restart); - int orthogonalize(index_type n, vector_type* V, real_type* H, index_type i, std::string 
memspace); - bool isSetupComplete(); - - private: - - GSVariant variant_; - bool setup_complete_; //to avoid double allocations and stuff - - index_type num_vecs_; //the same as restart - vector_type* vec_rv_{nullptr}; - vector_type* vec_Hcolumn_{nullptr}; - - real_type* h_L_{nullptr}; - real_type* h_rv_{nullptr}; - real_type* h_aux_{nullptr}; - VectorHandler* vector_handler_{nullptr}; - - vector_type* vec_v_{nullptr}; // aux variable - vector_type* vec_w_{nullptr}; // aux variable - }; - -}//namespace +class GramSchmidt +{ + using vector_type = vector::Vector; + + public: + enum GSVariant { mgs = 0, cgs2 = 1, mgs_two_synch = 2, mgs_pm = 3, cgs1 = 4 }; + + GramSchmidt(); + GramSchmidt(VectorHandler *vh, GSVariant variant); + ~GramSchmidt(); + int setVariant(GramSchmidt::GSVariant variant); + GSVariant getVariant(); + real_type *getL(); // only for low synch, returns null ptr otherwise + + int setup(index_type n, index_type restart); + int orthogonalize(index_type n, vector_type *V, real_type *H, index_type i, std::string memspace); + bool isSetupComplete(); + + private: + GSVariant variant_; + bool setup_complete_; // to avoid double allocations and stuff + + index_type num_vecs_; // the same as restart + vector_type *vec_rv_{nullptr}; + vector_type *vec_Hcolumn_{nullptr}; + + real_type *h_L_{nullptr}; + real_type *h_rv_{nullptr}; + real_type *h_aux_{nullptr}; + VectorHandler *vector_handler_{nullptr}; + + vector_type *vec_v_{nullptr}; // aux variable + vector_type *vec_w_{nullptr}; // aux variable +}; + +} // namespace ReSolve diff --git a/resolve/LinSolver.cpp b/resolve/LinSolver.cpp index 5682ec40e..27cac7767 100644 --- a/resolve/LinSolver.cpp +++ b/resolve/LinSolver.cpp @@ -1,113 +1,78 @@ -#include #include "LinSolver.hpp" +#include + +namespace ReSolve +{ +LinSolver::LinSolver() {} + +LinSolver::~LinSolver() +{ + // destroy the matrix and hadlers +} + +real_type LinSolver::evaluateResidual() +{ + // to be implemented + return 1.0; +} + 
+LinSolverDirect::LinSolverDirect() +{ + L_ = nullptr; + U_ = nullptr; + P_ = nullptr; + Q_ = nullptr; + factors_extracted_ = false; +} + +LinSolverDirect::~LinSolverDirect() +{ + delete L_; + delete U_; + delete[] P_; + delete[] Q_; +} + +int LinSolverDirect::setup(matrix::Sparse *A, matrix::Sparse * /* L */, matrix::Sparse * /* U */, index_type * /* P */, index_type * /* Q */, + vector_type * /* rhs */) +{ + this->A_ = A; + return 0; +} +int LinSolverDirect::analyze() { return 0; } // the same as symbolic factorization -namespace ReSolve +int LinSolverDirect::factorize() { - LinSolver::LinSolver() - { - } - - LinSolver::~LinSolver() - { - //destroy the matrix and hadlers - } - - real_type LinSolver::evaluateResidual() - { - //to be implemented - return 1.0; - } - - LinSolverDirect::LinSolverDirect() - { - L_ = nullptr; - U_ = nullptr; - P_ = nullptr; - Q_ = nullptr; - factors_extracted_ = false; - } - - LinSolverDirect::~LinSolverDirect() - { - delete L_; - delete U_; - delete [] P_; - delete [] Q_; - } - - int LinSolverDirect::setup(matrix::Sparse* A, - matrix::Sparse* /* L */, - matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) - { - this->A_ = A; - return 0; - } - - int LinSolverDirect::analyze() - { - return 0; - } //the same as symbolic factorization - - int LinSolverDirect::factorize() - { - factors_extracted_ = false; - return 0; - } - - int LinSolverDirect::refactorize() - { - factors_extracted_ = false; - return 0; - } - - int LinSolverDirect::solve(vector_type* /* rhs */, vector_type* /* x */) - { - return 0; - } - - matrix::Sparse* LinSolverDirect::getLFactor() - { - return nullptr; - } - - matrix::Sparse* LinSolverDirect::getUFactor() - { - return nullptr; - } - - index_type* LinSolverDirect::getPOrdering() - { - return nullptr; - } - - index_type* LinSolverDirect::getQOrdering() - { - return nullptr; - } - - LinSolverIterative::LinSolverIterative() - { - } - - LinSolverIterative::~LinSolverIterative() - 
{ - } - - int LinSolverIterative::setup(matrix::Sparse* A) - { - this->A_ = A; - return 0; - } - - int LinSolverIterative::solve(vector_type* /* rhs */, vector_type* /* init_guess */) - { - return 0; - } + factors_extracted_ = false; + return 0; } +int LinSolverDirect::refactorize() +{ + factors_extracted_ = false; + return 0; +} + +int LinSolverDirect::solve(vector_type * /* rhs */, vector_type * /* x */) { return 0; } + +matrix::Sparse *LinSolverDirect::getLFactor() { return nullptr; } +matrix::Sparse *LinSolverDirect::getUFactor() { return nullptr; } + +index_type *LinSolverDirect::getPOrdering() { return nullptr; } + +index_type *LinSolverDirect::getQOrdering() { return nullptr; } + +LinSolverIterative::LinSolverIterative() {} + +LinSolverIterative::~LinSolverIterative() {} + +int LinSolverIterative::setup(matrix::Sparse *A) +{ + this->A_ = A; + return 0; +} +int LinSolverIterative::solve(vector_type * /* rhs */, vector_type * /* init_guess */) { return 0; } +} // namespace ReSolve diff --git a/resolve/LinSolver.hpp b/resolve/LinSolver.hpp index a34aeba0e..f48664495 100644 --- a/resolve/LinSolver.hpp +++ b/resolve/LinSolver.hpp @@ -1,85 +1,80 @@ #pragma once -#include #include "Common.hpp" +#include + +namespace ReSolve +{ +// Forward declaration of vector::Vector class +namespace vector +{ +class Vector; +} + +// Forward declaration of VectorHandler class +class VectorHandler; -namespace ReSolve +// Forward declaration of matrix::Sparse class +namespace matrix { - // Forward declaration of vector::Vector class - namespace vector - { - class Vector; - } +class Sparse; +} - // Forward declaration of VectorHandler class - class VectorHandler; +// Forward declaration of MatrixHandler class +class MatrixHandler; - // Forward declaration of matrix::Sparse class - namespace matrix - { - class Sparse; - } +class LinSolver +{ + protected: + using vector_type = vector::Vector; - // Forward declaration of MatrixHandler class - class MatrixHandler; + public: + 
LinSolver(); + virtual ~LinSolver(); - class LinSolver - { - protected: - using vector_type = vector::Vector; + real_type evaluateResidual(); - public: - LinSolver(); - virtual ~LinSolver(); + protected: + matrix::Sparse *A_; + real_type *rhs_; + real_type *sol_; - real_type evaluateResidual(); - - protected: - matrix::Sparse* A_; - real_type* rhs_; - real_type* sol_; + MatrixHandler *matrix_handler_; + VectorHandler *vector_handler_; +}; - MatrixHandler* matrix_handler_; - VectorHandler* vector_handler_; - }; +class LinSolverDirect : public LinSolver +{ + public: + LinSolverDirect(); + virtual ~LinSolverDirect(); + // return 0 if successful! + virtual int setup(matrix::Sparse *A, matrix::Sparse *L, matrix::Sparse *U, index_type *P, index_type *Q, vector_type *rhs); - class LinSolverDirect : public LinSolver - { - public: - LinSolverDirect(); - virtual ~LinSolverDirect(); - //return 0 if successful! - virtual int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs); - - virtual int analyze(); //the same as symbolic factorization - virtual int factorize(); - virtual int refactorize(); - virtual int solve(vector_type* rhs, vector_type* x); - - virtual matrix::Sparse* getLFactor(); - virtual matrix::Sparse* getUFactor(); - virtual index_type* getPOrdering(); - virtual index_type* getQOrdering(); - - protected: - matrix::Sparse* L_; - matrix::Sparse* U_; - index_type* P_; - index_type* Q_; - bool factors_extracted_; - }; + virtual int analyze(); // the same as symbolic factorization + virtual int factorize(); + virtual int refactorize(); + virtual int solve(vector_type *rhs, vector_type *x); - class LinSolverIterative : public LinSolver - { - public: - LinSolverIterative(); - ~LinSolverIterative(); - virtual int setup(matrix::Sparse* A); + virtual matrix::Sparse *getLFactor(); + virtual matrix::Sparse *getUFactor(); + virtual index_type *getPOrdering(); + virtual index_type *getQOrdering(); - virtual 
int solve(vector_type* rhs, vector_type* init_guess); - }; -} + protected: + matrix::Sparse *L_; + matrix::Sparse *U_; + index_type *P_; + index_type *Q_; + bool factors_extracted_; +}; + +class LinSolverIterative : public LinSolver +{ + public: + LinSolverIterative(); + ~LinSolverIterative(); + virtual int setup(matrix::Sparse *A); + + virtual int solve(vector_type *rhs, vector_type *init_guess); +}; +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index 65af58124..9e8430b4d 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -1,195 +1,172 @@ #include // includes memcpy #include -#include +#include "LinSolverDirectCuSolverGLU.hpp" #include +#include #include -#include "LinSolverDirectCuSolverGLU.hpp" namespace ReSolve { - using vector_type = vector::Vector; +using vector_type = vector::Vector; - LinSolverDirectCuSolverGLU::LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace) - { - this->workspace_ = workspace; - } +LinSolverDirectCuSolverGLU::LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA *workspace) { this->workspace_ = workspace; } - LinSolverDirectCuSolverGLU::~LinSolverDirectCuSolverGLU() - { - mem_.deleteOnDevice(glu_buffer_); - cusparseDestroyMatDescr(descr_M_); - cusparseDestroyMatDescr(descr_A_); - cusolverSpDestroyGluInfo(info_M_); - delete M_; - } +LinSolverDirectCuSolverGLU::~LinSolverDirectCuSolverGLU() +{ + mem_.deleteOnDevice(glu_buffer_); + cusparseDestroyMatDescr(descr_M_); + cusparseDestroyMatDescr(descr_A_); + cusolverSpDestroyGluInfo(info_M_); + delete M_; +} - int LinSolverDirectCuSolverGLU::setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* /* rhs */) - { - int error_sum = 0; - - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - //get the handle - handle_cusolversp_ = workspaceCUDA->getCusolverSpHandle(); - A_ = (matrix::Csr*) A; - index_type n = 
A_->getNumRows(); - index_type nnz = A_->getNnzExpanded(); - //create combined factor - addFactors(L,U); - - //set up descriptors - cusparseCreateMatDescr(&descr_M_); - cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO); - cusolverSpCreateGluInfo(&info_M_); - - cusparseCreateMatDescr(&descr_A_); - cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO); - - //set up the GLU - status_cusolver_ = cusolverSpDgluSetup(handle_cusolversp_, - n, - nnz, - descr_A_, - A_->getRowData(memory::HOST), //kRowPtr_, - A_->getColData(memory::HOST), //jCol_, - P, /* base-0 */ - Q, /* base-0 */ - M_->getNnz(), /* nnzM */ - descr_M_, - M_->getRowData(memory::HOST), - M_->getColData(memory::HOST), - info_M_); - error_sum += status_cusolver_; - //NOW the buffer - size_t buffer_size; - status_cusolver_ = cusolverSpDgluBufferSize(handle_cusolversp_, info_M_, &buffer_size); - error_sum += status_cusolver_; - - mem_.allocateBufferOnDevice(&glu_buffer_, buffer_size); - - status_cusolver_ = cusolverSpDgluAnalysis(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; - - // reset and refactor so factors are ON THE GPU - - status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, - n, - /* A is original matrix */ - nnz, - descr_A_, - A_->getValues( memory::DEVICE), //da_, - A_->getRowData(memory::DEVICE), //kRowPtr_, - A_->getColData(memory::DEVICE), //jCol_, - info_M_); - error_sum += status_cusolver_; - - status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; - - return error_sum; - } +int LinSolverDirectCuSolverGLU::setup(matrix::Sparse *A, matrix::Sparse *L, matrix::Sparse *U, index_type *P, index_type *Q, vector_type * /* rhs */) +{ + int error_sum = 0; + + LinAlgWorkspaceCUDA *workspaceCUDA = workspace_; + // get the handle + handle_cusolversp_ = 
workspaceCUDA->getCusolverSpHandle(); + A_ = (matrix::Csr *)A; + index_type n = A_->getNumRows(); + index_type nnz = A_->getNnzExpanded(); + // create combined factor + addFactors(L, U); + + // set up descriptors + cusparseCreateMatDescr(&descr_M_); + cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO); + cusolverSpCreateGluInfo(&info_M_); + + cusparseCreateMatDescr(&descr_A_); + cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO); + + // set up the GLU + status_cusolver_ = cusolverSpDgluSetup(handle_cusolversp_, n, nnz, descr_A_, + A_->getRowData(memory::HOST), // kRowPtr_, + A_->getColData(memory::HOST), // jCol_, + P, /* base-0 */ + Q, /* base-0 */ + M_->getNnz(), /* nnzM */ + descr_M_, M_->getRowData(memory::HOST), M_->getColData(memory::HOST), info_M_); + error_sum += status_cusolver_; + // NOW the buffer + size_t buffer_size; + status_cusolver_ = cusolverSpDgluBufferSize(handle_cusolversp_, info_M_, &buffer_size); + error_sum += status_cusolver_; + + mem_.allocateBufferOnDevice(&glu_buffer_, buffer_size); + + status_cusolver_ = cusolverSpDgluAnalysis(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; + + // reset and refactor so factors are ON THE GPU + + status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, n, + /* A is original matrix */ + nnz, descr_A_, + A_->getValues(memory::DEVICE), // da_, + A_->getRowData(memory::DEVICE), // kRowPtr_, + A_->getColData(memory::DEVICE), // jCol_, + info_M_); + error_sum += status_cusolver_; + + status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; + + return error_sum; +} - void LinSolverDirectCuSolverGLU::addFactors(matrix::Sparse* L, matrix::Sparse* U) - { -// L and U need to be in CSC format - index_type n = L->getNumRows(); - index_type* Lp = L->getColData(memory::HOST); - index_type* Li 
= L->getRowData(memory::HOST); - index_type* Up = U->getColData(memory::HOST); - index_type* Ui = U->getRowData(memory::HOST); - index_type nnzM = ( L->getNnz() + U->getNnz() - n ); - M_ = new matrix::Csr(n, n, nnzM); - M_->allocateMatrixData(memory::HOST); - index_type* mia = M_->getRowData(memory::HOST); - index_type* mja = M_->getColData(memory::HOST); - index_type row; - for(index_type i = 0; i < n; ++i) { - // go through EACH COLUMN OF L first - for(index_type j = Lp[i]; j < Lp[i + 1]; ++j) { - row = Li[j]; - // BUT dont count diagonal twice, important - if(row != i) { - mia[row + 1]++; - } - } - // then each column of U - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; +void LinSolverDirectCuSolverGLU::addFactors(matrix::Sparse *L, matrix::Sparse *U) +{ + // L and U need to be in CSC format + index_type n = L->getNumRows(); + index_type *Lp = L->getColData(memory::HOST); + index_type *Li = L->getRowData(memory::HOST); + index_type *Up = U->getColData(memory::HOST); + index_type *Ui = U->getRowData(memory::HOST); + index_type nnzM = (L->getNnz() + U->getNnz() - n); + M_ = new matrix::Csr(n, n, nnzM); + M_->allocateMatrixData(memory::HOST); + index_type *mia = M_->getRowData(memory::HOST); + index_type *mja = M_->getColData(memory::HOST); + index_type row; + for (index_type i = 0; i < n; ++i) { + // go through EACH COLUMN OF L first + for (index_type j = Lp[i]; j < Lp[i + 1]; ++j) { + row = Li[j]; + // BUT dont count diagonal twice, important + if (row != i) { mia[row + 1]++; } } - // then organize mia_; - mia[0] = 0; - for(index_type i = 1; i < n + 1; i++) { - mia[i] += mia[i - 1]; + // then each column of U + for (index_type j = Up[i]; j < Up[i + 1]; ++j) { + row = Ui[j]; + mia[row + 1]++; } + } + // then organize mia_; + mia[0] = 0; + for (index_type i = 1; i < n + 1; i++) { + mia[i] += mia[i - 1]; + } - std::vector Mshifts(n, 0); - for(index_type i = 0; i < n; ++i) { - // go through EACH COLUMN OF L first - for(int j = Lp[i]; j < Lp[i + 1]; 
++j) { - row = Li[j]; - if(row != i) { - // place (row, i) where it belongs! - mja[mia[row] + Mshifts[row]] = i; - Mshifts[row]++; - } - } - // each column of U next - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; + std::vector Mshifts(n, 0); + for (index_type i = 0; i < n; ++i) { + // go through EACH COLUMN OF L first + for (int j = Lp[i]; j < Lp[i + 1]; ++j) { + row = Li[j]; + if (row != i) { + // place (row, i) where it belongs! mja[mia[row] + Mshifts[row]] = i; Mshifts[row]++; } } - //Mshifts.~vector(); + // each column of U next + for (index_type j = Up[i]; j < Up[i + 1]; ++j) { + row = Ui[j]; + mja[mia[row] + Mshifts[row]] = i; + Mshifts[row]++; + } } + // Mshifts.~vector(); +} - int LinSolverDirectCuSolverGLU::refactorize() - { - int error_sum = 0; - status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, - A_->getNumRows(), - /* A is original matrix */ - A_->getNnzExpanded(), - descr_A_, - A_->getValues( memory::DEVICE), //da_, - A_->getRowData(memory::DEVICE), //kRowPtr_, - A_->getColData(memory::DEVICE), //jCol_, - info_M_); - error_sum += status_cusolver_; - - status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); - error_sum += status_cusolver_; - - return error_sum; - } +int LinSolverDirectCuSolverGLU::refactorize() +{ + int error_sum = 0; + status_cusolver_ = cusolverSpDgluReset(handle_cusolversp_, A_->getNumRows(), + /* A is original matrix */ + A_->getNnzExpanded(), descr_A_, + A_->getValues(memory::DEVICE), // da_, + A_->getRowData(memory::DEVICE), // kRowPtr_, + A_->getColData(memory::DEVICE), // jCol_, + info_M_); + error_sum += status_cusolver_; + + status_cusolver_ = cusolverSpDgluFactor(handle_cusolversp_, info_M_, glu_buffer_); + error_sum += status_cusolver_; + + return error_sum; +} - int LinSolverDirectCuSolverGLU::solve(vector_type* rhs, vector_type* x) - { - - status_cusolver_ = cusolverSpDgluSolve(handle_cusolversp_, - A_->getNumRows(), - /* A is original matrix */ - A_->getNnz(), - 
descr_A_, - A_->getValues( memory::DEVICE), //da_, - A_->getRowData(memory::DEVICE), //kRowPtr_, - A_->getColData(memory::DEVICE), //jCol_, - rhs->getData(memory::DEVICE),/* right hand side */ - x->getData(memory::DEVICE),/* left hand side */ - &ite_refine_succ_, - &r_nrminf_, - info_M_, - glu_buffer_); - return status_cusolver_; - } +int LinSolverDirectCuSolverGLU::solve(vector_type *rhs, vector_type *x) +{ + status_cusolver_ = cusolverSpDgluSolve(handle_cusolversp_, A_->getNumRows(), + /* A is original matrix */ + A_->getNnz(), descr_A_, + A_->getValues(memory::DEVICE), // da_, + A_->getRowData(memory::DEVICE), // kRowPtr_, + A_->getColData(memory::DEVICE), // jCol_, + rhs->getData(memory::DEVICE), /* right hand side */ + x->getData(memory::DEVICE), /* left hand side */ + &ite_refine_succ_, &r_nrminf_, info_M_, glu_buffer_); + return status_cusolver_; } + +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverGLU.hpp b/resolve/LinSolverDirectCuSolverGLU.hpp index c6f5416c7..f303064d1 100644 --- a/resolve/LinSolverDirectCuSolverGLU.hpp +++ b/resolve/LinSolverDirectCuSolverGLU.hpp @@ -3,56 +3,51 @@ #include "LinSolver.hpp" #include "cusolver_defs.hpp" #include -namespace ReSolve +namespace ReSolve { - // Forward declaration of vector::Vector class - namespace vector - { - class Vector; - } - - // Forward declaration of matrix::Sparse class - namespace matrix - { - class Sparse; - } - - // Forward declaration of ReSolve handlers workspace - class LinAlgWorkspaceCUDA; - - class LinSolverDirectCuSolverGLU : public LinSolverDirect - { - using vector_type = vector::Vector; - - public: - LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace); - ~LinSolverDirectCuSolverGLU(); - - int refactorize(); - int solve(vector_type* rhs, vector_type* x); - - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs = nullptr); - - private: - void addFactors(matrix::Sparse* L, matrix::Sparse* U); 
//create L+U from sepeate L, U factors - matrix::Sparse* M_;//the matrix that contains added factors - //note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation - cusparseMatDescr_t descr_M_; //this is NOT sparse matrix descriptor - cusparseMatDescr_t descr_A_; //this is NOT sparse matrix descriptor - LinAlgWorkspaceCUDA* workspace_;// so we can copy cusparse handle - cusolverSpHandle_t handle_cusolversp_; - cusolverStatus_t status_cusolver_; - cusparseStatus_t status_cusparse_; - csrgluInfo_t info_M_; - void* glu_buffer_; - double r_nrminf_; - int ite_refine_succ_; - - MemoryHandler mem_; ///< Device memory manager object - }; +// Forward declaration of vector::Vector class +namespace vector +{ +class Vector; +} + +// Forward declaration of matrix::Sparse class +namespace matrix +{ +class Sparse; } + +// Forward declaration of ReSolve handlers workspace +class LinAlgWorkspaceCUDA; + +class LinSolverDirectCuSolverGLU : public LinSolverDirect +{ + using vector_type = vector::Vector; + + public: + LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA *workspace); + ~LinSolverDirectCuSolverGLU(); + + int refactorize(); + int solve(vector_type *rhs, vector_type *x); + + int setup(matrix::Sparse *A, matrix::Sparse *L, matrix::Sparse *U, index_type *P, index_type *Q, vector_type *rhs = nullptr); + + private: + void addFactors(matrix::Sparse *L, matrix::Sparse *U); // create L+U from sepeate L, U factors + matrix::Sparse *M_; // the matrix that contains added factors + // note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation + cusparseMatDescr_t descr_M_; // this is NOT sparse matrix descriptor + cusparseMatDescr_t descr_A_; // this is NOT sparse matrix descriptor + LinAlgWorkspaceCUDA *workspace_; // so we can copy cusparse handle + cusolverSpHandle_t handle_cusolversp_; + cusolverStatus_t status_cusolver_; + cusparseStatus_t status_cusparse_; + csrgluInfo_t info_M_; + void *glu_buffer_; + double 
r_nrminf_; + int ite_refine_succ_; + + MemoryHandler mem_; ///< Device memory manager object +}; +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverRf.cpp b/resolve/LinSolverDirectCuSolverRf.cpp index 905a0e6ee..56c098147 100644 --- a/resolve/LinSolverDirectCuSolverRf.cpp +++ b/resolve/LinSolverDirectCuSolverRf.cpp @@ -1,133 +1,96 @@ -#include -#include #include "LinSolverDirectCuSolverRf.hpp" +#include +#include -namespace ReSolve +namespace ReSolve { - LinSolverDirectCuSolverRf::LinSolverDirectCuSolverRf() - { - cusolverRfCreate(&handle_cusolverrf_); - } - - LinSolverDirectCuSolverRf::~LinSolverDirectCuSolverRf() - { - cusolverRfDestroy(handle_cusolverrf_); - mem_.deleteOnDevice(d_P_); - mem_.deleteOnDevice(d_Q_); - mem_.deleteOnDevice(d_T_); - } - - int LinSolverDirectCuSolverRf::setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* /* rhs */) - { - //remember - P and Q are generally CPU variables - int error_sum = 0; - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); - mem_.allocateArrayOnDevice(&d_P_, n); - mem_.allocateArrayOnDevice(&d_Q_, n); - mem_.allocateArrayOnDevice(&d_T_, n); - - mem_.copyArrayHostToDevice(d_P_, P, n); - mem_.copyArrayHostToDevice(d_Q_, Q, n); - - - status_cusolverrf_ = cusolverRfSetResetValuesFastMode(handle_cusolverrf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); - error_sum += status_cusolverrf_; - status_cusolverrf_ = cusolverRfSetupDevice(n, - A_->getNnzExpanded(), - A_->getRowData(memory::DEVICE), //dia_, - A_->getColData(memory::DEVICE), //dja_, - A_->getValues( memory::DEVICE), //da_, - L->getNnz(), - L->getRowData(memory::DEVICE), - L->getColData(memory::DEVICE), - L->getValues( memory::DEVICE), - U->getNnz(), - U->getRowData(memory::DEVICE), - U->getColData(memory::DEVICE), - U->getValues( memory::DEVICE), - d_P_, - d_Q_, - handle_cusolverrf_); - error_sum += status_cusolverrf_; - - mem_.deviceSynchronize(); - status_cusolverrf_ = 
cusolverRfAnalyze(handle_cusolverrf_); - error_sum += status_cusolverrf_; - - this->A_ = A; - //default +LinSolverDirectCuSolverRf::LinSolverDirectCuSolverRf() { cusolverRfCreate(&handle_cusolverrf_); } - const cusolverRfFactorization_t fact_alg = - CUSOLVERRF_FACTORIZATION_ALG0; // 0 - default, 1 or 2 - const cusolverRfTriangularSolve_t solve_alg = - CUSOLVERRF_TRIANGULAR_SOLVE_ALG1; // 1- default, 2 or 3 // 1 causes error - this->setAlgorithms(fact_alg, solve_alg); - return error_sum; - } - - void LinSolverDirectCuSolverRf::setAlgorithms(cusolverRfFactorization_t fact_alg, cusolverRfTriangularSolve_t solve_alg) - { - cusolverRfSetAlgs(handle_cusolverrf_, fact_alg, solve_alg); - } - - int LinSolverDirectCuSolverRf::refactorize() - { - int error_sum = 0; - status_cusolverrf_ = cusolverRfResetValues(A_->getNumRows(), - A_->getNnzExpanded(), - A_->getRowData(memory::DEVICE), //dia_, - A_->getColData(memory::DEVICE), //dja_, - A_->getValues( memory::DEVICE), //da_, - d_P_, - d_Q_, - handle_cusolverrf_); - error_sum += status_cusolverrf_; - - mem_.deviceSynchronize(); - status_cusolverrf_ = cusolverRfRefactor(handle_cusolverrf_); - error_sum += status_cusolverrf_; +LinSolverDirectCuSolverRf::~LinSolverDirectCuSolverRf() +{ + cusolverRfDestroy(handle_cusolverrf_); + mem_.deleteOnDevice(d_P_); + mem_.deleteOnDevice(d_Q_); + mem_.deleteOnDevice(d_T_); +} - return error_sum; - } +int LinSolverDirectCuSolverRf::setup(matrix::Sparse *A, matrix::Sparse *L, matrix::Sparse *U, index_type *P, index_type *Q, vector_type * /* rhs */) +{ + // remember - P and Q are generally CPU variables + int error_sum = 0; + this->A_ = (matrix::Csr *)A; + index_type n = A_->getNumRows(); + mem_.allocateArrayOnDevice(&d_P_, n); + mem_.allocateArrayOnDevice(&d_Q_, n); + mem_.allocateArrayOnDevice(&d_T_, n); + + mem_.copyArrayHostToDevice(d_P_, P, n); + mem_.copyArrayHostToDevice(d_Q_, Q, n); + + status_cusolverrf_ = cusolverRfSetResetValuesFastMode(handle_cusolverrf_, 
CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); + error_sum += status_cusolverrf_; + status_cusolverrf_ = cusolverRfSetupDevice(n, A_->getNnzExpanded(), + A_->getRowData(memory::DEVICE), // dia_, + A_->getColData(memory::DEVICE), // dja_, + A_->getValues(memory::DEVICE), // da_, + L->getNnz(), L->getRowData(memory::DEVICE), L->getColData(memory::DEVICE), L->getValues(memory::DEVICE), + U->getNnz(), U->getRowData(memory::DEVICE), U->getColData(memory::DEVICE), U->getValues(memory::DEVICE), + d_P_, d_Q_, handle_cusolverrf_); + error_sum += status_cusolverrf_; + + mem_.deviceSynchronize(); + status_cusolverrf_ = cusolverRfAnalyze(handle_cusolverrf_); + error_sum += status_cusolverrf_; + + this->A_ = A; + // default + + const cusolverRfFactorization_t fact_alg = CUSOLVERRF_FACTORIZATION_ALG0; // 0 - default, 1 or 2 + const cusolverRfTriangularSolve_t solve_alg = CUSOLVERRF_TRIANGULAR_SOLVE_ALG1; // 1- default, 2 or 3 // 1 causes error + this->setAlgorithms(fact_alg, solve_alg); + return error_sum; +} + +void LinSolverDirectCuSolverRf::setAlgorithms(cusolverRfFactorization_t fact_alg, cusolverRfTriangularSolve_t solve_alg) +{ + cusolverRfSetAlgs(handle_cusolverrf_, fact_alg, solve_alg); +} - // solution is returned in RHS - int LinSolverDirectCuSolverRf::solve(vector_type* rhs) - { - status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, - d_P_, - d_Q_, - 1, - d_T_, - A_->getNumRows(), - rhs->getData(memory::DEVICE), - A_->getNumRows()); - return status_cusolverrf_; - } +int LinSolverDirectCuSolverRf::refactorize() +{ + int error_sum = 0; + status_cusolverrf_ = cusolverRfResetValues(A_->getNumRows(), A_->getNnzExpanded(), + A_->getRowData(memory::DEVICE), // dia_, + A_->getColData(memory::DEVICE), // dja_, + A_->getValues(memory::DEVICE), // da_, + d_P_, d_Q_, handle_cusolverrf_); + error_sum += status_cusolverrf_; + + mem_.deviceSynchronize(); + status_cusolverrf_ = cusolverRfRefactor(handle_cusolverrf_); + error_sum += status_cusolverrf_; + + return error_sum; +} + +// 
solution is returned in RHS +int LinSolverDirectCuSolverRf::solve(vector_type *rhs) +{ + status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, d_P_, d_Q_, 1, d_T_, A_->getNumRows(), rhs->getData(memory::DEVICE), A_->getNumRows()); + return status_cusolverrf_; +} - int LinSolverDirectCuSolverRf::solve(vector_type* rhs, vector_type* x) - { - x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); - x->setDataUpdated(memory::DEVICE); - status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, - d_P_, - d_Q_, - 1, - d_T_, - A_->getNumRows(), - x->getData(memory::DEVICE), - A_->getNumRows()); - return status_cusolverrf_; - } +int LinSolverDirectCuSolverRf::solve(vector_type *rhs, vector_type *x) +{ + x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); + x->setDataUpdated(memory::DEVICE); + status_cusolverrf_ = cusolverRfSolve(handle_cusolverrf_, d_P_, d_Q_, 1, d_T_, A_->getNumRows(), x->getData(memory::DEVICE), A_->getNumRows()); + return status_cusolverrf_; +} - int LinSolverDirectCuSolverRf::setNumericalProperties(double nzero, double nboost) - { - status_cusolverrf_ = cusolverRfSetNumericProperties(handle_cusolverrf_, nzero, nboost); - return status_cusolverrf_; - } -}// namespace resolve +int LinSolverDirectCuSolverRf::setNumericalProperties(double nzero, double nboost) +{ + status_cusolverrf_ = cusolverRfSetNumericProperties(handle_cusolverrf_, nzero, nboost); + return status_cusolverrf_; +} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverRf.hpp b/resolve/LinSolverDirectCuSolverRf.hpp index 77e8b94f6..c2de1330b 100644 --- a/resolve/LinSolverDirectCuSolverRf.hpp +++ b/resolve/LinSolverDirectCuSolverRf.hpp @@ -4,49 +4,44 @@ #include "cusolverRf.h" #include -namespace ReSolve +namespace ReSolve { - // Forward declaration of vector::Vector class - namespace vector - { - class Vector; - } - - // Forward declaration of matrix::Sparse class - namespace matrix - { - class Sparse; - } - - class 
LinSolverDirectCuSolverRf : public LinSolverDirect - { - using vector_type = vector::Vector; - - public: - LinSolverDirectCuSolverRf(); - ~LinSolverDirectCuSolverRf(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs = nullptr); - - void setAlgorithms(cusolverRfFactorization_t fact_alg, cusolverRfTriangularSolve_t solve_alg); - - int refactorize(); - int solve(vector_type* rhs, vector_type* x); - int solve(vector_type* rhs);// the solutuon is returned IN RHS (rhs is overwritten) - int setNumericalProperties(double nzero, double nboost);//these two NEED TO BE DOUBLE - private: - cusolverRfHandle_t handle_cusolverrf_; - cusolverStatus_t status_cusolverrf_; - - index_type* d_P_; - index_type* d_Q_; - real_type* d_T_; - - MemoryHandler mem_; ///< Device memory manager object - }; +// Forward declaration of vector::Vector class +namespace vector +{ +class Vector; +} + +// Forward declaration of matrix::Sparse class +namespace matrix +{ +class Sparse; } + +class LinSolverDirectCuSolverRf : public LinSolverDirect +{ + using vector_type = vector::Vector; + + public: + LinSolverDirectCuSolverRf(); + ~LinSolverDirectCuSolverRf(); + + int setup(matrix::Sparse *A, matrix::Sparse *L, matrix::Sparse *U, index_type *P, index_type *Q, vector_type *rhs = nullptr); + + void setAlgorithms(cusolverRfFactorization_t fact_alg, cusolverRfTriangularSolve_t solve_alg); + + int refactorize(); + int solve(vector_type *rhs, vector_type *x); + int solve(vector_type *rhs); // the solutuon is returned IN RHS (rhs is overwritten) + int setNumericalProperties(double nzero, double nboost); // these two NEED TO BE DOUBLE + private: + cusolverRfHandle_t handle_cusolverrf_; + cusolverStatus_t status_cusolverrf_; + + index_type *d_P_; + index_type *d_Q_; + real_type *d_T_; + + MemoryHandler mem_; ///< Device memory manager object +}; +} // namespace ReSolve diff --git a/resolve/LinSolverDirectKLU.cpp 
b/resolve/LinSolverDirectKLU.cpp index c30d92d40..5f927290d 100644 --- a/resolve/LinSolverDirectKLU.cpp +++ b/resolve/LinSolverDirectKLU.cpp @@ -1,233 +1,203 @@ +#include "LinSolverDirectKLU.hpp" #include // includes memcpy -#include #include -#include "LinSolverDirectKLU.hpp" +#include -namespace ReSolve +namespace ReSolve +{ +LinSolverDirectKLU::LinSolverDirectKLU() { - LinSolverDirectKLU::LinSolverDirectKLU() - { - Symbolic_ = nullptr; - Numeric_ = nullptr; + Symbolic_ = nullptr; + Numeric_ = nullptr; + L_ = nullptr; + U_ = nullptr; + + klu_defaults(&Common_); +} + +LinSolverDirectKLU::~LinSolverDirectKLU() +{ + klu_free_symbolic(&Symbolic_, &Common_); + klu_free_numeric(&Numeric_, &Common_); +} + +int LinSolverDirectKLU::setup(matrix::Sparse *A, matrix::Sparse * /* L */, matrix::Sparse * /* U */, index_type * /* P */, index_type * /* Q */, + vector_type * /* rhs */) +{ + this->A_ = A; + return 0; +} + +void LinSolverDirectKLU::setupParameters(int ordering, double KLU_threshold, bool halt_if_singular) +{ + Common_.btf = 0; + Common_.ordering = ordering; + Common_.tol = KLU_threshold; + Common_.scale = -1; + Common_.halt_if_singular = halt_if_singular; +} + +int LinSolverDirectKLU::analyze() +{ + // in case we called this function AGAIN + if (Symbolic_ != nullptr) { + klu_free_symbolic(&Symbolic_, &Common_); + } + + Symbolic_ = klu_analyze(A_->getNumRows(), A_->getRowData(memory::HOST), A_->getColData(memory::HOST), &Common_); + + factors_extracted_ = false; + if (L_ != nullptr) { + delete L_; L_ = nullptr; + } + + if (U_ != nullptr) { + delete U_; U_ = nullptr; + } - klu_defaults(&Common_) ; - } + if (Symbolic_ == nullptr) { + printf("Symbolic_ factorization crashed withCommon_.status = %d \n", Common_.status); + return 1; + } + return 0; +} - LinSolverDirectKLU::~LinSolverDirectKLU() - { - klu_free_symbolic(&Symbolic_, &Common_); +int LinSolverDirectKLU::factorize() +{ + if (Numeric_ != nullptr) { klu_free_numeric(&Numeric_, &Common_); } - int 
LinSolverDirectKLU::setup(matrix::Sparse* A, - matrix::Sparse* /* L */, - matrix::Sparse* /* U */, - index_type* /* P */, - index_type* /* Q */, - vector_type* /* rhs */) - { - this->A_ = A; - return 0; - } - - void LinSolverDirectKLU::setupParameters(int ordering, double KLU_threshold, bool halt_if_singular) - { - Common_.btf = 0; - Common_.ordering = ordering; - Common_.tol = KLU_threshold; - Common_.scale = -1; - Common_.halt_if_singular = halt_if_singular; - } - - int LinSolverDirectKLU::analyze() - { - // in case we called this function AGAIN - if (Symbolic_ != nullptr) { - klu_free_symbolic(&Symbolic_, &Common_); - } - - Symbolic_ = klu_analyze(A_->getNumRows(), A_->getRowData(memory::HOST), A_->getColData(memory::HOST), &Common_) ; - - factors_extracted_ = false; - if (L_ != nullptr) { - delete L_; - L_ = nullptr; - } - - if (U_ != nullptr) { - delete U_; - U_ = nullptr; - } - - if (Symbolic_ == nullptr){ - printf("Symbolic_ factorization crashed withCommon_.status = %d \n", Common_.status); - return 1; - } - return 0; - } - - int LinSolverDirectKLU::factorize() - { - if (Numeric_ != nullptr) { - klu_free_numeric(&Numeric_, &Common_); - } - - Numeric_ = klu_factor(A_->getRowData(memory::HOST), A_->getColData(memory::HOST), A_->getValues(memory::HOST), Symbolic_, &Common_); - - factors_extracted_ = false; - - if (L_ != nullptr) { - delete L_; - L_ = nullptr; - } - - if (U_ != nullptr) { - delete U_; - U_ = nullptr; - } - - if (Numeric_ == nullptr){ - return 1; - } - return 0; - } - - int LinSolverDirectKLU::refactorize() - { - int kluStatus = klu_refactor (A_->getRowData(memory::HOST), A_->getColData(memory::HOST), A_->getValues(memory::HOST), Symbolic_, Numeric_, &Common_); - - factors_extracted_ = false; - - if (L_ != nullptr) { - delete L_; - L_ = nullptr; - } - - if (U_ != nullptr) { - delete U_; - U_ = nullptr; - } - - if (!kluStatus){ - //display error - return 1; - } - return 0; - } - - int LinSolverDirectKLU::solve(vector_type* rhs, vector_type* x) - 
{ - //copy the vector - - // std::memcpy(x, rhs, A->getNumRows() * sizeof(real_type)); - - x->update(rhs->getData(memory::HOST), memory::HOST, memory::HOST); - x->setDataUpdated(memory::HOST); - - int kluStatus = klu_solve(Symbolic_, Numeric_, A_->getNumRows(), 1, x->getData(memory::HOST), &Common_); - - if (!kluStatus){ - return 1; - } - return 0; - } - - matrix::Sparse* LinSolverDirectKLU::getLFactor() - { - if (!factors_extracted_) { - const int nnzL = Numeric_->lnz; - const int nnzU = Numeric_->unz; - - L_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzL); - U_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzU); - L_->allocateMatrixData(memory::HOST); - U_->allocateMatrixData(memory::HOST); - int ok = klu_extract(Numeric_, - Symbolic_, - L_->getColData(memory::HOST), - L_->getRowData(memory::HOST), - L_->getValues( memory::HOST), - U_->getColData(memory::HOST), - U_->getRowData(memory::HOST), - U_->getValues( memory::HOST), - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - &Common_); - - L_->setUpdated(memory::HOST); - U_->setUpdated(memory::HOST); - (void) ok; // TODO: Check status in ok before setting `factors_extracted_` - factors_extracted_ = true; - } - return L_; - } - - matrix::Sparse* LinSolverDirectKLU::getUFactor() - { - if (!factors_extracted_) { - const int nnzL = Numeric_->lnz; - const int nnzU = Numeric_->unz; - - L_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzL); - U_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzU); - L_->allocateMatrixData(memory::HOST); - U_->allocateMatrixData(memory::HOST); - int ok = klu_extract(Numeric_, - Symbolic_, - L_->getColData(memory::HOST), - L_->getRowData(memory::HOST), - L_->getValues( memory::HOST), - U_->getColData(memory::HOST), - U_->getRowData(memory::HOST), - U_->getValues( memory::HOST), - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - &Common_); - - L_->setUpdated(memory::HOST); - 
U_->setUpdated(memory::HOST); - - (void) ok; // TODO: Check status in ok before setting `factors_extracted_` - factors_extracted_ = true; - } - return U_; - } - - index_type* LinSolverDirectKLU::getPOrdering() - { - if (Numeric_ != nullptr){ - P_ = new index_type[A_->getNumRows()]; - size_t nrows = static_cast(A_->getNumRows()); - std::memcpy(P_, Numeric_->Pnum, nrows * sizeof(index_type)); - return P_; - } else { - return nullptr; - } - } - - - index_type* LinSolverDirectKLU::getQOrdering() - { - if (Numeric_ != nullptr){ - Q_ = new index_type[A_->getNumRows()]; - size_t nrows = static_cast(A_->getNumRows()); - std::memcpy(Q_, Symbolic_->Q, nrows * sizeof(index_type)); - return Q_; - } else { - return nullptr; - } + Numeric_ = klu_factor(A_->getRowData(memory::HOST), A_->getColData(memory::HOST), A_->getValues(memory::HOST), Symbolic_, &Common_); + + factors_extracted_ = false; + + if (L_ != nullptr) { + delete L_; + L_ = nullptr; + } + + if (U_ != nullptr) { + delete U_; + U_ = nullptr; + } + + if (Numeric_ == nullptr) { + return 1; + } + return 0; +} + +int LinSolverDirectKLU::refactorize() +{ + int kluStatus = + klu_refactor(A_->getRowData(memory::HOST), A_->getColData(memory::HOST), A_->getValues(memory::HOST), Symbolic_, Numeric_, &Common_); + + factors_extracted_ = false; + + if (L_ != nullptr) { + delete L_; + L_ = nullptr; + } + + if (U_ != nullptr) { + delete U_; + U_ = nullptr; + } + + if (!kluStatus) { + // display error + return 1; + } + return 0; +} + +int LinSolverDirectKLU::solve(vector_type *rhs, vector_type *x) +{ + // copy the vector + + // std::memcpy(x, rhs, A->getNumRows() * sizeof(real_type)); + + x->update(rhs->getData(memory::HOST), memory::HOST, memory::HOST); + x->setDataUpdated(memory::HOST); + + int kluStatus = klu_solve(Symbolic_, Numeric_, A_->getNumRows(), 1, x->getData(memory::HOST), &Common_); + + if (!kluStatus) { + return 1; + } + return 0; +} + +matrix::Sparse *LinSolverDirectKLU::getLFactor() +{ + if (!factors_extracted_) { + 
const int nnzL = Numeric_->lnz; + const int nnzU = Numeric_->unz; + + L_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzL); + U_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzU); + L_->allocateMatrixData(memory::HOST); + U_->allocateMatrixData(memory::HOST); + int ok = klu_extract(Numeric_, Symbolic_, L_->getColData(memory::HOST), L_->getRowData(memory::HOST), L_->getValues(memory::HOST), + U_->getColData(memory::HOST), U_->getRowData(memory::HOST), U_->getValues(memory::HOST), nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, &Common_); + + L_->setUpdated(memory::HOST); + U_->setUpdated(memory::HOST); + (void)ok; // TODO: Check status in ok before setting `factors_extracted_` + factors_extracted_ = true; + } + return L_; +} + +matrix::Sparse *LinSolverDirectKLU::getUFactor() +{ + if (!factors_extracted_) { + const int nnzL = Numeric_->lnz; + const int nnzU = Numeric_->unz; + + L_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzL); + U_ = new matrix::Csc(A_->getNumRows(), A_->getNumColumns(), nnzU); + L_->allocateMatrixData(memory::HOST); + U_->allocateMatrixData(memory::HOST); + int ok = klu_extract(Numeric_, Symbolic_, L_->getColData(memory::HOST), L_->getRowData(memory::HOST), L_->getValues(memory::HOST), + U_->getColData(memory::HOST), U_->getRowData(memory::HOST), U_->getValues(memory::HOST), nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, &Common_); + + L_->setUpdated(memory::HOST); + U_->setUpdated(memory::HOST); + + (void)ok; // TODO: Check status in ok before setting `factors_extracted_` + factors_extracted_ = true; + } + return U_; +} + +index_type *LinSolverDirectKLU::getPOrdering() +{ + if (Numeric_ != nullptr) { + P_ = new index_type[A_->getNumRows()]; + size_t nrows = static_cast(A_->getNumRows()); + std::memcpy(P_, Numeric_->Pnum, nrows * sizeof(index_type)); + return P_; + } else { + return nullptr; + } +} + +index_type *LinSolverDirectKLU::getQOrdering() +{ + if (Numeric_ != 
nullptr) { + Q_ = new index_type[A_->getNumRows()]; + size_t nrows = static_cast(A_->getNumRows()); + std::memcpy(Q_, Symbolic_->Q, nrows * sizeof(index_type)); + return Q_; + } else { + return nullptr; } } +} // namespace ReSolve diff --git a/resolve/LinSolverDirectKLU.hpp b/resolve/LinSolverDirectKLU.hpp index b4edadb19..7425f83fb 100644 --- a/resolve/LinSolverDirectKLU.hpp +++ b/resolve/LinSolverDirectKLU.hpp @@ -1,52 +1,48 @@ #pragma once -#include "klu.h" #include "Common.hpp" #include "LinSolver.hpp" +#include "klu.h" -namespace ReSolve +namespace ReSolve { - // Forward declaration of vector::Vector class - namespace vector - { - class Vector; - } - - // Forward declaration of matrix::Sparse class - namespace matrix - { - class Sparse; - } - - class LinSolverDirectKLU : public LinSolverDirect - { - using vector_type = vector::Vector; - - public: - LinSolverDirectKLU(); - ~LinSolverDirectKLU(); - - int setup(matrix::Sparse* A, - matrix::Sparse* L = nullptr, - matrix::Sparse* U = nullptr, - index_type* P = nullptr, - index_type* Q = nullptr, - vector_type* rhs = nullptr); - - void setupParameters(int ordering, double KLU_threshold, bool halt_if_singular); - - int analyze(); //the same as symbolic factorization - int factorize(); - int refactorize(); - int solve(vector_type* rhs, vector_type* x); - - matrix::Sparse* getLFactor(); - matrix::Sparse* getUFactor(); - index_type* getPOrdering(); - index_type* getQOrdering(); - - private: - klu_common Common_; //settings - klu_symbolic* Symbolic_; - klu_numeric* Numeric_; - }; +// Forward declaration of vector::Vector class +namespace vector +{ +class Vector; } + +// Forward declaration of matrix::Sparse class +namespace matrix +{ +class Sparse; +} + +class LinSolverDirectKLU : public LinSolverDirect +{ + using vector_type = vector::Vector; + + public: + LinSolverDirectKLU(); + ~LinSolverDirectKLU(); + + int setup(matrix::Sparse *A, matrix::Sparse *L = nullptr, matrix::Sparse *U = nullptr, index_type *P = nullptr, 
index_type *Q = nullptr, + vector_type *rhs = nullptr); + + void setupParameters(int ordering, double KLU_threshold, bool halt_if_singular); + + int analyze(); // the same as symbolic factorization + int factorize(); + int refactorize(); + int solve(vector_type *rhs, vector_type *x); + + matrix::Sparse *getLFactor(); + matrix::Sparse *getUFactor(); + index_type *getPOrdering(); + index_type *getQOrdering(); + + private: + klu_common Common_; // settings + klu_symbolic *Symbolic_; + klu_numeric *Numeric_; +}; +} // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index 2f49c57ca..fe19dd2f7 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -1,435 +1,325 @@ -#include -#include #include "LinSolverDirectRocSolverRf.hpp" #include +#include +#include -namespace ReSolve +namespace ReSolve { - LinSolverDirectRocSolverRf::LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP* workspace) - { - workspace_ = workspace; - infoM_ = nullptr; - solve_mode_ = 0; //solve mode - slow mode is default - } +LinSolverDirectRocSolverRf::LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP *workspace) +{ + workspace_ = workspace; + infoM_ = nullptr; + solve_mode_ = 0; // solve mode - slow mode is default +} + +LinSolverDirectRocSolverRf::~LinSolverDirectRocSolverRf() +{ + mem_.deleteOnDevice(d_P_); + mem_.deleteOnDevice(d_Q_); + + mem_.deleteOnDevice(d_aux1_); + mem_.deleteOnDevice(d_aux2_); - LinSolverDirectRocSolverRf::~LinSolverDirectRocSolverRf() - { - mem_.deleteOnDevice(d_P_); - mem_.deleteOnDevice(d_Q_); + delete L_csr_; + delete U_csr_; +} + +int LinSolverDirectRocSolverRf::setup(matrix::Sparse *A, matrix::Sparse *L, matrix::Sparse *U, index_type *P, index_type *Q, vector_type *rhs) +{ + // remember - P and Q are generally CPU variables + int error_sum = 0; + this->A_ = (matrix::Csr *)A; + index_type n = A_->getNumRows(); + // set matrix info + rocsolver_create_rfinfo(&infoM_, 
workspace_->getRocblasHandle()); + // create combined factor - mem_.deleteOnDevice(d_aux1_); - mem_.deleteOnDevice(d_aux2_); + addFactors(L, U); - delete L_csr_; - delete U_csr_; + M_->setUpdated(ReSolve::memory::HOST); + M_->copyData(ReSolve::memory::DEVICE); + + if (d_P_ == nullptr) { + mem_.allocateArrayOnDevice(&d_P_, n); } - int LinSolverDirectRocSolverRf::setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs) - { - //remember - P and Q are generally CPU variables - int error_sum = 0; - this->A_ = (matrix::Csr*) A; - index_type n = A_->getNumRows(); - //set matrix info - rocsolver_create_rfinfo(&infoM_, workspace_->getRocblasHandle()); - //create combined factor - - addFactors(L, U); - - M_->setUpdated(ReSolve::memory::HOST); - M_->copyData(ReSolve::memory::DEVICE); - - if (d_P_ == nullptr) { - mem_.allocateArrayOnDevice(&d_P_, n); + if (d_Q_ == nullptr) { + mem_.allocateArrayOnDevice(&d_Q_, n); + } + mem_.copyArrayHostToDevice(d_P_, P, n); + mem_.copyArrayHostToDevice(d_Q_, Q, n); + + mem_.deviceSynchronize(); + status_rocblas_ = rocsolver_dcsrrf_analysis(workspace_->getRocblasHandle(), n, 1, A_->getNnzExpanded(), + A_->getRowData(ReSolve::memory::DEVICE), // kRowPtr_, + A_->getColData(ReSolve::memory::DEVICE), // jCol_, + A_->getValues(ReSolve::memory::DEVICE), // vals_, + M_->getNnzExpanded(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), // vals_, + d_P_, d_Q_, rhs->getData(ReSolve::memory::DEVICE), n, infoM_); + + mem_.deviceSynchronize(); + error_sum += status_rocblas_; + + // tri solve setup + if (solve_mode_ == 1) { // fast mode + + if (L_csr_ != nullptr) { + delete L_csr_; } - if (d_Q_ == nullptr) { - mem_.allocateArrayOnDevice(&d_Q_, n); + L_csr_ = new ReSolve::matrix::Csr(L->getNumRows(), L->getNumColumns(), L->getNnz()); + L_csr_->allocateMatrixData(ReSolve::memory::DEVICE); + + if (U_csr_ != nullptr) { + 
delete U_csr_; } - mem_.copyArrayHostToDevice(d_P_, P, n); - mem_.copyArrayHostToDevice(d_Q_, Q, n); - mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_analysis(workspace_->getRocblasHandle(), - n, - 1, - A_->getNnzExpanded(), - A_->getRowData(ReSolve::memory::DEVICE), //kRowPtr_, - A_->getColData(ReSolve::memory::DEVICE), //jCol_, - A_->getValues(ReSolve::memory::DEVICE), //vals_, - M_->getNnzExpanded(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, - d_P_, - d_Q_, - rhs->getData(ReSolve::memory::DEVICE), - n, - infoM_); + U_csr_ = new ReSolve::matrix::Csr(U->getNumRows(), U->getNumColumns(), U->getNnz()); + U_csr_->allocateMatrixData(ReSolve::memory::DEVICE); - mem_.deviceSynchronize(); - error_sum += status_rocblas_; + rocsparse_create_mat_descr(&(descr_L_)); + rocsparse_set_mat_fill_mode(descr_L_, rocsparse_fill_mode_lower); + rocsparse_set_mat_index_base(descr_L_, rocsparse_index_base_zero); - // tri solve setup - if (solve_mode_ == 1) { // fast mode + rocsparse_create_mat_descr(&(descr_U_)); + rocsparse_set_mat_index_base(descr_U_, rocsparse_index_base_zero); + rocsparse_set_mat_fill_mode(descr_U_, rocsparse_fill_mode_upper); - if (L_csr_ != nullptr) { - delete L_csr_; - } + rocsparse_create_mat_info(&info_L_); + rocsparse_create_mat_info(&info_U_); - L_csr_ = new ReSolve::matrix::Csr(L->getNumRows(), L->getNumColumns(), L->getNnz()); - L_csr_->allocateMatrixData(ReSolve::memory::DEVICE); + // local variables + size_t L_buffer_size; + size_t U_buffer_size; - if (U_csr_ != nullptr) { - delete U_csr_; - } + status_rocblas_ = rocsolver_dcsrrf_splitlu( + workspace_->getRocblasHandle(), n, M_->getNnzExpanded(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), // vals_, + L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), + 
L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), U_csr_->getValues(ReSolve::memory::DEVICE)); - U_csr_ = new ReSolve::matrix::Csr(U->getNumRows(), U->getNumColumns(), U->getNnz()); - U_csr_->allocateMatrixData(ReSolve::memory::DEVICE); - - - rocsparse_create_mat_descr(&(descr_L_)); - rocsparse_set_mat_fill_mode(descr_L_, rocsparse_fill_mode_lower); - rocsparse_set_mat_index_base(descr_L_, rocsparse_index_base_zero); - - rocsparse_create_mat_descr(&(descr_U_)); - rocsparse_set_mat_index_base(descr_U_, rocsparse_index_base_zero); - rocsparse_set_mat_fill_mode(descr_U_, rocsparse_fill_mode_upper); - - rocsparse_create_mat_info(&info_L_); - rocsparse_create_mat_info(&info_U_); - - // local variables - size_t L_buffer_size; - size_t U_buffer_size; - - status_rocblas_ = rocsolver_dcsrrf_splitlu(workspace_->getRocblasHandle(), - n, - M_->getNnzExpanded(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - U_csr_->getValues(ReSolve::memory::DEVICE)); - - error_sum += status_rocblas_; - - status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - n, - L_csr_->getNnz(), - descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - info_L_, - &L_buffer_size); - error_sum += status_rocsparse_; - - mem_.allocateBufferOnDevice(&L_buffer_, L_buffer_size); - status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - n, - U_csr_->getNnz(), - descr_U_, - 
U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - info_U_, - &U_buffer_size); - error_sum += status_rocsparse_; - mem_.allocateBufferOnDevice(&U_buffer_, U_buffer_size); - - status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - n, - L_csr_->getNnz(), - descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - info_L_, - rocsparse_analysis_policy_force, - rocsparse_solve_policy_auto, - L_buffer_); - error_sum += status_rocsparse_; - if (status_rocsparse_!=0)printf("status after analysis 1 %d \n", status_rocsparse_); - status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - n, - U_csr_->getNnz(), - descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - info_U_, - rocsparse_analysis_policy_force, - rocsparse_solve_policy_auto, - U_buffer_); - error_sum += status_rocsparse_; - if (status_rocsparse_!=0)printf("status after analysis 2 %d \n", status_rocsparse_); - //allocate aux data - if (d_aux1_ == nullptr) { - mem_.allocateArrayOnDevice(&d_aux1_,n); - } - if (d_aux2_ == nullptr) { - mem_.allocateArrayOnDevice(&d_aux2_,n); - } + error_sum += status_rocblas_; + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, L_csr_->getNnz(), descr_L_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), + info_L_, &L_buffer_size); + error_sum += status_rocsparse_; + + mem_.allocateBufferOnDevice(&L_buffer_, L_buffer_size); + status_rocsparse_ = rocsparse_dcsrsv_buffer_size(workspace_->getRocsparseHandle(), 
rocsparse_operation_none, n, U_csr_->getNnz(), descr_U_, + U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), + info_U_, &U_buffer_size); + error_sum += status_rocsparse_; + mem_.allocateBufferOnDevice(&U_buffer_, U_buffer_size); + + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, L_csr_->getNnz(), descr_L_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, + rocsparse_analysis_policy_force, rocsparse_solve_policy_auto, L_buffer_); + error_sum += status_rocsparse_; + if (status_rocsparse_ != 0) + printf("status after analysis 1 %d \n", status_rocsparse_); + status_rocsparse_ = rocsparse_dcsrsv_analysis(workspace_->getRocsparseHandle(), rocsparse_operation_none, n, U_csr_->getNnz(), descr_U_, + U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, + rocsparse_analysis_policy_force, rocsparse_solve_policy_auto, U_buffer_); + error_sum += status_rocsparse_; + if (status_rocsparse_ != 0) + printf("status after analysis 2 %d \n", status_rocsparse_); + // allocate aux data + if (d_aux1_ == nullptr) { + mem_.allocateArrayOnDevice(&d_aux1_, n); + } + if (d_aux2_ == nullptr) { + mem_.allocateArrayOnDevice(&d_aux2_, n); } - return error_sum; } + return error_sum; +} - int LinSolverDirectRocSolverRf::refactorize() - { - int error_sum = 0; - mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), - A_->getNumRows(), - A_->getNnzExpanded(), - A_->getRowData(ReSolve::memory::DEVICE), //kRowPtr_, - A_->getColData(ReSolve::memory::DEVICE), //jCol_, - A_->getValues(ReSolve::memory::DEVICE), //vals_, - M_->getNnzExpanded(), - M_->getRowData(ReSolve::memory::DEVICE), - 
M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //OUTPUT, - d_P_, - d_Q_, - infoM_); - +int LinSolverDirectRocSolverRf::refactorize() +{ + int error_sum = 0; + mem_.deviceSynchronize(); + status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), A_->getNumRows(), A_->getNnzExpanded(), + A_->getRowData(ReSolve::memory::DEVICE), // kRowPtr_, + A_->getColData(ReSolve::memory::DEVICE), // jCol_, + A_->getValues(ReSolve::memory::DEVICE), // vals_, + M_->getNnzExpanded(), M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), // OUTPUT, + d_P_, d_Q_, infoM_); + + mem_.deviceSynchronize(); + error_sum += status_rocblas_; + + if (solve_mode_ == 1) { + // split M, fill L and U with correct values + printf("solve mode 1, splitting the factors again \n"); + status_rocblas_ = rocsolver_dcsrrf_splitlu(workspace_->getRocblasHandle(), A_->getNumRows(), M_->getNnzExpanded(), + M_->getRowData(ReSolve::memory::DEVICE), M_->getColData(ReSolve::memory::DEVICE), + M_->getValues(ReSolve::memory::DEVICE), // vals_, + L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), + U_csr_->getValues(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); error_sum += status_rocblas_; + } - if (solve_mode_ == 1) { - //split M, fill L and U with correct values - printf("solve mode 1, splitting the factors again \n"); - status_rocblas_ = rocsolver_dcsrrf_splitlu(workspace_->getRocblasHandle(), - A_->getNumRows(), - M_->getNnzExpanded(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), //vals_, - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - 
L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - U_csr_->getValues(ReSolve::memory::DEVICE)); - - mem_.deviceSynchronize(); - error_sum += status_rocblas_; + return error_sum; +} - } - - return error_sum; +// solution is returned in RHS +int LinSolverDirectRocSolverRf::solve(vector_type *rhs) +{ + int error_sum = 0; + if (solve_mode_ == 0) { + mem_.deviceSynchronize(); + status_rocblas_ = + rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), A_->getNumRows(), 1, M_->getNnz(), M_->getRowData(ReSolve::memory::DEVICE), + M_->getColData(ReSolve::memory::DEVICE), M_->getValues(ReSolve::memory::DEVICE), d_P_, d_Q_, + rhs->getData(ReSolve::memory::DEVICE), A_->getNumRows(), infoM_); + mem_.deviceSynchronize(); + } else { + // not implemented yet + permuteVectorP(A_->getNumRows(), d_P_, rhs->getData(ReSolve::memory::DEVICE), d_aux1_); + mem_.deviceSynchronize(); + rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), L_csr_->getNnz(), &(constants::ONE), + descr_L_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, d_aux1_, + d_aux2_, // result + rocsparse_solve_policy_auto, L_buffer_); + error_sum += status_rocsparse_; + + rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), U_csr_->getNnz(), &(constants::ONE), + descr_L_, + U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, + d_aux2_, // input + d_aux1_, // result + rocsparse_solve_policy_auto, U_buffer_); + error_sum += status_rocsparse_; + + permuteVectorQ(A_->getNumRows(), d_Q_, d_aux1_, rhs->getData(ReSolve::memory::DEVICE)); + mem_.deviceSynchronize(); } + return error_sum; +} - // solution is returned in RHS - int 
LinSolverDirectRocSolverRf::solve(vector_type* rhs) - { - int error_sum = 0; - if (solve_mode_ == 0) { - mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), - A_->getNumRows(), - 1, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), - d_P_, - d_Q_, - rhs->getData(ReSolve::memory::DEVICE), - A_->getNumRows(), - infoM_); - mem_.deviceSynchronize(); - } else { - // not implemented yet - permuteVectorP(A_->getNumRows(), d_P_, rhs->getData(ReSolve::memory::DEVICE), d_aux1_); - mem_.deviceSynchronize(); - rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - A_->getNumRows(), - L_csr_->getNnz(), - &(constants::ONE), - descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - info_L_, - d_aux1_, - d_aux2_, //result - rocsparse_solve_policy_auto, - L_buffer_); - error_sum += status_rocsparse_; - - rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - A_->getNumRows(), - U_csr_->getNnz(), - &(constants::ONE), - descr_L_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - info_U_, - d_aux2_, //input - d_aux1_,//result - rocsparse_solve_policy_auto, - U_buffer_); - error_sum += status_rocsparse_; - - permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE)); - mem_.deviceSynchronize(); - } - return error_sum; - } +int LinSolverDirectRocSolverRf::solve(vector_type *rhs, vector_type *x) +{ + x->update(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE); + x->setDataUpdated(ReSolve::memory::DEVICE); + int error_sum = 0; + if (solve_mode_ == 0) { + mem_.deviceSynchronize(); + status_rocblas_ = + 
rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), A_->getNumRows(), 1, M_->getNnz(), M_->getRowData(ReSolve::memory::DEVICE), + M_->getColData(ReSolve::memory::DEVICE), M_->getValues(ReSolve::memory::DEVICE), d_P_, d_Q_, + x->getData(ReSolve::memory::DEVICE), A_->getNumRows(), infoM_); + error_sum += status_rocblas_; + mem_.deviceSynchronize(); + } else { + // not implemented yet - int LinSolverDirectRocSolverRf::solve(vector_type* rhs, vector_type* x) - { - x->update(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE); - x->setDataUpdated(ReSolve::memory::DEVICE); - int error_sum = 0; - if (solve_mode_ == 0) { - mem_.deviceSynchronize(); - status_rocblas_ = rocsolver_dcsrrf_solve(workspace_->getRocblasHandle(), - A_->getNumRows(), - 1, - M_->getNnz(), - M_->getRowData(ReSolve::memory::DEVICE), - M_->getColData(ReSolve::memory::DEVICE), - M_->getValues(ReSolve::memory::DEVICE), - d_P_, - d_Q_, - x->getData(ReSolve::memory::DEVICE), - A_->getNumRows(), - infoM_); - error_sum += status_rocblas_; - mem_.deviceSynchronize(); - } else { - // not implemented yet - - permuteVectorP(A_->getNumRows(), d_P_, rhs->getData(ReSolve::memory::DEVICE), d_aux1_); - mem_.deviceSynchronize(); - - rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - A_->getNumRows(), - L_csr_->getNnz(), - &(constants::ONE), - descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - L_csr_->getRowData(ReSolve::memory::DEVICE), - L_csr_->getColData(ReSolve::memory::DEVICE), - info_L_, - d_aux1_, - d_aux2_, //result - rocsparse_solve_policy_auto, - L_buffer_); - error_sum += status_rocsparse_; - - rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), - rocsparse_operation_none, - A_->getNumRows(), - U_csr_->getNnz(), - &(constants::ONE), - descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, - U_csr_->getRowData(ReSolve::memory::DEVICE), - U_csr_->getColData(ReSolve::memory::DEVICE), - info_U_, - d_aux2_, 
//input - d_aux1_,//result - rocsparse_solve_policy_auto, - U_buffer_); - error_sum += status_rocsparse_; - - permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,x->getData(ReSolve::memory::DEVICE)); - mem_.deviceSynchronize(); - } - return error_sum; - } + permuteVectorP(A_->getNumRows(), d_P_, rhs->getData(ReSolve::memory::DEVICE), d_aux1_); + mem_.deviceSynchronize(); - int LinSolverDirectRocSolverRf::setSolveMode(int mode) - { - solve_mode_ = mode; - return 0; + rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), L_csr_->getNnz(), &(constants::ONE), + descr_L_, + L_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, d_aux1_, + d_aux2_, // result + rocsparse_solve_policy_auto, L_buffer_); + error_sum += status_rocsparse_; + + rocsparse_dcsrsv_solve(workspace_->getRocsparseHandle(), rocsparse_operation_none, A_->getNumRows(), U_csr_->getNnz(), &(constants::ONE), + descr_U_, + U_csr_->getValues(ReSolve::memory::DEVICE), // vals_, + U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, + d_aux2_, // input + d_aux1_, // result + rocsparse_solve_policy_auto, U_buffer_); + error_sum += status_rocsparse_; + + permuteVectorQ(A_->getNumRows(), d_Q_, d_aux1_, x->getData(ReSolve::memory::DEVICE)); + mem_.deviceSynchronize(); } + return error_sum; +} - int LinSolverDirectRocSolverRf::getSolveMode() - { - return solve_mode_; - } +int LinSolverDirectRocSolverRf::setSolveMode(int mode) +{ + solve_mode_ = mode; + return 0; +} - void LinSolverDirectRocSolverRf::addFactors(matrix::Sparse* L, matrix::Sparse* U) - { - // L and U need to be in CSC format - index_type n = L->getNumRows(); - index_type* Lp = L->getColData(ReSolve::memory::HOST); - index_type* Li = L->getRowData(ReSolve::memory::HOST); - index_type* Up = U->getColData(ReSolve::memory::HOST); - index_type* Ui = 
U->getRowData(ReSolve::memory::HOST); - if (M_ != nullptr) { - delete M_; - } +int LinSolverDirectRocSolverRf::getSolveMode() { return solve_mode_; } - index_type nnzM = ( L->getNnz() + U->getNnz() - n ); - M_ = new matrix::Csr(n, n, nnzM); - M_->allocateMatrixData(ReSolve::memory::DEVICE); - M_->allocateMatrixData(ReSolve::memory::HOST); - index_type* mia = M_->getRowData(ReSolve::memory::HOST); - index_type* mja = M_->getColData(ReSolve::memory::HOST); - index_type row; - for(index_type i = 0; i < n; ++i) { - // go through EACH COLUMN OF L first - for(index_type j = Lp[i]; j < Lp[i + 1]; ++j) { - row = Li[j]; - // BUT dont count diagonal twice, important - if(row != i) { - mia[row + 1]++; - } - } - // then each column of U - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; +void LinSolverDirectRocSolverRf::addFactors(matrix::Sparse *L, matrix::Sparse *U) +{ + // L and U need to be in CSC format + index_type n = L->getNumRows(); + index_type *Lp = L->getColData(ReSolve::memory::HOST); + index_type *Li = L->getRowData(ReSolve::memory::HOST); + index_type *Up = U->getColData(ReSolve::memory::HOST); + index_type *Ui = U->getRowData(ReSolve::memory::HOST); + if (M_ != nullptr) { + delete M_; + } + + index_type nnzM = (L->getNnz() + U->getNnz() - n); + M_ = new matrix::Csr(n, n, nnzM); + M_->allocateMatrixData(ReSolve::memory::DEVICE); + M_->allocateMatrixData(ReSolve::memory::HOST); + index_type *mia = M_->getRowData(ReSolve::memory::HOST); + index_type *mja = M_->getColData(ReSolve::memory::HOST); + index_type row; + for (index_type i = 0; i < n; ++i) { + // go through EACH COLUMN OF L first + for (index_type j = Lp[i]; j < Lp[i + 1]; ++j) { + row = Li[j]; + // BUT dont count diagonal twice, important + if (row != i) { mia[row + 1]++; } } - // then organize mia_; - mia[0] = 0; - for(index_type i = 1; i < n + 1; i++) { - mia[i] += mia[i - 1]; + // then each column of U + for (index_type j = Up[i]; j < Up[i + 1]; ++j) { + row = Ui[j]; + mia[row + 1]++; } 
+ } + // then organize mia_; + mia[0] = 0; + for (index_type i = 1; i < n + 1; i++) { + mia[i] += mia[i - 1]; + } - std::vector Mshifts(static_cast(n), 0); - for(index_type i = 0; i < n; ++i) { - // go through EACH COLUMN OF L first - for(int j = Lp[i]; j < Lp[i + 1]; ++j) { - row = Li[j]; - if(row != i) { - // place (row, i) where it belongs! - mja[mia[row] + Mshifts[static_cast(row)]] = i; - Mshifts[static_cast(row)]++; - } - } - // each column of U next - for(index_type j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; + std::vector Mshifts(static_cast(n), 0); + for (index_type i = 0; i < n; ++i) { + // go through EACH COLUMN OF L first + for (int j = Lp[i]; j < Lp[i + 1]; ++j) { + row = Li[j]; + if (row != i) { + // place (row, i) where it belongs! mja[mia[row] + Mshifts[static_cast(row)]] = i; Mshifts[static_cast(row)]++; } } - //Mshifts.~vector(); + // each column of U next + for (index_type j = Up[i]; j < Up[i + 1]; ++j) { + row = Ui[j]; + mja[mia[row] + Mshifts[static_cast(row)]] = i; + Mshifts[static_cast(row)]++; + } } -}// namespace resolve + // Mshifts.~vector(); +} +} // namespace ReSolve diff --git a/resolve/LinSolverDirectRocSolverRf.hpp b/resolve/LinSolverDirectRocSolverRf.hpp index 3343301aa..361a13163 100644 --- a/resolve/LinSolverDirectRocSolverRf.hpp +++ b/resolve/LinSolverDirectRocSolverRf.hpp @@ -4,76 +4,71 @@ #include #include -#include +#include #include #include -#include -namespace ReSolve +#include +namespace ReSolve { - // Forward declaration of vector::Vector class - namespace vector - { - class Vector; - } +// Forward declaration of vector::Vector class +namespace vector +{ +class Vector; +} - // Forward declaration of matrix::Sparse and matrix::Csr classes - namespace matrix - { - class Sparse; - class Csr; - } - - class LinSolverDirectRocSolverRf : public LinSolverDirect - { - using vector_type = vector::Vector; - - public: - LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP* workspace); - ~LinSolverDirectRocSolverRf(); - - int 
setup(matrix::Sparse* A, - matrix::Sparse* L, - matrix::Sparse* U, - index_type* P, - index_type* Q, - vector_type* rhs); - - int refactorize(); - int solve(vector_type* rhs, vector_type* x); - int solve(vector_type* rhs);// the solutuon is returned IN RHS (rhs is overwritten) - - int setSolveMode(int mode); // should probably be enum - int getSolveMode(); //should be enum too +// Forward declaration of matrix::Sparse and matrix::Csr classes +namespace matrix +{ +class Sparse; +class Csr; +} // namespace matrix - private: - rocblas_status status_rocblas_; - rocsparse_status status_rocsparse_; - index_type* d_P_{nullptr}; - index_type* d_Q_{nullptr}; +class LinSolverDirectRocSolverRf : public LinSolverDirect +{ + using vector_type = vector::Vector; - MemoryHandler mem_; ///< Device memory manager object - LinAlgWorkspaceHIP* workspace_; + public: + LinSolverDirectRocSolverRf(LinAlgWorkspaceHIP *workspace); + ~LinSolverDirectRocSolverRf(); - // to be exported to matrix handler in a later time - void addFactors(matrix::Sparse* L, matrix::Sparse* U); //create L+U from sepeate L, U factors - rocsolver_rfinfo infoM_; - matrix::Sparse* M_{nullptr};//the matrix that contains added factors - int solve_mode_; // 0 is default and 1 is fast + int setup(matrix::Sparse *A, matrix::Sparse *L, matrix::Sparse *U, index_type *P, index_type *Q, vector_type *rhs); - // not used by default - for fast solve - rocsparse_mat_descr descr_L_{nullptr}; - rocsparse_mat_descr descr_U_{nullptr}; + int refactorize(); + int solve(vector_type *rhs, vector_type *x); + int solve(vector_type *rhs); // the solutuon is returned IN RHS (rhs is overwritten) - rocsparse_mat_info info_L_{nullptr}; - rocsparse_mat_info info_U_{nullptr}; + int setSolveMode(int mode); // should probably be enum + int getSolveMode(); // should be enum too - void* L_buffer_{nullptr}; - void* U_buffer_{nullptr}; + private: + rocblas_status status_rocblas_; + rocsparse_status status_rocsparse_; + index_type *d_P_{nullptr}; + 
index_type *d_Q_{nullptr}; - ReSolve::matrix::Csr* L_csr_{nullptr}; - ReSolve::matrix::Csr* U_csr_{nullptr}; - - real_type* d_aux1_{nullptr}; - real_type* d_aux2_{nullptr}; - }; -} + MemoryHandler mem_; ///< Device memory manager object + LinAlgWorkspaceHIP *workspace_; + + // to be exported to matrix handler in a later time + void addFactors(matrix::Sparse *L, matrix::Sparse *U); // create L+U from sepeate L, U factors + rocsolver_rfinfo infoM_; + matrix::Sparse *M_{nullptr}; // the matrix that contains added factors + int solve_mode_; // 0 is default and 1 is fast + + // not used by default - for fast solve + rocsparse_mat_descr descr_L_{nullptr}; + rocsparse_mat_descr descr_U_{nullptr}; + + rocsparse_mat_info info_L_{nullptr}; + rocsparse_mat_info info_U_{nullptr}; + + void *L_buffer_{nullptr}; + void *U_buffer_{nullptr}; + + ReSolve::matrix::Csr *L_csr_{nullptr}; + ReSolve::matrix::Csr *U_csr_{nullptr}; + + real_type *d_aux1_{nullptr}; + real_type *d_aux2_{nullptr}; +}; +} // namespace ReSolve diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index 40fdb22c1..aa18e6840 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -1,344 +1,300 @@ -#include #include #include +#include -#include -#include #include "LinSolverIterativeFGMRES.hpp" +#include +#include namespace ReSolve { - using out = io::Logger; - - LinSolverIterativeFGMRES::LinSolverIterativeFGMRES(std::string memspace) - { - memspace_ = memspace; - this->matrix_handler_ = nullptr; - this->vector_handler_ = nullptr; - tol_ = 1e-14; //default - maxit_= 100; //default - restart_ = 10; - conv_cond_ = 0;//default - - d_V_ = nullptr; - d_Z_ = nullptr; - } - - LinSolverIterativeFGMRES::LinSolverIterativeFGMRES(MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - GramSchmidt* gs, - std::string memspace) - { - memspace_ = memspace; - this->matrix_handler_ = matrix_handler; - this->vector_handler_ = vector_handler; - 
this->GS_ = gs; - - tol_ = 1e-14; //default - maxit_= 100; //default - restart_ = 10; - conv_cond_ = 0;//default - - d_V_ = nullptr; - d_Z_ = nullptr; - } +using out = io::Logger; - LinSolverIterativeFGMRES::LinSolverIterativeFGMRES(index_type restart, - real_type tol, - index_type maxit, - index_type conv_cond, - MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - GramSchmidt* gs, - std::string memspace) - { - memspace_ = memspace; - this->matrix_handler_ = matrix_handler; - this->vector_handler_ = vector_handler; - this->GS_ = gs; - - tol_ = tol; - maxit_= maxit; - restart_ = restart; - conv_cond_ = conv_cond; - - d_V_ = nullptr; - d_Z_ = nullptr; - - } +LinSolverIterativeFGMRES::LinSolverIterativeFGMRES(std::string memspace) +{ + memspace_ = memspace; + this->matrix_handler_ = nullptr; + this->vector_handler_ = nullptr; + tol_ = 1e-14; // default + maxit_ = 100; // default + restart_ = 10; + conv_cond_ = 0; // default + + d_V_ = nullptr; + d_Z_ = nullptr; +} + +LinSolverIterativeFGMRES::LinSolverIterativeFGMRES(MatrixHandler *matrix_handler, VectorHandler *vector_handler, GramSchmidt *gs, + std::string memspace) +{ + memspace_ = memspace; + this->matrix_handler_ = matrix_handler; + this->vector_handler_ = vector_handler; + this->GS_ = gs; + + tol_ = 1e-14; // default + maxit_ = 100; // default + restart_ = 10; + conv_cond_ = 0; // default + + d_V_ = nullptr; + d_Z_ = nullptr; +} + +LinSolverIterativeFGMRES::LinSolverIterativeFGMRES(index_type restart, real_type tol, index_type maxit, index_type conv_cond, + MatrixHandler *matrix_handler, VectorHandler *vector_handler, GramSchmidt *gs, + std::string memspace) +{ + memspace_ = memspace; + this->matrix_handler_ = matrix_handler; + this->vector_handler_ = vector_handler; + this->GS_ = gs; - LinSolverIterativeFGMRES::~LinSolverIterativeFGMRES() - { - if (d_V_ != nullptr) { - // cudaFree(d_V_); - delete d_V_; - } + tol_ = tol; + maxit_ = maxit; + restart_ = restart; + conv_cond_ = conv_cond; - if (d_Z_ 
!= nullptr) { - // cudaFree(d_Z_); - delete d_Z_; - } + d_V_ = nullptr; + d_Z_ = nullptr; +} +LinSolverIterativeFGMRES::~LinSolverIterativeFGMRES() +{ + if (d_V_ != nullptr) { + // cudaFree(d_V_); + delete d_V_; } - int LinSolverIterativeFGMRES::setup(matrix::Sparse* A) - { - this->A_ = A; - n_ = A_->getNumRows(); - - d_V_ = new vector_type(n_, restart_ + 1); - d_V_->allocate(memory::DEVICE); - d_Z_ = new vector_type(n_, restart_ + 1); - d_Z_->allocate(memory::DEVICE); - h_H_ = new real_type[restart_ * (restart_ + 1)]; - h_c_ = new real_type[restart_]; // needed for givens - h_s_ = new real_type[restart_]; // same - h_rs_ = new real_type[restart_ + 1]; // for residual norm history - - return 0; + if (d_Z_ != nullptr) { + // cudaFree(d_Z_); + delete d_Z_; } +} - int LinSolverIterativeFGMRES::solve(vector_type* rhs, vector_type* x) - { - using namespace constants; - - int outer_flag = 1; - int notconv = 1; - int i = 0; - int it = 0; - int j; - int k; - int k1; - - real_type t; - real_type rnorm; - real_type bnorm; - // real_type rnorm_aux; - real_type tolrel; - vector_type* vec_v = new vector_type(n_); - vector_type* vec_z = new vector_type(n_); - //V[0] = b-A*x_0 - //debug - d_Z_->setToZero(memory::DEVICE); - d_V_->setToZero(memory::DEVICE); - - rhs->deepCopyVectorData(d_V_->getData(memory::DEVICE), 0, memory::DEVICE); - matrix_handler_->matvec(A_, x, d_V_, &MINUSONE, &ONE, "csr", memspace_); - rnorm = 0.0; - bnorm = vector_handler_->dot(rhs, rhs, memspace_); - rnorm = vector_handler_->dot(d_V_, d_V_, memspace_); - - //rnorm = ||V_1|| - rnorm = sqrt(rnorm); - bnorm = sqrt(bnorm); - initial_residual_norm_ = rnorm; - while(outer_flag) { - // check if maybe residual is already small enough? 
- if(it == 0) { - tolrel = tol_ * rnorm; - if(fabs(tolrel) < 1e-16) { - tolrel = 1e-16; - } +int LinSolverIterativeFGMRES::setup(matrix::Sparse *A) +{ + this->A_ = A; + n_ = A_->getNumRows(); + + d_V_ = new vector_type(n_, restart_ + 1); + d_V_->allocate(memory::DEVICE); + d_Z_ = new vector_type(n_, restart_ + 1); + d_Z_->allocate(memory::DEVICE); + h_H_ = new real_type[restart_ * (restart_ + 1)]; + h_c_ = new real_type[restart_]; // needed for givens + h_s_ = new real_type[restart_]; // same + h_rs_ = new real_type[restart_ + 1]; // for residual norm history + + return 0; +} + +int LinSolverIterativeFGMRES::solve(vector_type *rhs, vector_type *x) +{ + using namespace constants; + + int outer_flag = 1; + int notconv = 1; + int i = 0; + int it = 0; + int j; + int k; + int k1; + + real_type t; + real_type rnorm; + real_type bnorm; + // real_type rnorm_aux; + real_type tolrel; + vector_type *vec_v = new vector_type(n_); + vector_type *vec_z = new vector_type(n_); + // V[0] = b-A*x_0 + // debug + d_Z_->setToZero(memory::DEVICE); + d_V_->setToZero(memory::DEVICE); + + rhs->deepCopyVectorData(d_V_->getData(memory::DEVICE), 0, memory::DEVICE); + matrix_handler_->matvec(A_, x, d_V_, &MINUSONE, &ONE, "csr", memspace_); + rnorm = 0.0; + bnorm = vector_handler_->dot(rhs, rhs, memspace_); + rnorm = vector_handler_->dot(d_V_, d_V_, memspace_); + + // rnorm = ||V_1|| + rnorm = sqrt(rnorm); + bnorm = sqrt(bnorm); + initial_residual_norm_ = rnorm; + while (outer_flag) { + // check if maybe residual is already small enough? 
+ if (it == 0) { + tolrel = tol_ * rnorm; + if (fabs(tolrel) < 1e-16) { + tolrel = 1e-16; } - int exit_cond = 0; - if (conv_cond_ == 0){ - exit_cond = ((fabs(rnorm - ZERO) <= EPSILON)); + } + int exit_cond = 0; + if (conv_cond_ == 0) { + exit_cond = ((fabs(rnorm - ZERO) <= EPSILON)); + } else { + if (conv_cond_ == 1) { + exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < tol_)); } else { - if (conv_cond_ == 1){ - exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < tol_)); - } else { - if (conv_cond_ == 2){ - exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < (tol_*bnorm))); - } + if (conv_cond_ == 2) { + exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < (tol_ * bnorm))); } } - if (exit_cond) { - outer_flag = 0; - final_residual_norm_ = rnorm; - initial_residual_norm_ = rnorm; - fgmres_iters_ = 0; - break; - } - - // normalize first vector - t = 1.0 / rnorm; - vector_handler_->scal(&t, d_V_, memspace_); - // initialize norm history - h_rs_[0] = rnorm; - i = -1; - notconv = 1; + } + if (exit_cond) { + outer_flag = 0; + final_residual_norm_ = rnorm; + initial_residual_norm_ = rnorm; + fgmres_iters_ = 0; + break; + } - while((notconv) && (it < maxit_)) { - i++; - it++; + // normalize first vector + t = 1.0 / rnorm; + vector_handler_->scal(&t, d_V_, memspace_); + // initialize norm history + h_rs_[0] = rnorm; + i = -1; + notconv = 1; - // Z_i = (LU)^{-1}*V_i + while ((notconv) && (it < maxit_)) { + i++; + it++; - vec_v->setData( d_V_->getVectorData(i, memory::DEVICE), memory::DEVICE); - vec_z->setData( d_Z_->getVectorData(i, memory::DEVICE), memory::DEVICE); - this->precV(vec_v, vec_z); - mem_.deviceSynchronize(); + // Z_i = (LU)^{-1}*V_i - // V_{i+1}=A*Z_i + vec_v->setData(d_V_->getVectorData(i, memory::DEVICE), memory::DEVICE); + vec_z->setData(d_Z_->getVectorData(i, memory::DEVICE), memory::DEVICE); + this->precV(vec_v, vec_z); + mem_.deviceSynchronize(); - vec_v->setData( d_V_->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); + // 
V_{i+1}=A*Z_i - matrix_handler_->matvec(A_, vec_z, vec_v, &ONE, &ZERO,"csr", memspace_); + vec_v->setData(d_V_->getVectorData(i + 1, memory::DEVICE), memory::DEVICE); - // orthogonalize V[i+1], form a column of h_H_ + matrix_handler_->matvec(A_, vec_z, vec_v, &ONE, &ZERO, "csr", memspace_); - GS_->orthogonalize(n_, d_V_, h_H_, i, memspace_); ; - if(i != 0) { - for(int k = 1; k <= i; k++) { - k1 = k - 1; - t = h_H_[i * (restart_ + 1) + k1]; - h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k]; - h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; - } - } // if i!=0 - double Hii = h_H_[i * (restart_ + 1) + i]; - double Hii1 = h_H_[(i) * (restart_ + 1) + i + 1]; - double gam = sqrt(Hii * Hii + Hii1 * Hii1); + // orthogonalize V[i+1], form a column of h_H_ - if(fabs(gam - ZERO) <= EPSILON) { - gam = EPSMAC; + GS_->orthogonalize(n_, d_V_, h_H_, i, memspace_); + ; + if (i != 0) { + for (int k = 1; k <= i; k++) { + k1 = k - 1; + t = h_H_[i * (restart_ + 1) + k1]; + h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k]; + h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; } + } // if i!=0 + double Hii = h_H_[i * (restart_ + 1) + i]; + double Hii1 = h_H_[(i) * (restart_ + 1) + i + 1]; + double gam = sqrt(Hii * Hii + Hii1 * Hii1); - /* next Given's rotation */ - h_c_[i] = Hii / gam; - h_s_[i] = Hii1 / gam; - h_rs_[i + 1] = -h_s_[i] * h_rs_[i]; - h_rs_[i] = h_c_[i] * h_rs_[i]; - - h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; - h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii; - - // residual norm estimate - rnorm = fabs(h_rs_[i + 1]); - // check convergence - if(i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) { - notconv = 0; - } - } // inner while - - // solve tri system - h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i]; - for(int ii = 2; ii <= i + 1; ii++) { - k = i - ii + 1; - k1 = 
k + 1; - t = h_rs_[k]; - for(j = k1; j <= i; j++) { - t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j]; - } - h_rs_[k] = t / h_H_[k * (restart_ + 1) + k]; + if (fabs(gam - ZERO) <= EPSILON) { + gam = EPSMAC; } - // get solution - for(j = 0; j <= i; j++) { - vec_z->setData( d_Z_->getVectorData(j, memory::DEVICE), memory::DEVICE); - vector_handler_->axpy(&h_rs_[j], vec_z, x, memspace_); - } + /* next Given's rotation */ + h_c_[i] = Hii / gam; + h_s_[i] = Hii1 / gam; + h_rs_[i + 1] = -h_s_[i] * h_rs_[i]; + h_rs_[i] = h_c_[i] * h_rs_[i]; - /* test solution */ + h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; + h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii; - if(rnorm <= tolrel || it >= maxit_) { - // rnorm_aux = rnorm; - outer_flag = 0; + // residual norm estimate + rnorm = fabs(h_rs_[i + 1]); + // check convergence + if (i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) { + notconv = 0; } + } // inner while + + // solve tri system + h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i]; + for (int ii = 2; ii <= i + 1; ii++) { + k = i - ii + 1; + k1 = k + 1; + t = h_rs_[k]; + for (j = k1; j <= i; j++) { + t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j]; + } + h_rs_[k] = t / h_H_[k * (restart_ + 1) + k]; + } - rhs->deepCopyVectorData(d_V_->getData(memory::DEVICE), 0, memory::DEVICE); - matrix_handler_->matvec(A_, x, d_V_, &MINUSONE, &ONE,"csr", memspace_); - rnorm = vector_handler_->dot(d_V_, d_V_, memspace_); - // rnorm = ||V_1|| - rnorm = sqrt(rnorm); + // get solution + for (j = 0; j <= i; j++) { + vec_z->setData(d_Z_->getVectorData(j, memory::DEVICE), memory::DEVICE); + vector_handler_->axpy(&h_rs_[j], vec_z, x, memspace_); + } - if(!outer_flag) { - final_residual_norm_ = rnorm; - fgmres_iters_ = it; - } - } // outer while - return 0; - } + /* test solution */ - int LinSolverIterativeFGMRES::setupPreconditioner(std::string type, LinSolverDirect* LU_solver) - { - if (type != "LU") { - out::warning() << "Only cusolverRf tri solve 
can be used as a preconditioner at this time." << std::endl; - return 1; - } else { - LU_solver_ = LU_solver; - return 0; + if (rnorm <= tolrel || it >= maxit_) { + // rnorm_aux = rnorm; + outer_flag = 0; } - } + rhs->deepCopyVectorData(d_V_->getData(memory::DEVICE), 0, memory::DEVICE); + matrix_handler_->matvec(A_, x, d_V_, &MINUSONE, &ONE, "csr", memspace_); + rnorm = vector_handler_->dot(d_V_, d_V_, memspace_); + // rnorm = ||V_1|| + rnorm = sqrt(rnorm); - real_type LinSolverIterativeFGMRES::getTol() - { - return tol_; - } + if (!outer_flag) { + final_residual_norm_ = rnorm; + fgmres_iters_ = it; + } + } // outer while + return 0; +} - index_type LinSolverIterativeFGMRES::getMaxit() - { - return maxit_; +int LinSolverIterativeFGMRES::setupPreconditioner(std::string type, LinSolverDirect *LU_solver) +{ + if (type != "LU") { + out::warning() << "Only cusolverRf tri solve can be used as a preconditioner at this time." << std::endl; + return 1; + } else { + LU_solver_ = LU_solver; + return 0; } +} - index_type LinSolverIterativeFGMRES::getRestart() - { - return restart_; - } +real_type LinSolverIterativeFGMRES::getTol() { return tol_; } - index_type LinSolverIterativeFGMRES::getConvCond() - { - return conv_cond_; - } +index_type LinSolverIterativeFGMRES::getMaxit() { return maxit_; } - void LinSolverIterativeFGMRES::setTol(real_type new_tol) - { - this->tol_ = new_tol; - } +index_type LinSolverIterativeFGMRES::getRestart() { return restart_; } - void LinSolverIterativeFGMRES::setMaxit(index_type new_maxit) - { - this->maxit_ = new_maxit; - } +index_type LinSolverIterativeFGMRES::getConvCond() { return conv_cond_; } - void LinSolverIterativeFGMRES::setRestart(index_type new_restart) - { - this->restart_ = new_restart; - } +void LinSolverIterativeFGMRES::setTol(real_type new_tol) { this->tol_ = new_tol; } - void LinSolverIterativeFGMRES::setConvCond(index_type new_conv_cond) - { - this->conv_cond_ = new_conv_cond; - } +void 
LinSolverIterativeFGMRES::setMaxit(index_type new_maxit) { this->maxit_ = new_maxit; } - int LinSolverIterativeFGMRES::resetMatrix(matrix::Sparse* new_matrix) - { - A_ = new_matrix; - matrix_handler_->setValuesChanged(true, memspace_); - return 0; - } +void LinSolverIterativeFGMRES::setRestart(index_type new_restart) { this->restart_ = new_restart; } +void LinSolverIterativeFGMRES::setConvCond(index_type new_conv_cond) { this->conv_cond_ = new_conv_cond; } +int LinSolverIterativeFGMRES::resetMatrix(matrix::Sparse *new_matrix) +{ + A_ = new_matrix; + matrix_handler_->setValuesChanged(true, memspace_); + return 0; +} - void LinSolverIterativeFGMRES::precV(vector_type* rhs, vector_type* x) - { - LU_solver_->solve(rhs, x); - // x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); - } +void LinSolverIterativeFGMRES::precV(vector_type *rhs, vector_type *x) +{ + LU_solver_->solve(rhs, x); + // x->update(rhs->getData(memory::DEVICE), memory::DEVICE, memory::DEVICE); +} - real_type LinSolverIterativeFGMRES::getFinalResidualNorm() - { - return final_residual_norm_; - } +real_type LinSolverIterativeFGMRES::getFinalResidualNorm() { return final_residual_norm_; } - real_type LinSolverIterativeFGMRES::getInitResidualNorm() - { - return initial_residual_norm_; - } +real_type LinSolverIterativeFGMRES::getInitResidualNorm() { return initial_residual_norm_; } - index_type LinSolverIterativeFGMRES::getNumIter() - { - return fgmres_iters_; - } -}//namespace +index_type LinSolverIterativeFGMRES::getNumIter() { return fgmres_iters_; } +} // namespace ReSolve diff --git a/resolve/LinSolverIterativeFGMRES.hpp b/resolve/LinSolverIterativeFGMRES.hpp index a9fc5058d..bb9b06ffc 100644 --- a/resolve/LinSolverIterativeFGMRES.hpp +++ b/resolve/LinSolverIterativeFGMRES.hpp @@ -1,80 +1,70 @@ #pragma once #include "Common.hpp" +#include "GramSchmidt.hpp" +#include "LinSolver.hpp" #include #include -#include "LinSolver.hpp" -#include "GramSchmidt.hpp" -namespace ReSolve 
+namespace ReSolve { - class LinSolverIterativeFGMRES : public LinSolverIterative - { - using vector_type = vector::Vector; - - public: - LinSolverIterativeFGMRES(std::string memspace = "cuda"); - LinSolverIterativeFGMRES( MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - GramSchmidt* gs, - std::string memspace = "cuda"); - LinSolverIterativeFGMRES(index_type restart, - real_type tol, - index_type maxit, - index_type conv_cond, - MatrixHandler* matrix_handler, - VectorHandler* vector_handler, - GramSchmidt* gs, - std::string memspace = "cuda"); - ~LinSolverIterativeFGMRES(); +class LinSolverIterativeFGMRES : public LinSolverIterative +{ + using vector_type = vector::Vector; - int solve(vector_type* rhs, vector_type* x); - int setup(matrix::Sparse* A); - int resetMatrix(matrix::Sparse* new_A); - int setupPreconditioner(std::string name, LinSolverDirect* LU_solver); + public: + LinSolverIterativeFGMRES(std::string memspace = "cuda"); + LinSolverIterativeFGMRES(MatrixHandler *matrix_handler, VectorHandler *vector_handler, GramSchmidt *gs, std::string memspace = "cuda"); + LinSolverIterativeFGMRES(index_type restart, real_type tol, index_type maxit, index_type conv_cond, MatrixHandler *matrix_handler, + VectorHandler *vector_handler, GramSchmidt *gs, std::string memspace = "cuda"); + ~LinSolverIterativeFGMRES(); - real_type getTol(); - index_type getMaxit(); - index_type getRestart(); - index_type getConvCond(); + int solve(vector_type *rhs, vector_type *x); + int setup(matrix::Sparse *A); + int resetMatrix(matrix::Sparse *new_A); + int setupPreconditioner(std::string name, LinSolverDirect *LU_solver); - void setTol(real_type new_tol); - void setMaxit(index_type new_maxit); - void setRestart(index_type new_restart); - void setConvCond(index_type new_conv_cond); + real_type getTol(); + index_type getMaxit(); + index_type getRestart(); + index_type getConvCond(); - real_type getFinalResidualNorm(); - real_type getInitResidualNorm(); - index_type 
getNumIter(); + void setTol(real_type new_tol); + void setMaxit(index_type new_maxit); + void setRestart(index_type new_restart); + void setConvCond(index_type new_conv_cond); - private: - //remember matrix handler and vector handler are inherited. + real_type getFinalResidualNorm(); + real_type getInitResidualNorm(); + index_type getNumIter(); - std::string memspace_; + private: + // remember matrix handler and vector handler are inherited. - real_type tol_; - index_type maxit_; - index_type restart_; - std::string orth_option_; - index_type conv_cond_; + std::string memspace_; - vector_type* d_V_{nullptr}; - vector_type* d_Z_{nullptr}; + real_type tol_; + index_type maxit_; + index_type restart_; + std::string orth_option_; + index_type conv_cond_; - real_type* h_H_{nullptr}; - real_type* h_c_{nullptr}; - real_type* h_s_{nullptr}; - real_type* h_rs_{nullptr}; + vector_type *d_V_{nullptr}; + vector_type *d_Z_{nullptr}; + real_type *h_H_{nullptr}; + real_type *h_c_{nullptr}; + real_type *h_s_{nullptr}; + real_type *h_rs_{nullptr}; - GramSchmidt* GS_; - void precV(vector_type* rhs, vector_type* x); //multiply the vector by preconditioner - LinSolverDirect* LU_solver_; - index_type n_;// for simplicity - real_type final_residual_norm_; - real_type initial_residual_norm_; - index_type fgmres_iters_; + GramSchmidt *GS_; + void precV(vector_type *rhs, vector_type *x); // multiply the vector by preconditioner + LinSolverDirect *LU_solver_; + index_type n_; // for simplicity + real_type final_residual_norm_; + real_type initial_residual_norm_; + index_type fgmres_iters_; - MemoryHandler mem_; ///< Device memory manager object - }; -} + MemoryHandler mem_; ///< Device memory manager object +}; +} // namespace ReSolve diff --git a/resolve/MemoryUtils.hpp b/resolve/MemoryUtils.hpp index d87c621f2..4c7326478 100644 --- a/resolve/MemoryUtils.hpp +++ b/resolve/MemoryUtils.hpp @@ -2,68 +2,59 @@ #include - namespace ReSolve { - namespace memory - { - enum MemorySpace{HOST = 0, 
DEVICE}; - enum MemoryDirection{HOST_TO_HOST = 0, HOST_TO_DEVICE, DEVICE_TO_HOST, DEVICE_TO_DEVICE}; - } -} +namespace memory +{ +enum MemorySpace { HOST = 0, DEVICE }; +enum MemoryDirection { HOST_TO_HOST = 0, HOST_TO_DEVICE, DEVICE_TO_HOST, DEVICE_TO_DEVICE }; +} // namespace memory +} // namespace ReSolve namespace ReSolve { - /** - * @class MemoryUtils - * - * @brief Provides basic memory allocation, free and copy functions. - * - * This class provedes abstractions for memory management functiosn for - * different GPU programming models. - * - * @tparam Policy - Memory management policy (vendor specific) - * - * @author Slaven Peles - */ - template - class MemoryUtils +/** + * @class MemoryUtils + * + * @brief Provides basic memory allocation, free and copy functions. + * + * This class provedes abstractions for memory management functiosn for + * different GPU programming models. + * + * @tparam Policy - Memory management policy (vendor specific) + * + * @author Slaven Peles + */ +template class MemoryUtils +{ + public: + MemoryUtils() = default; + ~MemoryUtils() = default; + + void deviceSynchronize(); + int getLastDeviceError(); + int deleteOnDevice(void *v); + + template int allocateArrayOnDevice(T **v, I n); + + template int allocateBufferOnDevice(T **v, I n); + + template int setZeroArrayOnDevice(T *v, I n); + + template int copyArrayDeviceToHost(T *dst, const T *src, I n); + + template int copyArrayDeviceToDevice(T *dst, const T *src, I n); + + template int copyArrayHostToDevice(T *dst, const T *src, I n); + + /// Implemented here as it is always needed + template int copyArrayHostToHost(T *dst, const T *src, I n) { - public: - MemoryUtils() = default; - ~MemoryUtils() = default; - - void deviceSynchronize(); - int getLastDeviceError(); - int deleteOnDevice(void* v); - - template - int allocateArrayOnDevice(T** v, I n); - - template - int allocateBufferOnDevice(T** v, I n); - - template - int setZeroArrayOnDevice(T* v, I n); - - template - int 
copyArrayDeviceToHost(T* dst, const T* src, I n); - - template - int copyArrayDeviceToDevice(T* dst, const T* src, I n); - - template - int copyArrayHostToDevice(T* dst, const T* src, I n); - - /// Implemented here as it is always needed - template - int copyArrayHostToHost(T* dst, const T* src, I n) - { - size_t nelements = static_cast(n); - memcpy(dst, src, nelements * sizeof(T)); - return 0; - } - }; + size_t nelements = static_cast(n); + memcpy(dst, src, nelements * sizeof(T)); + return 0; + } +}; } // namespace ReSolve @@ -87,4 +78,3 @@ using MemoryHandler = ReSolve::MemoryUtils; using MemoryHandler = ReSolve::MemoryUtils; #endif - diff --git a/resolve/MemoryUtils.tpp b/resolve/MemoryUtils.tpp index 936a33362..ba97bd511 100644 --- a/resolve/MemoryUtils.tpp +++ b/resolve/MemoryUtils.tpp @@ -72,4 +72,4 @@ namespace ReSolve return Policy::template copyArrayHostToDevice(dst, src, n); } -} // namespace ReSolve \ No newline at end of file +} // namespace ReSolve diff --git a/resolve/RefactorizationSolver.cpp b/resolve/RefactorizationSolver.cpp index 45311f6f3..6feeee9e1 100644 --- a/resolve/RefactorizationSolver.cpp +++ b/resolve/RefactorizationSolver.cpp @@ -1,8 +1,6 @@ #include "RefactorizationSolver.hpp" -namespace ReSolve +namespace ReSolve { - RefactorizationSolver::RefactorizationSolver() - { - } -} +RefactorizationSolver::RefactorizationSolver() {} +} // namespace ReSolve diff --git a/resolve/RefactorizationSolver.hpp b/resolve/RefactorizationSolver.hpp index 5beeb8d62..16640a77b 100644 --- a/resolve/RefactorizationSolver.hpp +++ b/resolve/RefactorizationSolver.hpp @@ -4,33 +4,31 @@ namespace ReSolve { - RefactorizationSolver - { - using vector_type = vector::Vector; - - public: - RefactorizationSolver(); - ~RefactorizationSolver(); - int setup(std::string first_solver, - std::string refact_solver_, - std::string use_ir_); +RefactorizationSolver +{ + using vector_type = vector::Vector; + + public: + RefactorizationSolver(); + ~RefactorizationSolver(); + int 
setup(std::string first_solver, std::string refact_solver_, std::string use_ir_); + + int setup_ir(real_type ir_tol, index_type ir_maxit, index_type ir_gs_); + + int solve(matrix::Sparse * A, vector_type * vec_rhs, vector_type * vec_x); - int setup_ir(real_type ir_tol, index_type ir_maxit, index_type ir_gs_); - - int solve(matrix::Sparse* A, vector_type* vec_rhs, vector_type* vec_x); - - private: - std::string first_solver_name_; - std::string refact_solver_name_; - std::string use_ir_; - //IR parameters - real_type ir_tol_; - index_type ir_maxit_; - index_type ir_gs_; + private: + std::string first_solver_name_; + std::string refact_solver_name_; + std::string use_ir_; + // IR parameters + real_type ir_tol_; + index_type ir_maxit_; + index_type ir_gs_; - LinSolverDirect* first_solver_; - LinSolverDirect* refact_solver_; - LinSolverIterative* ir_solver_; - bool factorization_exists_; - }; -} + LinSolverDirect *first_solver_; + LinSolverDirect *refact_solver_; + LinSolverIterative *ir_solver_; + bool factorization_exists_; +}; +} // namespace ReSolve diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index 0e979d2ca..753d703a0 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -1,31 +1,30 @@ -namespace +namespace { - SystemSolver::SystemSolver(){ - //set defaults: +SystemSolver::SystemSolver() +{ + // set defaults: factorizationMethod = "klu"; refactorizationMethod = "glu"; solveMethod = "glu"; IRMethod = "none"; - + this->setup(); - } - SystemSolver::~SystemSoler() - { - //delete the matrix and all the solvers and all their workspace - - } +} +SystemSolver::~SystemSoler() +{ + // delete the matrix and all the solvers and all their workspace +} - SystemSolver::setup(){ - if (factorizationMethod == "klu"){ - - } +SystemSolver::setup() +{ + if (factorizationMethod == "klu") { } +} - SystemSolver::analyze() - { - if (factorizationMethod == "klu"){ - //call klu_analyze - } - +SystemSolver::analyze() +{ + if (factorizationMethod == 
"klu") { + // call klu_analyze } } +} // namespace diff --git a/resolve/SystemSolver.hpp b/resolve/SystemSolver.hpp index 4e842b061..eb4a94edc 100644 --- a/resolve/SystemSolver.hpp +++ b/resolve/SystemSolver.hpp @@ -1,35 +1,33 @@ -//this is to solve the system, can call different linear solvers if necessary -namespace +// this is to solve the system, can call different linear solvers if necessary +namespace { - class SystemSolver - { - SystemSolver(); - SystemSolver(std::string factorizationMethod, std::string refactorizationMethod, std::string solveMethod, std::string IRMethod); - - ~SystemSolver(); +class SystemSolver +{ + SystemSolver(); + SystemSolver(std::string factorizationMethod, std::string refactorizationMethod, std::string solveMethod, std::string IRMethod); - public: - analyze(); // symbolic part - factorize(); // numeric part - refactorize(); - solve(double* x, double* rhs); // for triangular solve - refine(double, double* rhs); // for iterative refinement + ~SystemSolver(); - // we update the matrix once it changed - updateMatrix(std::string format, int * ia, int *ja, double *a); + public: + analyze(); // symbolic part + factorize(); // numeric part + refactorize(); + solve(double *x, double *rhs); // for triangular solve + refine(double, double *rhs); // for iterative refinement - private: - - Sparse A; - std::string factorizationMethod; - std::string refactorizationMethod; - std::string solveMethod; - std::string IRMethod; + // we update the matrix once it changed + updateMatrix(std::string format, int *ia, int *ja, double *a); - setup(); - //internal function to setup the different solvers. IT IS RUN ONCE THROUGH CONSTRUCTOR. + private: + Sparse A; + std::string factorizationMethod; + std::string refactorizationMethod; + std::string solveMethod; + std::string IRMethod; - // add factorizationSolver, iterativeSolver, triangularSolver + setup(); + // internal function to setup the different solvers. IT IS RUN ONCE THROUGH CONSTRUCTOR. 
- }; -} + // add factorizationSolver, iterativeSolver, triangularSolver +}; +} // namespace diff --git a/resolve/cpu/CMakeLists.txt b/resolve/cpu/CMakeLists.txt index 164553157..248a24614 100644 --- a/resolve/cpu/CMakeLists.txt +++ b/resolve/cpu/CMakeLists.txt @@ -6,14 +6,9 @@ ]] -set(ReSolve_CPU_SRC - MemoryUtils.cpp - cpuVectorKernels.cpp -) +set(ReSolve_CPU_SRC MemoryUtils.cpp cpuVectorKernels.cpp) -set(ReSolve_CPU_HEADER_INSTALL - CpuMemory.hpp -) +set(ReSolve_CPU_HEADER_INSTALL CpuMemory.hpp) # First create dummy backend add_library(resolve_backend_cpu SHARED ${ReSolve_CPU_SRC}) diff --git a/resolve/cpu/CpuMemory.hpp b/resolve/cpu/CpuMemory.hpp index 89a9982d0..fd894abb0 100644 --- a/resolve/cpu/CpuMemory.hpp +++ b/resolve/cpu/CpuMemory.hpp @@ -4,131 +4,124 @@ namespace ReSolve { - namespace memory +namespace memory +{ +/** + * @brief Class containing dummy functions when there is no GPU support. + * + * @author Slaven Peles + */ +struct Cpu { + /** + * @brief Dummy function to stand in when GPU support is not enabled. + */ + static void deviceSynchronize() { - /** - * @brief Class containing dummy functions when there is no GPU support. - * - * @author Slaven Peles - */ - struct Cpu - { - /** - * @brief Dummy function to stand in when GPU support is not enabled. - */ - static void deviceSynchronize() - { - // Nothing to synchronize - } - - /** - * @brief Dummy function to stand in when GPU support is not enabled. - * - * @return Allways return success! - */ - static int getLastDeviceError() - { - // not on device, nothing to get - return 0; - } - - /** - * @brief Dummy function to notify us something is wrong. - * - * This will be called only if GPU device support is not built, so - * trying to access a device should indicate a bug in the code. - * - * @return Allways return failure! 
- */ - static int deleteOnDevice(void* /* v */) - { - ReSolve::io::Logger::error() << "Trying to delete on a GPU device, but GPU support not available.\n"; - return -1; - } + // Nothing to synchronize + } - /** - * @brief Dummy function to notify us something is wrong. - * - * This will be called only if GPU device support is not built, so - * trying to access a device should indicate a bug in the code. - * - * @return Allways return failure! - */ - template - static int allocateArrayOnDevice(T** /* v */, I /* n */) - { - ReSolve::io::Logger::error() << "Trying to allocate on a GPU device, but GPU support not available.\n"; - return -1; - } + /** + * @brief Dummy function to stand in when GPU support is not enabled. + * + * @return Allways return success! + */ + static int getLastDeviceError() + { + // not on device, nothing to get + return 0; + } - /** - * @brief Dummy function to notify us something is wrong. - * - * This will be called only if GPU device support is not built, so - * trying to access a device should indicate a bug in the code. - * - * @return Allways return failure! - */ - template - static int allocateBufferOnDevice(T** /* v */, I /* n */) - { - ReSolve::io::Logger::error() << "Trying to allocate on a GPU device, but GPU support not available.\n"; - return -1; - } + /** + * @brief Dummy function to notify us something is wrong. + * + * This will be called only if GPU device support is not built, so + * trying to access a device should indicate a bug in the code. + * + * @return Allways return failure! + */ + static int deleteOnDevice(void * /* v */) + { + ReSolve::io::Logger::error() << "Trying to delete on a GPU device, but GPU support not available.\n"; + return -1; + } - /** - * @brief Dummy function to notify us something is wrong. - * - * This will be called only if GPU device support is not built, so - * trying to access a device should indicate a bug in the code. - * - * @return Allways return failure! 
- */ - template - static int setZeroArrayOnDevice(T* /* v */, I /* n */) - { - ReSolve::io::Logger::error() << "Trying to initialize array on a GPU device, but GPU support not available.\n"; - return -1; - } + /** + * @brief Dummy function to notify us something is wrong. + * + * This will be called only if GPU device support is not built, so + * trying to access a device should indicate a bug in the code. + * + * @return Allways return failure! + */ + template static int allocateArrayOnDevice(T ** /* v */, I /* n */) + { + ReSolve::io::Logger::error() << "Trying to allocate on a GPU device, but GPU support not available.\n"; + return -1; + } - /** - * @brief Dummy function to notify us something is wrong. - * - * This will be called only if GPU device support is not built, so - * trying to access a device should indicate a bug in the code. - * - * @return Allways return failure! - */ - template - static int copyArrayDeviceToHost(T* /* dst */, const T* /* src */, I /* n */) - { - ReSolve::io::Logger::error() << "Trying to copy from a GPU device, but GPU support not available.\n"; - return -1; - } + /** + * @brief Dummy function to notify us something is wrong. + * + * This will be called only if GPU device support is not built, so + * trying to access a device should indicate a bug in the code. + * + * @return Allways return failure! + */ + template static int allocateBufferOnDevice(T ** /* v */, I /* n */) + { + ReSolve::io::Logger::error() << "Trying to allocate on a GPU device, but GPU support not available.\n"; + return -1; + } - /** - * @brief Dummy function to notify us something is wrong. - * - * This will be called only if GPU device support is not built, so - * trying to access a device should indicate a bug in the code. - * - * @return Allways return failure! 
- */ - template - static int copyArrayDeviceToDevice(T* /* dst */, const T* /* src */, I /* n */) - { - ReSolve::io::Logger::error() << "Trying to copy to a GPU device, but GPU support not available.\n"; - return -1; - } + /** + * @brief Dummy function to notify us something is wrong. + * + * This will be called only if GPU device support is not built, so + * trying to access a device should indicate a bug in the code. + * + * @return Allways return failure! + */ + template static int setZeroArrayOnDevice(T * /* v */, I /* n */) + { + ReSolve::io::Logger::error() << "Trying to initialize array on a GPU device, but GPU support not available.\n"; + return -1; + } - template - static int copyArrayHostToDevice(T* /* dst */, const T* /* src */, I /* n */) - { - ReSolve::io::Logger::error() << "Trying to copy to a GPU device, but GPU support not available.\n"; - return -1; - } + /** + * @brief Dummy function to notify us something is wrong. + * + * This will be called only if GPU device support is not built, so + * trying to access a device should indicate a bug in the code. + * + * @return Allways return failure! + */ + template static int copyArrayDeviceToHost(T * /* dst */, const T * /* src */, I /* n */) + { + ReSolve::io::Logger::error() << "Trying to copy from a GPU device, but GPU support not available.\n"; + return -1; + } + + /** + * @brief Dummy function to notify us something is wrong. + * + * This will be called only if GPU device support is not built, so + * trying to access a device should indicate a bug in the code. + * + * @return Allways return failure! 
+ */ + template static int copyArrayDeviceToDevice(T * /* dst */, const T * /* src */, I /* n */) + { + ReSolve::io::Logger::error() << "Trying to copy to a GPU device, but GPU support not available.\n"; + return -1; + } + + template static int copyArrayHostToDevice(T * /* dst */, const T * /* src */, I /* n */) + { + ReSolve::io::Logger::error() << "Trying to copy to a GPU device, but GPU support not available.\n"; + return -1; + } - }; // struct Cuda - } // namespace memory +}; // struct Cuda +} // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/cpu/MemoryUtils.cpp b/resolve/cpu/MemoryUtils.cpp index 03e913bc3..7b3fc1457 100644 --- a/resolve/cpu/MemoryUtils.cpp +++ b/resolve/cpu/MemoryUtils.cpp @@ -1,13 +1,12 @@ /** * @file MemoryUtils.cpp - * + * * This file includes MemoryUtils.tpp and specifies what functions to * instantiate from function templates. - * + * * @author Slaven Peles */ - #include #include @@ -18,23 +17,23 @@ namespace ReSolve { - template void MemoryUtils::deviceSynchronize(); - template int MemoryUtils::getLastDeviceError(); - template int MemoryUtils::deleteOnDevice(void*); +template void MemoryUtils::deviceSynchronize(); +template int MemoryUtils::getLastDeviceError(); +template int MemoryUtils::deleteOnDevice(void *); - template int MemoryUtils::allocateArrayOnDevice( real_type**, index_type); - template int MemoryUtils::allocateArrayOnDevice(index_type**, index_type); +template int MemoryUtils::allocateArrayOnDevice(real_type **, index_type); +template int MemoryUtils::allocateArrayOnDevice(index_type **, index_type); - template int MemoryUtils::allocateBufferOnDevice(void** v, size_t n); +template int MemoryUtils::allocateBufferOnDevice(void **v, size_t n); - template int MemoryUtils::setZeroArrayOnDevice( real_type*, index_type); +template int MemoryUtils::setZeroArrayOnDevice(real_type *, index_type); - template int MemoryUtils::copyArrayDeviceToHost( real_type*, const real_type*, index_type); - 
template int MemoryUtils::copyArrayDeviceToHost(index_type*, const index_type*, index_type); +template int MemoryUtils::copyArrayDeviceToHost(real_type *, const real_type *, index_type); +template int MemoryUtils::copyArrayDeviceToHost(index_type *, const index_type *, index_type); - template int MemoryUtils::copyArrayDeviceToDevice( real_type*, const real_type*, index_type); - template int MemoryUtils::copyArrayDeviceToDevice(index_type*, const index_type*, index_type); +template int MemoryUtils::copyArrayDeviceToDevice(real_type *, const real_type *, index_type); +template int MemoryUtils::copyArrayDeviceToDevice(index_type *, const index_type *, index_type); - template int MemoryUtils::copyArrayHostToDevice( real_type*, const real_type*, index_type); - template int MemoryUtils::copyArrayHostToDevice(index_type*, const index_type*, index_type); -} +template int MemoryUtils::copyArrayHostToDevice(real_type *, const real_type *, index_type); +template int MemoryUtils::copyArrayHostToDevice(index_type *, const index_type *, index_type); +} // namespace ReSolve diff --git a/resolve/cpu/cpuVectorKernels.cpp b/resolve/cpu/cpuVectorKernels.cpp index 26a204135..9f49ad883 100644 --- a/resolve/cpu/cpuVectorKernels.cpp +++ b/resolve/cpu/cpuVectorKernels.cpp @@ -1,15 +1,17 @@ #include #include +namespace ReSolve +{ +namespace vector +{ -namespace ReSolve { namespace vector { - - -void set_array_const(index_type n, real_type val, real_type* arr) +void set_array_const(index_type n, real_type val, real_type *arr) { - for(index_type i = 0; i < n; ++i) { + for (index_type i = 0; i < n; ++i) { arr[i] = val; } } -}} // namespace ReSolve::vector \ No newline at end of file +} // namespace vector +} // namespace ReSolve diff --git a/resolve/cuda/CMakeLists.txt b/resolve/cuda/CMakeLists.txt index 225ea3c6b..50fcedbbd 100644 --- a/resolve/cuda/CMakeLists.txt +++ b/resolve/cuda/CMakeLists.txt @@ -6,28 +6,19 @@ ]] -set(ReSolve_CUDA_SRC - cudaKernels.cu - cudaVectorKernels.cu - 
MemoryUtils.cu -) +set(ReSolve_CUDA_SRC cudaKernels.cu cudaVectorKernels.cu MemoryUtils.cu) -set(ReSolve_CUDA_HEADER_INSTALL - cudaKernels.h - cudaVectorKernels.h - CudaMemory.hpp - cuda_check_errors.hpp +set(ReSolve_CUDA_HEADER_INSTALL cudaKernels.h cudaVectorKernels.h + CudaMemory.hpp cuda_check_errors.hpp ) set_source_files_properties(${ReSolve_CUDA_SRC} PROPERTIES LANGUAGE CUDA) -# First create CUDA backend -# (this should really be CUDA _API_ backend, -# separate backend will be needed for CUDA SDK) +# First create CUDA backend (this should really be CUDA _API_ backend, separate +# backend will be needed for CUDA SDK) add_library(resolve_backend_cuda SHARED ${ReSolve_CUDA_SRC}) target_link_libraries(resolve_backend_cuda PRIVATE resolve_logger) target_link_libraries(resolve_backend_cuda PUBLIC resolve_cuda) # install include headers install(FILES ${ReSolve_CUDA_HEADER_INSTALL} DESTINATION include/resolve/cuda) - diff --git a/resolve/cuda/CudaMemory.hpp b/resolve/cuda/CudaMemory.hpp index a56ef37d6..e4c9b5874 100644 --- a/resolve/cuda/CudaMemory.hpp +++ b/resolve/cuda/CudaMemory.hpp @@ -1,152 +1,126 @@ #pragma once -#include #include +#include #include "cuda_check_errors.hpp" namespace ReSolve { - namespace memory - { - /** - * @brief Class containing wrappers for CUDA API functions. - * - * All wrappers are implemented as static functions returning integer - * error code from CUDA API functions. - * - * @author Slaven Peles - */ - struct Cuda - { - static void deviceSynchronize() - { - cudaDeviceSynchronize(); - } - - static int getLastDeviceError() - { - return static_cast(cudaGetLastError()); - } - - /** - * @brief deletes variable from device - * - * @param v - a variable on the device - * - * @post v is freed from the device - */ - static int deleteOnDevice(void* v) - { - return checkCudaErrors(cudaFree(v)); - } +namespace memory +{ +/** + * @brief Class containing wrappers for CUDA API functions. 
+ * + * All wrappers are implemented as static functions returning integer + * error code from CUDA API functions. + * + * @author Slaven Peles + */ +struct Cuda { + static void deviceSynchronize() { cudaDeviceSynchronize(); } + + static int getLastDeviceError() { return static_cast(cudaGetLastError()); } - /** - * @brief allocates array v onto device - * - * @param v - pointer to the array to be allocated on the device - * @param n - number of array elements (int, size_t) - * - * @tparam T - Array element type - * @tparam I - Array index type - * - * @post v is now a array with size n on the device - */ - template - static int allocateArrayOnDevice(T** v, I n) - { - return checkCudaErrors(cudaMalloc((void**) v, sizeof(T) * n)); - } + /** + * @brief deletes variable from device + * + * @param v - a variable on the device + * + * @post v is freed from the device + */ + static int deleteOnDevice(void *v) { return checkCudaErrors(cudaFree(v)); } - /** - * @brief allocates buffer v onto device. - * - * The difference from the array is that buffer size is required in bytes, - * not number of elements. 
- * - * @param v - pointer to the buffer to be allocated on the device - * @param n - size of the buffer in bytes - * - * @tparam T - Buffer element data type type (typically void) - * @tparam I - Buffer size type (typically size_t) - * - * @post v is now a buffer of n bytes - */ - template - static int allocateBufferOnDevice(T** v, I n) - { - return checkCudaErrors(cudaMalloc((void**) v, n)); - } + /** + * @brief allocates array v onto device + * + * @param v - pointer to the array to be allocated on the device + * @param n - number of array elements (int, size_t) + * + * @tparam T - Array element type + * @tparam I - Array index type + * + * @post v is now a array with size n on the device + */ + template static int allocateArrayOnDevice(T **v, I n) { return checkCudaErrors(cudaMalloc((void **)v, sizeof(T) * n)); } - /** - * @brief Sets elements of device array v to zero - * - * @param v - pointer to the array to be allocated on the device - * @param n - number of the array elements to be set to zero - * - * @tparam T - Array element type - * @tparam I - Array index type - * - * @post First n elements of array v are set to zero - */ - template - static int setZeroArrayOnDevice(T* v, I n) - { - return checkCudaErrors(cudaMemset(v, 0, sizeof(T) * n)); - } + /** + * @brief allocates buffer v onto device. + * + * The difference from the array is that buffer size is required in bytes, + * not number of elements. + * + * @param v - pointer to the buffer to be allocated on the device + * @param n - size of the buffer in bytes + * + * @tparam T - Buffer element data type type (typically void) + * @tparam I - Buffer size type (typically size_t) + * + * @post v is now a buffer of n bytes + */ + template static int allocateBufferOnDevice(T **v, I n) { return checkCudaErrors(cudaMalloc((void **)v, n)); } - /** - * @brief Copies array `src` from device to the array `dst` on the host. 
- * - * @param[in] n - size of src array - * @param[in] src - array on device - * @param[out] dst - array on host - * - * @pre `src` is a pointer to an allocated array on the device - * @pre `dst` is allocated to size >= n on the host - * @post Content of `dst` is overwritten by the content of `src` - */ - template - static int copyArrayDeviceToHost(T* dst, const T* src, I n) - { - return checkCudaErrors(cudaMemcpy(dst, src, sizeof(T) * n, cudaMemcpyDeviceToHost)); - } + /** + * @brief Sets elements of device array v to zero + * + * @param v - pointer to the array to be allocated on the device + * @param n - number of the array elements to be set to zero + * + * @tparam T - Array element type + * @tparam I - Array index type + * + * @post First n elements of array v are set to zero + */ + template static int setZeroArrayOnDevice(T *v, I n) { return checkCudaErrors(cudaMemset(v, 0, sizeof(T) * n)); } - /** - * @brief Copies array `src` to the array `dst` on the device. - * - * @param n - size of src array - * @param src - array on device to be copied - * @param dst - array on device to be copied onto - * - * @pre `src` is a pointer to an allocated array on the device - * @pre `dst` is allocated to size >= n on the device - * @post Content of `dst` is overwritten by the content of `src` - */ - template - static int copyArrayDeviceToDevice(T* dst, const T* src, I n) - { - return checkCudaErrors(cudaMemcpy(dst, src, sizeof(T) * n, cudaMemcpyDeviceToDevice)); - } + /** + * @brief Copies array `src` from device to the array `dst` on the host. 
+ * + * @param[in] n - size of src array + * @param[in] src - array on device + * @param[out] dst - array on host + * + * @pre `src` is a pointer to an allocated array on the device + * @pre `dst` is allocated to size >= n on the host + * @post Content of `dst` is overwritten by the content of `src` + */ + template static int copyArrayDeviceToHost(T *dst, const T *src, I n) + { + return checkCudaErrors(cudaMemcpy(dst, src, sizeof(T) * n, cudaMemcpyDeviceToHost)); + } - /** - * @brief Copies array `src` from the host to the array `dst` on the device. - * - * @param n - size of src array - * @param src - array on the host to be copied - * @param dst - array on the device to be copied onto - * - * @pre `src` is a pointer to an allocated array on the host - * @pre `dst` is allocated to size >= n on the device - * @post Content of `dst` is overwritten by the content of `src` - */ - template - static int copyArrayHostToDevice(T* dst, const T* src, I n) - { - return checkCudaErrors(cudaMemcpy(dst, src, sizeof(T) * n, cudaMemcpyHostToDevice)); - } + /** + * @brief Copies array `src` to the array `dst` on the device. + * + * @param n - size of src array + * @param src - array on device to be copied + * @param dst - array on device to be copied onto + * + * @pre `src` is a pointer to an allocated array on the device + * @pre `dst` is allocated to size >= n on the device + * @post Content of `dst` is overwritten by the content of `src` + */ + template static int copyArrayDeviceToDevice(T *dst, const T *src, I n) + { + return checkCudaErrors(cudaMemcpy(dst, src, sizeof(T) * n, cudaMemcpyDeviceToDevice)); + } - }; + /** + * @brief Copies array `src` from the host to the array `dst` on the device. 
+ * + * @param n - size of src array + * @param src - array on the host to be copied + * @param dst - array on the device to be copied onto + * + * @pre `src` is a pointer to an allocated array on the host + * @pre `dst` is allocated to size >= n on the device + * @post Content of `dst` is overwritten by the content of `src` + */ + template static int copyArrayHostToDevice(T *dst, const T *src, I n) + { + return checkCudaErrors(cudaMemcpy(dst, src, sizeof(T) * n, cudaMemcpyHostToDevice)); } +}; +} // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/cuda/cudaKernels.cu b/resolve/cuda/cudaKernels.cu index 023e114f3..f0ffe2804 100644 --- a/resolve/cuda/cudaKernels.cu +++ b/resolve/cuda/cudaKernels.cu @@ -160,4 +160,4 @@ void matrix_row_sums(int n, double* result) { matrixInfNormPart1<<<1000,1024>>>(n, nnz, a_ia, a_val, result); -} \ No newline at end of file +} diff --git a/resolve/cuda/cudaKernels.h b/resolve/cuda/cudaKernels.h index 9c48783a8..63c73c2df 100644 --- a/resolve/cuda/cudaKernels.h +++ b/resolve/cuda/cudaKernels.h @@ -1,14 +1,5 @@ -void mass_inner_product_two_vectors(int n, - int i, - double* vec1, - double* vec2, - double* mvec, - double* result); -void mass_axpy(int n, int i, double* x, double* y, double* alpha); +void mass_inner_product_two_vectors(int n, int i, double *vec1, double *vec2, double *mvec, double *result); +void mass_axpy(int n, int i, double *x, double *y, double *alpha); -//needed for matrix inf nrm -void matrix_row_sums(int n, - int nnz, - int* a_ia, - double* a_val, - double* result); +// needed for matrix inf nrm +void matrix_row_sums(int n, int nnz, int *a_ia, double *a_val, double *result); diff --git a/resolve/cuda/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu index a1c53198c..4ddc23be1 100644 --- a/resolve/cuda/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -25,4 +25,4 @@ void set_array_const(index_type n, real_type val, real_type* arr) kernels::set_const<<>>(n, val, 
arr); } -}} // namespace ReSolve::vector \ No newline at end of file +}} // namespace ReSolve::vector diff --git a/resolve/cuda/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h index e85016e3f..3e0a09d8f 100644 --- a/resolve/cuda/cudaVectorKernels.h +++ b/resolve/cuda/cudaVectorKernels.h @@ -11,50 +11,55 @@ //**** See VectorKernels.hpp for kernel wrapper functions documentation ****// //***************************************************************************// -namespace ReSolve { namespace vector { +namespace ReSolve +{ +namespace vector +{ -namespace kernels { - // __global__ void adapt_diag_scale(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, - // index_type*, real_type*, index_type*, index_type*, real_type*, real_type*, real_type*, real_type*); +namespace kernels +{ +// __global__ void adapt_diag_scale(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, +// index_type*, real_type*, index_type*, index_type*, real_type*, real_type*, real_type*, real_type*); - // __global__ void adapt_row_max(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, - // index_type*, real_type*, index_type*, index_type*, real_type*); +// __global__ void adapt_row_max(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, +// index_type*, real_type*, index_type*, index_type*, real_type*); - // __global__ void add_const(index_type, index_type, index_type*); +// __global__ void add_const(index_type, index_type, index_type*); - /** - * @brief CUDA kernel that sets values of an array to a constant. 
- * - * @param[in] n - length of the array - * @param[in] val - the value the array is set to - * @param[out] arr - a pointer to the array - * - * @pre `arr` is allocated to size `n` - * @post `arr` elements are set to `val` - */ - __global__ void set_const(index_type n, real_type val, real_type* arr); +/** + * @brief CUDA kernel that sets values of an array to a constant. + * + * @param[in] n - length of the array + * @param[in] val - the value the array is set to + * @param[out] arr - a pointer to the array + * + * @pre `arr` is allocated to size `n` + * @post `arr` elements are set to `val` + */ +__global__ void set_const(index_type n, real_type val, real_type *arr); - // __global__ void add_vecs(index_type, real_type*, real_type, real_type*); +// __global__ void add_vecs(index_type, real_type*, real_type, real_type*); - // __global__ void mult_const(index_type, real_type, real_type*); +// __global__ void mult_const(index_type, real_type, real_type*); - // __global__ void add_diag(index_type, real_type, index_type*, index_type*, real_type*); +// __global__ void add_diag(index_type, real_type, index_type*, index_type*, real_type*); - // __global__ void inv_vec_scale(index_type, real_type*, real_type*); +// __global__ void inv_vec_scale(index_type, real_type*, real_type*); - // __global__ void vec_scale(index_type, real_type*, real_type*); +// __global__ void vec_scale(index_type, real_type*, real_type*); - // __global__ void concatenate(index_type, index_type, index_type, index_type, real_type*, index_type*, index_type*, - // real_type*, index_type*, index_type*, real_type*, index_type*, index_type*); +// __global__ void concatenate(index_type, index_type, index_type, index_type, real_type*, index_type*, index_type*, +// real_type*, index_type*, index_type*, real_type*, index_type*, index_type*); - // __global__ void row_scale(index_type, real_type*, index_type*, index_type*, real_type*, real_type*, - // real_type*, real_type*); +// __global__ void 
row_scale(index_type, real_type*, index_type*, index_type*, real_type*, real_type*, +// real_type*, real_type*); - // __global__ void diag_scale(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, - // index_type*, real_type*, real_type*, real_type*, index_type); +// __global__ void diag_scale(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, +// index_type*, real_type*, real_type*, real_type*, index_type); - // __global__ void row_max(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, index_type*, - // real_type* scale); +// __global__ void row_max(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, index_type*, +// real_type* scale); } // namespace kernels -}} // namespace ReSolve::vector \ No newline at end of file +} // namespace vector +} // namespace ReSolve diff --git a/resolve/cuda/cuda_check_errors.hpp b/resolve/cuda/cuda_check_errors.hpp index 00a2029e1..5e2ceeeec 100644 --- a/resolve/cuda/cuda_check_errors.hpp +++ b/resolve/cuda/cuda_check_errors.hpp @@ -1,9 +1,9 @@ /** * @file cuda_check_errors.hpp - * + * * Contains macro to get error code from CUDA functions and to stream * appropriate error output to Re::Solve's logger. 
- * + * * @author Kasia Swirydowicz * @author Slaven Peles */ @@ -11,18 +11,12 @@ #include -template -int check(T result, - char const *const func, - const char *const file, - int const line) +template int check(T result, char const *const func, const char *const file, int const line) { if (result) { - ReSolve::io::Logger::error() << "CUDA error in function " - << func << " at " << file << ":" << line - << ", error# " << result << "\n"; + ReSolve::io::Logger::error() << "CUDA error in function " << func << " at " << file << ":" << line << ", error# " << result << "\n"; return -1; } return 0; } -#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) \ No newline at end of file +#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) diff --git a/resolve/cusolver_defs.hpp b/resolve/cusolver_defs.hpp index 1bab52da9..ec0e4d269 100644 --- a/resolve/cusolver_defs.hpp +++ b/resolve/cusolver_defs.hpp @@ -3,7 +3,7 @@ * @file cusolver_defs.hpp * * @author Kasia Swirydowicz , PNNL - * + * * Contains prototypes of cuSOLVER functions not in public API. 
* */ @@ -11,95 +11,56 @@ #ifndef CUSOLVERDEFS_H #define CUSOLVERDEFS_H -#include "cusparse.h" #include "cusolverSp.h" +#include "cusolverSp_LOWLEVEL_PREVIEW.h" +#include "cusparse.h" #include -#include #include -#include "cusolverSp_LOWLEVEL_PREVIEW.h" +#include #include "cusolverRf.h" extern "C" { - /* - * prototype not in public header file - */ - struct csrgluInfo; - typedef struct csrgluInfo *csrgluInfo_t; - - cusolverStatus_t CUSOLVERAPI - cusolverSpCreateGluInfo(csrgluInfo_t *info); +/* + * prototype not in public header file + */ +struct csrgluInfo; +typedef struct csrgluInfo *csrgluInfo_t; - cusolverStatus_t CUSOLVERAPI - cusolverSpDestroyGluInfo(csrgluInfo_t info); +cusolverStatus_t CUSOLVERAPI cusolverSpCreateGluInfo(csrgluInfo_t *info); - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluSetup(cusolverSpHandle_t handle, - int m, - /* A can be base-0 or base-1 */ - int nnzA, - const cusparseMatDescr_t descrA, - const int* h_csrRowPtrA, - const int* h_csrColIndA, - const int* h_P, /* base-0 */ - const int* h_Q, /* base-0 */ - /* M can be base-0 or base-1 */ - int nnzM, - const cusparseMatDescr_t descrM, - const int* h_csrRowPtrM, - const int* h_csrColIndM, - csrgluInfo_t info); +cusolverStatus_t CUSOLVERAPI cusolverSpDestroyGluInfo(csrgluInfo_t info); - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluBufferSize(cusolverSpHandle_t handle, - csrgluInfo_t info, - size_t* pBufferSize); +cusolverStatus_t CUSOLVERAPI cusolverSpDgluSetup(cusolverSpHandle_t handle, int m, + /* A can be base-0 or base-1 */ + int nnzA, const cusparseMatDescr_t descrA, const int *h_csrRowPtrA, const int *h_csrColIndA, + const int *h_P, /* base-0 */ + const int *h_Q, /* base-0 */ + /* M can be base-0 or base-1 */ + int nnzM, const cusparseMatDescr_t descrM, const int *h_csrRowPtrM, const int *h_csrColIndM, + csrgluInfo_t info); - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluAnalysis(cusolverSpHandle_t handle, - csrgluInfo_t info, - void* workspace); +cusolverStatus_t CUSOLVERAPI 
cusolverSpDgluBufferSize(cusolverSpHandle_t handle, csrgluInfo_t info, size_t *pBufferSize); - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluReset(cusolverSpHandle_t handle, - int m, - /* A is original matrix */ - int nnzA, - const cusparseMatDescr_t descr_A, - const double* d_csrValA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - csrgluInfo_t info); +cusolverStatus_t CUSOLVERAPI cusolverSpDgluAnalysis(cusolverSpHandle_t handle, csrgluInfo_t info, void *workspace); - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluFactor(cusolverSpHandle_t handle, - csrgluInfo_t info, - void *workspace); +cusolverStatus_t CUSOLVERAPI cusolverSpDgluReset(cusolverSpHandle_t handle, int m, + /* A is original matrix */ + int nnzA, const cusparseMatDescr_t descr_A, const double *d_csrValA, const int *d_csrRowPtrA, + const int *d_csrColIndA, csrgluInfo_t info); - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluSolve(cusolverSpHandle_t handle, - int m, - /* A is original matrix */ - int nnzA, - const cusparseMatDescr_t descr_A, - const double *d_csrValA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - const double* d_b0, /* right hand side */ - double* d_x, /* left hand side */ - int* ite_refine_succ, - double* r_nrminf_ptr, - csrgluInfo_t info, - void* workspace); +cusolverStatus_t CUSOLVERAPI cusolverSpDgluFactor(cusolverSpHandle_t handle, csrgluInfo_t info, void *workspace); - cusolverStatus_t CUSOLVERAPI - cusolverSpDnrminf(cusolverSpHandle_t handle, - int n, - const double *x, - double* result, /* |x|_inf, host */ - void* d_work /* at least 8192 bytes */ - ); +cusolverStatus_t CUSOLVERAPI cusolverSpDgluSolve(cusolverSpHandle_t handle, int m, + /* A is original matrix */ + int nnzA, const cusparseMatDescr_t descr_A, const double *d_csrValA, const int *d_csrRowPtrA, + const int *d_csrColIndA, const double *d_b0, /* right hand side */ + double *d_x, /* left hand side */ + int *ite_refine_succ, double *r_nrminf_ptr, csrgluInfo_t info, void *workspace); +cusolverStatus_t 
CUSOLVERAPI cusolverSpDnrminf(cusolverSpHandle_t handle, int n, const double *x, double *result, /* |x|_inf, host */ + void *d_work /* at least 8192 bytes */ +); } // extern "C" #endif // CUSOLVERDEFS_H diff --git a/resolve/hip/CMakeLists.txt b/resolve/hip/CMakeLists.txt index fb71a3bd1..4b33b1216 100644 --- a/resolve/hip/CMakeLists.txt +++ b/resolve/hip/CMakeLists.txt @@ -6,28 +6,19 @@ ]] -set(ReSolve_HIP_SRC - hipKernels.hip - hipVectorKernels.hip - MemoryUtils.hip -) +set(ReSolve_HIP_SRC hipKernels.hip hipVectorKernels.hip MemoryUtils.hip) -set(ReSolve_HIP_HEADER_INSTALL - hipKernels.h - hipVectorKernels.h - HipMemory.hpp - hip_check_errors.hpp +set(ReSolve_HIP_HEADER_INSTALL hipKernels.h hipVectorKernels.h HipMemory.hpp + hip_check_errors.hpp ) set_source_files_properties(${ReSolve_HIP_SRC} PROPERTIES LANGUAGE HIP) -# First create HIP backend -# (this should really be HIP _API_ backend, -# separate backend will be needed for HIP SDK) +# First create HIP backend (this should really be HIP _API_ backend, separate +# backend will be needed for HIP SDK) add_library(resolve_backend_hip SHARED ${ReSolve_HIP_SRC}) target_link_libraries(resolve_backend_hip PRIVATE resolve_logger) target_link_libraries(resolve_backend_hip PUBLIC resolve_hip) # install include headers install(FILES ${ReSolve_HIP_HEADER_INSTALL} DESTINATION include/resolve/hip) - diff --git a/resolve/hip/HipMemory.hpp b/resolve/hip/HipMemory.hpp index a6a482a51..d443e99fc 100644 --- a/resolve/hip/HipMemory.hpp +++ b/resolve/hip/HipMemory.hpp @@ -1,152 +1,126 @@ #pragma once -#include #include +#include #include "hip_check_errors.hpp" namespace ReSolve { - namespace memory - { - /** - * @brief Class containing wrappers for CUDA API functions. - * - * All wrappers are implemented as static functions returning integer - * error code from CUDA API functions. 
- * - * @author Slaven Peles - */ - struct Hip - { - static void deviceSynchronize() - { - hipDeviceSynchronize(); - } - - static int getLastDeviceError() - { - return static_cast(hipGetLastError()); - } - - /** - * @brief deletes variable from device - * - * @param v - a variable on the device - * - * @post v is freed from the device - */ - static int deleteOnDevice(void* v) - { - return checkHipErrors(hipFree(v)); - } +namespace memory +{ +/** + * @brief Class containing wrappers for HIP API functions. + * + * All wrappers are implemented as static functions returning integer + * error code from HIP API functions. + * + * @author Slaven Peles + */ +struct Hip { + static void deviceSynchronize() { hipDeviceSynchronize(); } + + static int getLastDeviceError() { return static_cast(hipGetLastError()); } - /** - * @brief allocates array v onto device - * - * @param v - pointer to the array to be allocated on the device - * - * @param n - number of array elements (int, size_t) - * - * @tparam T - Array element type - * - * @tparam I - Array index type - * - * @post v is now a array with size n on the device - */ - template - static int allocateArrayOnDevice(T** v, I n) - { - return checkHipErrors(hipMalloc((void**) v, sizeof(T) * n)); - } + /** + * @brief deletes variable from device + * + * @param v - a variable on the device + * + * @post v is freed from the device + */ + static int deleteOnDevice(void *v) { return checkHipErrors(hipFree(v)); } - /** - * @brief allocates buffer v onto device. - * - * The difference from the array is that buffer size is required in bytes, - * not number of elements. 
- * - * @param v - pointer to the buffer to be allocated on the device - * @param n - size of the buffer in bytes - * - * @tparam T - Buffer element data type type (typically void) - * @tparam I - Buffer size type (typically size_t) - * - * @post v is now a buffer of n bytes - */ - template - static int allocateBufferOnDevice(T** v, I n) - { - return checkHipErrors(hipMalloc((void**) v, n)); - } + /** + * @brief allocates array v onto device + * + * @param v - pointer to the array to be allocated on the device + * @param n - number of array elements (int, size_t) + * + * @tparam T - Array element type + * @tparam I - Array index type + * + * @post v is now a array with size n on the device + */ + template static int allocateArrayOnDevice(T **v, I n) { return checkHipErrors(hipMalloc((void **)v, sizeof(T) * n)); } - /** - * @brief Sets elements of device array v to zero - * - * @param v - pointer to the array to be allocated on the device - * @param n - number of the array elements to be set to zero - * - * @tparam T - Array element type - * @tparam I - Array index type - * - * @post First n elements of array v are set to zero - */ - template - static int setZeroArrayOnDevice(T* v, I n) - { - return checkHipErrors(hipMemset(v, 0, sizeof(T) * n)); - } + /** + * @brief allocates buffer v onto device. + * + * The difference from the array is that buffer size is required in bytes, + * not number of elements. + * + * @param v - pointer to the buffer to be allocated on the device + * @param n - size of the buffer in bytes + * + * @tparam T - Buffer element data type type (typically void) + * @tparam I - Buffer size type (typically size_t) + * + * @post v is now a buffer of n bytes + */ + template static int allocateBufferOnDevice(T **v, I n) { return checkHipErrors(hipMalloc((void **)v, n)); } - /** - * @brief Copies array `src` from device to the array `dst` on the host. 
- * - * @param[in] n - size of src array - * @param[in] src - array on device - * @param[out] dst - array on host - * - * @pre `src` is a pointer to an allocated array on the device - * @pre `dst` is allocated to size >= n on the host - * @post Content of `dst` is overwritten by the content of `src` - */ - template - static int copyArrayDeviceToHost(T* dst, const T* src, I n) - { - return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyDeviceToHost)); - } + /** + * @brief Sets elements of device array v to zero + * + * @param v - pointer to the array to be allocated on the device + * @param n - number of the array elements to be set to zero + * + * @tparam T - Array element type + * @tparam I - Array index type + * + * @post First n elements of array v are set to zero + */ + template static int setZeroArrayOnDevice(T *v, I n) { return checkHipErrors(hipMemset(v, 0, sizeof(T) * n)); } - /** - * @brief Copies array `src` to the array `dst` on the device. - * - * @param n - size of src array - * @param src - array on device to be copied - * @param dst - array on device to be copied onto - * - * @pre `src` is a pointer to an allocated array on the device - * @pre `dst` is allocated to size >= n on the device - * @post Content of `dst` is overwritten by the content of `src` - */ - template - static int copyArrayDeviceToDevice(T* dst, const T* src, I n) - { - return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyDeviceToDevice)); - } + /** + * @brief Copies array `src` from device to the array `dst` on the host. 
+ * + * @param[in] n - size of src array + * @param[in] src - array on device + * @param[out] dst - array on host + * + * @pre `src` is a pointer to an allocated array on the device + * @pre `dst` is allocated to size >= n on the host + * @post Content of `dst` is overwritten by the content of `src` + */ + template static int copyArrayDeviceToHost(T *dst, const T *src, I n) + { + return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyDeviceToHost)); + } - /** - * @brief Copies array `src` from the host to the array `dst` on the device. - * - * @param n - size of src array - * @param src - array on the host to be copied - * @param dst - array on the device to be copied onto - * - * @pre `src` is a pointer to an allocated array on the host - * @pre `dst` is allocated to size >= n on the device - * @post Content of `dst` is overwritten by the content of `src` - */ - template - static int copyArrayHostToDevice(T* dst, const T* src, I n) - { - return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyHostToDevice)); - } + /** + * @brief Copies array `src` to the array `dst` on the device. + * + * @param n - size of src array + * @param src - array on device to be copied + * @param dst - array on device to be copied onto + * + * @pre `src` is a pointer to an allocated array on the device + * @pre `dst` is allocated to size >= n on the device + * @post Content of `dst` is overwritten by the content of `src` + */ + template static int copyArrayDeviceToDevice(T *dst, const T *src, I n) + { + return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyDeviceToDevice)); + } - }; + /** + * @brief Copies array `src` from the host to the array `dst` on the device. 
+ * + * @param n - size of src array + * @param src - array on the host to be copied + * @param dst - array on the device to be copied onto + * + * @pre `src` is a pointer to an allocated array on the host + * @pre `dst` is allocated to size >= n on the device + * @post Content of `dst` is overwritten by the content of `src` + */ + template static int copyArrayHostToDevice(T *dst, const T *src, I n) + { + return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyHostToDevice)); } +}; +} // namespace memory -} //namespace ReSolve +} // namespace ReSolve diff --git a/resolve/hip/hipKernels.h b/resolve/hip/hipKernels.h index 986efc841..dfcfccbdf 100644 --- a/resolve/hip/hipKernels.h +++ b/resolve/hip/hipKernels.h @@ -1,25 +1,10 @@ -void mass_inner_product_two_vectors(int n, - int i, - double* vec1, - double* vec2, - double* mvec, - double* result); -void mass_axpy(int n, int i, double* x, double* y, double* alpha); +void mass_inner_product_two_vectors(int n, int i, double *vec1, double *vec2, double *mvec, double *result); +void mass_axpy(int n, int i, double *x, double *y, double *alpha); -//needed for matrix inf nrm -void matrix_row_sums(int n, - int nnz, - int* a_ia, - double* a_val, - double* result); +// needed for matrix inf nrm +void matrix_row_sums(int n, int nnz, int *a_ia, double *a_val, double *result); // needed for triangular solve -void permuteVectorP(int n, - int* perm_vector, - double* vec_in, - double* vec_out); -void permuteVectorQ(int n, - int* perm_vector, - double* vec_in, - double* vec_out); +void permuteVectorP(int n, int *perm_vector, double *vec_in, double *vec_out); +void permuteVectorQ(int n, int *perm_vector, double *vec_in, double *vec_out); diff --git a/resolve/hip/hipVectorKernels.h b/resolve/hip/hipVectorKernels.h index cd23f8227..96675dd22 100644 --- a/resolve/hip/hipVectorKernels.h +++ b/resolve/hip/hipVectorKernels.h @@ -8,50 +8,55 @@ //**** See VectorKernels.hpp for kernel wrapper functions documentation ****// 
//***************************************************************************// -namespace ReSolve { namespace vector { +namespace ReSolve +{ +namespace vector +{ -namespace kernels { - // __global__ void adapt_diag_scale(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, - // index_type*, real_type*, index_type*, index_type*, real_type*, real_type*, real_type*, real_type*); +namespace kernels +{ +// __global__ void adapt_diag_scale(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, +// index_type*, real_type*, index_type*, index_type*, real_type*, real_type*, real_type*, real_type*); - // __global__ void adapt_row_max(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, - // index_type*, real_type*, index_type*, index_type*, real_type*); +// __global__ void adapt_row_max(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, +// index_type*, real_type*, index_type*, index_type*, real_type*); - // __global__ void add_const(index_type, index_type, index_type*); +// __global__ void add_const(index_type, index_type, index_type*); - /** - * @brief CUDA kernel that sets values of an array to a constant. - * - * @param[in] n - length of the array - * @param[in] val - the value the array is set to - * @param[out] arr - a pointer to the array - * - * @pre `arr` is allocated to size `n` - * @post `arr` elements are set to `val` - */ - __global__ void set_const(index_type n, real_type val, real_type* arr); +/** + * @brief CUDA kernel that sets values of an array to a constant. 
+ * + * @param[in] n - length of the array + * @param[in] val - the value the array is set to + * @param[out] arr - a pointer to the array + * + * @pre `arr` is allocated to size `n` + * @post `arr` elements are set to `val` + */ +__global__ void set_const(index_type n, real_type val, real_type *arr); - // __global__ void add_vecs(index_type, real_type*, real_type, real_type*); +// __global__ void add_vecs(index_type, real_type*, real_type, real_type*); - // __global__ void mult_const(index_type, real_type, real_type*); +// __global__ void mult_const(index_type, real_type, real_type*); - // __global__ void add_diag(index_type, real_type, index_type*, index_type*, real_type*); +// __global__ void add_diag(index_type, real_type, index_type*, index_type*, real_type*); - // __global__ void inv_vec_scale(index_type, real_type*, real_type*); +// __global__ void inv_vec_scale(index_type, real_type*, real_type*); - // __global__ void vec_scale(index_type, real_type*, real_type*); +// __global__ void vec_scale(index_type, real_type*, real_type*); - // __global__ void concatenate(index_type, index_type, index_type, index_type, real_type*, index_type*, index_type*, - // real_type*, index_type*, index_type*, real_type*, index_type*, index_type*); +// __global__ void concatenate(index_type, index_type, index_type, index_type, real_type*, index_type*, index_type*, +// real_type*, index_type*, index_type*, real_type*, index_type*, index_type*); - // __global__ void row_scale(index_type, real_type*, index_type*, index_type*, real_type*, real_type*, - // real_type*, real_type*); +// __global__ void row_scale(index_type, real_type*, index_type*, index_type*, real_type*, real_type*, +// real_type*, real_type*); - // __global__ void diag_scale(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, - // index_type*, real_type*, real_type*, real_type*, index_type); +// __global__ void diag_scale(index_type, index_type, real_type*, index_type*, 
index_type*, real_type*, index_type*, +// index_type*, real_type*, real_type*, real_type*, index_type); - // __global__ void row_max(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, index_type*, - // real_type* scale); +// __global__ void row_max(index_type, index_type, real_type*, index_type*, index_type*, real_type*, index_type*, index_type*, +// real_type* scale); } // namespace kernels -}} // namespace ReSolve::vector \ No newline at end of file +} // namespace vector +} // namespace ReSolve diff --git a/resolve/hip/hip_check_errors.hpp b/resolve/hip/hip_check_errors.hpp index 1f483d35a..bc8896564 100644 --- a/resolve/hip/hip_check_errors.hpp +++ b/resolve/hip/hip_check_errors.hpp @@ -1,9 +1,9 @@ /** * @file hip_check_errors.hpp - * + * * Contains macro to get error code from CUDA functions and to stream * appropriate error output to Re::Solve's logger. - * + * * @author Kasia Swirydowicz * @author Slaven Peles */ @@ -11,16 +11,10 @@ #include -template -int check(T result, - char const *const func, - const char *const file, - int const line) +template int check(T result, char const *const func, const char *const file, int const line) { if (result) { - ReSolve::io::Logger::error() << "HIP error in function " - << func << " at " << file << ":" << line - << ", error# " << result << "\n"; + ReSolve::io::Logger::error() << "HIP error in function " << func << " at " << file << ":" << line << ", error# " << result << "\n"; return -1; } return 0; diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index 565fa7c94..2bf8140b4 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -7,7 +7,7 @@ ]] # C++ code -set(Matrix_SRC +set(Matrix_SRC io.cpp Sparse.cpp Csr.cpp @@ -18,23 +18,14 @@ set(Matrix_SRC ) # C++ code that depends on CUDA SDK libraries -set(Matrix_CUDASDK_SRC - MatrixHandlerCuda.cpp -) +set(Matrix_CUDASDK_SRC MatrixHandlerCuda.cpp) # and on HIP -set(Matrix_ROCM_SRC - 
MatrixHandlerHip.cpp -) +set(Matrix_ROCM_SRC MatrixHandlerHip.cpp) # Header files to be installed -set(Matrix_HEADER_INSTALL - io.hpp - Sparse.hpp - Coo.hpp - Csr.hpp - Csc.hpp - MatrixHandler.hpp +set(Matrix_HEADER_INSTALL io.hpp Sparse.hpp Coo.hpp Csr.hpp Csc.hpp + MatrixHandler.hpp ) # Add CUDA matrix handler if CUDA support is enabled @@ -46,38 +37,32 @@ if(RESOLVE_USE_HIP) set(Matrix_SRC ${Matrix_SRC} ${Matrix_ROCM_SRC}) endif() - # Build shared library ReSolve::matrix add_library(resolve_matrix SHARED ${Matrix_SRC}) target_link_libraries(resolve_matrix PRIVATE resolve_logger resolve_vector) # Link to CUDA ReSolve backend if CUDA is support enabled -if (RESOLVE_USE_CUDA) +if(RESOLVE_USE_CUDA) target_link_libraries(resolve_matrix PUBLIC resolve_backend_cuda) endif() -if (RESOLVE_USE_HIP) +if(RESOLVE_USE_HIP) target_link_libraries(resolve_matrix PUBLIC resolve_backend_hip) endif() # Link to dummy device backend if GPU support is not enabled -if (NOT RESOLVE_USE_GPU) +if(NOT RESOLVE_USE_GPU) target_link_libraries(resolve_matrix PUBLIC resolve_backend_cpu) endif() - -target_include_directories(resolve_matrix INTERFACE - $ - $ +target_include_directories( + resolve_matrix INTERFACE $ + $ ) # # TODO: Make this PRIVATE dependency (requires refactoring ReSolve code) # target_link_libraries(ReSolve PUBLIC resolve_tpl) -# install(TARGETS ReSolve -# EXPORT ReSolveTargets -# ARCHIVE DESTINATION lib -# LIBRARY DESTINATION lib) -# install include headers +# install(TARGETS ReSolve EXPORT ReSolveTargets ARCHIVE DESTINATION lib LIBRARY +# DESTINATION lib) install include headers install(FILES ${Matrix_HEADER_INSTALL} DESTINATION include/resolve/matrix) - diff --git a/resolve/matrix/Coo.cpp b/resolve/matrix/Coo.cpp index 326eba597..df63bebf1 100644 --- a/resolve/matrix/Coo.cpp +++ b/resolve/matrix/Coo.cpp @@ -1,102 +1,221 @@ -#include // <-- includes memcpy +#include // <-- includes memcpy +#include #include -#include #include "Coo.hpp" +namespace ReSolve +{ 
+matrix::Coo::Coo() {} + +matrix::Coo::Coo(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) {} + +matrix::Coo::Coo(index_type n, index_type m, index_type nnz, bool symmetric, bool expanded) : Sparse(n, m, nnz, symmetric, expanded) {} + +matrix::Coo::~Coo() {} -namespace ReSolve +index_type *matrix::Coo::getRowData(memory::MemorySpace memspace) { - matrix::Coo::Coo() - { - } - - matrix::Coo::Coo(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) - { - } - - matrix::Coo::Coo(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) - { - } - - matrix::Coo::~Coo() - { - } - - index_type* matrix::Coo::getRowData(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; - } + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; + } +} + +index_type *matrix::Coo::getColData(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; + } +} + +real_type *matrix::Coo::getValues(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; + } +} + +index_type matrix::Coo::updateData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut) +{ + + // four cases (for now) + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + setNotUpdated(); + int 
control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { + control = 0; + } + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))) { + control = 1; + } + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { + control = 2; + } + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))) { + control = 3; } - index_type* matrix::Coo::getColData(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + if (memspaceOut == memory::HOST) { + // check if cpu data allocated + if (h_row_data_ == nullptr) { + this->h_row_data_ = new index_type[nnz_current]; + } + if (h_col_data_ == nullptr) { + this->h_col_data_ = new index_type[nnz_current]; + } + if (h_val_data_ == nullptr) { + this->h_val_data_ = new real_type[nnz_current]; } } - real_type* matrix::Coo::getValues(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; + if (memspaceOut == memory::DEVICE) { + // check if cuda data allocated + if (d_row_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); + } + if (d_col_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + } + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); } } - index_type matrix::Coo::updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) - { + switch (control) { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, 
nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + break; + case 3: // gpu->gpua + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + break; + default: + return -1; + } + return 0; +} - //four cases (for now) - index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - setNotUpdated(); - int control=-1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)){ control = 0;} - if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} - if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)){ control = 2;} - if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} +index_type matrix::Coo::updateData(index_type *row_data, index_type *col_data, real_type *val_data, index_type new_nnz, + memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) +{ + this->destroyMatrixData(memspaceOut); + this->nnz_ = new_nnz; + int i = this->updateData(row_data, col_data, val_data, memspaceIn, memspaceOut); + return i; +} - if (memspaceOut == memory::HOST) { - //check if cpu data allocated +index_type 
matrix::Coo::allocateMatrixData(memory::MemorySpace memspace) +{ + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + destroyMatrixData(memspace); // just in case + + if (memspace == memory::HOST) { + this->h_row_data_ = new index_type[nnz_current]; + std::fill(h_row_data_, h_row_data_ + nnz_current, 0); + this->h_col_data_ = new index_type[nnz_current]; + std::fill(h_col_data_, h_col_data_ + nnz_current, 0); + this->h_val_data_ = new real_type[nnz_current]; + std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + return 0; + } + + if (memspace == memory::DEVICE) { + mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + return 0; + } + return -1; +} + +int matrix::Coo::copyData(memory::MemorySpace memspaceOut) +{ + using namespace ReSolve::memory; + + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + + switch (memspaceOut) { + case HOST: + if ((d_data_updated_ == true) && (h_data_updated_ == false)) { if (h_row_data_ == nullptr) { - this->h_row_data_ = new index_type[nnz_current]; + h_row_data_ = new index_type[nnz_current]; } if (h_col_data_ == nullptr) { - this->h_col_data_ = new index_type[nnz_current]; + h_col_data_ = new index_type[nnz_current]; } if (h_val_data_ == nullptr) { - this->h_val_data_ = new real_type[nnz_current]; + h_val_data_ = new real_type[nnz_current]; } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_current); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; } - - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated + return 0; + case DEVICE: + if 
((d_data_updated_ == false) && (h_data_updated_ == true)) { if (d_row_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); } @@ -106,145 +225,26 @@ namespace ReSolve if (d_val_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); } - } - - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - break; - case 3://gpu->gpua - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - break; - default: - return -1; - } - return 0; - } - - index_type matrix::Coo::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) - { - this->destroyMatrixData(memspaceOut); - this->nnz_ = new_nnz; - int i = this->updateData(row_data, col_data, val_data, memspaceIn, memspaceOut); - return i; - } - - index_type matrix::Coo::allocateMatrixData(memory::MemorySpace memspace) - { - 
index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - destroyMatrixData(memspace);//just in case - - if (memspace == memory::HOST) { - this->h_row_data_ = new index_type[nnz_current]; - std::fill(h_row_data_, h_row_data_ + nnz_current, 0); - this->h_col_data_ = new index_type[nnz_current]; - std::fill(h_col_data_, h_col_data_ + nnz_current, 0); - this->h_val_data_ = new real_type[nnz_current]; - std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - return 0; - } - - if (memspace == memory::DEVICE) { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_current); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); + d_data_updated_ = true; owns_gpu_data_ = true; owns_gpu_vals_ = true; - return 0; } + return 0; + default: return -1; - } + } // switch +} - int matrix::Coo::copyData(memory::MemorySpace memspaceOut) - { - using namespace ReSolve::memory; - - index_type nnz_current = nnz_; - if (is_expanded_) { - nnz_current = nnz_expanded_; - } - - switch (memspaceOut) { - case HOST: - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if (h_row_data_ == nullptr) { - h_row_data_ = new index_type[nnz_current]; - } - if (h_col_data_ == nullptr) { - h_col_data_ = new index_type[nnz_current]; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_current]; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_current); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - } - return 0; - case DEVICE: - if ((d_data_updated_ == false) && 
(h_data_updated_ == true)) { - if (d_row_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); - } - if (d_col_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_current); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - } - return 0; - default: - return -1; - } // switch - } - - void matrix::Coo::print() - { - std::cout << " Row: Column: Value:\n"; - for(int i = 0; i < nnz_; ++i) { - std::cout << std::setw(12) << h_row_data_[i] << " " - << std::setw(12) << h_col_data_[i] << " " - << std::setw(20) << std::setprecision(16) << h_val_data_[i] << "\n"; - } +void matrix::Coo::print() +{ + std::cout << " Row: Column: Value:\n"; + for (int i = 0; i < nnz_; ++i) { + std::cout << std::setw(12) << h_row_data_[i] << " " << std::setw(12) << h_col_data_[i] << " " << std::setw(20) << std::setprecision(16) + << h_val_data_[i] << "\n"; } +} } // namespace ReSolve diff --git a/resolve/matrix/Coo.hpp b/resolve/matrix/Coo.hpp index bc67ceef1..740f31d9f 100644 --- a/resolve/matrix/Coo.hpp +++ b/resolve/matrix/Coo.hpp @@ -1,32 +1,34 @@ #pragma once #include "Sparse.hpp" -namespace ReSolve { namespace matrix { +namespace ReSolve +{ +namespace matrix +{ - class Coo : public Sparse - { - public: - Coo(); - Coo(index_type n, index_type m, index_type nnz); - Coo(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded); - ~Coo(); +class Coo : public Sparse +{ + public: + Coo(); + Coo(index_type n, index_type m, index_type nnz); + Coo(index_type n, index_type m, index_type nnz, bool symmetric, bool expanded); + ~Coo(); - virtual index_type* getRowData(memory::MemorySpace memspace); - virtual 
index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual index_type *getRowData(memory::MemorySpace memspace); + virtual index_type *getColData(memory::MemorySpace memspace); + virtual real_type *getValues(memory::MemorySpace memspace); - virtual index_type updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual index_type updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + virtual index_type updateData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut); + virtual index_type updateData(index_type *row_data, index_type *col_data, real_type *val_data, index_type new_nnz, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut); - virtual index_type allocateMatrixData(memory::MemorySpace memspace); + virtual index_type allocateMatrixData(memory::MemorySpace memspace); - virtual void print(); + virtual void print(); - virtual int copyData(memory::MemorySpace memspaceOut); - }; + virtual int copyData(memory::MemorySpace memspaceOut); +}; -}} // namespace ReSolve::matrix +} // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/Csc.cpp b/resolve/matrix/Csc.cpp index e6fed07c7..f8e3e9b50 100644 --- a/resolve/matrix/Csc.cpp +++ b/resolve/matrix/Csc.cpp @@ -1,236 +1,237 @@ -#include // <-- includes memcpy +#include // <-- includes memcpy #include "Csc.hpp" -namespace ReSolve +namespace ReSolve { - matrix::Csc::Csc() - { +matrix::Csc::Csc() {} + +matrix::Csc::Csc(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) {} + +matrix::Csc::Csc(index_type n, index_type m, index_type nnz, bool symmetric, bool expanded) : Sparse(n, m, nnz, symmetric, expanded) {} + +matrix::Csc::~Csc() {} + 
+index_type *matrix::Csc::getRowData(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; } +} - matrix::Csc::Csc(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) - { +index_type *matrix::Csc::getColData(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; } - - matrix::Csc::Csc(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) - { +} + +real_type *matrix::Csc::getValues(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; } +} - matrix::Csc::~Csc() - { +int matrix::Csc::updateData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut) +{ + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + // four cases (for now) + int control = -1; + setNotUpdated(); + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { + control = 0; + } + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))) { + control = 1; + } + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { + control = 2; + } + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))) { + control = 3; } - index_type* matrix::Csc::getRowData(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - 
default: - return nullptr; + if (memspaceOut == memory::HOST) { + // check if cpu data allocated + if (h_col_data_ == nullptr) { + this->h_col_data_ = new index_type[n_ + 1]; + } + if (h_row_data_ == nullptr) { + this->h_row_data_ = new index_type[nnz_current]; + } + if (h_val_data_ == nullptr) { + this->h_val_data_ = new real_type[nnz_current]; } } - index_type* matrix::Csc::getColData(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + if (memspaceOut == memory::DEVICE) { + // check if cuda data allocated + if (d_col_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); + } + if (d_row_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); + } + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); } } - real_type* matrix::Csc::getValues(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; - } + switch (control) { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_col_data_, col_data, n_ + 1); + mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_col_data_, col_data, n_ + 1); + mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_col_data_, col_data, n_ + 1); + mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); + 
mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, n_ + 1); + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + break; + default: + return -1; + } + return 0; +} + +int matrix::Csc::updateData(index_type *row_data, index_type *col_data, real_type *val_data, index_type new_nnz, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut) +{ + this->destroyMatrixData(memspaceOut); + this->nnz_ = new_nnz; + int i = this->updateData(col_data, row_data, val_data, memspaceIn, memspaceOut); + return i; +} + +int matrix::Csc::allocateMatrixData(memory::MemorySpace memspace) +{ + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + destroyMatrixData(memspace); // just in case + + if (memspace == memory::HOST) { + this->h_col_data_ = new index_type[n_ + 1]; + std::fill(h_col_data_, h_col_data_ + n_ + 1, 0); + this->h_row_data_ = new index_type[nnz_current]; + std::fill(h_row_data_, h_row_data_ + nnz_current, 0); + this->h_val_data_ = new real_type[nnz_current]; + std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + return 0; + } + + if (memspace == memory::DEVICE) { + mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + return 0; } + return -1; +} + +int matrix::Csc::copyData(memory::MemorySpace memspaceOut) +{ + using namespace ReSolve::memory; - int matrix::Csc::updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace 
memspaceOut) - { - index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - //four cases (for now) - int control=-1; - setNotUpdated(); - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} - if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} - if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { control = 2;} - if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + + switch (memspaceOut) { + case HOST: + if ((d_data_updated_ == true) && (h_data_updated_ == false)) { if (h_col_data_ == nullptr) { - this->h_col_data_ = new index_type[n_ + 1]; + h_col_data_ = new index_type[n_ + 1]; } if (h_row_data_ == nullptr) { - this->h_row_data_ = new index_type[nnz_current]; - } + h_row_data_ = new index_type[nnz_current]; + } if (h_val_data_ == nullptr) { - this->h_val_data_ = new real_type[nnz_current]; + h_val_data_ = new real_type[nnz_current]; } + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; } - - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated + return 0; + case DEVICE: + if ((d_data_updated_ == false) && (h_data_updated_ == true)) { if (d_col_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); } if (d_row_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); } if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); } - } - - switch(control) 
{ - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_col_data_, col_data, n_ + 1); - mem_.copyArrayHostToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_col_data_, col_data, n_ + 1); - mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_col_data_, col_data, n_ + 1); - mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - break; - case 3://gpu->gpu - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, n_ + 1); - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - break; - default: - return -1; - } - return 0; - - } - - int matrix::Csc::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) - { - this->destroyMatrixData(memspaceOut); - this->nnz_ = new_nnz; - int i = this->updateData(col_data, row_data, val_data, memspaceIn, memspaceOut); - return i; - } - - int matrix::Csc::allocateMatrixData(memory::MemorySpace memspace) - { - index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - destroyMatrixData(memspace);//just in case - - if (memspace == memory::HOST) { - this->h_col_data_ = new index_type[n_ + 1]; - std::fill(h_col_data_, h_col_data_ + n_ + 1, 0); - this->h_row_data_ = new index_type[nnz_current]; - 
std::fill(h_row_data_, h_row_data_ + nnz_current, 0); - this->h_val_data_ = new real_type[nnz_current]; - std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - return 0; - } - - if (memspace == memory::DEVICE) { - mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); + d_data_updated_ = true; owns_gpu_data_ = true; owns_gpu_vals_ = true; - return 0; } + return 0; + default: return -1; - } - - int matrix::Csc::copyData(memory::MemorySpace memspaceOut) - { - using namespace ReSolve::memory; - - index_type nnz_current = nnz_; - if (is_expanded_) { - nnz_current = nnz_expanded_; - } - - switch(memspaceOut) { - case HOST: - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if (h_col_data_ == nullptr) { - h_col_data_ = new index_type[n_ + 1]; - } - if (h_row_data_ == nullptr) { - h_row_data_ = new index_type[nnz_current]; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_current]; - } - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - } - return 0; - case DEVICE: - if ((d_data_updated_ == false) && (h_data_updated_ == true)) { - if (d_col_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); - } - if (d_row_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - } - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, n_ 
+ 1); - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - } - return 0; - default: - return -1; - } // switch - } + } // switch } +} // namespace ReSolve diff --git a/resolve/matrix/Csc.hpp b/resolve/matrix/Csc.hpp index 8a5dc551e..22200d0fd 100644 --- a/resolve/matrix/Csc.hpp +++ b/resolve/matrix/Csc.hpp @@ -1,33 +1,34 @@ #pragma once #include "Sparse.hpp" -namespace ReSolve { namespace matrix { +namespace ReSolve +{ +namespace matrix +{ - class Csc : public Sparse - { - public: - Csc(); - Csc(index_type n, index_type m, index_type nnz); - Csc(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded); - ~Csc(); +class Csc : public Sparse +{ + public: + Csc(); + Csc(index_type n, index_type m, index_type nnz); + Csc(index_type n, index_type m, index_type nnz, bool symmetric, bool expanded); + ~Csc(); - virtual index_type* getRowData(memory::MemorySpace memspace); - virtual index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual index_type *getRowData(memory::MemorySpace memspace); + virtual index_type *getColData(memory::MemorySpace memspace); + virtual real_type *getValues(memory::MemorySpace memspace); - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + virtual int updateData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut); + virtual int updateData(index_type *row_data, index_type *col_data, real_type *val_data, index_type new_nnz, 
memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut); - virtual int allocateMatrixData(memory::MemorySpace memspace); + virtual int allocateMatrixData(memory::MemorySpace memspace); - virtual void print() {return;} + virtual void print() { return; } - virtual int copyData(memory::MemorySpace memspaceOut); + virtual int copyData(memory::MemorySpace memspaceOut); +}; - }; - -}} // namespace ReSolve::matrix +} // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index 0c08b641f..762126414 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -1,240 +1,240 @@ -#include // <-- includes memcpy +#include // <-- includes memcpy #include "Csr.hpp" -namespace ReSolve +namespace ReSolve { - matrix::Csr::Csr() - { +matrix::Csr::Csr() {} + +matrix::Csr::Csr(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) {} + +matrix::Csr::Csr(index_type n, index_type m, index_type nnz, bool symmetric, bool expanded) : Sparse(n, m, nnz, symmetric, expanded) {} + +matrix::Csr::~Csr() {} + +index_type *matrix::Csr::getRowData(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_row_data_; + case DEVICE: + return this->d_row_data_; + default: + return nullptr; } +} - matrix::Csr::Csr(index_type n, index_type m, index_type nnz) : Sparse(n, m, nnz) - { +index_type *matrix::Csr::getColData(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case HOST: + return this->h_col_data_; + case DEVICE: + return this->d_col_data_; + default: + return nullptr; } - - matrix::Csr::Csr(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded) : Sparse(n, m, nnz, symmetric, expanded) - { +} + +real_type *matrix::Csr::getValues(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + copyData(memspace); + switch (memspace) { + case 
HOST: + return this->h_val_data_; + case DEVICE: + return this->d_val_data_; + default: + return nullptr; } +} - matrix::Csr::~Csr() - { +int matrix::Csr::updateData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut) +{ + // four cases (for now) + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + setNotUpdated(); + int control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { + control = 0; + } + if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))) { + control = 1; + } + if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { + control = 2; + } + if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))) { + control = 3; } - index_type* matrix::Csr::getRowData(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_row_data_; - case DEVICE: - return this->d_row_data_; - default: - return nullptr; + if (memspaceOut == memory::HOST) { + // check if cpu data allocated + if (h_row_data_ == nullptr) { + this->h_row_data_ = new index_type[n_ + 1]; + } + if (h_col_data_ == nullptr) { + this->h_col_data_ = new index_type[nnz_current]; + } + if (h_val_data_ == nullptr) { + this->h_val_data_ = new real_type[nnz_current]; } } - index_type* matrix::Csr::getColData(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_col_data_; - case DEVICE: - return this->d_col_data_; - default: - return nullptr; + if (memspaceOut == memory::DEVICE) { + // check if cuda data allocated + if (d_row_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + } + if (d_col_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + } + if (d_val_data_ == nullptr) { + 
mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); } } - real_type* matrix::Csr::getValues(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - copyData(memspace); - switch (memspace) { - case HOST: - return this->h_val_data_; - case DEVICE: - return this->d_val_data_; - default: - return nullptr; - } + // copy + switch (control) { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); + mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); + mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); + d_data_updated_ = true; + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + break; + default: + return -1; + } + return 0; +} + +int matrix::Csr::updateData(index_type *row_data, index_type *col_data, real_type *val_data, index_type new_nnz, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut) +{ + this->destroyMatrixData(memspaceOut); + this->nnz_ = new_nnz; + int i = this->updateData(row_data, col_data, val_data, memspaceIn, memspaceOut); + return i; +} + +int 
matrix::Csr::allocateMatrixData(memory::MemorySpace memspace) +{ + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + destroyMatrixData(memspace); // just in case + + if (memspace == memory::HOST) { + this->h_row_data_ = new index_type[n_ + 1]; + std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); + this->h_col_data_ = new index_type[nnz_current]; + std::fill(h_col_data_, h_col_data_ + nnz_current, 0); + this->h_val_data_ = new real_type[nnz_current]; + std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); + owns_cpu_data_ = true; + owns_cpu_vals_ = true; + return 0; + } + + if (memspace == memory::DEVICE) { + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + owns_gpu_data_ = true; + owns_gpu_vals_ = true; + return 0; + } + return -1; +} + +int matrix::Csr::copyData(memory::MemorySpace memspaceOut) +{ + using namespace ReSolve::memory; + + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; } - int matrix::Csr::updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) - { - //four cases (for now) - index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - setNotUpdated(); - int control = -1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} - if ((memspaceIn == memory::HOST) && ((memspaceOut == memory::DEVICE))){ control = 1;} - if (((memspaceIn == memory::DEVICE)) && (memspaceOut == memory::HOST)) { control = 2;} - if (((memspaceIn == memory::DEVICE)) && ((memspaceOut == memory::DEVICE))){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated + switch (memspaceOut) { + case HOST: + // check if we need to copy or not + if ((d_data_updated_ == true) && (h_data_updated_ == false)) { if (h_row_data_ == nullptr) { 
- this->h_row_data_ = new index_type[n_ + 1]; + h_row_data_ = new index_type[n_ + 1]; } if (h_col_data_ == nullptr) { - this->h_col_data_ = new index_type[nnz_current]; - } + h_col_data_ = new index_type[nnz_current]; + } if (h_val_data_ == nullptr) { - this->h_val_data_ = new real_type[nnz_current]; + h_val_data_ = new real_type[nnz_current]; } + mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); + mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_current); + mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); + h_data_updated_ = true; + owns_cpu_data_ = true; + owns_cpu_vals_ = true; } - - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated + return 0; + case DEVICE: + if ((d_data_updated_ == false) && (h_data_updated_ == true)) { if (d_row_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); + mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); } if (d_col_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); } if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); } - } - - - //copy - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayHostToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - break; - case 2://gpu->cpu - mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - break; - case 1://cpu->gpu - mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, val_data, 
nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - break; - case 3://gpu->gpu - mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1); - mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current); - mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - break; - default: - return -1; - } - return 0; - } - - int matrix::Csr::updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) - { - this->destroyMatrixData(memspaceOut); - this->nnz_ = new_nnz; - int i = this->updateData(row_data, col_data, val_data, memspaceIn, memspaceOut); - return i; - } - - int matrix::Csr::allocateMatrixData(memory::MemorySpace memspace) - { - index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - destroyMatrixData(memspace);//just in case - - if (memspace == memory::HOST) { - this->h_row_data_ = new index_type[n_ + 1]; - std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); - this->h_col_data_ = new index_type[nnz_current]; - std::fill(h_col_data_, h_col_data_ + nnz_current, 0); - this->h_val_data_ = new real_type[nnz_current]; - std::fill(h_val_data_, h_val_data_ + nnz_current, 0.0); - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - return 0; - } - - if (memspace == memory::DEVICE) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); + mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); + mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_current); + mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); + d_data_updated_ = true; owns_gpu_data_ = true; owns_gpu_vals_ = true; - return 0; } + return 0; + default: return -1; - } - - int matrix::Csr::copyData(memory::MemorySpace 
memspaceOut) - { - using namespace ReSolve::memory; - - index_type nnz_current = nnz_; - if (is_expanded_) { - nnz_current = nnz_expanded_; - } - - switch (memspaceOut) { - case HOST: - //check if we need to copy or not - if ((d_data_updated_ == true) && (h_data_updated_ == false)) { - if (h_row_data_ == nullptr) { - h_row_data_ = new index_type[n_ + 1]; - } - if (h_col_data_ == nullptr) { - h_col_data_ = new index_type[nnz_current]; - } - if (h_val_data_ == nullptr) { - h_val_data_ = new real_type[nnz_current]; - } - mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); - mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_current); - mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_current); - h_data_updated_ = true; - owns_cpu_data_ = true; - owns_cpu_vals_ = true; - } - return 0; - case DEVICE: - if ((d_data_updated_ == false) && (h_data_updated_ == true)) { - if (d_row_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - } - if (d_col_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - } - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - } - mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); - mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_current); - mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_current); - d_data_updated_ = true; - owns_gpu_data_ = true; - owns_gpu_vals_ = true; - } - return 0; - default: - return -1; - } // switch + } // switch } -} // namespace ReSolve - +} // namespace ReSolve diff --git a/resolve/matrix/Csr.hpp b/resolve/matrix/Csr.hpp index a5d8f6827..59ebfd265 100644 --- a/resolve/matrix/Csr.hpp +++ b/resolve/matrix/Csr.hpp @@ -1,35 +1,37 @@ #pragma once #include -namespace ReSolve { namespace matrix { +namespace ReSolve +{ +namespace matrix +{ - class Csr : public Sparse - { - public: - Csr(); +class Csr : public Sparse +{ + public: + Csr(); - Csr(index_type n, index_type m, index_type nnz); + 
Csr(index_type n, index_type m, index_type nnz); - Csr(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded); + Csr(index_type n, index_type m, index_type nnz, bool symmetric, bool expanded); - ~Csr(); + ~Csr(); - virtual index_type* getRowData(memory::MemorySpace memspace); - virtual index_type* getColData(memory::MemorySpace memspace); - virtual real_type* getValues( memory::MemorySpace memspace); + virtual index_type *getRowData(memory::MemorySpace memspace); + virtual index_type *getColData(memory::MemorySpace memspace); + virtual real_type *getValues(memory::MemorySpace memspace); - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + virtual int updateData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut); + virtual int updateData(index_type *row_data, index_type *col_data, real_type *val_data, index_type new_nnz, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut); - virtual int allocateMatrixData(memory::MemorySpace memspace); + virtual int allocateMatrixData(memory::MemorySpace memspace); - virtual void print() {return;} + virtual void print() { return; } - virtual int copyData(memory::MemorySpace memspaceOut); - }; + virtual int copyData(memory::MemorySpace memspaceOut); +}; -}} // namespace ReSolve::matrix +} // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index d67bb4b3c..3de174d98 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -1,14 +1,14 @@ #include -#include -#include +#include "MatrixHandler.hpp" +#include "MatrixHandlerCpu.hpp" 
#include #include #include -#include +#include #include -#include "MatrixHandler.hpp" -#include "MatrixHandlerCpu.hpp" +#include +#include #ifdef RESOLVE_USE_CUDA #include "MatrixHandlerCuda.hpp" @@ -17,227 +17,222 @@ #include "MatrixHandlerHip.hpp" #endif -namespace ReSolve { - // Create a shortcut name for Logger static class - using out = io::Logger; - - /** - * @brief Default constructor - * - * @post Instantiates CPU and CUDA matrix handlers, but does not - * create a workspace. - * - * @todo There is little utility for the default constructor. Rethink its purpose. - * Consider making it private method. - */ - MatrixHandler::MatrixHandler() - { - new_matrix_ = true; - cpuImpl_ = new MatrixHandlerCpu(); - } - - /** - * @brief Destructor - * - */ - MatrixHandler::~MatrixHandler() - { - delete cpuImpl_; - if (isCudaEnabled_) delete cudaImpl_; - if (isHipEnabled_) delete hipImpl_; - } - - /** - * @brief Constructor taking pointer to the workspace as its parameter. - * - * @note The CPU implementation currently does not require a workspace. - * The workspace pointer parameter is provided for forward compatibility. - */ - MatrixHandler::MatrixHandler(LinAlgWorkspaceCpu* new_workspace) - { - cpuImpl_ = new MatrixHandlerCpu(new_workspace); - isCpuEnabled_ = true; - isCudaEnabled_ = false; - } +namespace ReSolve +{ +// Create a shortcut name for Logger static class +using out = io::Logger; + +/** + * @brief Default constructor + * + * @post Instantiates CPU and CUDA matrix handlers, but does not + * create a workspace. + * + * @todo There is little utility for the default constructor. Rethink its purpose. + * Consider making it private method. 
+ */ +MatrixHandler::MatrixHandler() +{ + new_matrix_ = true; + cpuImpl_ = new MatrixHandlerCpu(); +} + +/** + * @brief Destructor + * + */ +MatrixHandler::~MatrixHandler() +{ + delete cpuImpl_; + if (isCudaEnabled_) + delete cudaImpl_; + if (isHipEnabled_) + delete hipImpl_; +} + +/** + * @brief Constructor taking pointer to the workspace as its parameter. + * + * @note The CPU implementation currently does not require a workspace. + * The workspace pointer parameter is provided for forward compatibility. + */ +MatrixHandler::MatrixHandler(LinAlgWorkspaceCpu *new_workspace) +{ + cpuImpl_ = new MatrixHandlerCpu(new_workspace); + isCpuEnabled_ = true; + isCudaEnabled_ = false; +} #ifdef RESOLVE_USE_CUDA - /** - * @brief Constructor taking pointer to the CUDA workspace as its parameter. - * - * @post A CPU implementation instance is created because it is cheap and - * it does not require a workspace. - * - * @post A CUDA implementation instance is created with supplied workspace. - */ - MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA* new_workspace) - { - cpuImpl_ = new MatrixHandlerCpu(); - cudaImpl_ = new MatrixHandlerCuda(new_workspace); - isCpuEnabled_ = true; - isCudaEnabled_ = true; - } +/** + * @brief Constructor taking pointer to the CUDA workspace as its parameter. + * + * @post A CPU implementation instance is created because it is cheap and + * it does not require a workspace. + * + * @post A CUDA implementation instance is created with supplied workspace. + */ +MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA *new_workspace) +{ + cpuImpl_ = new MatrixHandlerCpu(); + cudaImpl_ = new MatrixHandlerCuda(new_workspace); + isCpuEnabled_ = true; + isCudaEnabled_ = true; +} #endif #ifdef RESOLVE_USE_HIP - /** - * @brief Constructor taking pointer to the CUDA workspace as its parameter. - * - * @post A CPU implementation instance is created because it is cheap and - * it does not require a workspace. 
- * - * @post A HIP implementation instance is created with supplied workspace. - */ - MatrixHandler::MatrixHandler(LinAlgWorkspaceHIP* new_workspace) - { - cpuImpl_ = new MatrixHandlerCpu(); - hipImpl_ = new MatrixHandlerHip(new_workspace); - isCpuEnabled_ = true; - isHipEnabled_ = true; - } +/** + * @brief Constructor taking pointer to the CUDA workspace as its parameter. + * + * @post A CPU implementation instance is created because it is cheap and + * it does not require a workspace. + * + * @post A HIP implementation instance is created with supplied workspace. + */ +MatrixHandler::MatrixHandler(LinAlgWorkspaceHIP *new_workspace) +{ + cpuImpl_ = new MatrixHandlerCpu(); + hipImpl_ = new MatrixHandlerHip(new_workspace); + isCpuEnabled_ = true; + isHipEnabled_ = true; +} #endif - void MatrixHandler::setValuesChanged(bool isValuesChanged, std::string memspace) - { - if (memspace == "cpu") { - cpuImpl_->setValuesChanged(isValuesChanged); - } else if (memspace == "cuda") { - cudaImpl_->setValuesChanged(isValuesChanged); - } else if (memspace == "hip") { - hipImpl_->setValuesChanged(isValuesChanged); - } else { - out::error() << "Unsupported device " << memspace << "\n"; - } +void MatrixHandler::setValuesChanged(bool isValuesChanged, std::string memspace) +{ + if (memspace == "cpu") { + cpuImpl_->setValuesChanged(isValuesChanged); + } else if (memspace == "cuda") { + cudaImpl_->setValuesChanged(isValuesChanged); + } else if (memspace == "hip") { + hipImpl_->setValuesChanged(isValuesChanged); + } else { + out::error() << "Unsupported device " << memspace << "\n"; } - - /** - * @brief Converts COO to CSR matrix format. - * - * Conversion takes place on CPU, and then CSR matrix is copied to `memspace`. 
- */ - int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) - { - //count nnzs first - index_type nnz_unpacked = 0; - index_type nnz = A_coo->getNnz(); - index_type n = A_coo->getNumRows(); - bool symmetric = A_coo->symmetric(); - bool expanded = A_coo->expanded(); - - index_type* nnz_counts = new index_type[n]; - std::fill_n(nnz_counts, n, 0); - index_type* coo_rows = A_coo->getRowData(memory::HOST); - index_type* coo_cols = A_coo->getColData(memory::HOST); - real_type* coo_vals = A_coo->getValues( memory::HOST); - - index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal - std::fill_n(diag_control, n, 0); - index_type nnz_unpacked_no_duplicates = 0; - index_type nnz_no_duplicates = nnz; - - - //maybe check if they exist? - for (index_type i = 0; i < nnz; ++i) - { - nnz_counts[coo_rows[i]]++; +} + +/** + * @brief Converts COO to CSR matrix format. + * + * Conversion takes place on CPU, and then CSR matrix is copied to `memspace`. + */ +int MatrixHandler::coo2csr(matrix::Coo *A_coo, matrix::Csr *A_csr, std::string memspace) +{ + // count nnzs first + index_type nnz_unpacked = 0; + index_type nnz = A_coo->getNnz(); + index_type n = A_coo->getNumRows(); + bool symmetric = A_coo->symmetric(); + bool expanded = A_coo->expanded(); + + index_type *nnz_counts = new index_type[n]; + std::fill_n(nnz_counts, n, 0); + index_type *coo_rows = A_coo->getRowData(memory::HOST); + index_type *coo_cols = A_coo->getColData(memory::HOST); + real_type *coo_vals = A_coo->getValues(memory::HOST); + + index_type *diag_control = new index_type[n]; // for DEDUPLICATION of the diagonal + std::fill_n(diag_control, n, 0); + index_type nnz_unpacked_no_duplicates = 0; + index_type nnz_no_duplicates = nnz; + + // maybe check if they exist? 
+ for (index_type i = 0; i < nnz; ++i) { + nnz_counts[coo_rows[i]]++; + nnz_unpacked++; + nnz_unpacked_no_duplicates++; + if ((coo_rows[i] != coo_cols[i]) && (symmetric) && (!expanded)) { + nnz_counts[coo_cols[i]]++; nnz_unpacked++; nnz_unpacked_no_duplicates++; - if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) - { - nnz_counts[coo_cols[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - } - if (coo_rows[i] == coo_cols[i]){ - if (diag_control[coo_rows[i]] > 0) { - //duplicate - nnz_unpacked_no_duplicates--; - nnz_no_duplicates--; - } - diag_control[coo_rows[i]]++; + } + if (coo_rows[i] == coo_cols[i]) { + if (diag_control[coo_rows[i]] > 0) { + // duplicate + nnz_unpacked_no_duplicates--; + nnz_no_duplicates--; } + diag_control[coo_rows[i]]++; } - A_csr->setExpanded(true); - A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); - index_type* csr_ia = new index_type[n+1]; - std::fill_n(csr_ia, n + 1, 0); - index_type* csr_ja = new index_type[nnz_unpacked]; - real_type* csr_a = new real_type[nnz_unpacked]; - index_type* nnz_shifts = new index_type[n]; - std::fill_n(nnz_shifts, n , 0); + } + A_csr->setExpanded(true); + A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); + index_type *csr_ia = new index_type[n + 1]; + std::fill_n(csr_ia, n + 1, 0); + index_type *csr_ja = new index_type[nnz_unpacked]; + real_type *csr_a = new real_type[nnz_unpacked]; + index_type *nnz_shifts = new index_type[n]; + std::fill_n(nnz_shifts, n, 0); - IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; + IndexValuePair *tmp = new IndexValuePair[nnz_unpacked]; - csr_ia[0] = 0; + csr_ia[0] = 0; - for (index_type i = 1; i < n + 1; ++i){ - csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); - } - - int r, start; + for (index_type i = 1; i < n + 1; ++i) { + csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i - 1] - 1); + } + int r, start; - for (index_type i = 0; i < nnz; ++i){ - //which row - r = coo_rows[i]; - start = csr_ia[r]; + for 
(index_type i = 0; i < nnz; ++i) { + // which row + r = coo_rows[i]; + start = csr_ia[r]; - if ((start + nnz_shifts[r]) > nnz_unpacked) { - out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; + if ((start + nnz_shifts[r]) > nnz_unpacked) { + out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; + } + if ((r == coo_cols[i]) && (diag_control[r] > 1)) { // diagonal, and there are duplicates + bool already_there = false; + for (index_type j = start; j < start + nnz_shifts[r]; ++j) { + index_type c = tmp[j].getIdx(); + if (c == r) { + real_type val = tmp[j].getValue(); + val += coo_vals[i]; + tmp[j].setValue(val); + already_there = true; + out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; + } } - if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates - bool already_there = false; - for (index_type j = start; j < start + nnz_shifts[r]; ++j) - { - index_type c = tmp[j].getIdx(); - if (c == r) { - real_type val = tmp[j].getValue(); - val += coo_vals[i]; - tmp[j].setValue(val); - already_there = true; - out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; - } - } - if (!already_there){ // first time this duplicates appears + if (!already_there) { // first time this duplicates appears - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - - nnz_shifts[r]++; - } - } else {//not diagonal tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - - if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) - { - r = coo_cols[i]; - start = csr_ia[r]; - if ((start + nnz_shifts[r]) > nnz_unpacked) - out::warning() << "index out of bounds (case 2) start: " << start 
<< "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - } + nnz_shifts[r]++; + } + } else { // not diagonal + tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); + tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); + nnz_shifts[r]++; + + if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) { + r = coo_cols[i]; + start = csr_ia[r]; + + if ((start + nnz_shifts[r]) > nnz_unpacked) + out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; + tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); + tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); + nnz_shifts[r]++; } } - //now sort whatever is inside rows + } + // now sort whatever is inside rows - for (int i = 0; i < n; ++i) - { + for (int i = 0; i < n; ++i) { - //now sorting (and adding 1) - int colStart = csr_ia[i]; - int colEnd = csr_ia[i + 1]; - int length = colEnd - colStart; - std::sort(&tmp[colStart],&tmp[colStart] + length); - } + // now sorting (and adding 1) + int colStart = csr_ia[i]; + int colEnd = csr_ia[i + 1]; + int length = colEnd - colStart; + std::sort(&tmp[colStart], &tmp[colStart] + length); + } - for (index_type i = 0; i < nnz_unpacked; ++i) - { - csr_ja[i] = tmp[i].getIdx(); - csr_a[i] = tmp[i].getValue(); - } + for (index_type i = 0; i < nnz_unpacked; ++i) { + csr_ja[i] = tmp[i].getIdx(); + csr_a[i] = tmp[i].getValue(); + } #if 0 for (int i = 0; isetNnz(nnz_no_duplicates); - if (memspace == "cpu"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::HOST); - } else { - if (memspace == "cuda"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::DEVICE); - } else if (memspace == "hip"){ - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::DEVICE); - } else { - //display error - } - } - delete [] nnz_counts; - delete [] tmp; - delete [] nnz_shifts; - delete [] csr_ia; - 
delete [] csr_ja; - delete [] csr_a; - delete [] diag_control; - - return 0; - } - - /** - * @brief Matrix vector product: result = alpha * A * x + beta * result - * - * @param[in] A - Sparse matrix - * @param[in] vec_x - Vector multiplied by the matrix - * @param[out] vec_result - Vector where the result is stored - * @param[in] alpha - scalar parameter - * @param[in] beta - scalar parameter - * @param[in] matrixFormat - Only CSR format is supported at this time - * @param[in] memspace - Device where the product is computed - * @return result := alpha * A * x + beta * result - */ - int MatrixHandler::matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrixFormat, - std::string memspace) - { - if (memspace == "cuda" ) { - return cudaImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); - } else if (memspace == "cpu") { - return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); + A_csr->setNnz(nnz_no_duplicates); + if (memspace == "cpu") { + A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::HOST); + } else { + if (memspace == "cuda") { + A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::DEVICE); } else if (memspace == "hip") { - return hipImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); + A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memory::DEVICE); } else { - out::error() << "Support for device " << memspace << " not implemented (yet)" << std::endl; - return 1; + // display error } } - - - int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) - { - if (memspace == "cuda") { - return cudaImpl_->csc2csr(A_csc, A_csr); - } else if (memspace == "hip") { - return hipImpl_->csc2csr(A_csc, A_csr); - } else if (memspace == "cpu") { - out::warning() << "Using untested csc2csr on CPU ..." 
<< std::endl; - return cpuImpl_->csc2csr(A_csc, A_csr); - } else { - out::error() << "csc2csr not implemented for " << memspace << " device." << std::endl; - return -1; - } + delete[] nnz_counts; + delete[] tmp; + delete[] nnz_shifts; + delete[] csr_ia; + delete[] csr_ja; + delete[] csr_a; + delete[] diag_control; + + return 0; +} + +/** + * @brief Matrix vector product: result = alpha * A * x + beta * result + * + * @param[in] A - Sparse matrix + * @param[in] vec_x - Vector multiplied by the matrix + * @param[out] vec_result - Vector where the result is stored + * @param[in] alpha - scalar parameter + * @param[in] beta - scalar parameter + * @param[in] matrixFormat - Only CSR format is supported at this time + * @param[in] memspace - Device where the product is computed + * @return result := alpha * A * x + beta * result + */ +int MatrixHandler::matvec(matrix::Sparse *A, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, + std::string matrixFormat, std::string memspace) +{ + if (memspace == "cuda") { + return cudaImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); + } else if (memspace == "cpu") { + return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); + } else if (memspace == "hip") { + return hipImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); + } else { + out::error() << "Support for device " << memspace << " not implemented (yet)" << std::endl; + return 1; + } +} + +int MatrixHandler::csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr, std::string memspace) +{ + if (memspace == "cuda") { + return cudaImpl_->csc2csr(A_csc, A_csr); + } else if (memspace == "hip") { + return hipImpl_->csc2csr(A_csc, A_csr); + } else if (memspace == "cpu") { + out::warning() << "Using untested csc2csr on CPU ..." << std::endl; + return cpuImpl_->csc2csr(A_csc, A_csr); + } else { + out::error() << "csc2csr not implemented for " << memspace << " device." 
<< std::endl; + return -1; } +} } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index cec610856..23109644c 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -2,82 +2,75 @@ #include #include - namespace ReSolve -{ - namespace vector - { - class Vector; - } - namespace matrix - { - class Sparse; - class Coo; - class Csc; - class Csr; - } - class LinAlgWorkspaceCpu; - class LinAlgWorkspaceCUDA; - class LinAlgWorkspaceHIP; - class MatrixHandlerImpl; +{ +namespace vector +{ +class Vector; } +namespace matrix +{ +class Sparse; +class Coo; +class Csc; +class Csr; +} // namespace matrix +class LinAlgWorkspaceCpu; +class LinAlgWorkspaceCUDA; +class LinAlgWorkspaceHIP; +class MatrixHandlerImpl; +} // namespace ReSolve +namespace ReSolve +{ -namespace ReSolve { +/** + * @brief this class encapsulates various matrix manipulation operations, + * commonly required by linear solvers. + * + * This includes: + * - Matrix format conversion: coo2csr, csr2csc + * - Matrix vector product (SpMV) + * - Matrix 1-norm + * + * The class uses pointer to implementation (PIMPL) idiom to create + * multiple matrix operation implementations running on CUDA and HIP devices + * as well as on CPU. + * + * @author Kasia Swirydowicz + * @author Slaven Peles + */ +class MatrixHandler +{ + using vector_type = vector::Vector; - /** - * @brief this class encapsulates various matrix manipulation operations, - * commonly required by linear solvers. - * - * This includes: - * - Matrix format conversion: coo2csr, csr2csc - * - Matrix vector product (SpMV) - * - Matrix 1-norm - * - * The class uses pointer to implementation (PIMPL) idiom to create - * multiple matrix operation implementations running on CUDA and HIP devices - * as well as on CPU. 
- * - * @author Kasia Swirydowicz - * @author Slaven Peles - */ - class MatrixHandler - { - using vector_type = vector::Vector; - - public: - MatrixHandler(); - MatrixHandler(LinAlgWorkspaceCpu* workspace); - MatrixHandler(LinAlgWorkspaceCUDA* workspace); - MatrixHandler(LinAlgWorkspaceHIP* workspace); - ~MatrixHandler(); + public: + MatrixHandler(); + MatrixHandler(LinAlgWorkspaceCpu *workspace); + MatrixHandler(LinAlgWorkspaceCUDA *workspace); + MatrixHandler(LinAlgWorkspaceHIP *workspace); + ~MatrixHandler(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); - int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); + int csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr, std::string memspace); + int coo2csr(matrix::Coo *A_coo, matrix::Csr *A_csr, std::string memspace); - /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped - int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrix_type, - std::string memspace); - int Matrix1Norm(matrix::Sparse *A, real_type* norm); - void setValuesChanged(bool toWhat, std::string memspace); - - private: - bool new_matrix_{true}; ///< if the structure changed, you need a new handler. 
+ /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped + int matvec(matrix::Sparse *A, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, std::string matrix_type, + std::string memspace); + int Matrix1Norm(matrix::Sparse *A, real_type *norm); + void setValuesChanged(bool toWhat, std::string memspace); - MemoryHandler mem_; ///< Device memory manager object - MatrixHandlerImpl* cpuImpl_{nullptr}; ///< Pointer to CPU implementation - MatrixHandlerImpl* cudaImpl_{nullptr}; ///< Pointer to CUDA implementation - MatrixHandlerImpl* hipImpl_{nullptr}; ///< Pointer to HIP implementation + private: + bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - bool isCpuEnabled_{false}; ///< true if CPU implementation is instantiated - bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated - bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated - }; + MemoryHandler mem_; ///< Device memory manager object + MatrixHandlerImpl *cpuImpl_{nullptr}; ///< Pointer to CPU implementation + MatrixHandlerImpl *cudaImpl_{nullptr}; ///< Pointer to CUDA implementation + MatrixHandlerImpl *hipImpl_{nullptr}; ///< Pointer to HIP implementation -} // namespace ReSolve + bool isCpuEnabled_{false}; ///< true if CPU implementation is instantiated + bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated + bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated +}; +} // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index d4799ffd6..4920c5208 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -1,168 +1,146 @@ #include #include -#include -#include +#include "MatrixHandlerCpu.hpp" #include #include #include -#include "MatrixHandlerCpu.hpp" - -namespace ReSolve { - // Create a shortcut name for 
Logger static class - using out = io::Logger; +#include +#include - MatrixHandlerCpu::MatrixHandlerCpu() - { +namespace ReSolve +{ +// Create a shortcut name for Logger static class +using out = io::Logger; + +MatrixHandlerCpu::MatrixHandlerCpu() {} + +MatrixHandlerCpu::~MatrixHandlerCpu() {} + +MatrixHandlerCpu::MatrixHandlerCpu(LinAlgWorkspaceCpu *new_workspace) { workspace_ = new_workspace; } + +void MatrixHandlerCpu::setValuesChanged(bool values_changed) { values_changed_ = values_changed; } + +/** + * @brief result := alpha * A * x + beta * result + */ +int MatrixHandlerCpu::matvec(matrix::Sparse *Ageneric, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, + std::string matrixFormat) +{ + using namespace constants; + // int error_sum = 0; + if (matrixFormat == "csr") { + matrix::Csr *A = (matrix::Csr *)Ageneric; + index_type *ia = A->getRowData(memory::HOST); + index_type *ja = A->getColData(memory::HOST); + real_type *a = A->getValues(memory::HOST); + + real_type *x_data = vec_x->getData(memory::HOST); + real_type *result_data = vec_result->getData(memory::HOST); + real_type sum; + real_type y; + real_type t; + real_type c; + + // Kahan algorithm for stability; Kahan-Babushka version didnt make a difference + for (int i = 0; i < A->getNumRows(); ++i) { + sum = 0.0; + c = 0.0; + for (int j = ia[i]; j < ia[i + 1]; ++j) { + y = (a[j] * x_data[ja[j]]) - c; + t = sum + y; + c = (t - sum) - y; + sum = t; + // sum += ( a[j] * x_data[ja[j]]); + } + sum *= (*alpha); + result_data[i] = result_data[i] * (*beta) + sum; + } + vec_result->setDataUpdated(memory::HOST); + return 0; + } else { + out::error() << "MatVec not implemented (yet) for " << matrixFormat << " matrix format." 
<< std::endl; + return 1; } - - MatrixHandlerCpu::~MatrixHandlerCpu() - { +} + +int MatrixHandlerCpu::Matrix1Norm(matrix::Sparse * /* A */, real_type * /* norm */) { return -1; } + +/** + * @brief Convert CSC to CSR matrix on the host + * + * @authors Slaven Peles , Daniel Reynolds (SMU), and + * David Gardner and Carol Woodward (LLNL) + */ +int MatrixHandlerCpu::csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr) +{ + // int error_sum = 0; TODO: Collect error output! + assert(A_csc->getNnz() == A_csr->getNnz()); + assert(A_csc->getNumRows() == A_csr->getNumColumns()); + assert(A_csr->getNumRows() == A_csc->getNumColumns()); + + index_type nnz = A_csc->getNnz(); + index_type n = A_csc->getNumColumns(); + + index_type *rowIdxCsc = A_csc->getRowData(memory::HOST); + index_type *colPtrCsc = A_csc->getColData(memory::HOST); + real_type *valuesCsc = A_csc->getValues(memory::HOST); + + index_type *rowPtrCsr = A_csr->getRowData(memory::HOST); + index_type *colIdxCsr = A_csr->getColData(memory::HOST); + real_type *valuesCsr = A_csr->getValues(memory::HOST); + + // Set all CSR row pointers to zero + for (index_type i = 0; i <= n; ++i) { + rowPtrCsr[i] = 0; } - MatrixHandlerCpu::MatrixHandlerCpu(LinAlgWorkspaceCpu* new_workspace) - { - workspace_ = new_workspace; + // Set all CSR values and column indices to zero + for (index_type i = 0; i < nnz; ++i) { + colIdxCsr[i] = 0; + valuesCsr[i] = 0.0; } - void MatrixHandlerCpu::setValuesChanged(bool values_changed) - { - values_changed_ = values_changed; + // Compute number of entries per row + for (index_type i = 0; i < nnz; ++i) { + rowPtrCsr[rowIdxCsc[i]]++; } + // Compute cumualtive sum of nnz per row + for (index_type row = 0, rowsum = 0; row < n; ++row) { + // Store value in row pointer to temp + index_type temp = rowPtrCsr[row]; - /** - * @brief result := alpha * A * x + beta * result - */ - int MatrixHandlerCpu::matvec(matrix::Sparse* Ageneric, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const 
real_type* beta, - std::string matrixFormat) - { - using namespace constants; - // int error_sum = 0; - if (matrixFormat == "csr") { - matrix::Csr* A = (matrix::Csr*) Ageneric; - index_type* ia = A->getRowData(memory::HOST); - index_type* ja = A->getColData(memory::HOST); - real_type* a = A->getValues( memory::HOST); - - real_type* x_data = vec_x->getData(memory::HOST); - real_type* result_data = vec_result->getData(memory::HOST); - real_type sum; - real_type y; - real_type t; - real_type c; - - //Kahan algorithm for stability; Kahan-Babushka version didnt make a difference - for (int i = 0; i < A->getNumRows(); ++i) { - sum = 0.0; - c = 0.0; - for (int j = ia[i]; j < ia[i+1]; ++j) { - y = ( a[j] * x_data[ja[j]]) - c; - t = sum + y; - c = (t - sum) - y; - sum = t; - // sum += ( a[j] * x_data[ja[j]]); - } - sum *= (*alpha); - result_data[i] = result_data[i]*(*beta) + sum; - } - vec_result->setDataUpdated(memory::HOST); - return 0; - } else { - out::error() << "MatVec not implemented (yet) for " - << matrixFormat << " matrix format." << std::endl; - return 1; - } - } + // Copy cumulative sum to the row pointer + rowPtrCsr[row] = rowsum; - int MatrixHandlerCpu::Matrix1Norm(matrix::Sparse* /* A */, real_type* /* norm */) - { - return -1; + // Update row sum + rowsum += temp; } + rowPtrCsr[n] = nnz; - /** - * @brief Convert CSC to CSR matrix on the host - * - * @authors Slaven Peles , Daniel Reynolds (SMU), and - * David Gardner and Carol Woodward (LLNL) - */ - int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) - { - // int error_sum = 0; TODO: Collect error output! 
- assert(A_csc->getNnz() == A_csr->getNnz()); - assert(A_csc->getNumRows() == A_csr->getNumColumns()); - assert(A_csr->getNumRows() == A_csc->getNumColumns()); - - index_type nnz = A_csc->getNnz(); - index_type n = A_csc->getNumColumns(); - - index_type* rowIdxCsc = A_csc->getRowData(memory::HOST); - index_type* colPtrCsc = A_csc->getColData(memory::HOST); - real_type* valuesCsc = A_csc->getValues( memory::HOST); - - index_type* rowPtrCsr = A_csr->getRowData(memory::HOST); - index_type* colIdxCsr = A_csr->getColData(memory::HOST); - real_type* valuesCsr = A_csr->getValues( memory::HOST); - - // Set all CSR row pointers to zero - for (index_type i = 0; i <= n; ++i) { - rowPtrCsr[i] = 0; - } + for (index_type col = 0; col < n; ++col) { + // Compute positions of column indices and values in CSR matrix and store them there + // Overwrites CSR row pointers in the process + for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col + 1]; jj++) { + index_type row = rowIdxCsc[jj]; + index_type dest = rowPtrCsr[row]; - // Set all CSR values and column indices to zero - for (index_type i = 0; i < nnz; ++i) { - colIdxCsr[i] = 0; - valuesCsr[i] = 0.0; - } + colIdxCsr[dest] = col; + valuesCsr[dest] = valuesCsc[jj]; - // Compute number of entries per row - for (index_type i = 0; i < nnz; ++i) { - rowPtrCsr[rowIdxCsc[i]]++; - } - - // Compute cumualtive sum of nnz per row - for (index_type row = 0, rowsum = 0; row < n; ++row) - { - // Store value in row pointer to temp - index_type temp = rowPtrCsr[row]; - - // Copy cumulative sum to the row pointer - rowPtrCsr[row] = rowsum; - - // Update row sum - rowsum += temp; - } - rowPtrCsr[n] = nnz; - - for (index_type col = 0; col < n; ++col) - { - // Compute positions of column indices and values in CSR matrix and store them there - // Overwrites CSR row pointers in the process - for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col+1]; jj++) - { - index_type row = rowIdxCsc[jj]; - index_type dest = rowPtrCsr[row]; - - colIdxCsr[dest] = 
col; - valuesCsr[dest] = valuesCsc[jj]; - - rowPtrCsr[row]++; - } - } - - // Restore CSR row pointer values - for (index_type row = 0, last = 0; row <= n; row++) - { - index_type temp = rowPtrCsr[row]; - rowPtrCsr[row] = last; - last = temp; + rowPtrCsr[row]++; } + } - return 0; + // Restore CSR row pointer values + for (index_type row = 0, last = 0; row <= n; row++) { + index_type temp = rowPtrCsr[row]; + rowPtrCsr[row] = last; + last = temp; } + return 0; +} + } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index b6e660668..46ab68b84 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -4,54 +4,49 @@ #include namespace ReSolve -{ - namespace vector - { - class Vector; - } - namespace matrix - { - class Sparse; - class Coo; - class Csc; - class Csr; - } - class LinAlgWorkspaceCpu; +{ +namespace vector +{ +class Vector; } +namespace matrix +{ +class Sparse; +class Coo; +class Csc; +class Csr; +} // namespace matrix +class LinAlgWorkspaceCpu; +} // namespace ReSolve - -namespace ReSolve { - /** - * @class MatrixHandlerCpu - * - * @brief CPU implementation of the matrix handler. 
- */ - class MatrixHandlerCpu : public MatrixHandlerImpl - { - using vector_type = vector::Vector; - - public: - MatrixHandlerCpu(); - MatrixHandlerCpu(LinAlgWorkspaceCpu* workspace); - virtual ~MatrixHandlerCpu(); - - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); - - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrix_type); - virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); - void setValuesChanged(bool isValuesChanged); - - private: - LinAlgWorkspaceCpu* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec - - // MemoryHandler mem_; ///< Device memory manager object not used for now - }; +namespace ReSolve +{ +/** + * @class MatrixHandlerCpu + * + * @brief CPU implementation of the matrix handler. + */ +class MatrixHandlerCpu : public MatrixHandlerImpl +{ + using vector_type = vector::Vector; + + public: + MatrixHandlerCpu(); + MatrixHandlerCpu(LinAlgWorkspaceCpu *workspace); + virtual ~MatrixHandlerCpu(); + + int csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr); + + virtual int matvec(matrix::Sparse *A, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, + std::string matrix_type); + virtual int Matrix1Norm(matrix::Sparse *A, real_type *norm); + void setValuesChanged(bool isValuesChanged); + + private: + LinAlgWorkspaceCpu *workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec + + // MemoryHandler mem_; ///< Device memory manager object not used for now +}; } // namespace ReSolve - diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index e0ac7bb4e..47b0bdae9 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -1,173 +1,109 @@ #include -#include -#include +#include "MatrixHandlerCuda.hpp" #include #include #include +#include +#include #include -#include "MatrixHandlerCuda.hpp" - 
-namespace ReSolve { - // Create a shortcut name for Logger static class - using out = io::Logger; - - MatrixHandlerCuda::~MatrixHandlerCuda() - { - } - - MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspaceCUDA* new_workspace) - { - workspace_ = new_workspace; - } - - void MatrixHandlerCuda::setValuesChanged(bool values_changed) - { - values_changed_ = values_changed; - } +namespace ReSolve +{ +// Create a shortcut name for Logger static class +using out = io::Logger; + +MatrixHandlerCuda::~MatrixHandlerCuda() {} + +MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspaceCUDA *new_workspace) { workspace_ = new_workspace; } + +void MatrixHandlerCuda::setValuesChanged(bool values_changed) { values_changed_ = values_changed; } + +int MatrixHandlerCuda::matvec(matrix::Sparse *Ageneric, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, + std::string matrixFormat) +{ + using namespace constants; + int error_sum = 0; + if (matrixFormat == "csr") { + matrix::Csr *A = dynamic_cast(Ageneric); + // result = alpha *A*x + beta * result + cusparseStatus_t status; + LinAlgWorkspaceCUDA *workspaceCUDA = workspace_; + cusparseDnVecDescr_t vecx = workspaceCUDA->getVecX(); + cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData(memory::DEVICE), CUDA_R_64F); + + cusparseDnVecDescr_t vecAx = workspaceCUDA->getVecY(); + cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData(memory::DEVICE), CUDA_R_64F); + + cusparseSpMatDescr_t matA = workspaceCUDA->getSpmvMatrixDescriptor(); + + void *buffer_spmv = workspaceCUDA->getSpmvBuffer(); + cusparseHandle_t handle_cusparse = workspaceCUDA->getCusparseHandle(); + if (values_changed_) { + status = cusparseCreateCsr(&matA, A->getNumRows(), A->getNumColumns(), A->getNnzExpanded(), A->getRowData(memory::DEVICE), + A->getColData(memory::DEVICE), A->getValues(memory::DEVICE), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); + error_sum += status; + values_changed_ = 
false; + } + if (!workspaceCUDA->matvecSetup()) { + // setup first, allocate, etc. + size_t bufferSize = 0; - int MatrixHandlerCuda::matvec(matrix::Sparse* Ageneric, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrixFormat) - { - using namespace constants; - int error_sum = 0; - if (matrixFormat == "csr") { - matrix::Csr* A = dynamic_cast(Ageneric); - //result = alpha *A*x + beta * result - cusparseStatus_t status; - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cusparseDnVecDescr_t vecx = workspaceCUDA->getVecX(); - cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData(memory::DEVICE), CUDA_R_64F); - - - cusparseDnVecDescr_t vecAx = workspaceCUDA->getVecY(); - cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData(memory::DEVICE), CUDA_R_64F); - - cusparseSpMatDescr_t matA = workspaceCUDA->getSpmvMatrixDescriptor(); - - void* buffer_spmv = workspaceCUDA->getSpmvBuffer(); - cusparseHandle_t handle_cusparse = workspaceCUDA->getCusparseHandle(); - if (values_changed_) { - status = cusparseCreateCsr(&matA, - A->getNumRows(), - A->getNumColumns(), - A->getNnzExpanded(), - A->getRowData(memory::DEVICE), - A->getColData(memory::DEVICE), - A->getValues( memory::DEVICE), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); - error_sum += status; - values_changed_ = false; - } - if (!workspaceCUDA->matvecSetup()) { - //setup first, allocate, etc. 
- size_t bufferSize = 0; - - status = cusparseSpMV_bufferSize(handle_cusparse, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &MINUSONE, - matA, - vecx, - &ONE, - vecAx, - CUDA_R_64F, - CUSPARSE_SPMV_CSR_ALG2, - &bufferSize); - error_sum += status; - mem_.deviceSynchronize(); - mem_.allocateBufferOnDevice(&buffer_spmv, bufferSize); - workspaceCUDA->setSpmvMatrixDescriptor(matA); - workspaceCUDA->setSpmvBuffer(buffer_spmv); - - workspaceCUDA->matvecSetupDone(); - } - - status = cusparseSpMV(handle_cusparse, - CUSPARSE_OPERATION_NON_TRANSPOSE, - alpha, - matA, - vecx, - beta, - vecAx, - CUDA_R_64F, - CUSPARSE_SPMV_CSR_ALG2, - buffer_spmv); + status = cusparseSpMV_bufferSize(handle_cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, &MINUSONE, matA, vecx, &ONE, vecAx, CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, &bufferSize); error_sum += status; mem_.deviceSynchronize(); - if (status) - out::error() << "Matvec status: " << status - << "Last error code: " << mem_.getLastDeviceError() << std::endl; - vec_result->setDataUpdated(memory::DEVICE); - - cusparseDestroyDnVec(vecx); - cusparseDestroyDnVec(vecAx); - return error_sum; - } else { - out::error() << "MatVec not implemented (yet) for " - << matrixFormat << " matrix format." 
<< std::endl; - return 1; - } - } + mem_.allocateBufferOnDevice(&buffer_spmv, bufferSize); + workspaceCUDA->setSpmvMatrixDescriptor(matA); + workspaceCUDA->setSpmvBuffer(buffer_spmv); - int MatrixHandlerCuda::Matrix1Norm(matrix::Sparse* /* A */, real_type* /* norm */) - { - return -1; - } + workspaceCUDA->matvecSetupDone(); + } - int MatrixHandlerCuda::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) - { - index_type error_sum = 0; - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - - A_csr->allocateMatrixData(memory::DEVICE); - index_type n = A_csc->getNumRows(); - index_type m = A_csc->getNumRows(); - index_type nnz = A_csc->getNnz(); - size_t bufferSize; - void* d_work; - cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(), - n, - m, - nnz, - A_csc->getValues( memory::DEVICE), - A_csc->getColData(memory::DEVICE), - A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), - A_csr->getRowData(memory::DEVICE), - A_csr->getColData(memory::DEVICE), - CUDA_R_64F, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - &bufferSize); - error_sum += status; - mem_.allocateBufferOnDevice(&d_work, bufferSize); - status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), - n, - m, - nnz, - A_csc->getValues( memory::DEVICE), - A_csc->getColData(memory::DEVICE), - A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), - A_csr->getRowData(memory::DEVICE), - A_csr->getColData(memory::DEVICE), - CUDA_R_64F, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - d_work); + status = cusparseSpMV(handle_cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, matA, vecx, beta, vecAx, CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG2, + buffer_spmv); error_sum += status; + mem_.deviceSynchronize(); + if (status) + out::error() << "Matvec status: " << status << "Last error code: " << mem_.getLastDeviceError() << std::endl; + 
vec_result->setDataUpdated(memory::DEVICE); + + cusparseDestroyDnVec(vecx); + cusparseDestroyDnVec(vecAx); return error_sum; - mem_.deleteOnDevice(d_work); + } else { + out::error() << "MatVec not implemented (yet) for " << matrixFormat << " matrix format." << std::endl; + return 1; } +} + +int MatrixHandlerCuda::Matrix1Norm(matrix::Sparse * /* A */, real_type * /* norm */) { return -1; } + +int MatrixHandlerCuda::csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr) +{ + index_type error_sum = 0; + LinAlgWorkspaceCUDA *workspaceCUDA = (LinAlgWorkspaceCUDA *)workspace_; + + A_csr->allocateMatrixData(memory::DEVICE); + index_type n = A_csc->getNumRows(); + index_type m = A_csc->getNumRows(); + index_type nnz = A_csc->getNnz(); + size_t bufferSize; + void *d_work; + cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize( + workspaceCUDA->getCusparseHandle(), n, m, nnz, A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), + A_csc->getRowData(memory::DEVICE), A_csr->getValues(memory::DEVICE), A_csr->getRowData(memory::DEVICE), A_csr->getColData(memory::DEVICE), + CUDA_R_64F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, CUSPARSE_CSR2CSC_ALG1, &bufferSize); + error_sum += status; + mem_.allocateBufferOnDevice(&d_work, bufferSize); + status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), n, m, nnz, A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), + A_csc->getRowData(memory::DEVICE), A_csr->getValues(memory::DEVICE), A_csr->getRowData(memory::DEVICE), + A_csr->getColData(memory::DEVICE), CUDA_R_64F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, CUSPARSE_CSR2CSC_ALG1, + d_work); + error_sum += status; + return error_sum; + mem_.deleteOnDevice(d_work); +} } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index efd4c3566..6cac23184 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -4,52 +4,47 @@ #include namespace 
ReSolve -{ - namespace vector - { - class Vector; - } - namespace matrix - { - class Sparse; - class Coo; - class Csc; - class Csr; - } - class LinAlgWorkspaceCUDA; +{ +namespace vector +{ +class Vector; } +namespace matrix +{ +class Sparse; +class Coo; +class Csc; +class Csr; +} // namespace matrix +class LinAlgWorkspaceCUDA; +} // namespace ReSolve + +namespace ReSolve +{ +/** + * @class MatrixHandlerCuda + * + * @brief CUDA implementation of the matrix handler. + */ +class MatrixHandlerCuda : public MatrixHandlerImpl +{ + using vector_type = vector::Vector; + public: + MatrixHandlerCuda(LinAlgWorkspaceCUDA *workspace); + virtual ~MatrixHandlerCuda(); -namespace ReSolve { - /** - * @class MatrixHandlerCuda - * - * @brief CUDA implementation of the matrix handler. - */ - class MatrixHandlerCuda : public MatrixHandlerImpl - { - using vector_type = vector::Vector; - - public: - MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); - virtual ~MatrixHandlerCuda(); + int csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr); + virtual int matvec(matrix::Sparse *A, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, + std::string matrix_type); + virtual int Matrix1Norm(matrix::Sparse *A, real_type *norm); + void setValuesChanged(bool isValuesChanged); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrix_type); - virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); - void setValuesChanged(bool isValuesChanged); - - private: - LinAlgWorkspaceCUDA* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec + private: + LinAlgWorkspaceCUDA *workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec - MemoryHandler mem_; ///< Device memory manager object - }; + MemoryHandler mem_; ///< Device memory manager object +}; } // namespace ReSolve - diff 
--git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index ff10e9731..6c6af9f77 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -1,155 +1,105 @@ #include -#include -#include +#include "MatrixHandlerHip.hpp" #include #include #include +#include +#include #include -#include "MatrixHandlerHip.hpp" -namespace ReSolve { - // Create a shortcut name for Logger static class - using out = io::Logger; +namespace ReSolve +{ +// Create a shortcut name for Logger static class +using out = io::Logger; - MatrixHandlerHip::~MatrixHandlerHip() - { - } +MatrixHandlerHip::~MatrixHandlerHip() {} - MatrixHandlerHip::MatrixHandlerHip(LinAlgWorkspaceHIP* new_workspace) - { - workspace_ = new_workspace; - } +MatrixHandlerHip::MatrixHandlerHip(LinAlgWorkspaceHIP *new_workspace) { workspace_ = new_workspace; } - void MatrixHandlerHip::setValuesChanged(bool values_changed) - { - values_changed_ = values_changed; - } +void MatrixHandlerHip::setValuesChanged(bool values_changed) { values_changed_ = values_changed; } + +int MatrixHandlerHip::matvec(matrix::Sparse *Ageneric, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, + std::string matrixFormat) +{ + using namespace constants; + int error_sum = 0; + if (matrixFormat == "csr") { + matrix::Csr *A = dynamic_cast(Ageneric); + // result = alpha *A*x + beta * result + rocsparse_status status; + LinAlgWorkspaceHIP *workspaceHIP = workspace_; + rocsparse_handle handle_rocsparse = workspaceHIP->getRocsparseHandle(); - int MatrixHandlerHip::matvec(matrix::Sparse* Ageneric, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrixFormat) - { - using namespace constants; - int error_sum = 0; - if (matrixFormat == "csr") { - matrix::Csr* A = dynamic_cast(Ageneric); - //result = alpha *A*x + beta * result - rocsparse_status status; - LinAlgWorkspaceHIP* workspaceHIP = 
workspace_; - - rocsparse_handle handle_rocsparse = workspaceHIP->getRocsparseHandle(); - - rocsparse_mat_info infoA = workspaceHIP->getSpmvMatrixInfo(); - rocsparse_mat_descr descrA = workspaceHIP->getSpmvMatrixDescriptor(); - - if (!workspaceHIP->matvecSetup()) { - //setup first, allocate, etc. - rocsparse_create_mat_descr(&(descrA)); - rocsparse_set_mat_index_base(descrA, rocsparse_index_base_zero); - rocsparse_set_mat_type(descrA, rocsparse_matrix_type_general); - - rocsparse_create_mat_info(&infoA); - - status = rocsparse_dcsrmv_analysis(handle_rocsparse, - rocsparse_operation_none, - A->getNumRows(), - A->getNumColumns(), - A->getNnzExpanded(), - descrA, - A->getValues( memory::DEVICE), - A->getRowData(memory::DEVICE), - A->getColData(memory::DEVICE), // cuda is used as "device" - infoA); - error_sum += status; - mem_.deviceSynchronize(); - - workspaceHIP->setSpmvMatrixDescriptor(descrA); - workspaceHIP->setSpmvMatrixInfo(infoA); - workspaceHIP->matvecSetupDone(); - } - - status = rocsparse_dcsrmv(handle_rocsparse, - rocsparse_operation_none, - A->getNumRows(), - A->getNumColumns(), - A->getNnzExpanded(), - alpha, - descrA, - A->getValues( memory::DEVICE), - A->getRowData(memory::DEVICE), - A->getColData(memory::DEVICE), - infoA, - vec_x->getData(memory::DEVICE), - beta, - vec_result->getData(memory::DEVICE)); + rocsparse_mat_info infoA = workspaceHIP->getSpmvMatrixInfo(); + rocsparse_mat_descr descrA = workspaceHIP->getSpmvMatrixDescriptor(); + if (!workspaceHIP->matvecSetup()) { + // setup first, allocate, etc. 
+ rocsparse_create_mat_descr(&(descrA)); + rocsparse_set_mat_index_base(descrA, rocsparse_index_base_zero); + rocsparse_set_mat_type(descrA, rocsparse_matrix_type_general); + + rocsparse_create_mat_info(&infoA); + + status = rocsparse_dcsrmv_analysis(handle_rocsparse, rocsparse_operation_none, A->getNumRows(), A->getNumColumns(), A->getNnzExpanded(), descrA, + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), + A->getColData(memory::DEVICE), // cuda is used as "device" + infoA); error_sum += status; mem_.deviceSynchronize(); - if (status) - out::error() << "Matvec status: " << status - << "Last error code: " << mem_.getLastDeviceError() << std::endl; - vec_result->setDataUpdated(memory::DEVICE); - - return error_sum; - } else { - out::error() << "MatVec not implemented (yet) for " - << matrixFormat << " matrix format." << std::endl; - return 1; - } - } - int MatrixHandlerHip::Matrix1Norm(matrix::Sparse* /* A */, real_type* /* norm */) - { - return -1; - } + workspaceHIP->setSpmvMatrixDescriptor(descrA); + workspaceHIP->setSpmvMatrixInfo(infoA); + workspaceHIP->matvecSetupDone(); + } - int MatrixHandlerHip::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) - { - index_type error_sum = 0; - LinAlgWorkspaceHIP* workspaceHIP = (LinAlgWorkspaceHIP*) workspace_; + status = rocsparse_dcsrmv(handle_rocsparse, rocsparse_operation_none, A->getNumRows(), A->getNumColumns(), A->getNnzExpanded(), alpha, descrA, + A->getValues(memory::DEVICE), A->getRowData(memory::DEVICE), A->getColData(memory::DEVICE), infoA, + vec_x->getData(memory::DEVICE), beta, vec_result->getData(memory::DEVICE)); - rocsparse_status status; - - A_csr->allocateMatrixData(memory::DEVICE); - index_type n = A_csc->getNumRows(); - index_type m = A_csc->getNumRows(); - index_type nnz = A_csc->getNnz(); - size_t bufferSize; - void* d_work; - - status = rocsparse_csr2csc_buffer_size(workspaceHIP->getRocsparseHandle(), - n, - m, - nnz, - A_csc->getColData(memory::DEVICE), - 
A_csc->getRowData(memory::DEVICE), - rocsparse_action_numeric, - &bufferSize); - - error_sum += status; - mem_.allocateBufferOnDevice(&d_work, bufferSize); - - status = rocsparse_dcsr2csc(workspaceHIP->getRocsparseHandle(), - n, - m, - nnz, - A_csc->getValues( memory::DEVICE), - A_csc->getColData(memory::DEVICE), - A_csc->getRowData(memory::DEVICE), - A_csr->getValues( memory::DEVICE), - A_csr->getRowData(memory::DEVICE), - A_csr->getColData(memory::DEVICE), - rocsparse_action_numeric, - rocsparse_index_base_zero, - d_work); error_sum += status; + mem_.deviceSynchronize(); + if (status) + out::error() << "Matvec status: " << status << "Last error code: " << mem_.getLastDeviceError() << std::endl; + vec_result->setDataUpdated(memory::DEVICE); + return error_sum; - mem_.deleteOnDevice(d_work); + } else { + out::error() << "MatVec not implemented (yet) for " << matrixFormat << " matrix format." << std::endl; + return 1; } +} + +int MatrixHandlerHip::Matrix1Norm(matrix::Sparse * /* A */, real_type * /* norm */) { return -1; } + +int MatrixHandlerHip::csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr) +{ + index_type error_sum = 0; + LinAlgWorkspaceHIP *workspaceHIP = (LinAlgWorkspaceHIP *)workspace_; + + rocsparse_status status; + + A_csr->allocateMatrixData(memory::DEVICE); + index_type n = A_csc->getNumRows(); + index_type m = A_csc->getNumRows(); + index_type nnz = A_csc->getNnz(); + size_t bufferSize; + void *d_work; + + status = rocsparse_csr2csc_buffer_size(workspaceHIP->getRocsparseHandle(), n, m, nnz, A_csc->getColData(memory::DEVICE), + A_csc->getRowData(memory::DEVICE), rocsparse_action_numeric, &bufferSize); + + error_sum += status; + mem_.allocateBufferOnDevice(&d_work, bufferSize); + + status = rocsparse_dcsr2csc(workspaceHIP->getRocsparseHandle(), n, m, nnz, A_csc->getValues(memory::DEVICE), A_csc->getColData(memory::DEVICE), + A_csc->getRowData(memory::DEVICE), A_csr->getValues(memory::DEVICE), A_csr->getRowData(memory::DEVICE), + 
A_csr->getColData(memory::DEVICE), rocsparse_action_numeric, rocsparse_index_base_zero, d_work); + error_sum += status; + return error_sum; + mem_.deleteOnDevice(d_work); +} } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerHip.hpp b/resolve/matrix/MatrixHandlerHip.hpp index 37f11a7b2..5c7424a65 100644 --- a/resolve/matrix/MatrixHandlerHip.hpp +++ b/resolve/matrix/MatrixHandlerHip.hpp @@ -4,57 +4,50 @@ #include namespace ReSolve -{ - namespace vector - { - class Vector; - } - namespace matrix - { - class Sparse; - class Coo; - class Csc; - class Csr; - } - class LinAlgWorkspaceHIP; +{ +namespace vector +{ +class Vector; } +namespace matrix +{ +class Sparse; +class Coo; +class Csc; +class Csr; +} // namespace matrix +class LinAlgWorkspaceHIP; +} // namespace ReSolve +namespace ReSolve +{ +/** + * @class MatrixHandlerHip + * + * @brief HIP implementation of the matrix handler. + */ +class MatrixHandlerHip : public MatrixHandlerImpl +{ + using vector_type = vector::Vector; -namespace ReSolve { - /** - * @class MatrixHandlerHip - * - * @brief HIP implementation of the matrix handler. 
- */ - class MatrixHandlerHip : public MatrixHandlerImpl - { - using vector_type = vector::Vector; - - public: - - MatrixHandlerHip(LinAlgWorkspaceHIP* workspace); - virtual ~MatrixHandlerHip(); - - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); - - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrix_type); - - virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); - - void setValuesChanged(bool isValuesChanged); - - private: - - LinAlgWorkspaceHIP* workspace_{nullptr}; - bool values_changed_{true}; ///< needed for matvec - - MemoryHandler mem_; ///< Device memory manager object - }; + public: + MatrixHandlerHip(LinAlgWorkspaceHIP *workspace); + virtual ~MatrixHandlerHip(); -} // namespace ReSolve + int csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr); + + virtual int matvec(matrix::Sparse *A, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, + std::string matrix_type); + + virtual int Matrix1Norm(matrix::Sparse *A, real_type *norm); + void setValuesChanged(bool isValuesChanged); + + private: + LinAlgWorkspaceHIP *workspace_{nullptr}; + bool values_changed_{true}; ///< needed for matvec + + MemoryHandler mem_; ///< Device memory manager object +}; + +} // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index 2bef6b3d7..d374160be 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -3,49 +3,42 @@ #include namespace ReSolve -{ - namespace vector - { - class Vector; - } - namespace matrix - { - class Sparse; - class Coo; - class Csc; - class Csr; - } +{ +namespace vector +{ +class Vector; } +namespace matrix +{ +class Sparse; +class Coo; +class Csc; +class Csr; +} // namespace matrix +} // namespace ReSolve +namespace ReSolve +{ +/** + * @class MatrixHandlerImpl + * + * @brief Base class for different matrix 
handler implementations. + */ +class MatrixHandlerImpl +{ + using vector_type = vector::Vector; -namespace ReSolve { - /** - * @class MatrixHandlerImpl - * - * @brief Base class for different matrix handler implementations. - */ - class MatrixHandlerImpl - { - using vector_type = vector::Vector; - - public: - MatrixHandlerImpl() - {} - virtual ~MatrixHandlerImpl() - {} + public: + MatrixHandlerImpl() {} + virtual ~MatrixHandlerImpl() {} - virtual int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) = 0; + virtual int csc2csr(matrix::Csc *A_csc, matrix::Csr *A_csr) = 0; - virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrix_type) = 0; - virtual int Matrix1Norm(matrix::Sparse* A, real_type* norm) = 0; + virtual int matvec(matrix::Sparse *A, vector_type *vec_x, vector_type *vec_result, const real_type *alpha, const real_type *beta, + std::string matrix_type) = 0; + virtual int Matrix1Norm(matrix::Sparse *A, real_type *norm) = 0; - virtual void setValuesChanged(bool isValuesChanged) = 0; - }; + virtual void setValuesChanged(bool isValuesChanged) = 0; +}; } // namespace ReSolve - diff --git a/resolve/matrix/Sparse.cpp b/resolve/matrix/Sparse.cpp index 55aea9d6b..f3d287863 100644 --- a/resolve/matrix/Sparse.cpp +++ b/resolve/matrix/Sparse.cpp @@ -1,277 +1,245 @@ -#include // <-- includes memcpy +#include // <-- includes memcpy #include "Sparse.hpp" -namespace ReSolve { namespace matrix { +namespace ReSolve +{ +namespace matrix +{ - Sparse::Sparse() - { - } +Sparse::Sparse() {} - Sparse::Sparse(index_type n, - index_type m, - index_type nnz): - n_{n}, - m_{m}, - nnz_{nnz} - { - this->is_symmetric_ = false; - this->is_expanded_ = true; //default is a normal non-symmetric fully expanded matrix - this->nnz_expanded_ = nnz; - - setNotUpdated(); - - //set everything to nullptr - h_row_data_ = nullptr; - h_col_data_ = nullptr; - h_val_data_ = nullptr; - - d_row_data_ = 
nullptr; - d_col_data_ = nullptr; - d_val_data_ = nullptr; - - owns_cpu_data_ = false; - owns_cpu_vals_ = false; - - owns_gpu_data_ = false; - owns_gpu_vals_ = false; - } +Sparse::Sparse(index_type n, index_type m, index_type nnz) : n_{n}, m_{m}, nnz_{nnz} +{ + this->is_symmetric_ = false; + this->is_expanded_ = true; // default is a normal non-symmetric fully expanded matrix + this->nnz_expanded_ = nnz; - Sparse::Sparse(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded): - n_{n}, - m_{m}, - nnz_{nnz}, - is_symmetric_{symmetric}, - is_expanded_{expanded} - { - if (is_expanded_) { - this->nnz_expanded_ = nnz_; - } else { - this->nnz_expanded_ = 0; - } - setNotUpdated(); - - //set everything to nullptr - h_row_data_ = nullptr; - h_col_data_ = nullptr; - h_val_data_ = nullptr; - - d_row_data_ = nullptr; - d_col_data_ = nullptr; - d_val_data_ = nullptr; - - owns_cpu_data_ = false; - owns_cpu_vals_ = false; - - owns_gpu_data_ = false; - owns_gpu_vals_ = false; - } + setNotUpdated(); - Sparse::~Sparse() - { - this->destroyMatrixData(memory::HOST); - this->destroyMatrixData(memory::DEVICE); - } + // set everything to nullptr + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; - void Sparse::setNotUpdated() - { - h_data_updated_ = false; - d_data_updated_ = false; - } - - index_type Sparse::getNumRows() - { - return this->n_; - } + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; - index_type Sparse::getNumColumns() - { - return this->m_; - } + owns_cpu_data_ = false; + owns_cpu_vals_ = false; - index_type Sparse::getNnz() - { - return this->nnz_; - } + owns_gpu_data_ = false; + owns_gpu_vals_ = false; +} - index_type Sparse::getNnzExpanded() - { - return this->nnz_expanded_; +Sparse::Sparse(index_type n, index_type m, index_type nnz, bool symmetric, bool expanded) + : n_{n}, m_{m}, nnz_{nnz}, is_symmetric_{symmetric}, is_expanded_{expanded} +{ + if (is_expanded_) { + this->nnz_expanded_ = nnz_; 
+ } else { + this->nnz_expanded_ = 0; } + setNotUpdated(); - bool Sparse::symmetric() - { - return is_symmetric_; - } + // set everything to nullptr + h_row_data_ = nullptr; + h_col_data_ = nullptr; + h_val_data_ = nullptr; - bool Sparse::expanded() - { - return is_expanded_; - } + d_row_data_ = nullptr; + d_col_data_ = nullptr; + d_val_data_ = nullptr; - void Sparse::setSymmetric(bool symmetric) - { - this->is_symmetric_ = symmetric; - } + owns_cpu_data_ = false; + owns_cpu_vals_ = false; - void Sparse::setExpanded(bool expanded) - { - this->is_expanded_ = expanded; - } + owns_gpu_data_ = false; + owns_gpu_vals_ = false; +} - void Sparse::setNnzExpanded(index_type nnz_expanded_new) - { - this->nnz_expanded_ = nnz_expanded_new; - } +Sparse::~Sparse() +{ + this->destroyMatrixData(memory::HOST); + this->destroyMatrixData(memory::DEVICE); +} - void Sparse::setNnz(index_type nnz_new) - { - this->nnz_ = nnz_new; - } +void Sparse::setNotUpdated() +{ + h_data_updated_ = false; + d_data_updated_ = false; +} - int Sparse::setUpdated(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - h_data_updated_ = true; - d_data_updated_ = false; - break; - case DEVICE: - d_data_updated_ = true; - h_data_updated_ = false; - break; - } - return 0; - } +index_type Sparse::getNumRows() { return this->n_; } + +index_type Sparse::getNumColumns() { return this->m_; } + +index_type Sparse::getNnz() { return this->nnz_; } + +index_type Sparse::getNnzExpanded() { return this->nnz_expanded_; } - int Sparse::setMatrixData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - - setNotUpdated(); - - switch (memspace) { - case HOST: - this->h_row_data_ = row_data; - this->h_col_data_ = col_data; - this->h_val_data_ = val_data; - h_data_updated_ = true; - break; - case DEVICE: - this->d_row_data_ = row_data; - this->d_col_data_ = col_data; - this->d_val_data_ = 
val_data; - d_data_updated_ = true; - break; +bool Sparse::symmetric() { return is_symmetric_; } + +bool Sparse::expanded() { return is_expanded_; } + +void Sparse::setSymmetric(bool symmetric) { this->is_symmetric_ = symmetric; } + +void Sparse::setExpanded(bool expanded) { this->is_expanded_ = expanded; } + +void Sparse::setNnzExpanded(index_type nnz_expanded_new) { this->nnz_expanded_ = nnz_expanded_new; } + +void Sparse::setNnz(index_type nnz_new) { this->nnz_ = nnz_new; } + +int Sparse::setUpdated(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + h_data_updated_ = true; + d_data_updated_ = false; + break; + case DEVICE: + d_data_updated_ = true; + h_data_updated_ = false; + break; + } + return 0; +} + +int Sparse::setMatrixData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + + setNotUpdated(); + + switch (memspace) { + case HOST: + this->h_row_data_ = row_data; + this->h_col_data_ = col_data; + this->h_val_data_ = val_data; + h_data_updated_ = true; + break; + case DEVICE: + this->d_row_data_ = row_data; + this->d_col_data_ = col_data; + this->d_val_data_ = val_data; + d_data_updated_ = true; + break; + } + return 0; +} + +int Sparse::destroyMatrixData(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + if (owns_cpu_data_) { + delete[] h_row_data_; + delete[] h_col_data_; + } + if (owns_cpu_vals_) { + delete[] h_val_data_; } return 0; - } - - int Sparse::destroyMatrixData(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - if (owns_cpu_data_) { - delete [] h_row_data_; - delete [] h_col_data_; - } - if (owns_cpu_vals_) { - delete [] h_val_data_; - } - return 0; - case DEVICE: - if (owns_gpu_data_) { - mem_.deleteOnDevice(d_row_data_); - mem_.deleteOnDevice(d_col_data_); - } - if (owns_gpu_vals_) { - 
mem_.deleteOnDevice(d_val_data_); - } - return 0; - default: - return -1; + case DEVICE: + if (owns_gpu_data_) { + mem_.deleteOnDevice(d_row_data_); + mem_.deleteOnDevice(d_col_data_); + } + if (owns_gpu_vals_) { + mem_.deleteOnDevice(d_val_data_); } + return 0; + default: + return -1; } +} - int Sparse::updateValues(real_type* new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) - { - - index_type nnz_current = nnz_; - if (is_expanded_) {nnz_current = nnz_expanded_;} - //four cases (for now) - setNotUpdated(); - int control=-1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)){ control = 1;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { control = 2;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)){ control = 3;} - - if (memspaceOut == memory::HOST) { - //check if cpu data allocated - if (h_val_data_ == nullptr) { - this->h_val_data_ = new real_type[nnz_current]; - } - } +int Sparse::updateValues(real_type *new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) +{ - if (memspaceOut == memory::DEVICE) { - //check if cuda data allocated - if (d_val_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - } - } + index_type nnz_current = nnz_; + if (is_expanded_) { + nnz_current = nnz_expanded_; + } + // four cases (for now) + setNotUpdated(); + int control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { + control = 0; + } + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) { + control = 1; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { + control = 2; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)) { + control = 3; + } - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = 
true; - owns_cpu_vals_ = true; - break; - case 2://cuda->cpu - mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); - h_data_updated_ = true; - owns_cpu_vals_ = true; - break; - case 1://cpu->cuda - mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - owns_gpu_vals_ = true; - break; - case 3://cuda->cuda - mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); - d_data_updated_ = true; - owns_gpu_vals_ = true; - break; - default: - return -1; + if (memspaceOut == memory::HOST) { + // check if cpu data allocated + if (h_val_data_ == nullptr) { + this->h_val_data_ = new real_type[nnz_current]; } - return 0; } - int Sparse::setNewValues(real_type* new_vals, memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - setNotUpdated(); - - switch (memspace) { - case HOST: - this->h_val_data_ = new_vals; - h_data_updated_ = true; - break; - case DEVICE: - this->d_val_data_ = new_vals; - d_data_updated_ = true; - break; - default: - return -1; + if (memspaceOut == memory::DEVICE) { + // check if cuda data allocated + if (d_val_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); } - return 0; } -}} // namespace ReSolve::matrix - + switch (control) { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + owns_cpu_vals_ = true; + break; + case 2: // cuda->cpu + mem_.copyArrayDeviceToHost(h_val_data_, new_vals, nnz_current); + h_data_updated_ = true; + owns_cpu_vals_ = true; + break; + case 1: // cpu->cuda + mem_.copyArrayHostToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + owns_gpu_vals_ = true; + break; + case 3: // cuda->cuda + mem_.copyArrayDeviceToDevice(d_val_data_, new_vals, nnz_current); + d_data_updated_ = true; + owns_gpu_vals_ = true; + break; + default: + return -1; + } + return 0; +} + +int Sparse::setNewValues(real_type *new_vals, memory::MemorySpace memspace) +{ + using namespace 
ReSolve::memory; + setNotUpdated(); + + switch (memspace) { + case HOST: + this->h_val_data_ = new_vals; + h_data_updated_ = true; + break; + case DEVICE: + this->d_val_data_ = new_vals; + d_data_updated_ = true; + break; + default: + return -1; + } + return 0; +} + +} // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/Sparse.hpp b/resolve/matrix/Sparse.hpp index 96121acb9..4eeb66643 100644 --- a/resolve/matrix/Sparse.hpp +++ b/resolve/matrix/Sparse.hpp @@ -1,94 +1,94 @@ // Matrix utilities -// Mirroring memory approach +// Mirroring memory approach #pragma once -#include #include #include +#include -namespace ReSolve { namespace matrix { - class Sparse - { - public: - //basic constructor - Sparse(); - Sparse(index_type n, index_type m, index_type nnz); - Sparse(index_type n, - index_type m, - index_type nnz, - bool symmetric, - bool expanded); - virtual ~Sparse(); - - // accessors - index_type getNumRows(); - index_type getNumColumns(); - index_type getNnz(); - index_type getNnzExpanded(); - - bool symmetric(); - bool expanded(); - void setSymmetric(bool symmetric); - void setExpanded(bool expanded); - void setNnzExpanded(index_type nnz_expanded_new); - void setNnz(index_type nnz_new); // for resetting when removing duplicates - index_type setUpdated(memory::MemorySpace what); - - virtual index_type* getRowData(memory::MemorySpace memspace) = 0; - virtual index_type* getColData(memory::MemorySpace memspace) = 0; - virtual real_type* getValues( memory::MemorySpace memspace) = 0; - - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; - virtual int updateData(index_type* row_data, index_type* col_data, real_type* val_data, index_type new_nnz, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; - - virtual int allocateMatrixData(memory::MemorySpace memspace) = 0; - int setMatrixData(index_type* row_data, index_type* 
col_data, real_type* val_data, memory::MemorySpace memspace); - - int destroyMatrixData(memory::MemorySpace memspace); - - virtual void print() = 0; - - virtual int copyData(memory::MemorySpace memspaceOut) = 0; - - - //update Values just updates values; it allocates if necessary. - //values have the same dimensions between different formats - virtual int updateValues(real_type* new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - - //set new values just sets the pointer, use caution. - virtual int setNewValues(real_type* new_vals, memory::MemorySpace memspace); - - protected: - //size - index_type n_{0}; - index_type m_{0}; - index_type nnz_{0}; - index_type nnz_expanded_{0}; - - bool is_symmetric_{false}; - bool is_expanded_{false}; - - //host data - index_type* h_row_data_{nullptr}; - index_type* h_col_data_{nullptr}; - real_type* h_val_data_{nullptr}; - bool h_data_updated_{false}; - - //gpu data - index_type* d_row_data_{nullptr}; - index_type* d_col_data_{nullptr}; - real_type* d_val_data_{nullptr}; - bool d_data_updated_{false}; - - void setNotUpdated(); - - // Data ownership flags - bool owns_cpu_data_{false}; ///< for row/col data - bool owns_cpu_vals_{false}; ///< for values - - bool owns_gpu_data_{false}; ///< for row/col data - bool owns_gpu_vals_{false}; ///< for values - - MemoryHandler mem_; ///< Device memory manager object - - }; -}} // namespace ReSolve::matrix +namespace ReSolve +{ +namespace matrix +{ +class Sparse +{ + public: + // basic constructor + Sparse(); + Sparse(index_type n, index_type m, index_type nnz); + Sparse(index_type n, index_type m, index_type nnz, bool symmetric, bool expanded); + virtual ~Sparse(); + + // accessors + index_type getNumRows(); + index_type getNumColumns(); + index_type getNnz(); + index_type getNnzExpanded(); + + bool symmetric(); + bool expanded(); + void setSymmetric(bool symmetric); + void setExpanded(bool expanded); + void setNnzExpanded(index_type nnz_expanded_new); + void 
setNnz(index_type nnz_new); // for resetting when removing duplicates + index_type setUpdated(memory::MemorySpace what); + + virtual index_type *getRowData(memory::MemorySpace memspace) = 0; + virtual index_type *getColData(memory::MemorySpace memspace) = 0; + virtual real_type *getValues(memory::MemorySpace memspace) = 0; + + virtual int updateData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut) = 0; + virtual int updateData(index_type *row_data, index_type *col_data, real_type *val_data, index_type new_nnz, memory::MemorySpace memspaceIn, + memory::MemorySpace memspaceOut) = 0; + + virtual int allocateMatrixData(memory::MemorySpace memspace) = 0; + int setMatrixData(index_type *row_data, index_type *col_data, real_type *val_data, memory::MemorySpace memspace); + + int destroyMatrixData(memory::MemorySpace memspace); + + virtual void print() = 0; + + virtual int copyData(memory::MemorySpace memspaceOut) = 0; + + // update Values just updates values; it allocates if necessary. + // values have the same dimensions between different formats + virtual int updateValues(real_type *new_vals, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + + // set new values just sets the pointer, use caution. 
+ virtual int setNewValues(real_type *new_vals, memory::MemorySpace memspace); + + protected: + // size + index_type n_{0}; + index_type m_{0}; + index_type nnz_{0}; + index_type nnz_expanded_{0}; + + bool is_symmetric_{false}; + bool is_expanded_{false}; + + // host data + index_type *h_row_data_{nullptr}; + index_type *h_col_data_{nullptr}; + real_type *h_val_data_{nullptr}; + bool h_data_updated_{false}; + + // gpu data + index_type *d_row_data_{nullptr}; + index_type *d_col_data_{nullptr}; + real_type *d_val_data_{nullptr}; + bool d_data_updated_{false}; + + void setNotUpdated(); + + // Data ownership flags + bool owns_cpu_data_{false}; ///< for row/col data + bool owns_cpu_vals_{false}; ///< for values + + bool owns_gpu_data_{false}; ///< for row/col data + bool owns_gpu_vals_{false}; ///< for values + + MemoryHandler mem_; ///< Device memory manager object +}; +} // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/io.cpp b/resolve/matrix/io.cpp index 0d96a5e1c..b8b04013c 100644 --- a/resolve/matrix/io.cpp +++ b/resolve/matrix/io.cpp @@ -1,186 +1,185 @@ -#include -#include -#include #include +#include +#include #include +#include +#include "io.hpp" +#include #include #include -#include -#include "io.hpp" - -namespace ReSolve { namespace io { +namespace ReSolve +{ +namespace io +{ - matrix::Coo* readMatrixFromFile(std::istream& file) - { - if(!file) { - Logger::error() << "Empty input to readMatrixFromFile function ... \n" << std::endl; - return nullptr; - } +matrix::Coo *readMatrixFromFile(std::istream &file) +{ + if (!file) { + Logger::error() << "Empty input to readMatrixFromFile function ... 
\n" << std::endl; + return nullptr; + } - std::stringstream ss; - std::string line; - index_type i = 0; - index_type m, n, nnz; - bool symmetric = false; - bool expanded = true; + std::stringstream ss; + std::string line; + index_type i = 0; + index_type m, n, nnz; + bool symmetric = false; + bool expanded = true; + std::getline(file, line); + // symmetric? + size_t found = line.find("symmetric"); + if (found != std::string::npos) { + symmetric = true; + expanded = false; + } + while (line.at(0) == '%') { std::getline(file, line); - //symmetric? - size_t found = line.find("symmetric"); - if (found != std::string::npos) { - symmetric = true; - expanded = false; - } - while (line.at(0) == '%') { - std::getline(file, line); - // std::cout<> n >> m >> nnz; - //create matrix object - matrix::Coo* A = new matrix::Coo(n, m, nnz,symmetric, expanded ); - //create coo arrays - index_type* coo_rows = new index_type[nnz]; - index_type* coo_cols = new index_type[nnz]; - real_type* coo_vals = new real_type[nnz]; - i = 0; - index_type a, b; - real_type c; - while (file >> a >> b >> c) { - coo_rows[i] = a - 1; - coo_cols[i] = b - 1; - coo_vals[i] = c; - i++; - } - A->setMatrixData(coo_rows, coo_cols, coo_vals, memory::HOST); - return A; - } - - - real_type* readRhsFromFile(std::istream& file) - { - if(!file) { - Logger::error() << "Empty input to " << __func__ << " function ... 
\n" << std::endl; - return nullptr; - } - - std::stringstream ss; - std::string line; - index_type i = 0; - index_type n, m; + // std::cout<> n >> m >> nnz; + // create matrix object + matrix::Coo *A = new matrix::Coo(n, m, nnz, symmetric, expanded); + // create coo arrays + index_type *coo_rows = new index_type[nnz]; + index_type *coo_cols = new index_type[nnz]; + real_type *coo_vals = new real_type[nnz]; + i = 0; + index_type a, b; + real_type c; + while (file >> a >> b >> c) { + coo_rows[i] = a - 1; + coo_cols[i] = b - 1; + coo_vals[i] = c; + i++; + } + A->setMatrixData(coo_rows, coo_cols, coo_vals, memory::HOST); + return A; +} + +real_type *readRhsFromFile(std::istream &file) +{ + if (!file) { + Logger::error() << "Empty input to " << __func__ << " function ... \n" << std::endl; + return nullptr; + } + + std::stringstream ss; + std::string line; + index_type i = 0; + index_type n, m; + std::getline(file, line); + while (line.at(0) == '%') { std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); - // std::cout << line << std::endl; - } - ss << line; - ss >> n >> m ; - - real_type* vec = new real_type[n]; - real_type a; - while (file >> a){ - vec[i] = a; - i++; - } - return vec; - } - - void readAndUpdateMatrix(std::istream& file, matrix::Coo* A) - { - if(!file) { - Logger::error() << "Empty input to readMatrixFromFile function ..." << std::endl; - return; - } - - std::stringstream ss; - A->setExpanded(false); - std::string line; - index_type i = 0; - index_type m, n, nnz; + // std::cout << line << std::endl; + } + ss << line; + ss >> n >> m; + + real_type *vec = new real_type[n]; + real_type a; + while (file >> a) { + vec[i] = a; + i++; + } + return vec; +} + +void readAndUpdateMatrix(std::istream &file, matrix::Coo *A) +{ + if (!file) { + Logger::error() << "Empty input to readMatrixFromFile function ..." 
<< std::endl; + return; + } + + std::stringstream ss; + A->setExpanded(false); + std::string line; + index_type i = 0; + index_type m, n, nnz; + std::getline(file, line); + while (line.at(0) == '%') { std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); - // std::cout << line << std::endl; - } - - ss << line; - ss >> n >> m >> nnz; - if ((A->getNumRows() != n) || (A->getNumColumns() != m) || (A->getNnz() < nnz)) { - Logger::error() << "Wrong matrix size: " << A->getNumRows() - << "x" << A->getNumColumns() - << ", NNZ: " << A->getNnz() - << " Cannot update! \n "; - return; - } - A->setNnz(nnz); - //create coo arrays - index_type* coo_rows = A->getRowData(memory::HOST); - index_type* coo_cols = A->getColData(memory::HOST); - real_type* coo_vals = A->getValues( memory::HOST); - i = 0; - index_type a, b; - real_type c; - while (file >> a >> b >> c) { - coo_rows[i] = a - 1; - coo_cols[i] = b - 1; - coo_vals[i] = c; - i++; - } - } - - void readAndUpdateRhs(std::istream& file, real_type** p_rhs) - { - if (!file) { - Logger::error() << "Empty input to readAndUpdateRhs function ..." << std::endl; - return; - } - - real_type* rhs = *p_rhs; - std::stringstream ss; - std::string line; - index_type n, m; + // std::cout << line << std::endl; + } + ss << line; + ss >> n >> m >> nnz; + if ((A->getNumRows() != n) || (A->getNumColumns() != m) || (A->getNnz() < nnz)) { + Logger::error() << "Wrong matrix size: " << A->getNumRows() << "x" << A->getNumColumns() << ", NNZ: " << A->getNnz() << " Cannot update! 
\n "; + return; + } + A->setNnz(nnz); + // create coo arrays + index_type *coo_rows = A->getRowData(memory::HOST); + index_type *coo_cols = A->getColData(memory::HOST); + real_type *coo_vals = A->getValues(memory::HOST); + i = 0; + index_type a, b; + real_type c; + while (file >> a >> b >> c) { + coo_rows[i] = a - 1; + coo_cols[i] = b - 1; + coo_vals[i] = c; + i++; + } +} + +void readAndUpdateRhs(std::istream &file, real_type **p_rhs) +{ + if (!file) { + Logger::error() << "Empty input to readAndUpdateRhs function ..." << std::endl; + return; + } + + real_type *rhs = *p_rhs; + std::stringstream ss; + std::string line; + index_type n, m; + + std::getline(file, line); + while (line.at(0) == '%') { std::getline(file, line); - while (line.at(0) == '%') { - std::getline(file, line); - // std::cout<> n >> m ; - - if (rhs == nullptr) { - // std::cout << "Allocating array of size " << n << "\n"; - rhs = new real_type[n]; - } - real_type a; - index_type i = 0; - while (file >> a) { - rhs[i] = a; - // std::cout << i << ": " << a << "\n"; - i++; - } - } - - int writeMatrixToFile(matrix::Sparse* /* A */, std::ostream& /* file_out */) - { - Logger::error() << "writeMatrixToFile function not implemented!\n"; - return -1; - } - - int writeVectorToFile(vector_type* vec_x, std::ostream& file_out) - { - real_type* x_data = vec_x->getData(memory::HOST); - // std::ofstream file_out (filename, std::ofstream::out); - file_out << "%%MatrixMarket matrix array real general \n"; - file_out << "% ID: XXX \n"; - file_out << vec_x->getSize() << " " << 1 << "\n"; - for (int i = 0; i < vec_x->getSize(); ++i) { - file_out << std::setprecision(32) << std::scientific << x_data[i] << "\n"; - } - // file_out.close(); - return 0; - } - -}} // ReSolve::io + // std::cout<> n >> m; + + if (rhs == nullptr) { + // std::cout << "Allocating array of size " << n << "\n"; + rhs = new real_type[n]; + } + real_type a; + index_type i = 0; + while (file >> a) { + rhs[i] = a; + // std::cout << i << ": " << a << 
"\n"; + i++; + } +} + +int writeMatrixToFile(matrix::Sparse * /* A */, std::ostream & /* file_out */) +{ + Logger::error() << "writeMatrixToFile function not implemented!\n"; + return -1; +} + +int writeVectorToFile(vector_type *vec_x, std::ostream &file_out) +{ + real_type *x_data = vec_x->getData(memory::HOST); + // std::ofstream file_out (filename, std::ofstream::out); + file_out << "%%MatrixMarket matrix array real general \n"; + file_out << "% ID: XXX \n"; + file_out << vec_x->getSize() << " " << 1 << "\n"; + for (int i = 0; i < vec_x->getSize(); ++i) { + file_out << std::setprecision(32) << std::scientific << x_data[i] << "\n"; + } + // file_out.close(); + return 0; +} + +} // namespace io +} // namespace ReSolve diff --git a/resolve/matrix/io.hpp b/resolve/matrix/io.hpp index 41486ece5..f91e8f194 100644 --- a/resolve/matrix/io.hpp +++ b/resolve/matrix/io.hpp @@ -1,22 +1,34 @@ #include -namespace ReSolve { namespace vector { - class Vector; -}} +namespace ReSolve +{ +namespace vector +{ +class Vector; +} +} // namespace ReSolve -namespace ReSolve { namespace matrix { - class Sparse; - class Coo; -}} +namespace ReSolve +{ +namespace matrix +{ +class Sparse; +class Coo; +} // namespace matrix +} // namespace ReSolve -namespace ReSolve { namespace io { - using vector_type = vector::Vector; +namespace ReSolve +{ +namespace io +{ +using vector_type = vector::Vector; - matrix::Coo* readMatrixFromFile(std::istream& file); - void readAndUpdateMatrix(std::istream& file, matrix::Coo* A); - real_type* readRhsFromFile(std::istream& file); - void readAndUpdateRhs(std::istream& file, real_type** rhs); +matrix::Coo *readMatrixFromFile(std::istream &file); +void readAndUpdateMatrix(std::istream &file, matrix::Coo *A); +real_type *readRhsFromFile(std::istream &file); +void readAndUpdateRhs(std::istream &file, real_type **rhs); - int writeMatrixToFile(matrix::Sparse* A, std::ostream& file_out); - int writeVectorToFile(vector_type* vec_x, std::ostream &file_out); -}} // 
ReSolve::io +int writeMatrixToFile(matrix::Sparse *A, std::ostream &file_out); +int writeVectorToFile(vector_type *vec_x, std::ostream &file_out); +} // namespace io +} // namespace ReSolve diff --git a/resolve/resolve_defs.hpp.in b/resolve/resolve_defs.hpp.in index 15cd57917..e63087a04 100644 --- a/resolve/resolve_defs.hpp.in +++ b/resolve/resolve_defs.hpp.in @@ -25,4 +25,4 @@ #endif -#endif // __RESOLVE_DEFINITIONS_HPP__ \ No newline at end of file +#endif // __RESOLVE_DEFINITIONS_HPP__ diff --git a/resolve/utilities/logger/CMakeLists.txt b/resolve/utilities/logger/CMakeLists.txt index 298009423..4f2e57b68 100644 --- a/resolve/utilities/logger/CMakeLists.txt +++ b/resolve/utilities/logger/CMakeLists.txt @@ -6,21 +6,19 @@ ]] -set(Logger_SRC - Logger.cpp -) +set(Logger_SRC Logger.cpp) -set(Logger_HEADER_INSTALL - Logger.hpp -) +set(Logger_HEADER_INSTALL Logger.hpp) # Build shared library ReSolve add_library(resolve_logger SHARED ${Logger_SRC}) -target_include_directories(resolve_logger PUBLIC - $ - $ - $ +target_include_directories( + resolve_logger + PUBLIC $ + $ $ ) -install(FILES ${Logger_HEADER_INSTALL} DESTINATION include/resolve/utilities/logger) +install(FILES ${Logger_HEADER_INSTALL} + DESTINATION include/resolve/utilities/logger +) diff --git a/resolve/utilities/logger/Logger.cpp b/resolve/utilities/logger/Logger.cpp index 7369978f0..abe18f563 100644 --- a/resolve/utilities/logger/Logger.cpp +++ b/resolve/utilities/logger/Logger.cpp @@ -4,179 +4,177 @@ * @author Slaven Peles */ - -#include #include "Logger.hpp" +#include namespace ReSolve { - namespace io - { - /// @brief Default verbosity is to print error and warning messages - Logger::Verbosity Logger::verbosity_ = Logger::WARNINGS; - - /// @brief Default output is standard output - std::ostream* Logger::logger_ = &std::cout; - - /// @brief User provided output file stream - std::ofstream Logger::file_; - - /// @brief Stream to null device - std::ostream Logger::nullstream_(nullptr); - - /// @brief 
Auxiliary vector of output streams - std::vector Logger::tmp_; - - /// @brief Vector of different output streams - std::vector Logger::output_streams_(Logger::init()); - - /** - * @brief Sets verbosity level - * - * @pre `output_streams_` vector is allocated - * @post Verbosity level is set to user supplied value `v` and outputs - * for `output_streams_` are set accordingly. - */ - void Logger::setVerbosity(Verbosity v) - { - verbosity_ = v; - updateVerbosity(output_streams_); - } - - /** - * @brief Private method to update verbosity. - * - * This function directs each output stream <= `verbosity_` to user - * selected output and sets all others to null device. Each output stream - * corresponds to different verbosity level. - * - * @param[in] output_streams - vector of pointers to output streams - * - * @pre Vector `output_streams` is allocated and correctly initialized. - * @post All streams `output_stream_[i]`, where `i <= verbosity_` are - * directed to stream `logger_`. The rest are sent to null device - * (not printed). - */ - void Logger::updateVerbosity(std::vector& output_streams) - { - for (std::size_t i = NONE; i <= EVERYTHING; ++i) - { - output_streams[i] = i > verbosity_ ? &nullstream_ : logger_; - } - } - - /** - * @brief Delivers default values for output streams. - */ - std::vector&& Logger::init() - { - tmp_.resize(Logger::EVERYTHING + 1); - updateVerbosity(tmp_); - return std::move(tmp_); - } - - /** - * @brief Returns reference to output stream for error messages. - * - * @return Reference to error messages stream in `output_streams_`. - * - * @pre `output_streams_` vector is allocated and correctly initialized. - */ - std::ostream& Logger::error() - { - using namespace colors; - *(output_streams_[ERRORS]) << "[" << RED << "ERROR" << CLEAR << "] "; - return *(output_streams_[ERRORS]); - } - - /** - * @brief Returns reference to output stream for warning messages. - * - * @return Reference to warning messages stream in `output_streams_`. 
- * - * @pre `output_streams_` vector is allocated and correctly initialized. - */ - std::ostream& Logger::warning() - { - using namespace colors; - *(output_streams_[WARNINGS]) << "[" << YELLOW << "WARNING" << CLEAR << "] "; - return *(output_streams_[WARNINGS]); - } - - /** - * @brief Returns reference to analysis summary messages output stream. - * - * @return Reference to analysis summary messages stream in `output_streams_`. - * - * @pre `output_streams_` vector is allocated and correctly initialized. - */ - std::ostream& Logger::summary() - { - *(output_streams_[SUMMARY]) << "[SUMMARY] "; - return *(output_streams_[SUMMARY]); - } - - /** - * @brief Returns reference to output stream for all other messages. - * - * @return Reference to output stream to miscellaneous messages - * in `output_streams_`. - * - * @pre `output_streams_` vector is allocated and correctly initialized. - */ - std::ostream& Logger::misc() - { - *(output_streams_[EVERYTHING]) << "[MESSAGE] "; - return *(output_streams_[EVERYTHING]); - } - - /** - * @brief Open file `filename` and update outputs for different verbosities - * streams. - * - * @param[in] filename - The name of the output file. - * - * @pre `output_streams_` vector is allocated and correctly initialized. - * @post All active streams are directed to user supplied file `filename`. - */ - void Logger::openOutputFile(std::string filename) - { - file_.open(filename); - logger_ = &file_; - updateVerbosity(output_streams_); - } - - /** - * @brief Set outputs of active streams to user provided `std::ostream` object. - * - * All active outputs are redirected to `out` stream. All inactive ones are - * directed to null device. - * - * @param[in] out - User provided output stream. - * - * @pre `output_streams_` vector is allocated and correctly initialized. - * @post All active streams (`output_streams_[i]` where `i <= verbosity_`) - * are set to user provided `out` output stream. 
- */ - void Logger::setOutput(std::ostream& out) - { - logger_ = &out; - updateVerbosity(output_streams_); - } - - /** - * @brief Close output file. - * - * @pre Output file `file_` has been opened. - * @post Output file `file_` is closed and active output streams are - * set to default output `std::cout`. - */ - void Logger::closeOutputFile() - { - file_.close(); - logger_ = &std::cout; - updateVerbosity(output_streams_); - } - - } // namespace io -} // namespace ReSolve \ No newline at end of file +namespace io +{ +/// @brief Default verbosity is to print error and warning messages +Logger::Verbosity Logger::verbosity_ = Logger::WARNINGS; + +/// @brief Default output is standard output +std::ostream *Logger::logger_ = &std::cout; + +/// @brief User provided output file stream +std::ofstream Logger::file_; + +/// @brief Stream to null device +std::ostream Logger::nullstream_(nullptr); + +/// @brief Auxiliary vector of output streams +std::vector Logger::tmp_; + +/// @brief Vector of different output streams +std::vector Logger::output_streams_(Logger::init()); + +/** + * @brief Sets verbosity level + * + * @pre `output_streams_` vector is allocated + * @post Verbosity level is set to user supplied value `v` and outputs + * for `output_streams_` are set accordingly. + */ +void Logger::setVerbosity(Verbosity v) +{ + verbosity_ = v; + updateVerbosity(output_streams_); +} + +/** + * @brief Private method to update verbosity. + * + * This function directs each output stream <= `verbosity_` to user + * selected output and sets all others to null device. Each output stream + * corresponds to different verbosity level. + * + * @param[in] output_streams - vector of pointers to output streams + * + * @pre Vector `output_streams` is allocated and correctly initialized. + * @post All streams `output_stream_[i]`, where `i <= verbosity_` are + * directed to stream `logger_`. The rest are sent to null device + * (not printed). 
+ */ +void Logger::updateVerbosity(std::vector &output_streams) +{ + for (std::size_t i = NONE; i <= EVERYTHING; ++i) { + output_streams[i] = i > verbosity_ ? &nullstream_ : logger_; + } +} + +/** + * @brief Delivers default values for output streams. + */ +std::vector &&Logger::init() +{ + tmp_.resize(Logger::EVERYTHING + 1); + updateVerbosity(tmp_); + return std::move(tmp_); +} + +/** + * @brief Returns reference to output stream for error messages. + * + * @return Reference to error messages stream in `output_streams_`. + * + * @pre `output_streams_` vector is allocated and correctly initialized. + */ +std::ostream &Logger::error() +{ + using namespace colors; + *(output_streams_[ERRORS]) << "[" << RED << "ERROR" << CLEAR << "] "; + return *(output_streams_[ERRORS]); +} + +/** + * @brief Returns reference to output stream for warning messages. + * + * @return Reference to warning messages stream in `output_streams_`. + * + * @pre `output_streams_` vector is allocated and correctly initialized. + */ +std::ostream &Logger::warning() +{ + using namespace colors; + *(output_streams_[WARNINGS]) << "[" << YELLOW << "WARNING" << CLEAR << "] "; + return *(output_streams_[WARNINGS]); +} + +/** + * @brief Returns reference to analysis summary messages output stream. + * + * @return Reference to analysis summary messages stream in `output_streams_`. + * + * @pre `output_streams_` vector is allocated and correctly initialized. + */ +std::ostream &Logger::summary() +{ + *(output_streams_[SUMMARY]) << "[SUMMARY] "; + return *(output_streams_[SUMMARY]); +} + +/** + * @brief Returns reference to output stream for all other messages. + * + * @return Reference to output stream to miscellaneous messages + * in `output_streams_`. + * + * @pre `output_streams_` vector is allocated and correctly initialized. 
+ */ +std::ostream &Logger::misc() +{ + *(output_streams_[EVERYTHING]) << "[MESSAGE] "; + return *(output_streams_[EVERYTHING]); +} + +/** + * @brief Open file `filename` and update outputs for different verbosities + * streams. + * + * @param[in] filename - The name of the output file. + * + * @pre `output_streams_` vector is allocated and correctly initialized. + * @post All active streams are directed to user supplied file `filename`. + */ +void Logger::openOutputFile(std::string filename) +{ + file_.open(filename); + logger_ = &file_; + updateVerbosity(output_streams_); +} + +/** + * @brief Set outputs of active streams to user provided `std::ostream` object. + * + * All active outputs are redirected to `out` stream. All inactive ones are + * directed to null device. + * + * @param[in] out - User provided output stream. + * + * @pre `output_streams_` vector is allocated and correctly initialized. + * @post All active streams (`output_streams_[i]` where `i <= verbosity_`) + * are set to user provided `out` output stream. + */ +void Logger::setOutput(std::ostream &out) +{ + logger_ = &out; + updateVerbosity(output_streams_); +} + +/** + * @brief Close output file. + * + * @pre Output file `file_` has been opened. + * @post Output file `file_` is closed and active output streams are + * set to default output `std::cout`. + */ +void Logger::closeOutputFile() +{ + file_.close(); + logger_ = &std::cout; + updateVerbosity(output_streams_); +} + +} // namespace io +} // namespace ReSolve diff --git a/resolve/utilities/logger/Logger.hpp b/resolve/utilities/logger/Logger.hpp index 540631be0..f9f93d102 100644 --- a/resolve/utilities/logger/Logger.hpp +++ b/resolve/utilities/logger/Logger.hpp @@ -1,55 +1,55 @@ /** - * @file -*/ + * @file + */ #pragma once -#include #include +#include #include namespace ReSolve { - namespace io - { - /** - * @brief Class that manages and logs outputs from Re::Solve code. - * - * All methods and data in this class are static. 
- * - */ - class Logger - { - public: - /// Enum specifying verbosity level for the output. - enum Verbosity {NONE=0, ERRORS, WARNINGS, SUMMARY, EVERYTHING}; - - // All methods and data are static so delete constructor and destructor. - Logger() = delete; - ~Logger() = delete; - - static std::ostream& error(); - static std::ostream& warning(); - static std::ostream& summary(); - static std::ostream& misc(); - - static void setOutput(std::ostream& out); - static void openOutputFile(std::string filename); - static void closeOutputFile(); - static void setVerbosity(Verbosity v); - - static std::vector&& init(); - - private: - static void updateVerbosity(std::vector& output_streams); - - private: - static std::ostream nullstream_; - static std::ofstream file_; - static std::ostream* logger_; - static std::vector output_streams_; - static std::vector tmp_; - static Verbosity verbosity_; - }; - } // namespace io -} //namespace ReSolve \ No newline at end of file +namespace io +{ +/** + * @brief Class that manages and logs outputs from Re::Solve code. + * + * All methods and data in this class are static. + * + */ +class Logger +{ + public: + /// Enum specifying verbosity level for the output. + enum Verbosity { NONE = 0, ERRORS, WARNINGS, SUMMARY, EVERYTHING }; + + // All methods and data are static so delete constructor and destructor. 
+ Logger() = delete; + ~Logger() = delete; + + static std::ostream &error(); + static std::ostream &warning(); + static std::ostream &summary(); + static std::ostream &misc(); + + static void setOutput(std::ostream &out); + static void openOutputFile(std::string filename); + static void closeOutputFile(); + static void setVerbosity(Verbosity v); + + static std::vector &&init(); + + private: + static void updateVerbosity(std::vector &output_streams); + + private: + static std::ostream nullstream_; + static std::ofstream file_; + static std::ostream *logger_; + static std::vector output_streams_; + static std::vector tmp_; + static Verbosity verbosity_; +}; +} // namespace io +} // namespace ReSolve diff --git a/resolve/utilities/misc/IndexValuePair.hpp b/resolve/utilities/misc/IndexValuePair.hpp index b09ccf972..266142df3 100644 --- a/resolve/utilities/misc/IndexValuePair.hpp +++ b/resolve/utilities/misc/IndexValuePair.hpp @@ -1,43 +1,25 @@ #pragma once +namespace ReSolve +{ -namespace ReSolve { +/// @brief Helper class for COO matrix sorting +class IndexValuePair +{ + public: + IndexValuePair() : idx_(0), value_(0.0) {} + ~IndexValuePair() {} + void setIdx(index_type new_idx) { idx_ = new_idx; } + void setValue(real_type new_value) { value_ = new_value; } - /// @brief Helper class for COO matrix sorting - class IndexValuePair - { - public: - IndexValuePair() : idx_(0), value_(0.0) - {} - ~IndexValuePair() - {} - void setIdx (index_type new_idx) - { - idx_ = new_idx; - } - void setValue (real_type new_value) - { - value_ = new_value; - } + index_type getIdx() { return idx_; } + real_type getValue() { return value_; } - index_type getIdx() - { - return idx_; - } - real_type getValue() - { - return value_; - } + bool operator<(const IndexValuePair &str) const { return (idx_ < str.idx_); } - bool operator < (const IndexValuePair& str) const - { - return (idx_ < str.idx_); - } - - private: - index_type idx_; - real_type value_; - }; + private: + index_type idx_; + 
real_type value_; +}; } // namespace ReSolve - diff --git a/resolve/vector/CMakeLists.txt b/resolve/vector/CMakeLists.txt index 89b1abc8a..b6ce26dd3 100644 --- a/resolve/vector/CMakeLists.txt +++ b/resolve/vector/CMakeLists.txt @@ -7,30 +7,17 @@ ]] # C++ code -set(Vector_SRC - Vector.cpp - VectorHandler.cpp - VectorHandlerCpu.cpp -) +set(Vector_SRC Vector.cpp VectorHandler.cpp VectorHandlerCpu.cpp) # C++ code that depends on CUDA SDK libraries -set(Vector_CUDASDK_SRC - VectorHandlerCuda.cpp -) - -#and hip +set(Vector_CUDASDK_SRC VectorHandlerCuda.cpp) -set(Vector_ROCM_SRC - VectorHandlerHip.cpp -) +# and hip +set(Vector_ROCM_SRC VectorHandlerHip.cpp) # Header files to be installed -set(Vector_HEADER_INSTALL - Vector.hpp - VectorHandler.hpp - VectorKernels.hpp -) +set(Vector_HEADER_INSTALL Vector.hpp VectorHandler.hpp VectorKernels.hpp) # Add CUDA vector handler if CUDA support is enabled if(RESOLVE_USE_CUDA) @@ -46,11 +33,11 @@ add_library(resolve_vector SHARED ${Vector_SRC}) target_link_libraries(resolve_vector PRIVATE resolve_logger) # Link to ReSolve CUDA backend if CUDA is enabled -if (RESOLVE_USE_CUDA) +if(RESOLVE_USE_CUDA) target_link_libraries(resolve_vector PUBLIC resolve_backend_cuda) endif() -if (RESOLVE_USE_HIP) +if(RESOLVE_USE_HIP) target_link_libraries(resolve_vector PUBLIC resolve_backend_hip) endif() @@ -59,19 +46,14 @@ if(NOT RESOLVE_USE_GPU) target_link_libraries(resolve_vector PUBLIC resolve_backend_cpu) endif(NOT RESOLVE_USE_GPU) - -target_include_directories(resolve_vector INTERFACE - $ - $ +target_include_directories( + resolve_vector INTERFACE $ + $ ) # # TODO: Make this PRIVATE dependency (requires refactoring ReSolve code) # target_link_libraries(ReSolve PUBLIC resolve_tpl) -# install(TARGETS ReSolve -# EXPORT ReSolveTargets -# ARCHIVE DESTINATION lib -# LIBRARY DESTINATION lib) -# install include headers +# install(TARGETS ReSolve EXPORT ReSolveTargets ARCHIVE DESTINATION lib LIBRARY +# DESTINATION lib) install include headers install(FILES 
${Vector_HEADER_INSTALL} DESTINATION include/resolve/vector) - diff --git a/resolve/vector/Vector.cpp b/resolve/vector/Vector.cpp index 3b4f9e72e..5f092722e 100644 --- a/resolve/vector/Vector.cpp +++ b/resolve/vector/Vector.cpp @@ -2,358 +2,347 @@ #include #include -namespace ReSolve { namespace vector { +namespace ReSolve +{ +namespace vector +{ - Vector::Vector(index_type n): - n_(n), - k_(1), - n_current_(n_), - d_data_(nullptr), - h_data_(nullptr), - gpu_updated_(false), - cpu_updated_(false), - owns_gpu_data_(false), +Vector::Vector(index_type n) + : n_(n), k_(1), n_current_(n_), d_data_(nullptr), h_data_(nullptr), gpu_updated_(false), cpu_updated_(false), owns_gpu_data_(false), owns_cpu_data_(false) - { - } +{ +} - Vector::Vector(index_type n, index_type k) - : n_(n), - k_(k), - n_current_(n_), - d_data_(nullptr), - h_data_(nullptr), - gpu_updated_(false), - cpu_updated_(false), - owns_gpu_data_(false), - owns_cpu_data_(false) - { - } +Vector::Vector(index_type n, index_type k) + : n_(n), k_(k), n_current_(n_), d_data_(nullptr), h_data_(nullptr), gpu_updated_(false), cpu_updated_(false), owns_gpu_data_(false), + owns_cpu_data_(false) +{ +} - Vector::~Vector() - { - if (owns_cpu_data_) delete [] h_data_; - if (owns_gpu_data_) mem_.deleteOnDevice(d_data_); - } +Vector::~Vector() +{ + if (owns_cpu_data_) + delete[] h_data_; + if (owns_gpu_data_) + mem_.deleteOnDevice(d_data_); +} +index_type Vector::getSize() { return n_; } - index_type Vector::getSize() - { - return n_; - } +index_type Vector::getCurrentSize() { return n_current_; } - index_type Vector::getCurrentSize() - { - return n_current_; +index_type Vector::getNumVectors() { return k_; } + +void Vector::setData(real_type *data, memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + h_data_ = data; + cpu_updated_ = true; + gpu_updated_ = false; + break; + case DEVICE: + d_data_ = data; + gpu_updated_ = true; + cpu_updated_ = false; + break; } +} - 
index_type Vector::getNumVectors() - { - return k_; +void Vector::setDataUpdated(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + cpu_updated_ = true; + gpu_updated_ = false; + break; + case DEVICE: + gpu_updated_ = true; + cpu_updated_ = false; + break; } +} - void Vector::setData(real_type* data, memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - h_data_ = data; - cpu_updated_ = true; - gpu_updated_ = false; - break; - case DEVICE: - d_data_ = data; - gpu_updated_ = true; - cpu_updated_ = false; - break; - } +int Vector::update(real_type *data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) +{ + int control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { + control = 0; + } + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) { + control = 1; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { + control = 2; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)) { + control = 3; } - void Vector::setDataUpdated(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - cpu_updated_ = true; - gpu_updated_ = false; - break; - case DEVICE: - gpu_updated_ = true; - cpu_updated_ = false; - break; - } + if ((memspaceOut == memory::HOST) && (h_data_ == nullptr)) { + // allocate first + h_data_ = new real_type[n_ * k_]; + } + if ((memspaceOut == memory::DEVICE) && (d_data_ == nullptr)) { + // allocate first + mem_.allocateArrayOnDevice(&d_data_, n_ * k_); } - int Vector::update(real_type* data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) - { - int control=-1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::HOST)) { control = 0;} - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)){ control = 1;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == 
memory::HOST)) { control = 2;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::DEVICE)){ control = 3;} + switch (control) { + case 0: // cpu->cpu + mem_.copyArrayHostToHost(h_data_, data, n_current_ * k_); + owns_cpu_data_ = true; + cpu_updated_ = true; + gpu_updated_ = false; + break; + case 2: // gpu->cpu + mem_.copyArrayDeviceToHost(h_data_, data, n_current_ * k_); + owns_gpu_data_ = true; + cpu_updated_ = true; + gpu_updated_ = false; + break; + case 1: // cpu->gpu + mem_.copyArrayHostToDevice(d_data_, data, n_current_ * k_); + owns_gpu_data_ = true; + gpu_updated_ = true; + cpu_updated_ = false; + break; + case 3: // gpu->gpu + mem_.copyArrayDeviceToDevice(d_data_, data, n_current_ * k_); + owns_gpu_data_ = true; + gpu_updated_ = true; + cpu_updated_ = false; + break; + default: + return -1; + } + return 0; +} - if ((memspaceOut == memory::HOST) && (h_data_ == nullptr)) { - //allocate first - h_data_ = new real_type[n_ * k_]; - } - if ((memspaceOut == memory::DEVICE) && (d_data_ == nullptr)) { - //allocate first - mem_.allocateArrayOnDevice(&d_data_, n_ * k_); - } +real_type *Vector::getData(memory::MemorySpace memspace) { return this->getData(0, memspace); } - switch(control) { - case 0: //cpu->cpu - mem_.copyArrayHostToHost(h_data_, data, n_current_ * k_); - owns_cpu_data_ = true; - cpu_updated_ = true; - gpu_updated_ = false; - break; - case 2: //gpu->cpu - mem_.copyArrayDeviceToHost(h_data_, data, n_current_ * k_); - owns_gpu_data_ = true; - cpu_updated_ = true; - gpu_updated_ = false; - break; - case 1: //cpu->gpu - mem_.copyArrayHostToDevice(d_data_, data, n_current_ * k_); - owns_gpu_data_ = true; - gpu_updated_ = true; - cpu_updated_ = false; - break; - case 3: //gpu->gpu - mem_.copyArrayDeviceToDevice(d_data_, data, n_current_ * k_); - owns_gpu_data_ = true; - gpu_updated_ = true; - cpu_updated_ = false; - break; - default: - return -1; - } - return 0; +real_type *Vector::getData(index_type i, memory::MemorySpace memspace) +{ + if 
((memspace == memory::HOST) && (cpu_updated_ == false) && (gpu_updated_ == true)) { + // remember IN FIRST OUT SECOND!!! + copyData(memory::DEVICE, memspace); + owns_cpu_data_ = true; } - real_type* Vector::getData(memory::MemorySpace memspace) - { - return this->getData(0, memspace); + if ((memspace == memory::DEVICE) && (gpu_updated_ == false) && (cpu_updated_ == true)) { + copyData(memory::HOST, memspace); + owns_gpu_data_ = true; } - - real_type* Vector::getData(index_type i, memory::MemorySpace memspace) - { - if ((memspace == memory::HOST) && (cpu_updated_ == false) && (gpu_updated_ == true )) { - // remember IN FIRST OUT SECOND!!! - copyData(memory::DEVICE, memspace); - owns_cpu_data_ = true; - } - - if ((memspace == memory::DEVICE) && (gpu_updated_ == false) && (cpu_updated_ == true )) { - copyData(memory::HOST, memspace); - owns_gpu_data_ = true; - } - if (memspace == memory::HOST) { - return &h_data_[i * n_current_]; + if (memspace == memory::HOST) { + return &h_data_[i * n_current_]; + } else { + if (memspace == memory::DEVICE) { + return &d_data_[i * n_current_]; } else { - if (memspace == memory::DEVICE){ - return &d_data_[i * n_current_]; - } else { - return nullptr; - } + return nullptr; } } +} +int Vector::copyData(memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) +{ + int control = -1; + if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)) { + control = 0; + } + if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { + control = 1; + } - int Vector::copyData(memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) - { - int control=-1; - if ((memspaceIn == memory::HOST) && (memspaceOut == memory::DEVICE)){ control = 0;} - if ((memspaceIn == memory::DEVICE) && (memspaceOut == memory::HOST)) { control = 1;} + if ((memspaceOut == memory::HOST) && (h_data_ == nullptr)) { + // allocate first + h_data_ = new real_type[n_ * k_]; + } + if ((memspaceOut == memory::DEVICE) && (d_data_ == nullptr)) { + // 
allocate first + mem_.allocateArrayOnDevice(&d_data_, n_ * k_); + } + switch (control) { + case 0: // cpu->cuda + mem_.copyArrayHostToDevice(d_data_, h_data_, n_current_ * k_); + owns_gpu_data_ = true; + break; + case 1: // cuda->cpu + mem_.copyArrayDeviceToHost(h_data_, d_data_, n_current_ * k_); + owns_cpu_data_ = true; + break; + default: + return -1; + } + cpu_updated_ = true; + gpu_updated_ = true; + return 0; +} - if ((memspaceOut == memory::HOST) && (h_data_ == nullptr)) { - //allocate first - h_data_ = new real_type[n_ * k_]; +void Vector::allocate(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + delete[] h_data_; + h_data_ = new real_type[n_ * k_]; + owns_cpu_data_ = true; + break; + case DEVICE: + mem_.deleteOnDevice(d_data_); + mem_.allocateArrayOnDevice(&d_data_, n_ * k_); + owns_gpu_data_ = true; + break; + } +} + +void Vector::setToZero(memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + if (h_data_ == nullptr) { + h_data_ = new real_type[n_ * k_]; + owns_cpu_data_ = true; + } + for (int i = 0; i < n_ * k_; ++i) { + h_data_[i] = 0.0; } - if ((memspaceOut == memory::DEVICE) && (d_data_ == nullptr)) { - //allocate first + break; + case DEVICE: + if (d_data_ == nullptr) { mem_.allocateArrayOnDevice(&d_data_, n_ * k_); - } - switch(control) { - case 0: //cpu->cuda - mem_.copyArrayHostToDevice(d_data_, h_data_, n_current_ * k_); - owns_gpu_data_ = true; - break; - case 1: //cuda->cpu - mem_.copyArrayDeviceToHost(h_data_, d_data_, n_current_ * k_); - owns_cpu_data_ = true; - break; - default: - return -1; + owns_gpu_data_ = true; } - cpu_updated_ = true; - gpu_updated_ = true; - return 0; + mem_.setZeroArrayOnDevice(d_data_, n_ * k_); + break; } +} - void Vector::allocate(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - delete [] h_data_; - h_data_ = new real_type[n_ * k_]; - owns_cpu_data_ = 
true; - break; - case DEVICE: - mem_.deleteOnDevice(d_data_); - mem_.allocateArrayOnDevice(&d_data_, n_ * k_); - owns_gpu_data_ = true; - break; +void Vector::setToZero(index_type j, memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + if (h_data_ == nullptr) { + h_data_ = new real_type[n_ * k_]; + owns_cpu_data_ = true; } - } - - - void Vector::setToZero(memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - if (h_data_ == nullptr) { - h_data_ = new real_type[n_ * k_]; - owns_cpu_data_ = true; - } - for (int i = 0; i < n_ * k_; ++i){ - h_data_[i] = 0.0; - } - break; - case DEVICE: - if (d_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_data_, n_ * k_); - owns_gpu_data_ = true; - } - mem_.setZeroArrayOnDevice(d_data_, n_ * k_); - break; + for (int i = (n_current_)*j; i < n_current_ * (j + 1); ++i) { + h_data_[i] = 0.0; } - } - - void Vector::setToZero(index_type j, memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - if (h_data_ == nullptr) { - h_data_ = new real_type[n_ * k_]; - owns_cpu_data_ = true; - } - for (int i = (n_current_) * j; i < n_current_ * (j + 1); ++i) { - h_data_[i] = 0.0; - } - break; - case DEVICE: - if (d_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_data_, n_ * k_); - owns_gpu_data_ = true; - } - // TODO: We should not need to access raw data in this class - mem_.setZeroArrayOnDevice(&d_data_[j * n_current_], n_current_); - break; + break; + case DEVICE: + if (d_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_data_, n_ * k_); + owns_gpu_data_ = true; } + // TODO: We should not need to access raw data in this class + mem_.setZeroArrayOnDevice(&d_data_[j * n_current_], n_current_); + break; } +} - void Vector::setToConst(real_type C, memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - if (h_data_ == nullptr) { - h_data_ = new 
real_type[n_ * k_]; - owns_cpu_data_ = true; - } - for (int i = 0; i < n_ * k_; ++i){ - h_data_[i] = C; - } - break; - case DEVICE: - if (d_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_data_, n_ * k_); - owns_gpu_data_ = true; - } - set_array_const(n_ * k_, C, d_data_); - break; +void Vector::setToConst(real_type C, memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + if (h_data_ == nullptr) { + h_data_ = new real_type[n_ * k_]; + owns_cpu_data_ = true; } - } - - void Vector::setToConst(index_type j, real_type C, memory::MemorySpace memspace) - { - using namespace ReSolve::memory; - switch (memspace) { - case HOST: - if (h_data_ == nullptr) { - h_data_ = new real_type[n_ * k_]; - owns_cpu_data_ = true; - } - for (int i = j * n_current_; i < (j + 1 ) * n_current_ * k_; ++i){ - h_data_[i] = C; - } - break; - case DEVICE: - if (d_data_ == nullptr) { - mem_.allocateArrayOnDevice(&d_data_, n_ * k_); - owns_gpu_data_ = true; - } - set_array_const(n_current_ * 1, C, &d_data_[n_current_ * j]); - break; + for (int i = 0; i < n_ * k_; ++i) { + h_data_[i] = C; } + break; + case DEVICE: + if (d_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_data_, n_ * k_); + owns_gpu_data_ = true; + } + set_array_const(n_ * k_, C, d_data_); + break; } +} - real_type* Vector::getVectorData(index_type i, memory::MemorySpace memspace) - { - if (this->k_ < i){ - return nullptr; - } else { - return this->getData(i, memspace); +void Vector::setToConst(index_type j, real_type C, memory::MemorySpace memspace) +{ + using namespace ReSolve::memory; + switch (memspace) { + case HOST: + if (h_data_ == nullptr) { + h_data_ = new real_type[n_ * k_]; + owns_cpu_data_ = true; + } + for (int i = j * n_current_; i < (j + 1) * n_current_ * k_; ++i) { + h_data_[i] = C; + } + break; + case DEVICE: + if (d_data_ == nullptr) { + mem_.allocateArrayOnDevice(&d_data_, n_ * k_); + owns_gpu_data_ = true; } + set_array_const(n_current_ * 1, C, 
&d_data_[n_current_ * j]); + break; } +} - int Vector::setCurrentSize(int new_n_current) - { - if (new_n_current > n_) { - return -1; - } else { - n_current_ = new_n_current; - return 0; - } +real_type *Vector::getVectorData(index_type i, memory::MemorySpace memspace) +{ + if (this->k_ < i) { + return nullptr; + } else { + return this->getData(i, memspace); } +} - int Vector::deepCopyVectorData(real_type* dest, index_type i, memory::MemorySpace memspaceOut) - { - using namespace ReSolve::memory; - if (i > this->k_) { - return -1; - } else { - real_type* data = this->getData(i, memspaceOut); - switch (memspaceOut) { - case HOST: - mem_.copyArrayHostToHost(dest, data, n_current_); - break; - case DEVICE: - mem_.copyArrayDeviceToDevice(dest, data, n_current_); - break; - } - return 0; - } - } +int Vector::setCurrentSize(int new_n_current) +{ + if (new_n_current > n_) { + return -1; + } else { + n_current_ = new_n_current; + return 0; + } +} - int Vector::deepCopyVectorData(real_type* dest, memory::MemorySpace memspaceOut) - { - using namespace ReSolve::memory; - real_type* data = this->getData(memspaceOut); +int Vector::deepCopyVectorData(real_type *dest, index_type i, memory::MemorySpace memspaceOut) +{ + using namespace ReSolve::memory; + if (i > this->k_) { + return -1; + } else { + real_type *data = this->getData(i, memspaceOut); switch (memspaceOut) { - case HOST: - mem_.copyArrayHostToHost(dest, data, n_current_ * k_); - break; - case DEVICE: - mem_.copyArrayDeviceToDevice(dest, data, n_current_ * k_); - break; + case HOST: + mem_.copyArrayHostToHost(dest, data, n_current_); + break; + case DEVICE: + mem_.copyArrayDeviceToDevice(dest, data, n_current_); + break; } return 0; } +} + +int Vector::deepCopyVectorData(real_type *dest, memory::MemorySpace memspaceOut) +{ + using namespace ReSolve::memory; + real_type *data = this->getData(memspaceOut); + switch (memspaceOut) { + case HOST: + mem_.copyArrayHostToHost(dest, data, n_current_ * k_); + break; + case DEVICE: 
+ mem_.copyArrayDeviceToDevice(dest, data, n_current_ * k_); + break; + } + return 0; +} -}} // namespace ReSolve::vector +} // namespace vector +} // namespace ReSolve diff --git a/resolve/vector/Vector.hpp b/resolve/vector/Vector.hpp index 5f86ef7fd..d8e65398d 100644 --- a/resolve/vector/Vector.hpp +++ b/resolve/vector/Vector.hpp @@ -1,49 +1,53 @@ #pragma once -#include #include #include +#include + +namespace ReSolve +{ +namespace vector +{ +class Vector +{ + public: + Vector(index_type n); + Vector(index_type n, index_type k); + ~Vector(); -namespace ReSolve { namespace vector { - class Vector - { - public: - Vector(index_type n); - Vector(index_type n, index_type k); - ~Vector(); + int update(real_type *data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + real_type *getData(memory::MemorySpace memspace); + real_type *getData(index_type i, memory::MemorySpace memspace); // get pointer to i-th vector in multivector - int update(real_type* data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - real_type* getData(memory::MemorySpace memspace); - real_type* getData(index_type i, memory::MemorySpace memspace); // get pointer to i-th vector in multivector + index_type getSize(); + index_type getCurrentSize(); + index_type getNumVectors(); - index_type getSize(); - index_type getCurrentSize(); - index_type getNumVectors(); + void setDataUpdated(memory::MemorySpace memspace); + void setData(real_type *data, memory::MemorySpace memspace); + void allocate(memory::MemorySpace memspace); + void setToZero(memory::MemorySpace memspace); + void setToZero(index_type i, memory::MemorySpace memspace); // set i-th ivector to 0 + void setToConst(real_type C, memory::MemorySpace memspace); + void setToConst(index_type i, real_type C, memory::MemorySpace memspace); // set i-th vector to C - needed for unit tests, Gram Schmidt tests + int copyData(memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); + int setCurrentSize(index_type 
new_n_current); + real_type *getVectorData(index_type i, memory::MemorySpace memspace); // get ith vector data out of multivector + int deepCopyVectorData(real_type *dest, index_type i, memory::MemorySpace memspace); + int deepCopyVectorData(real_type *dest, memory::MemorySpace memspace); // copy FULL multivector - void setDataUpdated(memory::MemorySpace memspace); - void setData(real_type* data, memory::MemorySpace memspace); - void allocate(memory::MemorySpace memspace); - void setToZero(memory::MemorySpace memspace); - void setToZero(index_type i, memory::MemorySpace memspace); // set i-th ivector to 0 - void setToConst(real_type C, memory::MemorySpace memspace); - void setToConst(index_type i, real_type C, memory::MemorySpace memspace); // set i-th vector to C - needed for unit tests, Gram Schmidt tests - int copyData(memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - int setCurrentSize(index_type new_n_current); - real_type* getVectorData(index_type i, memory::MemorySpace memspace); // get ith vector data out of multivector - int deepCopyVectorData(real_type* dest, index_type i, memory::MemorySpace memspace); - int deepCopyVectorData(real_type* dest, memory::MemorySpace memspace); //copy FULL multivector - - private: - index_type n_; ///< size - index_type k_; ///< k_ = 1 for vectors and k_>1 for multivectors (multivectors are accessed column-wise). - index_type n_current_; // if vectors dynamically change size, "current n_" keeps track of this. Needed for some solver implementations. - real_type* d_data_{nullptr}; - real_type* h_data_{nullptr}; - bool gpu_updated_; - bool cpu_updated_; + private: + index_type n_; ///< size + index_type k_; ///< k_ = 1 for vectors and k_>1 for multivectors (multivectors are accessed column-wise). + index_type n_current_; // if vectors dynamically change size, "current n_" keeps track of this. Needed for some solver implementations. 
+ real_type *d_data_{nullptr}; + real_type *h_data_{nullptr}; + bool gpu_updated_; + bool cpu_updated_; - bool owns_gpu_data_{false}; - bool owns_cpu_data_{false}; + bool owns_gpu_data_{false}; + bool owns_cpu_data_{false}; - MemoryHandler mem_; ///< Device memory manager object - }; -}} // namespace ReSolve::vector + MemoryHandler mem_; ///< Device memory manager object +}; +} // namespace vector +} // namespace ReSolve diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 49c943857..cb4bf00ed 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -1,12 +1,12 @@ -#include #include +#include +#include "VectorHandler.hpp" #include #include -#include -#include #include -#include "VectorHandler.hpp" +#include +#include #ifdef RESOLVE_USE_CUDA #include @@ -15,228 +15,232 @@ #include #endif -namespace ReSolve { - using out = io::Logger; - - /** - * @brief empty constructor that does absolutely nothing - */ - VectorHandler::VectorHandler() - { - cpuImpl_ = new VectorHandlerCpu(); - isCpuEnabled_ = true; - } - - /** - * @brief constructor - * - * @param new_workspace - workspace to be set - */ - VectorHandler::VectorHandler(LinAlgWorkspaceCpu* new_workspace) - { - cpuImpl_ = new VectorHandlerCpu(new_workspace); - isCpuEnabled_ = true; - } +namespace ReSolve +{ +using out = io::Logger; + +/** + * @brief empty constructor that does absolutely nothing + */ +VectorHandler::VectorHandler() +{ + cpuImpl_ = new VectorHandlerCpu(); + isCpuEnabled_ = true; +} + +/** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ +VectorHandler::VectorHandler(LinAlgWorkspaceCpu *new_workspace) +{ + cpuImpl_ = new VectorHandlerCpu(new_workspace); + isCpuEnabled_ = true; +} #ifdef RESOLVE_USE_CUDA - /** - * @brief constructor - * - * @param new_workspace - workspace to be set - */ - VectorHandler::VectorHandler(LinAlgWorkspaceCUDA* new_workspace) - { - cudaImpl_ = new VectorHandlerCuda(new_workspace); - 
cpuImpl_ = new VectorHandlerCpu(); - - isCudaEnabled_ = true; - isCpuEnabled_ = true; - } +/** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ +VectorHandler::VectorHandler(LinAlgWorkspaceCUDA *new_workspace) +{ + cudaImpl_ = new VectorHandlerCuda(new_workspace); + cpuImpl_ = new VectorHandlerCpu(); + + isCudaEnabled_ = true; + isCpuEnabled_ = true; +} #endif #ifdef RESOLVE_USE_HIP - /** - * @brief constructor - * - * @param new_workspace - workspace to be set - */ - VectorHandler::VectorHandler(LinAlgWorkspaceHIP* new_workspace) - { - hipImpl_ = new VectorHandlerHip(new_workspace); - cpuImpl_ = new VectorHandlerCpu(); - - isHipEnabled_ = true; - isCpuEnabled_ = true; - } +/** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ +VectorHandler::VectorHandler(LinAlgWorkspaceHIP *new_workspace) +{ + hipImpl_ = new VectorHandlerHip(new_workspace); + cpuImpl_ = new VectorHandlerCpu(); + + isHipEnabled_ = true; + isCpuEnabled_ = true; +} #endif - /** - * @brief destructor - */ - VectorHandler::~VectorHandler() - { - delete cpuImpl_; - if (isCudaEnabled_) delete cudaImpl_; - if (isHipEnabled_) delete hipImpl_; - //delete the workspace TODO - } - - /** - * @brief dot product of two vectors i.e, a = x^Ty - * - * @param[in] x The first vector - * @param[in] y The second vector - * @param[in] memspace String containg memspace (cpu or cuda or hip) - * - * @return dot product (real number) of _x_ and _y_ - */ - - real_type VectorHandler::dot(vector::Vector* x, vector::Vector* y, std::string memspace) - { - if (memspace == "cuda" ) { - return cudaImpl_->dot(x, y); +/** + * @brief destructor + */ +VectorHandler::~VectorHandler() +{ + delete cpuImpl_; + if (isCudaEnabled_) + delete cudaImpl_; + if (isHipEnabled_) + delete hipImpl_; + // delete the workspace TODO +} + +/** + * @brief dot product of two vectors i.e, a = x^Ty + * + * @param[in] x The first vector + * @param[in] y The second vector + * @param[in] memspace 
String containg memspace (cpu or cuda or hip) + * + * @return dot product (real number) of _x_ and _y_ + */ + +real_type VectorHandler::dot(vector::Vector *x, vector::Vector *y, std::string memspace) +{ + if (memspace == "cuda") { + return cudaImpl_->dot(x, y); + } else { + if (memspace == "hip") { + return hipImpl_->dot(x, y); + } else if (memspace == "cpu") { + return cpuImpl_->dot(x, y); } else { - if (memspace == "hip") { - return hipImpl_->dot(x, y); - } else if (memspace == "cpu") { - return cpuImpl_->dot(x, y); - } else { - out::error() << "Not implemented (yet)" << std::endl; - return NAN; - } + out::error() << "Not implemented (yet)" << std::endl; + return NAN; } } - - /** - * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant - * - * @param[in] alpha The constant - * @param[in,out] x The vector - * @param memspace string containg memspace (cpu or cuda or hip) - * - */ - void VectorHandler::scal(const real_type* alpha, vector::Vector* x, std::string memspace) - { - if (memspace == "cuda" ) { - cudaImpl_->scal(alpha, x); - } else if (memspace == "hip") { - hipImpl_->scal(alpha, x); +} + +/** + * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant + * + * @param[in] alpha The constant + * @param[in,out] x The vector + * @param memspace string containg memspace (cpu or cuda or hip) + * + */ +void VectorHandler::scal(const real_type *alpha, vector::Vector *x, std::string memspace) +{ + if (memspace == "cuda") { + cudaImpl_->scal(alpha, x); + } else if (memspace == "hip") { + hipImpl_->scal(alpha, x); + } else { + if (memspace == "cpu") { + cpuImpl_->scal(alpha, x); } else { - if (memspace == "cpu") { - cpuImpl_->scal(alpha, x); - } else { - out::error() << "Not implemented (yet)" << std::endl; - } + out::error() << "Not implemented (yet)" << std::endl; } } - - /** - * @brief axpy i.e, y = alpha*x+y where alpha is a constant - * - * @param[in] alpha The constant - * @param[in] x The first vector - * 
@param[in,out] y The second vector (result is return in y) - * @param[in] memspace String containg memspace (cpu or cuda or hip) - * - */ - void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace) - { - //AXPY: y = alpha * x + y - if (memspace == "cuda" ) { - cudaImpl_->axpy(alpha, x, y); +} + +/** + * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * + * @param[in] alpha The constant + * @param[in] x The first vector + * @param[in,out] y The second vector (result is return in y) + * @param[in] memspace String containg memspace (cpu or cuda or hip) + * + */ +void VectorHandler::axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y, std::string memspace) +{ + // AXPY: y = alpha * x + y + if (memspace == "cuda") { + cudaImpl_->axpy(alpha, x, y); + } else { + if (memspace == "hip") { + hipImpl_->axpy(alpha, x, y); } else { - if (memspace == "hip" ) { - hipImpl_->axpy(alpha, x, y); + if (memspace == "cpu") { + cpuImpl_->axpy(alpha, x, y); } else { - if (memspace == "cpu") { - cpuImpl_->axpy(alpha, x, y); - } else { - out::error() <<"Not implemented (yet)" << std::endl; - } + out::error() << "Not implemented (yet)" << std::endl; } } } - - /** - * @brief gemv computes matrix-vector product where both matrix and vectors are dense. 
- * i.e., x = beta*x + alpha*V*y - * - * @param[in] Transpose - yes (T) or no (N) - * @param[in] n Number of rows in (non-transposed) matrix - * @param[in] k Number of columns in (non-transposed) - * @param[in] alpha Constant real number - * @param[in] beta Constant real number - * @param[in] V Multivector containing the matrix, organized columnwise - * @param[in] y Vector, k x 1 if N and n x 1 if T - * @param[in,out] x Vector, n x 1 if N and k x 1 if T - * @param[in] memspace cpu or cuda or hip (for now) - * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 - * - */ - void VectorHandler::gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, std::string memspace) - { - if (memspace == "cuda") { - cudaImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); - } else if (memspace == "hip") { - hipImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); - } else if (memspace == "cpu") { - cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); - } else { - out::error() << "Not implemented (yet)" << std::endl; - } +} + +/** + * @brief gemv computes matrix-vector product where both matrix and vectors are dense. 
+ * i.e., x = beta*x + alpha*V*y + * + * @param[in] Transpose - yes (T) or no (N) + * @param[in] n Number of rows in (non-transposed) matrix + * @param[in] k Number of columns in (non-transposed) + * @param[in] alpha Constant real number + * @param[in] beta Constant real number + * @param[in] V Multivector containing the matrix, organized columnwise + * @param[in] y Vector, k x 1 if N and n x 1 if T + * @param[in,out] x Vector, n x 1 if N and k x 1 if T + * @param[in] memspace cpu or cuda or hip (for now) + * + * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * + */ +void VectorHandler::gemv(std::string transpose, index_type n, index_type k, const real_type *alpha, const real_type *beta, vector::Vector *V, + vector::Vector *y, vector::Vector *x, std::string memspace) +{ + if (memspace == "cuda") { + cudaImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + } else if (memspace == "hip") { + hipImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + } else if (memspace == "cpu") { + cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + } else { + out::error() << "Not implemented (yet)" << std::endl; } - - /** - * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector - * - * @param[in] size number of elements in y - * @param[in] alpha vector size k x 1 - * @param[in] x (multi)vector size size x k - * @param[in,out] y vector size size x 1 (this is where the result is stored) - * @param[in] memspace string containg memspace (cpu or cuda or hip) - * - * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() - * - */ - void VectorHandler::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, std::string memspace) - { - using namespace constants; - if (memspace == "cuda") { - cudaImpl_->massAxpy(size, alpha, k, x, y); - } else if (memspace == "hip") { - hipImpl_->massAxpy(size, alpha, k, x, y); - } else if (memspace == "cpu") { - cpuImpl_->massAxpy(size, alpha, k, x, y); - } else { - out::error() << "Not implemented 
(yet)" << std::endl; - } +} + +/** + * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * + * @param[in] size number of elements in y + * @param[in] alpha vector size k x 1 + * @param[in] x (multi)vector size size x k + * @param[in,out] y vector size size x 1 (this is where the result is stored) + * @param[in] memspace string containg memspace (cpu or cuda or hip) + * + * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() + * + */ +void VectorHandler::massAxpy(index_type size, vector::Vector *alpha, index_type k, vector::Vector *x, vector::Vector *y, std::string memspace) +{ + using namespace constants; + if (memspace == "cuda") { + cudaImpl_->massAxpy(size, alpha, k, x, y); + } else if (memspace == "hip") { + hipImpl_->massAxpy(size, alpha, k, x, y); + } else if (memspace == "cpu") { + cpuImpl_->massAxpy(size, alpha, k, x, y); + } else { + out::error() << "Not implemented (yet)" << std::endl; } - - /** - * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) - * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) - * - * @param[in] size Number of elements in a single vector in V - * @param[in] V Multivector; k vectors size n x 1 each - * @param[in] k Number of vectors in V - * @param[in] x Multivector; 2 vectors size n x 1 each - * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) - * @param[in] memspace String containg memspace (cpu or cuda or hip) - * - * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated - * - */ - void VectorHandler::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, std::string memspace) - { - if (memspace == "cuda") { - cudaImpl_->massDot2Vec(size, V, k, x, res); - } else if (memspace == "hip") { - hipImpl_->massDot2Vec(size, V, k, x, res); - } else if (memspace == "cpu") { - cpuImpl_->massDot2Vec(size, 
V, k, x, res); - } else { - out::error() << "Not implemented (yet)" << std::endl; - } +} + +/** + * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) + * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) + * + * @param[in] size Number of elements in a single vector in V + * @param[in] V Multivector; k vectors size n x 1 each + * @param[in] k Number of vectors in V + * @param[in] x Multivector; 2 vectors size n x 1 each + * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace String containg memspace (cpu or cuda or hip) + * + * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * + */ +void VectorHandler::massDot2Vec(index_type size, vector::Vector *V, index_type k, vector::Vector *x, vector::Vector *res, std::string memspace) +{ + if (memspace == "cuda") { + cudaImpl_->massDot2Vec(size, V, k, x, res); + } else if (memspace == "hip") { + hipImpl_->massDot2Vec(size, V, k, x, res); + } else if (memspace == "cpu") { + cpuImpl_->massDot2Vec(size, V, k, x, res); + } else { + out::error() << "Not implemented (yet)" << std::endl; } +} } // namespace ReSolve diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 02d426b5a..fad13bda9 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -2,66 +2,61 @@ #include namespace ReSolve -{ - namespace vector - { - class Vector; - } - class VectorHandlerImpl; - class LinAlgWorkspaceCpu; - class LinAlgWorkspaceCUDA; - class LinAlgWorkspaceHIP; +{ +namespace vector +{ +class Vector; } +class VectorHandlerImpl; +class LinAlgWorkspaceCpu; +class LinAlgWorkspaceCUDA; +class LinAlgWorkspaceHIP; +} // namespace ReSolve - -namespace ReSolve { //namespace vector { - class VectorHandler { - public: - VectorHandler(); - VectorHandler(LinAlgWorkspaceCpu* new_workspace); - 
VectorHandler(LinAlgWorkspaceCUDA* new_workspace); - VectorHandler(LinAlgWorkspaceHIP* new_workspace); - ~VectorHandler(); - - //y = alpha x + y - void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace); - - //dot: x \cdot y - real_type dot(vector::Vector* x, vector::Vector* y, std::string memspace); - - //scal = alpha * x - void scal(const real_type* alpha, vector::Vector* x, std::string memspace); - - //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y, std::string memspace); - - //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise - //Size = n - void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, std::string memspace); - - /** gemv: - * if `transpose = N` (no), `x = beta*x + alpha*V*y`, - * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. - * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, - * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
- */ - void gemv(std::string transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x, - std::string memspace); - private: - VectorHandlerImpl* cpuImpl_{nullptr}; - VectorHandlerImpl* cudaImpl_{nullptr}; - VectorHandlerImpl* hipImpl_{nullptr}; - - bool isCpuEnabled_{false}; - bool isCudaEnabled_{false}; - bool isHipEnabled_{false}; - }; - -} //} // namespace ReSolve::vector +namespace ReSolve +{ // namespace vector { +class VectorHandler +{ + public: + VectorHandler(); + VectorHandler(LinAlgWorkspaceCpu *new_workspace); + VectorHandler(LinAlgWorkspaceCUDA *new_workspace); + VectorHandler(LinAlgWorkspaceHIP *new_workspace); + ~VectorHandler(); + + // y = alpha x + y + void axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y, std::string memspace); + + // dot: x \cdot y + real_type dot(vector::Vector *x, vector::Vector *y, std::string memspace); + + // scal = alpha * x + void scal(const real_type *alpha, vector::Vector *x, std::string memspace); + + // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + void massAxpy(index_type size, vector::Vector *alpha, index_type k, vector::Vector *x, vector::Vector *y, std::string memspace); + + // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + // Size = n + void massDot2Vec(index_type size, vector::Vector *V, index_type k, vector::Vector *x, vector::Vector *res, std::string memspace); + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
+ */ + void gemv(std::string transpose, index_type n, index_type k, const real_type *alpha, const real_type *beta, vector::Vector *V, vector::Vector *y, + vector::Vector *x, std::string memspace); + + private: + VectorHandlerImpl *cpuImpl_{nullptr}; + VectorHandlerImpl *cudaImpl_{nullptr}; + VectorHandlerImpl *hipImpl_{nullptr}; + + bool isCpuEnabled_{false}; + bool isCudaEnabled_{false}; + bool isHipEnabled_{false}; +}; + +} // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index a8317a89f..349975137 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -1,166 +1,158 @@ #include -#include +#include "VectorHandlerCpu.hpp" #include +#include #include -#include #include -#include "VectorHandlerCpu.hpp" +#include -namespace ReSolve { - using out = io::Logger; +namespace ReSolve +{ +using out = io::Logger; - /** - * @brief empty constructor that does absolutely nothing - */ - VectorHandlerCpu::VectorHandlerCpu() - { - } +/** + * @brief empty constructor that does absolutely nothing + */ +VectorHandlerCpu::VectorHandlerCpu() {} - /** - * @brief constructor - * - * @param new_workspace - workspace to be set - */ - VectorHandlerCpu:: VectorHandlerCpu(LinAlgWorkspaceCpu* new_workspace) - { - workspace_ = new_workspace; - } +/** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ +VectorHandlerCpu::VectorHandlerCpu(LinAlgWorkspaceCpu *new_workspace) { workspace_ = new_workspace; } - /** - * @brief destructor - */ - VectorHandlerCpu::~VectorHandlerCpu() - { - //delete the workspace TODO - } +/** + * @brief destructor + */ +VectorHandlerCpu::~VectorHandlerCpu() +{ + // delete the workspace TODO +} - /** - * @brief dot product of two vectors i.e, a = x^Ty - * - * @param[in] x The first vector - * @param[in] y The second vector - * @param[in] memspace String containg memspace (cpu or cuda) - * - * @return dot product (real number) of _x_ and _y_ - */ +/** 
+ * @brief dot product of two vectors i.e, a = x^Ty + * + * @param[in] x The first vector + * @param[in] y The second vector + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @return dot product (real number) of _x_ and _y_ + */ - real_type VectorHandlerCpu::dot(vector::Vector* x, vector::Vector* y) - { - real_type* x_data = x->getData(memory::HOST); - real_type* y_data = y->getData(memory::HOST); - real_type sum = 0.0; - real_type c = 0.0; - // real_type t, y; - for (int i = 0; i < x->getSize(); ++i) { - real_type y = (x_data[i] * y_data[i]) - c; - real_type t = sum + y; - c = (t - sum) - y; - sum = t; - // sum += (x_data[i] * y_data[i]); - } - return sum; +real_type VectorHandlerCpu::dot(vector::Vector *x, vector::Vector *y) +{ + real_type *x_data = x->getData(memory::HOST); + real_type *y_data = y->getData(memory::HOST); + real_type sum = 0.0; + real_type c = 0.0; + // real_type t, y; + for (int i = 0; i < x->getSize(); ++i) { + real_type y = (x_data[i] * y_data[i]) - c; + real_type t = sum + y; + c = (t - sum) - y; + sum = t; + // sum += (x_data[i] * y_data[i]); } + return sum; +} - /** - * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant - * - * @param[in] alpha The constant - * @param[in,out] x The vector - * @param memspace string containg memspace (cpu or cuda) - * - */ - void VectorHandlerCpu::scal(const real_type* alpha, vector::Vector* x) - { - real_type* x_data = x->getData(memory::HOST); +/** + * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant + * + * @param[in] alpha The constant + * @param[in,out] x The vector + * @param memspace string containg memspace (cpu or cuda) + * + */ +void VectorHandlerCpu::scal(const real_type *alpha, vector::Vector *x) +{ + real_type *x_data = x->getData(memory::HOST); - for (int i = 0; i < x->getSize(); ++i){ - x_data[i] *= (*alpha); - } + for (int i = 0; i < x->getSize(); ++i) { + x_data[i] *= (*alpha); } +} - /** - * @brief axpy i.e, y = 
alpha*x+y where alpha is a constant - * - * @param[in] alpha The constant - * @param[in] x The first vector - * @param[in,out] y The second vector (result is return in y) - * @param[in] memspace String containg memspace (cpu or cuda) - * - */ - void VectorHandlerCpu::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) - { - //AXPY: y = alpha * x + y - real_type* x_data = x->getData(memory::HOST); - real_type* y_data = y->getData(memory::HOST); - for (int i = 0; i < x->getSize(); ++i) { - y_data[i] = (*alpha) * x_data[i] + y_data[i]; - } +/** + * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * + * @param[in] alpha The constant + * @param[in] x The first vector + * @param[in,out] y The second vector (result is return in y) + * @param[in] memspace String containg memspace (cpu or cuda) + * + */ +void VectorHandlerCpu::axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y) +{ + // AXPY: y = alpha * x + y + real_type *x_data = x->getData(memory::HOST); + real_type *y_data = y->getData(memory::HOST); + for (int i = 0; i < x->getSize(); ++i) { + y_data[i] = (*alpha) * x_data[i] + y_data[i]; } +} - /** - * @brief gemv computes matrix-vector product where both matrix and vectors are dense. 
- * i.e., x = beta*x + alpha*V*y - * - * @param[in] Transpose - yes (T) or no (N) - * @param[in] n Number of rows in (non-transposed) matrix - * @param[in] k Number of columns in (non-transposed) - * @param[in] alpha Constant real number - * @param[in] beta Constant real number - * @param[in] V Multivector containing the matrix, organized columnwise - * @param[in] y Vector, k x 1 if N and n x 1 if T - * @param[in,out] x Vector, n x 1 if N and k x 1 if T - * @param[in] memspace cpu or cuda (for now) - * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 - * - */ - void VectorHandlerCpu::gemv(std::string /* transpose */, - index_type /* n */, - index_type /* k */, - const real_type* /* alpha */, - const real_type* /* beta */, - vector::Vector* /* V */, - vector::Vector* /* y */, - vector::Vector* /* x */) - { - out::error() << "Not implemented (yet)" << std::endl; - } +/** + * @brief gemv computes matrix-vector product where both matrix and vectors are dense. + * i.e., x = beta*x + alpha*V*y + * + * @param[in] Transpose - yes (T) or no (N) + * @param[in] n Number of rows in (non-transposed) matrix + * @param[in] k Number of columns in (non-transposed) + * @param[in] alpha Constant real number + * @param[in] beta Constant real number + * @param[in] V Multivector containing the matrix, organized columnwise + * @param[in] y Vector, k x 1 if N and n x 1 if T + * @param[in,out] x Vector, n x 1 if N and k x 1 if T + * @param[in] memspace cpu or cuda (for now) + * + * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * + */ +void VectorHandlerCpu::gemv(std::string /* transpose */, index_type /* n */, index_type /* k */, const real_type * /* alpha */, + const real_type * /* beta */, vector::Vector * /* V */, vector::Vector * /* y */, vector::Vector * /* x */) +{ + out::error() << "Not implemented (yet)" << std::endl; +} - /** - * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector - * - * @param[in] size number of elements in y - * @param[in] alpha vector size 
k x 1 - * @param[in] x (multi)vector size size x k - * @param[in,out] y vector size size x 1 (this is where the result is stored) - * @param[in] memspace string containg memspace (cpu or cuda) - * - * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() - * - */ - void VectorHandlerCpu::massAxpy(index_type /* size */, vector::Vector* /* alpha */, index_type /* k */, vector::Vector* /* x */, vector::Vector* /* y */) - { - out::error() << "Not implemented (yet)" << std::endl; - } +/** + * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * + * @param[in] size number of elements in y + * @param[in] alpha vector size k x 1 + * @param[in] x (multi)vector size size x k + * @param[in,out] y vector size size x 1 (this is where the result is stored) + * @param[in] memspace string containg memspace (cpu or cuda) + * + * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() + * + */ +void VectorHandlerCpu::massAxpy(index_type /* size */, vector::Vector * /* alpha */, index_type /* k */, vector::Vector * /* x */, + vector::Vector * /* y */) +{ + out::error() << "Not implemented (yet)" << std::endl; +} - /** - * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) - * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) - * - * @param[in] size Number of elements in a single vector in V - * @param[in] V Multivector; k vectors size n x 1 each - * @param[in] k Number of vectors in V - * @param[in] x Multivector; 2 vectors size n x 1 each - * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) - * @param[in] memspace String containg memspace (cpu or cuda) - * - * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated - * - */ - void VectorHandlerCpu::massDot2Vec(index_type /* size */, vector::Vector* /* V */, index_type /* k */, vector::Vector* /* x */, vector::Vector* /* res */) - { - out::error() << "Not 
implemented (yet)" << std::endl; - } +/** + * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) + * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) + * + * @param[in] size Number of elements in a single vector in V + * @param[in] V Multivector; k vectors size n x 1 each + * @param[in] k Number of vectors in V + * @param[in] x Multivector; 2 vectors size n x 1 each + * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * + */ +void VectorHandlerCpu::massDot2Vec(index_type /* size */, vector::Vector * /* V */, index_type /* k */, vector::Vector * /* x */, + vector::Vector * /* res */) +{ + out::error() << "Not implemented (yet)" << std::endl; +} } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index 3f2f6133d..4c2f18049 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -2,56 +2,51 @@ #include namespace ReSolve -{ - namespace vector - { - class Vector; - } - class LinAlgWorkspaceCpu; - class VectorHandlerImpl; +{ +namespace vector +{ +class Vector; } +class LinAlgWorkspaceCpu; +class VectorHandlerImpl; +} // namespace ReSolve - -namespace ReSolve { //namespace vector { - class VectorHandlerCpu : public VectorHandlerImpl - { - public: - VectorHandlerCpu(); - VectorHandlerCpu(LinAlgWorkspaceCpu* workspace); - virtual ~VectorHandlerCpu(); - - //y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); - - //dot: x \cdot y - virtual real_type dot(vector::Vector* x, vector::Vector* y); - - //scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); - - //mass axpy: x*alpha + y where x is [n x k] and alpha is [k 
x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); - - //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise - //Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); - - /** gemv: - * if `transpose = N` (no), `x = beta*x + alpha*V*y`, - * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. - * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, - * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. - */ - virtual void gemv(std::string transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); - private: - LinAlgWorkspaceCpu* workspace_; - }; - -} //} // namespace ReSolve::vector +namespace ReSolve +{ // namespace vector { +class VectorHandlerCpu : public VectorHandlerImpl +{ + public: + VectorHandlerCpu(); + VectorHandlerCpu(LinAlgWorkspaceCpu *workspace); + virtual ~VectorHandlerCpu(); + + // y = alpha x + y + virtual void axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y); + + // dot: x \cdot y + virtual real_type dot(vector::Vector *x, vector::Vector *y); + + // scal = alpha * x + virtual void scal(const real_type *alpha, vector::Vector *x); + + // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector *alpha, index_type k, vector::Vector *x, vector::Vector *y); + + // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + // Size = n + virtual void massDot2Vec(index_type size, vector::Vector *V, index_type k, vector::Vector *x, vector::Vector *res); + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 
1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. + */ + virtual void gemv(std::string transpose, index_type n, index_type k, const real_type *alpha, const real_type *beta, vector::Vector *V, + vector::Vector *y, vector::Vector *x); + + private: + LinAlgWorkspaceCpu *workspace_; +}; + +} // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 5871fd5a1..1909c3820 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -1,236 +1,199 @@ #include -#include +#include "VectorHandlerCuda.hpp" #include +#include #include -#include #include -#include "VectorHandlerCuda.hpp" - -namespace ReSolve { - using out = io::Logger; - - /** - * @brief empty constructor that does absolutely nothing - */ - VectorHandlerCuda::VectorHandlerCuda() - { - } - - /** - * @brief constructor - * - * @param new_workspace - workspace to be set - */ - VectorHandlerCuda:: VectorHandlerCuda(LinAlgWorkspaceCUDA* new_workspace) - { - workspace_ = new_workspace; - } +#include - /** - * @brief destructor - */ - VectorHandlerCuda::~VectorHandlerCuda() - { - //delete the workspace TODO +namespace ReSolve +{ +using out = io::Logger; + +/** + * @brief empty constructor that does absolutely nothing + */ +VectorHandlerCuda::VectorHandlerCuda() {} + +/** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ +VectorHandlerCuda::VectorHandlerCuda(LinAlgWorkspaceCUDA *new_workspace) { workspace_ = new_workspace; } + +/** + * @brief destructor + */ +VectorHandlerCuda::~VectorHandlerCuda() +{ + // delete the workspace TODO +} + +/** + * @brief dot product of two vectors i.e, a = x^Ty + * + * @param[in] x The first vector + * @param[in] y The second vector + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @return dot product (real number) of _x_ and _y_ + */ + +real_type 
VectorHandlerCuda::dot(vector::Vector *x, vector::Vector *y) +{ + LinAlgWorkspaceCUDA *workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + double nrm = 0.0; + cublasStatus_t st = cublasDdot(handle_cublas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); + if (st != 0) { + printf("dot product crashed with code %d \n", st); } - - /** - * @brief dot product of two vectors i.e, a = x^Ty - * - * @param[in] x The first vector - * @param[in] y The second vector - * @param[in] memspace String containg memspace (cpu or cuda) - * - * @return dot product (real number) of _x_ and _y_ - */ - - real_type VectorHandlerCuda::dot(vector::Vector* x, vector::Vector* y) - { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - double nrm = 0.0; - cublasStatus_t st= cublasDdot (handle_cublas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); - if (st!=0) {printf("dot product crashed with code %d \n", st);} - return nrm; + return nrm; +} + +/** + * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant + * + * @param[in] alpha The constant + * @param[in,out] x The vector + * @param memspace string containg memspace (cpu or cuda) + * + */ +void VectorHandlerCuda::scal(const real_type *alpha, vector::Vector *x) +{ + LinAlgWorkspaceCUDA *workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); + if (st != 0) { + ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; } - - /** - * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant - * - * @param[in] alpha The constant - * @param[in,out] x The vector - * @param memspace string containg memspace (cpu or cuda) - * - */ - void VectorHandlerCuda::scal(const 
real_type* alpha, vector::Vector* x) - { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); - if (st!=0) { - ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; - } +} + +/** + * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * + * @param[in] alpha The constant + * @param[in] x The first vector + * @param[in,out] y The second vector (result is return in y) + * @param[in] memspace String containg memspace (cpu or cuda) + * + */ +void VectorHandlerCuda::axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y) +{ + // AXPY: y = alpha * x + y + LinAlgWorkspaceCUDA *workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDaxpy(handle_cublas, x->getSize(), alpha, x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1); +} + +/** + * @brief gemv computes matrix-vector product where both matrix and vectors are dense. 
+ * i.e., x = beta*x + alpha*V*y + * + * @param[in] Transpose - yes (T) or no (N) + * @param[in] n Number of rows in (non-transposed) matrix + * @param[in] k Number of columns in (non-transposed) + * @param[in] alpha Constant real number + * @param[in] beta Constant real number + * @param[in] V Multivector containing the matrix, organized columnwise + * @param[in] y Vector, k x 1 if N and n x 1 if T + * @param[in,out] x Vector, n x 1 if N and k x 1 if T + * @param[in] memspace cpu or cuda (for now) + * + * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * + */ +void VectorHandlerCuda::gemv(std::string transpose, index_type n, index_type k, const real_type *alpha, const real_type *beta, vector::Vector *V, + vector::Vector *y, vector::Vector *x) +{ + LinAlgWorkspaceCUDA *workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + if (transpose == "T") { + + cublasDgemv(handle_cublas, CUBLAS_OP_T, n, k, alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, beta, + x->getData(memory::DEVICE), 1); + + } else { + cublasDgemv(handle_cublas, CUBLAS_OP_N, n, k, alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, beta, + x->getData(memory::DEVICE), 1); } - - /** - * @brief axpy i.e, y = alpha*x+y where alpha is a constant - * - * @param[in] alpha The constant - * @param[in] x The first vector - * @param[in,out] y The second vector (result is return in y) - * @param[in] memspace String containg memspace (cpu or cuda) - * - */ - void VectorHandlerCuda::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) - { - //AXPY: y = alpha * x + y - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDaxpy(handle_cublas, - x->getSize(), - alpha, - x->getData(memory::DEVICE), - 1, - y->getData(memory::DEVICE), - 1); +} + +/** + * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * + * @param[in] size number of 
elements in y + * @param[in] alpha vector size k x 1 + * @param[in] x (multi)vector size size x k + * @param[in,out] y vector size size x 1 (this is where the result is stored) + * @param[in] memspace string containg memspace (cpu or cuda) + * + * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() + * + */ +void VectorHandlerCuda::massAxpy(index_type size, vector::Vector *alpha, index_type k, vector::Vector *x, vector::Vector *y) +{ + using namespace constants; + if (k < 200) { + mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE), alpha->getData(memory::DEVICE)); + } else { + LinAlgWorkspaceCUDA *workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDgemm(handle_cublas, CUBLAS_OP_N, CUBLAS_OP_N, + size, // m + 1, // n + k + 1, // k + &MINUSONE, // alpha + x->getData(memory::DEVICE), // A + size, // lda + alpha->getData(memory::DEVICE), // B + k + 1, // ldb + &ONE, + y->getData(memory::DEVICE), // c + size); // ldc } - - /** - * @brief gemv computes matrix-vector product where both matrix and vectors are dense. 
- * i.e., x = beta*x + alpha*V*y - * - * @param[in] Transpose - yes (T) or no (N) - * @param[in] n Number of rows in (non-transposed) matrix - * @param[in] k Number of columns in (non-transposed) - * @param[in] alpha Constant real number - * @param[in] beta Constant real number - * @param[in] V Multivector containing the matrix, organized columnwise - * @param[in] y Vector, k x 1 if N and n x 1 if T - * @param[in,out] x Vector, n x 1 if N and k x 1 if T - * @param[in] memspace cpu or cuda (for now) - * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 - * - */ - void VectorHandlerCuda::gemv(std::string transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) - { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - if (transpose == "T") { - - cublasDgemv(handle_cublas, - CUBLAS_OP_T, - n, - k, - alpha, - V->getData(memory::DEVICE), - n, - y->getData(memory::DEVICE), - 1, - beta, - x->getData(memory::DEVICE), - 1); - - } else { - cublasDgemv(handle_cublas, - CUBLAS_OP_N, - n, - k, - alpha, - V->getData(memory::DEVICE), - n, - y->getData(memory::DEVICE), - 1, - beta, - x->getData(memory::DEVICE), - 1); - } - } - - /** - * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector - * - * @param[in] size number of elements in y - * @param[in] alpha vector size k x 1 - * @param[in] x (multi)vector size size x k - * @param[in,out] y vector size size x 1 (this is where the result is stored) - * @param[in] memspace string containg memspace (cpu or cuda) - * - * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() - * - */ - void VectorHandlerCuda::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) - { - using namespace constants; - if (k < 200) { - mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE),alpha->getData(memory::DEVICE)); - 
} else { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDgemm(handle_cublas, - CUBLAS_OP_N, - CUBLAS_OP_N, - size, // m - 1, // n - k + 1, // k - &MINUSONE, // alpha - x->getData(memory::DEVICE), // A - size, // lda - alpha->getData(memory::DEVICE), // B - k + 1, // ldb - &ONE, - y->getData(memory::DEVICE), // c - size); // ldc - } - } - - /** - * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector - * (a dense multivector consisting of k vectors size n) and x is k x 2 dense - * multivector (a multivector consisiting of two vectors size n each) - * - * @param[in] size Number of elements in a single vector in V - * @param[in] V Multivector; k vectors size n x 1 each - * @param[in] k Number of vectors in V - * @param[in] x Multivector; 2 vectors size n x 1 each - * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) - * @param[in] memspace String containg memspace (cpu or cuda) - * - * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated - * - */ - void VectorHandlerCuda::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) - { - using namespace constants; - - if (k < 200) { - mass_inner_product_two_vectors(size, k, x->getData(memory::DEVICE) , x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), res->getData(memory::DEVICE)); - } else { - LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDgemm(handle_cublas, - CUBLAS_OP_T, - CUBLAS_OP_N, - k + 1, //m - 2, //n - size, //k - &ONE, //alpha - V->getData(memory::DEVICE), //A - size, //lda - x->getData(memory::DEVICE), //B - size, //ldb - &ZERO, - res->getData(memory::DEVICE), //c - k + 1); //ldc - } +} + +/** + * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector + * (a dense multivector consisting of k vectors 
size n) and x is k x 2 dense + * multivector (a multivector consisiting of two vectors size n each) + * + * @param[in] size Number of elements in a single vector in V + * @param[in] V Multivector; k vectors size n x 1 each + * @param[in] k Number of vectors in V + * @param[in] x Multivector; 2 vectors size n x 1 each + * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * + */ +void VectorHandlerCuda::massDot2Vec(index_type size, vector::Vector *V, index_type k, vector::Vector *x, vector::Vector *res) +{ + using namespace constants; + + if (k < 200) { + mass_inner_product_two_vectors(size, k, x->getData(memory::DEVICE), x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), + res->getData(memory::DEVICE)); + } else { + LinAlgWorkspaceCUDA *workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDgemm(handle_cublas, CUBLAS_OP_T, CUBLAS_OP_N, + k + 1, // m + 2, // n + size, // k + &ONE, // alpha + V->getData(memory::DEVICE), // A + size, // lda + x->getData(memory::DEVICE), // B + size, // ldb + &ZERO, + res->getData(memory::DEVICE), // c + k + 1); // ldc } +} } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp index 0ee2752d3..a78678a2c 100644 --- a/resolve/vector/VectorHandlerCuda.hpp +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -2,56 +2,51 @@ #include namespace ReSolve -{ - namespace vector - { - class Vector; - } - class LinAlgWorkspaceCUDA; - class VectorHandlerImpl; +{ +namespace vector +{ +class Vector; } +class LinAlgWorkspaceCUDA; +class VectorHandlerImpl; +} // namespace ReSolve - -namespace ReSolve { //namespace vector { - class VectorHandlerCuda : public VectorHandlerImpl - { - public: - VectorHandlerCuda(); - VectorHandlerCuda(LinAlgWorkspaceCUDA* workspace); - 
virtual ~VectorHandlerCuda(); - - //y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); - - //dot: x \cdot y - virtual real_type dot(vector::Vector* x, vector::Vector* y); - - //scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); - - //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); - - //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise - //Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); - - /** gemv: - * if `transpose = N` (no), `x = beta*x + alpha*V*y`, - * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. - * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, - * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
- */ - virtual void gemv(std::string transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); - private: - LinAlgWorkspaceCUDA* workspace_; - }; - -} //} // namespace ReSolve::vector +namespace ReSolve +{ // namespace vector { +class VectorHandlerCuda : public VectorHandlerImpl +{ + public: + VectorHandlerCuda(); + VectorHandlerCuda(LinAlgWorkspaceCUDA *workspace); + virtual ~VectorHandlerCuda(); + + // y = alpha x + y + virtual void axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y); + + // dot: x \cdot y + virtual real_type dot(vector::Vector *x, vector::Vector *y); + + // scal = alpha * x + virtual void scal(const real_type *alpha, vector::Vector *x); + + // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector *alpha, index_type k, vector::Vector *x, vector::Vector *y); + + // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + // Size = n + virtual void massDot2Vec(index_type size, vector::Vector *V, index_type k, vector::Vector *x, vector::Vector *res); + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
+ */ + virtual void gemv(std::string transpose, index_type n, index_type k, const real_type *alpha, const real_type *beta, vector::Vector *V, + vector::Vector *y, vector::Vector *x); + + private: + LinAlgWorkspaceCUDA *workspace_; +}; + +} // namespace ReSolve diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp index 1e1195fc8..f53205929 100644 --- a/resolve/vector/VectorHandlerHip.cpp +++ b/resolve/vector/VectorHandlerHip.cpp @@ -1,236 +1,199 @@ #include -#include +#include "VectorHandlerHip.hpp" #include +#include #include -#include #include -#include "VectorHandlerHip.hpp" - -namespace ReSolve { - using out = io::Logger; - - /** - * @brief empty constructor that does absolutely nothing - */ - VectorHandlerHip::VectorHandlerHip() - { - } - - /** - * @brief constructor - * - * @param new_workspace - workspace to be set - */ - VectorHandlerHip:: VectorHandlerHip(LinAlgWorkspaceHIP* new_workspace) - { - workspace_ = new_workspace; - } +#include - /** - * @brief destructor - */ - VectorHandlerHip::~VectorHandlerHip() - { - //delete the workspace TODO +namespace ReSolve +{ +using out = io::Logger; + +/** + * @brief empty constructor that does absolutely nothing + */ +VectorHandlerHip::VectorHandlerHip() {} + +/** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ +VectorHandlerHip::VectorHandlerHip(LinAlgWorkspaceHIP *new_workspace) { workspace_ = new_workspace; } + +/** + * @brief destructor + */ +VectorHandlerHip::~VectorHandlerHip() +{ + // delete the workspace TODO +} + +/** + * @brief dot product of two vectors i.e, a = x^Ty + * + * @param[in] x The first vector + * @param[in] y The second vector + * @param[in] memspace String containg memspace (cpu or hip) + * + * @return dot product (real number) of _x_ and _y_ + */ + +real_type VectorHandlerHip::dot(vector::Vector *x, vector::Vector *y) +{ + LinAlgWorkspaceHIP *workspaceHIP = workspace_; + rocblas_handle handle_rocblas = 
workspaceHIP->getRocblasHandle(); + double nrm = 0.0; + rocblas_status st = rocblas_ddot(handle_rocblas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); + if (st != 0) { + printf("dot product crashed with code %d \n", st); } - - /** - * @brief dot product of two vectors i.e, a = x^Ty - * - * @param[in] x The first vector - * @param[in] y The second vector - * @param[in] memspace String containg memspace (cpu or hip) - * - * @return dot product (real number) of _x_ and _y_ - */ - - real_type VectorHandlerHip::dot(vector::Vector* x, vector::Vector* y) - { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); - double nrm = 0.0; - rocblas_status st= rocblas_ddot (handle_rocblas, x->getSize(), x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1, &nrm); - if (st!=0) {printf("dot product crashed with code %d \n", st);} - return nrm; + return nrm; +} + +/** + * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant + * + * @param[in] alpha The constant + * @param[in,out] x The vector + * @param memspace string containg memspace (cpu or hip) + * + */ +void VectorHandlerHip::scal(const real_type *alpha, vector::Vector *x) +{ + LinAlgWorkspaceHIP *workspaceHIP = workspace_; + rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_status st = rocblas_dscal(handle_rocblas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); + if (st != 0) { + ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; } - - /** - * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant - * - * @param[in] alpha The constant - * @param[in,out] x The vector - * @param memspace string containg memspace (cpu or hip) - * - */ - void VectorHandlerHip::scal(const real_type* alpha, vector::Vector* x) - { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = 
workspaceHIP->getRocblasHandle(); - rocblas_status st = rocblas_dscal(handle_rocblas, x->getSize(), alpha, x->getData(memory::DEVICE), 1); - if (st!=0) { - ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; - } +} + +/** + * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * + * @param[in] alpha The constant + * @param[in] x The first vector + * @param[in,out] y The second vector (result is return in y) + * @param[in] memspace String containg memspace (cpu or hip) + * + */ +void VectorHandlerHip::axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y) +{ + // AXPY: y = alpha * x + y + LinAlgWorkspaceHIP *workspaceHIP = workspace_; + rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_daxpy(handle_rocblas, x->getSize(), alpha, x->getData(memory::DEVICE), 1, y->getData(memory::DEVICE), 1); +} + +/** + * @brief gemv computes matrix-vector product where both matrix and vectors are dense. + * i.e., x = beta*x + alpha*V*y + * + * @param[in] Transpose - yes (T) or no (N) + * @param[in] n Number of rows in (non-transposed) matrix + * @param[in] k Number of columns in (non-transposed) + * @param[in] alpha Constant real number + * @param[in] beta Constant real number + * @param[in] V Multivector containing the matrix, organized columnwise + * @param[in] y Vector, k x 1 if N and n x 1 if T + * @param[in,out] x Vector, n x 1 if N and k x 1 if T + * @param[in] memspace cpu or hip (for now) + * + * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * + */ +void VectorHandlerHip::gemv(std::string transpose, index_type n, index_type k, const real_type *alpha, const real_type *beta, vector::Vector *V, + vector::Vector *y, vector::Vector *x) +{ + LinAlgWorkspaceHIP *workspaceHIP = workspace_; + rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + if (transpose == "T") { + + rocblas_dgemv(handle_rocblas, rocblas_operation_transpose, n, k, alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, 
beta, + x->getData(memory::DEVICE), 1); + + } else { + rocblas_dgemv(handle_rocblas, rocblas_operation_none, n, k, alpha, V->getData(memory::DEVICE), n, y->getData(memory::DEVICE), 1, beta, + x->getData(memory::DEVICE), 1); } - - /** - * @brief axpy i.e, y = alpha*x+y where alpha is a constant - * - * @param[in] alpha The constant - * @param[in] x The first vector - * @param[in,out] y The second vector (result is return in y) - * @param[in] memspace String containg memspace (cpu or hip) - * - */ - void VectorHandlerHip::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) - { - //AXPY: y = alpha * x + y - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); - rocblas_daxpy(handle_rocblas, - x->getSize(), - alpha, - x->getData(memory::DEVICE), - 1, - y->getData(memory::DEVICE), - 1); +} + +/** + * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * + * @param[in] size number of elements in y + * @param[in] alpha vector size k x 1 + * @param[in] x (multi)vector size size x k + * @param[in,out] y vector size size x 1 (this is where the result is stored) + * @param[in] memspace string containg memspace (cpu or hip) + * + * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() + * + */ +void VectorHandlerHip::massAxpy(index_type size, vector::Vector *alpha, index_type k, vector::Vector *x, vector::Vector *y) +{ + using namespace constants; + if (k < 200) { + mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE), alpha->getData(memory::DEVICE)); + } else { + LinAlgWorkspaceHIP *workspaceHIP = workspace_; + rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_dgemm(handle_rocblas, rocblas_operation_none, rocblas_operation_none, + size, // m + 1, // n + k, // k + &MINUSONE, // alpha + x->getData(memory::DEVICE), // A + size, // lda + alpha->getData(memory::DEVICE), // B + k, // ldb + &ONE, + y->getData(memory::DEVICE), // c + size); // ldc } 
- - /** - * @brief gemv computes matrix-vector product where both matrix and vectors are dense. - * i.e., x = beta*x + alpha*V*y - * - * @param[in] Transpose - yes (T) or no (N) - * @param[in] n Number of rows in (non-transposed) matrix - * @param[in] k Number of columns in (non-transposed) - * @param[in] alpha Constant real number - * @param[in] beta Constant real number - * @param[in] V Multivector containing the matrix, organized columnwise - * @param[in] y Vector, k x 1 if N and n x 1 if T - * @param[in,out] x Vector, n x 1 if N and k x 1 if T - * @param[in] memspace cpu or hip (for now) - * - * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 - * - */ - void VectorHandlerHip::gemv(std::string transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) - { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); - if (transpose == "T") { - - rocblas_dgemv(handle_rocblas, - rocblas_operation_transpose, - n, - k, - alpha, - V->getData(memory::DEVICE), - n, - y->getData(memory::DEVICE), - 1, - beta, - x->getData(memory::DEVICE), - 1); - - } else { - rocblas_dgemv(handle_rocblas, - rocblas_operation_none, - n, - k, - alpha, - V->getData(memory::DEVICE), - n, - y->getData(memory::DEVICE), - 1, - beta, - x->getData(memory::DEVICE), - 1); - } - } - - /** - * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector - * - * @param[in] size number of elements in y - * @param[in] alpha vector size k x 1 - * @param[in] x (multi)vector size size x k - * @param[in,out] y vector size size x 1 (this is where the result is stored) - * @param[in] memspace string containg memspace (cpu or hip) - * - * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() - * - */ - void VectorHandlerHip::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) - { - using namespace constants; - 
if (k < 200) { - mass_axpy(size, k, x->getData(memory::DEVICE), y->getData(memory::DEVICE),alpha->getData(memory::DEVICE)); - } else { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); - rocblas_dgemm(handle_rocblas, - rocblas_operation_none, - rocblas_operation_none, - size, // m - 1, // n - k, // k - &MINUSONE, // alpha - x->getData(memory::DEVICE), // A - size, // lda - alpha->getData(memory::DEVICE), // B - k, // ldb - &ONE, - y->getData(memory::DEVICE), // c - size); // ldc - } - } - - /** - * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector - * (a dense multivector consisting of k vectors size n) and x is k x 2 dense - * multivector (a multivector consisiting of two vectors size n each) - * - * @param[in] size Number of elements in a single vector in V - * @param[in] V Multivector; k vectors size n x 1 each - * @param[in] k Number of vectors in V - * @param[in] x Multivector; 2 vectors size n x 1 each - * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) - * @param[in] memspace String containg memspace (cpu or hip) - * - * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated - * - */ - void VectorHandlerHip::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) - { - using namespace constants; - - if (k < 200) { - mass_inner_product_two_vectors(size, k, x->getData(memory::DEVICE) , x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), res->getData(memory::DEVICE)); - } else { - LinAlgWorkspaceHIP* workspaceHIP = workspace_; - rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); - rocblas_dgemm(handle_rocblas, - rocblas_operation_transpose, - rocblas_operation_none, - k + 1, //m - 2, //n - size, //k - &ONE, //alpha - V->getData(memory::DEVICE), //A - size, //lda - x->getData(memory::DEVICE), //B - size, //ldb - &ZERO, - 
res->getData(memory::DEVICE), //c - k + 1); //ldc - } +} + +/** + * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector + * (a dense multivector consisting of k vectors size n) and x is k x 2 dense + * multivector (a multivector consisiting of two vectors size n each) + * + * @param[in] size Number of elements in a single vector in V + * @param[in] V Multivector; k vectors size n x 1 each + * @param[in] k Number of vectors in V + * @param[in] x Multivector; 2 vectors size n x 1 each + * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace String containg memspace (cpu or hip) + * + * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * + */ +void VectorHandlerHip::massDot2Vec(index_type size, vector::Vector *V, index_type k, vector::Vector *x, vector::Vector *res) +{ + using namespace constants; + + if (k < 200) { + mass_inner_product_two_vectors(size, k, x->getData(memory::DEVICE), x->getData(1, memory::DEVICE), V->getData(memory::DEVICE), + res->getData(memory::DEVICE)); + } else { + LinAlgWorkspaceHIP *workspaceHIP = workspace_; + rocblas_handle handle_rocblas = workspaceHIP->getRocblasHandle(); + rocblas_dgemm(handle_rocblas, rocblas_operation_transpose, rocblas_operation_none, + k + 1, // m + 2, // n + size, // k + &ONE, // alpha + V->getData(memory::DEVICE), // A + size, // lda + x->getData(memory::DEVICE), // B + size, // ldb + &ZERO, + res->getData(memory::DEVICE), // c + k + 1); // ldc } +} } // namespace ReSolve diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp index 7e5085e36..08896f6b9 100644 --- a/resolve/vector/VectorHandlerHip.hpp +++ b/resolve/vector/VectorHandlerHip.hpp @@ -2,56 +2,51 @@ #include namespace ReSolve -{ - namespace vector - { - class Vector; - } - class LinAlgWorkspaceHIP; - class VectorHandlerImpl; +{ +namespace vector +{ +class Vector; } +class LinAlgWorkspaceHIP; +class VectorHandlerImpl; +} 
// namespace ReSolve - -namespace ReSolve { //namespace vector { - class VectorHandlerHip : public VectorHandlerImpl - { - public: - VectorHandlerHip(); - VectorHandlerHip(LinAlgWorkspaceHIP* workspace); - virtual ~VectorHandlerHip(); - - //y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); - - //dot: x \cdot y - virtual real_type dot(vector::Vector* x, vector::Vector* y); - - //scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x); - - //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); - - //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise - //Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); - - /** gemv: - * if `transpose = N` (no), `x = beta*x + alpha*V*y`, - * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. - * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, - * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
- */ - virtual void gemv(std::string transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x); - private: - LinAlgWorkspaceHIP* workspace_; - }; - -} //} // namespace ReSolve::vector +namespace ReSolve +{ // namespace vector { +class VectorHandlerHip : public VectorHandlerImpl +{ + public: + VectorHandlerHip(); + VectorHandlerHip(LinAlgWorkspaceHIP *workspace); + virtual ~VectorHandlerHip(); + + // y = alpha x + y + virtual void axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y); + + // dot: x \cdot y + virtual real_type dot(vector::Vector *x, vector::Vector *y); + + // scal = alpha * x + virtual void scal(const real_type *alpha, vector::Vector *x); + + // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector *alpha, index_type k, vector::Vector *x, vector::Vector *y); + + // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + // Size = n + virtual void massDot2Vec(index_type size, vector::Vector *V, index_type k, vector::Vector *x, vector::Vector *res); + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
+ */ + virtual void gemv(std::string transpose, index_type n, index_type k, const real_type *alpha, const real_type *beta, vector::Vector *V, + vector::Vector *y, vector::Vector *x); + + private: + LinAlgWorkspaceHIP *workspace_; +}; + +} // namespace ReSolve diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp index 229a74618..76948152d 100644 --- a/resolve/vector/VectorHandlerImpl.hpp +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -2,56 +2,47 @@ #include namespace ReSolve -{ - namespace vector - { - class Vector; - } - class VectorHandlerCpu; - class VectorHandlerCuda; +{ +namespace vector +{ +class Vector; } - +class VectorHandlerCpu; +class VectorHandlerCuda; +} // namespace ReSolve namespace ReSolve { - class VectorHandlerImpl - { - public: - VectorHandlerImpl() - {} - virtual ~VectorHandlerImpl() - {} - - //y = alpha x + y - virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y ) = 0; - - //dot: x \cdot y - virtual real_type dot(vector::Vector* x, vector::Vector* y ) = 0; - - //scal = alpha * x - virtual void scal(const real_type* alpha, vector::Vector* x) = 0; - - //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise - virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; - - //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise - //Size = n - virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) = 0; - - /** gemv: - * if `transpose = N` (no), `x = beta*x + alpha*V*y`, - * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. - * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, - * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. 
- */ - virtual void gemv(std::string transpose, - index_type n, - index_type k, - const real_type* alpha, - const real_type* beta, - vector::Vector* V, - vector::Vector* y, - vector::Vector* x) = 0; - }; - -} //} // namespace ReSolve::vector +class VectorHandlerImpl +{ + public: + VectorHandlerImpl() {} + virtual ~VectorHandlerImpl() {} + + // y = alpha x + y + virtual void axpy(const real_type *alpha, vector::Vector *x, vector::Vector *y) = 0; + + // dot: x \cdot y + virtual real_type dot(vector::Vector *x, vector::Vector *y) = 0; + + // scal = alpha * x + virtual void scal(const real_type *alpha, vector::Vector *x) = 0; + + // mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector *alpha, index_type k, vector::Vector *x, vector::Vector *y) = 0; + + // mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + // Size = n + virtual void massDot2Vec(index_type size, vector::Vector *V, index_type k, vector::Vector *x, vector::Vector *res) = 0; + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. + */ + virtual void gemv(std::string transpose, index_type n, index_type k, const real_type *alpha, const real_type *beta, vector::Vector *V, + vector::Vector *y, vector::Vector *x) = 0; +}; + +} // namespace ReSolve diff --git a/resolve/vector/VectorKernels.hpp b/resolve/vector/VectorKernels.hpp index 9f7d1bca0..af3b2bd8d 100644 --- a/resolve/vector/VectorKernels.hpp +++ b/resolve/vector/VectorKernels.hpp @@ -1,6 +1,9 @@ #pragma once -namespace ReSolve { namespace vector { +namespace ReSolve +{ +namespace vector +{ /** * @brief Sets values of an array to a constant. 
@@ -8,10 +11,11 @@ namespace ReSolve { namespace vector { * @param[in] n - length of the array * @param[in] val - the value the array is set to * @param[out] arr - a pointer to the array - * + * * @pre `arr` is allocated to size `n` * @post `arr` elements are set to `val` */ -void set_array_const(index_type n, real_type val, real_type* arr); +void set_array_const(index_type n, real_type val, real_type *arr); -}} // namespace ReSolve::vector \ No newline at end of file +} // namespace vector +} // namespace ReSolve diff --git a/resolve/workspace/CMakeLists.txt b/resolve/workspace/CMakeLists.txt index a44f74f81..3830b8896 100644 --- a/resolve/workspace/CMakeLists.txt +++ b/resolve/workspace/CMakeLists.txt @@ -7,33 +7,29 @@ ]] # C++ code -set(ReSolve_Workspace_SRC - LinAlgWorkspaceCpu.cpp -) +set(ReSolve_Workspace_SRC LinAlgWorkspaceCpu.cpp) # C++ code that depends on CUDA SDK libraries -set(ReSolve_Workspace_CUDASDK_SRC - LinAlgWorkspaceCUDA.cpp -) +set(ReSolve_Workspace_CUDASDK_SRC LinAlgWorkspaceCUDA.cpp) -set(ReSolve_Workspace_ROCM_SRC - LinAlgWorkspaceHIP.cpp -) +set(ReSolve_Workspace_ROCM_SRC LinAlgWorkspaceHIP.cpp) set(ReSolve_Workspace_HEADER_INSTALL - LinAlgWorkspace.hpp - LinAlgWorkspaceCpu.hpp - LinAlgWorkspaceCUDA.hpp - LinAlgWorkspaceHIP.hpp + LinAlgWorkspace.hpp LinAlgWorkspaceCpu.hpp LinAlgWorkspaceCUDA.hpp + LinAlgWorkspaceHIP.hpp ) # If cuda is enabled, add CUDA SDK workspace files if(RESOLVE_USE_CUDA) - set(ReSolve_Workspace_SRC ${ReSolve_Workspace_SRC} ${ReSolve_Workspace_CUDASDK_SRC}) + set(ReSolve_Workspace_SRC ${ReSolve_Workspace_SRC} + ${ReSolve_Workspace_CUDASDK_SRC} + ) endif() if(RESOLVE_USE_HIP) - set(ReSolve_Workspace_SRC ${ReSolve_Workspace_SRC} ${ReSolve_Workspace_ROCM_SRC}) + set(ReSolve_Workspace_SRC ${ReSolve_Workspace_SRC} + ${ReSolve_Workspace_ROCM_SRC} + ) endif() add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC}) @@ -41,17 +37,19 @@ add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC}) # If CUDA is enabled, 
link to ReSolve CUDA backend if(RESOLVE_USE_CUDA) target_link_libraries(resolve_workspace PUBLIC resolve_backend_cuda) -endif(RESOLVE_USE_CUDA) +endif(RESOLVE_USE_CUDA) if(RESOLVE_USE_HIP) target_link_libraries(resolve_workspace PUBLIC resolve_backend_hip) -endif(RESOLVE_USE_HIP) +endif(RESOLVE_USE_HIP) -target_include_directories(resolve_workspace PUBLIC - $ - $ - $ +target_include_directories( + resolve_workspace + PUBLIC $ + $ $ ) # install include headers -install(FILES ${ReSolve_Workspace_HEADER_INSTALL} DESTINATION include/resolve/workspace) +install(FILES ${ReSolve_Workspace_HEADER_INSTALL} + DESTINATION include/resolve/workspace +) diff --git a/resolve/workspace/LinAlgWorkspace.hpp b/resolve/workspace/LinAlgWorkspace.hpp index 4efe834e5..f07060d3d 100644 --- a/resolve/workspace/LinAlgWorkspace.hpp +++ b/resolve/workspace/LinAlgWorkspace.hpp @@ -9,4 +9,3 @@ #ifdef RESOLVE_USE_HIP #include #endif - diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index 200ac4370..afdad78d4 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -2,111 +2,65 @@ namespace ReSolve { - LinAlgWorkspaceCUDA::LinAlgWorkspaceCUDA() - { - handle_cusolversp_ = nullptr; - handle_cusparse_ = nullptr; - handle_cublas_ = nullptr; - buffer_spmv_ = nullptr; - buffer_1norm_ = nullptr; - - matvec_setup_done_ = false; - } - - LinAlgWorkspaceCUDA::~LinAlgWorkspaceCUDA() - { - if (buffer_spmv_ != nullptr) mem_.deleteOnDevice(buffer_spmv_); - if (buffer_1norm_ != nullptr) mem_.deleteOnDevice(buffer_1norm_); - cusparseDestroy(handle_cusparse_); - cusolverSpDestroy(handle_cusolversp_); - cublasDestroy(handle_cublas_); - cusparseDestroySpMat(mat_A_); - } - - void* LinAlgWorkspaceCUDA::getSpmvBuffer() - { - return buffer_spmv_; - } - - void* LinAlgWorkspaceCUDA::getNormBuffer() - { - return buffer_1norm_; - } - - void LinAlgWorkspaceCUDA::setSpmvBuffer(void* buffer) - { - buffer_spmv_ = buffer; - } - - 
void LinAlgWorkspaceCUDA::setNormBuffer(void* buffer) - { - buffer_1norm_ = buffer; - } - - cusparseHandle_t LinAlgWorkspaceCUDA::getCusparseHandle() - { - return handle_cusparse_; - } - - void LinAlgWorkspaceCUDA::setCusparseHandle(cusparseHandle_t handle) - { - handle_cusparse_ = handle; - } - - cublasHandle_t LinAlgWorkspaceCUDA::getCublasHandle() - { - return handle_cublas_; - } - - void LinAlgWorkspaceCUDA::setCublasHandle(cublasHandle_t handle) - { - handle_cublas_ = handle; - } - - cusolverSpHandle_t LinAlgWorkspaceCUDA::getCusolverSpHandle() - { - return handle_cusolversp_; - } - - void LinAlgWorkspaceCUDA::setCusolverSpHandle(cusolverSpHandle_t handle) - { - handle_cusolversp_ = handle; - } - - cusparseSpMatDescr_t LinAlgWorkspaceCUDA::getSpmvMatrixDescriptor() - { - return mat_A_; - } - - void LinAlgWorkspaceCUDA::setSpmvMatrixDescriptor(cusparseSpMatDescr_t mat) - { - mat_A_ = mat; - } - - cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecX() - { - return vec_x_; - } - - cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecY() - { - return vec_y_; - } - - bool LinAlgWorkspaceCUDA::matvecSetup() - { - return matvec_setup_done_; - } - - void LinAlgWorkspaceCUDA::matvecSetupDone() - { - matvec_setup_done_ = true; - } - - void LinAlgWorkspaceCUDA::initializeHandles() - { - cusparseCreate(&handle_cusparse_); - cublasCreate(&handle_cublas_); - cusolverSpCreate(&handle_cusolversp_); - } +LinAlgWorkspaceCUDA::LinAlgWorkspaceCUDA() +{ + handle_cusolversp_ = nullptr; + handle_cusparse_ = nullptr; + handle_cublas_ = nullptr; + buffer_spmv_ = nullptr; + buffer_1norm_ = nullptr; + + matvec_setup_done_ = false; +} + +LinAlgWorkspaceCUDA::~LinAlgWorkspaceCUDA() +{ + if (buffer_spmv_ != nullptr) + mem_.deleteOnDevice(buffer_spmv_); + if (buffer_1norm_ != nullptr) + mem_.deleteOnDevice(buffer_1norm_); + cusparseDestroy(handle_cusparse_); + cusolverSpDestroy(handle_cusolversp_); + cublasDestroy(handle_cublas_); + cusparseDestroySpMat(mat_A_); +} + +void 
*LinAlgWorkspaceCUDA::getSpmvBuffer() { return buffer_spmv_; } + +void *LinAlgWorkspaceCUDA::getNormBuffer() { return buffer_1norm_; } + +void LinAlgWorkspaceCUDA::setSpmvBuffer(void *buffer) { buffer_spmv_ = buffer; } + +void LinAlgWorkspaceCUDA::setNormBuffer(void *buffer) { buffer_1norm_ = buffer; } + +cusparseHandle_t LinAlgWorkspaceCUDA::getCusparseHandle() { return handle_cusparse_; } + +void LinAlgWorkspaceCUDA::setCusparseHandle(cusparseHandle_t handle) { handle_cusparse_ = handle; } + +cublasHandle_t LinAlgWorkspaceCUDA::getCublasHandle() { return handle_cublas_; } + +void LinAlgWorkspaceCUDA::setCublasHandle(cublasHandle_t handle) { handle_cublas_ = handle; } + +cusolverSpHandle_t LinAlgWorkspaceCUDA::getCusolverSpHandle() { return handle_cusolversp_; } + +void LinAlgWorkspaceCUDA::setCusolverSpHandle(cusolverSpHandle_t handle) { handle_cusolversp_ = handle; } + +cusparseSpMatDescr_t LinAlgWorkspaceCUDA::getSpmvMatrixDescriptor() { return mat_A_; } + +void LinAlgWorkspaceCUDA::setSpmvMatrixDescriptor(cusparseSpMatDescr_t mat) { mat_A_ = mat; } + +cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecX() { return vec_x_; } + +cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecY() { return vec_y_; } + +bool LinAlgWorkspaceCUDA::matvecSetup() { return matvec_setup_done_; } + +void LinAlgWorkspaceCUDA::matvecSetupDone() { matvec_setup_done_ = true; } + +void LinAlgWorkspaceCUDA::initializeHandles() +{ + cusparseCreate(&handle_cusparse_); + cublasCreate(&handle_cublas_); + cusolverSpCreate(&handle_cusolversp_); +} } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index 5076563ac..89eb15f88 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -1,62 +1,62 @@ #pragma once #include "cublas_v2.h" -#include "cusparse.h" #include "cusolverSp.h" +#include "cusparse.h" #include namespace ReSolve { - class LinAlgWorkspaceCUDA - { - public: - 
LinAlgWorkspaceCUDA(); - ~LinAlgWorkspaceCUDA(); +class LinAlgWorkspaceCUDA +{ + public: + LinAlgWorkspaceCUDA(); + ~LinAlgWorkspaceCUDA(); - //accessors - void* getSpmvBuffer(); - void* getNormBuffer(); + // accessors + void *getSpmvBuffer(); + void *getNormBuffer(); - void setSpmvBuffer(void* buffer); - void setNormBuffer(void* buffer); + void setSpmvBuffer(void *buffer); + void setNormBuffer(void *buffer); - cublasHandle_t getCublasHandle(); - cusolverSpHandle_t getCusolverSpHandle(); //needed for 1-norms etc - cusparseHandle_t getCusparseHandle(); - cusparseSpMatDescr_t getSpmvMatrixDescriptor(); - cusparseDnVecDescr_t getVecX(); - cusparseDnVecDescr_t getVecY(); + cublasHandle_t getCublasHandle(); + cusolverSpHandle_t getCusolverSpHandle(); // needed for 1-norms etc + cusparseHandle_t getCusparseHandle(); + cusparseSpMatDescr_t getSpmvMatrixDescriptor(); + cusparseDnVecDescr_t getVecX(); + cusparseDnVecDescr_t getVecY(); - void setCublasHandle(cublasHandle_t handle); - void setCusolverSpHandle( cusolverSpHandle_t handle); - void setCusparseHandle(cusparseHandle_t handle); - void setSpmvMatrixDescriptor(cusparseSpMatDescr_t mat); + void setCublasHandle(cublasHandle_t handle); + void setCusolverSpHandle(cusolverSpHandle_t handle); + void setCusparseHandle(cusparseHandle_t handle); + void setSpmvMatrixDescriptor(cusparseSpMatDescr_t mat); - void initializeHandles(); + void initializeHandles(); - bool matvecSetup(); - void matvecSetupDone(); + bool matvecSetup(); + void matvecSetupDone(); - private: - //handles - cublasHandle_t handle_cublas_; - cusolverSpHandle_t handle_cusolversp_;//needed for 1-norm - cusparseHandle_t handle_cusparse_; + private: + // handles + cublasHandle_t handle_cublas_; + cusolverSpHandle_t handle_cusolversp_; // needed for 1-norm + cusparseHandle_t handle_cusparse_; - //matrix descriptors - cusparseSpMatDescr_t mat_A_; + // matrix descriptors + cusparseSpMatDescr_t mat_A_; - //vector descriptors - cusparseDnVecDescr_t vec_x_, vec_y_; + // 
vector descriptors + cusparseDnVecDescr_t vec_x_, vec_y_; - //buffers - void* buffer_spmv_; - void* buffer_1norm_; + // buffers + void *buffer_spmv_; + void *buffer_1norm_; - bool matvec_setup_done_; //check if setup is done for matvec i.e. if buffer is allocated, csr structure is set etc. + bool matvec_setup_done_; // check if setup is done for matvec i.e. if buffer is allocated, csr structure is set etc. - MemoryHandler mem_; - }; + MemoryHandler mem_; +}; } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index c0f252488..05709f59e 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -1,17 +1,11 @@ -#include #include "LinAlgWorkspaceCpu.hpp" +#include namespace ReSolve { - LinAlgWorkspaceCpu::LinAlgWorkspaceCpu() - { - } - - LinAlgWorkspaceCpu::~LinAlgWorkspaceCpu() - { - } +LinAlgWorkspaceCpu::LinAlgWorkspaceCpu() {} + +LinAlgWorkspaceCpu::~LinAlgWorkspaceCpu() {} - void LinAlgWorkspaceCpu::initializeHandles() - { - } -} +void LinAlgWorkspaceCpu::initializeHandles() {} +} // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index 3c056b73c..050082913 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -1,18 +1,18 @@ #pragma once - #include namespace ReSolve { - class LinAlgWorkspaceCpu - { - public: - LinAlgWorkspaceCpu(); - ~LinAlgWorkspaceCpu(); - void initializeHandles(); - private: - // MemoryHandler mem_; ///< Memory handler not needed for now - }; +class LinAlgWorkspaceCpu +{ + public: + LinAlgWorkspaceCpu(); + ~LinAlgWorkspaceCpu(); + void initializeHandles(); + + private: + // MemoryHandler mem_; ///< Memory handler not needed for now +}; -} +} // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index e64dff17f..00fa265a5 100644 --- 
a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -2,74 +2,44 @@ namespace ReSolve { - LinAlgWorkspaceHIP::LinAlgWorkspaceHIP() - { - handle_rocsparse_ = nullptr; - handle_rocblas_ = nullptr; +LinAlgWorkspaceHIP::LinAlgWorkspaceHIP() +{ + handle_rocsparse_ = nullptr; + handle_rocblas_ = nullptr; - matvec_setup_done_ = false; - } + matvec_setup_done_ = false; +} - LinAlgWorkspaceHIP::~LinAlgWorkspaceHIP() - { - rocsparse_destroy_handle(handle_rocsparse_); - rocblas_destroy_handle(handle_rocblas_); - rocsparse_destroy_mat_descr(mat_A_); - } +LinAlgWorkspaceHIP::~LinAlgWorkspaceHIP() +{ + rocsparse_destroy_handle(handle_rocsparse_); + rocblas_destroy_handle(handle_rocblas_); + rocsparse_destroy_mat_descr(mat_A_); +} - rocsparse_handle LinAlgWorkspaceHIP::getRocsparseHandle() - { - return handle_rocsparse_; - } +rocsparse_handle LinAlgWorkspaceHIP::getRocsparseHandle() { return handle_rocsparse_; } - void LinAlgWorkspaceHIP::setRocsparseHandle(rocsparse_handle handle) - { - handle_rocsparse_ = handle; - } +void LinAlgWorkspaceHIP::setRocsparseHandle(rocsparse_handle handle) { handle_rocsparse_ = handle; } - rocblas_handle LinAlgWorkspaceHIP::getRocblasHandle() - { - return handle_rocblas_; - } +rocblas_handle LinAlgWorkspaceHIP::getRocblasHandle() { return handle_rocblas_; } - void LinAlgWorkspaceHIP::setRocblasHandle(rocblas_handle handle) - { - handle_rocblas_ = handle; - } +void LinAlgWorkspaceHIP::setRocblasHandle(rocblas_handle handle) { handle_rocblas_ = handle; } - rocsparse_mat_descr LinAlgWorkspaceHIP::getSpmvMatrixDescriptor() - { - return mat_A_; - } +rocsparse_mat_descr LinAlgWorkspaceHIP::getSpmvMatrixDescriptor() { return mat_A_; } - void LinAlgWorkspaceHIP::setSpmvMatrixDescriptor(rocsparse_mat_descr mat) - { - mat_A_ = mat; - } +void LinAlgWorkspaceHIP::setSpmvMatrixDescriptor(rocsparse_mat_descr mat) { mat_A_ = mat; } - rocsparse_mat_info LinAlgWorkspaceHIP::getSpmvMatrixInfo() - { - return info_A_; - } 
+rocsparse_mat_info LinAlgWorkspaceHIP::getSpmvMatrixInfo() { return info_A_; } - void LinAlgWorkspaceHIP::setSpmvMatrixInfo(rocsparse_mat_info info) - { - info_A_ = info; - } +void LinAlgWorkspaceHIP::setSpmvMatrixInfo(rocsparse_mat_info info) { info_A_ = info; } - bool LinAlgWorkspaceHIP::matvecSetup() - { - return matvec_setup_done_; - } +bool LinAlgWorkspaceHIP::matvecSetup() { return matvec_setup_done_; } - void LinAlgWorkspaceHIP::matvecSetupDone() - { - matvec_setup_done_ = true; - } +void LinAlgWorkspaceHIP::matvecSetupDone() { matvec_setup_done_ = true; } - void LinAlgWorkspaceHIP::initializeHandles() - { - rocsparse_create_handle(&handle_rocsparse_); - rocblas_create_handle(&handle_rocblas_); - } - } // namespace ReSolve +void LinAlgWorkspaceHIP::initializeHandles() +{ + rocsparse_create_handle(&handle_rocsparse_); + rocblas_create_handle(&handle_rocblas_); +} +} // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index abdc3e416..52c8e042a 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -1,52 +1,52 @@ #pragma once -#include -#include #include +#include +#include #include namespace ReSolve { - class LinAlgWorkspaceHIP - { - public: - LinAlgWorkspaceHIP(); - ~LinAlgWorkspaceHIP(); +class LinAlgWorkspaceHIP +{ + public: + LinAlgWorkspaceHIP(); + ~LinAlgWorkspaceHIP(); - rocblas_handle getRocblasHandle(); - rocsparse_handle getRocsparseHandle(); - rocsparse_mat_descr getSpmvMatrixDescriptor(); - rocsparse_mat_info getSpmvMatrixInfo(); + rocblas_handle getRocblasHandle(); + rocsparse_handle getRocsparseHandle(); + rocsparse_mat_descr getSpmvMatrixDescriptor(); + rocsparse_mat_info getSpmvMatrixInfo(); - void setRocblasHandle(rocblas_handle handle); - void setRocsparseHandle(rocsparse_handle handle); - void setSpmvMatrixDescriptor(rocsparse_mat_descr mat); - void setSpmvMatrixInfo(rocsparse_mat_info info); + void 
setRocblasHandle(rocblas_handle handle); + void setRocsparseHandle(rocsparse_handle handle); + void setSpmvMatrixDescriptor(rocsparse_mat_descr mat); + void setSpmvMatrixInfo(rocsparse_mat_info info); - void initializeHandles(); + void initializeHandles(); - bool matvecSetup(); - void matvecSetupDone(); + bool matvecSetup(); + void matvecSetupDone(); - private: - //handles - rocblas_handle handle_rocblas_; - rocsparse_handle handle_rocsparse_; + private: + // handles + rocblas_handle handle_rocblas_; + rocsparse_handle handle_rocsparse_; - //matrix descriptors - rocsparse_mat_descr mat_A_; + // matrix descriptors + rocsparse_mat_descr mat_A_; - //vector descriptors not needed, rocsparse uses RAW pointers. + // vector descriptors not needed, rocsparse uses RAW pointers. - //buffers - // there is no buffer needed in matvec - bool matvec_setup_done_; //check if setup is done for matvec (note: no buffer but there is analysis) + // buffers + // there is no buffer needed in matvec + bool matvec_setup_done_; // check if setup is done for matvec (note: no buffer but there is analysis) - //info - but we need info - rocsparse_mat_info info_A_; + // info - but we need info + rocsparse_mat_info info_A_; - // MemoryHandler mem_; ///< Memory handler not needed for now - }; + // MemoryHandler mem_; ///< Memory handler not needed for now +}; } // namespace ReSolve diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 19b799e4f..b15b3bee6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,5 @@ ]] -add_subdirectory(functionality) +add_subdirectory(functionality) add_subdirectory(unit) - diff --git a/tests/README.md b/tests/README.md index c11c5aa66..9453eaf8e 100644 --- a/tests/README.md +++ b/tests/README.md @@ -3,4 +3,4 @@ CI is ran per every merge request that makes sure ReSolve can be consumed as a p If you follow the [developer guidelines](CONTRIBUTING.md) for building resolve and run make test you will see ReSolve consumed and linked with an 
example test in Test #1 (resolve_Consume). -This ReSolve Consume test is executed via a cmake test that exectutes test.sh. This shell script then goes through the cmake build process to ensure that ReSolve can be built from scratch and linked to another cmake project. \ No newline at end of file +This ReSolve Consume test is executed via a cmake test that exectutes test.sh. This shell script then goes through the cmake build process to ensure that ReSolve can be built from scratch and linked to another cmake project. diff --git a/tests/functionality/CMakeLists.txt b/tests/functionality/CMakeLists.txt index 85b47fd72..fb2331a55 100644 --- a/tests/functionality/CMakeLists.txt +++ b/tests/functionality/CMakeLists.txt @@ -11,7 +11,7 @@ add_executable(klu_klu_test.exe testKLU.cpp) target_link_libraries(klu_klu_test.exe PRIVATE ReSolve) if(RESOLVE_USE_CUDA) - + # Build KLU+Rf test add_executable(klu_rf_test.exe testKLU_Rf.cpp) target_link_libraries(klu_rf_test.exe PRIVATE ReSolve) @@ -26,9 +26,8 @@ if(RESOLVE_USE_CUDA) endif(RESOLVE_USE_CUDA) - if(RESOLVE_USE_HIP) - + # Build KLU+rossolver test add_executable(rocsolver_rf_test.exe testKLU_RocSolver.cpp) target_link_libraries(rocsolver_rf_test.exe PRIVATE ReSolve) @@ -43,34 +42,47 @@ endif(RESOLVE_USE_HIP) set(installable_tests klu_klu_test.exe) if(RESOLVE_USE_CUDA) - set(installable_tests ${installable_tests} - klu_rf_test.exe - klu_rf_fgmres_test.exe - klu_glu_test.exe) + set(installable_tests ${installable_tests} klu_rf_test.exe + klu_rf_fgmres_test.exe klu_glu_test.exe + ) endif(RESOLVE_USE_CUDA) if(RESOLVE_USE_HIP) - set(installable_tests ${installable_tests} - rocsolver_rf_test.exe - rocsolver_rf_fgmres_test.exe) + set(installable_tests ${installable_tests} rocsolver_rf_test.exe + rocsolver_rf_fgmres_test.exe + ) endif(RESOLVE_USE_HIP) -install(TARGETS ${installable_tests} - RUNTIME DESTINATION bin/resolve/tests/functionality) +install(TARGETS ${installable_tests} + RUNTIME DESTINATION 
bin/resolve/tests/functionality +) # Install directory with data files install(DIRECTORY data DESTINATION bin/resolve/tests/functionality) set(test_data_dir ${CMAKE_SOURCE_DIR}/tests/functionality/) -add_test(NAME klu_klu_test COMMAND $ "${test_data_dir}") +add_test(NAME klu_klu_test COMMAND $ + "${test_data_dir}" +) if(RESOLVE_USE_CUDA) - add_test(NAME klu_rf_test COMMAND $ "${test_data_dir}") - add_test(NAME klu_rf_fgmres_test COMMAND $ "${test_data_dir}") - add_test(NAME klu_glu_test COMMAND $ "${test_data_dir}") + add_test(NAME klu_rf_test COMMAND $ + "${test_data_dir}" + ) + add_test(NAME klu_rf_fgmres_test COMMAND $ + "${test_data_dir}" + ) + add_test(NAME klu_glu_test COMMAND $ + "${test_data_dir}" + ) endif(RESOLVE_USE_CUDA) if(RESOLVE_USE_HIP) - add_test(NAME rocsolver_rf_test COMMAND $ "${test_data_dir}") - add_test(NAME rocsolver_rf_fgmres_test COMMAND $ "${test_data_dir}") + add_test(NAME rocsolver_rf_test COMMAND $ + "${test_data_dir}" + ) + add_test(NAME rocsolver_rf_fgmres_test + COMMAND $ + "${test_data_dir}" + ) endif(RESOLVE_USE_HIP) diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index 083c11d1a..882ff154d 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -1,42 +1,41 @@ -#include -#include -#include #include +#include +#include +#include -#include -#include +#include #include #include #include +#include +#include #include -#include #include -//author: KS -//functionality test to check whether KLU works correctly. +// author: KS +// functionality test to check whether KLU works correctly. using namespace ReSolve::constants; int main(int argc, char *argv[]) { // Use ReSolve data types. - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - //we want error sum to be 0 at the end - //that means PASS. - //otheriwse it is a FAIL. + // we want error sum to be 0 at the end + // that means PASS. 
+ // otheriwse it is a FAIL. int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCpu* workspace = new ReSolve::LinAlgWorkspaceCpu(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinAlgWorkspaceCpu *workspace = new ReSolve::LinAlgWorkspaceCpu(); + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace); + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); // Input to this code is location of `data` directory where matrix files are stored const std::string data_path = (argc == 2) ? argv[1] : "./"; - std::string matrixFileName1 = data_path + "data/matrix_ACTIVSg200_AC_10.mtx"; std::string matrixFileName2 = data_path + "data/matrix_ACTIVSg200_AC_11.mtx"; @@ -45,31 +44,26 @@ int main(int argc, char *argv[]) // Read first matrix std::ifstream mat1(matrixFileName1); - if(!mat1.is_open()) - { + if (!mat1.is_open()) { std::cout << "Failed to open file " << matrixFileName1 << "\n"; return -1; } - ReSolve::matrix::Coo* A_coo = ReSolve::io::readMatrixFromFile(mat1); - ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(A_coo->getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + ReSolve::matrix::Coo *A_coo = ReSolve::io::readMatrixFromFile(mat1); + ReSolve::matrix::Csr *A = + new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); mat1.close(); // Read first rhs vector std::ifstream rhs1_file(rhsFileName1); - if(!rhs1_file.is_open()) - { + if (!rhs1_file.is_open()) { std::cout << "Failed to open file " << rhsFileName1 << "\n"; return -1; } - real_type* rhs = 
ReSolve::io::readRhsFromFile(rhs1_file); - real_type* x = new real_type[A->getNumRows()]; - vector_type* vec_rhs = new vector_type(A->getNumRows()); - vector_type* vec_x = new vector_type(A->getNumRows()); - vector_type* vec_r = new vector_type(A->getNumRows()); + real_type *rhs = ReSolve::io::readRhsFromFile(rhs1_file); + real_type *x = new real_type[A->getNumRows()]; + vector_type *vec_rhs = new vector_type(A->getNumRows()); + vector_type *vec_x = new vector_type(A->getNumRows()); + vector_type *vec_r = new vector_type(A->getNumRows()); rhs1_file.close(); // Convert first matrix to CSR format @@ -90,13 +84,12 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); error_sum += status; - - vector_type* vec_test; - vector_type* vec_diff; - vec_test = new vector_type(A->getNumRows()); - vec_diff = new vector_type(A->getNumRows()); - real_type* x_data = new real_type[A->getNumRows()]; - for (int i=0; igetNumRows(); ++i){ + vector_type *vec_test; + vector_type *vec_diff; + vec_test = new vector_type(A->getNumRows()); + vec_diff = new vector_type(A->getNumRows()); + real_type *x_data = new real_type[A->getNumRows()]; + for (int i = 0; i < A->getNumRows(); ++i) { x_data[i] = 1.0; } @@ -106,48 +99,46 @@ int main(int argc, char *argv[]) // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, ReSolve::memory::HOST)); matrix_handler->setValuesChanged(true, "cpu"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; - + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); + // for testing only - control - //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cpu")); real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cpu")); - - //compute x-x_true + + // compute x-x_true vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cpu"); - //evaluate 
its norm + // evaluate its norm real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cpu")); - - //compute the residual using exact solution + + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); - //evaluate the residual ON THE CPU using COMPUTED solution - + // evaluate the residual ON THE CPU using COMPUTED solution + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; - + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); - - std::cout<<"Results (first matrix): "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); // and solve it too - status = KLU->refactorize(); + status = KLU->refactorize(); error_sum += status; status = KLU->solve(vec_rhs, vec_x); @@ -177,46 +167,45 @@ int main(int argc, char *argv[]) vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); matrix_handler->setValuesChanged(true, "cpu"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); - - //for testing only - control + + // for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cpu")); - //compute x-x_true + // compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::HOST); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cpu"); - 
//evaluate its norm + // evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cpu")); - - //compute the residual using exact solution + + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cpu"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); - - std::cout<<"Results (second matrix): "< -#include #include +#include +#include -#include -#include +#include +#include #include #include #include +#include +#include #include -#include -#include #include -//author: KS -//functionality test to check whether cuSolverGLU works correctly. +// author: KS +// functionality test to check whether cuSolverGLU works correctly. using namespace ReSolve::constants; @@ -20,29 +20,28 @@ int main(int argc, char *argv[]) { // Use ReSolve data types. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; using matrix_type = ReSolve::matrix::Sparse; - //we want error sum to be 0 at the end - //that means PASS. - //otheriwse it is a FAIL. + // we want error sum to be 0 at the end + // that means PASS. + // otheriwse it is a FAIL. 
int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); + ReSolve::LinAlgWorkspaceCUDA *workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); - - ReSolve::LinSolverDirectCuSolverGLU* GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); + + ReSolve::LinSolverDirectCuSolverGLU *GLU = new ReSolve::LinSolverDirectCuSolverGLU(workspace_CUDA); // Input to this code is location of `data` directory where matrix files are stored const std::string data_path = (argc == 2) ? 
argv[1] : "./"; - std::string matrixFileName1 = data_path + "data/matrix_ACTIVSg200_AC_10.mtx"; std::string matrixFileName2 = data_path + "data/matrix_ACTIVSg200_AC_11.mtx"; @@ -51,33 +50,28 @@ int main(int argc, char *argv[]) // Read first matrix std::ifstream mat1(matrixFileName1); - if(!mat1.is_open()) - { + if (!mat1.is_open()) { std::cout << "Failed to open file " << matrixFileName1 << "\n"; return -1; } - ReSolve::matrix::Coo* A_coo = ReSolve::io::readMatrixFromFile(mat1); - ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(A_coo->getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + ReSolve::matrix::Coo *A_coo = ReSolve::io::readMatrixFromFile(mat1); + ReSolve::matrix::Csr *A = + new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); mat1.close(); // Read first rhs vector std::ifstream rhs1_file(rhsFileName1); - if(!rhs1_file.is_open()) - { + if (!rhs1_file.is_open()) { std::cout << "Failed to open file " << rhsFileName1 << "\n"; return -1; } - real_type* rhs = ReSolve::io::readRhsFromFile(rhs1_file); - real_type* x = new real_type[A->getNumRows()]; - vector_type* vec_rhs = new vector_type(A->getNumRows()); - vector_type* vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + real_type *rhs = ReSolve::io::readRhsFromFile(rhs1_file); + real_type *x = new real_type[A->getNumRows()]; + vector_type *vec_rhs = new vector_type(A->getNumRows()); + vector_type *vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); - vector_type* vec_r = new vector_type(A->getNumRows()); + vector_type *vec_r = new vector_type(A->getNumRows()); rhs1_file.close(); // Convert first matrix to CSR format @@ -95,30 +89,29 @@ int main(int argc, char *argv[]) status = KLU->factorize(); error_sum += status; -// but DO NOT SOLVE with KLU! 
- + // but DO NOT SOLVE with KLU! - matrix_type* L = KLU->getLFactor(); - matrix_type* U = KLU->getUFactor(); - if (L == nullptr) {printf("ERROR");} - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); - status = GLU->setup(A, L, U, P, Q); + matrix_type *L = KLU->getLFactor(); + matrix_type *U = KLU->getUFactor(); + if (L == nullptr) { + printf("ERROR"); + } + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); + status = GLU->setup(A, L, U, P, Q); error_sum += status; - std::cout<<"GLU setup status: "<update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = GLU->solve(vec_rhs, vec_x); error_sum += status; - std::cout<<"GLU solve status: "<getNumRows()); - vec_diff = new vector_type(A->getNumRows()); - real_type* x_data = new real_type[A->getNumRows()]; - for (int i=0; igetNumRows(); ++i){ + std::cout << "GLU solve status: " << status << std::endl; + + vector_type *vec_test; + vector_type *vec_diff; + vec_test = new vector_type(A->getNumRows()); + vec_diff = new vector_type(A->getNumRows()); + real_type *x_data = new real_type[A->getNumRows()]; + for (int i = 0; i < A->getNumRows(); ++i) { x_data[i] = 1.0; } @@ -128,49 +121,46 @@ int main(int argc, char *argv[]) // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); matrix_handler->setValuesChanged(true, "cuda"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; - + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + // for testing only - control - //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); - - //compute x-x_true + + // compute x-x_true vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); - //evaluate its norm + // evaluate its norm 
real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); - - //compute the residual using exact solution + + // compute the residual using exact solution vec_x->update(vec_x->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - //evaluate the residual ON THE CPU using COMPUTED solution - + // evaluate the residual ON THE CPU using COMPUTED solution + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; - + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - - std::cout<<"Results (first matrix): "<refactorize(); error_sum += status; - std::cout<<"CUSOLVER GLU refactorization status: "<solve(vec_rhs, vec_x); error_sum += status; - vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - matrix_handler->setValuesChanged(true, "cuda"); + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + matrix_handler->setValuesChanged(true, "cuda"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - - //for testing only - control + + // for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); - //compute x-x_true + // compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler->axpy(&MINUSONE, vec_x, 
vec_diff, "cuda"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); - - //compute the residual using exact solution + + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - - std::cout<<"Results (second matrix): "< -#include #include +#include +#include -#include -#include +#include +#include #include -#include #include +#include #include +#include +#include #include -#include -#include #include -//author: KS -//functionality test to check whether cuSolverRf works correctly. +// author: KS +// functionality test to check whether cuSolverRf works correctly. using namespace ReSolve::constants; @@ -21,28 +21,27 @@ int main(int argc, char *argv[]) { // Use ReSolve data types. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - //we want error sum to be 0 at the end - //that means PASS. - //otheriwse it is a FAIL. + // we want error sum to be 0 at the end + // that means PASS. + // otheriwse it is a FAIL. 
int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); + ReSolve::LinAlgWorkspaceCUDA *workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); - - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; + + ReSolve::LinSolverDirectCuSolverRf *Rf = new ReSolve::LinSolverDirectCuSolverRf; // Input to this code is location of `data` directory where matrix files are stored const std::string data_path = (argc == 2) ? 
argv[1] : "./"; - std::string matrixFileName1 = data_path + "data/matrix_ACTIVSg200_AC_10.mtx"; std::string matrixFileName2 = data_path + "data/matrix_ACTIVSg200_AC_11.mtx"; @@ -51,31 +50,26 @@ int main(int argc, char *argv[]) // Read first matrix std::ifstream mat1(matrixFileName1); - if(!mat1.is_open()) - { + if (!mat1.is_open()) { std::cout << "Failed to open file " << matrixFileName1 << "\n"; return -1; } - ReSolve::matrix::Coo* A_coo = ReSolve::io::readMatrixFromFile(mat1); - ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(A_coo->getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + ReSolve::matrix::Coo *A_coo = ReSolve::io::readMatrixFromFile(mat1); + ReSolve::matrix::Csr *A = + new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); mat1.close(); // Read first rhs vector std::ifstream rhs1_file(rhsFileName1); - if(!rhs1_file.is_open()) - { + if (!rhs1_file.is_open()) { std::cout << "Failed to open file " << rhsFileName1 << "\n"; return -1; } - real_type* rhs = ReSolve::io::readRhsFromFile(rhs1_file); - real_type* x = new real_type[A->getNumRows()]; - vector_type* vec_rhs = new vector_type(A->getNumRows()); - vector_type* vec_x = new vector_type(A->getNumRows()); - vector_type* vec_r = new vector_type(A->getNumRows()); + real_type *rhs = ReSolve::io::readRhsFromFile(rhs1_file); + real_type *x = new real_type[A->getNumRows()]; + vector_type *vec_rhs = new vector_type(A->getNumRows()); + vector_type *vec_x = new vector_type(A->getNumRows()); + vector_type *vec_r = new vector_type(A->getNumRows()); rhs1_file.close(); // Convert first matrix to CSR format @@ -96,13 +90,12 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); error_sum += status; - - vector_type* vec_test; - vector_type* vec_diff; - vec_test = new vector_type(A->getNumRows()); - vec_diff = new vector_type(A->getNumRows()); - real_type* x_data = new 
real_type[A->getNumRows()]; - for (int i=0; igetNumRows(); ++i){ + vector_type *vec_test; + vector_type *vec_diff; + vec_test = new vector_type(A->getNumRows()); + vec_diff = new vector_type(A->getNumRows()); + real_type *x_data = new real_type[A->getNumRows()]; + for (int i = 0; i < A->getNumRows(); ++i) { x_data[i] = 1.0; } @@ -112,63 +105,61 @@ int main(int argc, char *argv[]) // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); matrix_handler->setValuesChanged(true, "cuda"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; - + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + // for testing only - control - //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); - - //compute x-x_true + + // compute x-x_true vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); - - //compute the residual using exact solution + + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - //evaluate the residual ON THE CPU using COMPUTED solution - + // evaluate the residual ON THE CPU using COMPUTED solution + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, 
&MINUSONE, "csr", "cpu"); error_sum += status; - + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - - std::cout<<"Results (first matrix): "<getLFactor(); - ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); - ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - error_sum += matrix_handler->csc2csr(L_csc,L, "cuda"); - error_sum += matrix_handler->csc2csr(U_csc,U, "cuda"); + + ReSolve::matrix::Csc *L_csc = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U_csc = (ReSolve::matrix::Csc *)KLU->getUFactor(); + ReSolve::matrix::Csr *L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + ReSolve::matrix::Csr *U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + error_sum += matrix_handler->csc2csr(L_csc, L, "cuda"); + error_sum += matrix_handler->csc2csr(U_csc, U, "cuda"); if (L == nullptr) { std::cout << "ERROR!\n"; } - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); - error_sum += Rf->setup(A, L, U, P, Q); + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); + error_sum += Rf->setup(A, L, U, P, Q); // Load the second matrix std::ifstream mat2(matrixFileName2); - if(!mat2.is_open()) - { + if (!mat2.is_open()) { std::cout << "Failed to open file " << matrixFileName2 << "\n"; return -1; } @@ -177,8 +168,7 @@ int main(int argc, char *argv[]) // Load the second rhs vector std::ifstream rhs2_file(rhsFileName2); - if(!rhs2_file.is_open()) - { + if (!rhs2_file.is_open()) { std::cout << "Failed to open file " << rhsFileName2 << "\n"; return -1; } @@ -197,42 +187,41 @@ int main(int argc, char *argv[]) vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, 
"cuda"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - - //for testing only - control + + // for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); - //compute x-x_true + // compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); - - //compute the residual using exact solution + + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - - std::cout<<"Results (second matrix): "< -#include #include +#include +#include -#include -#include +#include +#include +#include #include -#include #include +#include #include +#include +#include #include -#include -#include -#include #include -//author: KS -//functionality test to check whether cuSolverRf/FGMRES works correctly. +// author: KS +// functionality test to check whether cuSolverRf/FGMRES works correctly. using namespace ReSolve::constants; @@ -22,65 +22,57 @@ int main(int argc, char *argv[]) { // Use ReSolve data types. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - //we want error sum to be 0 at the end - //that means PASS. - //otheriwse it is a FAIL. 
+ // we want error sum to be 0 at the end + // that means PASS. + // otheriwse it is a FAIL. int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); + ReSolve::LinAlgWorkspaceCUDA *workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_CUDA); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); - ReSolve::LinSolverDirectCuSolverRf* Rf = new ReSolve::LinSolverDirectCuSolverRf; - ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::mgs_pm); - ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); + ReSolve::LinSolverDirectCuSolverRf *Rf = new ReSolve::LinSolverDirectCuSolverRf; + ReSolve::GramSchmidt *GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::mgs_pm); + ReSolve::LinSolverIterativeFGMRES *FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); // Input to this code is location of `data` directory where matrix files are stored const std::string data_path = (argc == 2) ? 
argv[1] : "./"; - std::string matrixFileName1 = data_path + "data/matrix_ACTIVSg2000_AC_00.mtx"; std::string matrixFileName2 = data_path + "data/matrix_ACTIVSg2000_AC_02.mtx"; std::string rhsFileName1 = data_path + "data/rhs_ACTIVSg2000_AC_00.mtx.ones"; std::string rhsFileName2 = data_path + "data/rhs_ACTIVSg2000_AC_02.mtx.ones"; - - // Read first matrix std::ifstream mat1(matrixFileName1); - if(!mat1.is_open()) - { + if (!mat1.is_open()) { std::cout << "Failed to open file " << matrixFileName1 << "\n"; return -1; } - ReSolve::matrix::Coo* A_coo = ReSolve::io::readMatrixFromFile(mat1); - ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(A_coo->getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + ReSolve::matrix::Coo *A_coo = ReSolve::io::readMatrixFromFile(mat1); + ReSolve::matrix::Csr *A = + new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); mat1.close(); // Read first rhs vector std::ifstream rhs1_file(rhsFileName1); - if(!rhs1_file.is_open()) - { + if (!rhs1_file.is_open()) { std::cout << "Failed to open file " << rhsFileName1 << "\n"; return -1; } - real_type* rhs = ReSolve::io::readRhsFromFile(rhs1_file); - real_type* x = new real_type[A->getNumRows()]; - vector_type* vec_rhs = new vector_type(A->getNumRows()); - vector_type* vec_x = new vector_type(A->getNumRows()); - vector_type* vec_r = new vector_type(A->getNumRows()); + real_type *rhs = ReSolve::io::readRhsFromFile(rhs1_file); + real_type *x = new real_type[A->getNumRows()]; + vector_type *vec_rhs = new vector_type(A->getNumRows()); + vector_type *vec_x = new vector_type(A->getNumRows()); + vector_type *vec_r = new vector_type(A->getNumRows()); rhs1_file.close(); // Convert first matrix to CSR format @@ -101,14 +93,14 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); error_sum += status; - vector_type* vec_test; - vector_type* vec_diff; + vector_type 
*vec_test; + vector_type *vec_diff; - vec_test = new vector_type(A->getNumRows()); - vec_diff = new vector_type(A->getNumRows()); - real_type* x_data = new real_type[A->getNumRows()]; + vec_test = new vector_type(A->getNumRows()); + vec_diff = new vector_type(A->getNumRows()); + real_type *x_data = new real_type[A->getNumRows()]; - for (int i=0; igetNumRows(); ++i){ + for (int i = 0; i < A->getNumRows(); ++i) { x_data[i] = 1.0; } @@ -118,73 +110,70 @@ int main(int argc, char *argv[]) // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, ReSolve::memory::DEVICE)); matrix_handler->setValuesChanged(true, "cuda"); - //evaluate the residual ||b-Ax|| - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + // evaluate the residual ||b-Ax|| + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - - //for testing only - control + // for testing only - control real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); - //compute x-x_true + // compute x-x_true vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); - //compute the residual using exact solution + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - //evaluate the residual ON THE CPU using COMPUTED solution + // evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, 
ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - std::cout<<"Results (first matrix): "<getLFactor(); - ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); - ReSolve::matrix::Csr* L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); - ReSolve::matrix::Csr* U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); - error_sum += matrix_handler->csc2csr(L_csc,L, "cuda"); - error_sum += matrix_handler->csc2csr(U_csc,U, "cuda"); + ReSolve::matrix::Csc *L_csc = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U_csc = (ReSolve::matrix::Csc *)KLU->getUFactor(); + ReSolve::matrix::Csr *L = new ReSolve::matrix::Csr(L_csc->getNumRows(), L_csc->getNumColumns(), L_csc->getNnz()); + ReSolve::matrix::Csr *U = new ReSolve::matrix::Csr(U_csc->getNumRows(), U_csc->getNumColumns(), U_csc->getNnz()); + error_sum += matrix_handler->csc2csr(L_csc, L, "cuda"); + error_sum += matrix_handler->csc2csr(U_csc, U, "cuda"); if (L == nullptr) { printf("ERROR"); } - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); - error_sum += Rf->setup(A, L, U, P, Q); + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); + error_sum += Rf->setup(A, L, U, P, Q); - FGMRES->setMaxit(200); - FGMRES->setRestart(100); + FGMRES->setMaxit(200); + FGMRES->setRestart(100); - GS->setup(A->getNumRows(), FGMRES->getRestart()); - status = FGMRES->setup(A); + GS->setup(A->getNumRows(), FGMRES->getRestart()); + status = FGMRES->setup(A); error_sum += status; // Load the second matrix std::ifstream mat2(matrixFileName2); - if(!mat2.is_open()) - { + if (!mat2.is_open()) { std::cout << "Failed to open file " << 
matrixFileName2 << "\n"; return -1; } @@ -193,8 +182,7 @@ int main(int argc, char *argv[]) // Load the second rhs vector std::ifstream rhs2_file(rhsFileName2); - if(!rhs2_file.is_open()) - { + if (!rhs2_file.is_open()) { std::cout << "Failed to open file " << rhsFileName2 << "\n"; return -1; } @@ -207,11 +195,11 @@ int main(int argc, char *argv[]) status = Rf->refactorize(); error_sum += status; - + vec_x->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = Rf->solve(vec_x); error_sum += status; - + FGMRES->resetMatrix(A); status = FGMRES->setupPreconditioner("LU", Rf); error_sum += status; @@ -223,40 +211,41 @@ int main(int argc, char *argv[]) vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "cuda"); - //evaluate final residual - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + // evaluate final residual + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - - //for testing only - control + // for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); - //compute x-x_true + // compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); - //compute the residual using exact solution + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); - 
std::cout<<"Results (second matrix): "<getNumIter()<<" (max 200, restart 100)"<getInitResidualNorm()<<" "<getFinalResidualNorm()<<" (tol 1e-14)"<getNumIter() << " (max 200, restart 100)" << std::endl; + std::cout << "\t IR starting res. norm : " << FGMRES->getInitResidualNorm() << " " << std::endl; + std::cout << "\t IR final res. norm : " << FGMRES->getFinalResidualNorm() << " (tol 1e-14)" << std::endl << std::endl; + + if ((error_sum == 0) && (normRmatrix1 / normB1 < 1e-12) && (normRmatrix2 / normB2 < 1e-15)) { + std::cout << "Test 4 (KLU with cuSolverRf refactorization + IR) PASSED" << std::endl << std::endl; + ; } else { - std::cout<<"Test 4 (KLU with cuSolverRf refactorization + IR) FAILED, error sum: "< -#include #include +#include +#include -#include -#include +#include +#include #include #include #include +#include +#include #include -#include -#include #include -//author: KS -//functionality test to check whether rocsolver_rf works correctly. +// author: KS +// functionality test to check whether rocsolver_rf works correctly. using namespace ReSolve::constants; @@ -20,29 +20,28 @@ int main(int argc, char *argv[]) { // Use ReSolve data types. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; using matrix_type = ReSolve::matrix::Sparse; - //we want error sum to be 0 at the end - //that means PASS. - //otheriwse it is a FAIL. + // we want error sum to be 0 at the end + // that means PASS. + // otheriwse it is a FAIL. 
int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceHIP* workspace_HIP = new ReSolve::LinAlgWorkspaceHIP(); + ReSolve::LinAlgWorkspaceHIP *workspace_HIP = new ReSolve::LinAlgWorkspaceHIP(); workspace_HIP->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_HIP); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); - ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); + ReSolve::LinSolverDirectRocSolverRf *Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); // Input to this code is location of `data` directory where matrix files are stored const std::string data_path = (argc == 2) ? 
argv[1] : "./"; - std::string matrixFileName1 = data_path + "data/matrix_ACTIVSg200_AC_10.mtx"; std::string matrixFileName2 = data_path + "data/matrix_ACTIVSg200_AC_11.mtx"; @@ -51,33 +50,28 @@ int main(int argc, char *argv[]) // Read first matrix std::ifstream mat1(matrixFileName1); - if(!mat1.is_open()) - { + if (!mat1.is_open()) { std::cout << "Failed to open file " << matrixFileName1 << "\n"; return -1; } - ReSolve::matrix::Coo* A_coo = ReSolve::io::readMatrixFromFile(mat1); - ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(A_coo->getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + ReSolve::matrix::Coo *A_coo = ReSolve::io::readMatrixFromFile(mat1); + ReSolve::matrix::Csr *A = + new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); mat1.close(); // Read first rhs vector std::ifstream rhs1_file(rhsFileName1); - if(!rhs1_file.is_open()) - { + if (!rhs1_file.is_open()) { std::cout << "Failed to open file " << rhsFileName1 << "\n"; return -1; } - real_type* rhs = ReSolve::io::readRhsFromFile(rhs1_file); - real_type* x = new real_type[A->getNumRows()]; - vector_type* vec_rhs = new vector_type(A->getNumRows()); - vector_type* vec_x = new vector_type(A->getNumRows()); - vec_x->allocate(ReSolve::memory::HOST);//for KLU + real_type *rhs = ReSolve::io::readRhsFromFile(rhs1_file); + real_type *x = new real_type[A->getNumRows()]; + vector_type *vec_rhs = new vector_type(A->getNumRows()); + vector_type *vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); - vector_type* vec_r = new vector_type(A->getNumRows()); + vector_type *vec_r = new vector_type(A->getNumRows()); rhs1_file.close(); // Convert first matrix to CSR format @@ -98,28 +92,30 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); error_sum += status; - std::cout<<"KLU solve status: 
"<getLFactor(); - matrix_type* U = KLU->getUFactor(); - if (L == nullptr) {printf("ERROR");} - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + matrix_type *L = KLU->getLFactor(); + matrix_type *U = KLU->getUFactor(); + if (L == nullptr) { + printf("ERROR"); + } + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vec_rhs->setDataUpdated(ReSolve::memory::DEVICE); - status = Rf->setup(A, L, U, P, Q, vec_rhs); + status = Rf->setup(A, L, U, P, Q, vec_rhs); error_sum += status; - std::cout<<"Rf setup status: "<refactorize(); error_sum += status; - vector_type* vec_test; - vector_type* vec_diff; - vec_test = new vector_type(A->getNumRows()); - vec_diff = new vector_type(A->getNumRows()); - real_type* x_data = new real_type[A->getNumRows()]; - for (int i=0; igetNumRows(); ++i){ + vector_type *vec_test; + vector_type *vec_diff; + vec_test = new vector_type(A->getNumRows()); + vec_diff = new vector_type(A->getNumRows()); + real_type *x_data = new real_type[A->getNumRows()]; + for (int i = 0; i < A->getNumRows(); ++i) { x_data[i] = 1.0; } @@ -129,48 +125,46 @@ int main(int argc, char *argv[]) // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "hip")); matrix_handler->setValuesChanged(true, "hip"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","hip"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); error_sum += status; real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - //for testing only - control + // for testing only - control real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "hip")); real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); - //compute x-x_true + // compute x-x_true vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "hip"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix1 = 
sqrt(vector_handler->dot(vec_diff, vec_diff, "hip")); - //compute the residual using exact solution + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "hip"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - //evaluate the residual ON THE CPU using COMPUTED solution + // evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - std::cout<<"Results (first matrix): "<refactorize(); error_sum += status; - std::cout<<"rocSolverRf refactorization status: "<solve(vec_rhs, vec_x); error_sum += status; vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "hip"); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); error_sum += status; real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - //for testing only - control + // for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); - //compute x-x_true + // compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "hip"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "hip")); - //compute the residual using exact solution + // compute the residual using 
exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - std::cout<<"Results (second matrix): "< -#include #include +#include +#include -#include -#include +#include +#include +#include #include -#include #include +#include #include +#include +#include #include -#include -#include -#include #include -//author: KS -//functionality test to check whether cuSolverRf/FGMRES works correctly. +// author: KS +// functionality test to check whether cuSolverRf/FGMRES works correctly. using namespace ReSolve::constants; @@ -22,65 +22,57 @@ int main(int argc, char *argv[]) { // Use ReSolve data types. using index_type = ReSolve::index_type; - using real_type = ReSolve::real_type; + using real_type = ReSolve::real_type; using vector_type = ReSolve::vector::Vector; - //we want error sum to be 0 at the end - //that means PASS. - //otheriwse it is a FAIL. + // we want error sum to be 0 at the end + // that means PASS. + // otheriwse it is a FAIL. 
int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceHIP* workspace_HIP = new ReSolve::LinAlgWorkspaceHIP(); + ReSolve::LinAlgWorkspaceHIP *workspace_HIP = new ReSolve::LinAlgWorkspaceHIP(); workspace_HIP->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_HIP); + ReSolve::MatrixHandler *matrix_handler = new ReSolve::MatrixHandler(workspace_HIP); + ReSolve::VectorHandler *vector_handler = new ReSolve::VectorHandler(workspace_HIP); - ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; + ReSolve::LinSolverDirectKLU *KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); - ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); - ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); - ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS, "hip"); + ReSolve::LinSolverDirectRocSolverRf *Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); + ReSolve::GramSchmidt *GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2); + ReSolve::LinSolverIterativeFGMRES *FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS, "hip"); // Input to this code is location of `data` directory where matrix files are stored const std::string data_path = (argc == 2) ? 
argv[1] : "./"; - std::string matrixFileName1 = data_path + "data/matrix_ACTIVSg2000_AC_00.mtx"; std::string matrixFileName2 = data_path + "data/matrix_ACTIVSg2000_AC_02.mtx"; std::string rhsFileName1 = data_path + "data/rhs_ACTIVSg2000_AC_00.mtx.ones"; std::string rhsFileName2 = data_path + "data/rhs_ACTIVSg2000_AC_02.mtx.ones"; - - // Read first matrix std::ifstream mat1(matrixFileName1); - if(!mat1.is_open()) - { + if (!mat1.is_open()) { std::cout << "Failed to open file " << matrixFileName1 << "\n"; return -1; } - ReSolve::matrix::Coo* A_coo = ReSolve::io::readMatrixFromFile(mat1); - ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(A_coo->getNumRows(), - A_coo->getNumColumns(), - A_coo->getNnz(), - A_coo->symmetric(), - A_coo->expanded()); + ReSolve::matrix::Coo *A_coo = ReSolve::io::readMatrixFromFile(mat1); + ReSolve::matrix::Csr *A = + new ReSolve::matrix::Csr(A_coo->getNumRows(), A_coo->getNumColumns(), A_coo->getNnz(), A_coo->symmetric(), A_coo->expanded()); mat1.close(); // Read first rhs vector std::ifstream rhs1_file(rhsFileName1); - if(!rhs1_file.is_open()) - { + if (!rhs1_file.is_open()) { std::cout << "Failed to open file " << rhsFileName1 << "\n"; return -1; } - real_type* rhs = ReSolve::io::readRhsFromFile(rhs1_file); - real_type* x = new real_type[A->getNumRows()]; - vector_type* vec_rhs = new vector_type(A->getNumRows()); - vector_type* vec_x = new vector_type(A->getNumRows()); - vector_type* vec_r = new vector_type(A->getNumRows()); + real_type *rhs = ReSolve::io::readRhsFromFile(rhs1_file); + real_type *x = new real_type[A->getNumRows()]; + vector_type *vec_rhs = new vector_type(A->getNumRows()); + vector_type *vec_x = new vector_type(A->getNumRows()); + vector_type *vec_r = new vector_type(A->getNumRows()); rhs1_file.close(); // Convert first matrix to CSR format @@ -101,14 +93,14 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); error_sum += status; - vector_type* vec_test; - vector_type* vec_diff; + vector_type 
*vec_test; + vector_type *vec_diff; - vec_test = new vector_type(A->getNumRows()); - vec_diff = new vector_type(A->getNumRows()); - real_type* x_data = new real_type[A->getNumRows()]; + vec_test = new vector_type(A->getNumRows()); + vec_diff = new vector_type(A->getNumRows()); + real_type *x_data = new real_type[A->getNumRows()]; - for (int i=0; igetNumRows(); ++i){ + for (int i = 0; i < A->getNumRows(); ++i) { x_data[i] = 1.0; } @@ -118,70 +110,67 @@ int main(int argc, char *argv[]) // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, ReSolve::memory::DEVICE)); matrix_handler->setValuesChanged(true, "hip"); - //evaluate the residual ||b-Ax|| - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","hip"); + // evaluate the residual ||b-Ax|| + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); error_sum += status; real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - - //for testing only - control + // for testing only - control real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "hip")); real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); - //compute x-x_true + // compute x-x_true vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "hip"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "hip")); - //compute the residual using exact solution + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "hip"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); error_sum += status; real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - //evaluate the residual ON THE CPU using COMPUTED solution + // evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, ReSolve::memory::HOST, 
ReSolve::memory::HOST); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - std::cout<<"Results (first matrix): "<getLFactor(); - ReSolve::matrix::Csc* U = (ReSolve::matrix::Csc*) KLU->getUFactor(); + ReSolve::matrix::Csc *L = (ReSolve::matrix::Csc *)KLU->getLFactor(); + ReSolve::matrix::Csc *U = (ReSolve::matrix::Csc *)KLU->getUFactor(); if (L == nullptr) { printf("ERROR"); } - index_type* P = KLU->getPOrdering(); - index_type* Q = KLU->getQOrdering(); + index_type *P = KLU->getPOrdering(); + index_type *Q = KLU->getQOrdering(); Rf->setSolveMode(1); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - error_sum += Rf->setup(A, L, U, P, Q, vec_rhs); - FGMRES->setMaxit(200); - FGMRES->setRestart(100); + error_sum += Rf->setup(A, L, U, P, Q, vec_rhs); + FGMRES->setMaxit(200); + FGMRES->setRestart(100); - GS->setup(A->getNumRows(), FGMRES->getRestart()); - status = FGMRES->setup(A); + GS->setup(A->getNumRows(), FGMRES->getRestart()); + status = FGMRES->setup(A); error_sum += status; // Load the second matrix std::ifstream mat2(matrixFileName2); - if(!mat2.is_open()) - { + if (!mat2.is_open()) { std::cout << "Failed to open file " << matrixFileName2 << "\n"; return -1; } @@ -190,8 +179,7 @@ int main(int argc, char *argv[]) // Load the second rhs vector std::ifstream rhs2_file(rhsFileName2); - if(!rhs2_file.is_open()) - { + if (!rhs2_file.is_open()) { std::cout << "Failed to open file " << rhsFileName2 << "\n"; return -1; } @@ -203,11 +191,11 @@ int main(int argc, char *argv[]) status = Rf->refactorize(); error_sum += status; - + vec_x->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); status = Rf->solve(vec_x); error_sum += status; - + FGMRES->resetMatrix(A); status = FGMRES->setupPreconditioner("LU", Rf); error_sum += status; @@ 
-219,39 +207,40 @@ int main(int argc, char *argv[]) vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, "hip"); - //evaluate final residual - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); + // evaluate final residual + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "hip"); error_sum += status; real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - - //for testing only - control + // for testing only - control real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "hip")); - //compute x-x_true + // compute x-x_true vec_diff->update(x_data, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "hip"); - //evaluate its norm + // evaluate its norm real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "hip")); - //compute the residual using exact solution + // compute the residual using exact solution vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "hip"); error_sum += status; real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "hip")); - std::cout<<"Results (second matrix): "<getNumIter()<<" (max 200, restart 100)"<getInitResidualNorm()<<" "<getFinalResidualNorm()<<" (tol 1e-14)"<getNumIter() << " (max 200, restart 100)" << std::endl; + std::cout << "\t IR starting res. norm : " << FGMRES->getInitResidualNorm() << " " << std::endl; + std::cout << "\t IR final res. 
norm : " << FGMRES->getFinalResidualNorm() << " (tol 1e-14)" << std::endl << std::endl; + if ((error_sum == 0) && (normRmatrix1 / normB1 < 1e-12) && (normRmatrix2 / normB2 < 1e-9)) { + std::cout << "Test 4 (KLU with rocsolverrf refactorization + IR) PASSED" << std::endl << std::endl; + ; } else { - std::cout<<"Test 4 (KLU with rocsolverrf refactorization + IR) FAILED, error sum: "< #include #include +#include #include -namespace ReSolve { namespace tests { +namespace ReSolve +{ +namespace tests +{ -enum TestOutcome {PASS=0, FAIL, SKIP, EXPECTED_FAIL, UNEXPECTED_PASS}; +enum TestOutcome { PASS = 0, FAIL, SKIP, EXPECTED_FAIL, UNEXPECTED_PASS }; class TestStatus { -public: - TestStatus() - : outcome_(TestOutcome::PASS) - {} - TestStatus(const char* funcname) - : outcome_(TestOutcome::PASS), - funcname_(funcname) - {} - ~TestStatus() - {} - - TestStatus& operator=(const bool isPass) + public: + TestStatus() : outcome_(TestOutcome::PASS) {} + TestStatus(const char *funcname) : outcome_(TestOutcome::PASS), funcname_(funcname) {} + ~TestStatus() {} + + TestStatus &operator=(const bool isPass) { - if(isPass) + if (isPass) outcome_ = TestOutcome::PASS; else - outcome_ = TestOutcome::FAIL; + outcome_ = TestOutcome::FAIL; return *this; } - TestStatus& operator*=(const bool isPass) + TestStatus &operator*=(const bool isPass) { - if(!isPass) - outcome_ = TestOutcome::FAIL; + if (!isPass) + outcome_ = TestOutcome::FAIL; return *this; } - void skipTest() - { - outcome_ = TestOutcome::SKIP; - } + void skipTest() { outcome_ = TestOutcome::SKIP; } - void expectFailure() - { - expectFailure_ = true; - } + void expectFailure() { expectFailure_ = true; } - TestOutcome report() - { - return report(funcname_); - } + TestOutcome report() { return report(funcname_); } - TestOutcome report(const char* funcname) + TestOutcome report(const char *funcname) { - if (expectFailure_) - { + if (expectFailure_) { if ((outcome_ == FAIL) || (outcome_ == EXPECTED_FAIL)) outcome_ = EXPECTED_FAIL; else 
if ((outcome_ == PASS) || (outcome_ == UNEXPECTED_PASS)) @@ -75,100 +62,96 @@ class TestStatus outcome_ = SKIP; } - switch(outcome_) - { + switch (outcome_) { using namespace colors; - case PASS: - std::cout << "--- " << GREEN << "PASS" << CLEAR << ": Test " << funcname << "\n"; - break; - case FAIL: - std::cout << "--- " << RED << "FAIL" << CLEAR << ": Test " << funcname << "\n"; - break; - case SKIP: - std::cout << "--- " << YELLOW << "SKIP" << CLEAR << ": Test " << funcname << CLEAR << "\n"; - break; - case EXPECTED_FAIL: - std::cout << "--- " << ORANGE << "FAIL" << CLEAR << " (EXPECTED)" << ": Test " << funcname << "\n"; - break; - case UNEXPECTED_PASS: - std::cout << "--- " << YELLOW << "PASS" << CLEAR << "(UNEXPECTED)" << ": Test " << funcname << "\n"; - break; - default: - std::cout << "--- " << RED << "FAIL" << CLEAR << "Unrecognized test result " << outcome_ - << " for test " << funcname << "\n"; + case PASS: + std::cout << "--- " << GREEN << "PASS" << CLEAR << ": Test " << funcname << "\n"; + break; + case FAIL: + std::cout << "--- " << RED << "FAIL" << CLEAR << ": Test " << funcname << "\n"; + break; + case SKIP: + std::cout << "--- " << YELLOW << "SKIP" << CLEAR << ": Test " << funcname << CLEAR << "\n"; + break; + case EXPECTED_FAIL: + std::cout << "--- " << ORANGE << "FAIL" << CLEAR << " (EXPECTED)" + << ": Test " << funcname << "\n"; + break; + case UNEXPECTED_PASS: + std::cout << "--- " << YELLOW << "PASS" << CLEAR << "(UNEXPECTED)" + << ": Test " << funcname << "\n"; + break; + default: + std::cout << "--- " << RED << "FAIL" << CLEAR << "Unrecognized test result " << outcome_ << " for test " << funcname << "\n"; } return outcome_; } -private: + private: TestOutcome outcome_; - const char* funcname_; + const char *funcname_; bool expectFailure_ = false; }; - - -struct TestingResults -{ +struct TestingResults { int success = 0; int failure = 0; int skip = 0; int expected_failure = 0; int unexpected_success = 0; - TestingResults(){} - 
~TestingResults(){} - TestingResults(const TestingResults& r) + TestingResults() {} + ~TestingResults() {} + TestingResults(const TestingResults &r) { - this->success = r.success; + this->success = r.success; this->failure = r.failure; - this->skip = r.skip; - this->expected_failure = r.expected_failure; + this->skip = r.skip; + this->expected_failure = r.expected_failure; this->unexpected_success = r.unexpected_success; } void init() { - this->success = 0; + this->success = 0; this->failure = 0; - this->skip = 0; - this->expected_failure = 0; + this->skip = 0; + this->expected_failure = 0; this->unexpected_success = 0; } - TestingResults& operator+=(const TestingResults& rhs) + TestingResults &operator+=(const TestingResults &rhs) { - this->success += rhs.success; + this->success += rhs.success; this->failure += rhs.failure; - this->skip += rhs.skip; - this->expected_failure += rhs.expected_failure; + this->skip += rhs.skip; + this->expected_failure += rhs.expected_failure; this->unexpected_success += rhs.unexpected_success; - + return *this; } - TestingResults& operator+=(const TestOutcome outcome) + TestingResults &operator+=(const TestOutcome outcome) { - switch(outcome) - { - case PASS: - this->success++; - break; - case FAIL: - this->failure++; - break; - case SKIP: - this->skip++; - break; - case EXPECTED_FAIL: - this->expected_failure++; - break; - case UNEXPECTED_PASS: - this->unexpected_success++; - break; - default: - std::cout << "Warning: Unrecognized test outcome code " << outcome << ". Assuming failure ...\n"; - this->failure++; + switch (outcome) { + case PASS: + this->success++; + break; + case FAIL: + this->failure++; + break; + case SKIP: + this->skip++; + break; + case EXPECTED_FAIL: + this->expected_failure++; + break; + case UNEXPECTED_PASS: + this->unexpected_success++; + break; + default: + std::cout << "Warning: Unrecognized test outcome code " << outcome << ". 
Assuming failure ...\n"; + this->failure++; } return *this; } @@ -177,10 +160,10 @@ struct TestingResults { std::cout << "\nTest Summary\n"; // std::cout << "----------------------------\n"; - std::cout << "\tSuccessful tests: " << success << "\n"; - std::cout << "\tFailed test: " << failure << "\n"; - std::cout << "\tSkipped tests: " << skip << "\n"; - std::cout << "\tExpected failures: " << expected_failure << "\n"; + std::cout << "\tSuccessful tests: " << success << "\n"; + std::cout << "\tFailed test: " << failure << "\n"; + std::cout << "\tSkipped tests: " << skip << "\n"; + std::cout << "\tExpected failures: " << expected_failure << "\n"; std::cout << "\tUnexpected successes: " << unexpected_success << "\n"; std::cout << "\n"; @@ -188,21 +171,11 @@ struct TestingResults } }; -TestingResults operator+(const TestingResults& lhs, const TestingResults& rhs) -{ - return TestingResults(lhs) += rhs; -} - -TestingResults operator+(const TestingResults& lhs, const TestOutcome outcome) -{ - return TestingResults(lhs) += outcome; -} +TestingResults operator+(const TestingResults &lhs, const TestingResults &rhs) { return TestingResults(lhs) += rhs; } -TestingResults operator+(const TestOutcome outcome, const TestingResults& rhs) -{ - return TestingResults(rhs) += outcome; -} +TestingResults operator+(const TestingResults &lhs, const TestOutcome outcome) { return TestingResults(lhs) += outcome; } +TestingResults operator+(const TestOutcome outcome, const TestingResults &rhs) { return TestingResults(rhs) += outcome; } static const real_type zero = 0.0; static const real_type quarter = 0.25; @@ -210,34 +183,22 @@ static const real_type half = 0.5; static const real_type one = 1.0; static const real_type two = 2.0; static const real_type three = 3.0; -static const real_type eps = 10*std::numeric_limits::epsilon(); - +static const real_type eps = 10 * std::numeric_limits::epsilon(); class TestBase { -public: - TestBase() - : mem_space_("DEFAULT") - { - } - inline void 
set_mem_space(const std::string& mem_space) - { - mem_space_ = mem_space; - } - inline std::string get_mem_space() const - { - return mem_space_; - } -protected: + public: + TestBase() : mem_space_("DEFAULT") {} + inline void set_mem_space(const std::string &mem_space) { mem_space_ = mem_space; } + inline std::string get_mem_space() const { return mem_space_; } + + protected: /// Returns true if two real numbers are equal within tolerance - [[nodiscard]] static - bool isEqual(const real_type a, const real_type b) - { - return (std::abs(a - b)/(1.0 + std::abs(b)) < eps); - } + [[nodiscard]] static bool isEqual(const real_type a, const real_type b) { return (std::abs(a - b) / (1.0 + std::abs(b)) < eps); } -protected: + protected: std::string mem_space_; }; -}} // namespace ReSolve::tests \ No newline at end of file +} // namespace tests +} // namespace ReSolve diff --git a/tests/unit/matrix/CMakeLists.txt b/tests/unit/matrix/CMakeLists.txt index 8476f181e..ab9ac99a2 100644 --- a/tests/unit/matrix/CMakeLists.txt +++ b/tests/unit/matrix/CMakeLists.txt @@ -16,8 +16,9 @@ target_link_libraries(runMatrixHandlerTests.exe PRIVATE ReSolve resolve_matrix) # Install tests set(installable_tests runMatrixIoTests.exe runMatrixHandlerTests.exe) -install(TARGETS ${installable_tests} - RUNTIME DESTINATION bin/resolve/tests/unit) +install(TARGETS ${installable_tests} RUNTIME DESTINATION bin/resolve/tests/unit) -add_test(NAME matrix_test COMMAND $) -add_test(NAME matrix_handler_test COMMAND $) +add_test(NAME matrix_test COMMAND $) +add_test(NAME matrix_handler_test + COMMAND $ +) diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 63d2f49bb..3d5eea582 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -1,33 +1,34 @@ #pragma once -#include -#include -#include -#include #include +#include #include -#include #include #include +#include +#include +#include #include +#include -namespace 
ReSolve { namespace tests { +namespace ReSolve +{ +namespace tests +{ /** * @class Unit tests for matrix handler class */ class MatrixHandlerTests : TestBase { -public: - MatrixHandlerTests(std::string memspace) : memspace_(memspace) - {} - virtual ~MatrixHandlerTests() - {} + public: + MatrixHandlerTests(std::string memspace) : memspace_(memspace) {} + virtual ~MatrixHandlerTests() {} TestOutcome matrixHandlerConstructor() { TestStatus status; status.skipTest(); - + return status.report(__func__); } @@ -35,7 +36,7 @@ class MatrixHandlerTests : TestBase { TestStatus status; status.skipTest(); - + return status.report(__func__); } @@ -48,20 +49,21 @@ class MatrixHandlerTests : TestBase else ms = memory::DEVICE; - ReSolve::MatrixHandler* handler = createMatrixHandler(); + ReSolve::MatrixHandler *handler = createMatrixHandler(); - matrix::Csr* A = createCsrMatrix(N, memspace_); + matrix::Csr *A = createCsrMatrix(N, memspace_); vector::Vector x(N); vector::Vector y(N); x.allocate(ms); - if (x.getData(ms) == NULL) printf("oups we have an issue \n"); + if (x.getData(ms) == NULL) + printf("oups we have an issue \n"); y.allocate(ms); x.setToConst(1.0, ms); y.setToConst(1.0, ms); - real_type alpha = 2.0/30.0; - real_type beta = 2.0; + real_type alpha = 2.0 / 30.0; + real_type beta = 2.0; handler->setValuesChanged(true, memspace_); handler->matvec(A, &x, &y, &alpha, &beta, "csr", memspace_); @@ -73,23 +75,23 @@ class MatrixHandlerTests : TestBase return status.report(__func__); } -private: + private: std::string memspace_{"cpu"}; - ReSolve::MatrixHandler* createMatrixHandler() + ReSolve::MatrixHandler *createMatrixHandler() { if (memspace_ == "cpu") { - LinAlgWorkspaceCpu* workspace = new LinAlgWorkspaceCpu(); + LinAlgWorkspaceCpu *workspace = new LinAlgWorkspaceCpu(); return new MatrixHandler(workspace); #ifdef RESOLVE_USE_CUDA } else if (memspace_ == "cuda") { - LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); + LinAlgWorkspaceCUDA *workspace = new 
LinAlgWorkspaceCUDA(); workspace->initializeHandles(); return new MatrixHandler(workspace); #endif #ifdef RESOLVE_USE_HIP } else if (memspace_ == "hip") { - LinAlgWorkspaceHIP* workspace = new LinAlgWorkspaceHIP(); + LinAlgWorkspaceHIP *workspace = new LinAlgWorkspaceHIP(); workspace->initializeHandles(); return new MatrixHandler(workspace); #endif @@ -99,7 +101,7 @@ class MatrixHandlerTests : TestBase return nullptr; } - bool verifyAnswer(vector::Vector& x, real_type answer, std::string memspace) + bool verifyAnswer(vector::Vector &x, real_type answer, std::string memspace) { bool status = true; if (memspace != "cpu") { @@ -110,54 +112,50 @@ class MatrixHandlerTests : TestBase // std::cout << x.getData(memory::HOST)[i] << "\n"; if (!isEqual(x.getData(memory::HOST)[i], answer)) { status = false; - std::cout << "Solution vector element x[" << i << "] = " << x.getData(memory::HOST)[i] - << ", expected: " << answer << "\n"; - break; + std::cout << "Solution vector element x[" << i << "] = " << x.getData(memory::HOST)[i] << ", expected: " << answer << "\n"; + break; } } return status; } - matrix::Csr* createCsrMatrix(const index_type N, std::string memspace) + matrix::Csr *createCsrMatrix(const index_type N, std::string memspace) { - std::vector r1 = {1., 5., 7., 8., 3., 2., 4.}; // sum 30 - std::vector r2 = {1., 3., 2., 2., 1., 6., 7., 3., 2., 3.}; // sum 30 - std::vector r3 = {11., 15., 4.}; // sum 30 + std::vector r1 = {1., 5., 7., 8., 3., 2., 4.}; // sum 30 + std::vector r2 = {1., 3., 2., 2., 1., 6., 7., 3., 2., 3.}; // sum 30 + std::vector r3 = {11., 15., 4.}; // sum 30 std::vector r4 = {1., 1., 5., 1., 9., 2., 1., 2., 3., 2., 3.}; // sum 30 - std::vector r5 = {6., 5., 7., 3., 2., 5., 2.}; // sum 30 + std::vector r5 = {6., 5., 7., 3., 2., 5., 2.}; // sum 30 - const std::vector > data = {r1, r2, r3, r4, r5}; + const std::vector> data = {r1, r2, r3, r4, r5}; // std::cout << N << "\n"; // First compute number of nonzeros index_type NNZ = 0; - for (index_type i = 0; i 
< N; ++i) - { - size_t reminder = static_cast(i%5); + for (index_type i = 0; i < N; ++i) { + size_t reminder = static_cast(i % 5); NNZ += static_cast(data[reminder].size()); } // Allocate NxN CSR matrix with NNZ nonzeros - matrix::Csr* A = new matrix::Csr(N, N, NNZ); + matrix::Csr *A = new matrix::Csr(N, N, NNZ); A->allocateMatrixData(memory::HOST); - index_type* rowptr = A->getRowData(memory::HOST); - index_type* colidx = A->getColData(memory::HOST); - real_type* val = A->getValues( memory::HOST); + index_type *rowptr = A->getRowData(memory::HOST); + index_type *colidx = A->getColData(memory::HOST); + real_type *val = A->getValues(memory::HOST); // Populate CSR matrix using same row pattern as for NNZ calculation rowptr[0] = 0; - for (index_type i=0; i < N; ++i) - { - size_t reminder = static_cast(i%5); - const std::vector& row_sample = data[reminder]; + for (index_type i = 0; i < N; ++i) { + size_t reminder = static_cast(i % 5); + const std::vector &row_sample = data[reminder]; index_type nnz_per_row = static_cast(row_sample.size()); - rowptr[i+1] = rowptr[i] + nnz_per_row; - for (index_type j = rowptr[i]; j < rowptr[i+1]; ++j) - { - colidx[j] = (j - rowptr[i]) * N/nnz_per_row + (N%(N/nnz_per_row)); + rowptr[i + 1] = rowptr[i] + nnz_per_row; + for (index_type j = rowptr[i]; j < rowptr[i + 1]; ++j) { + colidx[j] = (j - rowptr[i]) * N / nnz_per_row + (N % (N / nnz_per_row)); // evenly distribute nonzeros ^^^^ ^^^^^^^^ perturb offset val[j] = row_sample[static_cast(j - rowptr[i])]; } @@ -172,4 +170,5 @@ class MatrixHandlerTests : TestBase } }; // class MatrixHandlerTests -}} // namespace ReSolve::tests +} // namespace tests +} // namespace ReSolve diff --git a/tests/unit/matrix/MatrixIoTests.hpp b/tests/unit/matrix/MatrixIoTests.hpp index 1ce23ae25..72e84abca 100644 --- a/tests/unit/matrix/MatrixIoTests.hpp +++ b/tests/unit/matrix/MatrixIoTests.hpp @@ -1,18 +1,20 @@ #pragma once -#include -#include -#include #include +#include +#include #include +#include -namespace 
ReSolve { namespace tests { +namespace ReSolve +{ +namespace tests +{ class MatrixIoTests : TestBase { -public: - MatrixIoTests(){} - virtual ~MatrixIoTests(){} - + public: + MatrixIoTests() {} + virtual ~MatrixIoTests() {} TestOutcome cooMatrixImport() { @@ -20,7 +22,7 @@ class MatrixIoTests : TestBase // Read string into istream and status it to `readMatrixFromFile` function. std::istringstream file(general_coo_matrix_file_); - ReSolve::matrix::Coo* A = ReSolve::io::readMatrixFromFile(file); + ReSolve::matrix::Coo *A = ReSolve::io::readMatrixFromFile(file); // Check if the matrix data was correctly loaded status = true; @@ -71,7 +73,6 @@ class MatrixIoTests : TestBase return status.report(__func__); } - TestOutcome cooMatrixReadAndUpdate() { TestStatus status; @@ -122,14 +123,13 @@ class MatrixIoTests : TestBase std::istringstream file(general_vector_file_); // Create rhs vector and load its data from the input file - real_type* rhs = ReSolve::io::readRhsFromFile(file); + real_type *rhs = ReSolve::io::readRhsFromFile(file); // Check if the matrix data was correctly loaded status = true; for (size_t i = 0; i < general_vector_vals_.size(); ++i) { - if (!isEqual(rhs[i], general_vector_vals_[i])) - { + if (!isEqual(rhs[i], general_vector_vals_[i])) { std::cout << "Incorrect vector value at storage element " << i << ".\n"; status = false; break; @@ -148,7 +148,7 @@ class MatrixIoTests : TestBase std::istringstream file(general_vector_file_); // For now let's test only the case when `readAndUpdateRhs` does not allocate rhs - real_type* rhs = new real_type[5]; //nullptr; + real_type *rhs = new real_type[5]; // nullptr; // Update matrix A with data from the matrix market file ReSolve::io::readAndUpdateRhs(file, &rhs); @@ -157,8 +157,7 @@ class MatrixIoTests : TestBase status = true; for (size_t i = 0; i < general_vector_vals_.size(); ++i) { - if (!isEqual(rhs[i], general_vector_vals_[i])) - { + if (!isEqual(rhs[i], general_vector_vals_[i])) { std::cout << "Incorrect 
vector value at storage element " << i << ".\n"; status = false; break; @@ -169,17 +168,13 @@ class MatrixIoTests : TestBase return status.report(__func__); } -private: - bool verifyAnswer(/* const */ ReSolve::matrix::Coo& answer, - const std::vector& row_data, - const std::vector& col_data, - const std::vector& val_data) + private: + bool verifyAnswer(/* const */ ReSolve::matrix::Coo &answer, const std::vector &row_data, const std::vector &col_data, + const std::vector &val_data) { for (size_t i = 0; i < val_data.size(); ++i) { - if ((answer.getRowData(memory::HOST)[i] != row_data[i]) || - (answer.getColData(memory::HOST)[i] != col_data[i]) || - (!isEqual(answer.getValues(memory::HOST)[i], val_data[i]))) - { + if ((answer.getRowData(memory::HOST)[i] != row_data[i]) || (answer.getColData(memory::HOST)[i] != col_data[i]) || + (!isEqual(answer.getValues(memory::HOST)[i], val_data[i]))) { std::cout << "Incorrect matrix value at storage element " << i << ".\n"; return false; } @@ -187,15 +182,15 @@ class MatrixIoTests : TestBase return true; } -private: + private: // // Test examples // /// String pretending to be matrix market file /// Same stored in file `matrix_general_coo_ordered.mtx` - const std::string general_coo_matrix_file_ = -R"(% This ASCII file represents a sparse MxN matrix with L + const std::string general_coo_matrix_file_ = + R"(% This ASCII file represents a sparse MxN matrix with L % nonzeros in the following Matrix Market format: % % +----------------------------------------------+ @@ -226,19 +221,12 @@ R"(% This ASCII file represents a sparse MxN matrix with L )"; /// Matching COO matrix data as it is supposed to be read from the file - const std::vector general_coo_matrix_rows_ = {0,1,2,0,3,3,3,4}; - const std::vector general_coo_matrix_cols_ = {0,1,2,3,1,3,4,4}; - const std::vector general_coo_matrix_vals_ = { 1.000e+00, - 1.050e+01, - 1.500e-02, - 6.000e+00, - 2.505e+02, - -2.800e+02, - 3.332e+01, - 1.200e+01 }; + const std::vector 
general_coo_matrix_rows_ = {0, 1, 2, 0, 3, 3, 3, 4}; + const std::vector general_coo_matrix_cols_ = {0, 1, 2, 3, 1, 3, 4, 4}; + const std::vector general_coo_matrix_vals_ = {1.000e+00, 1.050e+01, 1.500e-02, 6.000e+00, 2.505e+02, -2.800e+02, 3.332e+01, 1.200e+01}; const std::string symmetric_coo_matrix_file_ = -R"(%%MatrixMarket matrix coordinate real symmetric + R"(%%MatrixMarket matrix coordinate real symmetric % 5 5 9 1 1 11.0 @@ -252,23 +240,13 @@ R"(%%MatrixMarket matrix coordinate real symmetric 5 5 55.0 )"; - /// Matching COO matrix data as it is supposed to be read from the file - const std::vector symmetric_coo_matrix_rows_ = {0,0,1,1,1,2,2,3,4}; - const std::vector symmetric_coo_matrix_cols_ = {0,4,1,2,3,2,4,3,4}; - const std::vector symmetric_coo_matrix_vals_ = { 11.0, - 15.0, - 22.0, - 23.0, - 24.0, - 33.0, - 35.0, - 44.0, - 55.0 }; - - - const std::string general_vector_file_ = -R"(% This ASCII file represents a sparse MxN matrix with L + const std::vector symmetric_coo_matrix_rows_ = {0, 0, 1, 1, 1, 2, 2, 3, 4}; + const std::vector symmetric_coo_matrix_cols_ = {0, 4, 1, 2, 3, 2, 4, 3, 4}; + const std::vector symmetric_coo_matrix_vals_ = {11.0, 15.0, 22.0, 23.0, 24.0, 33.0, 35.0, 44.0, 55.0}; + + const std::string general_vector_file_ = + R"(% This ASCII file represents a sparse MxN matrix with L % nonzeros in the following Matrix Market format: % % @@ -281,14 +259,11 @@ R"(% This ASCII file represents a sparse MxN matrix with L 5.505e+02 )"; - const std::vector general_vector_vals_ = { 1.000e+00, - 2.000e+01, - 3.000e-02, - 4.000e+00, - 5.505e+02 }; + const std::vector general_vector_vals_ = {1.000e+00, 2.000e+01, 3.000e-02, 4.000e+00, 5.505e+02}; /// Location of other test data std::string datafiles_folder_; }; // class MatrixIoTests -}} // namespace ReSolve::tests +} // namespace tests +} // namespace ReSolve diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 26ad70b05..0d67889e0 100644 --- 
a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -1,18 +1,18 @@ -#include -#include +#include "MatrixHandlerTests.hpp" #include +#include #include #include -#include "MatrixHandlerTests.hpp" +#include -int main(int, char**) +int main(int, char **) { - ReSolve::tests::TestingResults result; + ReSolve::tests::TestingResults result; { std::cout << "Running tests on CPU:\n"; ReSolve::tests::MatrixHandlerTests test("cpu"); - + result += test.matrixHandlerConstructor(); result += test.matrixOneNorm(); result += test.matVec(50); diff --git a/tests/unit/matrix/runMatrixIoTests.cpp b/tests/unit/matrix/runMatrixIoTests.cpp index 0729fd9f2..a3e214d93 100644 --- a/tests/unit/matrix/runMatrixIoTests.cpp +++ b/tests/unit/matrix/runMatrixIoTests.cpp @@ -1,11 +1,11 @@ -#include -#include +#include "MatrixIoTests.hpp" #include +#include #include #include -#include "MatrixIoTests.hpp" +#include -int main(int, char**) +int main(int, char **) { ReSolve::tests::MatrixIoTests test; @@ -16,4 +16,4 @@ int main(int, char**) result += test.rhsVectorReadAndUpdate(); return result.summary(); -} \ No newline at end of file +} diff --git a/tests/unit/memory/CMakeLists.txt b/tests/unit/memory/CMakeLists.txt index 01313e33b..29468ac32 100644 --- a/tests/unit/memory/CMakeLists.txt +++ b/tests/unit/memory/CMakeLists.txt @@ -11,11 +11,9 @@ add_executable(runMemoryUtilsTests.exe runMemoryUtilsTests.cpp) target_link_libraries(runMemoryUtilsTests.exe PRIVATE ReSolve) message(STATUS "Resolve libraries: ${resolve_backend_hip}") - # Install tests set(installable_tests runMemoryUtilsTests.exe) -install(TARGETS ${installable_tests} - RUNTIME DESTINATION bin/resolve/tests/unit) +install(TARGETS ${installable_tests} RUNTIME DESTINATION bin/resolve/tests/unit) # Add tests to run add_test(NAME memory_test COMMAND $) diff --git a/tests/unit/memory/MemoryUtilsTests.hpp b/tests/unit/memory/MemoryUtilsTests.hpp index 4cc1ace88..daa788402 100644 --- 
a/tests/unit/memory/MemoryUtilsTests.hpp +++ b/tests/unit/memory/MemoryUtilsTests.hpp @@ -1,24 +1,25 @@ #pragma once -#include -#include -#include -#include #include +#include #include +#include +#include #include +#include -namespace ReSolve { namespace tests { +namespace ReSolve +{ +namespace tests +{ /** * @class Unit tests for memory handler class */ class MemoryUtilsTests : TestBase { -public: - MemoryUtilsTests(std::string memspace) : memspace_(memspace) - {} - virtual ~MemoryUtilsTests() - {} + public: + MemoryUtilsTests(std::string memspace) : memspace_(memspace) {} + virtual ~MemoryUtilsTests() {} TestOutcome allocateAndDelete() { @@ -28,12 +29,12 @@ class MemoryUtilsTests : TestBase MemoryHandler mh; index_type n = 1000; - size_t m = 8000; - index_type* i = nullptr; - real_type* r = nullptr; + size_t m = 8000; + index_type *i = nullptr; + real_type *r = nullptr; mh.allocateArrayOnDevice(&i, n); - mh.allocateBufferOnDevice((void**) &r, m); + mh.allocateBufferOnDevice((void **)&r, m); status *= (i != nullptr); status *= (r != nullptr); @@ -57,14 +58,14 @@ class MemoryUtilsTests : TestBase real_type minusone = -1.0; // Create raw arrays on the host and set their elements to -1 - real_type* array1 = new real_type[n]{0}; - real_type* array2 = new real_type[n]{0}; + real_type *array1 = new real_type[n]{0}; + real_type *array2 = new real_type[n]{0}; std::fill_n(array1, n, minusone); std::fill_n(array2, n, minusone); // Allocate arrays of size n on the device - real_type* devarray1 = nullptr; - real_type* devarray2 = nullptr; + real_type *devarray1 = nullptr; + real_type *devarray2 = nullptr; mh.allocateArrayOnDevice(&devarray1, n); mh.allocateArrayOnDevice(&devarray2, n); @@ -86,20 +87,18 @@ class MemoryUtilsTests : TestBase return status.report(__func__); } - -private: + private: std::string memspace_{"cpu"}; - bool verifyAnswer(real_type* x, real_type answer, index_type n) + bool verifyAnswer(real_type *x, real_type answer, index_type n) { bool status = true; 
for (index_type i = 0; i < n; ++i) { if (!isEqual(x[i], answer)) { status = false; - std::cout << "Solution vector element x[" << i << "] = " << x[i] - << ", expected: " << answer << "\n"; - break; + std::cout << "Solution vector element x[" << i << "] = " << x[i] << ", expected: " << answer << "\n"; + break; } } return status; @@ -107,4 +106,5 @@ class MemoryUtilsTests : TestBase }; // class MemoryUtilsTests -}} // namespace ReSolve::tests +} // namespace tests +} // namespace ReSolve diff --git a/tests/unit/memory/runMemoryUtilsTests.cpp b/tests/unit/memory/runMemoryUtilsTests.cpp index 00349c7c8..5e657663f 100644 --- a/tests/unit/memory/runMemoryUtilsTests.cpp +++ b/tests/unit/memory/runMemoryUtilsTests.cpp @@ -1,12 +1,12 @@ -#include -#include #include +#include +#include #include "MemoryUtilsTests.hpp" -int main(int, char**) +int main(int, char **) { - ReSolve::tests::TestingResults result; + ReSolve::tests::TestingResults result; #ifdef RESOLVE_USE_HIP { diff --git a/tests/unit/utilities/logger/CMakeLists.txt b/tests/unit/utilities/logger/CMakeLists.txt index f7ea65751..2cc8f4ea1 100644 --- a/tests/unit/utilities/logger/CMakeLists.txt +++ b/tests/unit/utilities/logger/CMakeLists.txt @@ -12,7 +12,6 @@ target_link_libraries(runLoggerTests.exe PRIVATE ReSolve resolve_logger) # Install tests set(installable_logger_tests runLoggerTests.exe) -install(TARGETS ${installable_tests} - RUNTIME DESTINATION bin/resolve/tests/unit) +install(TARGETS ${installable_tests} RUNTIME DESTINATION bin/resolve/tests/unit) add_test(NAME logger_test COMMAND $) diff --git a/tests/unit/utilities/logger/LoggerTests.hpp b/tests/unit/utilities/logger/LoggerTests.hpp index 9afa9c289..15792a78a 100644 --- a/tests/unit/utilities/logger/LoggerTests.hpp +++ b/tests/unit/utilities/logger/LoggerTests.hpp @@ -5,190 +5,194 @@ */ #pragma once -#include -#include -#include #include +#include +#include #include +#include + +namespace ReSolve +{ +namespace tests +{ +/** + * @brief Class implementing 
unit tests for Logger class. + * + * The LoggerTests class is implemented entirely in this header file. + * Adding new unit test requires simply adding another method to this + * class. + */ +class LoggerTests : TestBase +{ + public: + LoggerTests() {} + virtual ~LoggerTests() {} -namespace ReSolve { namespace tests { /** - * @brief Class implementing unit tests for Logger class. - * - * The LoggerTests class is implemented entirely in this header file. - * Adding new unit test requires simply adding another method to this - * class. + * @brief Test data stream for error log messages. + * + * This method tests streaming messages to `Logger::error()` data + * stream. The method streams messages to all available output streams, + * however only mesages streamed to the error stream should be logged. */ - class LoggerTests : TestBase + TestOutcome errorOutput() { - public: - LoggerTests(){} - virtual ~LoggerTests(){} - - /** - * @brief Test data stream for error log messages. - * - * This method tests streaming messages to `Logger::error()` data - * stream. The method streams messages to all available output streams, - * however only mesages streamed to the error stream should be logged. - */ - TestOutcome errorOutput() - { - using out = ReSolve::io::Logger; - std::string s1("Test error output ..."); - std::string s2("Another error output test ...\n"); - std::string answer = error_text() + s1 + "\n" + error_text() + s2; - - TestStatus status; - - std::ostringstream file; - - out::setOutput(file); - out::setVerbosity(out::ERRORS); - out::error() << s1 << std::endl; - out::error() << s2; - - out::warning() << s1; - out::warning() << s2; - out::summary() << s1; - out::misc() << s1; - - // std::cout << file.str(); - // std::cout << answer; - - status = (answer == file.str()); - - return status.report(__func__); - } - - /** - * @brief Test data stream for warning log messages. - * - * This method tests streaming messages to `Logger::error()` data - * stream. 
The method streams messages to all available output streams, - * however only mesages streamed to the error and warning streams should - * be logged. - */ - TestOutcome warningOutput() - { - using out = ReSolve::io::Logger; - std::string s1("Test error output ...\n"); - std::string s2("Test warning output ...\n"); - std::string answer = error_text() + s1 + warning_text() + s2; - - TestStatus status; - - std::ostringstream file; - - out::setOutput(file); - out::setVerbosity(out::WARNINGS); - - out::error() << s1; - out::warning() << s2; - out::summary() << s1; - out::misc() << s1; - - // std::cout << file.str(); - - status = (answer == file.str()); - - return status.report(__func__); - } - - /** - * @brief Test data stream for result summary log messages. - * - * This method tests streaming messages to `Logger::error()` data - * stream. The method streams messages to all available output streams, - * however only mesages streamed to the error, warning, and result summary - * streams should be logged. - */ - TestOutcome summaryOutput() - { - using out = ReSolve::io::Logger; - std::string s1("Test error output ...\n"); - std::string s2("Test warning output ...\n"); - std::string s3("Test summary output ...\n"); - std::string answer = error_text() + s1 + warning_text() + s2 + summary_ + s3; - - TestStatus status; - - std::ostringstream file; - - out::setOutput(file); - out::setVerbosity(out::SUMMARY); - - out::error() << s1; - out::warning() << s2; - out::summary() << s3; - out::misc() << s1; - - // std::cout << file.str(); - - status = (answer == file.str()); - - return status.report(__func__); - } - - /** - * @brief Test data stream for all other log messages. - * - * This method tests streaming messages to `Logger::error()` data - * stream. The method streams messages to all available output streams - * and all messages should be logged. 
- */ - TestOutcome miscOutput() - { - using out = ReSolve::io::Logger; - std::string s1("Test error output ...\n"); - std::string s2("Test warning output ...\n"); - std::string s3("Test summary output ...\n"); - std::string s4("Test any other output ...\n"); - std::string answer = error_text() + s1 + warning_text() + s2 + summary_ + s3 + message_ + s4; - - TestStatus status; + using out = ReSolve::io::Logger; + std::string s1("Test error output ..."); + std::string s2("Another error output test ...\n"); + std::string answer = error_text() + s1 + "\n" + error_text() + s2; + + TestStatus status; + + std::ostringstream file; + + out::setOutput(file); + out::setVerbosity(out::ERRORS); + out::error() << s1 << std::endl; + out::error() << s2; + + out::warning() << s1; + out::warning() << s2; + out::summary() << s1; + out::misc() << s1; + + // std::cout << file.str(); + // std::cout << answer; + + status = (answer == file.str()); + + return status.report(__func__); + } + + /** + * @brief Test data stream for warning log messages. + * + * This method tests streaming messages to `Logger::error()` data + * stream. The method streams messages to all available output streams, + * however only mesages streamed to the error and warning streams should + * be logged. 
+ */ + TestOutcome warningOutput() + { + using out = ReSolve::io::Logger; + std::string s1("Test error output ...\n"); + std::string s2("Test warning output ...\n"); + std::string answer = error_text() + s1 + warning_text() + s2; + + TestStatus status; + + std::ostringstream file; + + out::setOutput(file); + out::setVerbosity(out::WARNINGS); + + out::error() << s1; + out::warning() << s2; + out::summary() << s1; + out::misc() << s1; + + // std::cout << file.str(); - std::ostringstream file; - - out::setOutput(file); - out::setVerbosity(out::EVERYTHING); + status = (answer == file.str()); - out::error() << s1; - out::warning() << s2; - out::summary() << s3; - out::misc() << s4; - - // std::cout << file.str(); - - status = (answer == file.str()); - - return status.report(__func__); - } + return status.report(__func__); + } + + /** + * @brief Test data stream for result summary log messages. + * + * This method tests streaming messages to `Logger::error()` data + * stream. The method streams messages to all available output streams, + * however only mesages streamed to the error, warning, and result summary + * streams should be logged. + */ + TestOutcome summaryOutput() + { + using out = ReSolve::io::Logger; + std::string s1("Test error output ...\n"); + std::string s2("Test warning output ...\n"); + std::string s3("Test summary output ...\n"); + std::string answer = error_text() + s1 + warning_text() + s2 + summary_ + s3; + + TestStatus status; + + std::ostringstream file; + + out::setOutput(file); + out::setVerbosity(out::SUMMARY); + + out::error() << s1; + out::warning() << s2; + out::summary() << s3; + out::misc() << s1; + + // std::cout << file.str(); + + status = (answer == file.str()); + + return status.report(__func__); + } + + /** + * @brief Test data stream for all other log messages. + * + * This method tests streaming messages to `Logger::error()` data + * stream. 
The method streams messages to all available output streams + * and all messages should be logged. + */ + TestOutcome miscOutput() + { + using out = ReSolve::io::Logger; + std::string s1("Test error output ...\n"); + std::string s2("Test warning output ...\n"); + std::string s3("Test summary output ...\n"); + std::string s4("Test any other output ...\n"); + std::string answer = error_text() + s1 + warning_text() + s2 + summary_ + s3 + message_ + s4; + + TestStatus status; + + std::ostringstream file; + + out::setOutput(file); + out::setVerbosity(out::EVERYTHING); + + out::error() << s1; + out::warning() << s2; + out::summary() << s3; + out::misc() << s4; + + // std::cout << file.str(); + + status = (answer == file.str()); + + return status.report(__func__); + } private: - /// Private method to return the string preceding error output - std::string error_text() - { - using namespace colors; - std::ostringstream stream; - stream << "[" << RED << "ERROR" << CLEAR << "] "; - return stream.str(); - } - - /// Private method to return the string preceding warning output - std::string warning_text() - { - using namespace colors; - std::ostringstream stream; - stream << "[" << YELLOW << "WARNING" << CLEAR << "] "; - return stream.str(); - } - - /// String preceding output of a result summary - const std::string summary_ = "[SUMMARY] "; - - /// String preceding miscellaneous output - const std::string message_ = "[MESSAGE] "; - }; // class LoggerTests - -}} // namespace ReSolve::tests + /// Private method to return the string preceding error output + std::string error_text() + { + using namespace colors; + std::ostringstream stream; + stream << "[" << RED << "ERROR" << CLEAR << "] "; + return stream.str(); + } + + /// Private method to return the string preceding warning output + std::string warning_text() + { + using namespace colors; + std::ostringstream stream; + stream << "[" << YELLOW << "WARNING" << CLEAR << "] "; + return stream.str(); + } + + /// String preceding 
output of a result summary + const std::string summary_ = "[SUMMARY] "; + + /// String preceding miscellaneous output + const std::string message_ = "[MESSAGE] "; +}; // class LoggerTests + +} // namespace tests +} // namespace ReSolve diff --git a/tests/unit/utilities/logger/runLoggerTests.cpp b/tests/unit/utilities/logger/runLoggerTests.cpp index 1ef2f4352..06c4eed72 100644 --- a/tests/unit/utilities/logger/runLoggerTests.cpp +++ b/tests/unit/utilities/logger/runLoggerTests.cpp @@ -7,9 +7,9 @@ #include #include -#include -#include #include "LoggerTests.hpp" +#include +#include int main() { diff --git a/tests/unit/vector/CMakeLists.txt b/tests/unit/vector/CMakeLists.txt index 3c6423ee9..1e7fba690 100644 --- a/tests/unit/vector/CMakeLists.txt +++ b/tests/unit/vector/CMakeLists.txt @@ -16,8 +16,9 @@ target_link_libraries(runGramSchmidtTests.exe PRIVATE ReSolve resolve_vector) # Install tests set(installable_tests runVectorHandlerTests.exe runGramSchmidtTests.exe) -install(TARGETS ${installable_tests} - RUNTIME DESTINATION bin/resolve/tests/unit) +install(TARGETS ${installable_tests} RUNTIME DESTINATION bin/resolve/tests/unit) -add_test(NAME vector_handler_test COMMAND $) +add_test(NAME vector_handler_test + COMMAND $ +) add_test(NAME gram_schmidt_test COMMAND $) diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 4837b57ba..91ccdf28b 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -1,191 +1,187 @@ #pragma once -#include -#include #include #include #include #include -#include #include +#include +#include +#include -namespace ReSolve { - namespace tests { - const real_type var1 = 0.17; - const real_type var2 = 2.0; - class GramSchmidtTests : TestBase - { - public: - GramSchmidtTests(std::string memspace) : memspace_(memspace) - { - } - - virtual ~GramSchmidtTests() - { - } - - TestOutcome GramSchmidtConstructor() - { - TestStatus status; - // status.skipTest(); - - 
GramSchmidt gs1; - status *= (gs1.getVariant() == GramSchmidt::mgs); - status *= (gs1.getL() == nullptr); - status *= !gs1.isSetupComplete(); +namespace ReSolve +{ +namespace tests +{ +const real_type var1 = 0.17; +const real_type var2 = 2.0; +class GramSchmidtTests : TestBase +{ + public: + GramSchmidtTests(std::string memspace) : memspace_(memspace) {} + + virtual ~GramSchmidtTests() {} + + TestOutcome GramSchmidtConstructor() + { + TestStatus status; + // status.skipTest(); + + GramSchmidt gs1; + status *= (gs1.getVariant() == GramSchmidt::mgs); + status *= (gs1.getL() == nullptr); + status *= !gs1.isSetupComplete(); + + VectorHandler vh; + GramSchmidt gs2(&vh, GramSchmidt::mgs_pm); + status *= (gs2.getVariant() == GramSchmidt::mgs_pm); + status *= (gs1.getL() == nullptr); + status *= !gs1.isSetupComplete(); + + return status.report(__func__); + } - VectorHandler vh; - GramSchmidt gs2(&vh, GramSchmidt::mgs_pm); - status *= (gs2.getVariant() == GramSchmidt::mgs_pm); - status *= (gs1.getL() == nullptr); - status *= !gs1.isSetupComplete(); + TestOutcome orthogonalize(index_type N, GramSchmidt::GSVariant var) + { + TestStatus status; + + std::string testname(__func__); + switch (var) { + case GramSchmidt::mgs: + testname += " (Modified Gram-Schmidt)"; + break; + case GramSchmidt::mgs_two_synch: + testname += " (Modified Gram-Schmidt 2-Sync)"; + break; + case GramSchmidt::mgs_pm: + testname += " (Post-Modern Modified Gram-Schmidt)"; + break; + case GramSchmidt::cgs1: + testname += " (Classical Gram-Schmidt)"; + break; + case GramSchmidt::cgs2: + testname += " (Reorthogonalized Classical Gram-Schmidt)"; + break; + } + + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + + ReSolve::VectorHandler *handler = createVectorHandler(); + + vector::Vector *V = new vector::Vector(N, 3); // we will be using a space of 3 vectors + real_type *H = new real_type[6]; // in this case, Hessenberg matrix is 3 x 2 + real_type 
*aux_data; // needed for setup + + V->allocate(ms); + if (ms != memory::HOST) { + V->allocate(memory::HOST); + } + + ReSolve::GramSchmidt *GS = new ReSolve::GramSchmidt(handler, var); + GS->setup(N, 3); + + // fill 2nd and 3rd vector with values + aux_data = V->getVectorData(1, memory::HOST); + for (int i = 0; i < N; ++i) { + if (i % 2 == 0) { + aux_data[i] = constants::ONE; + } else { + aux_data[i] = var1; + } + } + aux_data = V->getVectorData(2, memory::HOST); + for (int i = 0; i < N; ++i) { + if (i % 3 > 0) { + aux_data[i] = constants::ZERO; + } else { + aux_data[i] = var2; + } + } + V->setDataUpdated(memory::HOST); + V->copyData(memory::HOST, ms); + + // set the first vector to all 1s, normalize + V->setToConst(0, 1.0, ms); + real_type nrm = handler->dot(V, V, memspace_); + nrm = sqrt(nrm); + nrm = 1.0 / nrm; + handler->scal(&nrm, V, memspace_); + + GS->orthogonalize(N, V, H, 0, memspace_); + GS->orthogonalize(N, V, H, 1, memspace_); + + status *= verifyAnswer(V, 3, handler, memspace_); + + delete handler; + delete[] H; + delete V; + delete GS; + + return status.report(testname.c_str()); + } - return status.report(__func__); - } + private: + std::string memspace_{"cuda"}; - TestOutcome orthogonalize(index_type N, GramSchmidt::GSVariant var) - { - TestStatus status; - - std::string testname(__func__); - switch(var) - { - case GramSchmidt::mgs: - testname += " (Modified Gram-Schmidt)"; - break; - case GramSchmidt::mgs_two_synch: - testname += " (Modified Gram-Schmidt 2-Sync)"; - break; - case GramSchmidt::mgs_pm: - testname += " (Post-Modern Modified Gram-Schmidt)"; - break; - case GramSchmidt::cgs1: - testname += " (Classical Gram-Schmidt)"; - break; - case GramSchmidt::cgs2: - testname += " (Reorthogonalized Classical Gram-Schmidt)"; - break; - } - - ReSolve::memory::MemorySpace ms; - if (memspace_ == "cpu") - ms = memory::HOST; - else - ms = memory::DEVICE; - - ReSolve::VectorHandler* handler = createVectorHandler(); - - vector::Vector* V = new 
vector::Vector(N, 3); // we will be using a space of 3 vectors - real_type* H = new real_type[6]; //in this case, Hessenberg matrix is 3 x 2 - real_type* aux_data; // needed for setup - - V->allocate(ms); - if (ms != memory::HOST) { - V->allocate(memory::HOST); - } - - - ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(handler, var); - GS->setup(N, 3); - - //fill 2nd and 3rd vector with values - aux_data = V->getVectorData(1, memory::HOST); - for (int i = 0; i < N; ++i) { - if ( i % 2 == 0) { - aux_data[i] = constants::ONE; - } else { - aux_data[i] = var1; - } - } - aux_data = V->getVectorData(2, memory::HOST); - for (int i = 0; i < N; ++i) { - if ( i % 3 > 0) { - aux_data[i] = constants::ZERO; - } else { - aux_data[i] = var2; - } - } - V->setDataUpdated(memory::HOST); - V->copyData(memory::HOST, ms); - - //set the first vector to all 1s, normalize - V->setToConst(0, 1.0, ms); - real_type nrm = handler->dot(V, V, memspace_); - nrm = sqrt(nrm); - nrm = 1.0 / nrm; - handler->scal(&nrm, V, memspace_); - - GS->orthogonalize(N, V, H, 0, memspace_ ); - GS->orthogonalize(N, V, H, 1, memspace_ ); - - status *= verifyAnswer(V, 3, handler, memspace_); - - delete handler; - delete [] H; - delete V; - delete GS; - - return status.report(testname.c_str()); - } - - private: - std::string memspace_{"cuda"}; - - ReSolve::VectorHandler* createVectorHandler() - { - if (memspace_ == "cpu") { // TODO: Fix memory leak here - LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); - return new VectorHandler(workpsace); + ReSolve::VectorHandler *createVectorHandler() + { + if (memspace_ == "cpu") { // TODO: Fix memory leak here + LinAlgWorkspaceCpu *workpsace = new LinAlgWorkspaceCpu(); + return new VectorHandler(workpsace); #ifdef RESOLVE_USE_CUDA - } else if (memspace_ == "cuda") { - LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); - workspace->initializeHandles(); - return new VectorHandler(workspace); + } else if (memspace_ == "cuda") { + LinAlgWorkspaceCUDA 
*workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + return new VectorHandler(workspace); #endif - } else { - std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n"; - } - return nullptr; - } + } else { + std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n"; + } + return nullptr; + } - // x is a multivector containing K vectors - bool verifyAnswer(vector::Vector* x, index_type K, ReSolve::VectorHandler* handler, std::string memspace) - { - ReSolve::memory::MemorySpace ms; - if (memspace == "cpu") - ms = memory::HOST; - else - ms = memory::DEVICE; - - vector::Vector* a = new vector::Vector(x->getSize()); - vector::Vector* b = new vector::Vector(x->getSize()); - - real_type ip; - bool status = true; - - for (index_type i = 0; i < K; ++i) { - for (index_type j = 0; j < K; ++j) { - a->update(x->getVectorData(i, ms), ms, memory::HOST); - b->update(x->getVectorData(j, ms), ms, memory::HOST); - ip = handler->dot(a, b, "cpu"); - - if ( (i != j) && (abs(ip) > 1e-14)) { - status = false; - std::cout << "Vectors " << i << " and " << j << " are not orthogonal!" 
- << " Inner product computed: " << ip << ", expected: " << 0.0 << "\n"; - break; - } - if ( (i == j) && !isEqual(abs(sqrt(ip)), 1.0)) { - status = false; - std::cout << std::setprecision(16); - std::cout << "Vector " << i << " has norm: " << sqrt(ip) << " expected: "<< 1.0 <<"\n"; - break; - } - } - } - delete a; - delete b; - return status; + // x is a multivector containing K vectors + bool verifyAnswer(vector::Vector *x, index_type K, ReSolve::VectorHandler *handler, std::string memspace) + { + ReSolve::memory::MemorySpace ms; + if (memspace == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + + vector::Vector *a = new vector::Vector(x->getSize()); + vector::Vector *b = new vector::Vector(x->getSize()); + + real_type ip; + bool status = true; + + for (index_type i = 0; i < K; ++i) { + for (index_type j = 0; j < K; ++j) { + a->update(x->getVectorData(i, ms), ms, memory::HOST); + b->update(x->getVectorData(j, ms), ms, memory::HOST); + ip = handler->dot(a, b, "cpu"); + + if ((i != j) && (abs(ip) > 1e-14)) { + status = false; + std::cout << "Vectors " << i << " and " << j << " are not orthogonal!" 
+ << " Inner product computed: " << ip << ", expected: " << 0.0 << "\n"; + break; + } + if ((i == j) && !isEqual(abs(sqrt(ip)), 1.0)) { + status = false; + std::cout << std::setprecision(16); + std::cout << "Vector " << i << " has norm: " << sqrt(ip) << " expected: " << 1.0 << "\n"; + break; } - }; // class + } + } + delete a; + delete b; + return status; } -} +}; // class +} // namespace tests +} // namespace ReSolve diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 856bb84db..c727bf5d9 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -1,306 +1,304 @@ #pragma once -#include -#include +#include #include -#include #include -#include #include #include -#include #include +#include +#include +#include +#include + +namespace ReSolve +{ +namespace tests +{ +/** + * @class Tests for vector handler + * + */ +class VectorHandlerTests : TestBase +{ + public: + VectorHandlerTests(std::string memspace) : memspace_(memspace) {} + + virtual ~VectorHandlerTests() {} + + TestOutcome vectorHandlerConstructor() + { + TestStatus status; + status.skipTest(); + + return status.report(__func__); + } + + TestOutcome axpy(index_type N) + { + TestStatus status; + + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + + ReSolve::VectorHandler *handler = createVectorHandler(); + + vector::Vector *x = new vector::Vector(N); + vector::Vector *y = new vector::Vector(N); + + x->allocate(ms); + y->allocate(ms); + + x->setToConst(3.0, ms); + y->setToConst(1.0, ms); + + real_type alpha = 0.5; + // the result is a vector with y[i] = 2.5; + handler->axpy(&alpha, x, y, memspace_); + status *= verifyAnswer(y, 2.5, memspace_); + + delete handler; + delete x; + delete y; + + return status.report(__func__); + } + + TestOutcome dot(index_type N) + { + TestStatus status; + + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = 
memory::HOST; + else + ms = memory::DEVICE; -namespace ReSolve { - namespace tests { - /** - * @class Tests for vector handler - * - */ - class VectorHandlerTests : TestBase - { - public: - VectorHandlerTests(std::string memspace) : memspace_(memspace) - { - } + ReSolve::VectorHandler *handler = createVectorHandler(); - virtual ~VectorHandlerTests() - { - } + vector::Vector *x = new vector::Vector(N); + vector::Vector *y = new vector::Vector(N); - TestOutcome vectorHandlerConstructor() - { - TestStatus status; - status.skipTest(); + x->allocate(ms); + y->allocate(ms); - return status.report(__func__); - } + x->setToConst(0.25, ms); + y->setToConst(4.0, ms); + real_type ans; + // the result is N + ans = handler->dot(x, y, memspace_); - TestOutcome axpy(index_type N) - { - TestStatus status; + bool st = true; + ; + if (ans != (real_type)N) { + st = false; + printf("the wrong answer is %f expecting %f \n", ans, (real_type)N); + } + status *= st; - ReSolve::memory::MemorySpace ms; - if (memspace_ == "cpu") - ms = memory::HOST; - else - ms = memory::DEVICE; + delete handler; + delete x; + delete y; - ReSolve::VectorHandler* handler = createVectorHandler(); + return status.report(__func__); + } + + TestOutcome scal(index_type N) + { + TestStatus status; + + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + + ReSolve::VectorHandler *handler = createVectorHandler(); + + vector::Vector *x = new vector::Vector(N); + + x->allocate(ms); + + x->setToConst(1.25, ms); + + real_type alpha = 3.5; + + // the answer is x[i] = 4.375; + handler->scal(&alpha, x, memspace_); + status *= verifyAnswer(x, 4.375, memspace_); + + delete handler; + delete x; + + return status.report(__func__); + } - vector::Vector* x = new vector::Vector(N); - vector::Vector* y = new vector::Vector(N); + TestOutcome massAxpy(index_type N, index_type K) + { + TestStatus status; + + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = 
memory::HOST; + else + ms = memory::DEVICE; + + ReSolve::VectorHandler *handler = createVectorHandler(); + + vector::Vector *x = new vector::Vector(N, K); + vector::Vector *y = new vector::Vector(N); + vector::Vector *alpha = new vector::Vector(K); + ; + x->allocate(ms); + y->allocate(ms); + alpha->allocate(ms); + + y->setToConst(2.0, ms); + alpha->setToConst(-1.0, ms); + for (int ii = 0; ii < K; ++ii) { + real_type c; + if (ii % 2 == 0) { + c = -1.0; + } else { + c = 0.5; + } + x->setToConst(ii, c, ms); + } + + index_type r = K % 2; + real_type res = (real_type)((floor((real_type)K / 2.0) + r) * 1.0 + floor((real_type)K / 2.0) * (-0.5)); + + handler->massAxpy(N, alpha, K, x, y, memspace_); + status *= verifyAnswer(y, 2.0 - res, memspace_); + + delete handler; + delete x; + delete y; + delete alpha; + + return status.report(__func__); + } + + TestOutcome massDot(index_type N, index_type K) + { + TestStatus status; + + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + + ReSolve::VectorHandler *handler = createVectorHandler(); + + vector::Vector *x = new vector::Vector(N, K); + vector::Vector *y = new vector::Vector(N, 2); + vector::Vector *res = new vector::Vector(K, 2); + x->allocate(ms); + y->allocate(ms); + res->allocate(ms); + + x->setToConst(1.0, ms); + y->setToConst(-1.0, ms); + handler->massDot2Vec(N, x, K, y, res, memspace_); - x->allocate(ms); - y->allocate(ms); - - x->setToConst(3.0, ms); - y->setToConst(1.0, ms); + status *= verifyAnswer(res, (-1.0) * (real_type)N, memspace_); - real_type alpha = 0.5; - //the result is a vector with y[i] = 2.5; - handler->axpy(&alpha, x, y, memspace_); - status *= verifyAnswer(y, 2.5, memspace_); - - delete handler; - delete x; - delete y; - - return status.report(__func__); - } - - TestOutcome dot(index_type N) - { - TestStatus status; - - ReSolve::memory::MemorySpace ms; - if (memspace_ == "cpu") - ms = memory::HOST; - else - ms = memory::DEVICE; - - 
ReSolve::VectorHandler* handler = createVectorHandler(); - - vector::Vector* x = new vector::Vector(N); - vector::Vector* y = new vector::Vector(N); - - x->allocate(ms); - y->allocate(ms); - - x->setToConst(0.25, ms); - y->setToConst(4.0, ms); - real_type ans; - //the result is N - ans = handler->dot(x, y, memspace_); - - bool st = true;; - if (ans != (real_type) N) { - st = false; - printf("the wrong answer is %f expecting %f \n", ans, (real_type) N); - } - status *= st; - - delete handler; - delete x; - delete y; - - return status.report(__func__); - } - - TestOutcome scal(index_type N) - { - TestStatus status; - - ReSolve::memory::MemorySpace ms; - if (memspace_ == "cpu") - ms = memory::HOST; - else - ms = memory::DEVICE; - - ReSolve::VectorHandler* handler = createVectorHandler(); - - vector::Vector* x = new vector::Vector(N); - - x->allocate(ms); - - x->setToConst(1.25, ms); - - real_type alpha = 3.5; - - //the answer is x[i] = 4.375; - handler->scal(&alpha, x, memspace_); - status *= verifyAnswer(x, 4.375, memspace_); - - delete handler; - delete x; - - return status.report(__func__); - } - - TestOutcome massAxpy(index_type N, index_type K) - { - TestStatus status; - - ReSolve::memory::MemorySpace ms; - if (memspace_ == "cpu") - ms = memory::HOST; - else - ms = memory::DEVICE; - - ReSolve::VectorHandler* handler = createVectorHandler(); - - vector::Vector* x = new vector::Vector(N, K); - vector::Vector* y = new vector::Vector(N); - vector::Vector* alpha = new vector::Vector(K);; - x->allocate(ms); - y->allocate(ms); - alpha->allocate(ms); - - y->setToConst(2.0, ms); - alpha->setToConst(-1.0, ms); - for (int ii = 0; ii < K; ++ii) { - real_type c; - if (ii % 2 == 0) { - c = -1.0; - } else { - c = 0.5; - } - x->setToConst(ii, c, ms); - } - - index_type r = K % 2; - real_type res = (real_type) ((floor((real_type) K / 2.0) + r) * 1.0 + floor((real_type) K / 2.0) * (-0.5)); - - handler->massAxpy(N, alpha, K, x, y, memspace_); - status *= verifyAnswer(y, 2.0 - res, 
memspace_); - - delete handler; - delete x; - delete y; - delete alpha; - - return status.report(__func__); - } - - TestOutcome massDot(index_type N, index_type K) - { - TestStatus status; - - ReSolve::memory::MemorySpace ms; - if (memspace_ == "cpu") - ms = memory::HOST; - else - ms = memory::DEVICE; - - ReSolve::VectorHandler* handler = createVectorHandler(); - - vector::Vector* x = new vector::Vector(N, K); - vector::Vector* y = new vector::Vector(N, 2); - vector::Vector* res = new vector::Vector(K, 2); - x->allocate(ms); - y->allocate(ms); - res->allocate(ms); - - x->setToConst(1.0, ms); - y->setToConst(-1.0, ms); - handler->massDot2Vec(N, x, K, y, res, memspace_); - - status *= verifyAnswer(res, (-1.0) * (real_type) N, memspace_); - - delete handler; - delete x; - delete y; - delete res; - return status.report(__func__); - } - - TestOutcome gemv(index_type N, index_type K) - { - TestStatus status; - - ReSolve::memory::MemorySpace ms; - if (memspace_ == "cpu") - ms = memory::HOST; - else - ms = memory::DEVICE; - - ReSolve::VectorHandler* handler = createVectorHandler(); - vector::Vector* V = new vector::Vector(N, K); - // for the test with NO TRANSPOSE - vector::Vector* yN = new vector::Vector(K); - vector::Vector* xN = new vector::Vector(N); - // for the test with TRANSPOSE - vector::Vector* yT = new vector::Vector(N); - vector::Vector* xT = new vector::Vector(K); - - V->allocate(ms); - yN->allocate(ms); - xN->allocate(ms); - yT->allocate(ms); - xT->allocate(ms); - - V->setToConst(1.0, ms); - yN->setToConst(-1.0, ms); - xN->setToConst(.5, ms); - yT->setToConst(-1.0, ms); - xT->setToConst(.5, ms); - - real_type alpha = -1.0; - real_type beta = 1.0; - handler->gemv("N", N, K, &alpha, &beta, V, yN, xN, memspace_); - status *= verifyAnswer(xN, (real_type) (K) + 0.5, memspace_); - handler->gemv("T", N, K, &alpha, &beta, V, yT, xT, memspace_); - status *= verifyAnswer(xT, (real_type) (N) + 0.5, memspace_); - - return status.report(__func__); - } - - private: - 
std::string memspace_{"cpu"}; - - ReSolve::VectorHandler* createVectorHandler() - { - if (memspace_ == "cpu") { - LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); - return new VectorHandler(workpsace); + delete handler; + delete x; + delete y; + delete res; + return status.report(__func__); + } + + TestOutcome gemv(index_type N, index_type K) + { + TestStatus status; + + ReSolve::memory::MemorySpace ms; + if (memspace_ == "cpu") + ms = memory::HOST; + else + ms = memory::DEVICE; + + ReSolve::VectorHandler *handler = createVectorHandler(); + vector::Vector *V = new vector::Vector(N, K); + // for the test with NO TRANSPOSE + vector::Vector *yN = new vector::Vector(K); + vector::Vector *xN = new vector::Vector(N); + // for the test with TRANSPOSE + vector::Vector *yT = new vector::Vector(N); + vector::Vector *xT = new vector::Vector(K); + + V->allocate(ms); + yN->allocate(ms); + xN->allocate(ms); + yT->allocate(ms); + xT->allocate(ms); + + V->setToConst(1.0, ms); + yN->setToConst(-1.0, ms); + xN->setToConst(.5, ms); + yT->setToConst(-1.0, ms); + xT->setToConst(.5, ms); + + real_type alpha = -1.0; + real_type beta = 1.0; + handler->gemv("N", N, K, &alpha, &beta, V, yN, xN, memspace_); + status *= verifyAnswer(xN, (real_type)(K) + 0.5, memspace_); + handler->gemv("T", N, K, &alpha, &beta, V, yT, xT, memspace_); + status *= verifyAnswer(xT, (real_type)(N) + 0.5, memspace_); + + return status.report(__func__); + } + + private: + std::string memspace_{"cpu"}; + + ReSolve::VectorHandler *createVectorHandler() + { + if (memspace_ == "cpu") { + LinAlgWorkspaceCpu *workpsace = new LinAlgWorkspaceCpu(); + return new VectorHandler(workpsace); #ifdef RESOLVE_USE_CUDA - } else if (memspace_ == "cuda") { - LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); - workspace->initializeHandles(); - return new VectorHandler(workspace); + } else if (memspace_ == "cuda") { + LinAlgWorkspaceCUDA *workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + 
return new VectorHandler(workspace); #endif #ifdef RESOLVE_USE_HIP - } else if (memspace_ == "hip") { - LinAlgWorkspaceHIP* workspace = new LinAlgWorkspaceHIP(); - workspace->initializeHandles(); - return new VectorHandler(workspace); + } else if (memspace_ == "hip") { + LinAlgWorkspaceHIP *workspace = new LinAlgWorkspaceHIP(); + workspace->initializeHandles(); + return new VectorHandler(workspace); #endif - } else { - std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n"; - } - return nullptr; - } - - // we can verify through norm but that would defeat the purpose of testing vector handler ... - bool verifyAnswer(vector::Vector* x, real_type answer, std::string memspace) - { - bool status = true; - if (memspace != "cpu") { - x->copyData(memory::DEVICE, memory::HOST); - } - - for (index_type i = 0; i < x->getSize(); ++i) { - // std::cout << x->getData("cpu")[i] << "\n"; - if (!isEqual(x->getData(memory::HOST)[i], answer)) { - std::cout << std::setprecision(16); - status = false; - std::cout << "Solution vector element x[" << i << "] = " << x->getData(memory::HOST)[i] - << ", expected: " << answer << "\n"; - break; - } - } - return status; - } - };//class + } else { + std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n"; + } + return nullptr; } -} + // we can verify through norm but that would defeat the purpose of testing vector handler ... 
+ bool verifyAnswer(vector::Vector *x, real_type answer, std::string memspace) + { + bool status = true; + if (memspace != "cpu") { + x->copyData(memory::DEVICE, memory::HOST); + } + + for (index_type i = 0; i < x->getSize(); ++i) { + // std::cout << x->getData("cpu")[i] << "\n"; + if (!isEqual(x->getData(memory::HOST)[i], answer)) { + std::cout << std::setprecision(16); + status = false; + std::cout << "Solution vector element x[" << i << "] = " << x->getData(memory::HOST)[i] << ", expected: " << answer << "\n"; + break; + } + } + return status; + } +}; // class +} // namespace tests +} // namespace ReSolve diff --git a/tests/unit/vector/runGramSchmidtTests.cpp b/tests/unit/vector/runGramSchmidtTests.cpp index e118eb6d3..1c368d95a 100644 --- a/tests/unit/vector/runGramSchmidtTests.cpp +++ b/tests/unit/vector/runGramSchmidtTests.cpp @@ -1,11 +1,11 @@ -#include -#include -#include #include "GramSchmidtTests.hpp" +#include +#include +#include -int main(int, char**) +int main(int, char **) { - ReSolve::tests::TestingResults result; + ReSolve::tests::TestingResults result; #ifdef RESOLVE_USE_CUDA { diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index 9bb543a52..75f708864 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -1,16 +1,16 @@ -#include -#include -#include #include "VectorHandlerTests.hpp" +#include +#include +#include -int main(int, char**) +int main(int, char **) { - ReSolve::tests::TestingResults result; + ReSolve::tests::TestingResults result; { std::cout << "Running tests on CPU:\n"; ReSolve::tests::VectorHandlerTests test("cpu"); - + result += test.vectorHandlerConstructor(); result += test.dot(50); result += test.axpy(50);