Skip to content

Commit

Permalink
Refactor CUDA and HIP vector kernel(s).
Browse files Browse the repository at this point in the history
  • Loading branch information
pelesh committed Mar 2, 2024
1 parent 3aabe36 commit f9bc75c
Show file tree
Hide file tree
Showing 20 changed files with 197 additions and 74 deletions.
30 changes: 27 additions & 3 deletions resolve/MemoryUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ namespace ReSolve
{
enum MemorySpace{HOST = 0, DEVICE};
enum MemoryDirection{HOST_TO_HOST = 0, HOST_TO_DEVICE, DEVICE_TO_HOST, DEVICE_TO_DEVICE};
enum DeviceType{NONE=0, CUDADEVICE, HIPDEVICE};
}
}

Expand Down Expand Up @@ -46,6 +47,9 @@ namespace ReSolve
template <typename I, typename T>
int setZeroArrayOnDevice(T* v, I n);

template <typename I, typename T>
int setArrayToConstOnDevice(T* v, T c, I n);

template <typename I, typename T>
int copyArrayDeviceToHost(T* dst, const T* src, I n);

Expand All @@ -55,12 +59,32 @@ namespace ReSolve
template <typename I, typename T>
int copyArrayHostToDevice(T* dst, const T* src, I n);

/// Implemented here as it is always needed
///
/// Methods implemented here are always needed
///

template <typename I, typename T>
int copyArrayHostToHost(T* dst, const T* src, I n)
{
std::size_t nelements = static_cast<std::size_t>(n);
memcpy(dst, src, nelements * sizeof(T));
std::size_t arraysize = static_cast<std::size_t>(n) * sizeof(T);
memcpy(dst, src, arraysize);
return 0;
}

template <typename I, typename T>
int setZeroArrayOnHost(T* v, I n)
{
std::size_t arraysize = static_cast<std::size_t>(n) * sizeof(T);
memset(v, 0, arraysize);
return 0;
}

template <typename I, typename T>
int setArrayToConstOnHost(T* v, T c, I n)
{
for (I i = 0; i < n; ++i) {
v[i] = c;
}
return 0;
}
};
Expand Down
7 changes: 7 additions & 0 deletions resolve/MemoryUtils.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ namespace ReSolve
return Policy::template setZeroArrayOnDevice<I, T>(v, n);
}

template <class Policy>
template <typename I, typename T>
int MemoryUtils<Policy>::setArrayToConstOnDevice(T* v, T c, I n)
{
return Policy::template setArrayToConstOnDevice<I, T>(v, c, n);
}

template <class Policy>
template <typename I, typename T>
int MemoryUtils<Policy>::copyArrayDeviceToHost(T* dst, const T* src, I n)
Expand Down
2 changes: 1 addition & 1 deletion resolve/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
]]

set(ReSolve_CPU_SRC
cpuVectorKernels.cpp
#cpuVectorKernels.cpp
cpuKernels.cpp
MemoryUtils.cpp
)
Expand Down
15 changes: 15 additions & 0 deletions resolve/cpu/CpuMemory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,21 @@ namespace ReSolve
return -1;
}

/**
* @brief Dummy function to notify us something is wrong.
*
* This will be called only if GPU device support is not built, so
* trying to access a device should indicate a bug in the code.
*
* @return Allways return failure!
*/
template <typename I, typename T>
static int setArrayToConstOnDevice(T* /* v */, T /* c */, I /* n */)
{
ReSolve::io::Logger::error() << "Trying to initialize array on a GPU device, but GPU support not available.\n";
return -1;
}

/**
* @brief Dummy function to notify us something is wrong.
*
Expand Down
2 changes: 2 additions & 0 deletions resolve/cpu/MemoryUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ namespace ReSolve

template int MemoryUtils<memory::Cpu>::setZeroArrayOnDevice<index_type, real_type>( real_type*, index_type);

template int MemoryUtils<memory::Cpu>::setArrayToConstOnDevice<index_type, real_type>( real_type*, real_type, index_type);

template int MemoryUtils<memory::Cpu>::copyArrayDeviceToHost<index_type, real_type>( real_type*, const real_type*, index_type);
template int MemoryUtils<memory::Cpu>::copyArrayDeviceToHost<index_type, index_type>(index_type*, const index_type*, index_type);

Expand Down
6 changes: 3 additions & 3 deletions resolve/cpu/cpuVectorKernels.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#include <resolve/Common.hpp>
#include <resolve/vector/VectorKernels.hpp>
// #include <resolve/vector/VectorKernels.hpp>


namespace ReSolve { namespace vector {
namespace ReSolve { //namespace vector {


void set_array_const(index_type n, real_type val, real_type* arr)
Expand All @@ -12,4 +12,4 @@ void set_array_const(index_type n, real_type val, real_type* arr)
}
}

}} // namespace ReSolve::vector
} //} // namespace ReSolve::vector
3 changes: 2 additions & 1 deletion resolve/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

set(ReSolve_CUDA_SRC
cudaKernels.cu
VectorKernels.cu
cudaVectorKernels.cu
MemoryUtils.cu
)

set(ReSolve_CUDA_HEADER_INSTALL
cudaKernels.h
cudaVectorKernels.h
CudaMemory.hpp
cuda_check_errors.hpp
)
Expand Down
20 changes: 20 additions & 0 deletions resolve/cuda/CudaMemory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <iostream>
#include <cuda_runtime.h>

#include <resolve/cuda/cudaVectorKernels.h>
#include "cuda_check_errors.hpp"

namespace ReSolve
Expand Down Expand Up @@ -95,6 +96,25 @@ namespace ReSolve
return checkCudaErrors(cudaMemset(v, 0, sizeof(T) * n));
}

/**
* @brief Sets elements of device array v to the value of c
*
* @param v - pointer to the array to be allocated on the device
* @param c - value to set all array elements
* @param n - number of the array elements to be set to zero
*
* @tparam T - Array element type
* @tparam I - Array index type
*
* @post First n elements of array v are set to zero
*/
template <typename I, typename T>
static int setArrayToConstOnDevice(T* v, T c, I n)
{
cuda_set_array_const(n, c, v);
return checkCudaErrors(0);
}

/**
* @brief Copies array `src` from device to the array `dst` on the host.
*
Expand Down
2 changes: 2 additions & 0 deletions resolve/cuda/MemoryUtils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ namespace ReSolve

template int MemoryUtils<memory::Cuda>::setZeroArrayOnDevice<index_type, real_type>( real_type*, index_type);

template int MemoryUtils<memory::Cuda>::setArrayToConstOnDevice<index_type, real_type>( real_type*, real_type, index_type);

template int MemoryUtils<memory::Cuda>::copyArrayDeviceToHost<index_type, real_type>( real_type*, const real_type*, index_type);
template int MemoryUtils<memory::Cuda>::copyArrayDeviceToHost<index_type, index_type>(index_type*, const index_type*, index_type);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
* @note Kernel wrappers implemented here are intended for use in hardware
* agnostic code.
*/
#include <cuda_runtime.h>

#include <resolve/vector/VectorKernels.hpp>
#include <resolve/cuda/cudaVectorKernels.h>


namespace ReSolve { namespace vector {
namespace ReSolve {

namespace kernels {

Expand All @@ -36,12 +37,12 @@ namespace ReSolve { namespace vector {

} // namespace kernels

void set_array_const(index_type n, real_type val, real_type* arr)
void cuda_set_array_const(index_type n, real_type val, real_type* arr)
{
index_type num_blocks;
index_type block_size = 512;
num_blocks = (n + block_size - 1) / block_size;
kernels::set_const<<<num_blocks, block_size>>>(n, val, arr);
}

}} // namespace ReSolve::vector
} // namespace ReSolve
17 changes: 17 additions & 0 deletions resolve/cuda/cudaVectorKernels.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* @file cudaVectorKernels.h
* @author Slaven Peles ([email protected])
* @brief Contains declarations of CUDA vector kernel wrappers.
* @date 2023-12-08
*
* @note Kernel wrappers implemented here are intended for use in hardware
* agnostic code.
*/
#pragma once

#include <resolve/Common.hpp>

namespace ReSolve
{
void cuda_set_array_const(index_type n, real_type val, real_type* arr);
}
3 changes: 2 additions & 1 deletion resolve/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

set(ReSolve_HIP_SRC
hipKernels.hip
VectorKernels.hip
hipVectorKernels.hip
MemoryUtils.hip
)

set(ReSolve_HIP_HEADER_INSTALL
hipKernels.h
hipVectorKernels.h
HipMemory.hpp
hip_check_errors.hpp
)
Expand Down
20 changes: 20 additions & 0 deletions resolve/hip/HipMemory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <iostream>
#include <hip/hip_runtime.h>

#include <resolve/hip/hipVectorKernels.h>
#include "hip_check_errors.hpp"

namespace ReSolve
Expand Down Expand Up @@ -95,6 +96,25 @@ namespace ReSolve
return checkHipErrors(hipMemset(v, 0, sizeof(T) * n));
}

/**
* @brief Sets elements of device array v to constant value c
*
* @param v - pointer to the array to be allocated on the device
* @param c - value to set all array elements
* @param n - number of the array elements to be set to zero
*
* @tparam T - Array element type
* @tparam I - Array index type
*
* @post First n elements of array v are set to zero
*/
template <typename I, typename T>
static int setArrayToConstOnDevice(T* v, T c, I n)
{
hip_set_array_const(n, c, v);
return checkHipErrors(0);
}

/**
* @brief Copies array `src` from device to the array `dst` on the host.
*
Expand Down
2 changes: 2 additions & 0 deletions resolve/hip/MemoryUtils.hip
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ namespace ReSolve

template int MemoryUtils<memory::Hip>::setZeroArrayOnDevice<index_type, real_type>( real_type*, index_type);

template int MemoryUtils<memory::Hip>::setArrayToConstOnDevice<index_type, real_type>( real_type*, real_type, index_type);

template int MemoryUtils<memory::Hip>::copyArrayDeviceToHost<index_type, real_type>( real_type*, const real_type*, index_type);
template int MemoryUtils<memory::Hip>::copyArrayDeviceToHost<index_type, index_type>(index_type*, const index_type*, index_type);

Expand Down
39 changes: 0 additions & 39 deletions resolve/hip/VectorKernels.hip

This file was deleted.

17 changes: 17 additions & 0 deletions resolve/hip/hipVectorKernels.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* @file hipVectorKernels.h
* @author Slaven Peles ([email protected])
* @brief Contains declaration of HIP vector kernels.
* @date 2023-12-08
*
* @note Kernel wrappers implemented here are intended for use in hardware
* agnostic code.
*/
#pragma once

#include <resolve/Common.hpp>

namespace ReSolve
{
void hip_set_array_const(index_type n, real_type c, real_type* v);
}
39 changes: 39 additions & 0 deletions resolve/hip/hipVectorKernels.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* @file hipVectorKernels.hip
* @author Slaven Peles ([email protected])
* @brief Contains implementation of HIP vector kernels.
* @date 2023-12-08
*
* @note Kernel wrappers implemented here are intended for use in hardware
* agnostic code.
*/

#include <hip/hip_runtime.h>

#include <resolve/Common.hpp>
#include <resolve/hip/hipVectorKernels.h>

namespace ReSolve {

namespace kernels {

__global__ void set_array_to_const(index_type n, real_type val, real_type* arr)
{
index_type i = blockIdx.x * blockDim.x + threadIdx.x;
while (i < n)
{
arr[i] = val;
i += blockDim.x * gridDim.x;
}
}
} // namespace kernels

void hip_set_array_const(index_type n, real_type c, real_type* v)
{
index_type num_blocks;
index_type block_size = 512;
num_blocks = (n + block_size - 1) / block_size;
hipLaunchKernelGGL(kernels::set_array_to_const, dim3(num_blocks), dim3(block_size), 0, 0, n, c, v);
}

} // namespace ReSolve
Loading

0 comments on commit f9bc75c

Please sign in to comment.