-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
started separating the gpu backend operations
- Loading branch information
1 parent
95d0f05
commit f426525
Showing
12 changed files
with
128 additions
and
139 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,6 @@ private: | |
gpuReaction* reactions_; | ||
|
||
|
||
void allocate(); | ||
void deallocate(); | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,6 @@ | ||
#pragma once | ||
//#include <string_view> | ||
//#include <string.h> | ||
#include <stdio.h> | ||
//#include "cuda_host_dev.H" | ||
|
||
|
||
// Aborts (via assert) with a diagnostic message when `val` is null.
// Wrapped in do { } while (0) so the macro expands to a single statement
// and is safe inside an unbraced if/else — the previous bare-if form had a
// dangling-else hazard. Arguments are parenthesized for macro hygiene, and
// the diagnostic now ends with a newline so it is flushed readably.
#define check_ptr(val, name)                       \
    do {                                           \
        if (!(val)) {                              \
            printf("null ptr %s\n", (name));       \
            assert(0);                             \
        }                                          \
    } while (0)
|
||
/* | ||
template<class T> | ||
static inline CUDA_HOSTDEV void check_ptr(T ptr, std::string_view name) | ||
{ | ||
//Note string view may not be null terminated and this is dangerous | ||
if (!ptr) | ||
{ | ||
printf("Bad alloc for: %s \n", name.data()); | ||
} | ||
} | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#pragma once | ||
|
||
#include "error_handling.H" | ||
|
||
/// Allocates uninitialized device (GPU) memory for `length` elements of T.
/// The allocation status is routed through gpuErrorCheck, which reports any
/// cudaMalloc failure. The returned pointer must be released with the
/// matching device-side free (see device_free).
template <class T>
static inline T* device_allocate(size_t length) {
    const size_t nbytes = sizeof(T) * length;
    T* d_ptr = nullptr;
    gpuErrorCheck(cudaMalloc(reinterpret_cast<void**>(&d_ptr), nbytes));
    return d_ptr;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#pragma once | ||
|
||
#include "error_handling.H" | ||
|
||
/// Releases device (GPU) memory previously obtained from the device
/// allocator. The cudaFree status is routed through gpuErrorCheck so a
/// bad free (e.g. a host pointer) is reported rather than ignored.
/// NOTE(review): cudaFree(nullptr) is documented as a no-op, so freeing a
/// never-allocated null pointer is safe here.
template <class T>
static inline void device_free(T* ptr) {
    gpuErrorCheck(cudaFree(ptr));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#pragma once | ||
|
||
#include "gpu_constants.H" | ||
#include "error_handling.H" | ||
|
||
namespace detail {

/// Kernel trampoline for for_each_index: the thread whose flat 1-D global
/// id is `tid` evaluates op(tid). Launched on a 1-D grid; threads whose id
/// falls past n-1 return immediately (tail guard for grids that do not
/// divide n evenly).
template <class UnaryOperation>
__global__ void cuda_backend(gLabel n, UnaryOperation op) {

    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n) { return; }
    op(tid);
}

} // namespace detail
|
||
///
///@brief Evaluates op(i) for all i in range [0, n[ in parallel on the device.
///
///@param op A unary operation taking a gLabel index as a parameter.
///@param n  The maximum i index (non-inclusive). n <= 0 is a no-op.
///
/// Blocks until the kernel has finished (cudaDeviceSynchronize), so both
/// launch-configuration errors and asynchronous execution errors are
/// reported here rather than surfacing at some later CUDA call.
///
template <class UnaryOperation>
static inline void for_each_index(UnaryOperation op, gLabel n) {

    // An empty range would otherwise produce a 0-block launch, which CUDA
    // rejects as an invalid configuration and gpuErrorCheck would abort on.
    if (n <= 0) { return; }

    // 256 threads per block: a multiple of the 32-wide warp and a much
    // better occupancy default than the single warp per block used before.
    constexpr gLabel NTHREADS = 256;
    const gLabel NBLOCKS = (n + NTHREADS - 1) / NTHREADS;  // ceil-div

    detail::cuda_backend<<<NBLOCKS, NTHREADS>>>(n, op);

    gpuErrorCheck(cudaGetLastError());       // catch launch-config errors
    gpuErrorCheck(cudaDeviceSynchronize());  // catch async execution errors
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#pragma once | ||
|
||
#include "error_handling.H" | ||
#include "thrust/copy.h" | ||
|
||
///
///@brief Copies the host range [h_begin, h_end[ to device memory at d_begin.
///
///@param h_begin Start of the contiguous host range to copy.
///@param h_end   One past the last host element.
///@param d_begin Device destination (raw device pointer expected by
///               cudaMemcpy). An empty range is a no-op.
///
/// Uses a blocking cudaMemcpy, so the data is resident on the device when
/// this function returns.
///
template <class InputIter, class OutputIter>
static inline void host_to_device(InputIter h_begin, InputIter h_end, OutputIter d_begin) {

    using T  = typename std::iterator_traits<InputIter>::value_type;
    using T2 = typename std::iterator_traits<OutputIter>::value_type;
    static_assert(std::is_same_v<T, T2>, "Mismatching types in host_to_device");

    const auto length = std::distance(h_begin, h_end);

    // Guard the empty range: the previous version unconditionally evaluated
    // &(*h_begin), dereferencing the end iterator when the range was empty
    // (undefined behavior) even though zero bytes would be copied.
    if (length <= 0) { return; }

    const auto bytesize = static_cast<size_t>(length) * sizeof(T);
    gpuErrorCheck(
        cudaMemcpy(d_begin, &(*h_begin), bytesize, cudaMemcpyHostToDevice));
}
|
||
|