Skip to content

Commit

Permalink
support non-default stream (#5)
Browse files (browse the repository at this point in the history)
Co-authored-by: Ning Peiyang <[email protected]>
  • Loading branch information
PYNing and Ning Peiyang authored Oct 16, 2024
1 parent 72a8aab commit 29aa984
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/torch_linear_assignment_cuda_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <thrust/fill.h>

#include <torch/extension.h>
+ #include <ATen/cuda/CUDAContext.h>

#include <limits>

Expand Down Expand Up @@ -222,7 +223,8 @@ void solve_cuda_batch(int bs, int nr, int nc,

int blockSize = SMPCores();
int gridSize = (bs + blockSize - 1) / blockSize;
- solve_cuda_kernel_batch<<<gridSize, blockSize>>>(
+ at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();
+ solve_cuda_kernel_batch<<<gridSize, blockSize, 0, stream.stream()>>>(
bs, nr, nc,
cost,
thrust::raw_pointer_cast(&u.front()),
Expand Down

0 comments on commit 29aa984

Please sign in to comment.