Skip to content

Commit

Permalink
Minor
Browse files Browse the repository at this point in the history
Signed-off-by: Woosuk Kwon <[email protected]>
  • Loading branch information
WoosukKwon committed Dec 22, 2024
1 parent 8a4180c commit 03b1e6f
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion csrc/prepare_inputs/copy_subranges.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,16 @@ void copy_subranges(torch::Tensor& matrix_src, torch::Tensor& matrix_diff,

// One thread block per row.
int blocks = n;
int threads = 1024;
// Pick the number of threads per block from the number of blocks launched:
// the more blocks there are, the fewer threads each block is given.
//   blocks < 128          -> 1024 threads
//   128 <= blocks < 256   ->  512 threads
//   256 <= blocks < 512   ->  256 threads
//   blocks >= 512         ->  128 threads
// NOTE(review): the thresholds look empirically tuned; the rationale
// (presumably balancing per-block width against grid size) is not visible
// here -- confirm against benchmarks before changing them.
int threads;
if (blocks < 128) {
threads = 1024;
} else if (blocks < 256) {
threads = 512;
} else if (blocks < 512) {
threads = 256;
} else {
threads = 128;
}
const at::cuda::OptionalCUDAGuard device_guard(device_of(matrix_tgt));
const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
vllm::copy_subranges_kernel<<<blocks, threads, 0, stream>>>(
Expand Down

0 comments on commit 03b1e6f

Please sign in to comment.