From 23b43c33c7fdf23c655b52746852eed2ad599dfc Mon Sep 17 00:00:00 2001 From: Agnes Leroy Date: Thu, 12 Dec 2024 09:31:21 +0100 Subject: [PATCH] fix(gpu): fix scalar ne --- .../tfhe-cuda-backend/cuda/src/integer/comparison.cuh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh index 945d4b83d9..80205eeff3 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh @@ -187,14 +187,18 @@ __host__ void is_at_least_one_comparisons_block_true( uint32_t remaining_blocks = num_radix_blocks; while (remaining_blocks > 0) { // Split in max_value chunks - uint32_t chunk_length = std::min(max_value, remaining_blocks); - int num_chunks = remaining_blocks / chunk_length; + int num_chunks = (remaining_blocks + max_value - 1) / max_value; // Since all blocks encrypt either 0 or 1, we can sum max_value of them // as in the worst case we will be adding `max_value` ones auto input_blocks = mem_ptr->tmp_lwe_array_out; auto accumulator = buffer->tmp_block_accumulated; + uint32_t chunk_lengths[num_chunks]; + auto begin_remaining_blocks = remaining_blocks; for (int i = 0; i < num_chunks; i++) { + uint32_t chunk_length = + std::min(max_value, begin_remaining_blocks - i * max_value); + chunk_lengths[i] = chunk_length; accumulate_all_blocks(streams[0], gpu_indexes[0], accumulator, input_blocks, big_lwe_dimension, chunk_length);