From cc4721a9bc00a32a46b752d5f11102b880f797bd Mon Sep 17 00:00:00 2001 From: Agnes Leroy Date: Thu, 1 Aug 2024 10:35:43 +0200 Subject: [PATCH] chore(gpu): fix lwe chunk size argument --- .../cuda/include/programmable_bootstrap_multibit.h | 14 +++++++------- .../cuda/src/pbs/programmable_bootstrap.cuh | 4 ++-- .../src/pbs/programmable_bootstrap_cg_multibit.cuh | 4 ++-- .../src/pbs/programmable_bootstrap_multibit.cuh | 4 ++-- .../pbs/programmable_bootstrap_tbc_multibit.cuh | 4 ++-- .../tests/test_multibit_pbs.cpp | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/backends/tfhe-cuda-backend/cuda/include/programmable_bootstrap_multibit.h b/backends/tfhe-cuda-backend/cuda/include/programmable_bootstrap_multibit.h index ce1617872c..92378ed8e0 100644 --- a/backends/tfhe-cuda-backend/cuda/include/programmable_bootstrap_multibit.h +++ b/backends/tfhe-cuda-backend/cuda/include/programmable_bootstrap_multibit.h @@ -20,7 +20,7 @@ void scratch_cuda_multi_bit_programmable_bootstrap_64( uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory, - uint32_t chunk_size = 0); + uint32_t chunk_size); void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64( void *stream, uint32_t gpu_index, void *lwe_array_out, @@ -28,7 +28,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64( void *lwe_array_in, void *lwe_input_indexes, void *bootstrapping_key, int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log, - uint32_t level_count, uint32_t num_samples, uint32_t lwe_chunk_size = 0); + uint32_t level_count, uint32_t num_samples, uint32_t lwe_chunk_size); void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream, uint32_t gpu_index, @@ -71,14 +71,14 @@ void scratch_cuda_cg_multi_bit_programmable_bootstrap( uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory, - uint32_t lwe_chunk_size = 0); + uint32_t lwe_chunk_size); template void scratch_cuda_cg_multi_bit_programmable_bootstrap( void *stream, uint32_t gpu_index, pbs_buffer **pbs_buffer, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory, - uint32_t lwe_chunk_size = 0); + uint32_t lwe_chunk_size); template void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector( @@ -88,7 +88,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector( pbs_buffer *pbs_buffer, uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, uint32_t num_samples, - uint32_t lwe_chunk_size = 0); + uint32_t lwe_chunk_size); template void scratch_cuda_multi_bit_programmable_bootstrap( @@ -96,7 +96,7 @@ void scratch_cuda_multi_bit_programmable_bootstrap( uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory, - uint32_t lwe_chunk_size = 0); + uint32_t lwe_chunk_size); template void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector( @@ -106,7 +106,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector( pbs_buffer *pbs_buffer, uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, uint32_t num_samples, - uint32_t lwe_chunk_size = 0); + uint32_t lwe_chunk_size); template __host__ __device__ uint64_t diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap.cuh b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap.cuh index f8f12d4403..2eaad68560 100644 --- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap.cuh @@ -198,7 +198,7 @@ void execute_pbs_async( current_lwe_array_in, current_lwe_input_indexes, bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension, polynomial_size, grouping_factor, base_log, level_count, - num_inputs_on_gpu); + num_inputs_on_gpu, 0); } break; case CLASSICAL: @@ -270,7 +270,7 @@ void execute_scratch_pbs(cudaStream_t stream, uint32_t gpu_index, scratch_cuda_multi_bit_programmable_bootstrap_64( stream, gpu_index, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, level_count, grouping_factor, - input_lwe_ciphertext_count, allocate_gpu_memory); + input_lwe_ciphertext_count, allocate_gpu_memory, 0); break; case CLASSICAL: scratch_cuda_programmable_bootstrap_64( diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_multibit.cuh b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_multibit.cuh index 4f9e96c4f4..99b55242f8 100644 --- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_multibit.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_multibit.cuh @@ -177,7 +177,7 @@ __host__ void scratch_cg_multi_bit_programmable_bootstrap( pbs_buffer **buffer, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory, - uint32_t lwe_chunk_size = 0) { + uint32_t lwe_chunk_size) { uint64_t full_sm_keybundle = get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle( @@ -337,7 +337,7 @@ __host__ void host_cg_multi_bit_programmable_bootstrap( pbs_buffer *buffer, uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, uint32_t num_samples, - uint32_t lwe_chunk_size = 0) { + uint32_t lwe_chunk_size) { if (!lwe_chunk_size) lwe_chunk_size = get_lwe_chunk_size(gpu_index, num_samples, diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_multibit.cuh b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_multibit.cuh index f913280eb9..4378a102ca 100644 --- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_multibit.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_multibit.cuh @@ -376,7 +376,7 @@ __host__ void scratch_multi_bit_programmable_bootstrap( pbs_buffer **buffer, uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, uint32_t input_lwe_ciphertext_count, uint32_t grouping_factor, - bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0) { + bool allocate_gpu_memory, uint32_t lwe_chunk_size) { int max_shared_memory = cuda_get_max_shared_memory(0); uint64_t full_sm_keybundle = @@ -632,7 +632,7 @@ __host__ void host_multi_bit_programmable_bootstrap( pbs_buffer *buffer, uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, uint32_t num_samples, - uint32_t lwe_chunk_size = 0) { + uint32_t lwe_chunk_size) { // If a chunk size is not passed to this function, select one. if (!lwe_chunk_size) diff --git a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_tbc_multibit.cuh b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_tbc_multibit.cuh index 7e13451fd1..0474957499 100644 --- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_tbc_multibit.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_tbc_multibit.cuh @@ -172,7 +172,7 @@ __host__ void scratch_tbc_multi_bit_programmable_bootstrap( pbs_buffer **buffer, uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, uint32_t input_lwe_ciphertext_count, uint32_t grouping_factor, - bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0) { + bool allocate_gpu_memory, uint32_t lwe_chunk_size) { bool supports_dsm = supports_distributed_shared_memory_on_multibit_programmable_bootstrap< @@ -365,7 +365,7 @@ __host__ void host_tbc_multi_bit_programmable_bootstrap( pbs_buffer *buffer, uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, uint32_t num_samples, - uint32_t lwe_chunk_size = 0) { + uint32_t lwe_chunk_size) { cudaSetDevice(gpu_index); if (!lwe_chunk_size) diff --git a/backends/tfhe-cuda-backend/cuda/tests_and_benchmarks/tests/test_multibit_pbs.cpp b/backends/tfhe-cuda-backend/cuda/tests_and_benchmarks/tests/test_multibit_pbs.cpp index 82e3bbb193..335e6bdeda 100644 --- a/backends/tfhe-cuda-backend/cuda/tests_and_benchmarks/tests/test_multibit_pbs.cpp +++ b/backends/tfhe-cuda-backend/cuda/tests_and_benchmarks/tests/test_multibit_pbs.cpp @@ -93,7 +93,7 @@ class MultiBitProgrammableBootstrapTestPrimitives_u64 scratch_cuda_multi_bit_programmable_bootstrap_64( stream, gpu_index, &pbs_buffer, lwe_dimension, glwe_dimension, - polynomial_size, pbs_level, grouping_factor, number_of_inputs, true); + polynomial_size, pbs_level, grouping_factor, number_of_inputs, true, 0); lwe_ct_out_array = (uint64_t *)malloc((glwe_dimension * polynomial_size + 1) *