chore(gpu): fix lwe chunk size argument
agnesLeroy committed Aug 1, 2024
1 parent 5547d92 · commit cc4721a
Showing 6 changed files with 16 additions and 16 deletions.
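The change is the same across all six files: the chunk-size parameter loses its "= 0" default, and every caller now passes the sentinel 0 explicitly. A likely motivation, not spelled out in the commit message, is the C++ rule that a default argument may be written on a declaration or on a definition but must not be repeated on both; before this commit, "= 0" appeared on the header prototypes and on the .cuh definitions alike. A minimal before/after sketch, with scratch as a hypothetical stand-in for the scratch_* entry points:

#include <cstdint>

// Before (ill-formed when both lines are visible to the compiler):
//   void scratch(uint32_t lwe_chunk_size = 0);         // declaration
//   void scratch(uint32_t lwe_chunk_size = 0) { ... }  // definition:
// compilers reject this as a redefinition of the default argument.
//
// After: no default anywhere; 0 is an explicit sentinel meaning
// "let the library pick a chunk size".
void scratch(uint32_t lwe_chunk_size);

void scratch(uint32_t lwe_chunk_size) {
  (void)lwe_chunk_size; // buffers would be sized from this value
}

int main() { scratch(/*lwe_chunk_size=*/0); }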
File 1 of 6:
@@ -20,15 +20,15 @@ void scratch_cuda_multi_bit_programmable_bootstrap_64(
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t grouping_factor,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
-uint32_t chunk_size = 0);
+uint32_t chunk_size);

void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void *lwe_output_indexes, void *lut_vector, void *lut_vector_indexes,
void *lwe_array_in, void *lwe_input_indexes, void *bootstrapping_key,
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
-uint32_t level_count, uint32_t num_samples, uint32_t lwe_chunk_size = 0);
+uint32_t level_count, uint32_t num_samples, uint32_t lwe_chunk_size);

void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
uint32_t gpu_index,
@@ -71,14 +71,14 @@ void scratch_cuda_cg_multi_bit_programmable_bootstrap(
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t grouping_factor,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
-uint32_t lwe_chunk_size = 0);
+uint32_t lwe_chunk_size);

template <typename Torus>
void scratch_cuda_cg_multi_bit_programmable_bootstrap(
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
-uint32_t lwe_chunk_size = 0);
+uint32_t lwe_chunk_size);

template <typename Torus>
void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
@@ -88,15 +88,15 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-uint32_t lwe_chunk_size = 0);
+uint32_t lwe_chunk_size);

template <typename Torus>
void scratch_cuda_multi_bit_programmable_bootstrap(
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t grouping_factor,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
-uint32_t lwe_chunk_size = 0);
+uint32_t lwe_chunk_size);

template <typename Torus>
void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
@@ -106,7 +106,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-uint32_t lwe_chunk_size = 0);
+uint32_t lwe_chunk_size);

template <typename Torus>
__host__ __device__ uint64_t
File 2 of 6:
@@ -198,7 +198,7 @@ void execute_pbs_async(
current_lwe_array_in, current_lwe_input_indexes,
bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension,
polynomial_size, grouping_factor, base_log, level_count,
-num_inputs_on_gpu);
+num_inputs_on_gpu, 0);
}
break;
case CLASSICAL:
@@ -270,7 +270,7 @@ void execute_scratch_pbs(cudaStream_t stream, uint32_t gpu_index,
scratch_cuda_multi_bit_programmable_bootstrap_64(
stream, gpu_index, pbs_buffer, lwe_dimension, glwe_dimension,
polynomial_size, level_count, grouping_factor,
-input_lwe_ciphertext_count, allocate_gpu_memory);
+input_lwe_ciphertext_count, allocate_gpu_memory, 0);
break;
case CLASSICAL:
scratch_cuda_programmable_bootstrap_64(
File 3 of 6:
@@ -177,7 +177,7 @@ __host__ void scratch_cg_multi_bit_programmable_bootstrap(
pbs_buffer<Torus, MULTI_BIT> **buffer, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
-uint32_t lwe_chunk_size = 0) {
+uint32_t lwe_chunk_size) {

uint64_t full_sm_keybundle =
get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle<Torus>(
@@ -337,7 +337,7 @@ __host__ void host_cg_multi_bit_programmable_bootstrap(
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-uint32_t lwe_chunk_size = 0) {
+uint32_t lwe_chunk_size) {

if (!lwe_chunk_size)
lwe_chunk_size = get_lwe_chunk_size<Torus, params>(gpu_index, num_samples,
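The hunk above also shows what the sentinel means downstream: when lwe_chunk_size is 0, host_cg_multi_bit_programmable_bootstrap asks get_lwe_chunk_size<Torus, params>(...) for a device-dependent value. A minimal sketch of that convention, with pick_chunk_size as a hypothetical stand-in for the real heuristic:

#include <cstdint>

// Hypothetical stand-in for get_lwe_chunk_size<Torus, params>(...);
// the real heuristic depends on the GPU and on num_samples.
static uint32_t pick_chunk_size(uint32_t gpu_index, uint32_t num_samples) {
  (void)gpu_index;                 // a real version would query the device
  return num_samples < 64 ? 1 : 4; // illustrative values only
}

void host_bootstrap(uint32_t gpu_index, uint32_t num_samples,
                    uint32_t lwe_chunk_size) {
  if (!lwe_chunk_size) // 0 means "auto-select"
    lwe_chunk_size = pick_chunk_size(gpu_index, num_samples);
  // ... the main loop then advances lwe_chunk_size steps at a time,
  // batching keybundle computation per chunk ...
}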
File 4 of 6:
@@ -376,7 +376,7 @@ __host__ void scratch_multi_bit_programmable_bootstrap(
pbs_buffer<Torus, MULTI_BIT> **buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, uint32_t grouping_factor,
-bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0) {
+bool allocate_gpu_memory, uint32_t lwe_chunk_size) {

int max_shared_memory = cuda_get_max_shared_memory(0);
uint64_t full_sm_keybundle =
@@ -632,7 +632,7 @@ __host__ void host_multi_bit_programmable_bootstrap(
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-uint32_t lwe_chunk_size = 0) {
+uint32_t lwe_chunk_size) {

// If a chunk size is not passed to this function, select one.
if (!lwe_chunk_size)
File 5 of 6:
@@ -172,7 +172,7 @@ __host__ void scratch_tbc_multi_bit_programmable_bootstrap(
pbs_buffer<uint64_t, MULTI_BIT> **buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, uint32_t grouping_factor,
-bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0) {
+bool allocate_gpu_memory, uint32_t lwe_chunk_size) {

bool supports_dsm =
supports_distributed_shared_memory_on_multibit_programmable_bootstrap<
@@ -365,7 +365,7 @@ __host__ void host_tbc_multi_bit_programmable_bootstrap(
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-uint32_t lwe_chunk_size = 0) {
+uint32_t lwe_chunk_size) {
cudaSetDevice(gpu_index);

if (!lwe_chunk_size)
File 6 of 6:
@@ -93,7 +93,7 @@ class MultiBitProgrammableBootstrapTestPrimitives_u64

scratch_cuda_multi_bit_programmable_bootstrap_64(
stream, gpu_index, &pbs_buffer, lwe_dimension, glwe_dimension,
-polynomial_size, pbs_level, grouping_factor, number_of_inputs, true);
+polynomial_size, pbs_level, grouping_factor, number_of_inputs, true, 0);

lwe_ct_out_array =
(uint64_t *)malloc((glwe_dimension * polynomial_size + 1) *
