Skip to content

Commit

Permalink
chore(gpu): remove omp from signed overflow add_sub and scalar comparisons
Browse files Browse the repository at this point in the history
  • Loading branch information
agnesLeroy committed Aug 9, 2024
1 parent 9507058 commit 29e644a
Show file tree
Hide file tree
Showing 2 changed files with 187 additions and 244 deletions.
29 changes: 8 additions & 21 deletions backends/tfhe-cuda-backend/cuda/src/integer/addition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "utils/kernel_dimensions.cuh"
#include <fstream>
#include <iostream>
#include <omp.h>
#include <sstream>
#include <string>
#include <vector>
Expand Down Expand Up @@ -110,26 +109,14 @@ __host__ void host_integer_signed_overflowing_add_or_sub_kb(
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
}

#pragma omp parallel sections
{
// generate input_carries and output_carry
#pragma omp section
{
host_propagate_single_carry(
mem_ptr->sub_streams_1, gpu_indexes, gpu_count, result, output_carry,
input_carries, mem_ptr->scp_mem, bsks, ksks, num_blocks);
}

// generate generate_last_block_inner_propagation
#pragma omp section
{
host_generate_last_block_inner_propagation(
mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
last_block_inner_propagation, &lhs[(num_blocks - 1) * big_lwe_size],
&rhs[(num_blocks - 1) * big_lwe_size], mem_ptr->las_block_prop_mem,
bsks, ksks);
}
}
host_propagate_single_carry(mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
result, output_carry, input_carries,
mem_ptr->scp_mem, bsks, ksks, num_blocks);
host_generate_last_block_inner_propagation(
mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
last_block_inner_propagation, &lhs[(num_blocks - 1) * big_lwe_size],
&rhs[(num_blocks - 1) * big_lwe_size], mem_ptr->las_block_prop_mem, bsks,
ksks);

for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
Expand Down
Loading

0 comments on commit 29e644a

Please sign in to comment.