From cd03b7eef7a1b9a3caaffb3966e480b60de7e8a2 Mon Sep 17 00:00:00 2001 From: Guillermo Oyarzun Date: Fri, 20 Dec 2024 09:50:01 +0100 Subject: [PATCH] feat(gpu): implement vector comparisons gpu --- .../cuda/src/integer/integer.cu | 7 +- tfhe/src/high_level_api/array/mod.rs | 96 ++-- tfhe/src/integer/gpu/mod.rs | 4 +- tfhe/src/integer/gpu/server_key/radix/mod.rs | 1 + .../gpu/server_key/radix/tests_signed/mod.rs | 44 ++ .../tests_signed/test_vector_comparisons.rs | 27 ++ .../server_key/radix/tests_unsigned/mod.rs | 41 ++ .../tests_unsigned/test_vector_comparisons.rs | 38 ++ .../server_key/radix/vector_comparisons.rs | 438 ++++++++++++++++++ .../gpu/server_key/radix/vector_find.rs | 4 +- 10 files changed, 664 insertions(+), 36 deletions(-) create mode 100644 tfhe/src/integer/gpu/server_key/radix/tests_signed/test_vector_comparisons.rs create mode 100644 tfhe/src/integer/gpu/server_key/radix/tests_unsigned/test_vector_comparisons.rs create mode 100644 tfhe/src/integer/gpu/server_key/radix/vector_comparisons.rs diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cu b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cu index e2d79f4d53..50de77a464 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cu +++ b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cu @@ -258,7 +258,7 @@ void cuda_apply_many_univariate_lut_kb_64( void scratch_cuda_apply_bivariate_lut_kb_64( void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count, - int8_t **mem_ptr, void *input_lut, uint32_t lwe_dimension, + int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks, @@ -272,8 +272,9 @@ void scratch_cuda_apply_bivariate_lut_kb_64( scratch_cuda_apply_bivariate_lut_kb( (cudaStream_t *)(streams), gpu_indexes, gpu_count, - (int_radix_lut **)mem_ptr, static_cast(input_lut), - num_radix_blocks, params, allocate_gpu_memory); + (int_radix_lut **)mem_ptr, + static_cast(input_lut), num_radix_blocks, params, + allocate_gpu_memory); } void cuda_apply_bivariate_lut_kb_64( diff --git a/tfhe/src/high_level_api/array/mod.rs b/tfhe/src/high_level_api/array/mod.rs index 5ee745a489..9c5e1936a8 100644 --- a/tfhe/src/high_level_api/array/mod.rs +++ b/tfhe/src/high_level_api/array/mod.rs @@ -12,8 +12,11 @@ pub(in crate::high_level_api) mod traits; use crate::array::traits::TensorSlice; use crate::high_level_api::array::traits::HasClear; -use crate::high_level_api::global_state::with_cpu_internal_keys; +use crate::high_level_api::global_state; +#[cfg(feature = "gpu")] +use crate::high_level_api::global_state::with_thread_local_cuda_streams; use crate::high_level_api::integers::FheUintId; +use crate::high_level_api::keys::InternalServerKey; use crate::{FheBool, FheId, FheUint}; use std::ops::RangeBounds; use traits::{ArrayBackend, BackendDataContainer, BackendDataContainerMut}; @@ -345,20 +348,36 @@ declare_concrete_array_types!( ); pub fn fhe_uint_array_eq(lhs: &[FheUint], rhs: &[FheUint]) -> FheBool { - with_cpu_internal_keys(|cpu_keys| { - let tmp_lhs = lhs - .iter() - .map(|fhe_uint| fhe_uint.ciphertext.on_cpu().to_owned()) - .collect::>(); - let tmp_rhs = rhs - .iter() - .map(|fhe_uint| fhe_uint.ciphertext.on_cpu().to_owned()) - .collect::>(); - - let result = cpu_keys - .pbs_key() - .all_eq_slices_parallelized(&tmp_lhs, &tmp_rhs); - FheBool::new(result, cpu_keys.tag.clone()) + global_state::with_internal_keys(|sks| match sks { + InternalServerKey::Cpu(cpu_key) => { + let tmp_lhs = lhs + .iter() + .map(|fhe_uint| fhe_uint.ciphertext.on_cpu().to_owned()) + .collect::>(); + let tmp_rhs = rhs + .iter() + .map(|fhe_uint| fhe_uint.ciphertext.on_cpu().to_owned()) + .collect::>(); + + let result = cpu_key + .pbs_key() + .all_eq_slices_parallelized(&tmp_lhs, &tmp_rhs); + FheBool::new(result, cpu_key.tag.clone()) + } + #[cfg(feature = "gpu")] + InternalServerKey::Cuda(gpu_key) => with_thread_local_cuda_streams(|streams| { + let tmp_lhs = lhs + .iter() + .map(|fhe_uint| fhe_uint.clone().ciphertext.into_gpu()) + .collect::>(); + let tmp_rhs = rhs + .iter() + .map(|fhe_uint| fhe_uint.clone().ciphertext.into_gpu()) + .collect::>(); + + let result = gpu_key.key.key.all_eq_slices(&tmp_lhs, &tmp_rhs, streams); + FheBool::new(result, gpu_key.tag.clone()) + }), }) } @@ -366,19 +385,38 @@ pub fn fhe_uint_array_contains_sub_slice( lhs: &[FheUint], pattern: &[FheUint], ) -> FheBool { - with_cpu_internal_keys(|cpu_keys| { - let tmp_lhs = lhs - .iter() - .map(|fhe_uint| fhe_uint.ciphertext.on_cpu().to_owned()) - .collect::>(); - let tmp_pattern = pattern - .iter() - .map(|fhe_uint| fhe_uint.ciphertext.on_cpu().to_owned()) - .collect::>(); - - let result = cpu_keys - .pbs_key() - .contains_sub_slice_parallelized(&tmp_lhs, &tmp_pattern); - FheBool::new(result, cpu_keys.tag.clone()) + global_state::with_internal_keys(|sks| match sks { + InternalServerKey::Cpu(cpu_key) => { + let tmp_lhs = lhs + .iter() + .map(|fhe_uint| fhe_uint.ciphertext.on_cpu().to_owned()) + .collect::>(); + let tmp_pattern = pattern + .iter() + .map(|fhe_uint| fhe_uint.ciphertext.on_cpu().to_owned()) + .collect::>(); + + let result = cpu_key + .pbs_key() + .contains_sub_slice_parallelized(&tmp_lhs, &tmp_pattern); + FheBool::new(result, cpu_key.tag.clone()) + } + #[cfg(feature = "gpu")] + InternalServerKey::Cuda(gpu_key) => with_thread_local_cuda_streams(|streams| { + let tmp_lhs = lhs + .iter() + .map(|fhe_uint| fhe_uint.clone().ciphertext.into_gpu()) + .collect::>(); + let tmp_pattern = pattern + .iter() + .map(|fhe_uint| fhe_uint.clone().ciphertext.into_gpu()) + .collect::>(); + + let result = gpu_key + .key + .key + .contains_sub_slice(&tmp_lhs, &tmp_pattern, streams); + FheBool::new(result, gpu_key.tag.clone()) + }), }) } diff --git a/tfhe/src/integer/gpu/mod.rs b/tfhe/src/integer/gpu/mod.rs index ca3a49feb4..6fc3dcac4b 100644 --- a/tfhe/src/integer/gpu/mod.rs +++ b/tfhe/src/integer/gpu/mod.rs @@ -2713,8 +2713,8 @@ pub unsafe fn apply_many_univariate_lut_kb_async pub unsafe fn apply_bivariate_lut_kb_async( streams: &CudaStreams, radix_lwe_output: &mut CudaSliceMut, - radix_lwe_input_1: &CudaSlice, - radix_lwe_input_2: &CudaSlice, + radix_lwe_input_1: &CudaVec, + radix_lwe_input_2: &CudaVec, input_lut: &[T], bootstrapping_key: &CudaVec, keyswitch_key: &CudaVec, diff --git a/tfhe/src/integer/gpu/server_key/radix/mod.rs b/tfhe/src/integer/gpu/server_key/radix/mod.rs index 24c2c14a63..e9cce4b2f6 100644 --- a/tfhe/src/integer/gpu/server_key/radix/mod.rs +++ b/tfhe/src/integer/gpu/server_key/radix/mod.rs @@ -47,6 +47,7 @@ mod scalar_shift; mod scalar_sub; mod shift; mod sub; +mod vector_comparisons; mod vector_find; #[cfg(test)] diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_signed/mod.rs b/tfhe/src/integer/gpu/server_key/radix/tests_signed/mod.rs index b872f17176..57eecc5d96 100644 --- a/tfhe/src/integer/gpu/server_key/radix/tests_signed/mod.rs +++ b/tfhe/src/integer/gpu/server_key/radix/tests_signed/mod.rs @@ -18,6 +18,7 @@ pub(crate) mod test_scalar_shift; pub(crate) mod test_scalar_sub; pub(crate) mod test_shift; pub(crate) mod test_sub; +pub(crate) mod test_vector_comparisons; use crate::core_crypto::gpu::CudaStreams; use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock; @@ -565,3 +566,46 @@ where ) } } +impl<'a, F> + FunctionExecutor<(&'a [SignedRadixCiphertext], &'a [SignedRadixCiphertext]), BooleanBlock> + for GpuFunctionExecutor +where + F: Fn( + &CudaServerKey, + &[CudaSignedRadixCiphertext], + &[CudaSignedRadixCiphertext], + &CudaStreams, + ) -> CudaBooleanBlock, +{ + fn setup(&mut self, cks: &RadixClientKey, sks: Arc) { + self.setup_from_keys(cks, &sks); + } + + fn execute( + &mut self, + input: (&'a [SignedRadixCiphertext], &'a [SignedRadixCiphertext]), + ) -> BooleanBlock { + let context = self + .context + .as_ref() + .expect("setup was not properly called"); + + let mut d_ctxs1 = Vec::::with_capacity(input.0.len()); + for ctx in input.0 { + d_ctxs1.push(CudaSignedRadixCiphertext::from_signed_radix_ciphertext( + ctx, + &context.streams, + )); + } + let mut d_ctxs2 = Vec::::with_capacity(input.0.len()); + for ctx in input.1 { + d_ctxs2.push(CudaSignedRadixCiphertext::from_signed_radix_ciphertext( + ctx, + &context.streams, + )); + } + + let d_block = (self.func)(&context.sks, &d_ctxs1, &d_ctxs2, &context.streams); + d_block.to_boolean_block(&context.streams) + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_signed/test_vector_comparisons.rs b/tfhe/src/integer/gpu/server_key/radix/tests_signed/test_vector_comparisons.rs new file mode 100644 index 0000000000..0ffdbc5a19 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/tests_signed/test_vector_comparisons.rs @@ -0,0 +1,27 @@ +use crate::integer::gpu::server_key::radix::tests_unsigned::{ + create_gpu_parameterized_test, GpuFunctionExecutor, +}; +use crate::integer::gpu::CudaServerKey; +use crate::integer::server_key::radix_parallel::tests_signed::test_vector_comparisons::{ + default_all_eq_slices_test_case, unchecked_all_eq_slices_test_case, +}; +use crate::shortint::parameters::*; + +create_gpu_parameterized_test!(integer_signed_unchecked_all_eq_slices_test_case); +create_gpu_parameterized_test!(integer_signed_default_all_eq_slices_test_case); + +fn integer_signed_unchecked_all_eq_slices_test_case

(param: P) +where + P: Into, +{ + let executor = GpuFunctionExecutor::new(&CudaServerKey::unchecked_all_eq_slices); + unchecked_all_eq_slices_test_case(param, executor); +} + +fn integer_signed_default_all_eq_slices_test_case

(param: P) +where + P: Into, +{ + let executor = GpuFunctionExecutor::new(&CudaServerKey::all_eq_slices); + default_all_eq_slices_test_case(param, executor); +} diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_unsigned/mod.rs b/tfhe/src/integer/gpu/server_key/radix/tests_unsigned/mod.rs index eaee727179..07b592879f 100644 --- a/tfhe/src/integer/gpu/server_key/radix/tests_unsigned/mod.rs +++ b/tfhe/src/integer/gpu/server_key/radix/tests_unsigned/mod.rs @@ -17,6 +17,7 @@ pub(crate) mod test_scalar_shift; pub(crate) mod test_scalar_sub; pub(crate) mod test_shift; pub(crate) mod test_sub; +pub(crate) mod test_vector_comparisons; pub(crate) mod test_vector_find; use crate::core_crypto::gpu::CudaStreams; @@ -865,3 +866,43 @@ where (res, block) } } + +impl<'a, F> FunctionExecutor<(&'a [RadixCiphertext], &'a [RadixCiphertext]), BooleanBlock> + for GpuFunctionExecutor +where + F: Fn( + &CudaServerKey, + &[CudaUnsignedRadixCiphertext], + &[CudaUnsignedRadixCiphertext], + &CudaStreams, + ) -> CudaBooleanBlock, +{ + fn setup(&mut self, cks: &RadixClientKey, sks: Arc) { + self.setup_from_keys(cks, &sks); + } + + fn execute(&mut self, input: (&'a [RadixCiphertext], &'a [RadixCiphertext])) -> BooleanBlock { + let context = self + .context + .as_ref() + .expect("setup was not properly called"); + + let mut d_ctxs1 = Vec::::with_capacity(input.0.len()); + for ctx in input.0 { + d_ctxs1.push(CudaUnsignedRadixCiphertext::from_radix_ciphertext( + ctx, + &context.streams, + )); + } + let mut d_ctxs2 = Vec::::with_capacity(input.0.len()); + for ctx in input.1 { + d_ctxs2.push(CudaUnsignedRadixCiphertext::from_radix_ciphertext( + ctx, + &context.streams, + )); + } + + let d_block = (self.func)(&context.sks, &d_ctxs1, &d_ctxs2, &context.streams); + d_block.to_boolean_block(&context.streams) + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_unsigned/test_vector_comparisons.rs b/tfhe/src/integer/gpu/server_key/radix/tests_unsigned/test_vector_comparisons.rs new file mode 100644 index 0000000000..2b1c6b9cb0 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/tests_unsigned/test_vector_comparisons.rs @@ -0,0 +1,38 @@ +use crate::integer::gpu::server_key::radix::tests_unsigned::{ + create_gpu_parameterized_test, GpuFunctionExecutor, +}; +use crate::integer::gpu::CudaServerKey; +use crate::shortint::parameters::*; + +use crate::integer::server_key::radix_parallel::tests_unsigned::test_vector_comparisons::{ + default_all_eq_slices_test_case, unchecked_all_eq_slices_test_case, + unchecked_slice_contains_test_case, +}; + +create_gpu_parameterized_test!(integer_unchecked_all_eq_slices_test_case); +create_gpu_parameterized_test!(integer_default_all_eq_slices_test_case); +create_gpu_parameterized_test!(integer_unchecked_contains_slice_test_case); + +fn integer_unchecked_all_eq_slices_test_case

(param: P) +where + P: Into, +{ + let executor = GpuFunctionExecutor::new(&CudaServerKey::unchecked_all_eq_slices); + unchecked_all_eq_slices_test_case(param, executor); +} + +fn integer_default_all_eq_slices_test_case

(param: P) +where + P: Into, +{ + let executor = GpuFunctionExecutor::new(&CudaServerKey::all_eq_slices); + default_all_eq_slices_test_case(param, executor); +} + +fn integer_unchecked_contains_slice_test_case

(param: P) +where + P: Into, +{ + let executor = GpuFunctionExecutor::new(&CudaServerKey::unchecked_contains_sub_slice); + unchecked_slice_contains_test_case(param, executor); +} diff --git a/tfhe/src/integer/gpu/server_key/radix/vector_comparisons.rs b/tfhe/src/integer/gpu/server_key/radix/vector_comparisons.rs new file mode 100644 index 0000000000..3efb158321 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/vector_comparisons.rs @@ -0,0 +1,438 @@ +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::CudaStreams; +use crate::core_crypto::prelude::LweBskGroupingFactor; +use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock; +use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext}; +use crate::integer::gpu::server_key::radix::CudaRadixCiphertext; +use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey}; +use crate::integer::gpu::{apply_bivariate_lut_kb_async, PBSType}; + +impl CudaServerKey { + #[allow(clippy::unused_self)] + pub(crate) fn convert_integer_radixes_vec_to_single_integer_radix_ciphertext( + &self, + radixes: &[T], + streams: &CudaStreams, + ) -> T + where + T: CudaIntegerRadixCiphertext, + { + let packed_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list( + radixes + .iter() + .map(|ciphertext| &ciphertext.as_ref().d_blocks), + streams, + ); + CudaIntegerRadixCiphertext::from(CudaRadixCiphertext { + d_blocks: packed_list, + info: radixes[0].as_ref().info.clone(), + }) + } + + /// Compares two slices containing ciphertexts and returns an encryption of `true` if all + /// pairs are equal, otherwise, returns an encryption of `false`. + /// + /// - If slices do not have the same length, false is returned + /// - If at least one pair (`lhs[i]`, `rhs[i]`) do not have the same number of blocks, false is + /// returned + pub fn unchecked_all_eq_slices( + &self, + lhs: &[T], + rhs: &[T], + streams: &CudaStreams, + ) -> CudaBooleanBlock + where + T: CudaIntegerRadixCiphertext, + { + if lhs.len() != rhs.len() { + let trivial_ct: CudaUnsignedRadixCiphertext = self.create_trivial_radix(0, 1, streams); + + let trivial_bool = CudaBooleanBlock::from_cuda_radix_ciphertext( + trivial_ct.duplicate(streams).into_inner(), + ); + return trivial_bool; + } + // If both are empty, return true + if lhs.is_empty() { + let trivial_ct: CudaUnsignedRadixCiphertext = self.create_trivial_radix(1, 1, streams); + + let trivial_bool = CudaBooleanBlock::from_cuda_radix_ciphertext( + trivial_ct.duplicate(streams).into_inner(), + ); + return trivial_bool; + } + if lhs.iter().zip(rhs.iter()).any(|(l, r)| { + l.as_ref().d_blocks.lwe_ciphertext_count().0 + != r.as_ref().d_blocks.lwe_ciphertext_count().0 + }) { + let trivial_ct: CudaUnsignedRadixCiphertext = self.create_trivial_radix(0, 1, streams); + + let trivial_bool = CudaBooleanBlock::from_cuda_radix_ciphertext( + trivial_ct.duplicate(streams).into_inner(), + ); + return trivial_bool; + } + + let block_equality_lut = self.generate_lookup_table_bivariate(|l, r| u64::from(l == r)); + + let packed_lhs = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list( + lhs.iter().map(|ciphertext| &ciphertext.as_ref().d_blocks), + streams, + ); + let packed_rhs = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list( + rhs.iter().map(|ciphertext| &ciphertext.as_ref().d_blocks), + streams, + ); + + let num_radix_blocks = packed_rhs.lwe_ciphertext_count().0; + let lwe_size = lhs[0].as_ref().d_blocks.0.lwe_dimension.to_lwe_size().0; + let mut comparison_blocks: CudaUnsignedRadixCiphertext = + self.create_trivial_radix(0, num_radix_blocks, streams); + + let mut comparisons_slice = comparison_blocks + .as_mut() + .d_blocks + .0 + .d_vec + .as_mut_slice(0..lwe_size * num_radix_blocks, 0) + .unwrap(); + + unsafe { + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + apply_bivariate_lut_kb_async( + streams, + &mut comparisons_slice, + &packed_lhs.0.d_vec, + &packed_rhs.0.d_vec, + block_equality_lut.acc.acc.as_ref(), + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + num_radix_blocks as u32, + self.message_modulus, + self.carry_modulus, + PBSType::Classical, + LweBskGroupingFactor(0), + self.message_modulus.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + apply_bivariate_lut_kb_async( + streams, + &mut comparisons_slice, + &packed_lhs.0.d_vec, + &packed_rhs.0.d_vec, + block_equality_lut.acc.acc.as_ref(), + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + num_radix_blocks as u32, + self.message_modulus, + self.carry_modulus, + PBSType::MultiBit, + d_multibit_bsk.grouping_factor, + self.message_modulus.0 as u32, + ); + } + } + } + + self.unchecked_are_all_comparisons_block_true(&comparison_blocks, streams) + } + + /// Compares two slices containing ciphertexts and returns an encryption of `true` if all + /// pairs are equal, otherwise, returns an encryption of `false`. + /// + /// - If slices do not have the same length, false is returned + /// - If at least one pair (`lhs[i]`, `rhs[i]`) do not have the same number of blocks, false is + /// returned + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::CudaStreams; + /// use tfhe::core_crypto::gpu::vec::GpuIndex; + /// use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + /// + /// { + /// let number_of_blocks = 4; + /// + /// let gpu_index = 0; + /// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams); + /// + /// let mut d_ctxt_vec1 = Vec::::with_capacity(4); + /// + /// for i in 0..4 { + /// let msg_tmp = 3u16 + i; + /// let ctxt_tmp = cks.encrypt_radix(msg_tmp, number_of_blocks); + /// let d_ctxt_tmp = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ctxt_tmp, &streams); + /// d_ctxt_vec1.push(d_ctxt_tmp); + /// } + /// + /// let mut d_ctxt_vec2 = Vec::::with_capacity(4); + /// for i in 0..4 { + /// let msg_tmp = 3u16 + i%2; + /// let ctxt_tmp = cks.encrypt_radix(msg_tmp, number_of_blocks); + /// let d_ctxt_tmp = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ctxt_tmp, &streams); + /// d_ctxt_vec2.push(d_ctxt_tmp); + /// } + /// + /// // Homomorphically check if two vectors of ciphertexts are equal + /// let d_check = sks.all_eq_slices(&d_ctxt_vec1, &d_ctxt_vec2, &streams); + /// + /// // Decrypt + /// let check = d_check.to_boolean_block(&streams); + /// let is_ok = cks.decrypt_bool(&check); + /// assert_eq!(is_ok, false) + /// } + /// { + /// let number_of_blocks = 4; + /// + /// let gpu_index = 0; + /// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams); + /// + /// let mut d_ctxt_vec1 = Vec::::with_capacity(4); + /// + /// for i in 0..4 { + /// let msg_tmp = 3u16 + i; + /// let ctxt_tmp = cks.encrypt_radix(msg_tmp, number_of_blocks); + /// let d_ctxt_tmp = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ctxt_tmp, &streams); + /// d_ctxt_vec1.push(d_ctxt_tmp); + /// } + /// + /// let mut d_ctxt_vec2 = Vec::::with_capacity(4); + /// for i in 0..4 { + /// let msg_tmp = 3u16 + i; + /// let ctxt_tmp = cks.encrypt_radix(msg_tmp, number_of_blocks); + /// let d_ctxt_tmp = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ctxt_tmp, &streams); + /// d_ctxt_vec2.push(d_ctxt_tmp); + /// } + /// + /// // Homomorphically check if two vectors of ciphertexts are equal + /// let d_check = sks.all_eq_slices(&d_ctxt_vec1, &d_ctxt_vec2, &streams); + /// + /// // Decrypt + /// let check = d_check.to_boolean_block(&streams); + /// let is_ok = cks.decrypt_bool(&check); + /// assert_eq!(is_ok, true) + /// } + /// ``` + pub fn all_eq_slices(&self, lhs: &[T], rhs: &[T], streams: &CudaStreams) -> CudaBooleanBlock + where + T: CudaIntegerRadixCiphertext, + { + let mut tmp_lhs = Vec::::with_capacity(lhs.len()); + let lhs = if lhs.iter().any(|ct| !ct.block_carries_are_empty()) { + // Need a way to parallelize this step + for ct in lhs.iter() { + let mut temp_ct = unsafe { ct.duplicate_async(streams) }; + if !temp_ct.block_carries_are_empty() { + unsafe { + self.full_propagate_assign_async(&mut temp_ct, streams); + } + } + tmp_lhs.push(temp_ct); + } + &tmp_lhs + } else { + lhs + }; + + let mut tmp_rhs = Vec::::with_capacity(rhs.len()); + let rhs = if rhs.iter().any(|ct| !ct.block_carries_are_empty()) { + // Need a way to parallelize this step + for ct in rhs.iter() { + let mut temp_ct = unsafe { ct.duplicate_async(streams) }; + if !temp_ct.block_carries_are_empty() { + unsafe { + self.full_propagate_assign_async(&mut temp_ct, streams); + } + } + tmp_rhs.push(temp_ct); + } + &tmp_rhs + } else { + rhs + }; + self.unchecked_all_eq_slices(lhs, rhs, streams) + } + + /// Returns a boolean ciphertext encrypting `true` if `lhs` contains `rhs`, `false` otherwise + pub fn unchecked_contains_sub_slice( + &self, + lhs: &[T], + rhs: &[T], + streams: &CudaStreams, + ) -> CudaBooleanBlock + where + T: CudaIntegerRadixCiphertext, + { + if rhs.len() > lhs.len() { + let trivial_ct: CudaUnsignedRadixCiphertext = self.create_trivial_radix(0, 1, streams); + + let trivial_bool = CudaBooleanBlock::from_cuda_radix_ciphertext( + trivial_ct.duplicate(streams).into_inner(), + ); + return trivial_bool; + } + + let windows_results = lhs + .windows(rhs.len()) + .map(|lhs_sub_slice| self.unchecked_all_eq_slices(lhs_sub_slice, rhs, streams).0) + .collect::>(); + let packed_windows_results = self + .convert_integer_radixes_vec_to_single_integer_radix_ciphertext( + &windows_results, + streams, + ); + self.unchecked_is_at_least_one_comparisons_block_true(&packed_windows_results, streams) + } + + /// Returns a boolean ciphertext encrypting `true` if `lhs` contains `rhs`, `false` otherwise + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::CudaStreams; + /// use tfhe::core_crypto::gpu::vec::GpuIndex; + /// use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + /// { + /// let number_of_blocks = 4; + /// + /// let gpu_index = 0; + /// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams); + /// + /// let mut d_ctxt_vec1 = Vec::::with_capacity(4); + /// + /// for i in 0..4 { + /// let msg_tmp = 3u16 + i; + /// let ctxt_tmp = cks.encrypt_radix(msg_tmp, number_of_blocks); + /// let d_ctxt_tmp = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ctxt_tmp, &streams); + /// d_ctxt_vec1.push(d_ctxt_tmp); + /// } + /// + /// let mut d_ctxt_vec2 = Vec::::with_capacity(4); + /// for i in 0..2 { + /// let msg_tmp = 8u16 + i; + /// let ctxt_tmp = cks.encrypt_radix(msg_tmp, number_of_blocks); + /// let d_ctxt_tmp = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ctxt_tmp, &streams); + /// d_ctxt_vec2.push(d_ctxt_tmp); + /// } + /// + /// // Homomorphically check if vectors1 contains vector2 + /// let d_check = sks.contains_sub_slice(&d_ctxt_vec1, &d_ctxt_vec2, &streams); + /// + /// // Decrypt + /// let check = d_check.to_boolean_block(&streams); + /// let is_ok = cks.decrypt_bool(&check); + /// assert_eq!(is_ok, false) + /// } + /// { + /// let number_of_blocks = 4; + /// + /// let gpu_index = 0; + /// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams); + /// + /// let mut d_ctxt_vec1 = Vec::::with_capacity(4); + /// + /// for i in 0..4 { + /// let msg_tmp = 3u16 + i; + /// let ctxt_tmp = cks.encrypt_radix(msg_tmp, number_of_blocks); + /// let d_ctxt_tmp = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ctxt_tmp, &streams); + /// d_ctxt_vec1.push(d_ctxt_tmp); + /// } + /// + /// let mut d_ctxt_vec2 = Vec::::with_capacity(4); + /// for i in 0..2 { + /// let msg_tmp = 4u16 + i; + /// let ctxt_tmp = cks.encrypt_radix(msg_tmp, number_of_blocks); + /// let d_ctxt_tmp = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ctxt_tmp, &streams); + /// d_ctxt_vec2.push(d_ctxt_tmp); + /// } + /// + /// // Homomorphically check if vectors1 contains vector2 + /// let d_check = sks.contains_sub_slice(&d_ctxt_vec1, &d_ctxt_vec2, &streams); + /// + /// // Decrypt + /// let check = d_check.to_boolean_block(&streams); + /// let is_ok = cks.decrypt_bool(&check); + /// assert_eq!(is_ok, true) + /// } + /// ``` + pub fn contains_sub_slice( + &self, + lhs: &[T], + rhs: &[T], + streams: &CudaStreams, + ) -> CudaBooleanBlock + where + T: CudaIntegerRadixCiphertext, + { + let mut tmp_lhs = Vec::::with_capacity(lhs.len()); + let lhs = if lhs.iter().any(|ct| !ct.block_carries_are_empty()) { + // Need a way to parallelize this step + for ct in lhs.iter() { + let mut temp_ct = unsafe { ct.duplicate_async(streams) }; + if !temp_ct.block_carries_are_empty() { + unsafe { + self.full_propagate_assign_async(&mut temp_ct, streams); + } + } + tmp_lhs.push(temp_ct); + } + &tmp_lhs + } else { + lhs + }; + + let mut tmp_rhs = Vec::::with_capacity(rhs.len()); + let rhs = if rhs.iter().any(|ct| !ct.block_carries_are_empty()) { + // Need a way to parallelize this step + for ct in rhs.iter() { + let mut temp_ct = unsafe { ct.duplicate_async(streams) }; + if !temp_ct.block_carries_are_empty() { + unsafe { + self.full_propagate_assign_async(&mut temp_ct, streams); + } + } + tmp_rhs.push(temp_ct); + } + &tmp_rhs + } else { + rhs + }; + self.unchecked_contains_sub_slice(lhs, rhs, streams) + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/vector_find.rs b/tfhe/src/integer/gpu/server_key/radix/vector_find.rs index 7e9f15337d..da1cf63475 100644 --- a/tfhe/src/integer/gpu/server_key/radix/vector_find.rs +++ b/tfhe/src/integer/gpu/server_key/radix/vector_find.rs @@ -279,9 +279,9 @@ impl CudaServerKey { .max(num_blocks_to_represent_or_value); let or_value: CudaUnsignedRadixCiphertext = self.create_trivial_radix(or_value, num_blocks, streams); - + let casted_result = self.cast_to_unsigned(result, num_blocks, streams); // Note, this could be slightly faster when we have scalar if then_else - self.unchecked_if_then_else(&selected, &result, &or_value, streams) + self.unchecked_if_then_else(&selected, &casted_result, &or_value, streams) } /// `match` an input value to an output value