From b7ced1e0392a789edddf90a02c027d4fd7ba2d95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Test=C3=A9?= Date: Thu, 12 Sep 2024 15:15:59 +0200 Subject: [PATCH] chore(bench): make compression benchmarks available for database --- .../workflows/benchmark_gpu_integer_full.yml | 6 + .github/workflows/benchmark_integer.yml | 6 + Makefile | 6 + .../integer/glwe_packing_compression.rs | 241 ++++++++++-------- tfhe/src/shortint/keycache.rs | 28 ++ .../shortint/parameters/list_compression.rs | 2 +- 6 files changed, 186 insertions(+), 103 deletions(-) diff --git a/.github/workflows/benchmark_gpu_integer_full.yml b/.github/workflows/benchmark_gpu_integer_full.yml index 27037a9ea9..84a929ad42 100644 --- a/.github/workflows/benchmark_gpu_integer_full.yml +++ b/.github/workflows/benchmark_gpu_integer_full.yml @@ -129,6 +129,12 @@ jobs: run: | make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu + # Run these benchmarks only once + - name: Run compression benchmarks with AVX512 + if: matrix.op_flavor == 'default' && matrix.command == 'integer' + run: | + make bench_integer_compression_gpu + - name: Parse results run: | python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ diff --git a/.github/workflows/benchmark_integer.yml b/.github/workflows/benchmark_integer.yml index 8c094c81fc..76954ca1eb 100644 --- a/.github/workflows/benchmark_integer.yml +++ b/.github/workflows/benchmark_integer.yml @@ -125,6 +125,12 @@ jobs: run: | make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }} + # Run these benchmarks only once + - name: Run compression benchmarks with AVX512 + if: matrix.op_flavor == 'default' && matrix.command == 'integer' + run: | + make bench_integer_compression + - name: Parse results run: | python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ diff --git a/Makefile b/Makefile index 9b33b8b123..0698149f81 100644 --- a/Makefile +++ b/Makefile @@ -893,6 +893,12 @@ bench_integer_gpu: install_rs_check_toolchain --bench integer-bench \ --features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- +.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression +bench_integer_compression: install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + --bench glwe_packing_compression-integer-bench \ + --features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- + .PHONY: bench_integer_compression_gpu bench_integer_compression_gpu: install_rs_check_toolchain RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ diff --git a/tfhe/benches/integer/glwe_packing_compression.rs b/tfhe/benches/integer/glwe_packing_compression.rs index 992fdb5652..850f2a6796 100644 --- a/tfhe/benches/integer/glwe_packing_compression.rs +++ b/tfhe/benches/integer/glwe_packing_compression.rs @@ -1,33 +1,24 @@ +#[path = "../utilities.rs"] +mod utilities; + +use crate::utilities::{write_to_json, OperatorType}; use criterion::{black_box, criterion_group, Criterion}; use tfhe::integer::ciphertext::CompressedCiphertextListBuilder; use tfhe::integer::{ClientKey, RadixCiphertext}; +use tfhe::keycache::NamedParam; use tfhe::shortint::parameters::list_compression::COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; -#[cfg(feature = "gpu")] -use tfhe::core_crypto::gpu::CudaStreams; - -#[cfg(feature = "gpu")] -use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder; - -#[cfg(feature = "gpu")] -use tfhe::integer::gpu::ciphertext::{CudaRadixCiphertext, CudaUnsignedRadixCiphertext}; - -#[cfg(feature = "gpu")] -use tfhe::integer::gpu::gen_keys_radix_gpu; - fn cpu_glwe_packing(c: &mut Criterion) { - let param = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - - let comp_param = COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - - let bench_name = "integer_packing_compression"; - + let bench_name = "integer::packing_compression"; let mut bench_group = c.benchmark_group(bench_name); bench_group .sample_size(15) .measurement_time(std::time::Duration::from_secs(30)); + let param = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + let comp_param = COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + let cks = ClientKey::new(param); let private_compression_key = cks.new_compression_private_key(comp_param); @@ -37,7 +28,7 @@ fn cpu_glwe_packing(c: &mut Criterion) { let log_message_modulus = param.message_modulus.0.ilog2() as usize; - for num_bits in [ + for bit_size in [ 8, 16, 32, @@ -46,8 +37,8 @@ fn cpu_glwe_packing(c: &mut Criterion) { 256, comp_param.lwe_per_glwe.0 * log_message_modulus, ] { - assert_eq!(num_bits % log_message_modulus, 0); - let num_blocks = num_bits / log_message_modulus; + assert_eq!(bit_size % log_message_modulus, 0); + let num_blocks = bit_size / log_message_modulus; let ct = cks.encrypt_radix(0_u32, num_blocks); @@ -55,7 +46,8 @@ fn cpu_glwe_packing(c: &mut Criterion) { builder.push(ct); - bench_group.bench_function(format!("pack_u{num_bits}"), |b| { + let bench_id = format!("{bench_name}::pack_u{bit_size}"); + bench_group.bench_function(&bench_id, |b| { b.iter(|| { let compressed = builder.build(&compression_key); @@ -63,9 +55,20 @@ fn cpu_glwe_packing(c: &mut Criterion) { }) }); + write_to_json::( + &bench_id, + comp_param, + comp_param.name(), + "pack", + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus.0.ilog2(); num_blocks], + ); + let compressed = builder.build(&compression_key); - bench_group.bench_function(format!("unpack_u{num_bits}"), |b| { + let bench_id = format!("{bench_name}::unpack_u{bit_size}"); + bench_group.bench_function(&bench_id, |b| { b.iter(|| { let unpacked: RadixCiphertext = compressed.get(0, &decompression_key).unwrap().unwrap(); @@ -74,94 +77,128 @@ fn cpu_glwe_packing(c: &mut Criterion) { }) }); - bench_group.bench_function(format!("pack_unpack_u{num_bits}"), |b| { - b.iter(|| { - let compressed = builder.build(&compression_key); - - let unpacked: RadixCiphertext = - compressed.get(0, &decompression_key).unwrap().unwrap(); - - _ = black_box(unpacked); - }) - }); + write_to_json::( + &bench_id, + comp_param, + comp_param.name(), + "unpack", + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus.0.ilog2(); num_blocks], + ); } + + bench_group.finish() } #[cfg(feature = "gpu")] -fn gpu_glwe_packing(c: &mut Criterion) { - let bench_name = "integer_cuda_packing_compression"; - let mut bench_group = c.benchmark_group(bench_name); - bench_group - .sample_size(15) - .measurement_time(std::time::Duration::from_secs(30)); - - let stream = CudaStreams::new_multi_gpu(); - - let param = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - let comp_param = COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; - - let log_message_modulus = param.message_modulus.0.ilog2() as usize; - - for bit_size in [ - 8, - 16, - 32, - 64, - 128, - 256, - comp_param.lwe_per_glwe.0 * log_message_modulus, - ] { - assert_eq!(bit_size % log_message_modulus, 0); - let num_blocks = bit_size / log_message_modulus; - - // Generate private compression key - let cks = ClientKey::new(param); - let private_compression_key = cks.new_compression_private_key(comp_param); - - // Generate and convert compression keys - let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream); - let (compressed_compression_key, compressed_decompression_key) = - radix_cks.new_compressed_compression_decompression_keys(&private_compression_key); - let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&stream); - let cuda_decompression_key = - compressed_decompression_key.decompress_to_cuda(radix_cks.parameters(), &stream); - - // Encrypt - let ct = cks.encrypt_radix(0_u32, num_blocks); - let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream); - - // Benchmark - let mut builder = CudaCompressedCiphertextListBuilder::new(); - - builder.push(d_ct, &stream); - - let bench_id = format!("pack_u{bit_size}"); - bench_group.bench_function(&bench_id, |b| { - b.iter(|| { - let compressed = builder.build(&cuda_compression_key, &stream); - - _ = black_box(compressed); - }) - }); - - let compressed = builder.build(&cuda_compression_key, &stream); - - let bench_id = format!("unpack_u{bit_size}"); - bench_group.bench_function(&bench_id, |b| { - b.iter(|| { - let unpacked: CudaRadixCiphertext = - compressed.get(0, &cuda_decompression_key, &stream); - - _ = black_box(unpacked); - }) - }); +mod cuda { + use super::*; + use tfhe::core_crypto::gpu::CudaStreams; + use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder; + use tfhe::integer::gpu::ciphertext::{CudaRadixCiphertext, CudaUnsignedRadixCiphertext}; + use tfhe::integer::gpu::gen_keys_radix_gpu; + + fn gpu_glwe_packing(c: &mut Criterion) { + let bench_name = "integer::cuda::packing_compression"; + let mut bench_group = c.benchmark_group(bench_name); + bench_group + .sample_size(15) + .measurement_time(std::time::Duration::from_secs(30)); + + let stream = CudaStreams::new_multi_gpu(); + + let param = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + let comp_param = COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + + let log_message_modulus = param.message_modulus.0.ilog2() as usize; + + for bit_size in [ + 8, + 16, + 32, + 64, + 128, + 256, + comp_param.lwe_per_glwe.0 * log_message_modulus, + ] { + assert_eq!(bit_size % log_message_modulus, 0); + let num_blocks = bit_size / log_message_modulus; + + // Generate private compression key + let cks = ClientKey::new(param); + let private_compression_key = cks.new_compression_private_key(comp_param); + + // Generate and convert compression keys + let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream); + let (compressed_compression_key, compressed_decompression_key) = + radix_cks.new_compressed_compression_decompression_keys(&private_compression_key); + let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&stream); + let cuda_decompression_key = + compressed_decompression_key.decompress_to_cuda(radix_cks.parameters(), &stream); + + // Encrypt + let ct = cks.encrypt_radix(0_u32, num_blocks); + let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream); + + // Benchmark + let mut builder = CudaCompressedCiphertextListBuilder::new(); + + builder.push(d_ct, &stream); + + let bench_id = format!("{bench_name}::pack_u{bit_size}"); + bench_group.bench_function(&bench_id, |b| { + b.iter(|| { + let compressed = builder.build(&cuda_compression_key, &stream); + + _ = black_box(compressed); + }) + }); + + write_to_json::( + &bench_id, + comp_param, + comp_param.name(), + "pack", + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus.0.ilog2(); num_blocks], + ); + + let compressed = builder.build(&cuda_compression_key, &stream); + + let bench_id = format!("{bench_name}::unpack_u{bit_size}"); + bench_group.bench_function(&bench_id, |b| { + b.iter(|| { + let unpacked: CudaRadixCiphertext = + compressed.get(0, &cuda_decompression_key, &stream); + + _ = black_box(unpacked); + }) + }); + + write_to_json::( + &bench_id, + comp_param, + comp_param.name(), + "unpack", + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus.0.ilog2(); num_blocks], + ); + } + + bench_group.finish() } + + criterion_group!(gpu_glwe_packing2, gpu_glwe_packing); } -#[cfg(feature = "gpu")] -criterion_group!(gpu_glwe_packing2, gpu_glwe_packing); criterion_group!(cpu_glwe_packing2, cpu_glwe_packing); +#[cfg(feature = "gpu")] +use cuda::gpu_glwe_packing2; + fn main() { #[cfg(feature = "gpu")] gpu_glwe_packing2(); diff --git a/tfhe/src/shortint/keycache.rs b/tfhe/src/shortint/keycache.rs index 5a58a82774..e575bf309b 100644 --- a/tfhe/src/shortint/keycache.rs +++ b/tfhe/src/shortint/keycache.rs @@ -6,6 +6,7 @@ use crate::shortint::parameters::classic::tuniform::p_fail_2_minus_64::ks_pbs::P use crate::shortint::parameters::coverage_parameters::*; use crate::shortint::parameters::key_switching::p_fail_2_minus_64::ks_pbs::PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS; use crate::shortint::parameters::key_switching::*; +use crate::shortint::parameters::list_compression::*; use crate::shortint::parameters::multi_bit::*; use crate::shortint::parameters::parameters_wopbs::*; use crate::shortint::parameters::*; @@ -286,6 +287,33 @@ impl NamedParam for ShortintKeySwitchingParameters { } } +impl NamedParam for CompressionParameters { + fn name(&self) -> String { + named_params_impl!(expose + COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64, + COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64 + ); + named_params_impl!( + { + *self; + Self + } == (COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64) + ); + + named_params_impl!( + { + *self; + Self + } == (COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64) + ); + + format!( + "COMP_PARAM_CUSTOM_BR_LEVEL_{}_NOISE_DISTRIB_{}", + self.br_level.0, self.packing_ks_key_noise_distribution + ) + } +} + impl From for (ClientKey, ServerKey) { fn from(param: PBSParameters) -> Self { let param_set = ShortintParameterSet::from(param); diff --git a/tfhe/src/shortint/parameters/list_compression.rs b/tfhe/src/shortint/parameters/list_compression.rs index b6bad7ea83..eef5521bf8 100644 --- a/tfhe/src/shortint/parameters/list_compression.rs +++ b/tfhe/src/shortint/parameters/list_compression.rs @@ -8,7 +8,7 @@ use crate::shortint::parameters::{ }; use std::fmt::Debug; -#[derive(Copy, Clone, Debug, serde::Serialize, serde::Deserialize, Versionize)] +#[derive(Copy, Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize, Versionize)] #[versionize(CompressionParametersVersions)] pub struct CompressionParameters { pub br_level: DecompositionLevelCount,