Skip to content

Commit

Permalink
feat: implement ProverContextConfig
Browse files Browse the repository at this point in the history
  • Loading branch information
robik75 committed Sep 3, 2024
1 parent d4f2f8c commit ca9e8f7
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 63 deletions.
20 changes: 10 additions & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@ resolver = "2"

[workspace.package]
# All the packages in the workspace should have the same version
version = "0.150.4"
version = "0.150.5"

[workspace.dependencies]
# Local dependencies
bindings-generator = { version = "=0.150.4", path = "crates/bindings-generator" }
boojum-cuda = { version = "=0.150.4", path = "crates/boojum-cuda" }
era_criterion_cuda = { version = "=0.150.4", path = "crates/criterion-cuda" }
era_cudart = { version = "=0.150.4", path = "crates/cudart" }
era_cudart_sys = { version = "=0.150.4", path = "crates/cudart-sys" }
gpu-ffi = { version = "=0.150.4", path = "crates/gpu-ffi", package = "zksync-gpu-ffi" }
gpu-prover = { version = "=0.150.4", path = "crates/gpu-prover", package = "zksync-gpu-prover" }
shivini = { version = "=0.150.4", path = "crates/shivini" }
wrapper-prover = { version = "=0.150.4", path = "crates/wrapper-prover", package = "zksync-wrapper-prover" }
bindings-generator = { version = "=0.150.5", path = "crates/bindings-generator" }
boojum-cuda = { version = "=0.150.5", path = "crates/boojum-cuda" }
era_criterion_cuda = { version = "=0.150.5", path = "crates/criterion-cuda" }
era_cudart = { version = "=0.150.5", path = "crates/cudart" }
era_cudart_sys = { version = "=0.150.5", path = "crates/cudart-sys" }
gpu-ffi = { version = "=0.150.5", path = "crates/gpu-ffi", package = "zksync-gpu-ffi" }
gpu-prover = { version = "=0.150.5", path = "crates/gpu-prover", package = "zksync-gpu-prover" }
shivini = { version = "=0.150.5", path = "crates/shivini" }
wrapper-prover = { version = "=0.150.5", path = "crates/wrapper-prover", package = "zksync-wrapper-prover" }

# These dependencies should be shared by all the crates.
circuit_definitions = { version = "=0.150.4" }
Expand Down
109 changes: 69 additions & 40 deletions crates/shivini/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,60 @@ static mut CONTEXT: Option<ProverContextSingleton> = None;

pub struct ProverContext;

pub const ZKSYNC_DEFAULT_TRACE_LOG_LENGTH: usize = 20;
pub const ZKSYNC_DEFAULT_TRACE_LOG_LENGTH: u32 = 20;

/// Tunable parameters for constructing a `ProverContext` through
/// `ProverContext::create_with_config`.
///
/// Start from `ProverContextConfig::default()` and override individual
/// settings with the chainable `with_*` methods; every one of them consumes
/// the config and returns the updated copy.
#[derive(Copy, Clone, Debug)]
pub struct ProverContextConfig {
    /// Lower bound of the device allocation, in bytes. `create_with_config`
    /// converts it to a block count by dividing by the block size in bytes;
    /// `None` selects `DEFAULT_MIN_NUM_BLOCKS` blocks instead.
    minimum_device_allocation: Option<usize>,
    /// Upper bound of the device allocation, in bytes, converted to a block
    /// count the same way. With `None` the allocator is initialised through
    /// `StaticDeviceAllocator::init_all`, which derives the upper bound from
    /// the free device memory.
    maximum_device_allocation: Option<usize>,
    /// Domain size used as the allocator block size, counted in field
    /// elements. Kept a power of two by the builder.
    smallest_supported_domain_size: usize,
    /// Coarse log count for the powers-of-w table.
    // NOTE(review): the visible `create_with_config` still passes the literal
    // `12, 12` to `CudaContext::create` - confirm this field is meant to be
    // forwarded there.
    powers_of_w_coarse_log_count: u32,
    /// Coarse log count for the powers-of-g table (same review note as for
    /// `powers_of_w_coarse_log_count`).
    powers_of_g_coarse_log_count: u32,
}

impl Default for ProverContextConfig {
    /// No explicit allocation bounds, a domain of
    /// `1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH` elements and coarse log counts
    /// of 12 for both twiddle tables.
    fn default() -> Self {
        Self {
            minimum_device_allocation: None,
            maximum_device_allocation: None,
            smallest_supported_domain_size: 1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH,
            powers_of_w_coarse_log_count: 12,
            powers_of_g_coarse_log_count: 12,
        }
    }
}

impl ProverContextConfig {
    /// Sets the minimum device allocation, in bytes.
    #[must_use = "builder methods return the updated config instead of mutating in place"]
    pub fn with_minimum_device_allocation(mut self, minimum_device_allocation: usize) -> Self {
        self.minimum_device_allocation = Some(minimum_device_allocation);
        self
    }

    /// Sets the maximum device allocation, in bytes.
    #[must_use = "builder methods return the updated config instead of mutating in place"]
    pub fn with_maximum_device_allocation(mut self, maximum_device_allocation: usize) -> Self {
        self.maximum_device_allocation = Some(maximum_device_allocation);
        self
    }

    /// Sets the smallest supported domain size, in field elements.
    ///
    /// # Panics
    ///
    /// Panics if `smallest_supported_domain_size` is not a power of two
    /// (zero included).
    #[must_use = "builder methods return the updated config instead of mutating in place"]
    pub fn with_smallest_supported_domain_size(
        mut self,
        smallest_supported_domain_size: usize,
    ) -> Self {
        assert!(
            smallest_supported_domain_size.is_power_of_two(),
            "smallest_supported_domain_size must be a power of two, got {smallest_supported_domain_size}"
        );
        self.smallest_supported_domain_size = smallest_supported_domain_size;
        self
    }

    /// Sets the coarse log count for the powers-of-w table.
    #[must_use = "builder methods return the updated config instead of mutating in place"]
    pub fn with_powers_of_w_coarse_log_count(mut self, powers_of_w_coarse_log_count: u32) -> Self {
        self.powers_of_w_coarse_log_count = powers_of_w_coarse_log_count;
        self
    }

    /// Sets the coarse log count for the powers-of-g table.
    #[must_use = "builder methods return the updated config instead of mutating in place"]
    pub fn with_powers_of_g_coarse_log_count(mut self, powers_of_g_coarse_log_count: u32) -> Self {
        self.powers_of_g_coarse_log_count = powers_of_g_coarse_log_count;
        self
    }
}

impl ProverContext {
fn create_internal(
Expand Down Expand Up @@ -100,50 +153,26 @@ impl ProverContext {
}

pub fn create() -> CudaResult<Self> {
// size counts in field elements
let block_size = 1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH;
let cuda_ctx = CudaContext::create(12, 12)?;
// grab small slice then consume everything
let small_device_alloc = SmallStaticDeviceAllocator::init()?;
let device_alloc = StaticDeviceAllocator::init_all(block_size)?;
let small_host_alloc = SmallStaticHostAllocator::init()?;
let host_alloc = StaticHostAllocator::init(1 << 8, block_size)?;
Self::create_internal(
cuda_ctx,
small_device_alloc,
device_alloc,
small_host_alloc,
host_alloc,
)
}

#[cfg(test)]
pub(crate) fn create_limited(num_blocks: usize) -> CudaResult<Self> {
// size counts in field elements
let block_size = 1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH;
let cuda_ctx = CudaContext::create(12, 12)?;
// grab small slice then consume everything
let small_device_alloc = SmallStaticDeviceAllocator::init()?;
let device_alloc = StaticDeviceAllocator::init(num_blocks, num_blocks, block_size)?;
let small_host_alloc = SmallStaticHostAllocator::init()?;
let host_alloc = StaticHostAllocator::init(1 << 8, block_size)?;
Self::create_internal(
cuda_ctx,
small_device_alloc,
device_alloc,
small_host_alloc,
host_alloc,
)
Self::create_with_config(ProverContextConfig::default())
}

#[cfg(test)]
pub(crate) fn dev(domain_size: usize) -> CudaResult<Self> {
assert!(domain_size.is_power_of_two());
pub fn create_with_config(config: ProverContextConfig) -> CudaResult<Self> {
// size counts in field elements
let block_size = domain_size;
let block_size = config.smallest_supported_domain_size;
let block_size_in_bytes = block_size * size_of::<F>();
let cuda_ctx = CudaContext::create(12, 12)?;
let small_device_alloc = SmallStaticDeviceAllocator::init()?;
let device_alloc = StaticDeviceAllocator::init_all(block_size)?;
let min_num_blocks = if let Some(min) = config.minimum_device_allocation {
min / block_size_in_bytes
} else {
DEFAULT_MIN_NUM_BLOCKS
};
let device_alloc = if let Some(max) = config.maximum_device_allocation {
let max_num_blocks = max / block_size_in_bytes;
StaticDeviceAllocator::init(min_num_blocks, max_num_blocks, block_size)?
} else {
StaticDeviceAllocator::init_all(min_num_blocks, block_size)?
};
let small_host_alloc = SmallStaticHostAllocator::init()?;
let host_alloc = StaticHostAllocator::init(1 << 8, block_size)?;
Self::create_internal(
Expand Down
14 changes: 7 additions & 7 deletions crates/shivini/src/static_allocator/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ use std::ptr::NonNull;
use std::sync::{Arc, Mutex};

pub const FREE_MEMORY_SLACK: usize = 1 << 23; // 8 MB
pub const MIN_NUM_BLOCKS: usize = 512;
pub const DEFAULT_MIN_NUM_BLOCKS: usize = 512;
pub const SMALL_ALLOCATOR_BLOCK_SIZE: usize = 32;
pub const SMALL_ALLOCATOR_BLOCKS_COUNT: usize = 1 << 10; // 256 KB

#[derive(Derivative)]
Expand Down Expand Up @@ -145,7 +146,7 @@ mod stats {
impl Default for StaticDeviceAllocator {
fn default() -> Self {
let domain_size = 1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH;
Self::init_all(domain_size).unwrap()
Self::init_all(DEFAULT_MIN_NUM_BLOCKS, domain_size).unwrap()
}
}

Expand All @@ -166,8 +167,8 @@ impl StaticDeviceAllocator {
}

pub fn init(
max_num_blocks: usize,
min_num_blocks: usize,
max_num_blocks: usize,
block_size: usize,
) -> CudaResult<Self> {
assert_ne!(min_num_blocks, 0);
Expand Down Expand Up @@ -205,14 +206,14 @@ impl StaticDeviceAllocator {
Err(CudaError::ErrorMemoryAllocation)
}

pub fn init_all(block_size: usize) -> CudaResult<Self> {
pub fn init_all(min_num_blocks: usize, block_size: usize) -> CudaResult<Self> {
let block_size_in_bytes = block_size * std::mem::size_of::<F>();
let (memory_size_in_bytes, _total) = memory_get_info().expect("get memory info");
assert!(memory_size_in_bytes >= FREE_MEMORY_SLACK);
let free_memory_size_in_bytes = memory_size_in_bytes - FREE_MEMORY_SLACK;
assert!(free_memory_size_in_bytes >= block_size);
let max_num_blocks = free_memory_size_in_bytes / block_size_in_bytes;
Self::init(max_num_blocks, MIN_NUM_BLOCKS, block_size)
Self::init(min_num_blocks, max_num_blocks, block_size)
}

fn find_free_block(&self) -> Option<usize> {
Expand Down Expand Up @@ -384,11 +385,10 @@ pub struct SmallStaticDeviceAllocator {
impl SmallStaticDeviceAllocator {
pub fn init() -> CudaResult<Self> {
// cuda requires alignment to be multiple of 32 goldilocks elems
const BLOCK_SIZE: usize = 32;
let inner = StaticDeviceAllocator::init(
SMALL_ALLOCATOR_BLOCKS_COUNT,
SMALL_ALLOCATOR_BLOCKS_COUNT,
BLOCK_SIZE,
SMALL_ALLOCATOR_BLOCK_SIZE,
)?;
Ok(Self { inner })
}
Expand Down
23 changes: 17 additions & 6 deletions crates/shivini/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ fn test_proof_comparison_for_poseidon_gate_with_private_witnesses() {
prover_config.merkle_tree_cap_size,
);
let domain_size = setup_cs.max_trace_len;
let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
let _ctx = ProverContext::create_with_config(
ProverContextConfig::default().with_smallest_supported_domain_size(domain_size),
)
.expect("init gpu prover context");
let gpu_setup = GpuSetup::<Global>::from_setup_and_hints(
setup_base.clone(),
clone_reference_tree(&setup_tree),
Expand Down Expand Up @@ -225,7 +228,8 @@ fn test_permutation_polys() {
let expected_permutation_polys = setup_base.copy_permutation_polys.clone();

let domain_size = setup_cs.max_trace_len;
let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
let cfg = ProverContextConfig::default().with_smallest_supported_domain_size(domain_size);
let _ctx = ProverContext::create_with_config(cfg).expect("init gpu prover context");

let num_copy_permutation_polys = variables_hint.maps.len();
let gpu_setup = GpuSetup::<Global>::from_setup_and_hints(
Expand Down Expand Up @@ -289,7 +293,8 @@ fn test_setup_comparison() {
let _expected_permutation_polys = setup_base.copy_permutation_polys.clone();

let domain_size = setup_cs.max_trace_len;
let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
let cfg = ProverContextConfig::default().with_smallest_supported_domain_size(domain_size);
let _ctx = ProverContext::create_with_config(cfg).expect("init gpu prover context");

let expected_setup = GenericSetupStorage::from_host_values(&setup_base).unwrap();

Expand Down Expand Up @@ -424,7 +429,8 @@ fn test_proof_comparison_for_sha256() {
prover_config.merkle_tree_cap_size,
);
let domain_size = setup_cs.max_trace_len;
let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
let cfg = ProverContextConfig::default().with_smallest_supported_domain_size(domain_size);
let _ctx = ProverContext::create_with_config(cfg).expect("init gpu prover context");
let gpu_setup = GpuSetup::<Global>::from_setup_and_hints(
setup_base.clone(),
clone_reference_tree(&setup_tree),
Expand Down Expand Up @@ -1222,7 +1228,11 @@ mod zksync {
for i in 0..40 {
let num_blocks = 2560 - i * 64;
println!("num_blocks = {num_blocks}");
let ctx = ProverContext::create_limited(num_blocks).expect("gpu prover context");
let max_device_allocation =
(num_blocks * size_of::<F>()) << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH;
let cfg = ProverContextConfig::default()
.with_maximum_device_allocation(max_device_allocation);
let ctx = ProverContext::create_with_config(cfg).expect("gpu prover context");
// technically not needed because CacheStrategy::get calls it internally,
// but nice for peace of mind
_setup_cache_reset();
Expand Down Expand Up @@ -1365,7 +1375,8 @@ mod zksync {
proof_config.merkle_tree_cap_size,
);
let domain_size = setup_cs.max_trace_len;
let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
let cfg = ProverContextConfig::default().with_smallest_supported_domain_size(domain_size);
let _ctx = ProverContext::create_with_config(cfg).expect("init gpu prover context");
let (proving_cs, _) = init_or_synth_cs_for_sha256::<ProvingCSConfig, Global, true>(
finalization_hint.as_ref(),
);
Expand Down

0 comments on commit ca9e8f7

Please sign in to comment.