diff --git a/Cargo.toml b/Cargo.toml
index 2c35f6c..dcf5df6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,19 +6,19 @@ resolver = "2"
 
 [workspace.package]
 # All the packages in the workspace should have the same version
-version = "0.150.4"
+version = "0.150.5"
 
 [workspace.dependencies]
 # Local dependencies
-bindings-generator = { version = "=0.150.4", path = "crates/bindings-generator" }
-boojum-cuda = { version = "=0.150.4", path = "crates/boojum-cuda" }
-era_criterion_cuda = { version = "=0.150.4", path = "crates/criterion-cuda" }
-era_cudart = { version = "=0.150.4", path = "crates/cudart" }
-era_cudart_sys = { version = "=0.150.4", path = "crates/cudart-sys" }
-gpu-ffi = { version = "=0.150.4", path = "crates/gpu-ffi", package = "zksync-gpu-ffi" }
-gpu-prover = { version = "=0.150.4", path = "crates/gpu-prover", package = "zksync-gpu-prover" }
-shivini = { version = "=0.150.4", path = "crates/shivini" }
-wrapper-prover = { version = "=0.150.4", path = "crates/wrapper-prover", package = "zksync-wrapper-prover" }
+bindings-generator = { version = "=0.150.5", path = "crates/bindings-generator" }
+boojum-cuda = { version = "=0.150.5", path = "crates/boojum-cuda" }
+era_criterion_cuda = { version = "=0.150.5", path = "crates/criterion-cuda" }
+era_cudart = { version = "=0.150.5", path = "crates/cudart" }
+era_cudart_sys = { version = "=0.150.5", path = "crates/cudart-sys" }
+gpu-ffi = { version = "=0.150.5", path = "crates/gpu-ffi", package = "zksync-gpu-ffi" }
+gpu-prover = { version = "=0.150.5", path = "crates/gpu-prover", package = "zksync-gpu-prover" }
+shivini = { version = "=0.150.5", path = "crates/shivini" }
+wrapper-prover = { version = "=0.150.5", path = "crates/wrapper-prover", package = "zksync-wrapper-prover" }
 
 # These dependencies should be shared by all the crates.
 circuit_definitions = { version = "=0.150.4" }
diff --git a/crates/shivini/src/context.rs b/crates/shivini/src/context.rs
index a9c2346..8682ce2 100644
--- a/crates/shivini/src/context.rs
+++ b/crates/shivini/src/context.rs
@@ -35,7 +35,60 @@ static mut CONTEXT: Option<ProverContextSingleton> = None;
 
 pub struct ProverContext;
 
-pub const ZKSYNC_DEFAULT_TRACE_LOG_LENGTH: usize = 20;
+pub const ZKSYNC_DEFAULT_TRACE_LOG_LENGTH: u32 = 20;
+
+#[derive(Copy, Clone, Debug)]
+pub struct ProverContextConfig {
+    // minimum and maximum device allocations are in bytes
+    minimum_device_allocation: Option<usize>,
+    maximum_device_allocation: Option<usize>,
+    smallest_supported_domain_size: usize,
+    powers_of_w_coarse_log_count: u32,
+    powers_of_g_coarse_log_count: u32,
+}
+
+impl Default for ProverContextConfig {
+    fn default() -> Self {
+        Self {
+            minimum_device_allocation: None,
+            maximum_device_allocation: None,
+            smallest_supported_domain_size: 1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH,
+            powers_of_w_coarse_log_count: 12,
+            powers_of_g_coarse_log_count: 12,
+        }
+    }
+}
+
+impl ProverContextConfig {
+    pub fn with_minimum_device_allocation(mut self, minimum_device_allocation: usize) -> Self {
+        self.minimum_device_allocation = Some(minimum_device_allocation);
+        self
+    }
+
+    pub fn with_maximum_device_allocation(mut self, maximum_device_allocation: usize) -> Self {
+        self.maximum_device_allocation = Some(maximum_device_allocation);
+        self
+    }
+
+    pub fn with_smallest_supported_domain_size(
+        mut self,
+        smallest_supported_domain_size: usize,
+    ) -> Self {
+        assert!(smallest_supported_domain_size.is_power_of_two());
+        self.smallest_supported_domain_size = smallest_supported_domain_size;
+        self
+    }
+
+    pub fn with_powers_of_w_coarse_log_count(mut self, powers_of_w_coarse_log_count: u32) -> Self {
+        self.powers_of_w_coarse_log_count = powers_of_w_coarse_log_count;
+        self
+    }
+
+    pub fn with_powers_of_g_coarse_log_count(mut self, powers_of_g_coarse_log_count: u32) -> Self {
+        self.powers_of_g_coarse_log_count = powers_of_g_coarse_log_count;
+        self
+    }
+}
 
 impl ProverContext {
     fn create_internal(
@@ -100,50 +153,37 @@ impl ProverContext {
     }
 
+    /// Creates a prover context using `ProverContextConfig::default()`.
     pub fn create() -> CudaResult<Self> {
-        // size counts in field elements
-        let block_size = 1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH;
-        let cuda_ctx = CudaContext::create(12, 12)?;
-        // grab small slice then consume everything
-        let small_device_alloc = SmallStaticDeviceAllocator::init()?;
-        let device_alloc = StaticDeviceAllocator::init_all(block_size)?;
-        let small_host_alloc = SmallStaticHostAllocator::init()?;
-        let host_alloc = StaticHostAllocator::init(1 << 8, block_size)?;
-        Self::create_internal(
-            cuda_ctx,
-            small_device_alloc,
-            device_alloc,
-            small_host_alloc,
-            host_alloc,
-        )
-    }
-
-    #[cfg(test)]
-    pub(crate) fn create_limited(num_blocks: usize) -> CudaResult<Self> {
-        // size counts in field elements
-        let block_size = 1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH;
-        let cuda_ctx = CudaContext::create(12, 12)?;
-        // grab small slice then consume everything
-        let small_device_alloc = SmallStaticDeviceAllocator::init()?;
-        let device_alloc = StaticDeviceAllocator::init(num_blocks, num_blocks, block_size)?;
-        let small_host_alloc = SmallStaticHostAllocator::init()?;
-        let host_alloc = StaticHostAllocator::init(1 << 8, block_size)?;
-        Self::create_internal(
-            cuda_ctx,
-            small_device_alloc,
-            device_alloc,
-            small_host_alloc,
-            host_alloc,
-        )
+        Self::create_with_config(ProverContextConfig::default())
     }
 
-    #[cfg(test)]
-    pub(crate) fn dev(domain_size: usize) -> CudaResult<Self> {
-        assert!(domain_size.is_power_of_two());
+    /// Creates a prover context from an explicit configuration.
+    ///
+    /// Device allocation limits are given in bytes and converted to whole blocks of
+    /// `smallest_supported_domain_size` field elements (rounding down). Without a
+    /// maximum, the device allocator is set up via `StaticDeviceAllocator::init_all`.
+    pub fn create_with_config(config: ProverContextConfig) -> CudaResult<Self> {
         // size counts in field elements
-        let block_size = domain_size;
+        let block_size = config.smallest_supported_domain_size;
+        let block_size_in_bytes = block_size * size_of::<F>();
-        let cuda_ctx = CudaContext::create(12, 12)?;
+        // honor the configured coarse log counts instead of hard-coded values
+        let cuda_ctx = CudaContext::create(
+            config.powers_of_w_coarse_log_count,
+            config.powers_of_g_coarse_log_count,
+        )?;
         let small_device_alloc = SmallStaticDeviceAllocator::init()?;
-        let device_alloc = StaticDeviceAllocator::init_all(block_size)?;
+        let min_num_blocks = if let Some(min) = config.minimum_device_allocation {
+            // the allocator asserts a non-zero minimum, so keep at least one block
+            (min / block_size_in_bytes).max(1)
+        } else {
+            DEFAULT_MIN_NUM_BLOCKS
+        };
+        let device_alloc = if let Some(max) = config.maximum_device_allocation {
+            let max_num_blocks = max / block_size_in_bytes;
+            StaticDeviceAllocator::init(min_num_blocks, max_num_blocks, block_size)?
+        } else {
+            StaticDeviceAllocator::init_all(min_num_blocks, block_size)?
+        };
         let small_host_alloc = SmallStaticHostAllocator::init()?;
         let host_alloc = StaticHostAllocator::init(1 << 8, block_size)?;
         Self::create_internal(
diff --git a/crates/shivini/src/static_allocator/device.rs b/crates/shivini/src/static_allocator/device.rs
index e8bd31f..6193c5c 100644
--- a/crates/shivini/src/static_allocator/device.rs
+++ b/crates/shivini/src/static_allocator/device.rs
@@ -10,7 +10,8 @@ use std::ptr::NonNull;
 use std::sync::{Arc, Mutex};
 
 pub const FREE_MEMORY_SLACK: usize = 1 << 23; // 8 MB
-pub const MIN_NUM_BLOCKS: usize = 512;
+pub const DEFAULT_MIN_NUM_BLOCKS: usize = 512;
+pub const SMALL_ALLOCATOR_BLOCK_SIZE: usize = 32;
 pub const SMALL_ALLOCATOR_BLOCKS_COUNT: usize = 1 << 10; // 256 KB
 
 #[derive(Derivative)]
@@ -145,7 +146,7 @@ mod stats {
 impl Default for StaticDeviceAllocator {
     fn default() -> Self {
         let domain_size = 1 << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH;
-        Self::init_all(domain_size).unwrap()
+        Self::init_all(DEFAULT_MIN_NUM_BLOCKS, domain_size).unwrap()
     }
 }
 
@@ -166,8 +167,8 @@ impl StaticDeviceAllocator {
     }
 
     pub fn init(
-        max_num_blocks: usize,
         min_num_blocks: usize,
+        max_num_blocks: usize,
         block_size: usize,
     ) -> CudaResult<Self> {
         assert_ne!(min_num_blocks, 0);
@@ -205,14 +206,14 @@ impl StaticDeviceAllocator {
         Err(CudaError::ErrorMemoryAllocation)
     }
 
-    pub fn init_all(block_size: usize) -> CudaResult<Self> {
+    pub fn init_all(min_num_blocks: usize, block_size: usize) -> CudaResult<Self> {
         let block_size_in_bytes = block_size * std::mem::size_of::<F>();
         let (memory_size_in_bytes, _total) = memory_get_info().expect("get memory info");
         assert!(memory_size_in_bytes >= FREE_MEMORY_SLACK);
         let free_memory_size_in_bytes = memory_size_in_bytes - FREE_MEMORY_SLACK;
         assert!(free_memory_size_in_bytes >= block_size);
         let max_num_blocks = free_memory_size_in_bytes / block_size_in_bytes;
-        Self::init(max_num_blocks, MIN_NUM_BLOCKS, block_size)
+        Self::init(min_num_blocks, max_num_blocks, block_size)
     }
 
     fn find_free_block(&self) -> Option<usize> {
@@ -384,11 +385,10 @@ pub struct SmallStaticDeviceAllocator {
 impl SmallStaticDeviceAllocator {
     pub fn init() -> CudaResult<Self> {
         // cuda requires alignment to be  multiple of 32 goldilocks elems
-        const BLOCK_SIZE: usize = 32;
         let inner = StaticDeviceAllocator::init(
             SMALL_ALLOCATOR_BLOCKS_COUNT,
             SMALL_ALLOCATOR_BLOCKS_COUNT,
-            BLOCK_SIZE,
+            SMALL_ALLOCATOR_BLOCK_SIZE,
         )?;
         Ok(Self { inner })
     }
diff --git a/crates/shivini/src/test.rs b/crates/shivini/src/test.rs
index fa11b51..73d15c9 100644
--- a/crates/shivini/src/test.rs
+++ b/crates/shivini/src/test.rs
@@ -60,7 +60,10 @@ fn test_proof_comparison_for_poseidon_gate_with_private_witnesses() {
         prover_config.merkle_tree_cap_size,
     );
     let domain_size = setup_cs.max_trace_len;
-    let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
+    let _ctx = ProverContext::create_with_config(
+        ProverContextConfig::default().with_smallest_supported_domain_size(domain_size),
+    )
+    .expect("init gpu prover context");
     let gpu_setup = GpuSetup::<Global>::from_setup_and_hints(
         setup_base.clone(),
         clone_reference_tree(&setup_tree),
@@ -225,7 +228,8 @@ fn test_permutation_polys() {
     let expected_permutation_polys = setup_base.copy_permutation_polys.clone();
 
     let domain_size = setup_cs.max_trace_len;
-    let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
+    let cfg = ProverContextConfig::default().with_smallest_supported_domain_size(domain_size);
+    let _ctx = ProverContext::create_with_config(cfg).expect("init gpu prover context");
 
     let num_copy_permutation_polys = variables_hint.maps.len();
     let gpu_setup = GpuSetup::<Global>::from_setup_and_hints(
@@ -289,7 +293,8 @@ fn test_setup_comparison() {
     let _expected_permutation_polys = setup_base.copy_permutation_polys.clone();
 
     let domain_size = setup_cs.max_trace_len;
-    let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
+    let cfg = ProverContextConfig::default().with_smallest_supported_domain_size(domain_size);
+    let _ctx = ProverContext::create_with_config(cfg).expect("init gpu prover context");
 
     let expected_setup = GenericSetupStorage::from_host_values(&setup_base).unwrap();
 
@@ -424,7 +429,8 @@ fn test_proof_comparison_for_sha256() {
         prover_config.merkle_tree_cap_size,
     );
     let domain_size = setup_cs.max_trace_len;
-    let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
+    let cfg = ProverContextConfig::default().with_smallest_supported_domain_size(domain_size);
+    let _ctx = ProverContext::create_with_config(cfg).expect("init gpu prover context");
     let gpu_setup = GpuSetup::<Global>::from_setup_and_hints(
         setup_base.clone(),
         clone_reference_tree(&setup_tree),
@@ -1222,7 +1228,11 @@ mod zksync {
             for i in 0..40 {
                 let num_blocks = 2560 - i * 64;
                 println!("num_blocks = {num_blocks}");
-                let ctx = ProverContext::create_limited(num_blocks).expect("gpu prover context");
+                let bytes = (num_blocks * size_of::<F>()) << ZKSYNC_DEFAULT_TRACE_LOG_LENGTH;
+                let cfg = ProverContextConfig::default()
+                    .with_minimum_device_allocation(bytes) // min == max, as create_limited did
+                    .with_maximum_device_allocation(bytes);
+                let ctx = ProverContext::create_with_config(cfg).expect("gpu prover context");
                 // technically not needed because CacheStrategy::get calls it internally,
                 // but nice for peace of mind
                 _setup_cache_reset();
@@ -1365,7 +1375,8 @@ mod zksync {
             proof_config.merkle_tree_cap_size,
         );
         let domain_size = setup_cs.max_trace_len;
-        let _ctx = ProverContext::dev(domain_size).expect("init gpu prover context");
+        let cfg = ProverContextConfig::default().with_smallest_supported_domain_size(domain_size);
+        let _ctx = ProverContext::create_with_config(cfg).expect("init gpu prover context");
         let (proving_cs, _) = init_or_synth_cs_for_sha256::<ProvingCSConfig, Global, true>(
             finalization_hint.as_ref(),
         );